@livekit/agents 0.0.0-20260120144724

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (987)
  1. package/LICENSE +201 -0
  2. package/README.md +17 -0
  3. package/dist/_exceptions.cjs +109 -0
  4. package/dist/_exceptions.cjs.map +1 -0
  5. package/dist/_exceptions.d.cts +64 -0
  6. package/dist/_exceptions.d.ts +64 -0
  7. package/dist/_exceptions.d.ts.map +1 -0
  8. package/dist/_exceptions.js +80 -0
  9. package/dist/_exceptions.js.map +1 -0
  10. package/dist/audio.cjs +170 -0
  11. package/dist/audio.cjs.map +1 -0
  12. package/dist/audio.d.cts +46 -0
  13. package/dist/audio.d.ts +46 -0
  14. package/dist/audio.d.ts.map +1 -0
  15. package/dist/audio.js +133 -0
  16. package/dist/audio.js.map +1 -0
  17. package/dist/cli.cjs +171 -0
  18. package/dist/cli.cjs.map +1 -0
  19. package/dist/cli.d.cts +14 -0
  20. package/dist/cli.d.ts +14 -0
  21. package/dist/cli.d.ts.map +1 -0
  22. package/dist/cli.js +145 -0
  23. package/dist/cli.js.map +1 -0
  24. package/dist/connection_pool.cjs +242 -0
  25. package/dist/connection_pool.cjs.map +1 -0
  26. package/dist/connection_pool.d.cts +123 -0
  27. package/dist/connection_pool.d.ts +123 -0
  28. package/dist/connection_pool.d.ts.map +1 -0
  29. package/dist/connection_pool.js +218 -0
  30. package/dist/connection_pool.js.map +1 -0
  31. package/dist/connection_pool.test.cjs +256 -0
  32. package/dist/connection_pool.test.cjs.map +1 -0
  33. package/dist/connection_pool.test.js +255 -0
  34. package/dist/connection_pool.test.js.map +1 -0
  35. package/dist/constants.cjs +44 -0
  36. package/dist/constants.cjs.map +1 -0
  37. package/dist/constants.d.cts +7 -0
  38. package/dist/constants.d.ts +7 -0
  39. package/dist/constants.d.ts.map +1 -0
  40. package/dist/constants.js +15 -0
  41. package/dist/constants.js.map +1 -0
  42. package/dist/generator.cjs +36 -0
  43. package/dist/generator.cjs.map +1 -0
  44. package/dist/generator.d.cts +23 -0
  45. package/dist/generator.d.ts +23 -0
  46. package/dist/generator.d.ts.map +1 -0
  47. package/dist/generator.js +11 -0
  48. package/dist/generator.js.map +1 -0
  49. package/dist/http_server.cjs +75 -0
  50. package/dist/http_server.cjs.map +1 -0
  51. package/dist/http_server.d.cts +20 -0
  52. package/dist/http_server.d.ts +20 -0
  53. package/dist/http_server.d.ts.map +1 -0
  54. package/dist/http_server.js +51 -0
  55. package/dist/http_server.js.map +1 -0
  56. package/dist/index.cjs +100 -0
  57. package/dist/index.cjs.map +1 -0
  58. package/dist/index.d.cts +35 -0
  59. package/dist/index.d.ts +35 -0
  60. package/dist/index.d.ts.map +1 -0
  61. package/dist/index.js +40 -0
  62. package/dist/index.js.map +1 -0
  63. package/dist/inference/api_protos.cjs +104 -0
  64. package/dist/inference/api_protos.cjs.map +1 -0
  65. package/dist/inference/api_protos.d.cts +222 -0
  66. package/dist/inference/api_protos.d.ts +222 -0
  67. package/dist/inference/api_protos.d.ts.map +1 -0
  68. package/dist/inference/api_protos.js +70 -0
  69. package/dist/inference/api_protos.js.map +1 -0
  70. package/dist/inference/index.cjs +56 -0
  71. package/dist/inference/index.cjs.map +1 -0
  72. package/dist/inference/index.d.cts +8 -0
  73. package/dist/inference/index.d.ts +8 -0
  74. package/dist/inference/index.d.ts.map +1 -0
  75. package/dist/inference/index.js +23 -0
  76. package/dist/inference/index.js.map +1 -0
  77. package/dist/inference/interruption/AdaptiveInterruptionDetector.cjs +152 -0
  78. package/dist/inference/interruption/AdaptiveInterruptionDetector.cjs.map +1 -0
  79. package/dist/inference/interruption/AdaptiveInterruptionDetector.d.cts +50 -0
  80. package/dist/inference/interruption/AdaptiveInterruptionDetector.d.ts +50 -0
  81. package/dist/inference/interruption/AdaptiveInterruptionDetector.d.ts.map +1 -0
  82. package/dist/inference/interruption/AdaptiveInterruptionDetector.js +125 -0
  83. package/dist/inference/interruption/AdaptiveInterruptionDetector.js.map +1 -0
  84. package/dist/inference/interruption/InterruptionStream.cjs +310 -0
  85. package/dist/inference/interruption/InterruptionStream.cjs.map +1 -0
  86. package/dist/inference/interruption/InterruptionStream.d.cts +57 -0
  87. package/dist/inference/interruption/InterruptionStream.d.ts +57 -0
  88. package/dist/inference/interruption/InterruptionStream.d.ts.map +1 -0
  89. package/dist/inference/interruption/InterruptionStream.js +288 -0
  90. package/dist/inference/interruption/InterruptionStream.js.map +1 -0
  91. package/dist/inference/interruption/defaults.cjs +76 -0
  92. package/dist/inference/interruption/defaults.cjs.map +1 -0
  93. package/dist/inference/interruption/defaults.d.cts +14 -0
  94. package/dist/inference/interruption/defaults.d.ts +14 -0
  95. package/dist/inference/interruption/defaults.d.ts.map +1 -0
  96. package/dist/inference/interruption/defaults.js +42 -0
  97. package/dist/inference/interruption/defaults.js.map +1 -0
  98. package/dist/inference/interruption/errors.cjs +2 -0
  99. package/dist/inference/interruption/errors.cjs.map +1 -0
  100. package/dist/inference/interruption/errors.d.cts +2 -0
  101. package/dist/inference/interruption/errors.d.ts +2 -0
  102. package/dist/inference/interruption/errors.d.ts.map +1 -0
  103. package/dist/inference/interruption/errors.js +1 -0
  104. package/dist/inference/interruption/errors.js.map +1 -0
  105. package/dist/inference/interruption/http_transport.cjs +57 -0
  106. package/dist/inference/interruption/http_transport.cjs.map +1 -0
  107. package/dist/inference/interruption/http_transport.d.cts +23 -0
  108. package/dist/inference/interruption/http_transport.d.ts +23 -0
  109. package/dist/inference/interruption/http_transport.d.ts.map +1 -0
  110. package/dist/inference/interruption/http_transport.js +33 -0
  111. package/dist/inference/interruption/http_transport.js.map +1 -0
  112. package/dist/inference/interruption/index.cjs +34 -0
  113. package/dist/inference/interruption/index.cjs.map +1 -0
  114. package/dist/inference/interruption/index.d.cts +5 -0
  115. package/dist/inference/interruption/index.d.ts +5 -0
  116. package/dist/inference/interruption/index.d.ts.map +1 -0
  117. package/dist/inference/interruption/index.js +7 -0
  118. package/dist/inference/interruption/index.js.map +1 -0
  119. package/dist/inference/interruption/interruption.cjs +85 -0
  120. package/dist/inference/interruption/interruption.cjs.map +1 -0
  121. package/dist/inference/interruption/interruption.d.cts +48 -0
  122. package/dist/inference/interruption/interruption.d.ts +48 -0
  123. package/dist/inference/interruption/interruption.d.ts.map +1 -0
  124. package/dist/inference/interruption/interruption.js +59 -0
  125. package/dist/inference/interruption/interruption.js.map +1 -0
  126. package/dist/inference/llm.cjs +347 -0
  127. package/dist/inference/llm.cjs.map +1 -0
  128. package/dist/inference/llm.d.cts +114 -0
  129. package/dist/inference/llm.d.ts +114 -0
  130. package/dist/inference/llm.d.ts.map +1 -0
  131. package/dist/inference/llm.js +318 -0
  132. package/dist/inference/llm.js.map +1 -0
  133. package/dist/inference/stt.cjs +371 -0
  134. package/dist/inference/stt.cjs.map +1 -0
  135. package/dist/inference/stt.d.cts +91 -0
  136. package/dist/inference/stt.d.ts +91 -0
  137. package/dist/inference/stt.d.ts.map +1 -0
  138. package/dist/inference/stt.js +350 -0
  139. package/dist/inference/stt.js.map +1 -0
  140. package/dist/inference/tts.cjs +439 -0
  141. package/dist/inference/tts.cjs.map +1 -0
  142. package/dist/inference/tts.d.cts +80 -0
  143. package/dist/inference/tts.d.ts +80 -0
  144. package/dist/inference/tts.d.ts.map +1 -0
  145. package/dist/inference/tts.js +417 -0
  146. package/dist/inference/tts.js.map +1 -0
  147. package/dist/inference/utils.cjs +89 -0
  148. package/dist/inference/utils.cjs.map +1 -0
  149. package/dist/inference/utils.d.cts +6 -0
  150. package/dist/inference/utils.d.ts +6 -0
  151. package/dist/inference/utils.d.ts.map +1 -0
  152. package/dist/inference/utils.js +63 -0
  153. package/dist/inference/utils.js.map +1 -0
  154. package/dist/inference/utils.test.cjs +20 -0
  155. package/dist/inference/utils.test.cjs.map +1 -0
  156. package/dist/inference/utils.test.js +19 -0
  157. package/dist/inference/utils.test.js.map +1 -0
  158. package/dist/inference_runner.cjs +37 -0
  159. package/dist/inference_runner.cjs.map +1 -0
  160. package/dist/inference_runner.d.cts +11 -0
  161. package/dist/inference_runner.d.ts +11 -0
  162. package/dist/inference_runner.d.ts.map +1 -0
  163. package/dist/inference_runner.js +13 -0
  164. package/dist/inference_runner.js.map +1 -0
  165. package/dist/ipc/index.cjs +23 -0
  166. package/dist/ipc/index.cjs.map +1 -0
  167. package/dist/ipc/index.d.cts +2 -0
  168. package/dist/ipc/index.d.ts +2 -0
  169. package/dist/ipc/index.d.ts.map +1 -0
  170. package/dist/ipc/index.js +2 -0
  171. package/dist/ipc/index.js.map +1 -0
  172. package/dist/ipc/inference_executor.cjs +17 -0
  173. package/dist/ipc/inference_executor.cjs.map +1 -0
  174. package/dist/ipc/inference_executor.d.cts +4 -0
  175. package/dist/ipc/inference_executor.d.ts +4 -0
  176. package/dist/ipc/inference_executor.d.ts.map +1 -0
  177. package/dist/ipc/inference_executor.js +1 -0
  178. package/dist/ipc/inference_executor.js.map +1 -0
  179. package/dist/ipc/inference_proc_executor.cjs +101 -0
  180. package/dist/ipc/inference_proc_executor.cjs.map +1 -0
  181. package/dist/ipc/inference_proc_executor.d.cts +23 -0
  182. package/dist/ipc/inference_proc_executor.d.ts +23 -0
  183. package/dist/ipc/inference_proc_executor.d.ts.map +1 -0
  184. package/dist/ipc/inference_proc_executor.js +75 -0
  185. package/dist/ipc/inference_proc_executor.js.map +1 -0
  186. package/dist/ipc/inference_proc_lazy_main.cjs +86 -0
  187. package/dist/ipc/inference_proc_lazy_main.cjs.map +1 -0
  188. package/dist/ipc/inference_proc_lazy_main.d.cts +2 -0
  189. package/dist/ipc/inference_proc_lazy_main.d.ts +2 -0
  190. package/dist/ipc/inference_proc_lazy_main.d.ts.map +1 -0
  191. package/dist/ipc/inference_proc_lazy_main.js +85 -0
  192. package/dist/ipc/inference_proc_lazy_main.js.map +1 -0
  193. package/dist/ipc/job_executor.cjs +34 -0
  194. package/dist/ipc/job_executor.cjs.map +1 -0
  195. package/dist/ipc/job_executor.d.cts +18 -0
  196. package/dist/ipc/job_executor.d.ts +18 -0
  197. package/dist/ipc/job_executor.d.ts.map +1 -0
  198. package/dist/ipc/job_executor.js +10 -0
  199. package/dist/ipc/job_executor.js.map +1 -0
  200. package/dist/ipc/job_proc_executor.cjs +115 -0
  201. package/dist/ipc/job_proc_executor.cjs.map +1 -0
  202. package/dist/ipc/job_proc_executor.d.cts +19 -0
  203. package/dist/ipc/job_proc_executor.d.ts +19 -0
  204. package/dist/ipc/job_proc_executor.d.ts.map +1 -0
  205. package/dist/ipc/job_proc_executor.js +89 -0
  206. package/dist/ipc/job_proc_executor.js.map +1 -0
  207. package/dist/ipc/job_proc_lazy_main.cjs +210 -0
  208. package/dist/ipc/job_proc_lazy_main.cjs.map +1 -0
  209. package/dist/ipc/job_proc_lazy_main.d.cts +2 -0
  210. package/dist/ipc/job_proc_lazy_main.d.ts +2 -0
  211. package/dist/ipc/job_proc_lazy_main.d.ts.map +1 -0
  212. package/dist/ipc/job_proc_lazy_main.js +187 -0
  213. package/dist/ipc/job_proc_lazy_main.js.map +1 -0
  214. package/dist/ipc/message.cjs +17 -0
  215. package/dist/ipc/message.cjs.map +1 -0
  216. package/dist/ipc/message.d.cts +58 -0
  217. package/dist/ipc/message.d.ts +58 -0
  218. package/dist/ipc/message.d.ts.map +1 -0
  219. package/dist/ipc/message.js +1 -0
  220. package/dist/ipc/message.js.map +1 -0
  221. package/dist/ipc/proc_pool.cjs +164 -0
  222. package/dist/ipc/proc_pool.cjs.map +1 -0
  223. package/dist/ipc/proc_pool.d.cts +31 -0
  224. package/dist/ipc/proc_pool.d.ts +31 -0
  225. package/dist/ipc/proc_pool.d.ts.map +1 -0
  226. package/dist/ipc/proc_pool.js +140 -0
  227. package/dist/ipc/proc_pool.js.map +1 -0
  228. package/dist/ipc/supervised_proc.cjs +229 -0
  229. package/dist/ipc/supervised_proc.cjs.map +1 -0
  230. package/dist/ipc/supervised_proc.d.cts +32 -0
  231. package/dist/ipc/supervised_proc.d.ts +32 -0
  232. package/dist/ipc/supervised_proc.d.ts.map +1 -0
  233. package/dist/ipc/supervised_proc.js +195 -0
  234. package/dist/ipc/supervised_proc.js.map +1 -0
  235. package/dist/ipc/supervised_proc.test.cjs +145 -0
  236. package/dist/ipc/supervised_proc.test.cjs.map +1 -0
  237. package/dist/ipc/supervised_proc.test.js +122 -0
  238. package/dist/ipc/supervised_proc.test.js.map +1 -0
  239. package/dist/job.cjs +373 -0
  240. package/dist/job.cjs.map +1 -0
  241. package/dist/job.d.cts +141 -0
  242. package/dist/job.d.ts +141 -0
  243. package/dist/job.d.ts.map +1 -0
  244. package/dist/job.js +332 -0
  245. package/dist/job.js.map +1 -0
  246. package/dist/llm/chat_context.cjs +527 -0
  247. package/dist/llm/chat_context.cjs.map +1 -0
  248. package/dist/llm/chat_context.d.cts +223 -0
  249. package/dist/llm/chat_context.d.ts +223 -0
  250. package/dist/llm/chat_context.d.ts.map +1 -0
  251. package/dist/llm/chat_context.js +496 -0
  252. package/dist/llm/chat_context.js.map +1 -0
  253. package/dist/llm/chat_context.test.cjs +911 -0
  254. package/dist/llm/chat_context.test.cjs.map +1 -0
  255. package/dist/llm/chat_context.test.js +916 -0
  256. package/dist/llm/chat_context.test.js.map +1 -0
  257. package/dist/llm/fallback_adapter.cjs +278 -0
  258. package/dist/llm/fallback_adapter.cjs.map +1 -0
  259. package/dist/llm/fallback_adapter.d.cts +73 -0
  260. package/dist/llm/fallback_adapter.d.ts +73 -0
  261. package/dist/llm/fallback_adapter.d.ts.map +1 -0
  262. package/dist/llm/fallback_adapter.js +254 -0
  263. package/dist/llm/fallback_adapter.js.map +1 -0
  264. package/dist/llm/fallback_adapter.test.cjs +176 -0
  265. package/dist/llm/fallback_adapter.test.cjs.map +1 -0
  266. package/dist/llm/fallback_adapter.test.js +175 -0
  267. package/dist/llm/fallback_adapter.test.js.map +1 -0
  268. package/dist/llm/index.cjs +79 -0
  269. package/dist/llm/index.cjs.map +1 -0
  270. package/dist/llm/index.d.cts +9 -0
  271. package/dist/llm/index.d.ts +9 -0
  272. package/dist/llm/index.d.ts.map +1 -0
  273. package/dist/llm/index.js +61 -0
  274. package/dist/llm/index.js.map +1 -0
  275. package/dist/llm/llm.cjs +226 -0
  276. package/dist/llm/llm.cjs.map +1 -0
  277. package/dist/llm/llm.d.cts +94 -0
  278. package/dist/llm/llm.d.ts +94 -0
  279. package/dist/llm/llm.d.ts.map +1 -0
  280. package/dist/llm/llm.js +201 -0
  281. package/dist/llm/llm.js.map +1 -0
  282. package/dist/llm/provider_format/google.cjs +132 -0
  283. package/dist/llm/provider_format/google.cjs.map +1 -0
  284. package/dist/llm/provider_format/google.d.cts +6 -0
  285. package/dist/llm/provider_format/google.d.ts +6 -0
  286. package/dist/llm/provider_format/google.d.ts.map +1 -0
  287. package/dist/llm/provider_format/google.js +108 -0
  288. package/dist/llm/provider_format/google.js.map +1 -0
  289. package/dist/llm/provider_format/google.test.cjs +724 -0
  290. package/dist/llm/provider_format/google.test.cjs.map +1 -0
  291. package/dist/llm/provider_format/google.test.js +728 -0
  292. package/dist/llm/provider_format/google.test.js.map +1 -0
  293. package/dist/llm/provider_format/index.cjs +40 -0
  294. package/dist/llm/provider_format/index.cjs.map +1 -0
  295. package/dist/llm/provider_format/index.d.cts +4 -0
  296. package/dist/llm/provider_format/index.d.ts +4 -0
  297. package/dist/llm/provider_format/index.d.ts.map +1 -0
  298. package/dist/llm/provider_format/index.js +16 -0
  299. package/dist/llm/provider_format/index.js.map +1 -0
  300. package/dist/llm/provider_format/openai.cjs +138 -0
  301. package/dist/llm/provider_format/openai.cjs.map +1 -0
  302. package/dist/llm/provider_format/openai.d.cts +3 -0
  303. package/dist/llm/provider_format/openai.d.ts +3 -0
  304. package/dist/llm/provider_format/openai.d.ts.map +1 -0
  305. package/dist/llm/provider_format/openai.js +114 -0
  306. package/dist/llm/provider_format/openai.js.map +1 -0
  307. package/dist/llm/provider_format/openai.test.cjs +557 -0
  308. package/dist/llm/provider_format/openai.test.cjs.map +1 -0
  309. package/dist/llm/provider_format/openai.test.js +561 -0
  310. package/dist/llm/provider_format/openai.test.js.map +1 -0
  311. package/dist/llm/provider_format/utils.cjs +146 -0
  312. package/dist/llm/provider_format/utils.cjs.map +1 -0
  313. package/dist/llm/provider_format/utils.d.cts +38 -0
  314. package/dist/llm/provider_format/utils.d.ts +38 -0
  315. package/dist/llm/provider_format/utils.d.ts.map +1 -0
  316. package/dist/llm/provider_format/utils.js +122 -0
  317. package/dist/llm/provider_format/utils.js.map +1 -0
  318. package/dist/llm/realtime.cjs +77 -0
  319. package/dist/llm/realtime.cjs.map +1 -0
  320. package/dist/llm/realtime.d.cts +106 -0
  321. package/dist/llm/realtime.d.ts +106 -0
  322. package/dist/llm/realtime.d.ts.map +1 -0
  323. package/dist/llm/realtime.js +52 -0
  324. package/dist/llm/realtime.js.map +1 -0
  325. package/dist/llm/remote_chat_context.cjs +112 -0
  326. package/dist/llm/remote_chat_context.cjs.map +1 -0
  327. package/dist/llm/remote_chat_context.d.cts +25 -0
  328. package/dist/llm/remote_chat_context.d.ts +25 -0
  329. package/dist/llm/remote_chat_context.d.ts.map +1 -0
  330. package/dist/llm/remote_chat_context.js +88 -0
  331. package/dist/llm/remote_chat_context.js.map +1 -0
  332. package/dist/llm/remote_chat_context.test.cjs +225 -0
  333. package/dist/llm/remote_chat_context.test.cjs.map +1 -0
  334. package/dist/llm/remote_chat_context.test.js +224 -0
  335. package/dist/llm/remote_chat_context.test.js.map +1 -0
  336. package/dist/llm/tool_context.cjs +152 -0
  337. package/dist/llm/tool_context.cjs.map +1 -0
  338. package/dist/llm/tool_context.d.cts +153 -0
  339. package/dist/llm/tool_context.d.ts +153 -0
  340. package/dist/llm/tool_context.d.ts.map +1 -0
  341. package/dist/llm/tool_context.js +119 -0
  342. package/dist/llm/tool_context.js.map +1 -0
  343. package/dist/llm/tool_context.test.cjs +359 -0
  344. package/dist/llm/tool_context.test.cjs.map +1 -0
  345. package/dist/llm/tool_context.test.js +336 -0
  346. package/dist/llm/tool_context.test.js.map +1 -0
  347. package/dist/llm/tool_context.type.test.cjs +92 -0
  348. package/dist/llm/tool_context.type.test.cjs.map +1 -0
  349. package/dist/llm/tool_context.type.test.js +91 -0
  350. package/dist/llm/tool_context.type.test.js.map +1 -0
  351. package/dist/llm/utils.cjs +267 -0
  352. package/dist/llm/utils.cjs.map +1 -0
  353. package/dist/llm/utils.d.cts +41 -0
  354. package/dist/llm/utils.d.ts +41 -0
  355. package/dist/llm/utils.d.ts.map +1 -0
  356. package/dist/llm/utils.js +230 -0
  357. package/dist/llm/utils.js.map +1 -0
  358. package/dist/llm/utils.test.cjs +513 -0
  359. package/dist/llm/utils.test.cjs.map +1 -0
  360. package/dist/llm/utils.test.js +490 -0
  361. package/dist/llm/utils.test.js.map +1 -0
  362. package/dist/llm/zod-utils.cjs +102 -0
  363. package/dist/llm/zod-utils.cjs.map +1 -0
  364. package/dist/llm/zod-utils.d.cts +65 -0
  365. package/dist/llm/zod-utils.d.ts +65 -0
  366. package/dist/llm/zod-utils.d.ts.map +1 -0
  367. package/dist/llm/zod-utils.js +64 -0
  368. package/dist/llm/zod-utils.js.map +1 -0
  369. package/dist/llm/zod-utils.test.cjs +472 -0
  370. package/dist/llm/zod-utils.test.cjs.map +1 -0
  371. package/dist/llm/zod-utils.test.js +455 -0
  372. package/dist/llm/zod-utils.test.js.map +1 -0
  373. package/dist/log.cjs +81 -0
  374. package/dist/log.cjs.map +1 -0
  375. package/dist/log.d.cts +20 -0
  376. package/dist/log.d.ts +20 -0
  377. package/dist/log.d.ts.map +1 -0
  378. package/dist/log.js +54 -0
  379. package/dist/log.js.map +1 -0
  380. package/dist/metrics/base.cjs +17 -0
  381. package/dist/metrics/base.cjs.map +1 -0
  382. package/dist/metrics/base.d.cts +150 -0
  383. package/dist/metrics/base.d.ts +150 -0
  384. package/dist/metrics/base.d.ts.map +1 -0
  385. package/dist/metrics/base.js +1 -0
  386. package/dist/metrics/base.js.map +1 -0
  387. package/dist/metrics/index.cjs +32 -0
  388. package/dist/metrics/index.cjs.map +1 -0
  389. package/dist/metrics/index.d.cts +4 -0
  390. package/dist/metrics/index.d.ts +4 -0
  391. package/dist/metrics/index.d.ts.map +1 -0
  392. package/dist/metrics/index.js +7 -0
  393. package/dist/metrics/index.js.map +1 -0
  394. package/dist/metrics/usage_collector.cjs +58 -0
  395. package/dist/metrics/usage_collector.cjs.map +1 -0
  396. package/dist/metrics/usage_collector.d.cts +15 -0
  397. package/dist/metrics/usage_collector.d.ts +15 -0
  398. package/dist/metrics/usage_collector.d.ts.map +1 -0
  399. package/dist/metrics/usage_collector.js +34 -0
  400. package/dist/metrics/usage_collector.js.map +1 -0
  401. package/dist/metrics/utils.cjs +74 -0
  402. package/dist/metrics/utils.cjs.map +1 -0
  403. package/dist/metrics/utils.d.cts +3 -0
  404. package/dist/metrics/utils.d.ts +3 -0
  405. package/dist/metrics/utils.d.ts.map +1 -0
  406. package/dist/metrics/utils.js +50 -0
  407. package/dist/metrics/utils.js.map +1 -0
  408. package/dist/plugin.cjs +62 -0
  409. package/dist/plugin.cjs.map +1 -0
  410. package/dist/plugin.d.cts +24 -0
  411. package/dist/plugin.d.ts +24 -0
  412. package/dist/plugin.d.ts.map +1 -0
  413. package/dist/plugin.js +37 -0
  414. package/dist/plugin.js.map +1 -0
  415. package/dist/stream/deferred_stream.cjs +106 -0
  416. package/dist/stream/deferred_stream.cjs.map +1 -0
  417. package/dist/stream/deferred_stream.d.cts +32 -0
  418. package/dist/stream/deferred_stream.d.ts +32 -0
  419. package/dist/stream/deferred_stream.d.ts.map +1 -0
  420. package/dist/stream/deferred_stream.js +81 -0
  421. package/dist/stream/deferred_stream.js.map +1 -0
  422. package/dist/stream/deferred_stream.test.cjs +527 -0
  423. package/dist/stream/deferred_stream.test.cjs.map +1 -0
  424. package/dist/stream/deferred_stream.test.js +526 -0
  425. package/dist/stream/deferred_stream.test.js.map +1 -0
  426. package/dist/stream/identity_transform.cjs +42 -0
  427. package/dist/stream/identity_transform.cjs.map +1 -0
  428. package/dist/stream/identity_transform.d.cts +6 -0
  429. package/dist/stream/identity_transform.d.ts +6 -0
  430. package/dist/stream/identity_transform.d.ts.map +1 -0
  431. package/dist/stream/identity_transform.js +18 -0
  432. package/dist/stream/identity_transform.js.map +1 -0
  433. package/dist/stream/identity_transform.test.cjs +125 -0
  434. package/dist/stream/identity_transform.test.cjs.map +1 -0
  435. package/dist/stream/identity_transform.test.js +124 -0
  436. package/dist/stream/identity_transform.test.js.map +1 -0
  437. package/dist/stream/index.cjs +38 -0
  438. package/dist/stream/index.cjs.map +1 -0
  439. package/dist/stream/index.d.cts +5 -0
  440. package/dist/stream/index.d.ts +5 -0
  441. package/dist/stream/index.d.ts.map +1 -0
  442. package/dist/stream/index.js +11 -0
  443. package/dist/stream/index.js.map +1 -0
  444. package/dist/stream/merge_readable_streams.cjs +59 -0
  445. package/dist/stream/merge_readable_streams.cjs.map +1 -0
  446. package/dist/stream/merge_readable_streams.d.cts +4 -0
  447. package/dist/stream/merge_readable_streams.d.ts +4 -0
  448. package/dist/stream/merge_readable_streams.d.ts.map +1 -0
  449. package/dist/stream/merge_readable_streams.js +35 -0
  450. package/dist/stream/merge_readable_streams.js.map +1 -0
  451. package/dist/stream/stream_channel.cjs +57 -0
  452. package/dist/stream/stream_channel.cjs.map +1 -0
  453. package/dist/stream/stream_channel.d.cts +11 -0
  454. package/dist/stream/stream_channel.d.ts +11 -0
  455. package/dist/stream/stream_channel.d.ts.map +1 -0
  456. package/dist/stream/stream_channel.js +33 -0
  457. package/dist/stream/stream_channel.js.map +1 -0
  458. package/dist/stream/stream_channel.test.cjs +124 -0
  459. package/dist/stream/stream_channel.test.cjs.map +1 -0
  460. package/dist/stream/stream_channel.test.js +123 -0
  461. package/dist/stream/stream_channel.test.js.map +1 -0
  462. package/dist/stt/index.cjs +38 -0
  463. package/dist/stt/index.cjs.map +1 -0
  464. package/dist/stt/index.d.cts +3 -0
  465. package/dist/stt/index.d.ts +3 -0
  466. package/dist/stt/index.d.ts.map +1 -0
  467. package/dist/stt/index.js +14 -0
  468. package/dist/stt/index.js.map +1 -0
  469. package/dist/stt/stream_adapter.cjs +115 -0
  470. package/dist/stt/stream_adapter.cjs.map +1 -0
  471. package/dist/stt/stream_adapter.d.cts +23 -0
  472. package/dist/stt/stream_adapter.d.ts +23 -0
  473. package/dist/stt/stream_adapter.d.ts.map +1 -0
  474. package/dist/stt/stream_adapter.js +90 -0
  475. package/dist/stt/stream_adapter.js.map +1 -0
  476. package/dist/stt/stt.cjs +253 -0
  477. package/dist/stt/stt.cjs.map +1 -0
  478. package/dist/stt/stt.d.cts +158 -0
  479. package/dist/stt/stt.d.ts +158 -0
  480. package/dist/stt/stt.d.ts.map +1 -0
  481. package/dist/stt/stt.js +227 -0
  482. package/dist/stt/stt.js.map +1 -0
  483. package/dist/telemetry/index.cjs +72 -0
  484. package/dist/telemetry/index.cjs.map +1 -0
  485. package/dist/telemetry/index.d.cts +7 -0
  486. package/dist/telemetry/index.d.ts +7 -0
  487. package/dist/telemetry/index.d.ts.map +1 -0
  488. package/dist/telemetry/index.js +37 -0
  489. package/dist/telemetry/index.js.map +1 -0
  490. package/dist/telemetry/logging.cjs +65 -0
  491. package/dist/telemetry/logging.cjs.map +1 -0
  492. package/dist/telemetry/logging.d.cts +21 -0
  493. package/dist/telemetry/logging.d.ts +21 -0
  494. package/dist/telemetry/logging.d.ts.map +1 -0
  495. package/dist/telemetry/logging.js +40 -0
  496. package/dist/telemetry/logging.js.map +1 -0
  497. package/dist/telemetry/otel_http_exporter.cjs +147 -0
  498. package/dist/telemetry/otel_http_exporter.cjs.map +1 -0
  499. package/dist/telemetry/otel_http_exporter.d.cts +62 -0
  500. package/dist/telemetry/otel_http_exporter.d.ts +62 -0
  501. package/dist/telemetry/otel_http_exporter.d.ts.map +1 -0
  502. package/dist/telemetry/otel_http_exporter.js +123 -0
  503. package/dist/telemetry/otel_http_exporter.js.map +1 -0
  504. package/dist/telemetry/pino_otel_transport.cjs +217 -0
  505. package/dist/telemetry/pino_otel_transport.cjs.map +1 -0
  506. package/dist/telemetry/pino_otel_transport.d.cts +58 -0
  507. package/dist/telemetry/pino_otel_transport.d.ts +58 -0
  508. package/dist/telemetry/pino_otel_transport.d.ts.map +1 -0
  509. package/dist/telemetry/pino_otel_transport.js +189 -0
  510. package/dist/telemetry/pino_otel_transport.js.map +1 -0
  511. package/dist/telemetry/trace_types.cjs +206 -0
  512. package/dist/telemetry/trace_types.cjs.map +1 -0
  513. package/dist/telemetry/trace_types.d.cts +61 -0
  514. package/dist/telemetry/trace_types.d.ts +61 -0
  515. package/dist/telemetry/trace_types.d.ts.map +1 -0
  516. package/dist/telemetry/trace_types.js +123 -0
  517. package/dist/telemetry/trace_types.js.map +1 -0
  518. package/dist/telemetry/traces.cjs +444 -0
  519. package/dist/telemetry/traces.cjs.map +1 -0
  520. package/dist/telemetry/traces.d.cts +114 -0
  521. package/dist/telemetry/traces.d.ts +114 -0
  522. package/dist/telemetry/traces.d.ts.map +1 -0
  523. package/dist/telemetry/traces.js +409 -0
  524. package/dist/telemetry/traces.js.map +1 -0
  525. package/dist/telemetry/utils.cjs +86 -0
  526. package/dist/telemetry/utils.cjs.map +1 -0
  527. package/dist/telemetry/utils.d.cts +5 -0
  528. package/dist/telemetry/utils.d.ts +5 -0
  529. package/dist/telemetry/utils.d.ts.map +1 -0
  530. package/dist/telemetry/utils.js +51 -0
  531. package/dist/telemetry/utils.js.map +1 -0
  532. package/dist/tokenize/basic/basic.cjs +105 -0
  533. package/dist/tokenize/basic/basic.cjs.map +1 -0
  534. package/dist/tokenize/basic/basic.d.cts +24 -0
  535. package/dist/tokenize/basic/basic.d.ts +24 -0
  536. package/dist/tokenize/basic/basic.d.ts.map +1 -0
  537. package/dist/tokenize/basic/basic.js +67 -0
  538. package/dist/tokenize/basic/basic.js.map +1 -0
  539. package/dist/tokenize/basic/hyphenator.cjs +425 -0
  540. package/dist/tokenize/basic/hyphenator.cjs.map +1 -0
  541. package/dist/tokenize/basic/hyphenator.d.cts +17 -0
  542. package/dist/tokenize/basic/hyphenator.d.ts +17 -0
  543. package/dist/tokenize/basic/hyphenator.d.ts.map +1 -0
  544. package/dist/tokenize/basic/hyphenator.js +401 -0
  545. package/dist/tokenize/basic/hyphenator.js.map +1 -0
  546. package/dist/tokenize/basic/index.cjs +37 -0
  547. package/dist/tokenize/basic/index.cjs.map +1 -0
  548. package/dist/tokenize/basic/index.d.cts +2 -0
  549. package/dist/tokenize/basic/index.d.ts +2 -0
  550. package/dist/tokenize/basic/index.d.ts.map +1 -0
  551. package/dist/tokenize/basic/index.js +15 -0
  552. package/dist/tokenize/basic/index.js.map +1 -0
  553. package/dist/tokenize/basic/paragraph.cjs +57 -0
  554. package/dist/tokenize/basic/paragraph.cjs.map +1 -0
  555. package/dist/tokenize/basic/paragraph.d.cts +5 -0
  556. package/dist/tokenize/basic/paragraph.d.ts +5 -0
  557. package/dist/tokenize/basic/paragraph.d.ts.map +1 -0
  558. package/dist/tokenize/basic/paragraph.js +33 -0
  559. package/dist/tokenize/basic/paragraph.js.map +1 -0
  560. package/dist/tokenize/basic/sentence.cjs +97 -0
  561. package/dist/tokenize/basic/sentence.cjs.map +1 -0
  562. package/dist/tokenize/basic/sentence.d.cts +5 -0
  563. package/dist/tokenize/basic/sentence.d.ts +5 -0
  564. package/dist/tokenize/basic/sentence.d.ts.map +1 -0
  565. package/dist/tokenize/basic/sentence.js +73 -0
  566. package/dist/tokenize/basic/sentence.js.map +1 -0
  567. package/dist/tokenize/basic/word.cjs +44 -0
  568. package/dist/tokenize/basic/word.cjs.map +1 -0
  569. package/dist/tokenize/basic/word.d.cts +5 -0
  570. package/dist/tokenize/basic/word.d.ts +5 -0
  571. package/dist/tokenize/basic/word.d.ts.map +1 -0
  572. package/dist/tokenize/basic/word.js +20 -0
  573. package/dist/tokenize/basic/word.js.map +1 -0
  574. package/dist/tokenize/index.cjs +55 -0
  575. package/dist/tokenize/index.cjs.map +1 -0
  576. package/dist/tokenize/index.d.cts +5 -0
  577. package/dist/tokenize/index.d.ts +5 -0
  578. package/dist/tokenize/index.d.ts.map +1 -0
  579. package/dist/tokenize/index.js +19 -0
  580. package/dist/tokenize/index.js.map +1 -0
  581. package/dist/tokenize/token_stream.cjs +168 -0
  582. package/dist/tokenize/token_stream.cjs.map +1 -0
  583. package/dist/tokenize/token_stream.d.cts +40 -0
  584. package/dist/tokenize/token_stream.d.ts +40 -0
  585. package/dist/tokenize/token_stream.d.ts.map +1 -0
  586. package/dist/tokenize/token_stream.js +142 -0
  587. package/dist/tokenize/token_stream.js.map +1 -0
  588. package/dist/tokenize/tokenizer.cjs +184 -0
  589. package/dist/tokenize/tokenizer.cjs.map +1 -0
  590. package/dist/tokenize/tokenizer.d.cts +55 -0
  591. package/dist/tokenize/tokenizer.d.ts +55 -0
  592. package/dist/tokenize/tokenizer.d.ts.map +1 -0
  593. package/dist/tokenize/tokenizer.js +156 -0
  594. package/dist/tokenize/tokenizer.js.map +1 -0
  595. package/dist/tokenize/tokenizer.test.cjs +220 -0
  596. package/dist/tokenize/tokenizer.test.cjs.map +1 -0
  597. package/dist/tokenize/tokenizer.test.js +219 -0
  598. package/dist/tokenize/tokenizer.test.js.map +1 -0
  599. package/dist/transcription.cjs +247 -0
  600. package/dist/transcription.cjs.map +1 -0
  601. package/dist/transcription.d.cts +31 -0
  602. package/dist/transcription.d.ts +31 -0
  603. package/dist/transcription.d.ts.map +1 -0
  604. package/dist/transcription.js +222 -0
  605. package/dist/transcription.js.map +1 -0
  606. package/dist/tts/index.cjs +38 -0
  607. package/dist/tts/index.cjs.map +1 -0
  608. package/dist/tts/index.d.cts +3 -0
  609. package/dist/tts/index.d.ts +3 -0
  610. package/dist/tts/index.d.ts.map +1 -0
  611. package/dist/tts/index.js +14 -0
  612. package/dist/tts/index.js.map +1 -0
  613. package/dist/tts/stream_adapter.cjs +105 -0
  614. package/dist/tts/stream_adapter.cjs.map +1 -0
  615. package/dist/tts/stream_adapter.d.cts +20 -0
  616. package/dist/tts/stream_adapter.d.ts +20 -0
  617. package/dist/tts/stream_adapter.d.ts.map +1 -0
  618. package/dist/tts/stream_adapter.js +80 -0
  619. package/dist/tts/stream_adapter.js.map +1 -0
  620. package/dist/tts/tts.cjs +431 -0
  621. package/dist/tts/tts.cjs.map +1 -0
  622. package/dist/tts/tts.d.cts +161 -0
  623. package/dist/tts/tts.d.ts +161 -0
  624. package/dist/tts/tts.d.ts.map +1 -0
  625. package/dist/tts/tts.js +405 -0
  626. package/dist/tts/tts.js.map +1 -0
  627. package/dist/types.cjs +49 -0
  628. package/dist/types.cjs.map +1 -0
  629. package/dist/types.d.cts +44 -0
  630. package/dist/types.d.ts +44 -0
  631. package/dist/types.d.ts.map +1 -0
  632. package/dist/types.js +23 -0
  633. package/dist/types.js.map +1 -0
  634. package/dist/utils/ws_transport.cjs +51 -0
  635. package/dist/utils/ws_transport.cjs.map +1 -0
  636. package/dist/utils/ws_transport.d.cts +9 -0
  637. package/dist/utils/ws_transport.d.ts +9 -0
  638. package/dist/utils/ws_transport.d.ts.map +1 -0
  639. package/dist/utils/ws_transport.js +17 -0
  640. package/dist/utils/ws_transport.js.map +1 -0
  641. package/dist/utils/ws_transport.test.cjs +212 -0
  642. package/dist/utils/ws_transport.test.cjs.map +1 -0
  643. package/dist/utils/ws_transport.test.js +211 -0
  644. package/dist/utils/ws_transport.test.js.map +1 -0
  645. package/dist/utils.cjs +669 -0
  646. package/dist/utils.cjs.map +1 -0
  647. package/dist/utils.d.cts +244 -0
  648. package/dist/utils.d.ts +244 -0
  649. package/dist/utils.d.ts.map +1 -0
  650. package/dist/utils.js +617 -0
  651. package/dist/utils.js.map +1 -0
  652. package/dist/utils.test.cjs +492 -0
  653. package/dist/utils.test.cjs.map +1 -0
  654. package/dist/utils.test.js +491 -0
  655. package/dist/utils.test.js.map +1 -0
  656. package/dist/vad.cjs +211 -0
  657. package/dist/vad.cjs.map +1 -0
  658. package/dist/vad.d.cts +105 -0
  659. package/dist/vad.d.ts +105 -0
  660. package/dist/vad.d.ts.map +1 -0
  661. package/dist/vad.js +185 -0
  662. package/dist/vad.js.map +1 -0
  663. package/dist/version.cjs +29 -0
  664. package/dist/version.cjs.map +1 -0
  665. package/dist/version.d.cts +2 -0
  666. package/dist/version.d.ts +2 -0
  667. package/dist/version.d.ts.map +1 -0
  668. package/dist/version.js +5 -0
  669. package/dist/version.js.map +1 -0
  670. package/dist/voice/agent.cjs +308 -0
  671. package/dist/voice/agent.cjs.map +1 -0
  672. package/dist/voice/agent.d.cts +83 -0
  673. package/dist/voice/agent.d.ts +83 -0
  674. package/dist/voice/agent.d.ts.map +1 -0
  675. package/dist/voice/agent.js +287 -0
  676. package/dist/voice/agent.js.map +1 -0
  677. package/dist/voice/agent.test.cjs +61 -0
  678. package/dist/voice/agent.test.cjs.map +1 -0
  679. package/dist/voice/agent.test.js +60 -0
  680. package/dist/voice/agent.test.js.map +1 -0
  681. package/dist/voice/agent_activity.cjs +1784 -0
  682. package/dist/voice/agent_activity.cjs.map +1 -0
  683. package/dist/voice/agent_activity.d.cts +116 -0
  684. package/dist/voice/agent_activity.d.ts +116 -0
  685. package/dist/voice/agent_activity.d.ts.map +1 -0
  686. package/dist/voice/agent_activity.js +1780 -0
  687. package/dist/voice/agent_activity.js.map +1 -0
  688. package/dist/voice/agent_session.cjs +592 -0
  689. package/dist/voice/agent_session.cjs.map +1 -0
  690. package/dist/voice/agent_session.d.cts +165 -0
  691. package/dist/voice/agent_session.d.ts +165 -0
  692. package/dist/voice/agent_session.d.ts.map +1 -0
  693. package/dist/voice/agent_session.js +582 -0
  694. package/dist/voice/agent_session.js.map +1 -0
  695. package/dist/voice/audio_recognition.cjs +668 -0
  696. package/dist/voice/audio_recognition.cjs.map +1 -0
  697. package/dist/voice/audio_recognition.d.cts +127 -0
  698. package/dist/voice/audio_recognition.d.ts +127 -0
  699. package/dist/voice/audio_recognition.d.ts.map +1 -0
  700. package/dist/voice/audio_recognition.js +647 -0
  701. package/dist/voice/audio_recognition.js.map +1 -0
  702. package/dist/voice/avatar/datastream_io.cjs +204 -0
  703. package/dist/voice/avatar/datastream_io.cjs.map +1 -0
  704. package/dist/voice/avatar/datastream_io.d.cts +37 -0
  705. package/dist/voice/avatar/datastream_io.d.ts +37 -0
  706. package/dist/voice/avatar/datastream_io.d.ts.map +1 -0
  707. package/dist/voice/avatar/datastream_io.js +188 -0
  708. package/dist/voice/avatar/datastream_io.js.map +1 -0
  709. package/dist/voice/avatar/index.cjs +23 -0
  710. package/dist/voice/avatar/index.cjs.map +1 -0
  711. package/dist/voice/avatar/index.d.cts +2 -0
  712. package/dist/voice/avatar/index.d.ts +2 -0
  713. package/dist/voice/avatar/index.d.ts.map +1 -0
  714. package/dist/voice/avatar/index.js +2 -0
  715. package/dist/voice/avatar/index.js.map +1 -0
  716. package/dist/voice/background_audio.cjs +366 -0
  717. package/dist/voice/background_audio.cjs.map +1 -0
  718. package/dist/voice/background_audio.d.cts +121 -0
  719. package/dist/voice/background_audio.d.ts +121 -0
  720. package/dist/voice/background_audio.d.ts.map +1 -0
  721. package/dist/voice/background_audio.js +342 -0
  722. package/dist/voice/background_audio.js.map +1 -0
  723. package/dist/voice/events.cjs +147 -0
  724. package/dist/voice/events.cjs.map +1 -0
  725. package/dist/voice/events.d.cts +127 -0
  726. package/dist/voice/events.d.ts +127 -0
  727. package/dist/voice/events.d.ts.map +1 -0
  728. package/dist/voice/events.js +112 -0
  729. package/dist/voice/events.js.map +1 -0
  730. package/dist/voice/generation.cjs +747 -0
  731. package/dist/voice/generation.cjs.map +1 -0
  732. package/dist/voice/generation.d.cts +116 -0
  733. package/dist/voice/generation.d.ts +116 -0
  734. package/dist/voice/generation.d.ts.map +1 -0
  735. package/dist/voice/generation.js +719 -0
  736. package/dist/voice/generation.js.map +1 -0
  737. package/dist/voice/generation_tools.test.cjs +236 -0
  738. package/dist/voice/generation_tools.test.cjs.map +1 -0
  739. package/dist/voice/generation_tools.test.js +235 -0
  740. package/dist/voice/generation_tools.test.js.map +1 -0
  741. package/dist/voice/index.cjs +49 -0
  742. package/dist/voice/index.cjs.map +1 -0
  743. package/dist/voice/index.d.cts +10 -0
  744. package/dist/voice/index.d.ts +10 -0
  745. package/dist/voice/index.d.ts.map +1 -0
  746. package/dist/voice/index.js +16 -0
  747. package/dist/voice/index.js.map +1 -0
  748. package/dist/voice/interruption_detection.test.cjs +114 -0
  749. package/dist/voice/interruption_detection.test.cjs.map +1 -0
  750. package/dist/voice/interruption_detection.test.js +113 -0
  751. package/dist/voice/interruption_detection.test.js.map +1 -0
  752. package/dist/voice/io.cjs +270 -0
  753. package/dist/voice/io.cjs.map +1 -0
  754. package/dist/voice/io.d.cts +126 -0
  755. package/dist/voice/io.d.ts +126 -0
  756. package/dist/voice/io.d.ts.map +1 -0
  757. package/dist/voice/io.js +242 -0
  758. package/dist/voice/io.js.map +1 -0
  759. package/dist/voice/recorder_io/index.cjs +23 -0
  760. package/dist/voice/recorder_io/index.cjs.map +1 -0
  761. package/dist/voice/recorder_io/index.d.cts +2 -0
  762. package/dist/voice/recorder_io/index.d.ts +2 -0
  763. package/dist/voice/recorder_io/index.d.ts.map +1 -0
  764. package/dist/voice/recorder_io/index.js +2 -0
  765. package/dist/voice/recorder_io/index.js.map +1 -0
  766. package/dist/voice/recorder_io/recorder_io.cjs +542 -0
  767. package/dist/voice/recorder_io/recorder_io.cjs.map +1 -0
  768. package/dist/voice/recorder_io/recorder_io.d.cts +100 -0
  769. package/dist/voice/recorder_io/recorder_io.d.ts +100 -0
  770. package/dist/voice/recorder_io/recorder_io.d.ts.map +1 -0
  771. package/dist/voice/recorder_io/recorder_io.js +508 -0
  772. package/dist/voice/recorder_io/recorder_io.js.map +1 -0
  773. package/dist/voice/report.cjs +75 -0
  774. package/dist/voice/report.cjs.map +1 -0
  775. package/dist/voice/report.d.cts +42 -0
  776. package/dist/voice/report.d.ts +42 -0
  777. package/dist/voice/report.d.ts.map +1 -0
  778. package/dist/voice/report.js +50 -0
  779. package/dist/voice/report.js.map +1 -0
  780. package/dist/voice/room_io/_input.cjs +133 -0
  781. package/dist/voice/room_io/_input.cjs.map +1 -0
  782. package/dist/voice/room_io/_input.d.cts +24 -0
  783. package/dist/voice/room_io/_input.d.ts +24 -0
  784. package/dist/voice/room_io/_input.d.ts.map +1 -0
  785. package/dist/voice/room_io/_input.js +114 -0
  786. package/dist/voice/room_io/_input.js.map +1 -0
  787. package/dist/voice/room_io/_output.cjs +359 -0
  788. package/dist/voice/room_io/_output.cjs.map +1 -0
  789. package/dist/voice/room_io/_output.d.cts +77 -0
  790. package/dist/voice/room_io/_output.d.ts +77 -0
  791. package/dist/voice/room_io/_output.d.ts.map +1 -0
  792. package/dist/voice/room_io/_output.js +343 -0
  793. package/dist/voice/room_io/_output.js.map +1 -0
  794. package/dist/voice/room_io/index.cjs +25 -0
  795. package/dist/voice/room_io/index.cjs.map +1 -0
  796. package/dist/voice/room_io/index.d.cts +3 -0
  797. package/dist/voice/room_io/index.d.ts +3 -0
  798. package/dist/voice/room_io/index.d.ts.map +1 -0
  799. package/dist/voice/room_io/index.js +3 -0
  800. package/dist/voice/room_io/index.js.map +1 -0
  801. package/dist/voice/room_io/room_io.cjs +373 -0
  802. package/dist/voice/room_io/room_io.cjs.map +1 -0
  803. package/dist/voice/room_io/room_io.d.cts +94 -0
  804. package/dist/voice/room_io/room_io.d.ts +94 -0
  805. package/dist/voice/room_io/room_io.d.ts.map +1 -0
  806. package/dist/voice/room_io/room_io.js +364 -0
  807. package/dist/voice/room_io/room_io.js.map +1 -0
  808. package/dist/voice/run_context.cjs +51 -0
  809. package/dist/voice/run_context.cjs.map +1 -0
  810. package/dist/voice/run_context.d.cts +22 -0
  811. package/dist/voice/run_context.d.ts +22 -0
  812. package/dist/voice/run_context.d.ts.map +1 -0
  813. package/dist/voice/run_context.js +27 -0
  814. package/dist/voice/run_context.js.map +1 -0
  815. package/dist/voice/speech_handle.cjs +228 -0
  816. package/dist/voice/speech_handle.cjs.map +1 -0
  817. package/dist/voice/speech_handle.d.cts +97 -0
  818. package/dist/voice/speech_handle.d.ts +97 -0
  819. package/dist/voice/speech_handle.d.ts.map +1 -0
  820. package/dist/voice/speech_handle.js +204 -0
  821. package/dist/voice/speech_handle.js.map +1 -0
  822. package/dist/voice/transcription/_utils.cjs +45 -0
  823. package/dist/voice/transcription/_utils.cjs.map +1 -0
  824. package/dist/voice/transcription/_utils.d.cts +3 -0
  825. package/dist/voice/transcription/_utils.d.ts +3 -0
  826. package/dist/voice/transcription/_utils.d.ts.map +1 -0
  827. package/dist/voice/transcription/_utils.js +21 -0
  828. package/dist/voice/transcription/_utils.js.map +1 -0
  829. package/dist/voice/transcription/index.cjs +23 -0
  830. package/dist/voice/transcription/index.cjs.map +1 -0
  831. package/dist/voice/transcription/index.d.cts +2 -0
  832. package/dist/voice/transcription/index.d.ts +2 -0
  833. package/dist/voice/transcription/index.d.ts.map +1 -0
  834. package/dist/voice/transcription/index.js +2 -0
  835. package/dist/voice/transcription/index.js.map +1 -0
  836. package/dist/voice/transcription/synchronizer.cjs +379 -0
  837. package/dist/voice/transcription/synchronizer.cjs.map +1 -0
  838. package/dist/voice/transcription/synchronizer.d.cts +87 -0
  839. package/dist/voice/transcription/synchronizer.d.ts +87 -0
  840. package/dist/voice/transcription/synchronizer.d.ts.map +1 -0
  841. package/dist/voice/transcription/synchronizer.js +354 -0
  842. package/dist/voice/transcription/synchronizer.js.map +1 -0
  843. package/dist/worker.cjs +680 -0
  844. package/dist/worker.cjs.map +1 -0
  845. package/dist/worker.d.cts +119 -0
  846. package/dist/worker.d.ts +119 -0
  847. package/dist/worker.d.ts.map +1 -0
  848. package/dist/worker.js +645 -0
  849. package/dist/worker.js.map +1 -0
  850. package/package.json +86 -0
  851. package/resources/NOTICE +2 -0
  852. package/resources/keyboard-typing.ogg +0 -0
  853. package/resources/keyboard-typing2.ogg +0 -0
  854. package/resources/office-ambience.ogg +0 -0
  855. package/src/_exceptions.ts +137 -0
  856. package/src/audio.ts +205 -0
  857. package/src/cli.ts +224 -0
  858. package/src/connection_pool.test.ts +346 -0
  859. package/src/connection_pool.ts +307 -0
  860. package/src/constants.ts +9 -0
  861. package/src/generator.ts +38 -0
  862. package/src/http_server.ts +64 -0
  863. package/src/index.ts +41 -0
  864. package/src/inference/api_protos.ts +82 -0
  865. package/src/inference/index.ts +32 -0
  866. package/src/inference/interruption/AdaptiveInterruptionDetector.ts +166 -0
  867. package/src/inference/interruption/InterruptionStream.ts +397 -0
  868. package/src/inference/interruption/defaults.ts +33 -0
  869. package/src/inference/interruption/errors.ts +0 -0
  870. package/src/inference/interruption/http_transport.ts +61 -0
  871. package/src/inference/interruption/index.ts +4 -0
  872. package/src/inference/interruption/interruption.ts +88 -0
  873. package/src/inference/llm.ts +532 -0
  874. package/src/inference/stt.ts +524 -0
  875. package/src/inference/tts.ts +574 -0
  876. package/src/inference/utils.test.ts +31 -0
  877. package/src/inference/utils.ts +81 -0
  878. package/src/inference_runner.ts +19 -0
  879. package/src/ipc/index.ts +5 -0
  880. package/src/ipc/inference_executor.ts +7 -0
  881. package/src/ipc/inference_proc_executor.ts +101 -0
  882. package/src/ipc/inference_proc_lazy_main.ts +115 -0
  883. package/src/ipc/job_executor.ts +23 -0
  884. package/src/ipc/job_proc_executor.ts +122 -0
  885. package/src/ipc/job_proc_lazy_main.ts +247 -0
  886. package/src/ipc/message.ts +52 -0
  887. package/src/ipc/proc_pool.ts +164 -0
  888. package/src/ipc/supervised_proc.test.ts +153 -0
  889. package/src/ipc/supervised_proc.ts +242 -0
  890. package/src/job.ts +461 -0
  891. package/src/llm/__snapshots__/chat_context.test.ts.snap +527 -0
  892. package/src/llm/__snapshots__/tool_context.test.ts.snap +177 -0
  893. package/src/llm/__snapshots__/zod-utils.test.ts.snap +559 -0
  894. package/src/llm/chat_context.test.ts +1057 -0
  895. package/src/llm/chat_context.ts +759 -0
  896. package/src/llm/fallback_adapter.test.ts +238 -0
  897. package/src/llm/fallback_adapter.ts +391 -0
  898. package/src/llm/index.ts +74 -0
  899. package/src/llm/llm.ts +303 -0
  900. package/src/llm/provider_format/google.test.ts +843 -0
  901. package/src/llm/provider_format/google.ts +134 -0
  902. package/src/llm/provider_format/index.ts +23 -0
  903. package/src/llm/provider_format/openai.test.ts +675 -0
  904. package/src/llm/provider_format/openai.ts +146 -0
  905. package/src/llm/provider_format/utils.ts +187 -0
  906. package/src/llm/realtime.ts +163 -0
  907. package/src/llm/remote_chat_context.test.ts +290 -0
  908. package/src/llm/remote_chat_context.ts +114 -0
  909. package/src/llm/tool_context.test.ts +407 -0
  910. package/src/llm/tool_context.ts +343 -0
  911. package/src/llm/tool_context.type.test.ts +115 -0
  912. package/src/llm/utils.test.ts +670 -0
  913. package/src/llm/utils.ts +336 -0
  914. package/src/llm/zod-utils.test.ts +577 -0
  915. package/src/llm/zod-utils.ts +153 -0
  916. package/src/log.ts +83 -0
  917. package/src/metrics/base.ts +168 -0
  918. package/src/metrics/index.ts +15 -0
  919. package/src/metrics/usage_collector.ts +46 -0
  920. package/src/metrics/utils.ts +64 -0
  921. package/src/plugin.ts +46 -0
  922. package/src/stream/deferred_stream.test.ts +755 -0
  923. package/src/stream/deferred_stream.ts +127 -0
  924. package/src/stream/identity_transform.test.ts +179 -0
  925. package/src/stream/identity_transform.ts +18 -0
  926. package/src/stream/index.ts +7 -0
  927. package/src/stream/merge_readable_streams.ts +40 -0
  928. package/src/stream/stream_channel.test.ts +166 -0
  929. package/src/stream/stream_channel.ts +44 -0
  930. package/src/stt/index.ts +15 -0
  931. package/src/stt/stream_adapter.ts +107 -0
  932. package/src/stt/stt.ts +374 -0
  933. package/src/telemetry/index.ts +28 -0
  934. package/src/telemetry/logging.ts +55 -0
  935. package/src/telemetry/otel_http_exporter.ts +195 -0
  936. package/src/telemetry/pino_otel_transport.ts +265 -0
  937. package/src/telemetry/trace_types.ts +95 -0
  938. package/src/telemetry/traces.ts +612 -0
  939. package/src/telemetry/utils.ts +61 -0
  940. package/src/tokenize/basic/basic.ts +83 -0
  941. package/src/tokenize/basic/hyphenator.ts +434 -0
  942. package/src/tokenize/basic/index.ts +11 -0
  943. package/src/tokenize/basic/paragraph.ts +43 -0
  944. package/src/tokenize/basic/sentence.ts +89 -0
  945. package/src/tokenize/basic/word.ts +27 -0
  946. package/src/tokenize/index.ts +16 -0
  947. package/src/tokenize/token_stream.ts +180 -0
  948. package/src/tokenize/tokenizer.test.ts +255 -0
  949. package/src/tokenize/tokenizer.ts +152 -0
  950. package/src/transcription.ts +307 -0
  951. package/src/tts/index.ts +12 -0
  952. package/src/tts/stream_adapter.ts +110 -0
  953. package/src/tts/tts.ts +598 -0
  954. package/src/types.ts +66 -0
  955. package/src/utils/ws_transport.test.ts +282 -0
  956. package/src/utils/ws_transport.ts +22 -0
  957. package/src/utils.test.ts +651 -0
  958. package/src/utils.ts +871 -0
  959. package/src/vad.ts +262 -0
  960. package/src/version.ts +5 -0
  961. package/src/voice/agent.test.ts +80 -0
  962. package/src/voice/agent.ts +418 -0
  963. package/src/voice/agent_activity.ts +2375 -0
  964. package/src/voice/agent_session.ts +866 -0
  965. package/src/voice/audio_recognition.ts +877 -0
  966. package/src/voice/avatar/datastream_io.ts +247 -0
  967. package/src/voice/avatar/index.ts +4 -0
  968. package/src/voice/background_audio.ts +491 -0
  969. package/src/voice/events.ts +261 -0
  970. package/src/voice/generation.ts +946 -0
  971. package/src/voice/generation_tools.test.ts +268 -0
  972. package/src/voice/index.ts +12 -0
  973. package/src/voice/interruption_detection.test.ts +151 -0
  974. package/src/voice/io.ts +347 -0
  975. package/src/voice/recorder_io/index.ts +4 -0
  976. package/src/voice/recorder_io/recorder_io.ts +690 -0
  977. package/src/voice/report.ts +100 -0
  978. package/src/voice/room_io/_input.ts +162 -0
  979. package/src/voice/room_io/_output.ts +439 -0
  980. package/src/voice/room_io/index.ts +5 -0
  981. package/src/voice/room_io/room_io.ts +518 -0
  982. package/src/voice/run_context.ts +34 -0
  983. package/src/voice/speech_handle.ts +250 -0
  984. package/src/voice/transcription/_utils.ts +25 -0
  985. package/src/voice/transcription/index.ts +4 -0
  986. package/src/voice/transcription/synchronizer.ts +477 -0
  987. package/src/worker.ts +798 -0
@@ -0,0 +1,877 @@
1
+ // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+ import { AudioFrame } from '@livekit/rtc-node';
5
+ import type { Context, Span } from '@opentelemetry/api';
6
+ import type { WritableStreamDefaultWriter } from 'node:stream/web';
7
+ import { ReadableStream } from 'node:stream/web';
8
+ import type { AdaptiveInterruptionDetector } from '../inference/interruption/AdaptiveInterruptionDetector.js';
9
+ import {
10
+ InterruptionStreamBase,
11
+ InterruptionStreamSentinel,
12
+ } from '../inference/interruption/InterruptionStream.js';
13
+ import type { InterruptionEvent } from '../inference/interruption/interruption.js';
14
+ import { type ChatContext } from '../llm/chat_context.js';
15
+ import { log } from '../log.js';
16
+ import { DeferredReadableStream, isStreamReaderReleaseError } from '../stream/deferred_stream.js';
17
+ import { IdentityTransform } from '../stream/identity_transform.js';
18
+ import { mergeReadableStreams } from '../stream/merge_readable_streams.js';
19
+ import { type SpeechEvent, SpeechEventType } from '../stt/stt.js';
20
+ import { traceTypes, tracer } from '../telemetry/index.js';
21
+ import { Task, delay } from '../utils.js';
22
+ import { type VAD, type VADEvent, VADEventType } from '../vad.js';
23
+ import type { TurnDetectionMode } from './agent_session.js';
24
+ import type { STTNode } from './io.js';
25
+
26
/**
 * Payload handed to {@link RecognitionHooks.onEndOfTurn} when a user turn is
 * considered finished.
 */
export interface EndOfTurnInfo {
  /** Transcript text reported for the turn. */
  newTranscript: string;
  /** Confidence score associated with `newTranscript`. */
  transcriptConfidence: number;
  /** Delay attributed to transcription (NOTE(review): presumably milliseconds; confirm against the producer). */
  transcriptionDelay: number;
  /** Delay attributed to end-of-utterance detection (NOTE(review): presumably milliseconds; confirm against the producer). */
  endOfUtteranceDelay: number;
  /** When the user started speaking, or `undefined` when unknown. The key is required even if the value is `undefined`. */
  startedSpeakingAt: number | undefined;
  /** When the user stopped speaking, or `undefined` when unknown. The key is required even if the value is `undefined`. */
  stoppedSpeakingAt: number | undefined;
}
34
+
35
/**
 * Payload handed to {@link RecognitionHooks.onPreemptiveGeneration} so a reply
 * can be prepared before the user turn has formally ended.
 */
export interface PreemptiveGenerationInfo {
  /** Transcript accumulated so far for the current user turn. */
  newTranscript: string;
  /** Arithmetic mean of the confidences of the transcripts that make up `newTranscript` (0 when there are none). */
  transcriptConfidence: number;
}
39
+
40
/**
 * Callbacks through which {@link AudioRecognition} reports recognition
 * progress to its owner.
 */
export interface RecognitionHooks {
  /**
   * The user started speaking. In `'stt'` turn-detection mode this is also
   * invoked with a synthetic event when the STT reports START_OF_SPEECH.
   */
  onStartOfSpeech: (ev: VADEvent) => void;
  /** A VAD inference result is available. */
  onVADInferenceDone: (ev: VADEvent) => void;
  /**
   * The user stopped speaking. In `'stt'` turn-detection mode this is also
   * invoked with a synthetic event when the STT reports END_OF_SPEECH.
   */
  onEndOfSpeech: (ev: VADEvent) => void;
  /** An interim or preflight transcript was received from the STT. */
  onInterimTranscript: (ev: SpeechEvent) => void;
  /** A final transcript event was received from the STT (called even when its text is empty). */
  onFinalTranscript: (ev: SpeechEvent) => void;
  /**
   * The user turn is considered complete.
   * NOTE(review): the meaning of the resolved boolean is not visible here;
   * presumably it reports whether the turn was accepted. Confirm with the caller.
   */
  onEndOfTurn: (info: EndOfTurnInfo) => Promise<boolean>;
  /** The transcript changed enough that a reply may be generated ahead of end-of-turn. */
  onPreemptiveGeneration: (info: PreemptiveGenerationInfo) => void;
  /** An interruption event was produced by interruption detection. */
  onInterruption: (ev: InterruptionEvent) => void;

  /** Supplies the chat context that end-of-utterance detection is run against. */
  retrieveChatCtx: () => ChatContext;
}
52
+
53
/**
 * Contract for an end-of-turn model consulted during end-of-utterance
 * detection.
 */
export interface _TurnDetector {
  /**
   * Resolves the threshold for the given language, or `undefined` when none
   * is available.
   * NOTE(review): presumably the probability below which end-of-turn is
   * treated as unlikely; confirm against the implementation.
   */
  unlikelyThreshold: (language?: string) => Promise<number | undefined>;
  /** Resolves whether the detector can handle the given language. */
  supportsLanguage: (language?: string) => Promise<boolean>;
  /** Resolves the end-of-turn prediction (a number) for the given chat context. */
  predictEndOfTurn(chatCtx: ChatContext): Promise<number>;
}
58
+
59
/** Construction options for {@link AudioRecognition}. */
export interface AudioRecognitionOptions {
  /** Callbacks that receive recognition events. */
  recognitionHooks: RecognitionHooks;
  /** Speech-to-text node; optional. */
  stt?: STTNode;
  /** Voice activity detector; optional. */
  vad?: VAD;
  /** Interruption detector; interruption detection is only enabled when `vad` is provided as well. */
  interruptionDetector?: AdaptiveInterruptionDetector;
  /** End-of-turn model; optional. */
  turnDetector?: _TurnDetector;
  /** Turn detection mode (any `TurnDetectionMode` except a detector instance), e.g. `'manual'` or `'stt'`. */
  turnDetectionMode?: Exclude<TurnDetectionMode, _TurnDetector>;
  /** Starting value for the endpointing delay used by end-of-utterance detection. */
  minEndpointingDelay: number;
  /** Upper endpointing delay (NOTE(review): usage is outside this excerpt; confirm). */
  maxEndpointingDelay: number;
  /** Optional OpenTelemetry context kept as the root for spans. */
  rootSpanContext?: Context;
}
70
+
71
+ export class AudioRecognition {
72
+ private hooks: RecognitionHooks;
73
+ private stt?: STTNode;
74
+ private vad?: VAD;
75
+ private turnDetector?: _TurnDetector;
76
+ private turnDetectionMode?: Exclude<TurnDetectionMode, _TurnDetector>;
77
+ private minEndpointingDelay: number;
78
+ private maxEndpointingDelay: number;
79
+ private lastLanguage?: string;
80
+ private rootSpanContext?: Context;
81
+
82
+ private deferredInputStream: DeferredReadableStream<AudioFrame>;
83
+ private logger = log();
84
+ private lastFinalTranscriptTime = 0;
85
+ private audioTranscript = '';
86
+ private audioInterimTranscript = '';
87
+ private audioPreflightTranscript = '';
88
+ private finalTranscriptConfidence: number[] = [];
89
+ private lastSpeakingTime: number | undefined;
90
+ private speechStartTime: number | undefined;
91
+ private userTurnCommitted = false;
92
+ private speaking = false;
93
+ private sampleRate?: number;
94
+
95
+ private userTurnSpan?: Span;
96
+
97
+ private vadInputStream: ReadableStream<AudioFrame>;
98
+ private sttInputStream: ReadableStream<AudioFrame>;
99
+ private interruptionInputStream: ReadableStream<AudioFrame>;
100
+ private silenceAudioTransform = new IdentityTransform<AudioFrame>();
101
+ private silenceAudioWriter: WritableStreamDefaultWriter<AudioFrame>;
102
+
103
+ // all cancellable tasks
104
+ private bounceEOUTask?: Task<void>;
105
+ private commitUserTurnTask?: Task<void>;
106
+ private vadTask?: Task<void>;
107
+ private sttTask?: Task<void>;
108
+ private interruptionTask?: Task<void>;
109
+
110
+ // interruption detection
111
+ private interruptionDetector?: AdaptiveInterruptionDetector;
112
+ private interruptionStream?: InterruptionStreamBase;
113
+ private interruptionEnabled = false;
114
+ private agentSpeaking = false;
115
+
116
/**
 * Stores the supplied options and wires up the audio fan-out.
 *
 * The deferred input stream is split into three branches: one for VAD, one
 * for STT (merged with the readable side of the silence transform) and one
 * for interruption detection.
 */
constructor(opts: AudioRecognitionOptions) {
  const {
    recognitionHooks,
    stt,
    vad,
    interruptionDetector,
    turnDetector,
    turnDetectionMode,
    minEndpointingDelay,
    maxEndpointingDelay,
    rootSpanContext,
  } = opts;

  this.hooks = recognitionHooks;
  this.stt = stt;
  this.vad = vad;
  this.interruptionDetector = interruptionDetector;
  this.turnDetector = turnDetector;
  this.turnDetectionMode = turnDetectionMode;
  this.minEndpointingDelay = minEndpointingDelay;
  this.maxEndpointingDelay = maxEndpointingDelay;
  this.lastLanguage = undefined;
  this.rootSpanContext = rootSpanContext;

  // Interruption detection needs both a detector and a VAD.
  this.interruptionEnabled = this.interruptionDetector !== undefined && this.vad !== undefined;

  this.deferredInputStream = new DeferredReadableStream<AudioFrame>();

  // tee() only yields two branches, so split twice to obtain three consumers.
  const [vadBranch, remainder] = this.deferredInputStream.stream.tee();
  const [sttBranch, interruptionBranch] = remainder.tee();

  this.vadInputStream = vadBranch;
  // The STT branch also receives whatever is written through the silence writer.
  this.sttInputStream = mergeReadableStreams(sttBranch, this.silenceAudioTransform.readable);
  this.interruptionInputStream = interruptionBranch;
  this.silenceAudioWriter = this.silenceAudioTransform.writable.getWriter();
}
139
+
140
/**
 * The user's transcript so far: the committed transcript followed by the
 * pending interim transcript when one exists, otherwise the committed
 * transcript on its own.
 */
get currentTranscript(): string {
  const interim = this.audioInterimTranscript;
  return interim ? `${this.audioTranscript} ${interim}`.trim() : this.audioTranscript;
}
149
+
150
/**
 * Launches the background recognition tasks: VAD, STT and, when interruption
 * detection is enabled and a detector is present, the interruption task.
 *
 * A rejection of any task's result is logged rather than rethrown.
 */
async start(): Promise<void> {
  const vadTask = Task.from(({ signal }) => this.createVadTask(this.vad, signal));
  this.vadTask = vadTask;
  vadTask.result.catch((err) => {
    this.logger.error(`Error running VAD task: ${err}`);
  });

  const sttTask = Task.from(({ signal }) => this.createSttTask(this.stt, signal));
  this.sttTask = sttTask;
  sttTask.result.catch((err) => {
    this.logger.error(`Error running STT task: ${err}`);
  });

  // Nothing more to start unless interruption detection is fully configured.
  if (!this.interruptionEnabled || !this.interruptionDetector) {
    return;
  }

  const interruptionTask = Task.from(({ signal }) =>
    this.createInterruptionTask(this.interruptionDetector!, signal),
  );
  this.interruptionTask = interruptionTask;
  interruptionTask.result.catch((err) => {
    this.logger.error(`Error running interruption task: ${err}`);
  });
}
170
+
171
/**
 * Handles a single event coming from the STT.
 *
 * - FINAL_TRANSCRIPT: appends the text to the committed transcript, may
 *   trigger preemptive generation and end-of-utterance (EOU) detection.
 * - PREFLIGHT_TRANSCRIPT: records the preflight/interim transcript and may
 *   trigger preemptive generation.
 * - INTERIM_TRANSCRIPT: records the interim transcript.
 * - START_OF_SPEECH / END_OF_SPEECH: only acted upon in `'stt'` turn-detection
 *   mode, where they are forwarded to the hooks as synthetic VAD events.
 *
 * In `'manual'` mode, once the user turn is committed, events are ignored when
 * no EOU task is pending or when the event is an interim transcript.
 *
 * @param ev - the STT event to process
 */
private async onSTTEvent(ev: SpeechEvent): Promise<void> {
  if (
    this.turnDetectionMode === 'manual' &&
    this.userTurnCommitted &&
    (this.bounceEOUTask === undefined ||
      this.bounceEOUTask.done ||
      ev.type === SpeechEventType.INTERIM_TRANSCRIPT)
  ) {
    // ignore stt event if user turn already committed and EOU task is done
    // or it's an interim transcript
    this.logger.debug(
      {
        userTurnCommitted: this.userTurnCommitted,
        eouTaskDone: this.bounceEOUTask?.done,
        evType: ev.type,
        turnDetectionMode: this.turnDetectionMode,
      },
      'ignoring stt event',
    );
    return;
  }

  // Arithmetic mean of the confidences; 0 for an empty list.
  const meanConfidence = (values: readonly number[]): number =>
    values.length > 0 ? values.reduce((a, b) => a + b, 0) / values.length : 0;

  // STT speech boundaries carry no VAD measurements, so every numeric field is zeroed.
  const syntheticVadEvent = (type: VADEventType, speaking: boolean): VADEvent => ({
    type,
    samplesIndex: 0,
    timestamp: Date.now(),
    speechDuration: 0,
    silenceDuration: 0,
    frames: [],
    probability: 0,
    inferenceDuration: 0,
    speaking,
    rawAccumulatedSilence: 0,
    rawAccumulatedSpeech: 0,
  });

  // Each case has its own block so its declarations do not leak into sibling cases.
  switch (ev.type) {
    case SpeechEventType.FINAL_TRANSCRIPT: {
      this.hooks.onFinalTranscript(ev);
      const transcript = ev.alternatives?.[0]?.text;
      const confidence = ev.alternatives?.[0]?.confidence ?? 0;
      // NOTE(review): unlike the PREFLIGHT_TRANSCRIPT branch, the language is
      // overwritten unconditionally here (even with `undefined` or for very
      // short transcripts). Confirm whether the same length guard is intended.
      this.lastLanguage = ev.alternatives?.[0]?.language;

      if (!transcript) {
        // stt final transcript received but no transcript
        return;
      }

      this.logger.debug(
        {
          user_transcript: transcript,
          language: this.lastLanguage,
        },
        'received user transcript',
      );

      this.lastFinalTranscriptTime = Date.now();
      this.audioTranscript += ` ${transcript}`;
      this.audioTranscript = this.audioTranscript.trimStart();
      this.finalTranscriptConfidence.push(confidence);
      // Compare before the preflight transcript is cleared: if the final text
      // matches what preflight already announced, generation need not restart.
      const transcriptChanged = this.audioTranscript !== this.audioPreflightTranscript;
      this.audioInterimTranscript = '';
      this.audioPreflightTranscript = '';

      if (!this.vad || this.lastSpeakingTime === undefined) {
        // vad disabled, use stt timestamp
        // TODO: this would screw up transcription latency metrics
        // but we'll live with it for now.
        // the correct way is to ensure STT fires SpeechEventType.END_OF_SPEECH
        // and using that timestamp for lastSpeakingTime
        this.lastSpeakingTime = Date.now();
      }

      if (this.vadBaseTurnDetection || this.userTurnCommitted) {
        if (transcriptChanged) {
          this.logger.debug(
            { transcript: this.audioTranscript },
            'triggering preemptive generation (FINAL_TRANSCRIPT)',
          );
          this.hooks.onPreemptiveGeneration({
            newTranscript: this.audioTranscript,
            transcriptConfidence: meanConfidence(this.finalTranscriptConfidence),
          });
        }

        if (!this.speaking) {
          const chatCtx = this.hooks.retrieveChatCtx();
          this.logger.debug('running EOU detection on stt FINAL_TRANSCRIPT');
          this.runEOUDetection(chatCtx);
        }
      }
      break;
    }

    case SpeechEventType.PREFLIGHT_TRANSCRIPT: {
      this.hooks.onInterimTranscript(ev);
      const preflightTranscript = ev.alternatives?.[0]?.text ?? '';
      const preflightConfidence = ev.alternatives?.[0]?.confidence ?? 0;
      const preflightLanguage = ev.alternatives?.[0]?.language;

      // Only replace a known language when the transcript is longer than this.
      const MIN_LANGUAGE_DETECTION_LENGTH = 5;
      if (
        !this.lastLanguage ||
        (preflightLanguage && preflightTranscript.length > MIN_LANGUAGE_DETECTION_LENGTH)
      ) {
        this.lastLanguage = preflightLanguage;
      }

      if (!preflightTranscript) {
        return;
      }

      this.logger.debug(
        {
          user_transcript: preflightTranscript,
          language: this.lastLanguage,
        },
        'received user preflight transcript',
      );

      // still need to increment it as it's used for turn detection,
      this.lastFinalTranscriptTime = Date.now();
      // preflight transcript includes all pre-committed transcripts (including final transcript from the previous STT run)
      this.audioPreflightTranscript =
        `${this.audioTranscript} ${preflightTranscript}`.trimStart();
      this.audioInterimTranscript = preflightTranscript;

      if (!this.vad || this.lastSpeakingTime === undefined) {
        // vad disabled, use stt timestamp
        this.lastSpeakingTime = Date.now();
      }

      if (this.turnDetectionMode !== 'manual' || this.userTurnCommitted) {
        const confidenceVals = [...this.finalTranscriptConfidence, preflightConfidence];
        this.logger.debug(
          {
            transcript:
              this.audioPreflightTranscript.length > 100
                ? this.audioPreflightTranscript.slice(0, 100) + '...'
                : this.audioPreflightTranscript,
          },
          'triggering preemptive generation (PREFLIGHT_TRANSCRIPT)',
        );
        this.hooks.onPreemptiveGeneration({
          newTranscript: this.audioPreflightTranscript,
          transcriptConfidence: meanConfidence(confidenceVals),
        });
      }
      break;
    }

    case SpeechEventType.INTERIM_TRANSCRIPT: {
      this.logger.debug({ transcript: ev.alternatives?.[0]?.text }, 'interim transcript');
      this.hooks.onInterimTranscript(ev);
      this.audioInterimTranscript = ev.alternatives?.[0]?.text ?? '';
      break;
    }

    case SpeechEventType.START_OF_SPEECH: {
      // STT speech boundaries only drive turn detection in 'stt' mode.
      if (this.turnDetectionMode !== 'stt') break;
      this.hooks.onStartOfSpeech(syntheticVadEvent(VADEventType.START_OF_SPEECH, true));
      this.speaking = true;
      this.lastSpeakingTime = Date.now();

      // The user is talking again, so a pending end-of-utterance check is stale.
      this.bounceEOUTask?.cancel();
      break;
    }

    case SpeechEventType.END_OF_SPEECH: {
      if (this.turnDetectionMode !== 'stt') break;
      this.hooks.onEndOfSpeech(syntheticVadEvent(VADEventType.END_OF_SPEECH, false));
      this.speaking = false;
      this.userTurnCommitted = true;
      this.lastSpeakingTime = Date.now();

      // `speaking` was just cleared, so EOU detection always runs here.
      const chatCtx = this.hooks.retrieveChatCtx();
      this.logger.debug('running EOU detection on stt END_OF_SPEECH');
      this.runEOUDetection(chatCtx);
      break;
    }
  }
}
362
+
363
/**
 * Schedules end-of-utterance (EOU) handling for the transcript accumulated so far.
 *
 * The current transcript is appended as a user message to a copy of `chatCtx`
 * (obtained through `chatCtx.copy()`), any previously scheduled EOU task is cancelled,
 * and a new task is started. That task optionally runs the turn detector, waits for the
 * endpointing delay, then reports the end of turn through `hooks.onEndOfTurn`.
 *
 * Nothing is scheduled when STT is configured, no transcript has been collected yet and
 * the turn detection mode is not `'manual'`.
 *
 * @param chatCtx - Chat context the user message is added to (through a copy).
 */
private runEOUDetection(chatCtx: ChatContext) {
  const isManual = this.turnDetectionMode === 'manual';

  this.logger.debug(
    {
      stt: this.stt,
      audioTranscript: this.audioTranscript,
      turnDetectionMode: this.turnDetectionMode,
    },
    'running EOU detection',
  );

  // stt enabled but no transcript yet
  if (this.stt && !this.audioTranscript && !isManual) {
    this.logger.debug('skipping EOU detection');
    return;
  }

  const turnCtx = chatCtx.copy();
  turnCtx.addMessage({ role: 'user', content: this.audioTranscript });

  // the EOU model is only consulted when there is a transcript and the mode is not manual
  const turnDetector = this.audioTranscript && !isManual ? this.turnDetector : undefined;

  // arithmetic mean, 0 for an empty list
  const mean = (values: readonly number[]): number =>
    values.length > 0 ? values.reduce((sum, v) => sum + v, 0) / values.length : 0;

  // cancel any existing EOU task
  this.bounceEOUTask?.cancel();

  // snapshot the timing fields before anything is awaited (the fields can change meanwhile)
  const lastSpeakingTime = this.lastSpeakingTime;
  const lastFinalTranscriptTime = this.lastFinalTranscriptTime;
  const speechStartTime = this.speechStartTime;

  this.bounceEOUTask = Task.from(async (controller: AbortController) => {
    let endpointingDelay = this.minEndpointingDelay;

    if (turnDetector) {
      await tracer.startActiveSpan(
        async (span) => {
          this.logger.debug('Running turn detector model');

          let endOfTurnProbability = 0.0;
          let unlikelyThreshold: number | undefined;

          const languageSupported = await turnDetector.supportsLanguage(this.lastLanguage);
          if (languageSupported) {
            try {
              endOfTurnProbability = await turnDetector.predictEndOfTurn(turnCtx);
              unlikelyThreshold = await turnDetector.unlikelyThreshold(this.lastLanguage);

              this.logger.debug(
                { endOfTurnProbability, unlikelyThreshold, language: this.lastLanguage },
                'end of turn probability',
              );

              // a probability below the threshold switches to the longer endpointing delay
              if (unlikelyThreshold && endOfTurnProbability < unlikelyThreshold) {
                endpointingDelay = this.maxEndpointingDelay;
              }
            } catch (error) {
              this.logger.error(error, 'Error predicting end of turn');
            }
          } else {
            this.logger.debug(`Turn detector does not support language ${this.lastLanguage}`);
          }

          span.setAttribute(
            traceTypes.ATTR_CHAT_CTX,
            JSON.stringify(turnCtx.toJSON({ excludeTimestamp: false })),
          );
          span.setAttribute(traceTypes.ATTR_EOU_PROBABILITY, endOfTurnProbability);
          span.setAttribute(traceTypes.ATTR_EOU_UNLIKELY_THRESHOLD, unlikelyThreshold ?? 0);
          span.setAttribute(traceTypes.ATTR_EOU_DELAY, endpointingDelay);
          span.setAttribute(traceTypes.ATTR_EOU_LANGUAGE, this.lastLanguage ?? '');
        },
        {
          name: 'eou_detection',
          context: this.rootSpanContext,
        },
      );
    }

    // time already elapsed since the last detected speech counts towards the delay
    const extraSleep =
      lastSpeakingTime === undefined
        ? endpointingDelay
        : endpointingDelay + (lastSpeakingTime - Date.now());

    if (extraSleep > 0) {
      // add delay to see if there's a potential upcoming EOU task that cancels this one
      await delay(extraSleep, { signal: controller.signal });
    }

    this.logger.debug({ transcript: this.audioTranscript }, 'end of user turn');

    const confidenceAvg = mean(this.finalTranscriptConfidence);

    // sometimes, we can't calculate the metrics because VAD was unreliable.
    // in this case, we just ignore the calculation, it's better than providing likely wrong values
    const metrics: {
      transcriptionDelay: number;
      endOfUtteranceDelay: number;
      startedSpeakingAt?: number;
      stoppedSpeakingAt?: number;
    } = { transcriptionDelay: 0, endOfUtteranceDelay: 0 };

    if (
      lastFinalTranscriptTime !== 0 &&
      lastSpeakingTime !== undefined &&
      speechStartTime !== undefined
    ) {
      metrics.startedSpeakingAt = speechStartTime;
      metrics.stoppedSpeakingAt = lastSpeakingTime;
      metrics.transcriptionDelay = Math.max(lastFinalTranscriptTime - lastSpeakingTime, 0);
      metrics.endOfUtteranceDelay = Date.now() - lastSpeakingTime;
    }

    const committed = await this.hooks.onEndOfTurn({
      newTranscript: this.audioTranscript,
      transcriptConfidence: confidenceAvg,
      transcriptionDelay: metrics.transcriptionDelay,
      endOfUtteranceDelay: metrics.endOfUtteranceDelay,
      startedSpeakingAt: metrics.startedSpeakingAt,
      stoppedSpeakingAt: metrics.stoppedSpeakingAt,
    });

    if (committed) {
      this._endUserTurnSpan({
        transcript: this.audioTranscript,
        confidence: confidenceAvg,
        transcriptionDelay: metrics.transcriptionDelay,
        endOfUtteranceDelay: metrics.endOfUtteranceDelay,
      });

      // clear the transcript if the user turn was committed
      this.audioTranscript = '';
      this.finalTranscriptConfidence = [];
      this.lastSpeakingTime = undefined;
      this.lastFinalTranscriptTime = 0;
      this.speechStartTime = undefined;
    }

    this.userTurnCommitted = false;
  });

  this.bounceEOUTask.result
    .then(() => {
      this.logger.debug('EOU detection task completed');
    })
    .catch((err: unknown) => {
      const aborted = err instanceof Error && err.message.includes('This operation was aborted');
      if (aborted) {
        // ignore aborted errors
        return;
      }
      this.logger.error(err, 'Error in EOU detection task:');
    });
}
522
+
523
/**
 * Runs the STT node over `sttInputStream` and forwards every event it yields to
 * `onSTTEvent` until the stream ends or `signal` aborts.
 *
 * Does nothing when `stt` is undefined, when the node returns `null`, when the signal is
 * already aborted once the node has returned, or when the returned value is not a
 * `ReadableStream`.
 *
 * @param stt - STT node to run; `undefined` disables the task.
 * @param signal - Abort signal that stops reading and cancels the STT stream.
 */
private async createSttTask(stt: STTNode | undefined, signal: AbortSignal) {
  if (!stt) return;

  this.logger.debug('createSttTask: create stt stream from stt node');

  const sttStream = await stt(this.sttInputStream, {});

  if (signal.aborted || sttStream === null) return;
  if (!(sttStream instanceof ReadableStream)) return;

  const reader = sttStream.getReader();

  // On abort, release the reader so that a pending read() is interrupted, then cancel the
  // stream. Kept as a named function so it can be unregistered once reading is over.
  const abortHandler = async () => {
    try {
      reader.releaseLock();
      await sttStream.cancel();
    } catch (e) {
      // the error goes in the merging object, matching the other structured log calls here
      this.logger.debug({ error: e }, 'createSttTask: error during abort handler:');
    }
  };
  signal.addEventListener('abort', abortHandler);

  try {
    while (!signal.aborted) {
      const { done, value: ev } = await reader.read();
      if (done) break;

      if (typeof ev === 'string') {
        throw new Error('STT node must yield SpeechEvent');
      }
      await this.onSTTEvent(ev);
    }
  } catch (e) {
    // a released reader (see the abort handler) is an expected way to stop reading
    if (isStreamReaderReleaseError(e)) {
      return;
    }
    this.logger.error({ error: e }, 'createSttTask: error reading sttStream');
  } finally {
    // the handler is only useful while reading; do not leave it attached to the signal
    signal.removeEventListener('abort', abortHandler);
    reader.releaseLock();
    try {
      await sttStream.cancel();
    } catch (e) {
      this.logger.debug(
        { error: e },
        'createSttTask: error cancelling sttStream (may already be cancelled):',
      );
    }
  }
}
575
+
576
/**
 * Consumes events from a VAD stream fed by `vadInputStream` and updates the speaking
 * state, speech timestamps and turn detection accordingly, until the stream ends or
 * `signal` aborts.
 *
 * @param vad - Voice activity detector to stream from; `undefined` disables the task.
 * @param signal - Abort signal; on abort the VAD stream is detached from its input and closed.
 */
private async createVadTask(vad: VAD | undefined, signal: AbortSignal) {
  if (!vad) return;

  const vadStream = vad.stream();
  vadStream.updateInputStream(this.vadInputStream);

  const abortHandler = () => {
    vadStream.detachInputStream();
    vadStream.close();
    signal.removeEventListener('abort', abortHandler);
  };
  signal.addEventListener('abort', abortHandler);

  try {
    for await (const ev of vadStream) {
      if (signal.aborted) break;

      switch (ev.type) {
        case VADEventType.START_OF_SPEECH:
          this.logger.debug('VAD task: START_OF_SPEECH');
          this.hooks.onStartOfSpeech(ev);
          this.speaking = true;

          if (!this.userTurnSpan) {
            this.userTurnSpan = tracer.startSpan({
              name: 'user_turn',
              context: this.rootSpanContext,
            });
          }

          // Track the sample rate of the incoming audio; it is refreshed on every
          // START_OF_SPEECH event that carries at least one frame
          if (ev.frames.length > 0 && ev.frames[0]) {
            this.sampleRate = ev.frames[0].sampleRate;
          }

          // If agent is speaking, user speech is overlap - trigger interruption detection
          if (this.agentSpeaking && this.interruptionEnabled) {
            this.onStartOfOverlapSpeech(ev.speechDuration, this.userTurnSpan);
          }

          // the user is talking again, so a pending end-of-utterance decision is void
          this.bounceEOUTask?.cancel();
          break;
        case VADEventType.INFERENCE_DONE:
          this.hooks.onVADInferenceDone(ev);
          // for metrics, get the "earliest" signal of speech as possible
          if (ev.rawAccumulatedSpeech > 0.0) {
            this.lastSpeakingTime = Date.now();

            if (this.speechStartTime === undefined) {
              this.speechStartTime = Date.now();
            }
          }
          break;
        case VADEventType.END_OF_SPEECH:
          this.logger.debug('VAD task: END_OF_SPEECH');
          this.hooks.onEndOfSpeech(ev);

          // when VAD fires END_OF_SPEECH, it already waited for the silence_duration
          this.speaking = false;

          // If we were in overlap speech (agent speaking + user speaking), end it
          if (this.agentSpeaking && this.interruptionEnabled) {
            this.onEndOfOverlapSpeech();
          }

          if (
            this.vadBaseTurnDetection ||
            (this.turnDetectionMode === 'stt' && this.userTurnCommitted)
          ) {
            const chatCtx = this.hooks.retrieveChatCtx();
            this.runEOUDetection(chatCtx);
          }
          break;
      }
    }
  } catch (e) {
    this.logger.error(e, 'Error in VAD task');
  } finally {
    // the handler only unregisters itself when an abort fires; make sure it is also
    // dropped when the loop ends for any other reason
    signal.removeEventListener('abort', abortHandler);
    this.logger.debug('VAD task closed');
  }
}
657
+
658
/**
 * Runs interruption detection: creates a stream from `interruptionDetector`, forwards
 * frames read from `interruptionInputStream` into it, and passes every event the stream
 * emits to `hooks.onInterruption`, until the event stream ends or `signal` aborts.
 *
 * @param interruptionDetector - Detector used to create the interruption stream.
 * @param signal - Abort signal; on abort both readers are released and the stream is closed.
 */
private async createInterruptionTask(
  interruptionDetector: AdaptiveInterruptionDetector,
  signal: AbortSignal,
) {
  // Create the interruption stream from the detector
  this.interruptionStream = interruptionDetector.createStream();

  // Forward audio frames to the interruption stream
  const reader = this.interruptionInputStream.getReader();

  const forwardTask = (async () => {
    try {
      while (!signal.aborted) {
        const { done, value: frame } = await reader.read();
        if (done) break;
        await this.interruptionStream?.pushFrame(frame);
      }
    } catch (e) {
      // errors caused by the abort itself (e.g. the released reader) are expected
      if (!signal.aborted) {
        this.logger.error(e, 'Error forwarding audio to interruption stream');
      }
    } finally {
      reader.releaseLock();
    }
  })();

  // Read interruption events from the stream
  const eventStream = this.interruptionStream.stream;
  const eventReader = eventStream.getReader();

  const abortHandler = () => {
    signal.removeEventListener('abort', abortHandler);
    eventReader.releaseLock();
    this.interruptionStream?.close();
    // Also release the forwarding reader: otherwise the forwarder stays parked in
    // reader.read() until another frame shows up, and the `await forwardTask` below
    // cannot finish in the meantime.
    reader.releaseLock();
  };
  signal.addEventListener('abort', abortHandler);

  try {
    while (!signal.aborted) {
      const { done, value: ev } = await eventReader.read();
      if (done) break;

      this.logger.debug({ type: ev.type, probability: ev.probability }, 'Interruption event');
      this.hooks.onInterruption(ev);
    }
  } catch (e) {
    if (!signal.aborted) {
      this.logger.error(e, 'Error in interruption task');
    }
  } finally {
    // drop the listener when the loop ended without an abort
    signal.removeEventListener('abort', abortHandler);
    this.logger.debug('Interruption task closed');
    await forwardTask;
  }
}
712
+
713
/**
 * Marks the agent as speaking.
 * When interruption detection is enabled and its stream exists, the agent-speech-started
 * sentinel is pushed to that stream.
 */
onStartOfAgentSpeech(): void {
  this.agentSpeaking = true;

  if (this.interruptionEnabled && this.interruptionStream) {
    this.interruptionStream.pushFrame(InterruptionStreamSentinel.speechStarted());
  }
}
726
+
727
/**
 * Marks the agent as no longer speaking.
 * When interruption detection is enabled and its stream exists, the agent-speech-ended
 * sentinel is pushed first, and the overlap period is ended if the agent was still
 * flagged as speaking.
 */
onEndOfAgentSpeech(): void {
  if (this.interruptionEnabled && this.interruptionStream) {
    this.interruptionStream.pushFrame(InterruptionStreamSentinel.speechEnded());

    if (this.agentSpeaking) {
      // No interruption was detected, end the overlap inference
      this.onEndOfOverlapSpeech();
    }
  }

  this.agentSpeaking = false;
}
746
+
747
/**
 * Signals that the user started speaking while the agent is speaking (overlap speech).
 * The overlap-speech-started sentinel is pushed only when interruption detection is
 * enabled, its stream exists, the agent is currently speaking and a span was provided.
 *
 * @param speechDuration - Speech duration forwarded to the sentinel.
 * @param userSpeakingSpan - Span forwarded to the sentinel; nothing is pushed without it.
 */
onStartOfOverlapSpeech(speechDuration: number, userSpeakingSpan?: Span): void {
  if (
    this.interruptionEnabled &&
    this.interruptionStream &&
    this.agentSpeaking &&
    userSpeakingSpan
  ) {
    const sentinel = InterruptionStreamSentinel.overlapSpeechStarted(
      speechDuration,
      userSpeakingSpan,
    );
    this.interruptionStream.pushFrame(sentinel);
  }
}
762
+
763
/**
 * Signals that the overlap period is over by pushing the overlap-speech-ended sentinel.
 * Does nothing when interruption detection is disabled or its stream does not exist.
 */
onEndOfOverlapSpeech(): void {
  if (this.interruptionEnabled && this.interruptionStream) {
    this.interruptionStream.pushFrame(InterruptionStreamSentinel.overlapSpeechEnded());
  }
}
774
+
775
/**
 * Sets `audioStream` as the source of the deferred input stream.
 *
 * @param audioStream - Stream of audio frames to use as the source.
 */
setInputAudioStream(audioStream: ReadableStream<AudioFrame>): void {
  this.deferredInputStream.setSource(audioStream);
}
778
+
779
/**
 * Detaches the current source from the deferred input stream.
 */
detachInputAudioStream(): void {
  this.deferredInputStream.detachSource();
}
782
+
783
/**
 * Discards everything collected for the current user turn (final, interim and preflight
 * transcripts, confidences, committed flag). If an STT task exists, it is cancelled and a
 * new one is started once the cancellation has settled.
 */
clearUserTurn() {
  this.userTurnCommitted = false;
  this.finalTranscriptConfidence = [];
  this.audioPreflightTranscript = '';
  this.audioInterimTranscript = '';
  this.audioTranscript = '';

  const runningTask = this.sttTask;
  if (!runningTask) {
    // no STT task to restart
    return;
  }

  const restartStt = () => {
    const freshTask = Task.from(({ signal }) => this.createSttTask(this.stt, signal));
    this.sttTask = freshTask;
    freshTask.result.catch((err) => {
      this.logger.error(`Error running STT task: ${err}`);
    });
  };

  // restart whether the cancellation resolved or rejected
  runningTask.cancelAndWait().finally(restartStt);
}
797
+
798
/**
 * Commits the current user turn: optionally waits for a late final transcript, folds any
 * interim transcript into the final one, then runs EOU detection and marks the turn as
 * committed. A commit task that is still running is cancelled first.
 *
 * @param audioDetached - When true (and a sample rate is known), half a second of silence
 *   is written to `silenceAudioWriter` before waiting, to flush the STT.
 */
commitUserTurn(audioDetached: boolean) {
  const commitUserTurnTask =
    (delayDuration: number = 500) =>
    async (controller: AbortController) => {
      // only wait when the last final transcript is older than the delay
      if (Date.now() - this.lastFinalTranscriptTime > delayDuration) {
        // flush the stt by pushing silence
        if (audioDetached && this.sampleRate !== undefined) {
          const numSamples = Math.floor(this.sampleRate * 0.5);
          // mono frame: one Int16 element per sample, so the buffer holds exactly
          // `numSamples` elements, matching the samplesPerChannel passed below
          const silence = new Int16Array(numSamples);
          const silenceFrame = new AudioFrame(silence, this.sampleRate, 1, numSamples);
          // fire-and-forget, but never leave the rejection unhandled
          this.silenceAudioWriter.write(silenceFrame).catch((err: unknown) => {
            this.logger.debug({ error: err }, 'commitUserTurn: error writing silence frame');
          });
        }

        // wait for the final transcript to be available
        await delay(delayDuration, { signal: controller.signal });
      }

      if (this.audioInterimTranscript) {
        // append interim transcript in case the final transcript is not ready
        this.audioTranscript = `${this.audioTranscript} ${this.audioInterimTranscript}`.trim();
      }
      this.audioInterimTranscript = '';

      const chatCtx = this.hooks.retrieveChatCtx();
      this.logger.debug('running EOU detection on commitUserTurn');
      this.runEOUDetection(chatCtx);
      this.userTurnCommitted = true;
    };

  // cancel any existing commit user turn task
  this.commitUserTurnTask?.cancel();
  this.commitUserTurnTask = Task.from(commitUserTurnTask());

  this.commitUserTurnTask.result
    .then(() => {
      this.logger.debug('User turn committed');
    })
    .catch((err: unknown) => {
      if (err instanceof Error && err.message.includes('This operation was aborted')) {
        // a cancelled commit is expected (newer commit or shutdown), same as for the EOU task
        return;
      }
      this.logger.error(err, 'Error in user turn commit task:');
    });
}
839
+
840
/**
 * Shuts the recognition pipeline down: detaches the input audio, releases the silence
 * writer, then cancels the commit, STT, VAD, EOU and interruption tasks one after the
 * other (waiting for each), and finally closes the interruption stream.
 */
async close() {
  this.detachInputAudioStream();
  this.silenceAudioWriter.releaseLock();

  // Looked up lazily, in order: a field may be reassigned while an earlier task is
  // being awaited, and the value current at that point is the one to cancel.
  const taskLookups = [
    () => this.commitUserTurnTask,
    () => this.sttTask,
    () => this.vadTask,
    () => this.bounceEOUTask,
    () => this.interruptionTask,
  ];
  for (const lookup of taskLookups) {
    await lookup()?.cancelAndWait();
  }

  await this.interruptionStream?.close();
}
850
+
851
/**
 * Ends the current user-turn span, if there is one, after recording the turn's
 * transcript, confidence and delays on it. The span reference is cleared afterwards.
 *
 * @param info - Values recorded as span attributes.
 */
private _endUserTurnSpan(info: {
  transcript: string;
  confidence: number;
  transcriptionDelay: number;
  endOfUtteranceDelay: number;
}): void {
  const span = this.userTurnSpan;
  if (!span) {
    return;
  }

  span.setAttributes({
    [traceTypes.ATTR_USER_TRANSCRIPT]: info.transcript,
    [traceTypes.ATTR_TRANSCRIPT_CONFIDENCE]: info.confidence,
    [traceTypes.ATTR_TRANSCRIPTION_DELAY]: info.transcriptionDelay,
    [traceTypes.ATTR_END_OF_TURN_DELAY]: info.endOfUtteranceDelay,
  });
  span.end();
  this.userTurnSpan = undefined;
}
873
+
874
/** True when the turn detection mode is `'vad'` or has not been set. */
private get vadBaseTurnDetection() {
  const mode = this.turnDetectionMode;
  return mode === 'vad' || mode === undefined;
}
877
+ }