@livekit/agents 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (942)
  1. package/dist/audio.cjs +89 -3
  2. package/dist/audio.cjs.map +1 -1
  3. package/dist/audio.d.cts +36 -1
  4. package/dist/audio.d.ts +36 -1
  5. package/dist/audio.d.ts.map +1 -1
  6. package/dist/audio.js +76 -2
  7. package/dist/audio.js.map +1 -1
  8. package/dist/beta/index.cjs +29 -0
  9. package/dist/beta/index.cjs.map +1 -0
  10. package/dist/beta/index.d.cts +2 -0
  11. package/dist/beta/index.d.ts +2 -0
  12. package/dist/beta/index.d.ts.map +1 -0
  13. package/dist/beta/index.js +7 -0
  14. package/dist/beta/index.js.map +1 -0
  15. package/dist/beta/workflows/index.cjs +29 -0
  16. package/dist/beta/workflows/index.cjs.map +1 -0
  17. package/dist/beta/workflows/index.d.cts +2 -0
  18. package/dist/beta/workflows/index.d.ts +2 -0
  19. package/dist/beta/workflows/index.d.ts.map +1 -0
  20. package/dist/beta/workflows/index.js +7 -0
  21. package/dist/beta/workflows/index.js.map +1 -0
  22. package/dist/beta/workflows/task_group.cjs +162 -0
  23. package/dist/beta/workflows/task_group.cjs.map +1 -0
  24. package/dist/beta/workflows/task_group.d.cts +32 -0
  25. package/dist/beta/workflows/task_group.d.ts +32 -0
  26. package/dist/beta/workflows/task_group.d.ts.map +1 -0
  27. package/dist/beta/workflows/task_group.js +138 -0
  28. package/dist/beta/workflows/task_group.js.map +1 -0
  29. package/dist/cli.cjs +44 -46
  30. package/dist/cli.cjs.map +1 -1
  31. package/dist/cli.d.cts +3 -3
  32. package/dist/cli.d.ts +3 -3
  33. package/dist/cli.d.ts.map +1 -1
  34. package/dist/cli.js +45 -47
  35. package/dist/cli.js.map +1 -1
  36. package/dist/connection_pool.cjs +242 -0
  37. package/dist/connection_pool.cjs.map +1 -0
  38. package/dist/connection_pool.d.cts +123 -0
  39. package/dist/connection_pool.d.ts +123 -0
  40. package/dist/connection_pool.d.ts.map +1 -0
  41. package/dist/connection_pool.js +218 -0
  42. package/dist/connection_pool.js.map +1 -0
  43. package/dist/connection_pool.test.cjs +256 -0
  44. package/dist/connection_pool.test.cjs.map +1 -0
  45. package/dist/connection_pool.test.js +255 -0
  46. package/dist/connection_pool.test.js.map +1 -0
  47. package/dist/constants.cjs +30 -0
  48. package/dist/constants.cjs.map +1 -1
  49. package/dist/constants.d.cts +10 -0
  50. package/dist/constants.d.ts +10 -0
  51. package/dist/constants.d.ts.map +1 -1
  52. package/dist/constants.js +20 -0
  53. package/dist/constants.js.map +1 -1
  54. package/dist/cpu.cjs +189 -0
  55. package/dist/cpu.cjs.map +1 -0
  56. package/dist/cpu.d.cts +24 -0
  57. package/dist/cpu.d.ts +24 -0
  58. package/dist/cpu.d.ts.map +1 -0
  59. package/dist/cpu.js +152 -0
  60. package/dist/cpu.js.map +1 -0
  61. package/dist/cpu.test.cjs +227 -0
  62. package/dist/cpu.test.cjs.map +1 -0
  63. package/dist/cpu.test.js +204 -0
  64. package/dist/cpu.test.js.map +1 -0
  65. package/dist/http_server.cjs +9 -6
  66. package/dist/http_server.cjs.map +1 -1
  67. package/dist/http_server.d.cts +5 -1
  68. package/dist/http_server.d.ts +5 -1
  69. package/dist/http_server.d.ts.map +1 -1
  70. package/dist/http_server.js +9 -6
  71. package/dist/http_server.js.map +1 -1
  72. package/dist/index.cjs +24 -9
  73. package/dist/index.cjs.map +1 -1
  74. package/dist/index.d.cts +15 -11
  75. package/dist/index.d.ts +15 -11
  76. package/dist/index.d.ts.map +1 -1
  77. package/dist/index.js +18 -9
  78. package/dist/index.js.map +1 -1
  79. package/dist/inference/api_protos.cjs +70 -2
  80. package/dist/inference/api_protos.cjs.map +1 -1
  81. package/dist/inference/api_protos.d.cts +373 -32
  82. package/dist/inference/api_protos.d.ts +373 -32
  83. package/dist/inference/api_protos.d.ts.map +1 -1
  84. package/dist/inference/api_protos.js +62 -2
  85. package/dist/inference/api_protos.js.map +1 -1
  86. package/dist/inference/index.cjs +8 -0
  87. package/dist/inference/index.cjs.map +1 -1
  88. package/dist/inference/index.d.cts +3 -4
  89. package/dist/inference/index.d.ts +3 -4
  90. package/dist/inference/index.d.ts.map +1 -1
  91. package/dist/inference/index.js +18 -3
  92. package/dist/inference/index.js.map +1 -1
  93. package/dist/inference/interruption/defaults.cjs +81 -0
  94. package/dist/inference/interruption/defaults.cjs.map +1 -0
  95. package/dist/inference/interruption/defaults.d.cts +19 -0
  96. package/dist/inference/interruption/defaults.d.ts +19 -0
  97. package/dist/inference/interruption/defaults.d.ts.map +1 -0
  98. package/dist/inference/interruption/defaults.js +46 -0
  99. package/dist/inference/interruption/defaults.js.map +1 -0
  100. package/dist/inference/interruption/errors.cjs +44 -0
  101. package/dist/inference/interruption/errors.cjs.map +1 -0
  102. package/dist/inference/interruption/errors.d.cts +12 -0
  103. package/dist/inference/interruption/errors.d.ts +12 -0
  104. package/dist/inference/interruption/errors.d.ts.map +1 -0
  105. package/dist/inference/interruption/errors.js +20 -0
  106. package/dist/inference/interruption/errors.js.map +1 -0
  107. package/dist/inference/interruption/http_transport.cjs +163 -0
  108. package/dist/inference/interruption/http_transport.cjs.map +1 -0
  109. package/dist/inference/interruption/http_transport.d.cts +63 -0
  110. package/dist/inference/interruption/http_transport.d.ts +63 -0
  111. package/dist/inference/interruption/http_transport.d.ts.map +1 -0
  112. package/dist/inference/interruption/http_transport.js +137 -0
  113. package/dist/inference/interruption/http_transport.js.map +1 -0
  114. package/dist/inference/interruption/interruption_cache_entry.cjs +58 -0
  115. package/dist/inference/interruption/interruption_cache_entry.cjs.map +1 -0
  116. package/dist/inference/interruption/interruption_cache_entry.d.cts +30 -0
  117. package/dist/inference/interruption/interruption_cache_entry.d.ts +30 -0
  118. package/dist/inference/interruption/interruption_cache_entry.d.ts.map +1 -0
  119. package/dist/inference/interruption/interruption_cache_entry.js +34 -0
  120. package/dist/inference/interruption/interruption_cache_entry.js.map +1 -0
  121. package/dist/inference/interruption/interruption_detector.cjs +198 -0
  122. package/dist/inference/interruption/interruption_detector.cjs.map +1 -0
  123. package/dist/inference/interruption/interruption_detector.d.cts +59 -0
  124. package/dist/inference/interruption/interruption_detector.d.ts +59 -0
  125. package/dist/inference/interruption/interruption_detector.d.ts.map +1 -0
  126. package/dist/inference/interruption/interruption_detector.js +164 -0
  127. package/dist/inference/interruption/interruption_detector.js.map +1 -0
  128. package/dist/inference/interruption/interruption_stream.cjs +368 -0
  129. package/dist/inference/interruption/interruption_stream.cjs.map +1 -0
  130. package/dist/inference/interruption/interruption_stream.d.cts +46 -0
  131. package/dist/inference/interruption/interruption_stream.d.ts +46 -0
  132. package/dist/inference/interruption/interruption_stream.d.ts.map +1 -0
  133. package/dist/inference/interruption/interruption_stream.js +344 -0
  134. package/dist/inference/interruption/interruption_stream.js.map +1 -0
  135. package/dist/inference/interruption/types.cjs +17 -0
  136. package/dist/inference/interruption/types.cjs.map +1 -0
  137. package/dist/inference/interruption/types.d.cts +66 -0
  138. package/dist/inference/interruption/types.d.ts +66 -0
  139. package/dist/inference/interruption/types.d.ts.map +1 -0
  140. package/dist/inference/interruption/types.js +1 -0
  141. package/dist/inference/interruption/types.js.map +1 -0
  142. package/dist/inference/interruption/utils.cjs +130 -0
  143. package/dist/inference/interruption/utils.cjs.map +1 -0
  144. package/dist/inference/interruption/utils.d.cts +41 -0
  145. package/dist/inference/interruption/utils.d.ts +41 -0
  146. package/dist/inference/interruption/utils.d.ts.map +1 -0
  147. package/dist/inference/interruption/utils.js +105 -0
  148. package/dist/inference/interruption/utils.js.map +1 -0
  149. package/dist/inference/interruption/utils.test.cjs +105 -0
  150. package/dist/inference/interruption/utils.test.cjs.map +1 -0
  151. package/dist/inference/interruption/utils.test.js +104 -0
  152. package/dist/inference/interruption/utils.test.js.map +1 -0
  153. package/dist/inference/interruption/ws_transport.cjs +342 -0
  154. package/dist/inference/interruption/ws_transport.cjs.map +1 -0
  155. package/dist/inference/interruption/ws_transport.d.cts +33 -0
  156. package/dist/inference/interruption/ws_transport.d.ts +33 -0
  157. package/dist/inference/interruption/ws_transport.d.ts.map +1 -0
  158. package/dist/inference/interruption/ws_transport.js +308 -0
  159. package/dist/inference/interruption/ws_transport.js.map +1 -0
  160. package/dist/inference/llm.cjs +106 -66
  161. package/dist/inference/llm.cjs.map +1 -1
  162. package/dist/inference/llm.d.cts +65 -43
  163. package/dist/inference/llm.d.ts +65 -43
  164. package/dist/inference/llm.d.ts.map +1 -1
  165. package/dist/inference/llm.js +100 -66
  166. package/dist/inference/llm.js.map +1 -1
  167. package/dist/inference/stt.cjs +319 -170
  168. package/dist/inference/stt.cjs.map +1 -1
  169. package/dist/inference/stt.d.cts +64 -15
  170. package/dist/inference/stt.d.ts +64 -15
  171. package/dist/inference/stt.d.ts.map +1 -1
  172. package/dist/inference/stt.js +319 -170
  173. package/dist/inference/stt.js.map +1 -1
  174. package/dist/inference/stt.test.cjs +218 -0
  175. package/dist/inference/stt.test.cjs.map +1 -0
  176. package/dist/inference/stt.test.js +217 -0
  177. package/dist/inference/stt.test.js.map +1 -0
  178. package/dist/inference/tts.cjs +249 -71
  179. package/dist/inference/tts.cjs.map +1 -1
  180. package/dist/inference/tts.d.cts +55 -16
  181. package/dist/inference/tts.d.ts +55 -16
  182. package/dist/inference/tts.d.ts.map +1 -1
  183. package/dist/inference/tts.js +249 -77
  184. package/dist/inference/tts.js.map +1 -1
  185. package/dist/inference/tts.test.cjs +233 -0
  186. package/dist/inference/tts.test.cjs.map +1 -0
  187. package/dist/inference/tts.test.js +232 -0
  188. package/dist/inference/tts.test.js.map +1 -0
  189. package/dist/inference/utils.cjs +26 -7
  190. package/dist/inference/utils.cjs.map +1 -1
  191. package/dist/inference/utils.d.cts +14 -1
  192. package/dist/inference/utils.d.ts +14 -1
  193. package/dist/inference/utils.d.ts.map +1 -1
  194. package/dist/inference/utils.js +18 -2
  195. package/dist/inference/utils.js.map +1 -1
  196. package/dist/ipc/inference_proc_executor.cjs +6 -3
  197. package/dist/ipc/inference_proc_executor.cjs.map +1 -1
  198. package/dist/ipc/inference_proc_executor.d.ts.map +1 -1
  199. package/dist/ipc/inference_proc_executor.js +6 -3
  200. package/dist/ipc/inference_proc_executor.js.map +1 -1
  201. package/dist/ipc/inference_proc_lazy_main.cjs +13 -1
  202. package/dist/ipc/inference_proc_lazy_main.cjs.map +1 -1
  203. package/dist/ipc/inference_proc_lazy_main.js +13 -1
  204. package/dist/ipc/inference_proc_lazy_main.js.map +1 -1
  205. package/dist/ipc/job_proc_executor.cjs +6 -1
  206. package/dist/ipc/job_proc_executor.cjs.map +1 -1
  207. package/dist/ipc/job_proc_executor.d.ts.map +1 -1
  208. package/dist/ipc/job_proc_executor.js +6 -1
  209. package/dist/ipc/job_proc_executor.js.map +1 -1
  210. package/dist/ipc/job_proc_lazy_main.cjs +89 -17
  211. package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
  212. package/dist/ipc/job_proc_lazy_main.js +68 -18
  213. package/dist/ipc/job_proc_lazy_main.js.map +1 -1
  214. package/dist/ipc/supervised_proc.cjs +34 -8
  215. package/dist/ipc/supervised_proc.cjs.map +1 -1
  216. package/dist/ipc/supervised_proc.d.cts +8 -0
  217. package/dist/ipc/supervised_proc.d.ts +8 -0
  218. package/dist/ipc/supervised_proc.d.ts.map +1 -1
  219. package/dist/ipc/supervised_proc.js +34 -8
  220. package/dist/ipc/supervised_proc.js.map +1 -1
  221. package/dist/ipc/supervised_proc.test.cjs +145 -0
  222. package/dist/ipc/supervised_proc.test.cjs.map +1 -0
  223. package/dist/ipc/supervised_proc.test.js +122 -0
  224. package/dist/ipc/supervised_proc.test.js.map +1 -0
  225. package/dist/job.cjs +109 -1
  226. package/dist/job.cjs.map +1 -1
  227. package/dist/job.d.cts +14 -0
  228. package/dist/job.d.ts +14 -0
  229. package/dist/job.d.ts.map +1 -1
  230. package/dist/job.js +99 -1
  231. package/dist/job.js.map +1 -1
  232. package/dist/language.cjs +394 -0
  233. package/dist/language.cjs.map +1 -0
  234. package/dist/language.d.cts +15 -0
  235. package/dist/language.d.ts +15 -0
  236. package/dist/language.d.ts.map +1 -0
  237. package/dist/language.js +363 -0
  238. package/dist/language.js.map +1 -0
  239. package/dist/language.test.cjs +43 -0
  240. package/dist/language.test.cjs.map +1 -0
  241. package/dist/language.test.js +49 -0
  242. package/dist/language.test.js.map +1 -0
  243. package/dist/llm/chat_context.cjs +274 -3
  244. package/dist/llm/chat_context.cjs.map +1 -1
  245. package/dist/llm/chat_context.d.cts +86 -2
  246. package/dist/llm/chat_context.d.ts +86 -2
  247. package/dist/llm/chat_context.d.ts.map +1 -1
  248. package/dist/llm/chat_context.js +273 -3
  249. package/dist/llm/chat_context.js.map +1 -1
  250. package/dist/llm/chat_context.test.cjs +574 -0
  251. package/dist/llm/chat_context.test.cjs.map +1 -1
  252. package/dist/llm/chat_context.test.js +574 -0
  253. package/dist/llm/chat_context.test.js.map +1 -1
  254. package/dist/llm/fallback_adapter.cjs +278 -0
  255. package/dist/llm/fallback_adapter.cjs.map +1 -0
  256. package/dist/llm/fallback_adapter.d.cts +73 -0
  257. package/dist/llm/fallback_adapter.d.ts +73 -0
  258. package/dist/llm/fallback_adapter.d.ts.map +1 -0
  259. package/dist/llm/fallback_adapter.js +254 -0
  260. package/dist/llm/fallback_adapter.js.map +1 -0
  261. package/dist/llm/fallback_adapter.test.cjs +176 -0
  262. package/dist/llm/fallback_adapter.test.cjs.map +1 -0
  263. package/dist/llm/fallback_adapter.test.js +175 -0
  264. package/dist/llm/fallback_adapter.test.js.map +1 -0
  265. package/dist/llm/index.cjs +9 -0
  266. package/dist/llm/index.cjs.map +1 -1
  267. package/dist/llm/index.d.cts +4 -3
  268. package/dist/llm/index.d.ts +4 -3
  269. package/dist/llm/index.d.ts.map +1 -1
  270. package/dist/llm/index.js +11 -1
  271. package/dist/llm/index.js.map +1 -1
  272. package/dist/llm/llm.cjs +65 -11
  273. package/dist/llm/llm.cjs.map +1 -1
  274. package/dist/llm/llm.d.cts +13 -2
  275. package/dist/llm/llm.d.ts +13 -2
  276. package/dist/llm/llm.d.ts.map +1 -1
  277. package/dist/llm/llm.js +65 -11
  278. package/dist/llm/llm.js.map +1 -1
  279. package/dist/llm/provider_format/google.cjs +6 -2
  280. package/dist/llm/provider_format/google.cjs.map +1 -1
  281. package/dist/llm/provider_format/google.d.cts +1 -1
  282. package/dist/llm/provider_format/google.d.ts +1 -1
  283. package/dist/llm/provider_format/google.d.ts.map +1 -1
  284. package/dist/llm/provider_format/google.js +6 -2
  285. package/dist/llm/provider_format/google.js.map +1 -1
  286. package/dist/llm/provider_format/google.test.cjs +48 -0
  287. package/dist/llm/provider_format/google.test.cjs.map +1 -1
  288. package/dist/llm/provider_format/google.test.js +54 -1
  289. package/dist/llm/provider_format/google.test.js.map +1 -1
  290. package/dist/llm/provider_format/index.cjs +2 -0
  291. package/dist/llm/provider_format/index.cjs.map +1 -1
  292. package/dist/llm/provider_format/index.d.cts +2 -2
  293. package/dist/llm/provider_format/index.d.ts +2 -2
  294. package/dist/llm/provider_format/index.d.ts.map +1 -1
  295. package/dist/llm/provider_format/index.js +6 -1
  296. package/dist/llm/provider_format/index.js.map +1 -1
  297. package/dist/llm/provider_format/openai.cjs +126 -24
  298. package/dist/llm/provider_format/openai.cjs.map +1 -1
  299. package/dist/llm/provider_format/openai.d.cts +1 -0
  300. package/dist/llm/provider_format/openai.d.ts +1 -0
  301. package/dist/llm/provider_format/openai.d.ts.map +1 -1
  302. package/dist/llm/provider_format/openai.js +124 -23
  303. package/dist/llm/provider_format/openai.js.map +1 -1
  304. package/dist/llm/provider_format/openai.test.cjs +393 -0
  305. package/dist/llm/provider_format/openai.test.cjs.map +1 -1
  306. package/dist/llm/provider_format/openai.test.js +400 -2
  307. package/dist/llm/provider_format/openai.test.js.map +1 -1
  308. package/dist/llm/provider_format/utils.cjs +5 -4
  309. package/dist/llm/provider_format/utils.cjs.map +1 -1
  310. package/dist/llm/provider_format/utils.d.ts.map +1 -1
  311. package/dist/llm/provider_format/utils.js +5 -4
  312. package/dist/llm/provider_format/utils.js.map +1 -1
  313. package/dist/llm/realtime.cjs +3 -0
  314. package/dist/llm/realtime.cjs.map +1 -1
  315. package/dist/llm/realtime.d.cts +15 -1
  316. package/dist/llm/realtime.d.ts +15 -1
  317. package/dist/llm/realtime.d.ts.map +1 -1
  318. package/dist/llm/realtime.js +3 -0
  319. package/dist/llm/realtime.js.map +1 -1
  320. package/dist/llm/remote_chat_context.cjs.map +1 -1
  321. package/dist/llm/remote_chat_context.d.cts +2 -0
  322. package/dist/llm/remote_chat_context.d.ts +2 -0
  323. package/dist/llm/remote_chat_context.d.ts.map +1 -1
  324. package/dist/llm/remote_chat_context.js.map +1 -1
  325. package/dist/llm/tool_context.cjs +50 -2
  326. package/dist/llm/tool_context.cjs.map +1 -1
  327. package/dist/llm/tool_context.d.cts +47 -11
  328. package/dist/llm/tool_context.d.ts +47 -11
  329. package/dist/llm/tool_context.d.ts.map +1 -1
  330. package/dist/llm/tool_context.js +48 -3
  331. package/dist/llm/tool_context.js.map +1 -1
  332. package/dist/llm/tool_context.test.cjs +197 -0
  333. package/dist/llm/tool_context.test.cjs.map +1 -1
  334. package/dist/llm/tool_context.test.js +175 -0
  335. package/dist/llm/tool_context.test.js.map +1 -1
  336. package/dist/llm/utils.cjs +18 -12
  337. package/dist/llm/utils.cjs.map +1 -1
  338. package/dist/llm/utils.d.cts +2 -3
  339. package/dist/llm/utils.d.ts +2 -3
  340. package/dist/llm/utils.d.ts.map +1 -1
  341. package/dist/llm/utils.js +18 -12
  342. package/dist/llm/utils.js.map +1 -1
  343. package/dist/llm/zod-utils.cjs +102 -0
  344. package/dist/llm/zod-utils.cjs.map +1 -0
  345. package/dist/llm/zod-utils.d.cts +65 -0
  346. package/dist/llm/zod-utils.d.ts +65 -0
  347. package/dist/llm/zod-utils.d.ts.map +1 -0
  348. package/dist/llm/zod-utils.js +64 -0
  349. package/dist/llm/zod-utils.js.map +1 -0
  350. package/dist/llm/zod-utils.test.cjs +472 -0
  351. package/dist/llm/zod-utils.test.cjs.map +1 -0
  352. package/dist/llm/zod-utils.test.js +455 -0
  353. package/dist/llm/zod-utils.test.js.map +1 -0
  354. package/dist/log.cjs +45 -14
  355. package/dist/log.cjs.map +1 -1
  356. package/dist/log.d.cts +8 -1
  357. package/dist/log.d.ts +8 -1
  358. package/dist/log.d.ts.map +1 -1
  359. package/dist/log.js +45 -15
  360. package/dist/log.js.map +1 -1
  361. package/dist/metrics/base.cjs.map +1 -1
  362. package/dist/metrics/base.d.cts +75 -19
  363. package/dist/metrics/base.d.ts +75 -19
  364. package/dist/metrics/base.d.ts.map +1 -1
  365. package/dist/metrics/index.cjs +5 -0
  366. package/dist/metrics/index.cjs.map +1 -1
  367. package/dist/metrics/index.d.cts +2 -1
  368. package/dist/metrics/index.d.ts +2 -1
  369. package/dist/metrics/index.d.ts.map +1 -1
  370. package/dist/metrics/index.js +6 -0
  371. package/dist/metrics/index.js.map +1 -1
  372. package/dist/metrics/model_usage.cjs +189 -0
  373. package/dist/metrics/model_usage.cjs.map +1 -0
  374. package/dist/metrics/model_usage.d.cts +92 -0
  375. package/dist/metrics/model_usage.d.ts +92 -0
  376. package/dist/metrics/model_usage.d.ts.map +1 -0
  377. package/dist/metrics/model_usage.js +164 -0
  378. package/dist/metrics/model_usage.js.map +1 -0
  379. package/dist/metrics/model_usage.test.cjs +474 -0
  380. package/dist/metrics/model_usage.test.cjs.map +1 -0
  381. package/dist/metrics/model_usage.test.js +476 -0
  382. package/dist/metrics/model_usage.test.js.map +1 -0
  383. package/dist/metrics/usage_collector.cjs +5 -2
  384. package/dist/metrics/usage_collector.cjs.map +1 -1
  385. package/dist/metrics/usage_collector.d.cts +10 -1
  386. package/dist/metrics/usage_collector.d.ts +10 -1
  387. package/dist/metrics/usage_collector.d.ts.map +1 -1
  388. package/dist/metrics/usage_collector.js +5 -2
  389. package/dist/metrics/usage_collector.js.map +1 -1
  390. package/dist/metrics/utils.cjs +23 -7
  391. package/dist/metrics/utils.cjs.map +1 -1
  392. package/dist/metrics/utils.d.ts.map +1 -1
  393. package/dist/metrics/utils.js +23 -7
  394. package/dist/metrics/utils.js.map +1 -1
  395. package/dist/stream/deferred_stream.cjs +31 -10
  396. package/dist/stream/deferred_stream.cjs.map +1 -1
  397. package/dist/stream/deferred_stream.d.cts +6 -1
  398. package/dist/stream/deferred_stream.d.ts +6 -1
  399. package/dist/stream/deferred_stream.d.ts.map +1 -1
  400. package/dist/stream/deferred_stream.js +31 -10
  401. package/dist/stream/deferred_stream.js.map +1 -1
  402. package/dist/stream/deferred_stream.test.cjs +2 -2
  403. package/dist/stream/deferred_stream.test.cjs.map +1 -1
  404. package/dist/stream/deferred_stream.test.js +2 -2
  405. package/dist/stream/deferred_stream.test.js.map +1 -1
  406. package/dist/stream/index.cjs +3 -0
  407. package/dist/stream/index.cjs.map +1 -1
  408. package/dist/stream/index.d.cts +1 -0
  409. package/dist/stream/index.d.ts +1 -0
  410. package/dist/stream/index.d.ts.map +1 -1
  411. package/dist/stream/index.js +2 -0
  412. package/dist/stream/index.js.map +1 -1
  413. package/dist/stream/multi_input_stream.cjs +139 -0
  414. package/dist/stream/multi_input_stream.cjs.map +1 -0
  415. package/dist/stream/multi_input_stream.d.cts +55 -0
  416. package/dist/stream/multi_input_stream.d.ts +55 -0
  417. package/dist/stream/multi_input_stream.d.ts.map +1 -0
  418. package/dist/stream/multi_input_stream.js +115 -0
  419. package/dist/stream/multi_input_stream.js.map +1 -0
  420. package/dist/stream/multi_input_stream.test.cjs +344 -0
  421. package/dist/stream/multi_input_stream.test.cjs.map +1 -0
  422. package/dist/stream/multi_input_stream.test.js +343 -0
  423. package/dist/stream/multi_input_stream.test.js.map +1 -0
  424. package/dist/stream/stream_channel.cjs +39 -1
  425. package/dist/stream/stream_channel.cjs.map +1 -1
  426. package/dist/stream/stream_channel.d.cts +5 -2
  427. package/dist/stream/stream_channel.d.ts +5 -2
  428. package/dist/stream/stream_channel.d.ts.map +1 -1
  429. package/dist/stream/stream_channel.js +39 -1
  430. package/dist/stream/stream_channel.js.map +1 -1
  431. package/dist/stream/stream_channel.test.cjs +27 -0
  432. package/dist/stream/stream_channel.test.cjs.map +1 -1
  433. package/dist/stream/stream_channel.test.js +27 -0
  434. package/dist/stream/stream_channel.test.js.map +1 -1
  435. package/dist/stt/stream_adapter.cjs +24 -9
  436. package/dist/stt/stream_adapter.cjs.map +1 -1
  437. package/dist/stt/stream_adapter.d.cts +7 -3
  438. package/dist/stt/stream_adapter.d.ts +7 -3
  439. package/dist/stt/stream_adapter.d.ts.map +1 -1
  440. package/dist/stt/stream_adapter.js +24 -9
  441. package/dist/stt/stream_adapter.js.map +1 -1
  442. package/dist/stt/stt.cjs +86 -19
  443. package/dist/stt/stt.cjs.map +1 -1
  444. package/dist/stt/stt.d.cts +60 -5
  445. package/dist/stt/stt.d.ts +60 -5
  446. package/dist/stt/stt.d.ts.map +1 -1
  447. package/dist/stt/stt.js +88 -21
  448. package/dist/stt/stt.js.map +1 -1
  449. package/dist/telemetry/index.cjs +72 -0
  450. package/dist/telemetry/index.cjs.map +1 -0
  451. package/dist/telemetry/index.d.cts +7 -0
  452. package/dist/telemetry/index.d.ts +7 -0
  453. package/dist/telemetry/index.d.ts.map +1 -0
  454. package/dist/telemetry/index.js +37 -0
  455. package/dist/telemetry/index.js.map +1 -0
  456. package/dist/telemetry/logging.cjs +65 -0
  457. package/dist/telemetry/logging.cjs.map +1 -0
  458. package/dist/telemetry/logging.d.cts +21 -0
  459. package/dist/telemetry/logging.d.ts +21 -0
  460. package/dist/telemetry/logging.d.ts.map +1 -0
  461. package/dist/telemetry/logging.js +40 -0
  462. package/dist/telemetry/logging.js.map +1 -0
  463. package/dist/telemetry/otel_http_exporter.cjs +166 -0
  464. package/dist/telemetry/otel_http_exporter.cjs.map +1 -0
  465. package/dist/telemetry/otel_http_exporter.d.cts +63 -0
  466. package/dist/telemetry/otel_http_exporter.d.ts +63 -0
  467. package/dist/telemetry/otel_http_exporter.d.ts.map +1 -0
  468. package/dist/telemetry/otel_http_exporter.js +142 -0
  469. package/dist/telemetry/otel_http_exporter.js.map +1 -0
  470. package/dist/telemetry/pino_otel_transport.cjs +217 -0
  471. package/dist/telemetry/pino_otel_transport.cjs.map +1 -0
  472. package/dist/telemetry/pino_otel_transport.d.cts +58 -0
  473. package/dist/telemetry/pino_otel_transport.d.ts +58 -0
  474. package/dist/telemetry/pino_otel_transport.d.ts.map +1 -0
  475. package/dist/telemetry/pino_otel_transport.js +189 -0
  476. package/dist/telemetry/pino_otel_transport.js.map +1 -0
  477. package/dist/telemetry/trace_types.cjs +233 -0
  478. package/dist/telemetry/trace_types.cjs.map +1 -0
  479. package/dist/telemetry/trace_types.d.cts +74 -0
  480. package/dist/telemetry/trace_types.d.ts +74 -0
  481. package/dist/telemetry/trace_types.d.ts.map +1 -0
  482. package/dist/telemetry/trace_types.js +141 -0
  483. package/dist/telemetry/trace_types.js.map +1 -0
  484. package/dist/telemetry/traces.cjs +484 -0
  485. package/dist/telemetry/traces.cjs.map +1 -0
  486. package/dist/telemetry/traces.d.cts +116 -0
  487. package/dist/telemetry/traces.d.ts +116 -0
  488. package/dist/telemetry/traces.d.ts.map +1 -0
  489. package/dist/telemetry/traces.js +449 -0
  490. package/dist/telemetry/traces.js.map +1 -0
  491. package/dist/telemetry/utils.cjs +86 -0
  492. package/dist/telemetry/utils.cjs.map +1 -0
  493. package/dist/telemetry/utils.d.cts +5 -0
  494. package/dist/telemetry/utils.d.ts +5 -0
  495. package/dist/telemetry/utils.d.ts.map +1 -0
  496. package/dist/telemetry/utils.js +51 -0
  497. package/dist/telemetry/utils.js.map +1 -0
  498. package/dist/tokenize/basic/sentence.cjs +3 -3
  499. package/dist/tokenize/basic/sentence.cjs.map +1 -1
  500. package/dist/tokenize/basic/sentence.js +3 -3
  501. package/dist/tokenize/basic/sentence.js.map +1 -1
  502. package/dist/tokenize/tokenizer.test.cjs +3 -1
  503. package/dist/tokenize/tokenizer.test.cjs.map +1 -1
  504. package/dist/tokenize/tokenizer.test.js +3 -1
  505. package/dist/tokenize/tokenizer.test.js.map +1 -1
  506. package/dist/transcription.cjs.map +1 -1
  507. package/dist/transcription.d.cts +6 -0
  508. package/dist/transcription.d.ts +6 -0
  509. package/dist/transcription.d.ts.map +1 -1
  510. package/dist/transcription.js.map +1 -1
  511. package/dist/tts/fallback_adapter.cjs +466 -0
  512. package/dist/tts/fallback_adapter.cjs.map +1 -0
  513. package/dist/tts/fallback_adapter.d.cts +110 -0
  514. package/dist/tts/fallback_adapter.d.ts +110 -0
  515. package/dist/tts/fallback_adapter.d.ts.map +1 -0
  516. package/dist/tts/fallback_adapter.js +442 -0
  517. package/dist/tts/fallback_adapter.js.map +1 -0
  518. package/dist/tts/index.cjs +3 -0
  519. package/dist/tts/index.cjs.map +1 -1
  520. package/dist/tts/index.d.cts +1 -0
  521. package/dist/tts/index.d.ts +1 -0
  522. package/dist/tts/index.d.ts.map +1 -1
  523. package/dist/tts/index.js +2 -0
  524. package/dist/tts/index.js.map +1 -1
  525. package/dist/tts/stream_adapter.cjs +25 -8
  526. package/dist/tts/stream_adapter.cjs.map +1 -1
  527. package/dist/tts/stream_adapter.d.cts +6 -3
  528. package/dist/tts/stream_adapter.d.ts +6 -3
  529. package/dist/tts/stream_adapter.d.ts.map +1 -1
  530. package/dist/tts/stream_adapter.js +25 -8
  531. package/dist/tts/stream_adapter.js.map +1 -1
  532. package/dist/tts/tts.cjs +189 -57
  533. package/dist/tts/tts.cjs.map +1 -1
  534. package/dist/tts/tts.d.cts +58 -6
  535. package/dist/tts/tts.d.ts +58 -6
  536. package/dist/tts/tts.d.ts.map +1 -1
  537. package/dist/tts/tts.js +191 -59
  538. package/dist/tts/tts.js.map +1 -1
  539. package/dist/types.cjs +24 -32
  540. package/dist/types.cjs.map +1 -1
  541. package/dist/types.d.cts +45 -10
  542. package/dist/types.d.ts +45 -10
  543. package/dist/types.d.ts.map +1 -1
  544. package/dist/types.js +20 -30
  545. package/dist/types.js.map +1 -1
  546. package/dist/utils.cjs +122 -26
  547. package/dist/utils.cjs.map +1 -1
  548. package/dist/utils.d.cts +41 -1
  549. package/dist/utils.d.ts +41 -1
  550. package/dist/utils.d.ts.map +1 -1
  551. package/dist/utils.js +117 -25
  552. package/dist/utils.js.map +1 -1
  553. package/dist/utils.test.cjs +73 -1
  554. package/dist/utils.test.cjs.map +1 -1
  555. package/dist/utils.test.js +74 -10
  556. package/dist/utils.test.js.map +1 -1
  557. package/dist/vad.cjs +35 -15
  558. package/dist/vad.cjs.map +1 -1
  559. package/dist/vad.d.cts +15 -5
  560. package/dist/vad.d.ts +15 -5
  561. package/dist/vad.d.ts.map +1 -1
  562. package/dist/vad.js +35 -15
  563. package/dist/vad.js.map +1 -1
  564. package/dist/version.cjs +1 -1
  565. package/dist/version.cjs.map +1 -1
  566. package/dist/version.d.cts +1 -1
  567. package/dist/version.d.ts +1 -1
  568. package/dist/version.d.ts.map +1 -1
  569. package/dist/version.js +1 -1
  570. package/dist/version.js.map +1 -1
  571. package/dist/voice/agent.cjs +258 -35
  572. package/dist/voice/agent.cjs.map +1 -1
  573. package/dist/voice/agent.d.cts +54 -13
  574. package/dist/voice/agent.d.ts +54 -13
  575. package/dist/voice/agent.d.ts.map +1 -1
  576. package/dist/voice/agent.js +254 -34
  577. package/dist/voice/agent.js.map +1 -1
  578. package/dist/voice/agent.test.cjs +314 -0
  579. package/dist/voice/agent.test.cjs.map +1 -1
  580. package/dist/voice/agent.test.js +316 -2
  581. package/dist/voice/agent.test.js.map +1 -1
  582. package/dist/voice/agent_activity.cjs +1116 -385
  583. package/dist/voice/agent_activity.cjs.map +1 -1
  584. package/dist/voice/agent_activity.d.cts +72 -11
  585. package/dist/voice/agent_activity.d.ts +72 -11
  586. package/dist/voice/agent_activity.d.ts.map +1 -1
  587. package/dist/voice/agent_activity.js +1119 -383
  588. package/dist/voice/agent_activity.js.map +1 -1
  589. package/dist/voice/agent_activity.test.cjs +135 -0
  590. package/dist/voice/agent_activity.test.cjs.map +1 -0
  591. package/dist/voice/agent_activity.test.js +134 -0
  592. package/dist/voice/agent_activity.test.js.map +1 -0
  593. package/dist/voice/agent_session.cjs +550 -90
  594. package/dist/voice/agent_session.cjs.map +1 -1
  595. package/dist/voice/agent_session.d.cts +185 -25
  596. package/dist/voice/agent_session.d.ts +185 -25
  597. package/dist/voice/agent_session.d.ts.map +1 -1
  598. package/dist/voice/agent_session.js +556 -91
  599. package/dist/voice/agent_session.js.map +1 -1
  600. package/dist/voice/audio_recognition.cjs +605 -46
  601. package/dist/voice/audio_recognition.cjs.map +1 -1
  602. package/dist/voice/audio_recognition.d.cts +96 -4
  603. package/dist/voice/audio_recognition.d.ts +96 -4
  604. package/dist/voice/audio_recognition.d.ts.map +1 -1
  605. package/dist/voice/audio_recognition.js +611 -47
  606. package/dist/voice/audio_recognition.js.map +1 -1
  607. package/dist/voice/audio_recognition_span.test.cjs +295 -0
  608. package/dist/voice/audio_recognition_span.test.cjs.map +1 -0
  609. package/dist/voice/audio_recognition_span.test.js +299 -0
  610. package/dist/voice/audio_recognition_span.test.js.map +1 -0
  611. package/dist/voice/avatar/datastream_io.cjs +7 -1
  612. package/dist/voice/avatar/datastream_io.cjs.map +1 -1
  613. package/dist/voice/avatar/datastream_io.d.cts +1 -0
  614. package/dist/voice/avatar/datastream_io.d.ts +1 -0
  615. package/dist/voice/avatar/datastream_io.d.ts.map +1 -1
  616. package/dist/voice/avatar/datastream_io.js +7 -1
  617. package/dist/voice/avatar/datastream_io.js.map +1 -1
  618. package/dist/voice/background_audio.cjs +367 -0
  619. package/dist/voice/background_audio.cjs.map +1 -0
  620. package/dist/voice/background_audio.d.cts +123 -0
  621. package/dist/voice/background_audio.d.ts +123 -0
  622. package/dist/voice/background_audio.d.ts.map +1 -0
  623. package/dist/voice/background_audio.js +343 -0
  624. package/dist/voice/background_audio.js.map +1 -0
  625. package/dist/voice/events.cjs +3 -0
  626. package/dist/voice/events.cjs.map +1 -1
  627. package/dist/voice/events.d.cts +16 -9
  628. package/dist/voice/events.d.ts +16 -9
  629. package/dist/voice/events.d.ts.map +1 -1
  630. package/dist/voice/events.js +3 -0
  631. package/dist/voice/events.js.map +1 -1
  632. package/dist/voice/generation.cjs +205 -41
  633. package/dist/voice/generation.cjs.map +1 -1
  634. package/dist/voice/generation.d.cts +21 -5
  635. package/dist/voice/generation.d.ts +21 -5
  636. package/dist/voice/generation.d.ts.map +1 -1
  637. package/dist/voice/generation.js +215 -43
  638. package/dist/voice/generation.js.map +1 -1
  639. package/dist/voice/generation_tools.test.cjs +236 -0
  640. package/dist/voice/generation_tools.test.cjs.map +1 -0
  641. package/dist/voice/generation_tools.test.js +235 -0
  642. package/dist/voice/generation_tools.test.js.map +1 -0
  643. package/dist/voice/index.cjs +33 -2
  644. package/dist/voice/index.cjs.map +1 -1
  645. package/dist/voice/index.d.cts +8 -2
  646. package/dist/voice/index.d.ts +8 -2
  647. package/dist/voice/index.d.ts.map +1 -1
  648. package/dist/voice/index.js +19 -2
  649. package/dist/voice/index.js.map +1 -1
  650. package/dist/voice/interruption_detection.test.cjs +114 -0
  651. package/dist/voice/interruption_detection.test.cjs.map +1 -0
  652. package/dist/voice/interruption_detection.test.js +113 -0
  653. package/dist/voice/interruption_detection.test.js.map +1 -0
  654. package/dist/voice/io.cjs +66 -6
  655. package/dist/voice/io.cjs.map +1 -1
  656. package/dist/voice/io.d.cts +67 -7
  657. package/dist/voice/io.d.ts +67 -7
  658. package/dist/voice/io.d.ts.map +1 -1
  659. package/dist/voice/io.js +62 -5
  660. package/dist/voice/io.js.map +1 -1
  661. package/dist/voice/recorder_io/index.cjs +23 -0
  662. package/dist/voice/recorder_io/index.cjs.map +1 -0
  663. package/dist/voice/recorder_io/index.d.cts +2 -0
  664. package/dist/voice/recorder_io/index.d.ts +2 -0
  665. package/dist/voice/recorder_io/index.d.ts.map +1 -0
  666. package/dist/voice/recorder_io/index.js +2 -0
  667. package/dist/voice/recorder_io/index.js.map +1 -0
  668. package/dist/voice/recorder_io/recorder_io.cjs +607 -0
  669. package/dist/voice/recorder_io/recorder_io.cjs.map +1 -0
  670. package/dist/voice/recorder_io/recorder_io.d.cts +106 -0
  671. package/dist/voice/recorder_io/recorder_io.d.ts +106 -0
  672. package/dist/voice/recorder_io/recorder_io.d.ts.map +1 -0
  673. package/dist/voice/recorder_io/recorder_io.js +573 -0
  674. package/dist/voice/recorder_io/recorder_io.js.map +1 -0
  675. package/dist/voice/remote_session.cjs +922 -0
  676. package/dist/voice/remote_session.cjs.map +1 -0
  677. package/dist/voice/remote_session.d.cts +108 -0
  678. package/dist/voice/remote_session.d.ts +108 -0
  679. package/dist/voice/remote_session.d.ts.map +1 -0
  680. package/dist/voice/remote_session.js +887 -0
  681. package/dist/voice/remote_session.js.map +1 -0
  682. package/dist/voice/report.cjs +88 -0
  683. package/dist/voice/report.cjs.map +1 -0
  684. package/dist/voice/report.d.cts +49 -0
  685. package/dist/voice/report.d.ts +49 -0
  686. package/dist/voice/report.d.ts.map +1 -0
  687. package/dist/voice/report.js +63 -0
  688. package/dist/voice/report.js.map +1 -0
  689. package/dist/voice/report.test.cjs +121 -0
  690. package/dist/voice/report.test.cjs.map +1 -0
  691. package/dist/voice/report.test.js +120 -0
  692. package/dist/voice/report.test.js.map +1 -0
  693. package/dist/voice/room_io/_input.cjs +40 -7
  694. package/dist/voice/room_io/_input.cjs.map +1 -1
  695. package/dist/voice/room_io/_input.d.cts +5 -2
  696. package/dist/voice/room_io/_input.d.ts +5 -2
  697. package/dist/voice/room_io/_input.d.ts.map +1 -1
  698. package/dist/voice/room_io/_input.js +41 -8
  699. package/dist/voice/room_io/_input.js.map +1 -1
  700. package/dist/voice/room_io/_output.cjs +19 -11
  701. package/dist/voice/room_io/_output.cjs.map +1 -1
  702. package/dist/voice/room_io/_output.d.cts +7 -4
  703. package/dist/voice/room_io/_output.d.ts +7 -4
  704. package/dist/voice/room_io/_output.d.ts.map +1 -1
  705. package/dist/voice/room_io/_output.js +20 -12
  706. package/dist/voice/room_io/_output.js.map +1 -1
  707. package/dist/voice/room_io/room_io.cjs +33 -6
  708. package/dist/voice/room_io/room_io.cjs.map +1 -1
  709. package/dist/voice/room_io/room_io.d.cts +29 -9
  710. package/dist/voice/room_io/room_io.d.ts +29 -9
  711. package/dist/voice/room_io/room_io.d.ts.map +1 -1
  712. package/dist/voice/room_io/room_io.js +33 -7
  713. package/dist/voice/room_io/room_io.js.map +1 -1
  714. package/dist/voice/speech_handle.cjs +22 -4
  715. package/dist/voice/speech_handle.cjs.map +1 -1
  716. package/dist/voice/speech_handle.d.cts +17 -2
  717. package/dist/voice/speech_handle.d.ts +17 -2
  718. package/dist/voice/speech_handle.d.ts.map +1 -1
  719. package/dist/voice/speech_handle.js +21 -4
  720. package/dist/voice/speech_handle.js.map +1 -1
  721. package/dist/voice/testing/fake_llm.cjs +127 -0
  722. package/dist/voice/testing/fake_llm.cjs.map +1 -0
  723. package/dist/voice/testing/fake_llm.d.cts +30 -0
  724. package/dist/voice/testing/fake_llm.d.ts +30 -0
  725. package/dist/voice/testing/fake_llm.d.ts.map +1 -0
  726. package/dist/voice/testing/fake_llm.js +103 -0
  727. package/dist/voice/testing/fake_llm.js.map +1 -0
  728. package/dist/voice/testing/index.cjs +57 -0
  729. package/dist/voice/testing/index.cjs.map +1 -0
  730. package/dist/voice/testing/index.d.cts +21 -0
  731. package/dist/voice/testing/index.d.ts +21 -0
  732. package/dist/voice/testing/index.d.ts.map +1 -0
  733. package/dist/voice/testing/index.js +35 -0
  734. package/dist/voice/testing/index.js.map +1 -0
  735. package/dist/voice/testing/run_result.cjs +817 -0
  736. package/dist/voice/testing/run_result.cjs.map +1 -0
  737. package/dist/voice/testing/run_result.d.cts +385 -0
  738. package/dist/voice/testing/run_result.d.ts +385 -0
  739. package/dist/voice/testing/run_result.d.ts.map +1 -0
  740. package/dist/voice/testing/run_result.js +790 -0
  741. package/dist/voice/testing/run_result.js.map +1 -0
  742. package/dist/voice/testing/types.cjs +46 -0
  743. package/dist/voice/testing/types.cjs.map +1 -0
  744. package/dist/voice/testing/types.d.cts +83 -0
  745. package/dist/voice/testing/types.d.ts +83 -0
  746. package/dist/voice/testing/types.d.ts.map +1 -0
  747. package/dist/voice/testing/types.js +19 -0
  748. package/dist/voice/testing/types.js.map +1 -0
  749. package/dist/voice/transcription/synchronizer.cjs +139 -15
  750. package/dist/voice/transcription/synchronizer.cjs.map +1 -1
  751. package/dist/voice/transcription/synchronizer.d.cts +35 -4
  752. package/dist/voice/transcription/synchronizer.d.ts +35 -4
  753. package/dist/voice/transcription/synchronizer.d.ts.map +1 -1
  754. package/dist/voice/transcription/synchronizer.js +143 -16
  755. package/dist/voice/transcription/synchronizer.js.map +1 -1
  756. package/dist/voice/transcription/synchronizer.test.cjs +151 -0
  757. package/dist/voice/transcription/synchronizer.test.cjs.map +1 -0
  758. package/dist/voice/transcription/synchronizer.test.js +150 -0
  759. package/dist/voice/transcription/synchronizer.test.js.map +1 -0
  760. package/dist/voice/turn_config/endpointing.cjs +33 -0
  761. package/dist/voice/turn_config/endpointing.cjs.map +1 -0
  762. package/dist/voice/turn_config/endpointing.d.cts +30 -0
  763. package/dist/voice/turn_config/endpointing.d.ts +30 -0
  764. package/dist/voice/turn_config/endpointing.d.ts.map +1 -0
  765. package/dist/voice/turn_config/endpointing.js +9 -0
  766. package/dist/voice/turn_config/endpointing.js.map +1 -0
  767. package/dist/voice/turn_config/interruption.cjs +37 -0
  768. package/dist/voice/turn_config/interruption.cjs.map +1 -0
  769. package/dist/voice/turn_config/interruption.d.cts +53 -0
  770. package/dist/voice/turn_config/interruption.d.ts +53 -0
  771. package/dist/voice/turn_config/interruption.d.ts.map +1 -0
  772. package/dist/voice/turn_config/interruption.js +13 -0
  773. package/dist/voice/turn_config/interruption.js.map +1 -0
  774. package/dist/voice/turn_config/turn_handling.cjs +35 -0
  775. package/dist/voice/turn_config/turn_handling.cjs.map +1 -0
  776. package/dist/voice/turn_config/turn_handling.d.cts +36 -0
  777. package/dist/voice/turn_config/turn_handling.d.ts +36 -0
  778. package/dist/voice/turn_config/turn_handling.d.ts.map +1 -0
  779. package/dist/voice/turn_config/turn_handling.js +11 -0
  780. package/dist/voice/turn_config/turn_handling.js.map +1 -0
  781. package/dist/voice/turn_config/utils.cjs +157 -0
  782. package/dist/voice/turn_config/utils.cjs.map +1 -0
  783. package/dist/voice/turn_config/utils.d.cts +37 -0
  784. package/dist/voice/turn_config/utils.d.ts +37 -0
  785. package/dist/voice/turn_config/utils.d.ts.map +1 -0
  786. package/dist/voice/turn_config/utils.js +131 -0
  787. package/dist/voice/turn_config/utils.js.map +1 -0
  788. package/dist/voice/turn_config/utils.test.cjs +128 -0
  789. package/dist/voice/turn_config/utils.test.cjs.map +1 -0
  790. package/dist/voice/turn_config/utils.test.js +127 -0
  791. package/dist/voice/turn_config/utils.test.js.map +1 -0
  792. package/dist/voice/utils.cjs +47 -0
  793. package/dist/voice/utils.cjs.map +1 -0
  794. package/dist/voice/utils.d.cts +4 -0
  795. package/dist/voice/utils.d.ts +4 -0
  796. package/dist/voice/utils.d.ts.map +1 -0
  797. package/dist/voice/utils.js +23 -0
  798. package/dist/voice/utils.js.map +1 -0
  799. package/dist/worker.cjs +44 -52
  800. package/dist/worker.cjs.map +1 -1
  801. package/dist/worker.d.cts +18 -8
  802. package/dist/worker.d.ts +18 -8
  803. package/dist/worker.d.ts.map +1 -1
  804. package/dist/worker.js +43 -43
  805. package/dist/worker.js.map +1 -1
  806. package/package.json +32 -12
  807. package/resources/NOTICE +2 -0
  808. package/resources/keyboard-typing.ogg +0 -0
  809. package/resources/keyboard-typing2.ogg +0 -0
  810. package/resources/office-ambience.ogg +0 -0
  811. package/src/audio.ts +132 -1
  812. package/src/beta/index.ts +9 -0
  813. package/src/beta/workflows/index.ts +9 -0
  814. package/src/beta/workflows/task_group.ts +194 -0
  815. package/src/cli.ts +57 -66
  816. package/src/connection_pool.test.ts +346 -0
  817. package/src/connection_pool.ts +307 -0
  818. package/src/constants.ts +14 -0
  819. package/src/cpu.test.ts +239 -0
  820. package/src/cpu.ts +173 -0
  821. package/src/http_server.ts +18 -6
  822. package/src/index.ts +15 -13
  823. package/src/inference/api_protos.ts +85 -2
  824. package/src/inference/index.ts +32 -4
  825. package/src/inference/interruption/defaults.ts +51 -0
  826. package/src/inference/interruption/errors.ts +25 -0
  827. package/src/inference/interruption/http_transport.ts +206 -0
  828. package/src/inference/interruption/interruption_cache_entry.ts +50 -0
  829. package/src/inference/interruption/interruption_detector.ts +204 -0
  830. package/src/inference/interruption/interruption_stream.ts +467 -0
  831. package/src/inference/interruption/types.ts +84 -0
  832. package/src/inference/interruption/utils.test.ts +132 -0
  833. package/src/inference/interruption/utils.ts +137 -0
  834. package/src/inference/interruption/ws_transport.ts +406 -0
  835. package/src/inference/llm.ts +214 -163
  836. package/src/inference/stt.test.ts +253 -0
  837. package/src/inference/stt.ts +449 -208
  838. package/src/inference/tts.test.ts +267 -0
  839. package/src/inference/tts.ts +377 -115
  840. package/src/inference/utils.ts +30 -2
  841. package/src/ipc/inference_proc_executor.ts +11 -3
  842. package/src/ipc/inference_proc_lazy_main.ts +13 -1
  843. package/src/ipc/job_proc_executor.ts +11 -1
  844. package/src/ipc/job_proc_lazy_main.ts +86 -20
  845. package/src/ipc/supervised_proc.test.ts +153 -0
  846. package/src/ipc/supervised_proc.ts +39 -10
  847. package/src/job.ts +120 -1
  848. package/src/language.test.ts +62 -0
  849. package/src/language.ts +380 -0
  850. package/src/llm/__snapshots__/zod-utils.test.ts.snap +559 -0
  851. package/src/llm/chat_context.test.ts +655 -0
  852. package/src/llm/chat_context.ts +412 -2
  853. package/src/llm/fallback_adapter.test.ts +238 -0
  854. package/src/llm/fallback_adapter.ts +391 -0
  855. package/src/llm/index.ts +11 -0
  856. package/src/llm/llm.ts +77 -12
  857. package/src/llm/provider_format/google.test.ts +72 -1
  858. package/src/llm/provider_format/google.ts +10 -6
  859. package/src/llm/provider_format/index.ts +7 -2
  860. package/src/llm/provider_format/openai.test.ts +480 -2
  861. package/src/llm/provider_format/openai.ts +152 -21
  862. package/src/llm/provider_format/utils.ts +11 -5
  863. package/src/llm/realtime.ts +23 -2
  864. package/src/llm/remote_chat_context.ts +2 -2
  865. package/src/llm/tool_context.test.ts +210 -1
  866. package/src/llm/tool_context.ts +115 -17
  867. package/src/llm/utils.ts +24 -16
  868. package/src/llm/zod-utils.test.ts +577 -0
  869. package/src/llm/zod-utils.ts +153 -0
  870. package/src/log.ts +71 -19
  871. package/src/metrics/base.ts +78 -19
  872. package/src/metrics/index.ts +12 -0
  873. package/src/metrics/model_usage.test.ts +545 -0
  874. package/src/metrics/model_usage.ts +262 -0
  875. package/src/metrics/usage_collector.ts +14 -3
  876. package/src/metrics/utils.ts +27 -7
  877. package/src/stream/deferred_stream.test.ts +3 -3
  878. package/src/stream/deferred_stream.ts +43 -11
  879. package/src/stream/index.ts +1 -0
  880. package/src/stream/multi_input_stream.test.ts +545 -0
  881. package/src/stream/multi_input_stream.ts +172 -0
  882. package/src/stream/stream_channel.test.ts +37 -0
  883. package/src/stream/stream_channel.ts +43 -3
  884. package/src/stt/stream_adapter.ts +30 -9
  885. package/src/stt/stt.ts +131 -22
  886. package/src/telemetry/index.ts +28 -0
  887. package/src/telemetry/logging.ts +55 -0
  888. package/src/telemetry/otel_http_exporter.ts +218 -0
  889. package/src/telemetry/pino_otel_transport.ts +265 -0
  890. package/src/telemetry/trace_types.ts +109 -0
  891. package/src/telemetry/traces.ts +673 -0
  892. package/src/telemetry/utils.ts +61 -0
  893. package/src/tokenize/basic/sentence.ts +3 -3
  894. package/src/tokenize/tokenizer.test.ts +4 -0
  895. package/src/transcription.ts +6 -0
  896. package/src/tts/fallback_adapter.ts +579 -0
  897. package/src/tts/index.ts +1 -0
  898. package/src/tts/stream_adapter.ts +38 -8
  899. package/src/tts/tts.ts +245 -62
  900. package/src/types.ts +62 -33
  901. package/src/utils.test.ts +90 -10
  902. package/src/utils.ts +176 -31
  903. package/src/vad.ts +42 -18
  904. package/src/version.ts +1 -1
  905. package/src/voice/agent.test.ts +347 -2
  906. package/src/voice/agent.ts +346 -44
  907. package/src/voice/agent_activity.test.ts +194 -0
  908. package/src/voice/agent_activity.ts +1457 -388
  909. package/src/voice/agent_session.ts +817 -112
  910. package/src/voice/audio_recognition.ts +845 -70
  911. package/src/voice/audio_recognition_span.test.ts +341 -0
  912. package/src/voice/avatar/datastream_io.ts +9 -1
  913. package/src/voice/background_audio.ts +494 -0
  914. package/src/voice/events.ts +27 -7
  915. package/src/voice/generation.ts +310 -56
  916. package/src/voice/generation_tools.test.ts +268 -0
  917. package/src/voice/index.ts +17 -3
  918. package/src/voice/interruption_detection.test.ts +151 -0
  919. package/src/voice/io.ts +115 -12
  920. package/src/voice/recorder_io/index.ts +4 -0
  921. package/src/voice/recorder_io/recorder_io.ts +783 -0
  922. package/src/voice/remote_session.ts +1083 -0
  923. package/src/voice/report.test.ts +136 -0
  924. package/src/voice/report.ts +140 -0
  925. package/src/voice/room_io/_input.ts +45 -10
  926. package/src/voice/room_io/_output.ts +26 -14
  927. package/src/voice/room_io/room_io.ts +67 -22
  928. package/src/voice/speech_handle.ts +38 -6
  929. package/src/voice/testing/fake_llm.ts +138 -0
  930. package/src/voice/testing/index.ts +52 -0
  931. package/src/voice/testing/run_result.ts +995 -0
  932. package/src/voice/testing/types.ts +118 -0
  933. package/src/voice/transcription/synchronizer.test.ts +206 -0
  934. package/src/voice/transcription/synchronizer.ts +204 -19
  935. package/src/voice/turn_config/endpointing.ts +33 -0
  936. package/src/voice/turn_config/interruption.ts +56 -0
  937. package/src/voice/turn_config/turn_handling.ts +45 -0
  938. package/src/voice/turn_config/utils.test.ts +148 -0
  939. package/src/voice/turn_config/utils.ts +167 -0
  940. package/src/voice/utils.ts +29 -0
  941. package/src/worker.ts +92 -78
  942. package/src/llm/__snapshots__/utils.test.ts.snap +0 -65
@@ -2,10 +2,12 @@
2
2
  //
3
3
  // SPDX-License-Identifier: Apache-2.0
4
4
  import { type AudioFrame } from '@livekit/rtc-node';
5
- import { type RawData, WebSocket } from 'ws';
5
+ import type { WebSocket } from 'ws';
6
6
  import { APIError, APIStatusError } from '../_exceptions.js';
7
7
  import { AudioByteStream } from '../audio.js';
8
+ import { type LanguageCode, areLanguagesEquivalent, normalizeLanguage } from '../language.js';
8
9
  import { log } from '../log.js';
10
+ import { createStreamChannel } from '../stream/stream_channel.js';
9
11
  import {
10
12
  STT as BaseSTT,
11
13
  SpeechStream as BaseSpeechStream,
@@ -15,91 +17,178 @@ import {
15
17
  } from '../stt/index.js';
16
18
  import { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';
17
19
  import { type AudioBuffer, Event, Task, cancelAndWait, shortuuid, waitForAbort } from '../utils.js';
18
- import { type AnyModels, connectWs, createAccessToken } from './utils.js';
20
+ import { type TimedString, createTimedString } from '../voice/io.js';
21
+ import {
22
+ type SttServerEvent,
23
+ type SttTranscriptEvent,
24
+ sttServerEventSchema,
25
+ } from './api_protos.js';
26
+ import { type AnyString, connectWs, createAccessToken, getDefaultInferenceUrl } from './utils.js';
19
27
 
20
28
  export type DeepgramModels =
21
- | 'deepgram'
29
+ | 'deepgram/flux-general'
22
30
  | 'deepgram/nova-3'
23
- | 'deepgram/nova-3-general'
24
31
  | 'deepgram/nova-3-medical'
25
32
  | 'deepgram/nova-2'
26
- | 'deepgram/nova-2-general'
27
33
  | 'deepgram/nova-2-medical'
34
+ | 'deepgram/nova-2-conversationalai'
28
35
  | 'deepgram/nova-2-phonecall';
29
36
 
30
- export type CartesiaModels = 'cartesia' | 'cartesia/ink-whisper';
37
+ export type CartesiaModels = 'cartesia/ink-whisper';
38
+
39
+ export type AssemblyaiModels =
40
+ | 'assemblyai/universal-streaming'
41
+ | 'assemblyai/universal-streaming-multilingual';
31
42
 
32
- export type AssemblyaiModels = 'assemblyai' | 'assemblyai/universal-streaming';
43
+ export type ElevenlabsSTTModels = 'elevenlabs/scribe_v2_realtime';
33
44
 
34
45
  export interface CartesiaOptions {
35
- min_volume?: number; // default: not specified
36
- max_silence_duration_secs?: number; // default: not specified
46
+ /** Minimum volume threshold. Default: not specified. */
47
+ min_volume?: number;
48
+ /** Maximum silence duration in seconds. Default: not specified. */
49
+ max_silence_duration_secs?: number;
37
50
  }
38
51
 
39
52
  export interface DeepgramOptions {
40
- filler_words?: boolean; // default: true
41
- interim_results?: boolean; // default: true
42
- endpointing?: number; // default: 25 (ms)
43
- punctuate?: boolean; // default: false
53
+ /** Enable filler words. Default: true. */
54
+ filler_words?: boolean;
55
+ /** Enable interim results. Default: true. */
56
+ interim_results?: boolean;
57
+ /** Endpointing timeout in milliseconds. Default: 25. */
58
+ endpointing?: number;
59
+ /** Enable punctuation. Default: false. */
60
+ punctuate?: boolean;
61
+ /** Enable smart formatting. */
44
62
  smart_format?: boolean;
63
+ /** Keywords with boost values. */
45
64
  keywords?: Array<[string, number]>;
65
+ /** Key terms for recognition. */
46
66
  keyterms?: string[];
67
+ /** Enable profanity filter. */
47
68
  profanity_filter?: boolean;
69
+ /** Convert spoken numbers to numerals. */
48
70
  numerals?: boolean;
71
+ /** Opt out of model improvement program. */
49
72
  mip_opt_out?: boolean;
50
73
  }
51
74
 
52
- export interface AssemblyaiOptions {
53
- format_turns?: boolean; // default: false
54
- end_of_turn_confidence_threshold?: number; // default: 0.01
55
- min_end_of_turn_silence_when_confident?: number; // default: 0
56
- max_turn_silence?: number; // default: not specified
57
- keyterms_prompt?: string[]; // default: not specified
75
+ export interface AssemblyAIOptions {
76
+ /** Enable turn formatting. Default: false. */
77
+ format_turns?: boolean;
78
+ /** End of turn confidence threshold. Default: 0.01. */
79
+ end_of_turn_confidence_threshold?: number;
80
+ /** Minimum silence duration in milliseconds when confident about end of turn. Default: 0. */
81
+ min_end_of_turn_silence_when_confident?: number;
82
+ /** Maximum turn silence in milliseconds. Default: not specified. */
83
+ max_turn_silence?: number;
84
+ /** Key terms prompt for recognition. Default: not specified. */
85
+ keyterms_prompt?: string[];
58
86
  }
59
87
 
60
- export type STTModels = DeepgramModels | CartesiaModels | AssemblyaiModels | AnyModels;
88
+ export type STTLanguages =
89
+ | 'multi'
90
+ | 'en'
91
+ | 'de'
92
+ | 'es'
93
+ | 'fr'
94
+ | 'ja'
95
+ | 'pt'
96
+ | 'zh'
97
+ | 'hi'
98
+ | AnyString;
99
+
100
+ type _STTModels = DeepgramModels | CartesiaModels | AssemblyaiModels | ElevenlabsSTTModels;
101
+
102
+ export type STTModels = _STTModels | 'auto' | AnyString;
103
+
104
+ export type ModelWithLanguage = `${_STTModels}:${STTLanguages}` | STTModels;
105
+
61
106
  export type STTOptions<TModel extends STTModels> = TModel extends DeepgramModels
62
107
  ? DeepgramOptions
63
108
  : TModel extends CartesiaModels
64
109
  ? CartesiaOptions
65
110
  : TModel extends AssemblyaiModels
66
- ? AssemblyaiOptions
111
+ ? AssemblyAIOptions
67
112
  : Record<string, unknown>;
68
113
 
69
- export type STTLanguages = 'en' | 'de' | 'es' | 'fr' | 'ja' | 'pt' | 'zh';
114
+ /** A fallback model with optional extra configuration. Extra fields are passed through to the provider. */
115
+ export interface STTFallbackModel {
116
+ /** Model name (e.g. "deepgram/nova-3", "assemblyai/universal-streaming", "cartesia/ink-whisper"). */
117
+ model: string;
118
+ /** Extra configuration for the model. */
119
+ extraKwargs?: Record<string, unknown>;
120
+ }
121
+
122
+ export type STTFallbackModelType = STTFallbackModel | string;
123
+
124
+ /** Parse a model string into [model, language]. Language is undefined if not specified. */
125
+ export function parseSTTModelString(model: string): [string, LanguageCode | undefined] {
126
+ const idx = model.lastIndexOf(':');
127
+ if (idx !== -1) {
128
+ return [model.slice(0, idx), normalizeLanguage(model.slice(idx + 1))];
129
+ }
130
+ return [model, undefined];
131
+ }
132
+
133
+ /** Normalize a single or list of FallbackModelType into STTFallbackModel[]. */
134
+ export function normalizeSTTFallback(
135
+ fallback: STTFallbackModelType | STTFallbackModelType[],
136
+ ): STTFallbackModel[] {
137
+ const makeFallback = (model: STTFallbackModelType): STTFallbackModel => {
138
+ if (typeof model === 'string') {
139
+ const [name] = parseSTTModelString(model);
140
+ return { model: name };
141
+ }
142
+ return model;
143
+ };
144
+
145
+ if (Array.isArray(fallback)) {
146
+ return fallback.map(makeFallback);
147
+ }
148
+ return [makeFallback(fallback)];
149
+ }
150
+
70
151
  export type STTEncoding = 'pcm_s16le';
71
152
 
72
153
  const DEFAULT_ENCODING: STTEncoding = 'pcm_s16le';
73
154
  const DEFAULT_SAMPLE_RATE = 16000;
74
- const DEFAULT_BASE_URL = 'wss://agent-gateway.livekit.cloud/v1';
75
155
  const DEFAULT_CANCEL_TIMEOUT = 5000;
76
156
 
77
157
  export interface InferenceSTTOptions<TModel extends STTModels> {
78
- model: TModel;
79
- language?: STTLanguages | string;
158
+ model?: TModel;
159
+ language?: LanguageCode;
80
160
  encoding: STTEncoding;
81
161
  sampleRate: number;
82
162
  baseURL: string;
83
163
  apiKey: string;
84
164
  apiSecret: string;
85
- extraKwargs: STTOptions<TModel>;
165
+ modelOptions: STTOptions<TModel>;
166
+ fallback?: STTFallbackModel[];
167
+ connOptions?: APIConnectOptions;
86
168
  }
87
169
 
170
+ /**
171
+ * Livekit Cloud Inference STT
172
+ */
88
173
  export class STT<TModel extends STTModels> extends BaseSTT {
89
174
  private opts: InferenceSTTOptions<TModel>;
90
175
  private streams: Set<SpeechStream<TModel>> = new Set();
91
176
 
92
- constructor(opts: {
93
- model: TModel;
94
- language?: STTLanguages | string;
177
+ #logger = log();
178
+
179
+ constructor(opts?: {
180
+ model?: ModelWithLanguage;
181
+ language?: STTLanguages;
95
182
  baseURL?: string;
96
183
  encoding?: STTEncoding;
97
184
  sampleRate?: number;
98
185
  apiKey?: string;
99
186
  apiSecret?: string;
100
- extraKwargs?: STTOptions<TModel>;
187
+ modelOptions?: STTOptions<TModel>;
188
+ fallback?: STTFallbackModelType | STTFallbackModelType[];
189
+ connOptions?: APIConnectOptions;
101
190
  }) {
102
- super({ streaming: true, interimResults: true });
191
+ super({ streaming: true, interimResults: true, alignedTranscript: 'word' });
103
192
 
104
193
  const {
105
194
  model,
@@ -109,10 +198,12 @@ export class STT<TModel extends STTModels> extends BaseSTT {
109
198
  sampleRate = DEFAULT_SAMPLE_RATE,
110
199
  apiKey,
111
200
  apiSecret,
112
- extraKwargs = {} as STTOptions<TModel>,
201
+ modelOptions = {} as STTOptions<TModel>,
202
+ fallback,
203
+ connOptions,
113
204
  } = opts || {};
114
205
 
115
- const lkBaseURL = baseURL || process.env.LIVEKIT_INFERENCE_URL || DEFAULT_BASE_URL;
206
+ const lkBaseURL = baseURL || getDefaultInferenceUrl();
116
207
  const lkApiKey = apiKey || process.env.LIVEKIT_INFERENCE_API_KEY || process.env.LIVEKIT_API_KEY;
117
208
  if (!lkApiKey) {
118
209
  throw new Error('apiKey is required: pass apiKey or set LIVEKIT_API_KEY');
@@ -124,15 +215,36 @@ export class STT<TModel extends STTModels> extends BaseSTT {
124
215
  throw new Error('apiSecret is required: pass apiSecret or set LIVEKIT_API_SECRET');
125
216
  }
126
217
 
218
+ // Parse language from model string if provided: "provider/model:language"
219
+ let nextModel = model;
220
+ let nextLanguage = language;
221
+ if (typeof nextModel === 'string') {
222
+ const [parsedModel, parsedLanguage] = parseSTTModelString(nextModel);
223
+ if (parsedLanguage !== undefined) {
224
+ if (nextLanguage && !areLanguagesEquivalent(nextLanguage, parsedLanguage)) {
225
+ this.#logger.warn(
226
+ '`language` is provided via both argument and model, using the one from the argument',
227
+ { language: nextLanguage, model: nextModel },
228
+ );
229
+ } else {
230
+ nextLanguage = parsedLanguage as STTLanguages;
231
+ }
232
+ nextModel = parsedModel as TModel;
233
+ }
234
+ }
235
+ const normalizedFallback = fallback ? normalizeSTTFallback(fallback) : undefined;
236
+
127
237
  this.opts = {
128
- model,
129
- language,
238
+ model: nextModel as TModel,
239
+ language: nextLanguage ? normalizeLanguage(nextLanguage) : undefined,
130
240
  encoding,
131
241
  sampleRate,
132
242
  baseURL: lkBaseURL,
133
243
  apiKey: lkApiKey,
134
244
  apiSecret: lkApiSecret,
135
- extraKwargs,
245
+ modelOptions,
246
+ fallback: normalizedFallback,
247
+ connOptions: connOptions ?? DEFAULT_API_CONNECT_OPTIONS,
136
248
  };
137
249
  }
138
250
 
@@ -140,12 +252,29 @@ export class STT<TModel extends STTModels> extends BaseSTT {
140
252
  return 'inference.STT';
141
253
  }
142
254
 
255
+ get model(): string {
256
+ return this.opts.model ?? 'auto';
257
+ }
258
+
259
+ get provider(): string {
260
+ return 'livekit';
261
+ }
262
+
263
+ static fromModelString(modelString: string): STT<AnyString> {
264
+ const [model, language] = parseSTTModelString(modelString);
265
+ return new STT({ model, language });
266
+ }
267
+
143
268
  protected async _recognize(_: AudioBuffer): Promise<SpeechEvent> {
144
269
  throw new Error('LiveKit STT does not support batch recognition, use stream() instead');
145
270
  }
146
271
 
147
272
  updateOptions(opts: Partial<Pick<InferenceSTTOptions<TModel>, 'model' | 'language'>>): void {
148
- this.opts = { ...this.opts, ...opts };
273
+ this.opts = {
274
+ ...this.opts,
275
+ ...opts,
276
+ language: opts.language !== undefined ? normalizeLanguage(opts.language) : this.opts.language,
277
+ };
149
278
 
150
279
  for (const stream of this.streams) {
151
280
  stream.updateOptions(opts);
@@ -156,10 +285,11 @@ export class STT<TModel extends STTModels> extends BaseSTT {
156
285
  language?: STTLanguages | string;
157
286
  connOptions?: APIConnectOptions;
158
287
  }): SpeechStream<TModel> {
159
- const { language, connOptions = DEFAULT_API_CONNECT_OPTIONS } = options || {};
288
+ const { language, connOptions = this.opts.connOptions ?? DEFAULT_API_CONNECT_OPTIONS } =
289
+ options || {};
160
290
  const streamOpts = {
161
291
  ...this.opts,
162
- language: language ?? this.opts.language,
292
+ language: language !== undefined ? normalizeLanguage(language) : this.opts.language,
163
293
  } as InferenceSTTOptions<TModel>;
164
294
 
165
295
  const stream = new SpeechStream(this, streamOpts, connOptions);
@@ -167,6 +297,55 @@ export class STT<TModel extends STTModels> extends BaseSTT {
167
297
 
168
298
  return stream;
169
299
  }
300
+
301
+ async connectWs(timeout: number): Promise<WebSocket> {
302
+ const params = {
303
+ settings: {
304
+ sample_rate: String(this.opts.sampleRate),
305
+ encoding: this.opts.encoding,
306
+ extra: this.opts.modelOptions,
307
+ },
308
+ } as Record<string, unknown>;
309
+
310
+ if (this.opts.model && this.opts.model !== 'auto') {
311
+ params.model = this.opts.model;
312
+ }
313
+
314
+ if (this.opts.language) {
315
+ (params.settings as Record<string, unknown>).language = this.opts.language;
316
+ }
317
+
318
+ if (this.opts.fallback?.length) {
319
+ params.fallback = {
320
+ models: this.opts.fallback.map((m) => ({
321
+ model: m.model,
322
+ extra: m.extraKwargs ?? {},
323
+ })),
324
+ };
325
+ }
326
+
327
+ if (this.opts.connOptions) {
328
+ params.connection = {
329
+ timeout: this.opts.connOptions.timeoutMs / 1000,
330
+ retries: this.opts.connOptions.maxRetry,
331
+ };
332
+ }
333
+
334
+ let baseURL = this.opts.baseURL;
335
+ if (baseURL.startsWith('http://') || baseURL.startsWith('https://')) {
336
+ baseURL = baseURL.replace('http', 'ws');
337
+ }
338
+
339
+ const token = await createAccessToken(this.opts.apiKey, this.opts.apiSecret);
340
+ const url = `${baseURL}/stt`;
341
+ const headers = { Authorization: `Bearer ${token}` } as Record<string, string>;
342
+
343
+ const socket = await connectWs(url, headers, timeout);
344
+ const msg = { ...params, type: 'session.create' };
345
+ socket.send(JSON.stringify(msg));
346
+
347
+ return socket;
348
+ }
170
349
  }
171
350
 
172
351
  export class SpeechStream<TModel extends STTModels> extends BaseSpeechStream {
@@ -175,6 +354,8 @@ export class SpeechStream<TModel extends STTModels> extends BaseSpeechStream {
175
354
  private speaking = false;
176
355
  private speechDuration = 0;
177
356
  private reconnectEvent = new Event();
357
+ private stt: STT<TModel>;
358
+ private connOptions: APIConnectOptions;
178
359
 
179
360
  #logger = log();
180
361
 
@@ -185,6 +366,8 @@ export class SpeechStream<TModel extends STTModels> extends BaseSpeechStream {
185
366
  ) {
186
367
  super(sttImpl, opts.sampleRate, connOptions);
187
368
  this.opts = opts;
369
+ this.stt = sttImpl;
370
+ this.connOptions = connOptions;
188
371
  }
189
372
 
190
373
  get label(): string {
@@ -192,223 +375,281 @@ export class SpeechStream<TModel extends STTModels> extends BaseSpeechStream {
192
375
  }
193
376
 
194
377
  updateOptions(opts: Partial<Pick<InferenceSTTOptions<TModel>, 'model' | 'language'>>): void {
195
- this.opts = { ...this.opts, ...opts };
378
+ this.opts = {
379
+ ...this.opts,
380
+ ...opts,
381
+ language: opts.language !== undefined ? normalizeLanguage(opts.language) : this.opts.language,
382
+ };
383
+ this.reconnectEvent.set();
196
384
  }
197
385
 
198
386
  protected async run(): Promise<void> {
199
- let ws: WebSocket | null = null;
200
- let closingWs = false;
201
-
202
- this.reconnectEvent.set();
387
+ while (true) {
388
+ // Create fresh resources for each connection attempt
389
+ let ws: WebSocket | null = null;
390
+ let closing = false;
391
+ let finalReceived = false;
392
+
393
+ const eventChannel = createStreamChannel<SttServerEvent>();
394
+
395
+ const resourceCleanup = () => {
396
+ if (closing) return;
397
+ closing = true;
398
+ eventChannel.close();
399
+ ws?.removeAllListeners();
400
+ ws?.close();
401
+ };
402
+
403
+ const createWsListener = async (ws: WebSocket, signal: AbortSignal) => {
404
+ return new Promise<void>((resolve, reject) => {
405
+ const onAbort = () => {
406
+ resourceCleanup();
407
+ reject(new Error('WebSocket connection aborted'));
408
+ };
203
409
 
204
- const connect = async () => {
205
- const params = {
206
- settings: {
207
- sample_rate: String(this.opts.sampleRate),
208
- encoding: this.opts.encoding,
209
- extra: this.opts.extraKwargs,
210
- },
211
- } as Record<string, unknown>;
212
-
213
- if (this.opts.model) {
214
- params.model = this.opts.model;
215
- }
410
+ signal.addEventListener('abort', onAbort, { once: true });
216
411
 
217
- if (this.opts.language) {
218
- (params.settings as Record<string, unknown>).language = this.opts.language;
219
- }
412
+ ws.on('message', (data) => {
413
+ const json = JSON.parse(data.toString()) as SttServerEvent;
414
+ eventChannel.write(json);
415
+ });
220
416
 
221
- let baseURL = this.opts.baseURL;
222
- if (baseURL.startsWith('http://') || baseURL.startsWith('https://')) {
223
- baseURL = baseURL.replace('http', 'ws');
224
- }
417
+ ws.on('error', (e) => {
418
+ this.#logger.error({ error: e }, 'WebSocket error');
419
+ resourceCleanup();
420
+ reject(e);
421
+ });
225
422
 
226
- const token = await createAccessToken(this.opts.apiKey, this.opts.apiSecret);
227
- const url = `${baseURL}/stt`;
228
- const headers = { Authorization: `Bearer ${token}` } as Record<string, string>;
423
+ ws.on('close', (code: number) => {
424
+ resourceCleanup();
229
425
 
230
- const socket = await connectWs(url, headers, 10000);
231
- const msg = { ...params, type: 'session.create' };
232
- socket.send(JSON.stringify(msg));
426
+ if (!closing) return this.#logger.error('WebSocket closed unexpectedly');
427
+ if (finalReceived) return resolve();
233
428
 
234
- return socket;
235
- };
429
+ reject(
430
+ new APIStatusError({
431
+ message: 'LiveKit STT connection closed unexpectedly',
432
+ options: { statusCode: code },
433
+ }),
434
+ );
435
+ });
436
+ });
437
+ };
438
+
439
+ const send = async (socket: WebSocket, signal: AbortSignal) => {
440
+ const audioStream = new AudioByteStream(
441
+ this.opts.sampleRate,
442
+ 1,
443
+ Math.floor(this.opts.sampleRate / 20), // 50ms
444
+ );
445
+
446
+ // Create abort promise once to avoid memory leak
447
+ const abortPromise = new Promise<never>((_, reject) => {
448
+ if (signal.aborted) {
449
+ return reject(new Error('Send aborted'));
450
+ }
451
+ const onAbort = () => reject(new Error('Send aborted'));
452
+ signal.addEventListener('abort', onAbort, { once: true });
453
+ });
236
454
 
237
- const send = async (socket: WebSocket, signal: AbortSignal) => {
238
- const audioStream = new AudioByteStream(
239
- this.opts.sampleRate,
240
- 1,
241
- Math.floor(this.opts.sampleRate / 20), // 50ms
242
- );
455
+ // Manual iteration to support cancellation
456
+ const iterator = this.input[Symbol.asyncIterator]();
457
+ try {
458
+ while (true) {
459
+ const result = await Promise.race([iterator.next(), abortPromise]);
243
460
 
244
- for await (const ev of this.input) {
245
- if (signal.aborted) break;
246
- let frames: AudioFrame[];
461
+ if (result.done) break;
462
+ const ev = result.value;
247
463
 
248
- if (ev === SpeechStream.FLUSH_SENTINEL) {
249
- frames = audioStream.flush();
250
- } else {
251
- const frame = ev as AudioFrame;
252
- frames = audioStream.write(new Int16Array(frame.data).buffer);
253
- }
464
+ let frames: AudioFrame[];
465
+ if (ev === SpeechStream.FLUSH_SENTINEL) {
466
+ frames = audioStream.flush();
467
+ } else {
468
+ const frame = ev as AudioFrame;
469
+ frames = audioStream.write(new Int16Array(frame.data).buffer);
470
+ }
254
471
 
255
- for (const frame of frames) {
256
- this.speechDuration += frame.samplesPerChannel / frame.sampleRate;
257
- const base64 = Buffer.from(frame.data.buffer).toString('base64');
258
- const msg = { type: 'input_audio', audio: base64 };
259
- socket.send(JSON.stringify(msg));
472
+ for (const frame of frames) {
473
+ this.speechDuration += frame.samplesPerChannel / frame.sampleRate;
474
+ const base64 = Buffer.from(frame.data.buffer).toString('base64');
475
+ const msg = { type: 'input_audio', audio: base64 };
476
+ socket.send(JSON.stringify(msg));
477
+ }
478
+ }
479
+
480
+ closing = true;
481
+ socket.send(JSON.stringify({ type: 'session.finalize' }));
482
+ } catch (e) {
483
+ if ((e as Error).message === 'Send aborted') {
484
+ // Expected abort, don't log
485
+ return;
486
+ }
487
+ throw e;
260
488
  }
261
- }
489
+ };
262
490
 
263
- closingWs = true;
264
- socket.send(JSON.stringify({ type: 'session.finalize' }));
265
- };
491
+ const recv = async (signal: AbortSignal) => {
492
+ const serverEventStream = eventChannel.stream();
493
+ const reader = serverEventStream.getReader();
266
494
 
267
- const recv = async (socket: WebSocket, signal: AbortSignal) => {
268
- while (!this.closed && !signal.aborted) {
269
- const dataPromise = new Promise<string>((resolve, reject) => {
270
- const messageHandler = (d: RawData) => {
271
- resolve(d.toString());
272
- removeListeners();
273
- };
274
- const errorHandler = (e: Error) => {
275
- reject(e);
276
- removeListeners();
277
- };
278
- const closeHandler = (code: number) => {
279
- if (closingWs) {
280
- resolve('');
281
- } else {
282
- reject(
283
- new APIStatusError({
284
- message: 'LiveKit STT connection closed unexpectedly',
285
- options: { statusCode: code },
286
- }),
495
+ try {
496
+ while (!this.closed && !signal.aborted) {
497
+ const result = await reader.read();
498
+ if (signal.aborted) return;
499
+ if (result.done) return;
500
+
501
+ // Parse and validate with Zod schema
502
+ const parseResult = await sttServerEventSchema.safeParseAsync(result.value);
503
+ if (!parseResult.success) {
504
+ this.#logger.warn(
505
+ { error: parseResult.error, rawData: result.value },
506
+ 'Failed to parse STT server event',
287
507
  );
508
+ continue;
288
509
  }
289
- removeListeners();
290
- };
291
- const removeListeners = () => {
292
- socket.removeListener('message', messageHandler);
293
- socket.removeListener('error', errorHandler);
294
- socket.removeListener('close', closeHandler);
295
- };
296
- socket.once('message', messageHandler);
297
- socket.once('error', errorHandler);
298
- socket.once('close', closeHandler);
299
- });
300
510
 
301
- const data = await Promise.race([dataPromise, waitForAbort(signal)]);
302
-
303
- if (!data || signal.aborted) return;
304
-
305
- const json = JSON.parse(data);
306
- const type = json.type as string | undefined;
307
-
308
- switch (type) {
309
- case 'session.created':
310
- case 'session.finalized':
311
- case 'session.closed':
312
- break;
313
- case 'interim_transcript':
314
- this.processTranscript(json, false);
315
- break;
316
- case 'final_transcript':
317
- this.processTranscript(json, true);
318
- break;
319
- case 'error':
320
- this.#logger.error('received error from LiveKit STT: %o', json);
321
- throw new APIError(`LiveKit STT returned error: ${JSON.stringify(json)}`);
322
- default:
323
- this.#logger.warn('received unexpected message from LiveKit STT: %o', json);
324
- break;
511
+ const event: SttServerEvent = parseResult.data;
512
+
513
+ switch (event.type) {
514
+ case 'session.created':
515
+ case 'session.finalized':
516
+ break;
517
+ case 'session.closed':
518
+ finalReceived = true;
519
+ resourceCleanup();
520
+ break;
521
+ case 'interim_transcript':
522
+ this.processTranscript(event, false);
523
+ break;
524
+ case 'final_transcript':
525
+ this.processTranscript(event, true);
526
+ break;
527
+ case 'error':
528
+ this.#logger.error({ error: event }, 'Received error from LiveKit STT');
529
+ resourceCleanup();
530
+ throw new APIError(`LiveKit STT returned error: ${JSON.stringify(event)}`);
531
+ }
532
+ }
533
+ } finally {
534
+ reader.releaseLock();
535
+ try {
536
+ await serverEventStream.cancel();
537
+ } catch (e) {
538
+ this.#logger.debug('Error cancelling serverEventStream (may already be cancelled):', e);
539
+ }
325
540
  }
326
- }
327
- };
541
+ };
328
542
 
329
- while (true) {
330
543
  try {
331
- ws = await connect();
544
+ ws = await this.stt.connectWs(this.connOptions.timeoutMs);
332
545
 
333
- const sendTask = Task.from(async ({ signal }) => {
334
- await send(ws!, signal);
335
- });
336
-
337
- const recvTask = Task.from(async ({ signal }) => {
338
- await recv(ws!, signal);
339
- });
340
-
341
- const tasks = [sendTask, recvTask];
342
- const waitReconnectTask = Task.from(async ({ signal }) => {
343
- await Promise.race([this.reconnectEvent.wait(), waitForAbort(signal)]);
344
- });
546
+ const controller = this.abortController; // Use base class abortController for proper cancellation
547
+ const sendTask = Task.from(({ signal }) => send(ws!, signal), controller);
548
+ const wsListenerTask = Task.from(({ signal }) => createWsListener(ws!, signal), controller);
549
+ const recvTask = Task.from(({ signal }) => recv(signal), controller);
550
+ const waitReconnectTask = Task.from(
551
+ ({ signal }) => Promise.race([this.reconnectEvent.wait(), waitForAbort(signal)]),
552
+ controller,
553
+ );
345
554
 
346
555
  try {
347
556
  await Promise.race([
348
- Promise.all(tasks.map((task) => task.result)),
557
+ Promise.all([sendTask.result, wsListenerTask.result, recvTask.result]),
349
558
  waitReconnectTask.result,
350
559
  ]);
351
560
 
561
+ // If reconnect didn't trigger, tasks finished - exit loop
352
562
  if (!waitReconnectTask.done) break;
563
+
564
+ // Reconnect triggered - clear event and continue loop
353
565
  this.reconnectEvent.clear();
354
566
  } finally {
355
- await cancelAndWait([sendTask, recvTask, waitReconnectTask], DEFAULT_CANCEL_TIMEOUT);
567
+ // Cancel all tasks to ensure cleanup
568
+ await cancelAndWait(
569
+ [sendTask, wsListenerTask, recvTask, waitReconnectTask],
570
+ DEFAULT_CANCEL_TIMEOUT,
571
+ );
572
+ resourceCleanup();
356
573
  }
357
574
  } finally {
358
- try {
359
- if (ws) ws.close();
360
- } catch {}
575
+ // Ensure cleanup even if connectWs throws
576
+ resourceCleanup();
361
577
  }
362
578
  }
363
579
  }
364
580
 
365
- private processTranscript(data: Record<string, any>, isFinal: boolean) {
366
- const requestId = data.request_id ?? this.requestId;
367
- const text = data.transcript ?? '';
368
- const language = data.language ?? this.opts.language ?? 'en';
581
+ private processTranscript(data: SttTranscriptEvent, isFinal: boolean) {
582
+ // Check if queue is closed to avoid race condition during disconnect
583
+ if (this.queue.closed) return;
584
+
585
+ const requestId = data.session_id || this.requestId;
586
+ const text = data.transcript;
587
+ const language = normalizeLanguage(data.language || this.opts.language || 'en');
369
588
 
370
589
  if (!text && !isFinal) return;
371
590
 
372
- // We'll have a more accurate way of detecting when speech started when we have VAD
373
- if (!this.speaking) {
374
- this.speaking = true;
375
- this.queue.put({ type: SpeechEventType.START_OF_SPEECH });
376
- }
591
+ try {
592
+ // We'll have a more accurate way of detecting when speech started when we have VAD
593
+ if (!this.speaking) {
594
+ this.speaking = true;
595
+ this.queue.put({ type: SpeechEventType.START_OF_SPEECH });
596
+ }
377
597
 
378
- const speechData: SpeechData = {
379
- language,
380
- startTime: data.start ?? 0,
381
- endTime: data.duration ?? 0,
382
- confidence: data.confidence ?? 1.0,
383
- text,
384
- };
598
+ const speechData: SpeechData = {
599
+ language,
600
+ startTime: this.startTimeOffset + data.start,
601
+ endTime: this.startTimeOffset + data.start + data.duration,
602
+ confidence: data.confidence,
603
+ text,
604
+ words: data.words.map(
605
+ (word): TimedString =>
606
+ createTimedString({
607
+ text: word.word,
608
+ startTime: word.start + this.startTimeOffset,
609
+ endTime: word.end + this.startTimeOffset,
610
+ startTimeOffset: this.startTimeOffset,
611
+ confidence: word.confidence,
612
+ }),
613
+ ),
614
+ };
615
+
616
+ if (isFinal) {
617
+ if (this.speechDuration > 0) {
618
+ this.queue.put({
619
+ type: SpeechEventType.RECOGNITION_USAGE,
620
+ requestId,
621
+ recognitionUsage: { audioDuration: this.speechDuration },
622
+ });
623
+ this.speechDuration = 0;
624
+ }
385
625
 
386
- if (isFinal) {
387
- if (this.speechDuration > 0) {
388
626
  this.queue.put({
389
- type: SpeechEventType.RECOGNITION_USAGE,
627
+ type: SpeechEventType.FINAL_TRANSCRIPT,
390
628
  requestId,
391
- recognitionUsage: { audioDuration: this.speechDuration },
629
+ alternatives: [speechData],
392
630
  });
393
- this.speechDuration = 0;
394
- }
395
631
 
396
- this.queue.put({
397
- type: SpeechEventType.FINAL_TRANSCRIPT,
398
- requestId,
399
- alternatives: [speechData],
400
- });
401
-
402
- if (this.speaking) {
403
- this.speaking = false;
404
- this.queue.put({ type: SpeechEventType.END_OF_SPEECH });
632
+ if (this.speaking) {
633
+ this.speaking = false;
634
+ this.queue.put({ type: SpeechEventType.END_OF_SPEECH });
635
+ }
636
+ } else {
637
+ this.queue.put({
638
+ type: SpeechEventType.INTERIM_TRANSCRIPT,
639
+ requestId,
640
+ alternatives: [speechData],
641
+ });
642
+ }
643
+ } catch (e) {
644
+ if (e instanceof Error && e.message.includes('Queue is closed')) {
645
+ // Expected behavior on disconnect, log as warning
646
+ this.#logger.warn(
647
+ { err: e },
648
+ 'Queue closed during transcript processing (expected during disconnect)',
649
+ );
650
+ } else {
651
+ this.#logger.error({ err: e }, 'Error putting transcript to queue');
405
652
  }
406
- } else {
407
- this.queue.put({
408
- type: SpeechEventType.INTERIM_TRANSCRIPT,
409
- requestId,
410
- alternatives: [speechData],
411
- });
412
653
  }
413
654
  }
414
655
  }