@livekit/agents 0.7.9 → 1.0.0-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (627) hide show
  1. package/dist/_exceptions.cjs +109 -0
  2. package/dist/_exceptions.cjs.map +1 -0
  3. package/dist/_exceptions.d.cts +64 -0
  4. package/dist/_exceptions.d.ts +64 -0
  5. package/dist/_exceptions.d.ts.map +1 -0
  6. package/dist/_exceptions.js +80 -0
  7. package/dist/_exceptions.js.map +1 -0
  8. package/dist/audio.cjs +10 -3
  9. package/dist/audio.cjs.map +1 -1
  10. package/dist/audio.d.cts +2 -0
  11. package/dist/audio.d.ts +2 -0
  12. package/dist/audio.d.ts.map +1 -1
  13. package/dist/audio.js +8 -2
  14. package/dist/audio.js.map +1 -1
  15. package/dist/cli.cjs +25 -0
  16. package/dist/cli.cjs.map +1 -1
  17. package/dist/cli.d.ts.map +1 -1
  18. package/dist/cli.js +25 -0
  19. package/dist/cli.js.map +1 -1
  20. package/dist/constants.cjs +6 -3
  21. package/dist/constants.cjs.map +1 -1
  22. package/dist/constants.d.cts +2 -1
  23. package/dist/constants.d.ts +2 -1
  24. package/dist/constants.d.ts.map +1 -1
  25. package/dist/constants.js +4 -2
  26. package/dist/constants.js.map +1 -1
  27. package/dist/http_server.cjs.map +1 -1
  28. package/dist/http_server.d.cts +1 -0
  29. package/dist/http_server.d.ts +1 -0
  30. package/dist/http_server.d.ts.map +1 -1
  31. package/dist/http_server.js.map +1 -1
  32. package/dist/index.cjs +27 -20
  33. package/dist/index.cjs.map +1 -1
  34. package/dist/index.d.cts +13 -10
  35. package/dist/index.d.ts +13 -10
  36. package/dist/index.d.ts.map +1 -1
  37. package/dist/index.js +15 -11
  38. package/dist/index.js.map +1 -1
  39. package/dist/inference_runner.cjs +0 -1
  40. package/dist/inference_runner.cjs.map +1 -1
  41. package/dist/inference_runner.d.cts +2 -3
  42. package/dist/inference_runner.d.ts +2 -3
  43. package/dist/inference_runner.d.ts.map +1 -1
  44. package/dist/inference_runner.js +0 -1
  45. package/dist/inference_runner.js.map +1 -1
  46. package/dist/ipc/inference_proc_executor.cjs +2 -2
  47. package/dist/ipc/inference_proc_executor.cjs.map +1 -1
  48. package/dist/ipc/inference_proc_executor.js +2 -2
  49. package/dist/ipc/inference_proc_executor.js.map +1 -1
  50. package/dist/ipc/job_executor.cjs.map +1 -1
  51. package/dist/ipc/job_executor.js.map +1 -1
  52. package/dist/ipc/job_proc_executor.cjs +1 -0
  53. package/dist/ipc/job_proc_executor.cjs.map +1 -1
  54. package/dist/ipc/job_proc_executor.js +1 -0
  55. package/dist/ipc/job_proc_executor.js.map +1 -1
  56. package/dist/ipc/job_proc_lazy_main.cjs +1 -1
  57. package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
  58. package/dist/ipc/job_proc_lazy_main.js +1 -1
  59. package/dist/ipc/job_proc_lazy_main.js.map +1 -1
  60. package/dist/ipc/supervised_proc.d.cts +1 -1
  61. package/dist/ipc/supervised_proc.d.ts +1 -1
  62. package/dist/ipc/supervised_proc.d.ts.map +1 -1
  63. package/dist/job.cjs +14 -2
  64. package/dist/job.cjs.map +1 -1
  65. package/dist/job.d.cts +8 -0
  66. package/dist/job.d.ts +8 -0
  67. package/dist/job.d.ts.map +1 -1
  68. package/dist/job.js +12 -1
  69. package/dist/job.js.map +1 -1
  70. package/dist/llm/chat_context.cjs +332 -82
  71. package/dist/llm/chat_context.cjs.map +1 -1
  72. package/dist/llm/chat_context.d.cts +152 -48
  73. package/dist/llm/chat_context.d.ts +152 -48
  74. package/dist/llm/chat_context.d.ts.map +1 -1
  75. package/dist/llm/chat_context.js +327 -81
  76. package/dist/llm/chat_context.js.map +1 -1
  77. package/dist/llm/chat_context.test.cjs +380 -0
  78. package/dist/llm/chat_context.test.cjs.map +1 -0
  79. package/dist/llm/chat_context.test.js +385 -0
  80. package/dist/llm/chat_context.test.js.map +1 -0
  81. package/dist/llm/index.cjs +37 -8
  82. package/dist/llm/index.cjs.map +1 -1
  83. package/dist/llm/index.d.cts +7 -3
  84. package/dist/llm/index.d.ts +7 -3
  85. package/dist/llm/index.d.ts.map +1 -1
  86. package/dist/llm/index.js +39 -9
  87. package/dist/llm/index.js.map +1 -1
  88. package/dist/llm/llm.cjs +97 -33
  89. package/dist/llm/llm.cjs.map +1 -1
  90. package/dist/llm/llm.d.cts +50 -24
  91. package/dist/llm/llm.d.ts +50 -24
  92. package/dist/llm/llm.d.ts.map +1 -1
  93. package/dist/llm/llm.js +98 -33
  94. package/dist/llm/llm.js.map +1 -1
  95. package/dist/llm/provider_format/google.cjs +128 -0
  96. package/dist/llm/provider_format/google.cjs.map +1 -0
  97. package/dist/llm/provider_format/google.d.cts +6 -0
  98. package/dist/llm/provider_format/google.d.ts +6 -0
  99. package/dist/llm/provider_format/google.d.ts.map +1 -0
  100. package/dist/llm/provider_format/google.js +104 -0
  101. package/dist/llm/provider_format/google.js.map +1 -0
  102. package/dist/llm/provider_format/google.test.cjs +676 -0
  103. package/dist/llm/provider_format/google.test.cjs.map +1 -0
  104. package/dist/llm/provider_format/google.test.js +675 -0
  105. package/dist/llm/provider_format/google.test.js.map +1 -0
  106. package/dist/llm/provider_format/index.cjs +40 -0
  107. package/dist/llm/provider_format/index.cjs.map +1 -0
  108. package/dist/llm/provider_format/index.d.cts +4 -0
  109. package/dist/llm/provider_format/index.d.ts +4 -0
  110. package/dist/llm/provider_format/index.d.ts.map +1 -0
  111. package/dist/llm/provider_format/index.js +16 -0
  112. package/dist/llm/provider_format/index.js.map +1 -0
  113. package/dist/llm/provider_format/openai.cjs +116 -0
  114. package/dist/llm/provider_format/openai.cjs.map +1 -0
  115. package/dist/llm/provider_format/openai.d.cts +3 -0
  116. package/dist/llm/provider_format/openai.d.ts +3 -0
  117. package/dist/llm/provider_format/openai.d.ts.map +1 -0
  118. package/dist/llm/provider_format/openai.js +92 -0
  119. package/dist/llm/provider_format/openai.js.map +1 -0
  120. package/dist/llm/provider_format/openai.test.cjs +490 -0
  121. package/dist/llm/provider_format/openai.test.cjs.map +1 -0
  122. package/dist/llm/provider_format/openai.test.js +489 -0
  123. package/dist/llm/provider_format/openai.test.js.map +1 -0
  124. package/dist/llm/provider_format/utils.cjs +146 -0
  125. package/dist/llm/provider_format/utils.cjs.map +1 -0
  126. package/dist/llm/provider_format/utils.d.cts +38 -0
  127. package/dist/llm/provider_format/utils.d.ts +38 -0
  128. package/dist/llm/provider_format/utils.d.ts.map +1 -0
  129. package/dist/llm/provider_format/utils.js +122 -0
  130. package/dist/llm/provider_format/utils.js.map +1 -0
  131. package/dist/llm/realtime.cjs +77 -0
  132. package/dist/llm/realtime.cjs.map +1 -0
  133. package/dist/llm/realtime.d.cts +98 -0
  134. package/dist/llm/realtime.d.ts +98 -0
  135. package/dist/llm/realtime.d.ts.map +1 -0
  136. package/dist/llm/realtime.js +52 -0
  137. package/dist/llm/realtime.js.map +1 -0
  138. package/dist/llm/remote_chat_context.cjs +112 -0
  139. package/dist/llm/remote_chat_context.cjs.map +1 -0
  140. package/dist/llm/remote_chat_context.d.cts +23 -0
  141. package/dist/llm/remote_chat_context.d.ts +23 -0
  142. package/dist/llm/remote_chat_context.d.ts.map +1 -0
  143. package/dist/llm/remote_chat_context.js +88 -0
  144. package/dist/llm/remote_chat_context.js.map +1 -0
  145. package/dist/llm/remote_chat_context.test.cjs +225 -0
  146. package/dist/llm/remote_chat_context.test.cjs.map +1 -0
  147. package/dist/llm/remote_chat_context.test.js +224 -0
  148. package/dist/llm/remote_chat_context.test.js.map +1 -0
  149. package/dist/llm/tool_context.cjs +111 -0
  150. package/dist/llm/tool_context.cjs.map +1 -0
  151. package/dist/llm/tool_context.d.cts +125 -0
  152. package/dist/llm/tool_context.d.ts +125 -0
  153. package/dist/llm/tool_context.d.ts.map +1 -0
  154. package/dist/llm/tool_context.js +80 -0
  155. package/dist/llm/tool_context.js.map +1 -0
  156. package/dist/llm/tool_context.test.cjs +162 -0
  157. package/dist/llm/tool_context.test.cjs.map +1 -0
  158. package/dist/llm/tool_context.test.js +161 -0
  159. package/dist/llm/tool_context.test.js.map +1 -0
  160. package/dist/llm/tool_context.type.test.cjs +92 -0
  161. package/dist/llm/tool_context.type.test.cjs.map +1 -0
  162. package/dist/llm/tool_context.type.test.js +91 -0
  163. package/dist/llm/tool_context.type.test.js.map +1 -0
  164. package/dist/llm/utils.cjs +260 -0
  165. package/dist/llm/utils.cjs.map +1 -0
  166. package/dist/llm/utils.d.cts +42 -0
  167. package/dist/llm/utils.d.ts +42 -0
  168. package/dist/llm/utils.d.ts.map +1 -0
  169. package/dist/llm/utils.js +223 -0
  170. package/dist/llm/utils.js.map +1 -0
  171. package/dist/llm/utils.test.cjs +513 -0
  172. package/dist/llm/utils.test.cjs.map +1 -0
  173. package/dist/llm/utils.test.js +490 -0
  174. package/dist/llm/utils.test.js.map +1 -0
  175. package/dist/metrics/base.cjs +0 -27
  176. package/dist/metrics/base.cjs.map +1 -1
  177. package/dist/metrics/base.d.cts +105 -63
  178. package/dist/metrics/base.d.ts +105 -63
  179. package/dist/metrics/base.d.ts.map +1 -1
  180. package/dist/metrics/base.js +0 -19
  181. package/dist/metrics/base.js.map +1 -1
  182. package/dist/metrics/index.cjs +0 -3
  183. package/dist/metrics/index.cjs.map +1 -1
  184. package/dist/metrics/index.d.cts +2 -3
  185. package/dist/metrics/index.d.ts +2 -3
  186. package/dist/metrics/index.d.ts.map +1 -1
  187. package/dist/metrics/index.js +0 -2
  188. package/dist/metrics/index.js.map +1 -1
  189. package/dist/metrics/usage_collector.cjs +17 -12
  190. package/dist/metrics/usage_collector.cjs.map +1 -1
  191. package/dist/metrics/usage_collector.d.cts +3 -2
  192. package/dist/metrics/usage_collector.d.ts +3 -2
  193. package/dist/metrics/usage_collector.d.ts.map +1 -1
  194. package/dist/metrics/usage_collector.js +17 -12
  195. package/dist/metrics/usage_collector.js.map +1 -1
  196. package/dist/metrics/utils.cjs +22 -59
  197. package/dist/metrics/utils.cjs.map +1 -1
  198. package/dist/metrics/utils.d.cts +1 -8
  199. package/dist/metrics/utils.d.ts +1 -8
  200. package/dist/metrics/utils.d.ts.map +1 -1
  201. package/dist/metrics/utils.js +22 -52
  202. package/dist/metrics/utils.js.map +1 -1
  203. package/dist/multimodal/index.cjs +0 -2
  204. package/dist/multimodal/index.cjs.map +1 -1
  205. package/dist/multimodal/index.d.cts +0 -1
  206. package/dist/multimodal/index.d.ts +0 -1
  207. package/dist/multimodal/index.d.ts.map +1 -1
  208. package/dist/multimodal/index.js +0 -1
  209. package/dist/multimodal/index.js.map +1 -1
  210. package/dist/plugin.cjs +24 -8
  211. package/dist/plugin.cjs.map +1 -1
  212. package/dist/plugin.d.cts +18 -4
  213. package/dist/plugin.d.ts +18 -4
  214. package/dist/plugin.d.ts.map +1 -1
  215. package/dist/plugin.js +22 -7
  216. package/dist/plugin.js.map +1 -1
  217. package/dist/stream/deferred_stream.cjs +98 -0
  218. package/dist/stream/deferred_stream.cjs.map +1 -0
  219. package/dist/stream/deferred_stream.d.cts +27 -0
  220. package/dist/stream/deferred_stream.d.ts +27 -0
  221. package/dist/stream/deferred_stream.d.ts.map +1 -0
  222. package/dist/stream/deferred_stream.js +73 -0
  223. package/dist/stream/deferred_stream.js.map +1 -0
  224. package/dist/stream/deferred_stream.test.cjs +527 -0
  225. package/dist/stream/deferred_stream.test.cjs.map +1 -0
  226. package/dist/stream/deferred_stream.test.js +526 -0
  227. package/dist/stream/deferred_stream.test.js.map +1 -0
  228. package/dist/stream/identity_transform.cjs +42 -0
  229. package/dist/stream/identity_transform.cjs.map +1 -0
  230. package/dist/stream/identity_transform.d.cts +6 -0
  231. package/dist/stream/identity_transform.d.ts +6 -0
  232. package/dist/stream/identity_transform.d.ts.map +1 -0
  233. package/dist/stream/identity_transform.js +18 -0
  234. package/dist/stream/identity_transform.js.map +1 -0
  235. package/dist/stream/identity_transform.test.cjs +125 -0
  236. package/dist/stream/identity_transform.test.cjs.map +1 -0
  237. package/dist/stream/identity_transform.test.js +124 -0
  238. package/dist/stream/identity_transform.test.js.map +1 -0
  239. package/dist/stream/index.cjs +38 -0
  240. package/dist/stream/index.cjs.map +1 -0
  241. package/dist/stream/index.d.cts +5 -0
  242. package/dist/stream/index.d.ts +5 -0
  243. package/dist/stream/index.d.ts.map +1 -0
  244. package/dist/stream/index.js +11 -0
  245. package/dist/stream/index.js.map +1 -0
  246. package/dist/stream/merge_readable_streams.cjs +59 -0
  247. package/dist/stream/merge_readable_streams.cjs.map +1 -0
  248. package/dist/stream/merge_readable_streams.d.cts +4 -0
  249. package/dist/stream/merge_readable_streams.d.ts +4 -0
  250. package/dist/stream/merge_readable_streams.d.ts.map +1 -0
  251. package/dist/stream/merge_readable_streams.js +35 -0
  252. package/dist/stream/merge_readable_streams.js.map +1 -0
  253. package/dist/stream/stream_channel.cjs +47 -0
  254. package/dist/stream/stream_channel.cjs.map +1 -0
  255. package/dist/stream/stream_channel.d.cts +9 -0
  256. package/dist/stream/stream_channel.d.ts +9 -0
  257. package/dist/stream/stream_channel.d.ts.map +1 -0
  258. package/dist/stream/stream_channel.js +23 -0
  259. package/dist/stream/stream_channel.js.map +1 -0
  260. package/dist/stream/stream_channel.test.cjs +97 -0
  261. package/dist/stream/stream_channel.test.cjs.map +1 -0
  262. package/dist/stream/stream_channel.test.js +96 -0
  263. package/dist/stream/stream_channel.test.js.map +1 -0
  264. package/dist/stt/stream_adapter.cjs +3 -4
  265. package/dist/stt/stream_adapter.cjs.map +1 -1
  266. package/dist/stt/stream_adapter.d.cts +1 -0
  267. package/dist/stt/stream_adapter.d.ts +1 -0
  268. package/dist/stt/stream_adapter.d.ts.map +1 -1
  269. package/dist/stt/stream_adapter.js +3 -4
  270. package/dist/stt/stream_adapter.js.map +1 -1
  271. package/dist/stt/stt.cjs +100 -10
  272. package/dist/stt/stt.cjs.map +1 -1
  273. package/dist/stt/stt.d.cts +26 -5
  274. package/dist/stt/stt.d.ts +26 -5
  275. package/dist/stt/stt.d.ts.map +1 -1
  276. package/dist/stt/stt.js +101 -11
  277. package/dist/stt/stt.js.map +1 -1
  278. package/dist/tokenize/basic/basic.cjs +10 -5
  279. package/dist/tokenize/basic/basic.cjs.map +1 -1
  280. package/dist/tokenize/basic/basic.d.cts +7 -1
  281. package/dist/tokenize/basic/basic.d.ts +7 -1
  282. package/dist/tokenize/basic/basic.d.ts.map +1 -1
  283. package/dist/tokenize/basic/basic.js +10 -5
  284. package/dist/tokenize/basic/basic.js.map +1 -1
  285. package/dist/tokenize/basic/sentence.cjs +14 -6
  286. package/dist/tokenize/basic/sentence.cjs.map +1 -1
  287. package/dist/tokenize/basic/sentence.d.cts +1 -1
  288. package/dist/tokenize/basic/sentence.d.ts +1 -1
  289. package/dist/tokenize/basic/sentence.d.ts.map +1 -1
  290. package/dist/tokenize/basic/sentence.js +14 -6
  291. package/dist/tokenize/basic/sentence.js.map +1 -1
  292. package/dist/tokenize/token_stream.cjs +5 -3
  293. package/dist/tokenize/token_stream.cjs.map +1 -1
  294. package/dist/tokenize/token_stream.d.cts +1 -0
  295. package/dist/tokenize/token_stream.d.ts +1 -0
  296. package/dist/tokenize/token_stream.d.ts.map +1 -1
  297. package/dist/tokenize/token_stream.js +6 -4
  298. package/dist/tokenize/token_stream.js.map +1 -1
  299. package/dist/transcription.cjs +1 -2
  300. package/dist/transcription.cjs.map +1 -1
  301. package/dist/transcription.d.ts.map +1 -1
  302. package/dist/transcription.js +2 -3
  303. package/dist/transcription.js.map +1 -1
  304. package/dist/tts/index.cjs +2 -4
  305. package/dist/tts/index.cjs.map +1 -1
  306. package/dist/tts/index.d.cts +1 -1
  307. package/dist/tts/index.d.ts +1 -1
  308. package/dist/tts/index.d.ts.map +1 -1
  309. package/dist/tts/index.js +1 -3
  310. package/dist/tts/index.js.map +1 -1
  311. package/dist/tts/stream_adapter.cjs +26 -13
  312. package/dist/tts/stream_adapter.cjs.map +1 -1
  313. package/dist/tts/stream_adapter.d.cts +1 -1
  314. package/dist/tts/stream_adapter.d.ts +1 -1
  315. package/dist/tts/stream_adapter.d.ts.map +1 -1
  316. package/dist/tts/stream_adapter.js +27 -14
  317. package/dist/tts/stream_adapter.js.map +1 -1
  318. package/dist/tts/tts.cjs +156 -25
  319. package/dist/tts/tts.cjs.map +1 -1
  320. package/dist/tts/tts.d.cts +29 -5
  321. package/dist/tts/tts.d.ts +29 -5
  322. package/dist/tts/tts.d.ts.map +1 -1
  323. package/dist/tts/tts.js +156 -24
  324. package/dist/tts/tts.js.map +1 -1
  325. package/dist/types.cjs +60 -0
  326. package/dist/types.cjs.map +1 -0
  327. package/dist/types.d.cts +13 -0
  328. package/dist/types.d.ts +13 -0
  329. package/dist/types.d.ts.map +1 -0
  330. package/dist/types.js +35 -0
  331. package/dist/types.js.map +1 -0
  332. package/dist/utils.cjs +298 -27
  333. package/dist/utils.cjs.map +1 -1
  334. package/dist/utils.d.cts +145 -9
  335. package/dist/utils.d.ts +145 -9
  336. package/dist/utils.d.ts.map +1 -1
  337. package/dist/utils.js +281 -26
  338. package/dist/utils.js.map +1 -1
  339. package/dist/utils.test.cjs +491 -0
  340. package/dist/utils.test.cjs.map +1 -0
  341. package/dist/utils.test.js +498 -0
  342. package/dist/utils.test.js.map +1 -0
  343. package/dist/vad.cjs +76 -20
  344. package/dist/vad.cjs.map +1 -1
  345. package/dist/vad.d.cts +25 -5
  346. package/dist/vad.d.ts +25 -5
  347. package/dist/vad.d.ts.map +1 -1
  348. package/dist/vad.js +76 -20
  349. package/dist/vad.js.map +1 -1
  350. package/dist/voice/agent.cjs +245 -0
  351. package/dist/voice/agent.cjs.map +1 -0
  352. package/dist/voice/agent.d.cts +78 -0
  353. package/dist/voice/agent.d.ts +78 -0
  354. package/dist/voice/agent.d.ts.map +1 -0
  355. package/dist/voice/agent.js +220 -0
  356. package/dist/voice/agent.js.map +1 -0
  357. package/dist/voice/agent.test.cjs +61 -0
  358. package/dist/voice/agent.test.cjs.map +1 -0
  359. package/dist/voice/agent.test.js +60 -0
  360. package/dist/voice/agent.test.js.map +1 -0
  361. package/dist/voice/agent_activity.cjs +1453 -0
  362. package/dist/voice/agent_activity.cjs.map +1 -0
  363. package/dist/voice/agent_activity.d.cts +94 -0
  364. package/dist/voice/agent_activity.d.ts +94 -0
  365. package/dist/voice/agent_activity.d.ts.map +1 -0
  366. package/dist/voice/agent_activity.js +1449 -0
  367. package/dist/voice/agent_activity.js.map +1 -0
  368. package/dist/voice/agent_session.cjs +312 -0
  369. package/dist/voice/agent_session.cjs.map +1 -0
  370. package/dist/voice/agent_session.d.cts +121 -0
  371. package/dist/voice/agent_session.d.ts +121 -0
  372. package/dist/voice/agent_session.d.ts.map +1 -0
  373. package/dist/voice/agent_session.js +295 -0
  374. package/dist/voice/agent_session.js.map +1 -0
  375. package/dist/voice/audio_recognition.cjs +374 -0
  376. package/dist/voice/audio_recognition.cjs.map +1 -0
  377. package/dist/voice/audio_recognition.d.cts +80 -0
  378. package/dist/voice/audio_recognition.d.ts +80 -0
  379. package/dist/voice/audio_recognition.d.ts.map +1 -0
  380. package/dist/voice/audio_recognition.js +350 -0
  381. package/dist/voice/audio_recognition.js.map +1 -0
  382. package/dist/voice/events.cjs +145 -0
  383. package/dist/voice/events.cjs.map +1 -0
  384. package/dist/voice/events.d.cts +124 -0
  385. package/dist/voice/events.d.ts +124 -0
  386. package/dist/voice/events.d.ts.map +1 -0
  387. package/dist/voice/events.js +110 -0
  388. package/dist/voice/events.js.map +1 -0
  389. package/dist/voice/generation.cjs +700 -0
  390. package/dist/voice/generation.cjs.map +1 -0
  391. package/dist/voice/generation.d.cts +115 -0
  392. package/dist/voice/generation.d.ts +115 -0
  393. package/dist/voice/generation.d.ts.map +1 -0
  394. package/dist/voice/generation.js +672 -0
  395. package/dist/voice/generation.js.map +1 -0
  396. package/dist/voice/index.cjs +40 -0
  397. package/dist/voice/index.cjs.map +1 -0
  398. package/dist/voice/index.d.cts +5 -0
  399. package/dist/voice/index.d.ts +5 -0
  400. package/dist/voice/index.d.ts.map +1 -0
  401. package/dist/voice/index.js +11 -0
  402. package/dist/voice/index.js.map +1 -0
  403. package/dist/voice/io.cjs +245 -0
  404. package/dist/voice/io.cjs.map +1 -0
  405. package/dist/voice/io.d.cts +101 -0
  406. package/dist/voice/io.d.ts +101 -0
  407. package/dist/voice/io.d.ts.map +1 -0
  408. package/dist/voice/io.js +217 -0
  409. package/dist/voice/io.js.map +1 -0
  410. package/dist/voice/room_io/_input.cjs +121 -0
  411. package/dist/voice/room_io/_input.cjs.map +1 -0
  412. package/dist/voice/room_io/_input.d.cts +24 -0
  413. package/dist/voice/room_io/_input.d.ts +24 -0
  414. package/dist/voice/room_io/_input.d.ts.map +1 -0
  415. package/dist/voice/room_io/_input.js +102 -0
  416. package/dist/voice/room_io/_input.js.map +1 -0
  417. package/dist/voice/room_io/_output.cjs +358 -0
  418. package/dist/voice/room_io/_output.cjs.map +1 -0
  419. package/dist/voice/room_io/_output.d.cts +75 -0
  420. package/dist/voice/room_io/_output.d.ts +75 -0
  421. package/dist/voice/room_io/_output.d.ts.map +1 -0
  422. package/dist/voice/room_io/_output.js +342 -0
  423. package/dist/voice/room_io/_output.js.map +1 -0
  424. package/dist/voice/room_io/index.cjs +25 -0
  425. package/dist/voice/room_io/index.cjs.map +1 -0
  426. package/dist/voice/room_io/index.d.cts +3 -0
  427. package/dist/voice/room_io/index.d.ts +3 -0
  428. package/dist/voice/room_io/index.d.ts.map +1 -0
  429. package/dist/voice/room_io/index.js +3 -0
  430. package/dist/voice/room_io/index.js.map +1 -0
  431. package/dist/voice/room_io/room_io.cjs +370 -0
  432. package/dist/voice/room_io/room_io.cjs.map +1 -0
  433. package/dist/voice/room_io/room_io.d.cts +73 -0
  434. package/dist/voice/room_io/room_io.d.ts +73 -0
  435. package/dist/voice/room_io/room_io.d.ts.map +1 -0
  436. package/dist/voice/room_io/room_io.js +361 -0
  437. package/dist/voice/room_io/room_io.js.map +1 -0
  438. package/dist/{pipeline/index.cjs → voice/run_context.cjs} +16 -11
  439. package/dist/voice/run_context.cjs.map +1 -0
  440. package/dist/voice/run_context.d.cts +12 -0
  441. package/dist/voice/run_context.d.ts +12 -0
  442. package/dist/voice/run_context.d.ts.map +1 -0
  443. package/dist/voice/run_context.js +14 -0
  444. package/dist/voice/run_context.js.map +1 -0
  445. package/dist/voice/speech_handle.cjs +105 -0
  446. package/dist/voice/speech_handle.cjs.map +1 -0
  447. package/dist/voice/speech_handle.d.cts +46 -0
  448. package/dist/voice/speech_handle.d.ts +46 -0
  449. package/dist/voice/speech_handle.d.ts.map +1 -0
  450. package/dist/voice/speech_handle.js +81 -0
  451. package/dist/voice/speech_handle.js.map +1 -0
  452. package/dist/voice/transcription/_utils.cjs +45 -0
  453. package/dist/voice/transcription/_utils.cjs.map +1 -0
  454. package/dist/voice/transcription/_utils.d.cts +3 -0
  455. package/dist/voice/transcription/_utils.d.ts +3 -0
  456. package/dist/voice/transcription/_utils.d.ts.map +1 -0
  457. package/dist/voice/transcription/_utils.js +21 -0
  458. package/dist/voice/transcription/_utils.js.map +1 -0
  459. package/dist/voice/transcription/index.cjs +23 -0
  460. package/dist/voice/transcription/index.cjs.map +1 -0
  461. package/dist/voice/transcription/index.d.cts +2 -0
  462. package/dist/voice/transcription/index.d.ts +2 -0
  463. package/dist/voice/transcription/index.d.ts.map +1 -0
  464. package/dist/voice/transcription/index.js +2 -0
  465. package/dist/voice/transcription/index.js.map +1 -0
  466. package/dist/voice/transcription/synchronizer.cjs +379 -0
  467. package/dist/voice/transcription/synchronizer.cjs.map +1 -0
  468. package/dist/voice/transcription/synchronizer.d.cts +86 -0
  469. package/dist/voice/transcription/synchronizer.d.ts +86 -0
  470. package/dist/voice/transcription/synchronizer.d.ts.map +1 -0
  471. package/dist/voice/transcription/synchronizer.js +354 -0
  472. package/dist/voice/transcription/synchronizer.js.map +1 -0
  473. package/dist/worker.cjs +22 -4
  474. package/dist/worker.cjs.map +1 -1
  475. package/dist/worker.d.cts +1 -1
  476. package/dist/worker.d.ts +1 -1
  477. package/dist/worker.d.ts.map +1 -1
  478. package/dist/worker.js +22 -4
  479. package/dist/worker.js.map +1 -1
  480. package/package.json +8 -2
  481. package/src/_exceptions.ts +137 -0
  482. package/src/audio.ts +12 -1
  483. package/src/cli.ts +37 -0
  484. package/src/constants.ts +2 -1
  485. package/src/http_server.ts +1 -0
  486. package/src/index.ts +13 -10
  487. package/src/inference_runner.ts +2 -3
  488. package/src/ipc/inference_proc_executor.ts +2 -2
  489. package/src/ipc/job_executor.ts +1 -1
  490. package/src/ipc/job_proc_executor.ts +1 -1
  491. package/src/ipc/job_proc_lazy_main.ts +1 -1
  492. package/src/job.ts +18 -0
  493. package/src/llm/__snapshots__/chat_context.test.ts.snap +527 -0
  494. package/src/llm/__snapshots__/tool_context.test.ts.snap +177 -0
  495. package/src/llm/__snapshots__/utils.test.ts.snap +65 -0
  496. package/src/llm/chat_context.test.ts +450 -0
  497. package/src/llm/chat_context.ts +501 -103
  498. package/src/llm/index.ts +53 -18
  499. package/src/llm/llm.ts +148 -50
  500. package/src/llm/provider_format/google.test.ts +772 -0
  501. package/src/llm/provider_format/google.ts +130 -0
  502. package/src/llm/provider_format/index.ts +23 -0
  503. package/src/llm/provider_format/openai.test.ts +581 -0
  504. package/src/llm/provider_format/openai.ts +118 -0
  505. package/src/llm/provider_format/utils.ts +183 -0
  506. package/src/llm/realtime.ts +151 -0
  507. package/src/llm/remote_chat_context.test.ts +290 -0
  508. package/src/llm/remote_chat_context.ts +114 -0
  509. package/src/llm/tool_context.test.ts +198 -0
  510. package/src/llm/tool_context.ts +259 -0
  511. package/src/llm/tool_context.type.test.ts +115 -0
  512. package/src/llm/utils.test.ts +670 -0
  513. package/src/llm/utils.ts +324 -0
  514. package/src/metrics/base.ts +110 -78
  515. package/src/metrics/index.ts +3 -9
  516. package/src/metrics/usage_collector.ts +19 -13
  517. package/src/metrics/utils.ts +24 -69
  518. package/src/multimodal/index.ts +0 -1
  519. package/src/plugin.ts +26 -8
  520. package/src/stream/deferred_stream.test.ts +755 -0
  521. package/src/stream/deferred_stream.ts +110 -0
  522. package/src/stream/identity_transform.test.ts +179 -0
  523. package/src/stream/identity_transform.ts +18 -0
  524. package/src/stream/index.ts +7 -0
  525. package/src/stream/merge_readable_streams.ts +40 -0
  526. package/src/stream/stream_channel.test.ts +129 -0
  527. package/src/stream/stream_channel.ts +32 -0
  528. package/src/stt/stream_adapter.ts +3 -5
  529. package/src/stt/stt.ts +134 -17
  530. package/src/tokenize/basic/basic.ts +13 -5
  531. package/src/tokenize/basic/sentence.ts +20 -6
  532. package/src/tokenize/token_stream.ts +7 -4
  533. package/src/transcription.ts +2 -3
  534. package/src/tts/index.ts +0 -1
  535. package/src/tts/stream_adapter.ts +42 -16
  536. package/src/tts/tts.ts +202 -21
  537. package/src/types.ts +42 -0
  538. package/src/utils.test.ts +658 -0
  539. package/src/utils.ts +402 -44
  540. package/src/vad.ts +90 -22
  541. package/src/voice/agent.test.ts +80 -0
  542. package/src/voice/agent.ts +332 -0
  543. package/src/voice/agent_activity.ts +1913 -0
  544. package/src/voice/agent_session.ts +460 -0
  545. package/src/voice/audio_recognition.ts +473 -0
  546. package/src/voice/events.ts +252 -0
  547. package/src/voice/generation.ts +881 -0
  548. package/src/voice/index.ts +7 -0
  549. package/src/voice/io.ts +304 -0
  550. package/src/voice/room_io/_input.ts +144 -0
  551. package/src/voice/room_io/_output.ts +436 -0
  552. package/src/voice/room_io/index.ts +5 -0
  553. package/src/voice/room_io/room_io.ts +495 -0
  554. package/src/voice/run_context.ts +20 -0
  555. package/src/voice/speech_handle.ts +104 -0
  556. package/src/voice/transcription/_utils.ts +25 -0
  557. package/src/voice/transcription/index.ts +4 -0
  558. package/src/voice/transcription/synchronizer.ts +477 -0
  559. package/src/worker.ts +22 -2
  560. package/dist/llm/function_context.cjs +0 -103
  561. package/dist/llm/function_context.cjs.map +0 -1
  562. package/dist/llm/function_context.d.cts +0 -47
  563. package/dist/llm/function_context.d.ts +0 -47
  564. package/dist/llm/function_context.d.ts.map +0 -1
  565. package/dist/llm/function_context.js +0 -78
  566. package/dist/llm/function_context.js.map +0 -1
  567. package/dist/llm/function_context.test.cjs +0 -218
  568. package/dist/llm/function_context.test.cjs.map +0 -1
  569. package/dist/llm/function_context.test.js +0 -217
  570. package/dist/llm/function_context.test.js.map +0 -1
  571. package/dist/multimodal/multimodal_agent.cjs +0 -486
  572. package/dist/multimodal/multimodal_agent.cjs.map +0 -1
  573. package/dist/multimodal/multimodal_agent.d.cts +0 -48
  574. package/dist/multimodal/multimodal_agent.d.ts +0 -48
  575. package/dist/multimodal/multimodal_agent.d.ts.map +0 -1
  576. package/dist/multimodal/multimodal_agent.js +0 -461
  577. package/dist/multimodal/multimodal_agent.js.map +0 -1
  578. package/dist/pipeline/agent_output.cjs +0 -197
  579. package/dist/pipeline/agent_output.cjs.map +0 -1
  580. package/dist/pipeline/agent_output.d.cts +0 -33
  581. package/dist/pipeline/agent_output.d.ts +0 -33
  582. package/dist/pipeline/agent_output.d.ts.map +0 -1
  583. package/dist/pipeline/agent_output.js +0 -172
  584. package/dist/pipeline/agent_output.js.map +0 -1
  585. package/dist/pipeline/agent_playout.cjs +0 -175
  586. package/dist/pipeline/agent_playout.cjs.map +0 -1
  587. package/dist/pipeline/agent_playout.d.cts +0 -40
  588. package/dist/pipeline/agent_playout.d.ts +0 -40
  589. package/dist/pipeline/agent_playout.d.ts.map +0 -1
  590. package/dist/pipeline/agent_playout.js +0 -139
  591. package/dist/pipeline/agent_playout.js.map +0 -1
  592. package/dist/pipeline/human_input.cjs +0 -171
  593. package/dist/pipeline/human_input.cjs.map +0 -1
  594. package/dist/pipeline/human_input.d.cts +0 -30
  595. package/dist/pipeline/human_input.d.ts +0 -30
  596. package/dist/pipeline/human_input.d.ts.map +0 -1
  597. package/dist/pipeline/human_input.js +0 -146
  598. package/dist/pipeline/human_input.js.map +0 -1
  599. package/dist/pipeline/index.cjs.map +0 -1
  600. package/dist/pipeline/index.d.cts +0 -2
  601. package/dist/pipeline/index.d.ts +0 -2
  602. package/dist/pipeline/index.d.ts.map +0 -1
  603. package/dist/pipeline/index.js +0 -11
  604. package/dist/pipeline/index.js.map +0 -1
  605. package/dist/pipeline/pipeline_agent.cjs +0 -859
  606. package/dist/pipeline/pipeline_agent.cjs.map +0 -1
  607. package/dist/pipeline/pipeline_agent.d.cts +0 -150
  608. package/dist/pipeline/pipeline_agent.d.ts +0 -150
  609. package/dist/pipeline/pipeline_agent.d.ts.map +0 -1
  610. package/dist/pipeline/pipeline_agent.js +0 -837
  611. package/dist/pipeline/pipeline_agent.js.map +0 -1
  612. package/dist/pipeline/speech_handle.cjs +0 -176
  613. package/dist/pipeline/speech_handle.cjs.map +0 -1
  614. package/dist/pipeline/speech_handle.d.cts +0 -37
  615. package/dist/pipeline/speech_handle.d.ts +0 -37
  616. package/dist/pipeline/speech_handle.d.ts.map +0 -1
  617. package/dist/pipeline/speech_handle.js +0 -152
  618. package/dist/pipeline/speech_handle.js.map +0 -1
  619. package/src/llm/function_context.test.ts +0 -248
  620. package/src/llm/function_context.ts +0 -142
  621. package/src/multimodal/multimodal_agent.ts +0 -592
  622. package/src/pipeline/agent_output.ts +0 -219
  623. package/src/pipeline/agent_playout.ts +0 -192
  624. package/src/pipeline/human_input.ts +0 -188
  625. package/src/pipeline/index.ts +0 -15
  626. package/src/pipeline/pipeline_agent.ts +0 -1197
  627. package/src/pipeline/speech_handle.ts +0 -201
@@ -0,0 +1,495 @@
1
+ // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+ import {
5
+ ConnectionState,
6
+ DisconnectReason,
7
+ type NoiseCancellationOptions,
8
+ type Participant,
9
+ ParticipantKind,
10
+ type RemoteParticipant,
11
+ type Room,
12
+ RoomEvent,
13
+ type TextStreamInfo,
14
+ type TextStreamReader,
15
+ TrackPublishOptions,
16
+ TrackSource,
17
+ } from '@livekit/rtc-node';
18
+ import type { WritableStreamDefaultWriter } from 'node:stream/web';
19
+ import { ATTRIBUTE_PUBLISH_ON_BEHALF, TOPIC_CHAT } from '../../constants.js';
20
+ import { log } from '../../log.js';
21
+ import { IdentityTransform } from '../../stream/identity_transform.js';
22
+ import { Future, Task } from '../../utils.js';
23
+ import { type AgentSession } from '../agent_session.js';
24
+ import {
25
+ AgentSessionEventTypes,
26
+ type AgentStateChangedEvent,
27
+ CloseReason,
28
+ type UserInputTranscribedEvent,
29
+ } from '../events.js';
30
+ import type { AudioOutput, TextOutput } from '../io.js';
31
+ import { TranscriptionSynchronizer } from '../transcription/synchronizer.js';
32
+ import { ParticipantAudioInputStream } from './_input.js';
33
+ import {
34
+ ParalellTextOutput,
35
+ ParticipantAudioOutput,
36
+ ParticipantLegacyTranscriptionOutput,
37
+ ParticipantTranscriptionOutput,
38
+ } from './_output.js';
39
+
40
/** Payload handed to a {@link TextInputCallback} when a text stream has been fully read. */
export interface TextInputEvent {
  /** Complete text read from the stream. */
  text: string;
  /** Stream metadata taken from the reader (`reader.info`). */
  info: TextStreamInfo;
  /** Remote participant that sent the text. */
  participant: RemoteParticipant;
}

/**
 * Handler invoked by `RoomIO` for every accepted text input.
 * It may be synchronous or return a promise; `RoomIO` awaits a returned promise.
 */
export type TextInputCallback = (sess: AgentSession, ev: TextInputEvent) => void | Promise<void>;

/**
 * Default text handler: interrupts the session, then asks it to generate a reply
 * using the received text as the user input.
 */
const DEFAULT_TEXT_INPUT_CALLBACK: TextInputCallback = (session, event) => {
  session.interrupt();
  session.generateReply({ userInput: event.text });
};
52
+
53
/**
 * Participant kinds `RoomIO` accepts when `RoomInputOptions.participantKinds`
 * is not provided.
 */
const DEFAULT_PARTICIPANT_KINDS: ParticipantKind[] = [
  ParticipantKind.SIP,
  ParticipantKind.STANDARD,
];

/**
 * Disconnect reasons for which `RoomIO` asks the agent session to close
 * (only when `RoomInputOptions.closeOnDisconnect` is enabled).
 */
const CLOSE_ON_DISCONNECT_REASONS: DisconnectReason[] = [
  DisconnectReason.CLIENT_INITIATED,
  DisconnectReason.ROOM_DELETED,
  DisconnectReason.USER_REJECTED,
];
63
+
64
/** Options controlling what `RoomIO` reads from the room. */
export interface RoomInputOptions {
  /** Sample rate passed to the participant audio input stream. */
  audioSampleRate: number;
  /** Channel count passed to the participant audio input stream. */
  audioNumChannels: number;
  /** When true, `RoomIO.start()` registers a text stream handler on the chat topic. */
  textEnabled: boolean;
  /** When true, `RoomIO.start()` creates the participant audio input stream. */
  audioEnabled: boolean;
  /** NOTE(review): not referenced by `RoomIO` in this file — confirm where it is consumed. */
  videoEnabled: boolean;
  /** Identity to link to when no participant is given to the `RoomIO` constructor. */
  participantIdentity?: string;
  /** Forwarded unchanged to the participant audio input stream. */
  noiseCancellation?: NoiseCancellationOptions;
  /** Invoked with the session and the received text for each accepted text input. */
  textInputCallback?: TextInputCallback;
  /** Participant kinds that may be linked; a built-in default list is used when omitted. */
  participantKinds?: ParticipantKind[];
  /**
   * When true, the agent session is asked to close if the linked participant
   * disconnects with one of the close-on-disconnect reasons.
   */
  closeOnDisconnect: boolean;
}

/** Options controlling what `RoomIO` publishes to the room. */
export interface RoomOutputOptions {
  /** When true, user and agent transcription outputs are created. */
  transcriptionEnabled: boolean;
  /** When true, a participant audio output is created. */
  audioEnabled: boolean;
  /** Sample rate passed to the participant audio output. */
  audioSampleRate: number;
  /** Channel count passed to the participant audio output. */
  audioNumChannels: number;
  /**
   * When true (and audio output exists), the agent transcript is routed
   * through a `TranscriptionSynchronizer` together with the audio output.
   */
  syncTranscription: boolean;
  /** Publish options passed to the participant audio output. */
  audioPublishOptions: TrackPublishOptions;
}

/** Defaults merged underneath caller-supplied input options. */
const DEFAULT_ROOM_INPUT_OPTIONS: RoomInputOptions = {
  audioSampleRate: 24000,
  audioNumChannels: 1,
  textEnabled: true,
  audioEnabled: true,
  videoEnabled: false,
  textInputCallback: DEFAULT_TEXT_INPUT_CALLBACK,
  closeOnDisconnect: true,
};

/** Defaults merged underneath caller-supplied output options. */
const DEFAULT_ROOM_OUTPUT_OPTIONS: RoomOutputOptions = {
  audioSampleRate: 24000,
  audioNumChannels: 1,
  transcriptionEnabled: true,
  audioEnabled: true,
  syncTranscription: true,
  audioPublishOptions: new TrackPublishOptions({ source: TrackSource.SOURCE_MICROPHONE }),
};
104
+
105
/**
 * Connects a LiveKit `Room` to an `AgentSession`.
 *
 * After `start()`, the class creates the enabled audio/text inputs and
 * audio/transcription outputs, assigns them to the session's `input`/`output`,
 * listens to room and session events, and links to one remote participant.
 * `close()` undoes the wiring and releases the created resources.
 */
export class RoomIO {
  private agentSession: AgentSession;
  private room: Room;
  private inputOptions: RoomInputOptions;
  private outputOptions: RoomOutputOptions;

  private audioInput?: ParticipantAudioInputStream;
  private participantAudioOutput?: ParticipantAudioOutput;
  private userTranscriptOutput?: ParalellTextOutput;
  private agentTranscriptOutput?: ParalellTextOutput;
  private transcriptionSynchronizer?: TranscriptionSynchronizer;
  /** Identity of the linked (or requested) participant; `null` when none is selected. */
  private participantIdentity: string | null = null;

  /** Resolved with the remote participant once an acceptable one is present. */
  private participantAvailableFuture: Future<RemoteParticipant> = new Future();
  /** Resolved once the room reports a connected state. */
  private roomConnectedFuture: Future<void> = new Future();

  // Use stream API for transcript queue
  private userTranscriptStream = new IdentityTransform<UserInputTranscribedEvent>();
  private userTranscriptWriter: WritableStreamDefaultWriter<UserInputTranscribedEvent>;
  private forwardUserTranscriptTask?: Task<void>;
  private initTask?: Task<void>;

  /** True only while this instance owns the chat-topic text stream handler. */
  private textStreamHandlerRegistered = false;

  private logger = log();

  /**
   * @param agentSession - Session whose inputs/outputs are wired to the room.
   * @param room - Room to read from and publish to.
   * @param participant - Participant (or identity) to link to. When falsy,
   *   `inputOptions.participantIdentity` is used, otherwise no identity is preset.
   * @param inputOptions - Overrides merged over the default input options.
   * @param outputOptions - Overrides merged over the default output options.
   */
  constructor({
    agentSession,
    room,
    participant = null,
    inputOptions,
    outputOptions,
  }: {
    agentSession: AgentSession;
    room: Room;
    participant?: RemoteParticipant | string | null;
    inputOptions?: Partial<RoomInputOptions>;
    outputOptions?: Partial<RoomOutputOptions>;
  }) {
    this.agentSession = agentSession;
    this.room = room;
    this.inputOptions = { ...DEFAULT_ROOM_INPUT_OPTIONS, ...inputOptions };
    this.outputOptions = { ...DEFAULT_ROOM_OUTPUT_OPTIONS, ...outputOptions };

    this.userTranscriptWriter = this.userTranscriptStream.writable.getWriter();

    if (participant) {
      this.participantIdentity =
        typeof participant === 'string' ? participant : participant.identity;
    } else {
      this.participantIdentity = this.inputOptions.participantIdentity ?? null;
    }
  }

  /**
   * Background initialisation: waits for the room connection, picks a
   * participant, links it, and starts the audio output.
   *
   * @param signal - Abort signal of the owning task; checked after each wait
   *   so a closed RoomIO does not keep wiring itself up.
   */
  private async init(signal: AbortSignal): Promise<void> {
    await this.roomConnectedFuture.await;
    if (signal.aborted) {
      return;
    }

    // Participants that joined before we started listening never emit
    // ParticipantConnected, so offer each of them to the handler manually.
    for (const participant of this.room.remoteParticipants.values()) {
      this.onParticipantConnected(participant);
    }

    const participant = await this.participantAvailableFuture.await;
    if (signal.aborted) {
      return;
    }
    this.setParticipant(participant.identity);

    // init agent outputs
    this.updateTranscriptionOutput({
      output: this.agentTranscriptOutput,
      participant: this.room.localParticipant?.identity ?? null,
    });

    await this.participantAudioOutput?.start(signal);
  }

  /** Resolves the connected future the first time the room is reported connected. */
  private onConnectionStateChanged = (state: ConnectionState) => {
    this.logger.debug({ state }, 'connection state changed');
    if (
      state === ConnectionState.CONN_CONNECTED &&
      this.room.isConnected &&
      !this.roomConnectedFuture.done
    ) {
      this.roomConnectedFuture.resolve();
    }
  };

  /** Accepts the participant as the linked one if it passes identity and kind filters. */
  private onParticipantConnected = (participant: RemoteParticipant) => {
    if (this.participantAvailableFuture.done) {
      return;
    }

    if (this.participantIdentity) {
      if (participant.identity !== this.participantIdentity) {
        return;
      }
    } else if (
      // otherwise, skip participants that are marked as publishing for this agent
      participant.attributes?.[ATTRIBUTE_PUBLISH_ON_BEHALF] === this.room.localParticipant?.identity
    ) {
      return;
    }

    const acceptedKinds = this.inputOptions.participantKinds ?? DEFAULT_PARTICIPANT_KINDS;
    if (participant.info.kind !== undefined && !acceptedKinds.includes(participant.info.kind)) {
      return;
    }

    this.participantAvailableFuture.resolve(participant);
  };

  /**
   * Resets participant availability when the linked participant leaves and,
   * if configured, asks the session to close for qualifying disconnect reasons.
   */
  private onParticipantDisconnected = (participant: RemoteParticipant) => {
    if (participant.identity !== this.participantIdentity) {
      return;
    }
    this.participantAvailableFuture = new Future<RemoteParticipant>();
    if (
      this.inputOptions.closeOnDisconnect &&
      participant.disconnectReason &&
      CLOSE_ON_DISCONNECT_REASONS.includes(participant.disconnectReason)
    ) {
      this.logger.info(
        {
          participant: participant.identity,
          reason: DisconnectReason[participant.disconnectReason],
        },
        'closing agent session due to participant disconnect ' +
          '(disable via `RoomInputOptions.closeOnDisconnect=false`)',
      );
      this.agentSession._closeSoon({
        reason: CloseReason.PARTICIPANT_DISCONNECTED,
      });
    }
  };

  /** Queues a user transcript event for in-order forwarding. */
  private onUserInputTranscribed = (ev: UserInputTranscribedEvent) => {
    this.userTranscriptWriter.write(ev).catch((error) => {
      this.logger.error({ error }, 'Failed to write transcript event to stream');
    });
  };

  /** Publishes the agent state as the `lk.agent.state` local participant attribute. */
  private onAgentStateChanged = async (ev: AgentStateChangedEvent) => {
    if (!this.room.isConnected || !this.room.localParticipant) {
      return;
    }
    try {
      await this.room.localParticipant.setAttributes({
        [`lk.agent.state`]: ev.newState,
      });
    } catch (error) {
      // This runs as an event listener: nobody awaits the returned promise, so
      // a failure here must be handled locally instead of becoming an unhandled rejection.
      this.logger.warn({ error }, 'Failed to publish agent state attribute');
    }
  };

  /** Reads a text stream from the linked participant and hands it to the text callback. */
  private onUserTextInput = (reader: TextStreamReader, participantInfo: { identity: string }) => {
    if (participantInfo.identity !== this.participantIdentity) {
      return;
    }

    const participant = this.room.remoteParticipants.get(participantInfo.identity);
    if (!participant) {
      this.logger.warn('participant not found, ignoring text input');
      return;
    }

    // An explicit `textInputCallback: undefined` override wipes the merged
    // default, so fall back to the default handler instead of asserting non-null.
    const callback = this.inputOptions.textInputCallback ?? DEFAULT_TEXT_INPUT_CALLBACK;

    const readText = async () => {
      const text = await reader.readAll();
      // The callback may be sync or async; awaiting covers both.
      await callback(this.agentSession, {
        text,
        info: reader.info,
        participant,
      });
    };

    readText().catch((error) => {
      this.logger.error({ error }, 'Error reading text input');
    });
  };

  /**
   * Drains queued user transcript events into the user transcript output until
   * the stream ends or the task is aborted.
   */
  private async forwardUserTranscript(signal: AbortSignal): Promise<void> {
    const reader = this.userTranscriptStream.readable.getReader();
    try {
      while (!signal.aborted) {
        const { done, value } = await reader.read();
        if (done) break;

        const event = value;
        // IMPORTANT: need to await here to avoid race condition
        await this.userTranscriptOutput?.captureText(event.transcript);
        if (event.isFinal) {
          this.userTranscriptOutput?.flush();
        }
      }
    } catch (error) {
      this.logger.error({ error }, 'Error processing transcript stream');
    } finally {
      // No read is pending at this point, so releasing the lock is safe.
      reader.releaseLock();
    }
  }

  /** Builds a text output that fans out to the legacy and current transcription sinks. */
  private createTranscriptionOutput(options: {
    isDeltaStream: boolean;
    participant: Participant | string | null;
  }) {
    return new ParalellTextOutput([
      new ParticipantLegacyTranscriptionOutput(
        this.room,
        options.isDeltaStream,
        options.participant,
      ),
      new ParticipantTranscriptionOutput(this.room, options.isDeltaStream, options.participant),
    ]);
  }

  /** Re-targets every participant transcription sink of `output` to `participant`. */
  private updateTranscriptionOutput({
    output,
    participant,
  }: {
    output?: ParalellTextOutput;
    participant: string | null;
  }) {
    if (!output) {
      return;
    }

    for (const sink of output._sinks) {
      if (
        sink instanceof ParticipantLegacyTranscriptionOutput ||
        sink instanceof ParticipantTranscriptionOutput
      ) {
        sink.setParticipant(participant);
      }
    }
  }

  /** Audio output to attach to the session: the synchronizer's when present, else the raw one. */
  get audioOutput(): AudioOutput | undefined {
    if (!this.transcriptionSynchronizer) {
      return this.participantAudioOutput;
    }

    return this.transcriptionSynchronizer.audioOutput;
  }

  /** Text output to attach to the session: the synchronizer's when present, else the raw one. */
  get transcriptionOutput(): TextOutput | undefined {
    if (!this.transcriptionSynchronizer) {
      return this.agentTranscriptOutput;
    }

    return this.transcriptionSynchronizer.textOutput;
  }

  /**
   * Switch to a different participant.
   *
   * @param participantIdentity - Identity to link to; `null` unlinks
   *   (same as {@link RoomIO.unsetParticipant}).
   */
  setParticipant(participantIdentity: string | null) {
    this.logger.debug({ participantIdentity }, 'setting participant');
    if (participantIdentity === null) {
      this.unsetParticipant();
      return;
    }

    if (this.participantIdentity !== participantIdentity) {
      this.participantAvailableFuture = new Future<RemoteParticipant>();

      // check if new participant is already connected
      const connected = this.room.remoteParticipants.get(participantIdentity);
      if (connected) {
        this.participantAvailableFuture.resolve(connected);
      }
    }

    // update participant identity and handlers
    this.participantIdentity = participantIdentity;
    this.audioInput?.setParticipant(participantIdentity);
    this.updateTranscriptionOutput({
      output: this.userTranscriptOutput,
      participant: participantIdentity,
    });
  }

  /** Unlinks the current participant from the audio input and user transcript output. */
  unsetParticipant() {
    this.participantIdentity = null;
    this.participantAvailableFuture = new Future<RemoteParticipant>();
    this.audioInput?.setParticipant(null);
    this.updateTranscriptionOutput({
      output: this.userTranscriptOutput,
      participant: null,
    });
  }

  /**
   * Creates the enabled inputs/outputs, subscribes to room and session events,
   * launches background initialisation, and attaches the I/O to the session.
   */
  start() {
    if (this.inputOptions.textEnabled) {
      try {
        this.room.registerTextStreamHandler(TOPIC_CHAT, this.onUserTextInput);
        this.textStreamHandlerRegistered = true;
      } catch (error) {
        // Best effort: another component may already own the topic.
        this.logger.warn(
          { error },
          `text stream handler for topic "${TOPIC_CHAT}" already set, ignoring`,
        );
      }
    }

    // -- create inputs --
    if (this.inputOptions.audioEnabled) {
      this.audioInput = new ParticipantAudioInputStream({
        room: this.room,
        sampleRate: this.inputOptions.audioSampleRate,
        numChannels: this.inputOptions.audioNumChannels,
        noiseCancellation: this.inputOptions.noiseCancellation,
      });
    }

    // -- create outputs --
    if (this.outputOptions.audioEnabled) {
      this.participantAudioOutput = new ParticipantAudioOutput(this.room, {
        sampleRate: this.outputOptions.audioSampleRate,
        numChannels: this.outputOptions.audioNumChannels,
        trackPublishOptions: this.outputOptions.audioPublishOptions,
      });
    }
    if (this.outputOptions.transcriptionEnabled) {
      this.userTranscriptOutput = this.createTranscriptionOutput({
        isDeltaStream: false,
        participant: this.participantIdentity,
      });
      // Start the transcript forwarding
      this.forwardUserTranscriptTask = Task.from((controller) =>
        this.forwardUserTranscript(controller.signal),
      );
      this.agentTranscriptOutput = this.createTranscriptionOutput({
        isDeltaStream: true,
        participant: null,
      });

      // use the RoomIO's audio output if available, otherwise use the agent's audio output
      // TODO(AJS-176): check for agent output
      const audioOutput = this.participantAudioOutput;
      if (this.outputOptions.syncTranscription && audioOutput) {
        this.transcriptionSynchronizer = new TranscriptionSynchronizer(
          audioOutput,
          this.agentTranscriptOutput,
        );
      }
    }

    // -- set the room event handlers --
    this.room.on(RoomEvent.ParticipantConnected, this.onParticipantConnected);
    this.room.on(RoomEvent.ConnectionStateChanged, this.onConnectionStateChanged);
    this.room.on(RoomEvent.ParticipantDisconnected, this.onParticipantDisconnected);
    if (this.room.isConnected) {
      // Already connected: no state-change event will arrive, so trigger it manually.
      this.onConnectionStateChanged(ConnectionState.CONN_CONNECTED);
    }

    this.initTask = Task.from((controller) => this.init(controller.signal));

    // -- attach the agent to the session --
    if (this.audioInput) {
      this.agentSession.input.audio = this.audioInput;
    }
    if (this.audioOutput) {
      this.agentSession.output.audio = this.audioOutput;
    }
    if (this.transcriptionOutput) {
      this.agentSession.output.transcription = this.transcriptionOutput;
    }

    this.agentSession.on(AgentSessionEventTypes.AgentStateChanged, this.onAgentStateChanged);
    this.agentSession.on(AgentSessionEventTypes.UserInputTranscribed, this.onUserInputTranscribed);
  }

  /** Removes all listeners, stops background tasks, and closes the created inputs/outputs. */
  async close() {
    this.room.off(RoomEvent.ParticipantConnected, this.onParticipantConnected);
    this.room.off(RoomEvent.ConnectionStateChanged, this.onConnectionStateChanged);
    this.room.off(RoomEvent.ParticipantDisconnected, this.onParticipantDisconnected);
    this.agentSession.off(AgentSessionEventTypes.UserInputTranscribed, this.onUserInputTranscribed);
    this.agentSession.off(AgentSessionEventTypes.AgentStateChanged, this.onAgentStateChanged);

    if (this.textStreamHandlerRegistered) {
      this.room.unregisterTextStreamHandler(TOPIC_CHAT);
      this.textStreamHandlerRegistered = false;
    }

    await this.initTask?.cancelAndWait();

    // Close stream FIRST so reader.read() in forwardUserTranscript can exit.
    // This is a workaround for a race condition in the stream API.
    // Deliberately not awaited: with no active reader, queued writes could keep
    // the close pending forever. The catch only prevents an unhandled rejection
    // when the writer is already closed (e.g. close() called twice).
    this.userTranscriptWriter.close().catch((error) => {
      this.logger.debug({ error }, 'user transcript stream was already closed');
    });
    await this.forwardUserTranscriptTask?.cancelAndWait();

    await this.audioInput?.close();
    await this.participantAudioOutput?.close();
    await this.transcriptionSynchronizer?.close();
  }
}
@@ -0,0 +1,20 @@
1
+ // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+ import type { FunctionCall } from '../llm/chat_context.js';
5
+ import type { AgentSession } from './agent_session.js';
6
+ import type { SpeechHandle } from './speech_handle.js';
7
+
8
/** User-data type used when `RunContext` is not given an explicit one. */
export type UnknownUserData = unknown;

/**
 * Immutable bundle of the session, speech handle and function call that
 * belong to one run, with a shortcut to the session's user data.
 */
export class RunContext<UserData = UnknownUserData> {
  /** Session this run belongs to. */
  public readonly session: AgentSession<UserData>;
  /** Speech handle associated with this run. */
  public readonly speechHandle: SpeechHandle;
  /** Function call associated with this run. */
  public readonly functionCall: FunctionCall;

  constructor(
    session: AgentSession<UserData>,
    speechHandle: SpeechHandle,
    functionCall: FunctionCall,
  ) {
    this.session = session;
    this.speechHandle = speechHandle;
    this.functionCall = functionCall;
  }

  /** User data stored on the session; read from the session on every access. */
  get userData(): UserData {
    const { session } = this;
    return session.userData;
  }
}
@@ -0,0 +1,104 @@
1
+ // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+ import type { ChatMessage } from '../llm/index.js';
5
+ import { Future, shortuuid } from '../utils.js';
6
+
7
/**
 * Tracks one piece of agent speech through three independent signals:
 * interruption, playout authorization, and playout completion.
 */
export class SpeechHandle {
  /** Priority for messages that should be played after all other messages in the queue */
  static SPEECH_PRIORITY_LOW = 0;
  /** Every speech generated by the VoiceAgent defaults to this priority. */
  static SPEECH_PRIORITY_NORMAL = 5;
  /** Priority for important messages that should be played before others. */
  static SPEECH_PRIORITY_HIGH = 10;

  /** Resolved when the speech is interrupted. */
  private interruptFut = new Future();
  /** Resolved when playout has been authorized. */
  private authorizeFut = new Future();
  /** Resolved when playout has been marked done. */
  private playoutDoneFut = new Future();

  private _chatMessage?: ChatMessage;

  /**
   * @param id - Identifier of this speech.
   * @param allowInterruptions - Whether {@link SpeechHandle.interrupt} is permitted.
   * @param stepIndex - Step index stored on the handle.
   * @param parent - Optional parent handle.
   */
  constructor(
    readonly id: string,
    readonly allowInterruptions: boolean,
    readonly stepIndex: number,
    readonly parent?: SpeechHandle,
  ) {}

  /**
   * Creates a handle with a generated `speech_`-prefixed id.
   *
   * @param options - Optional settings; `allowInterruptions` defaults to
   *   `false` and `stepIndex` to `0`. May be omitted entirely.
   * @returns The new speech handle.
   */
  static create(options?: {
    allowInterruptions?: boolean;
    stepIndex?: number;
    parent?: SpeechHandle;
  }): SpeechHandle {
    const { allowInterruptions = false, stepIndex = 0, parent } = options ?? {};

    return new SpeechHandle(shortuuid('speech_'), allowInterruptions, stepIndex, parent);
  }

  /** True once the speech has been interrupted. */
  get interrupted(): boolean {
    return this.interruptFut.done;
  }

  /** True once playout has been marked done. */
  get done(): boolean {
    return this.playoutDoneFut.done;
  }

  /** Chat message attached to this speech, if one has been set. */
  get chatMessage(): ChatMessage | undefined {
    return this._chatMessage;
  }

  /**
   * Interrupt the current speech generation.
   *
   * Calling this on a handle that is already done or already interrupted is a no-op.
   *
   * @throws Error If this speech handle does not allow interruptions.
   *
   * @returns The same speech handle that was interrupted.
   */
  interrupt(): SpeechHandle {
    if (!this.allowInterruptions) {
      throw new Error('interruptions are not allowed');
    }

    // Nothing left to signal once playout finished or an interrupt was already recorded.
    if (this.done || this.interrupted) return this;

    this.interruptFut.resolve();
    return this;
  }

  /**
   * Runs `callback` with this handle once playout is done.
   *
   * NOTE(review): because this method is named `then`, instances are thenables.
   * `await handle` would call it with the promise's resolve function, which
   * receives this thenable again and re-enters `then` indefinitely. Use
   * {@link SpeechHandle.waitForPlayout} to await completion instead.
   *
   * @param callback - Invoked with this handle after playout completes.
   */
  then(callback: (sh: SpeechHandle) => void) {
    return this.playoutDoneFut.await.finally(() => callback(this));
  }

  /** Resolves when playout has been marked done. */
  async waitForPlayout() {
    return this.playoutDoneFut.await;
  }

  /**
   * Waits until every promise in `aw` has fulfilled or the speech is
   * interrupted, whichever happens first. A rejection from `aw` propagates.
   *
   * @param aw - Promises to wait for.
   */
  async waitIfNotInterrupted(aw: Promise<unknown>[]): Promise<void> {
    const allTasksPromise = Promise.all(aw);
    const fs: Promise<unknown>[] = [allTasksPromise, this.interruptFut.await];
    await Promise.race(fs);
  }

  /** @internal */
  _setChatMessage(chatMessage: ChatMessage) {
    if (this.done) {
      throw new Error('cannot set chat message after speech has been played');
    }
    this._chatMessage = chatMessage;
  }

  /** @internal */
  _authorizePlayout() {
    this.authorizeFut.resolve();
  }

  /** @internal */
  async _waitForAuthorization() {
    return this.authorizeFut.await;
  }

  /** @internal */
  _markPlayoutDone() {
    this.playoutDoneFut.resolve();
  }
}
@@ -0,0 +1,25 @@
1
+ // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+ import type { LocalParticipant, Participant, Room } from '@livekit/rtc-node';
5
+ import { TrackSource } from '@livekit/rtc-node';
6
+
7
/**
 * Looks up the sid of the first microphone track published by a participant.
 *
 * The local participant takes precedence when its identity matches; otherwise
 * the remote participants are searched.
 *
 * @param room - Room whose participants are searched.
 * @param identity - Identity of the participant to inspect.
 * @returns The sid of the first microphone publication that has one.
 * @throws Error If no participant has that identity, or if the participant
 *   has no microphone publication with a sid.
 */
export function findMicrophoneTrackId(room: Room, identity: string): string {
  const local = room.localParticipant;
  const participant: Participant | LocalParticipant | null =
    identity === local?.identity ? local : room.remoteParticipants.get(identity) ?? null;

  if (participant === null) {
    throw new Error(`Participant ${identity} not found`);
  }

  // first microphone publication that already carries a sid
  const microphone = [...participant.trackPublications.values()].find(
    (publication) => publication.source === TrackSource.SOURCE_MICROPHONE && publication.sid,
  );
  if (microphone?.sid) {
    return microphone.sid;
  }

  throw new Error(`Participant ${identity} does not have a microphone track`);
}
@@ -0,0 +1,4 @@
1
+ // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+ export * from './_utils.js';