@livekit/agents 1.1.0 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (959)
  1. package/dist/_exceptions.cjs.map +1 -1
  2. package/dist/_exceptions.d.ts.map +1 -1
  3. package/dist/_exceptions.js.map +1 -1
  4. package/dist/audio.cjs +89 -3
  5. package/dist/audio.cjs.map +1 -1
  6. package/dist/audio.d.cts +36 -1
  7. package/dist/audio.d.ts +36 -1
  8. package/dist/audio.d.ts.map +1 -1
  9. package/dist/audio.js +76 -2
  10. package/dist/audio.js.map +1 -1
  11. package/dist/beta/index.cjs +29 -0
  12. package/dist/beta/index.cjs.map +1 -0
  13. package/dist/beta/index.d.cts +2 -0
  14. package/dist/beta/index.d.ts +2 -0
  15. package/dist/beta/index.d.ts.map +1 -0
  16. package/dist/beta/index.js +7 -0
  17. package/dist/beta/index.js.map +1 -0
  18. package/dist/beta/workflows/index.cjs +29 -0
  19. package/dist/beta/workflows/index.cjs.map +1 -0
  20. package/dist/beta/workflows/index.d.cts +2 -0
  21. package/dist/beta/workflows/index.d.ts +2 -0
  22. package/dist/beta/workflows/index.d.ts.map +1 -0
  23. package/dist/beta/workflows/index.js +7 -0
  24. package/dist/beta/workflows/index.js.map +1 -0
  25. package/dist/beta/workflows/task_group.cjs +165 -0
  26. package/dist/beta/workflows/task_group.cjs.map +1 -0
  27. package/dist/beta/workflows/task_group.d.cts +32 -0
  28. package/dist/beta/workflows/task_group.d.ts +32 -0
  29. package/dist/beta/workflows/task_group.d.ts.map +1 -0
  30. package/dist/beta/workflows/task_group.js +141 -0
  31. package/dist/beta/workflows/task_group.js.map +1 -0
  32. package/dist/cli.cjs +44 -46
  33. package/dist/cli.cjs.map +1 -1
  34. package/dist/cli.d.cts +3 -3
  35. package/dist/cli.d.ts +3 -3
  36. package/dist/cli.d.ts.map +1 -1
  37. package/dist/cli.js +45 -47
  38. package/dist/cli.js.map +1 -1
  39. package/dist/connection_pool.cjs +242 -0
  40. package/dist/connection_pool.cjs.map +1 -0
  41. package/dist/connection_pool.d.cts +123 -0
  42. package/dist/connection_pool.d.ts +123 -0
  43. package/dist/connection_pool.d.ts.map +1 -0
  44. package/dist/connection_pool.js +218 -0
  45. package/dist/connection_pool.js.map +1 -0
  46. package/dist/connection_pool.test.cjs +256 -0
  47. package/dist/connection_pool.test.cjs.map +1 -0
  48. package/dist/connection_pool.test.js +255 -0
  49. package/dist/connection_pool.test.js.map +1 -0
  50. package/dist/constants.cjs +30 -0
  51. package/dist/constants.cjs.map +1 -1
  52. package/dist/constants.d.cts +10 -0
  53. package/dist/constants.d.ts +10 -0
  54. package/dist/constants.d.ts.map +1 -1
  55. package/dist/constants.js +20 -0
  56. package/dist/constants.js.map +1 -1
  57. package/dist/cpu.cjs +189 -0
  58. package/dist/cpu.cjs.map +1 -0
  59. package/dist/cpu.d.cts +24 -0
  60. package/dist/cpu.d.ts +24 -0
  61. package/dist/cpu.d.ts.map +1 -0
  62. package/dist/cpu.js +152 -0
  63. package/dist/cpu.js.map +1 -0
  64. package/dist/cpu.test.cjs +227 -0
  65. package/dist/cpu.test.cjs.map +1 -0
  66. package/dist/cpu.test.js +204 -0
  67. package/dist/cpu.test.js.map +1 -0
  68. package/dist/http_server.cjs +9 -6
  69. package/dist/http_server.cjs.map +1 -1
  70. package/dist/http_server.d.cts +5 -1
  71. package/dist/http_server.d.ts +5 -1
  72. package/dist/http_server.d.ts.map +1 -1
  73. package/dist/http_server.js +9 -6
  74. package/dist/http_server.js.map +1 -1
  75. package/dist/index.cjs +24 -9
  76. package/dist/index.cjs.map +1 -1
  77. package/dist/index.d.cts +15 -11
  78. package/dist/index.d.ts +15 -11
  79. package/dist/index.d.ts.map +1 -1
  80. package/dist/index.js +18 -9
  81. package/dist/index.js.map +1 -1
  82. package/dist/inference/api_protos.cjs +70 -2
  83. package/dist/inference/api_protos.cjs.map +1 -1
  84. package/dist/inference/api_protos.d.cts +373 -32
  85. package/dist/inference/api_protos.d.ts +373 -32
  86. package/dist/inference/api_protos.d.ts.map +1 -1
  87. package/dist/inference/api_protos.js +62 -2
  88. package/dist/inference/api_protos.js.map +1 -1
  89. package/dist/inference/index.cjs +8 -0
  90. package/dist/inference/index.cjs.map +1 -1
  91. package/dist/inference/index.d.cts +3 -4
  92. package/dist/inference/index.d.ts +3 -4
  93. package/dist/inference/index.d.ts.map +1 -1
  94. package/dist/inference/index.js +18 -3
  95. package/dist/inference/index.js.map +1 -1
  96. package/dist/inference/interruption/defaults.cjs +81 -0
  97. package/dist/inference/interruption/defaults.cjs.map +1 -0
  98. package/dist/inference/interruption/defaults.d.cts +19 -0
  99. package/dist/inference/interruption/defaults.d.ts +19 -0
  100. package/dist/inference/interruption/defaults.d.ts.map +1 -0
  101. package/dist/inference/interruption/defaults.js +46 -0
  102. package/dist/inference/interruption/defaults.js.map +1 -0
  103. package/dist/inference/interruption/errors.cjs +44 -0
  104. package/dist/inference/interruption/errors.cjs.map +1 -0
  105. package/dist/inference/interruption/errors.d.cts +12 -0
  106. package/dist/inference/interruption/errors.d.ts +12 -0
  107. package/dist/inference/interruption/errors.d.ts.map +1 -0
  108. package/dist/inference/interruption/errors.js +20 -0
  109. package/dist/inference/interruption/errors.js.map +1 -0
  110. package/dist/inference/interruption/http_transport.cjs +163 -0
  111. package/dist/inference/interruption/http_transport.cjs.map +1 -0
  112. package/dist/inference/interruption/http_transport.d.cts +65 -0
  113. package/dist/inference/interruption/http_transport.d.ts +65 -0
  114. package/dist/inference/interruption/http_transport.d.ts.map +1 -0
  115. package/dist/inference/interruption/http_transport.js +137 -0
  116. package/dist/inference/interruption/http_transport.js.map +1 -0
  117. package/dist/inference/interruption/interruption_cache_entry.cjs +58 -0
  118. package/dist/inference/interruption/interruption_cache_entry.cjs.map +1 -0
  119. package/dist/inference/interruption/interruption_cache_entry.d.cts +30 -0
  120. package/dist/inference/interruption/interruption_cache_entry.d.ts +30 -0
  121. package/dist/inference/interruption/interruption_cache_entry.d.ts.map +1 -0
  122. package/dist/inference/interruption/interruption_cache_entry.js +34 -0
  123. package/dist/inference/interruption/interruption_cache_entry.js.map +1 -0
  124. package/dist/inference/interruption/interruption_detector.cjs +198 -0
  125. package/dist/inference/interruption/interruption_detector.cjs.map +1 -0
  126. package/dist/inference/interruption/interruption_detector.d.cts +59 -0
  127. package/dist/inference/interruption/interruption_detector.d.ts +59 -0
  128. package/dist/inference/interruption/interruption_detector.d.ts.map +1 -0
  129. package/dist/inference/interruption/interruption_detector.js +164 -0
  130. package/dist/inference/interruption/interruption_detector.js.map +1 -0
  131. package/dist/inference/interruption/interruption_stream.cjs +368 -0
  132. package/dist/inference/interruption/interruption_stream.cjs.map +1 -0
  133. package/dist/inference/interruption/interruption_stream.d.cts +46 -0
  134. package/dist/inference/interruption/interruption_stream.d.ts +46 -0
  135. package/dist/inference/interruption/interruption_stream.d.ts.map +1 -0
  136. package/dist/inference/interruption/interruption_stream.js +344 -0
  137. package/dist/inference/interruption/interruption_stream.js.map +1 -0
  138. package/dist/inference/interruption/types.cjs +17 -0
  139. package/dist/inference/interruption/types.cjs.map +1 -0
  140. package/dist/inference/interruption/types.d.cts +66 -0
  141. package/dist/inference/interruption/types.d.ts +66 -0
  142. package/dist/inference/interruption/types.d.ts.map +1 -0
  143. package/dist/inference/interruption/types.js +1 -0
  144. package/dist/inference/interruption/types.js.map +1 -0
  145. package/dist/inference/interruption/utils.cjs +130 -0
  146. package/dist/inference/interruption/utils.cjs.map +1 -0
  147. package/dist/inference/interruption/utils.d.cts +41 -0
  148. package/dist/inference/interruption/utils.d.ts +41 -0
  149. package/dist/inference/interruption/utils.d.ts.map +1 -0
  150. package/dist/inference/interruption/utils.js +105 -0
  151. package/dist/inference/interruption/utils.js.map +1 -0
  152. package/dist/inference/interruption/utils.test.cjs +105 -0
  153. package/dist/inference/interruption/utils.test.cjs.map +1 -0
  154. package/dist/inference/interruption/utils.test.js +104 -0
  155. package/dist/inference/interruption/utils.test.js.map +1 -0
  156. package/dist/inference/interruption/ws_transport.cjs +347 -0
  157. package/dist/inference/interruption/ws_transport.cjs.map +1 -0
  158. package/dist/inference/interruption/ws_transport.d.cts +33 -0
  159. package/dist/inference/interruption/ws_transport.d.ts +33 -0
  160. package/dist/inference/interruption/ws_transport.d.ts.map +1 -0
  161. package/dist/inference/interruption/ws_transport.js +313 -0
  162. package/dist/inference/interruption/ws_transport.js.map +1 -0
  163. package/dist/inference/llm.cjs +106 -66
  164. package/dist/inference/llm.cjs.map +1 -1
  165. package/dist/inference/llm.d.cts +65 -43
  166. package/dist/inference/llm.d.ts +65 -43
  167. package/dist/inference/llm.d.ts.map +1 -1
  168. package/dist/inference/llm.js +100 -66
  169. package/dist/inference/llm.js.map +1 -1
  170. package/dist/inference/stt.cjs +319 -170
  171. package/dist/inference/stt.cjs.map +1 -1
  172. package/dist/inference/stt.d.cts +64 -15
  173. package/dist/inference/stt.d.ts +64 -15
  174. package/dist/inference/stt.d.ts.map +1 -1
  175. package/dist/inference/stt.js +319 -170
  176. package/dist/inference/stt.js.map +1 -1
  177. package/dist/inference/stt.test.cjs +218 -0
  178. package/dist/inference/stt.test.cjs.map +1 -0
  179. package/dist/inference/stt.test.js +217 -0
  180. package/dist/inference/stt.test.js.map +1 -0
  181. package/dist/inference/tts.cjs +249 -71
  182. package/dist/inference/tts.cjs.map +1 -1
  183. package/dist/inference/tts.d.cts +94 -17
  184. package/dist/inference/tts.d.ts +94 -17
  185. package/dist/inference/tts.d.ts.map +1 -1
  186. package/dist/inference/tts.js +249 -77
  187. package/dist/inference/tts.js.map +1 -1
  188. package/dist/inference/tts.test.cjs +305 -0
  189. package/dist/inference/tts.test.cjs.map +1 -0
  190. package/dist/inference/tts.test.js +304 -0
  191. package/dist/inference/tts.test.js.map +1 -0
  192. package/dist/inference/utils.cjs +26 -7
  193. package/dist/inference/utils.cjs.map +1 -1
  194. package/dist/inference/utils.d.cts +14 -1
  195. package/dist/inference/utils.d.ts +14 -1
  196. package/dist/inference/utils.d.ts.map +1 -1
  197. package/dist/inference/utils.js +18 -2
  198. package/dist/inference/utils.js.map +1 -1
  199. package/dist/ipc/inference_proc_executor.cjs +6 -3
  200. package/dist/ipc/inference_proc_executor.cjs.map +1 -1
  201. package/dist/ipc/inference_proc_executor.d.ts.map +1 -1
  202. package/dist/ipc/inference_proc_executor.js +6 -3
  203. package/dist/ipc/inference_proc_executor.js.map +1 -1
  204. package/dist/ipc/inference_proc_lazy_main.cjs +13 -1
  205. package/dist/ipc/inference_proc_lazy_main.cjs.map +1 -1
  206. package/dist/ipc/inference_proc_lazy_main.js +13 -1
  207. package/dist/ipc/inference_proc_lazy_main.js.map +1 -1
  208. package/dist/ipc/job_proc_executor.cjs +6 -1
  209. package/dist/ipc/job_proc_executor.cjs.map +1 -1
  210. package/dist/ipc/job_proc_executor.d.ts.map +1 -1
  211. package/dist/ipc/job_proc_executor.js +6 -1
  212. package/dist/ipc/job_proc_executor.js.map +1 -1
  213. package/dist/ipc/job_proc_lazy_main.cjs +89 -17
  214. package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
  215. package/dist/ipc/job_proc_lazy_main.js +68 -18
  216. package/dist/ipc/job_proc_lazy_main.js.map +1 -1
  217. package/dist/ipc/supervised_proc.cjs +34 -8
  218. package/dist/ipc/supervised_proc.cjs.map +1 -1
  219. package/dist/ipc/supervised_proc.d.cts +8 -0
  220. package/dist/ipc/supervised_proc.d.ts +8 -0
  221. package/dist/ipc/supervised_proc.d.ts.map +1 -1
  222. package/dist/ipc/supervised_proc.js +34 -8
  223. package/dist/ipc/supervised_proc.js.map +1 -1
  224. package/dist/ipc/supervised_proc.test.cjs +145 -0
  225. package/dist/ipc/supervised_proc.test.cjs.map +1 -0
  226. package/dist/ipc/supervised_proc.test.js +122 -0
  227. package/dist/ipc/supervised_proc.test.js.map +1 -0
  228. package/dist/job.cjs +109 -1
  229. package/dist/job.cjs.map +1 -1
  230. package/dist/job.d.cts +14 -0
  231. package/dist/job.d.ts +14 -0
  232. package/dist/job.d.ts.map +1 -1
  233. package/dist/job.js +99 -1
  234. package/dist/job.js.map +1 -1
  235. package/dist/language.cjs +394 -0
  236. package/dist/language.cjs.map +1 -0
  237. package/dist/language.d.cts +15 -0
  238. package/dist/language.d.ts +15 -0
  239. package/dist/language.d.ts.map +1 -0
  240. package/dist/language.js +363 -0
  241. package/dist/language.js.map +1 -0
  242. package/dist/language.test.cjs +43 -0
  243. package/dist/language.test.cjs.map +1 -0
  244. package/dist/language.test.js +49 -0
  245. package/dist/language.test.js.map +1 -0
  246. package/dist/llm/chat_context.cjs +345 -3
  247. package/dist/llm/chat_context.cjs.map +1 -1
  248. package/dist/llm/chat_context.d.cts +86 -2
  249. package/dist/llm/chat_context.d.ts +86 -2
  250. package/dist/llm/chat_context.d.ts.map +1 -1
  251. package/dist/llm/chat_context.js +344 -3
  252. package/dist/llm/chat_context.js.map +1 -1
  253. package/dist/llm/chat_context.test.cjs +692 -0
  254. package/dist/llm/chat_context.test.cjs.map +1 -1
  255. package/dist/llm/chat_context.test.js +692 -0
  256. package/dist/llm/chat_context.test.js.map +1 -1
  257. package/dist/llm/fallback_adapter.cjs +280 -0
  258. package/dist/llm/fallback_adapter.cjs.map +1 -0
  259. package/dist/llm/fallback_adapter.d.cts +73 -0
  260. package/dist/llm/fallback_adapter.d.ts +73 -0
  261. package/dist/llm/fallback_adapter.d.ts.map +1 -0
  262. package/dist/llm/fallback_adapter.js +256 -0
  263. package/dist/llm/fallback_adapter.js.map +1 -0
  264. package/dist/llm/fallback_adapter.test.cjs +176 -0
  265. package/dist/llm/fallback_adapter.test.cjs.map +1 -0
  266. package/dist/llm/fallback_adapter.test.js +175 -0
  267. package/dist/llm/fallback_adapter.test.js.map +1 -0
  268. package/dist/llm/index.cjs +11 -0
  269. package/dist/llm/index.cjs.map +1 -1
  270. package/dist/llm/index.d.cts +4 -3
  271. package/dist/llm/index.d.ts +4 -3
  272. package/dist/llm/index.d.ts.map +1 -1
  273. package/dist/llm/index.js +13 -1
  274. package/dist/llm/index.js.map +1 -1
  275. package/dist/llm/llm.cjs +65 -11
  276. package/dist/llm/llm.cjs.map +1 -1
  277. package/dist/llm/llm.d.cts +13 -2
  278. package/dist/llm/llm.d.ts +13 -2
  279. package/dist/llm/llm.d.ts.map +1 -1
  280. package/dist/llm/llm.js +65 -11
  281. package/dist/llm/llm.js.map +1 -1
  282. package/dist/llm/provider_format/google.cjs +6 -2
  283. package/dist/llm/provider_format/google.cjs.map +1 -1
  284. package/dist/llm/provider_format/google.d.cts +1 -1
  285. package/dist/llm/provider_format/google.d.ts +1 -1
  286. package/dist/llm/provider_format/google.d.ts.map +1 -1
  287. package/dist/llm/provider_format/google.js +6 -2
  288. package/dist/llm/provider_format/google.js.map +1 -1
  289. package/dist/llm/provider_format/google.test.cjs +48 -0
  290. package/dist/llm/provider_format/google.test.cjs.map +1 -1
  291. package/dist/llm/provider_format/google.test.js +54 -1
  292. package/dist/llm/provider_format/google.test.js.map +1 -1
  293. package/dist/llm/provider_format/index.cjs +2 -0
  294. package/dist/llm/provider_format/index.cjs.map +1 -1
  295. package/dist/llm/provider_format/index.d.cts +2 -2
  296. package/dist/llm/provider_format/index.d.ts +2 -2
  297. package/dist/llm/provider_format/index.d.ts.map +1 -1
  298. package/dist/llm/provider_format/index.js +6 -1
  299. package/dist/llm/provider_format/index.js.map +1 -1
  300. package/dist/llm/provider_format/openai.cjs +126 -24
  301. package/dist/llm/provider_format/openai.cjs.map +1 -1
  302. package/dist/llm/provider_format/openai.d.cts +1 -0
  303. package/dist/llm/provider_format/openai.d.ts +1 -0
  304. package/dist/llm/provider_format/openai.d.ts.map +1 -1
  305. package/dist/llm/provider_format/openai.js +124 -23
  306. package/dist/llm/provider_format/openai.js.map +1 -1
  307. package/dist/llm/provider_format/openai.test.cjs +393 -0
  308. package/dist/llm/provider_format/openai.test.cjs.map +1 -1
  309. package/dist/llm/provider_format/openai.test.js +400 -2
  310. package/dist/llm/provider_format/openai.test.js.map +1 -1
  311. package/dist/llm/provider_format/utils.cjs +5 -4
  312. package/dist/llm/provider_format/utils.cjs.map +1 -1
  313. package/dist/llm/provider_format/utils.d.ts.map +1 -1
  314. package/dist/llm/provider_format/utils.js +5 -4
  315. package/dist/llm/provider_format/utils.js.map +1 -1
  316. package/dist/llm/realtime.cjs +3 -0
  317. package/dist/llm/realtime.cjs.map +1 -1
  318. package/dist/llm/realtime.d.cts +15 -1
  319. package/dist/llm/realtime.d.ts +15 -1
  320. package/dist/llm/realtime.d.ts.map +1 -1
  321. package/dist/llm/realtime.js +3 -0
  322. package/dist/llm/realtime.js.map +1 -1
  323. package/dist/llm/remote_chat_context.cjs.map +1 -1
  324. package/dist/llm/remote_chat_context.d.cts +2 -0
  325. package/dist/llm/remote_chat_context.d.ts +2 -0
  326. package/dist/llm/remote_chat_context.d.ts.map +1 -1
  327. package/dist/llm/remote_chat_context.js.map +1 -1
  328. package/dist/llm/tool_context.cjs +50 -2
  329. package/dist/llm/tool_context.cjs.map +1 -1
  330. package/dist/llm/tool_context.d.cts +47 -11
  331. package/dist/llm/tool_context.d.ts +47 -11
  332. package/dist/llm/tool_context.d.ts.map +1 -1
  333. package/dist/llm/tool_context.js +48 -3
  334. package/dist/llm/tool_context.js.map +1 -1
  335. package/dist/llm/tool_context.test.cjs +197 -0
  336. package/dist/llm/tool_context.test.cjs.map +1 -1
  337. package/dist/llm/tool_context.test.js +175 -0
  338. package/dist/llm/tool_context.test.js.map +1 -1
  339. package/dist/llm/utils.cjs +107 -12
  340. package/dist/llm/utils.cjs.map +1 -1
  341. package/dist/llm/utils.d.cts +10 -3
  342. package/dist/llm/utils.d.ts +10 -3
  343. package/dist/llm/utils.d.ts.map +1 -1
  344. package/dist/llm/utils.js +106 -12
  345. package/dist/llm/utils.js.map +1 -1
  346. package/dist/llm/utils.test.cjs +90 -0
  347. package/dist/llm/utils.test.cjs.map +1 -1
  348. package/dist/llm/utils.test.js +98 -2
  349. package/dist/llm/utils.test.js.map +1 -1
  350. package/dist/llm/zod-utils.cjs +102 -0
  351. package/dist/llm/zod-utils.cjs.map +1 -0
  352. package/dist/llm/zod-utils.d.cts +65 -0
  353. package/dist/llm/zod-utils.d.ts +65 -0
  354. package/dist/llm/zod-utils.d.ts.map +1 -0
  355. package/dist/llm/zod-utils.js +64 -0
  356. package/dist/llm/zod-utils.js.map +1 -0
  357. package/dist/llm/zod-utils.test.cjs +472 -0
  358. package/dist/llm/zod-utils.test.cjs.map +1 -0
  359. package/dist/llm/zod-utils.test.js +455 -0
  360. package/dist/llm/zod-utils.test.js.map +1 -0
  361. package/dist/log.cjs +45 -14
  362. package/dist/log.cjs.map +1 -1
  363. package/dist/log.d.cts +8 -1
  364. package/dist/log.d.ts +8 -1
  365. package/dist/log.d.ts.map +1 -1
  366. package/dist/log.js +45 -15
  367. package/dist/log.js.map +1 -1
  368. package/dist/metrics/base.cjs.map +1 -1
  369. package/dist/metrics/base.d.cts +75 -19
  370. package/dist/metrics/base.d.ts +75 -19
  371. package/dist/metrics/base.d.ts.map +1 -1
  372. package/dist/metrics/index.cjs +5 -0
  373. package/dist/metrics/index.cjs.map +1 -1
  374. package/dist/metrics/index.d.cts +2 -1
  375. package/dist/metrics/index.d.ts +2 -1
  376. package/dist/metrics/index.d.ts.map +1 -1
  377. package/dist/metrics/index.js +6 -0
  378. package/dist/metrics/index.js.map +1 -1
  379. package/dist/metrics/model_usage.cjs +189 -0
  380. package/dist/metrics/model_usage.cjs.map +1 -0
  381. package/dist/metrics/model_usage.d.cts +92 -0
  382. package/dist/metrics/model_usage.d.ts +92 -0
  383. package/dist/metrics/model_usage.d.ts.map +1 -0
  384. package/dist/metrics/model_usage.js +164 -0
  385. package/dist/metrics/model_usage.js.map +1 -0
  386. package/dist/metrics/model_usage.test.cjs +474 -0
  387. package/dist/metrics/model_usage.test.cjs.map +1 -0
  388. package/dist/metrics/model_usage.test.js +476 -0
  389. package/dist/metrics/model_usage.test.js.map +1 -0
  390. package/dist/metrics/usage_collector.cjs +5 -2
  391. package/dist/metrics/usage_collector.cjs.map +1 -1
  392. package/dist/metrics/usage_collector.d.cts +10 -1
  393. package/dist/metrics/usage_collector.d.ts +10 -1
  394. package/dist/metrics/usage_collector.d.ts.map +1 -1
  395. package/dist/metrics/usage_collector.js +5 -2
  396. package/dist/metrics/usage_collector.js.map +1 -1
  397. package/dist/metrics/utils.cjs +23 -7
  398. package/dist/metrics/utils.cjs.map +1 -1
  399. package/dist/metrics/utils.d.ts.map +1 -1
  400. package/dist/metrics/utils.js +23 -7
  401. package/dist/metrics/utils.js.map +1 -1
  402. package/dist/stream/deferred_stream.cjs +31 -10
  403. package/dist/stream/deferred_stream.cjs.map +1 -1
  404. package/dist/stream/deferred_stream.d.cts +6 -1
  405. package/dist/stream/deferred_stream.d.ts +6 -1
  406. package/dist/stream/deferred_stream.d.ts.map +1 -1
  407. package/dist/stream/deferred_stream.js +31 -10
  408. package/dist/stream/deferred_stream.js.map +1 -1
  409. package/dist/stream/deferred_stream.test.cjs +2 -2
  410. package/dist/stream/deferred_stream.test.cjs.map +1 -1
  411. package/dist/stream/deferred_stream.test.js +2 -2
  412. package/dist/stream/deferred_stream.test.js.map +1 -1
  413. package/dist/stream/index.cjs +3 -0
  414. package/dist/stream/index.cjs.map +1 -1
  415. package/dist/stream/index.d.cts +1 -0
  416. package/dist/stream/index.d.ts +1 -0
  417. package/dist/stream/index.d.ts.map +1 -1
  418. package/dist/stream/index.js +2 -0
  419. package/dist/stream/index.js.map +1 -1
  420. package/dist/stream/multi_input_stream.cjs +139 -0
  421. package/dist/stream/multi_input_stream.cjs.map +1 -0
  422. package/dist/stream/multi_input_stream.d.cts +55 -0
  423. package/dist/stream/multi_input_stream.d.ts +55 -0
  424. package/dist/stream/multi_input_stream.d.ts.map +1 -0
  425. package/dist/stream/multi_input_stream.js +115 -0
  426. package/dist/stream/multi_input_stream.js.map +1 -0
  427. package/dist/stream/multi_input_stream.test.cjs +344 -0
  428. package/dist/stream/multi_input_stream.test.cjs.map +1 -0
  429. package/dist/stream/multi_input_stream.test.js +343 -0
  430. package/dist/stream/multi_input_stream.test.js.map +1 -0
  431. package/dist/stream/stream_channel.cjs +39 -1
  432. package/dist/stream/stream_channel.cjs.map +1 -1
  433. package/dist/stream/stream_channel.d.cts +5 -2
  434. package/dist/stream/stream_channel.d.ts +5 -2
  435. package/dist/stream/stream_channel.d.ts.map +1 -1
  436. package/dist/stream/stream_channel.js +39 -1
  437. package/dist/stream/stream_channel.js.map +1 -1
  438. package/dist/stream/stream_channel.test.cjs +27 -0
  439. package/dist/stream/stream_channel.test.cjs.map +1 -1
  440. package/dist/stream/stream_channel.test.js +27 -0
  441. package/dist/stream/stream_channel.test.js.map +1 -1
  442. package/dist/stt/stream_adapter.cjs +24 -9
  443. package/dist/stt/stream_adapter.cjs.map +1 -1
  444. package/dist/stt/stream_adapter.d.cts +7 -3
  445. package/dist/stt/stream_adapter.d.ts +7 -3
  446. package/dist/stt/stream_adapter.d.ts.map +1 -1
  447. package/dist/stt/stream_adapter.js +24 -9
  448. package/dist/stt/stream_adapter.js.map +1 -1
  449. package/dist/stt/stt.cjs +94 -19
  450. package/dist/stt/stt.cjs.map +1 -1
  451. package/dist/stt/stt.d.cts +68 -5
  452. package/dist/stt/stt.d.ts +68 -5
  453. package/dist/stt/stt.d.ts.map +1 -1
  454. package/dist/stt/stt.js +96 -21
  455. package/dist/stt/stt.js.map +1 -1
  456. package/dist/telemetry/index.cjs +72 -0
  457. package/dist/telemetry/index.cjs.map +1 -0
  458. package/dist/telemetry/index.d.cts +7 -0
  459. package/dist/telemetry/index.d.ts +7 -0
  460. package/dist/telemetry/index.d.ts.map +1 -0
  461. package/dist/telemetry/index.js +37 -0
  462. package/dist/telemetry/index.js.map +1 -0
  463. package/dist/telemetry/logging.cjs +65 -0
  464. package/dist/telemetry/logging.cjs.map +1 -0
  465. package/dist/telemetry/logging.d.cts +21 -0
  466. package/dist/telemetry/logging.d.ts +21 -0
  467. package/dist/telemetry/logging.d.ts.map +1 -0
  468. package/dist/telemetry/logging.js +40 -0
  469. package/dist/telemetry/logging.js.map +1 -0
  470. package/dist/telemetry/otel_http_exporter.cjs +166 -0
  471. package/dist/telemetry/otel_http_exporter.cjs.map +1 -0
  472. package/dist/telemetry/otel_http_exporter.d.cts +63 -0
  473. package/dist/telemetry/otel_http_exporter.d.ts +63 -0
  474. package/dist/telemetry/otel_http_exporter.d.ts.map +1 -0
  475. package/dist/telemetry/otel_http_exporter.js +142 -0
  476. package/dist/telemetry/otel_http_exporter.js.map +1 -0
  477. package/dist/telemetry/pino_otel_transport.cjs +217 -0
  478. package/dist/telemetry/pino_otel_transport.cjs.map +1 -0
  479. package/dist/telemetry/pino_otel_transport.d.cts +58 -0
  480. package/dist/telemetry/pino_otel_transport.d.ts +58 -0
  481. package/dist/telemetry/pino_otel_transport.d.ts.map +1 -0
  482. package/dist/telemetry/pino_otel_transport.js +189 -0
  483. package/dist/telemetry/pino_otel_transport.js.map +1 -0
  484. package/dist/telemetry/trace_types.cjs +233 -0
  485. package/dist/telemetry/trace_types.cjs.map +1 -0
  486. package/dist/telemetry/trace_types.d.cts +74 -0
  487. package/dist/telemetry/trace_types.d.ts +74 -0
  488. package/dist/telemetry/trace_types.d.ts.map +1 -0
  489. package/dist/telemetry/trace_types.js +141 -0
  490. package/dist/telemetry/trace_types.js.map +1 -0
  491. package/dist/telemetry/traces.cjs +484 -0
  492. package/dist/telemetry/traces.cjs.map +1 -0
  493. package/dist/telemetry/traces.d.cts +116 -0
  494. package/dist/telemetry/traces.d.ts +116 -0
  495. package/dist/telemetry/traces.d.ts.map +1 -0
  496. package/dist/telemetry/traces.js +449 -0
  497. package/dist/telemetry/traces.js.map +1 -0
  498. package/dist/telemetry/utils.cjs +86 -0
  499. package/dist/telemetry/utils.cjs.map +1 -0
  500. package/dist/telemetry/utils.d.cts +5 -0
  501. package/dist/telemetry/utils.d.ts +5 -0
  502. package/dist/telemetry/utils.d.ts.map +1 -0
  503. package/dist/telemetry/utils.js +51 -0
  504. package/dist/telemetry/utils.js.map +1 -0
  505. package/dist/tokenize/basic/sentence.cjs +3 -3
  506. package/dist/tokenize/basic/sentence.cjs.map +1 -1
  507. package/dist/tokenize/basic/sentence.js +3 -3
  508. package/dist/tokenize/basic/sentence.js.map +1 -1
  509. package/dist/tokenize/tokenizer.test.cjs +3 -1
  510. package/dist/tokenize/tokenizer.test.cjs.map +1 -1
  511. package/dist/tokenize/tokenizer.test.js +3 -1
  512. package/dist/tokenize/tokenizer.test.js.map +1 -1
  513. package/dist/transcription.cjs.map +1 -1
  514. package/dist/transcription.d.cts +6 -0
  515. package/dist/transcription.d.ts +6 -0
  516. package/dist/transcription.d.ts.map +1 -1
  517. package/dist/transcription.js.map +1 -1
  518. package/dist/tts/fallback_adapter.cjs +472 -0
  519. package/dist/tts/fallback_adapter.cjs.map +1 -0
  520. package/dist/tts/fallback_adapter.d.cts +110 -0
  521. package/dist/tts/fallback_adapter.d.ts +110 -0
  522. package/dist/tts/fallback_adapter.d.ts.map +1 -0
  523. package/dist/tts/fallback_adapter.js +448 -0
  524. package/dist/tts/fallback_adapter.js.map +1 -0
  525. package/dist/tts/index.cjs +3 -0
  526. package/dist/tts/index.cjs.map +1 -1
  527. package/dist/tts/index.d.cts +1 -0
  528. package/dist/tts/index.d.ts +1 -0
  529. package/dist/tts/index.d.ts.map +1 -1
  530. package/dist/tts/index.js +2 -0
  531. package/dist/tts/index.js.map +1 -1
  532. package/dist/tts/stream_adapter.cjs +25 -8
  533. package/dist/tts/stream_adapter.cjs.map +1 -1
  534. package/dist/tts/stream_adapter.d.cts +6 -3
  535. package/dist/tts/stream_adapter.d.ts +6 -3
  536. package/dist/tts/stream_adapter.d.ts.map +1 -1
  537. package/dist/tts/stream_adapter.js +25 -8
  538. package/dist/tts/stream_adapter.js.map +1 -1
  539. package/dist/tts/tts.cjs +189 -57
  540. package/dist/tts/tts.cjs.map +1 -1
  541. package/dist/tts/tts.d.cts +58 -6
  542. package/dist/tts/tts.d.ts +58 -6
  543. package/dist/tts/tts.d.ts.map +1 -1
  544. package/dist/tts/tts.js +191 -59
  545. package/dist/tts/tts.js.map +1 -1
  546. package/dist/typed_promise.cjs +48 -0
  547. package/dist/typed_promise.cjs.map +1 -0
  548. package/dist/typed_promise.d.cts +24 -0
  549. package/dist/typed_promise.d.ts +24 -0
  550. package/dist/typed_promise.d.ts.map +1 -0
  551. package/dist/typed_promise.js +28 -0
  552. package/dist/typed_promise.js.map +1 -0
  553. package/dist/types.cjs +24 -32
  554. package/dist/types.cjs.map +1 -1
  555. package/dist/types.d.cts +45 -10
  556. package/dist/types.d.ts +45 -10
  557. package/dist/types.d.ts.map +1 -1
  558. package/dist/types.js +20 -30
  559. package/dist/types.js.map +1 -1
  560. package/dist/utils.cjs +124 -28
  561. package/dist/utils.cjs.map +1 -1
  562. package/dist/utils.d.cts +41 -1
  563. package/dist/utils.d.ts +41 -1
  564. package/dist/utils.d.ts.map +1 -1
  565. package/dist/utils.js +119 -27
  566. package/dist/utils.js.map +1 -1
  567. package/dist/utils.test.cjs +73 -1
  568. package/dist/utils.test.cjs.map +1 -1
  569. package/dist/utils.test.js +74 -10
  570. package/dist/utils.test.js.map +1 -1
  571. package/dist/vad.cjs +35 -15
  572. package/dist/vad.cjs.map +1 -1
  573. package/dist/vad.d.cts +15 -5
  574. package/dist/vad.d.ts +15 -5
  575. package/dist/vad.d.ts.map +1 -1
  576. package/dist/vad.js +35 -15
  577. package/dist/vad.js.map +1 -1
  578. package/dist/version.cjs +1 -1
  579. package/dist/version.cjs.map +1 -1
  580. package/dist/version.d.cts +1 -1
  581. package/dist/version.d.ts +1 -1
  582. package/dist/version.d.ts.map +1 -1
  583. package/dist/version.js +1 -1
  584. package/dist/version.js.map +1 -1
  585. package/dist/voice/agent.cjs +258 -35
  586. package/dist/voice/agent.cjs.map +1 -1
  587. package/dist/voice/agent.d.cts +54 -13
  588. package/dist/voice/agent.d.ts +54 -13
  589. package/dist/voice/agent.d.ts.map +1 -1
  590. package/dist/voice/agent.js +254 -34
  591. package/dist/voice/agent.js.map +1 -1
  592. package/dist/voice/agent.test.cjs +314 -0
  593. package/dist/voice/agent.test.cjs.map +1 -1
  594. package/dist/voice/agent.test.js +316 -2
  595. package/dist/voice/agent.test.js.map +1 -1
  596. package/dist/voice/agent_activity.cjs +1116 -385
  597. package/dist/voice/agent_activity.cjs.map +1 -1
  598. package/dist/voice/agent_activity.d.cts +72 -11
  599. package/dist/voice/agent_activity.d.ts +72 -11
  600. package/dist/voice/agent_activity.d.ts.map +1 -1
  601. package/dist/voice/agent_activity.js +1119 -383
  602. package/dist/voice/agent_activity.js.map +1 -1
  603. package/dist/voice/agent_activity.test.cjs +135 -0
  604. package/dist/voice/agent_activity.test.cjs.map +1 -0
  605. package/dist/voice/agent_activity.test.js +134 -0
  606. package/dist/voice/agent_activity.test.js.map +1 -0
  607. package/dist/voice/agent_session.cjs +550 -90
  608. package/dist/voice/agent_session.cjs.map +1 -1
  609. package/dist/voice/agent_session.d.cts +185 -25
  610. package/dist/voice/agent_session.d.ts +185 -25
  611. package/dist/voice/agent_session.d.ts.map +1 -1
  612. package/dist/voice/agent_session.js +556 -91
  613. package/dist/voice/agent_session.js.map +1 -1
  614. package/dist/voice/audio_recognition.cjs +605 -46
  615. package/dist/voice/audio_recognition.cjs.map +1 -1
  616. package/dist/voice/audio_recognition.d.cts +96 -4
  617. package/dist/voice/audio_recognition.d.ts +96 -4
  618. package/dist/voice/audio_recognition.d.ts.map +1 -1
  619. package/dist/voice/audio_recognition.js +611 -47
  620. package/dist/voice/audio_recognition.js.map +1 -1
  621. package/dist/voice/audio_recognition_span.test.cjs +295 -0
  622. package/dist/voice/audio_recognition_span.test.cjs.map +1 -0
  623. package/dist/voice/audio_recognition_span.test.js +299 -0
  624. package/dist/voice/audio_recognition_span.test.js.map +1 -0
  625. package/dist/voice/avatar/datastream_io.cjs +7 -1
  626. package/dist/voice/avatar/datastream_io.cjs.map +1 -1
  627. package/dist/voice/avatar/datastream_io.d.cts +1 -0
  628. package/dist/voice/avatar/datastream_io.d.ts +1 -0
  629. package/dist/voice/avatar/datastream_io.d.ts.map +1 -1
  630. package/dist/voice/avatar/datastream_io.js +7 -1
  631. package/dist/voice/avatar/datastream_io.js.map +1 -1
  632. package/dist/voice/background_audio.cjs +367 -0
  633. package/dist/voice/background_audio.cjs.map +1 -0
  634. package/dist/voice/background_audio.d.cts +123 -0
  635. package/dist/voice/background_audio.d.ts +123 -0
  636. package/dist/voice/background_audio.d.ts.map +1 -0
  637. package/dist/voice/background_audio.js +343 -0
  638. package/dist/voice/background_audio.js.map +1 -0
  639. package/dist/voice/events.cjs +3 -0
  640. package/dist/voice/events.cjs.map +1 -1
  641. package/dist/voice/events.d.cts +16 -9
  642. package/dist/voice/events.d.ts +16 -9
  643. package/dist/voice/events.d.ts.map +1 -1
  644. package/dist/voice/events.js +3 -0
  645. package/dist/voice/events.js.map +1 -1
  646. package/dist/voice/generation.cjs +205 -41
  647. package/dist/voice/generation.cjs.map +1 -1
  648. package/dist/voice/generation.d.cts +21 -5
  649. package/dist/voice/generation.d.ts +21 -5
  650. package/dist/voice/generation.d.ts.map +1 -1
  651. package/dist/voice/generation.js +215 -43
  652. package/dist/voice/generation.js.map +1 -1
  653. package/dist/voice/generation_tools.test.cjs +236 -0
  654. package/dist/voice/generation_tools.test.cjs.map +1 -0
  655. package/dist/voice/generation_tools.test.js +235 -0
  656. package/dist/voice/generation_tools.test.js.map +1 -0
  657. package/dist/voice/index.cjs +33 -2
  658. package/dist/voice/index.cjs.map +1 -1
  659. package/dist/voice/index.d.cts +8 -2
  660. package/dist/voice/index.d.ts +8 -2
  661. package/dist/voice/index.d.ts.map +1 -1
  662. package/dist/voice/index.js +19 -2
  663. package/dist/voice/index.js.map +1 -1
  664. package/dist/voice/interruption_detection.test.cjs +114 -0
  665. package/dist/voice/interruption_detection.test.cjs.map +1 -0
  666. package/dist/voice/interruption_detection.test.js +113 -0
  667. package/dist/voice/interruption_detection.test.js.map +1 -0
  668. package/dist/voice/io.cjs +66 -6
  669. package/dist/voice/io.cjs.map +1 -1
  670. package/dist/voice/io.d.cts +67 -7
  671. package/dist/voice/io.d.ts +67 -7
  672. package/dist/voice/io.d.ts.map +1 -1
  673. package/dist/voice/io.js +62 -5
  674. package/dist/voice/io.js.map +1 -1
  675. package/dist/voice/recorder_io/index.cjs +23 -0
  676. package/dist/voice/recorder_io/index.cjs.map +1 -0
  677. package/dist/voice/recorder_io/index.d.cts +2 -0
  678. package/dist/voice/recorder_io/index.d.ts +2 -0
  679. package/dist/voice/recorder_io/index.d.ts.map +1 -0
  680. package/dist/voice/recorder_io/index.js +2 -0
  681. package/dist/voice/recorder_io/index.js.map +1 -0
  682. package/dist/voice/recorder_io/recorder_io.cjs +607 -0
  683. package/dist/voice/recorder_io/recorder_io.cjs.map +1 -0
  684. package/dist/voice/recorder_io/recorder_io.d.cts +106 -0
  685. package/dist/voice/recorder_io/recorder_io.d.ts +106 -0
  686. package/dist/voice/recorder_io/recorder_io.d.ts.map +1 -0
  687. package/dist/voice/recorder_io/recorder_io.js +573 -0
  688. package/dist/voice/recorder_io/recorder_io.js.map +1 -0
  689. package/dist/voice/remote_session.cjs +922 -0
  690. package/dist/voice/remote_session.cjs.map +1 -0
  691. package/dist/voice/remote_session.d.cts +108 -0
  692. package/dist/voice/remote_session.d.ts +108 -0
  693. package/dist/voice/remote_session.d.ts.map +1 -0
  694. package/dist/voice/remote_session.js +887 -0
  695. package/dist/voice/remote_session.js.map +1 -0
  696. package/dist/voice/report.cjs +88 -0
  697. package/dist/voice/report.cjs.map +1 -0
  698. package/dist/voice/report.d.cts +49 -0
  699. package/dist/voice/report.d.ts +49 -0
  700. package/dist/voice/report.d.ts.map +1 -0
  701. package/dist/voice/report.js +63 -0
  702. package/dist/voice/report.js.map +1 -0
  703. package/dist/voice/report.test.cjs +121 -0
  704. package/dist/voice/report.test.cjs.map +1 -0
  705. package/dist/voice/report.test.js +120 -0
  706. package/dist/voice/report.test.js.map +1 -0
  707. package/dist/voice/room_io/_input.cjs +40 -7
  708. package/dist/voice/room_io/_input.cjs.map +1 -1
  709. package/dist/voice/room_io/_input.d.cts +5 -2
  710. package/dist/voice/room_io/_input.d.ts +5 -2
  711. package/dist/voice/room_io/_input.d.ts.map +1 -1
  712. package/dist/voice/room_io/_input.js +41 -8
  713. package/dist/voice/room_io/_input.js.map +1 -1
  714. package/dist/voice/room_io/_output.cjs +19 -11
  715. package/dist/voice/room_io/_output.cjs.map +1 -1
  716. package/dist/voice/room_io/_output.d.cts +7 -4
  717. package/dist/voice/room_io/_output.d.ts +7 -4
  718. package/dist/voice/room_io/_output.d.ts.map +1 -1
  719. package/dist/voice/room_io/_output.js +20 -12
  720. package/dist/voice/room_io/_output.js.map +1 -1
  721. package/dist/voice/room_io/room_io.cjs +33 -6
  722. package/dist/voice/room_io/room_io.cjs.map +1 -1
  723. package/dist/voice/room_io/room_io.d.cts +29 -9
  724. package/dist/voice/room_io/room_io.d.ts +29 -9
  725. package/dist/voice/room_io/room_io.d.ts.map +1 -1
  726. package/dist/voice/room_io/room_io.js +33 -7
  727. package/dist/voice/room_io/room_io.js.map +1 -1
  728. package/dist/voice/speech_handle.cjs +22 -4
  729. package/dist/voice/speech_handle.cjs.map +1 -1
  730. package/dist/voice/speech_handle.d.cts +17 -2
  731. package/dist/voice/speech_handle.d.ts +17 -2
  732. package/dist/voice/speech_handle.d.ts.map +1 -1
  733. package/dist/voice/speech_handle.js +21 -4
  734. package/dist/voice/speech_handle.js.map +1 -1
  735. package/dist/voice/testing/fake_llm.cjs +127 -0
  736. package/dist/voice/testing/fake_llm.cjs.map +1 -0
  737. package/dist/voice/testing/fake_llm.d.cts +30 -0
  738. package/dist/voice/testing/fake_llm.d.ts +30 -0
  739. package/dist/voice/testing/fake_llm.d.ts.map +1 -0
  740. package/dist/voice/testing/fake_llm.js +103 -0
  741. package/dist/voice/testing/fake_llm.js.map +1 -0
  742. package/dist/voice/testing/index.cjs +57 -0
  743. package/dist/voice/testing/index.cjs.map +1 -0
  744. package/dist/voice/testing/index.d.cts +21 -0
  745. package/dist/voice/testing/index.d.ts +21 -0
  746. package/dist/voice/testing/index.d.ts.map +1 -0
  747. package/dist/voice/testing/index.js +35 -0
  748. package/dist/voice/testing/index.js.map +1 -0
  749. package/dist/voice/testing/run_result.cjs +817 -0
  750. package/dist/voice/testing/run_result.cjs.map +1 -0
  751. package/dist/voice/testing/run_result.d.cts +385 -0
  752. package/dist/voice/testing/run_result.d.ts +385 -0
  753. package/dist/voice/testing/run_result.d.ts.map +1 -0
  754. package/dist/voice/testing/run_result.js +790 -0
  755. package/dist/voice/testing/run_result.js.map +1 -0
  756. package/dist/voice/testing/types.cjs +46 -0
  757. package/dist/voice/testing/types.cjs.map +1 -0
  758. package/dist/voice/testing/types.d.cts +83 -0
  759. package/dist/voice/testing/types.d.ts +83 -0
  760. package/dist/voice/testing/types.d.ts.map +1 -0
  761. package/dist/voice/testing/types.js +19 -0
  762. package/dist/voice/testing/types.js.map +1 -0
  763. package/dist/voice/transcription/synchronizer.cjs +139 -15
  764. package/dist/voice/transcription/synchronizer.cjs.map +1 -1
  765. package/dist/voice/transcription/synchronizer.d.cts +35 -4
  766. package/dist/voice/transcription/synchronizer.d.ts +35 -4
  767. package/dist/voice/transcription/synchronizer.d.ts.map +1 -1
  768. package/dist/voice/transcription/synchronizer.js +143 -16
  769. package/dist/voice/transcription/synchronizer.js.map +1 -1
  770. package/dist/voice/transcription/synchronizer.test.cjs +151 -0
  771. package/dist/voice/transcription/synchronizer.test.cjs.map +1 -0
  772. package/dist/voice/transcription/synchronizer.test.js +150 -0
  773. package/dist/voice/transcription/synchronizer.test.js.map +1 -0
  774. package/dist/voice/turn_config/endpointing.cjs +33 -0
  775. package/dist/voice/turn_config/endpointing.cjs.map +1 -0
  776. package/dist/voice/turn_config/endpointing.d.cts +30 -0
  777. package/dist/voice/turn_config/endpointing.d.ts +30 -0
  778. package/dist/voice/turn_config/endpointing.d.ts.map +1 -0
  779. package/dist/voice/turn_config/endpointing.js +9 -0
  780. package/dist/voice/turn_config/endpointing.js.map +1 -0
  781. package/dist/voice/turn_config/interruption.cjs +37 -0
  782. package/dist/voice/turn_config/interruption.cjs.map +1 -0
  783. package/dist/voice/turn_config/interruption.d.cts +53 -0
  784. package/dist/voice/turn_config/interruption.d.ts +53 -0
  785. package/dist/voice/turn_config/interruption.d.ts.map +1 -0
  786. package/dist/voice/turn_config/interruption.js +13 -0
  787. package/dist/voice/turn_config/interruption.js.map +1 -0
  788. package/dist/voice/turn_config/turn_handling.cjs +35 -0
  789. package/dist/voice/turn_config/turn_handling.cjs.map +1 -0
  790. package/dist/voice/turn_config/turn_handling.d.cts +36 -0
  791. package/dist/voice/turn_config/turn_handling.d.ts +36 -0
  792. package/dist/voice/turn_config/turn_handling.d.ts.map +1 -0
  793. package/dist/voice/turn_config/turn_handling.js +11 -0
  794. package/dist/voice/turn_config/turn_handling.js.map +1 -0
  795. package/dist/voice/turn_config/utils.cjs +157 -0
  796. package/dist/voice/turn_config/utils.cjs.map +1 -0
  797. package/dist/voice/turn_config/utils.d.cts +37 -0
  798. package/dist/voice/turn_config/utils.d.ts +37 -0
  799. package/dist/voice/turn_config/utils.d.ts.map +1 -0
  800. package/dist/voice/turn_config/utils.js +131 -0
  801. package/dist/voice/turn_config/utils.js.map +1 -0
  802. package/dist/voice/turn_config/utils.test.cjs +128 -0
  803. package/dist/voice/turn_config/utils.test.cjs.map +1 -0
  804. package/dist/voice/turn_config/utils.test.js +127 -0
  805. package/dist/voice/turn_config/utils.test.js.map +1 -0
  806. package/dist/voice/utils.cjs +47 -0
  807. package/dist/voice/utils.cjs.map +1 -0
  808. package/dist/voice/utils.d.cts +4 -0
  809. package/dist/voice/utils.d.ts +4 -0
  810. package/dist/voice/utils.d.ts.map +1 -0
  811. package/dist/voice/utils.js +23 -0
  812. package/dist/voice/utils.js.map +1 -0
  813. package/dist/worker.cjs +44 -52
  814. package/dist/worker.cjs.map +1 -1
  815. package/dist/worker.d.cts +18 -8
  816. package/dist/worker.d.ts +18 -8
  817. package/dist/worker.d.ts.map +1 -1
  818. package/dist/worker.js +43 -43
  819. package/dist/worker.js.map +1 -1
  820. package/package.json +35 -13
  821. package/resources/NOTICE +2 -0
  822. package/resources/keyboard-typing.ogg +0 -0
  823. package/resources/keyboard-typing2.ogg +0 -0
  824. package/resources/office-ambience.ogg +0 -0
  825. package/src/_exceptions.ts +5 -0
  826. package/src/audio.ts +132 -1
  827. package/src/beta/index.ts +9 -0
  828. package/src/beta/workflows/index.ts +9 -0
  829. package/src/beta/workflows/task_group.ts +203 -0
  830. package/src/cli.ts +57 -66
  831. package/src/connection_pool.test.ts +346 -0
  832. package/src/connection_pool.ts +307 -0
  833. package/src/constants.ts +14 -0
  834. package/src/cpu.test.ts +239 -0
  835. package/src/cpu.ts +173 -0
  836. package/src/http_server.ts +18 -6
  837. package/src/index.ts +15 -13
  838. package/src/inference/api_protos.ts +85 -2
  839. package/src/inference/index.ts +32 -4
  840. package/src/inference/interruption/defaults.ts +51 -0
  841. package/src/inference/interruption/errors.ts +25 -0
  842. package/src/inference/interruption/http_transport.ts +207 -0
  843. package/src/inference/interruption/interruption_cache_entry.ts +50 -0
  844. package/src/inference/interruption/interruption_detector.ts +204 -0
  845. package/src/inference/interruption/interruption_stream.ts +467 -0
  846. package/src/inference/interruption/types.ts +84 -0
  847. package/src/inference/interruption/utils.test.ts +132 -0
  848. package/src/inference/interruption/utils.ts +137 -0
  849. package/src/inference/interruption/ws_transport.ts +416 -0
  850. package/src/inference/llm.ts +214 -163
  851. package/src/inference/stt.test.ts +253 -0
  852. package/src/inference/stt.ts +449 -208
  853. package/src/inference/tts.test.ts +354 -0
  854. package/src/inference/tts.ts +417 -115
  855. package/src/inference/utils.ts +30 -2
  856. package/src/ipc/inference_proc_executor.ts +11 -3
  857. package/src/ipc/inference_proc_lazy_main.ts +13 -1
  858. package/src/ipc/job_proc_executor.ts +11 -1
  859. package/src/ipc/job_proc_lazy_main.ts +86 -20
  860. package/src/ipc/supervised_proc.test.ts +153 -0
  861. package/src/ipc/supervised_proc.ts +39 -10
  862. package/src/job.ts +120 -1
  863. package/src/language.test.ts +62 -0
  864. package/src/language.ts +380 -0
  865. package/src/llm/__snapshots__/zod-utils.test.ts.snap +559 -0
  866. package/src/llm/chat_context.test.ts +787 -0
  867. package/src/llm/chat_context.ts +493 -2
  868. package/src/llm/fallback_adapter.test.ts +238 -0
  869. package/src/llm/fallback_adapter.ts +394 -0
  870. package/src/llm/index.ts +13 -0
  871. package/src/llm/llm.ts +77 -12
  872. package/src/llm/provider_format/google.test.ts +72 -1
  873. package/src/llm/provider_format/google.ts +10 -6
  874. package/src/llm/provider_format/index.ts +7 -2
  875. package/src/llm/provider_format/openai.test.ts +480 -2
  876. package/src/llm/provider_format/openai.ts +152 -21
  877. package/src/llm/provider_format/utils.ts +11 -5
  878. package/src/llm/realtime.ts +23 -2
  879. package/src/llm/remote_chat_context.ts +2 -2
  880. package/src/llm/tool_context.test.ts +210 -1
  881. package/src/llm/tool_context.ts +115 -17
  882. package/src/llm/utils.test.ts +103 -2
  883. package/src/llm/utils.ts +152 -16
  884. package/src/llm/zod-utils.test.ts +577 -0
  885. package/src/llm/zod-utils.ts +153 -0
  886. package/src/log.ts +71 -19
  887. package/src/metrics/base.ts +78 -19
  888. package/src/metrics/index.ts +12 -0
  889. package/src/metrics/model_usage.test.ts +545 -0
  890. package/src/metrics/model_usage.ts +262 -0
  891. package/src/metrics/usage_collector.ts +14 -3
  892. package/src/metrics/utils.ts +27 -7
  893. package/src/stream/deferred_stream.test.ts +3 -3
  894. package/src/stream/deferred_stream.ts +43 -11
  895. package/src/stream/index.ts +1 -0
  896. package/src/stream/multi_input_stream.test.ts +545 -0
  897. package/src/stream/multi_input_stream.ts +172 -0
  898. package/src/stream/stream_channel.test.ts +37 -0
  899. package/src/stream/stream_channel.ts +43 -3
  900. package/src/stt/stream_adapter.ts +30 -9
  901. package/src/stt/stt.ts +140 -23
  902. package/src/telemetry/index.ts +28 -0
  903. package/src/telemetry/logging.ts +55 -0
  904. package/src/telemetry/otel_http_exporter.ts +218 -0
  905. package/src/telemetry/pino_otel_transport.ts +265 -0
  906. package/src/telemetry/trace_types.ts +109 -0
  907. package/src/telemetry/traces.ts +673 -0
  908. package/src/telemetry/utils.ts +61 -0
  909. package/src/tokenize/basic/sentence.ts +3 -3
  910. package/src/tokenize/tokenizer.test.ts +4 -0
  911. package/src/transcription.ts +6 -0
  912. package/src/tts/fallback_adapter.ts +586 -0
  913. package/src/tts/index.ts +1 -0
  914. package/src/tts/stream_adapter.ts +38 -8
  915. package/src/tts/tts.ts +245 -62
  916. package/src/typed_promise.ts +67 -0
  917. package/src/types.ts +62 -33
  918. package/src/utils.test.ts +90 -10
  919. package/src/utils.ts +178 -33
  920. package/src/vad.ts +42 -18
  921. package/src/version.ts +1 -1
  922. package/src/voice/agent.test.ts +347 -2
  923. package/src/voice/agent.ts +346 -44
  924. package/src/voice/agent_activity.test.ts +194 -0
  925. package/src/voice/agent_activity.ts +1457 -388
  926. package/src/voice/agent_session.ts +817 -112
  927. package/src/voice/audio_recognition.ts +845 -70
  928. package/src/voice/audio_recognition_span.test.ts +341 -0
  929. package/src/voice/avatar/datastream_io.ts +9 -1
  930. package/src/voice/background_audio.ts +494 -0
  931. package/src/voice/events.ts +27 -7
  932. package/src/voice/generation.ts +310 -56
  933. package/src/voice/generation_tools.test.ts +268 -0
  934. package/src/voice/index.ts +17 -3
  935. package/src/voice/interruption_detection.test.ts +151 -0
  936. package/src/voice/io.ts +115 -12
  937. package/src/voice/recorder_io/index.ts +4 -0
  938. package/src/voice/recorder_io/recorder_io.ts +783 -0
  939. package/src/voice/remote_session.ts +1083 -0
  940. package/src/voice/report.test.ts +136 -0
  941. package/src/voice/report.ts +140 -0
  942. package/src/voice/room_io/_input.ts +45 -10
  943. package/src/voice/room_io/_output.ts +26 -14
  944. package/src/voice/room_io/room_io.ts +67 -22
  945. package/src/voice/speech_handle.ts +38 -6
  946. package/src/voice/testing/fake_llm.ts +138 -0
  947. package/src/voice/testing/index.ts +52 -0
  948. package/src/voice/testing/run_result.ts +995 -0
  949. package/src/voice/testing/types.ts +118 -0
  950. package/src/voice/transcription/synchronizer.test.ts +206 -0
  951. package/src/voice/transcription/synchronizer.ts +204 -19
  952. package/src/voice/turn_config/endpointing.ts +33 -0
  953. package/src/voice/turn_config/interruption.ts +56 -0
  954. package/src/voice/turn_config/turn_handling.ts +45 -0
  955. package/src/voice/turn_config/utils.test.ts +148 -0
  956. package/src/voice/turn_config/utils.ts +167 -0
  957. package/src/voice/utils.ts +29 -0
  958. package/src/worker.ts +92 -78
  959. package/src/llm/__snapshots__/utils.test.ts.snap +0 -65
@@ -1,31 +1,48 @@
1
1
  // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
2
2
  //
3
3
  // SPDX-License-Identifier: Apache-2.0
4
+ import { Mutex } from '@livekit/mutex';
4
5
  import type { AudioFrame, Room } from '@livekit/rtc-node';
5
6
  import type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';
7
+ import type { Context, Span } from '@opentelemetry/api';
8
+ import { ROOT_CONTEXT, context as otelContext, trace } from '@opentelemetry/api';
6
9
  import { EventEmitter } from 'node:events';
7
10
  import type { ReadableStream } from 'node:stream/web';
11
+ import type { z } from 'zod';
8
12
  import {
9
13
  LLM as InferenceLLM,
10
14
  STT as InferenceSTT,
11
15
  TTS as InferenceTTS,
12
16
  type LLMModels,
13
- type STTModels,
14
- type TTSModels,
17
+ type STTModelString,
18
+ type TTSModelString,
15
19
  } from '../inference/index.js';
16
- import { getJobContext } from '../job.js';
17
- import { ChatContext, ChatMessage } from '../llm/chat_context.js';
20
+ import type { InterruptionDetectionError } from '../inference/interruption/errors.js';
21
+ import type { OverlappingSpeechEvent } from '../inference/interruption/types.js';
22
+ import { type JobContext, getJobContext } from '../job.js';
23
+ import type { FunctionCall, FunctionCallOutput } from '../llm/chat_context.js';
24
+ import { AgentHandoffItem, ChatContext, ChatMessage } from '../llm/chat_context.js';
18
25
  import type { LLM, RealtimeModel, RealtimeModelError, ToolChoice } from '../llm/index.js';
19
26
  import type { LLMError } from '../llm/llm.js';
20
27
  import { log } from '../log.js';
28
+ import { type ModelUsage, ModelUsageCollector, filterZeroValues } from '../metrics/model_usage.js';
21
29
  import type { STT } from '../stt/index.js';
22
30
  import type { STTError } from '../stt/stt.js';
31
+ import { traceTypes, tracer } from '../telemetry/index.js';
23
32
  import type { TTS, TTSError } from '../tts/tts.js';
33
+ import {
34
+ DEFAULT_API_CONNECT_OPTIONS,
35
+ DEFAULT_SESSION_CONNECT_OPTIONS,
36
+ type ResolvedSessionConnectOptions,
37
+ type SessionConnectOptions,
38
+ } from '../types.js';
39
+ import { Task } from '../utils.js';
24
40
  import type { VAD } from '../vad.js';
25
41
  import type { Agent } from './agent.js';
26
42
  import { AgentActivity } from './agent_activity.js';
27
43
  import type { _TurnDetector } from './audio_recognition.js';
28
44
  import {
45
+ type AgentEvent,
29
46
  AgentSessionEventTypes,
30
47
  type AgentState,
31
48
  type AgentStateChangedEvent,
@@ -35,6 +52,7 @@ import {
35
52
  type ErrorEvent,
36
53
  type FunctionToolsExecutedEvent,
37
54
  type MetricsCollectedEvent,
55
+ type ShutdownReason,
38
56
  type SpeechCreatedEvent,
39
57
  type UserInputTranscribedEvent,
40
58
  type UserState,
@@ -45,29 +63,64 @@ import {
45
63
  createUserStateChangedEvent,
46
64
  } from './events.js';
47
65
  import { AgentInput, AgentOutput } from './io.js';
48
- import { RoomIO, type RoomInputOptions, type RoomOutputOptions } from './room_io/index.js';
66
+ import { RecorderIO } from './recorder_io/index.js';
67
+ import { RoomSessionTransport, SessionHost } from './remote_session.js';
68
+ import {
69
+ DEFAULT_TEXT_INPUT_CALLBACK,
70
+ RoomIO,
71
+ type RoomInputOptions,
72
+ type RoomOutputOptions,
73
+ } from './room_io/index.js';
49
74
  import type { UnknownUserData } from './run_context.js';
50
75
  import type { SpeechHandle } from './speech_handle.js';
76
+ import { RunResult } from './testing/run_result.js';
77
+ import type { InterruptionOptions } from './turn_config/interruption.js';
78
+ import type {
79
+ InternalTurnHandlingOptions,
80
+ TurnHandlingOptions,
81
+ } from './turn_config/turn_handling.js';
82
+ import { migrateLegacyOptions } from './turn_config/utils.js';
83
+ import { setParticipantSpanAttributes } from './utils.js';
84
+
85
+ export interface AgentSessionUsage {
86
+ /** List of usage summaries, one per model/provider combination. */
87
+ modelUsage: Array<Partial<ModelUsage>>;
88
+ }
51
89
 
52
- export interface VoiceOptions {
53
- allowInterruptions: boolean;
54
- discardAudioIfUninterruptible: boolean;
55
- minInterruptionDuration: number;
56
- minInterruptionWords: number;
57
- minEndpointingDelay: number;
58
- maxEndpointingDelay: number;
90
+ export interface InternalSessionOptions<UserData> extends AgentSessionOptions<UserData> {
91
+ turnHandling: InternalTurnHandlingOptions;
92
+ useTtsAlignedTranscript: boolean;
59
93
  maxToolSteps: number;
94
+ userAwayTimeout: number | null;
60
95
  }
61
96
 
62
- const defaultVoiceOptions: VoiceOptions = {
63
- allowInterruptions: true,
64
- discardAudioIfUninterruptible: true,
65
- minInterruptionDuration: 500,
66
- minInterruptionWords: 0,
67
- minEndpointingDelay: 500,
68
- maxEndpointingDelay: 6000,
97
+ export const defaultAgentSessionOptions = {
69
98
  maxToolSteps: 3,
70
- } as const;
99
+ preemptiveGeneration: true,
100
+ userAwayTimeout: 15.0,
101
+ aecWarmupDuration: 3000,
102
+ turnHandling: {},
103
+ useTtsAlignedTranscript: true,
104
+ } as const satisfies AgentSessionOptions;
105
+
106
+ /** @deprecated {@link VoiceOptions} has been flattened onto to {@link AgentSessionOptions} */
107
+ export type VoiceOptions = {
108
+ maxToolSteps: number;
109
+ preemptiveGeneration: boolean;
110
+ userAwayTimeout?: number | null;
111
+ /** @deprecated Use {@link AgentSessionOptions.turnHandling}.interruption.mode instead. */
112
+ allowInterruptions?: boolean;
113
+ /** @deprecated Use {@link AgentSessionOptions.turnHandling}.interruption.discardAudioIfUninterruptible instead. */
114
+ discardAudioIfUninterruptible?: boolean;
115
+ /** @deprecated Use {@link AgentSessionOptions.turnHandling}.interruption.minDuration instead. */
116
+ minInterruptionDuration?: number;
117
+ /** @deprecated Use {@link AgentSessionOptions.turnHandling}.interruption.minWords instead. */
118
+ minInterruptionWords?: number;
119
+ /** @deprecated Use {@link AgentSessionOptions.turnHandling}.endpointing.minDelay instead. */
120
+ minEndpointingDelay?: number;
121
+ /** @deprecated Use {@link AgentSessionOptions.turnHandling}.endpointing.maxDelay instead. */
122
+ maxEndpointingDelay?: number;
123
+ };
71
124
 
72
125
  export type TurnDetectionMode = 'stt' | 'vad' | 'realtime_llm' | 'manual' | _TurnDetector;
73
126
 
@@ -81,16 +134,60 @@ export type AgentSessionCallbacks = {
81
134
  [AgentSessionEventTypes.SpeechCreated]: (ev: SpeechCreatedEvent) => void;
82
135
  [AgentSessionEventTypes.Error]: (ev: ErrorEvent) => void;
83
136
  [AgentSessionEventTypes.Close]: (ev: CloseEvent) => void;
137
+ [AgentSessionEventTypes.OverlappingSpeech]: (ev: OverlappingSpeechEvent) => void;
84
138
  };
85
139
 
86
140
  export type AgentSessionOptions<UserData = UnknownUserData> = {
87
- turnDetection?: TurnDetectionMode;
88
- stt?: STT | STTModels;
141
+ stt?: STT | STTModelString;
89
142
  vad?: VAD;
90
143
  llm?: LLM | RealtimeModel | LLMModels;
91
- tts?: TTS | TTSModels;
144
+ tts?: TTS | TTSModelString;
92
145
  userData?: UserData;
146
+ connOptions?: SessionConnectOptions;
147
+
148
+ /** @deprecated use turnHandling.turnDetection instead */
149
+ turnDetection?: TurnDetectionMode;
150
+ /** @deprecated use top-level SessionOptions fields instead */
93
151
  voiceOptions?: Partial<VoiceOptions>;
152
+
153
+ maxToolSteps?: number;
154
+ /**
155
+ * Whether to speculatively begin LLM and TTS requests before an end-of-turn is detected.
156
+ * When `true`, the agent sends inference calls as soon as a user transcript is received rather
157
+ * than waiting for a definitive turn boundary. This can reduce response latency by overlapping
158
+ * model inference with user audio, but may incur extra compute if the user interrupts or
159
+ * revises mid-utterance.
160
+ * @defaultValue true
161
+ */
162
+ preemptiveGeneration?: boolean;
163
+
164
+ /**
165
+ * If set, set the user state as "away" after this amount of time after user and agent are
166
+ * silent. Set to `null` to disable.
167
+ * @defaultValue 15.0
168
+ */
169
+ userAwayTimeout?: number | null;
170
+
171
+ /**
172
+ * Duration in milliseconds for AEC (Acoustic Echo Cancellation) warmup, during which
173
+ * interruptions from audio activity are suppressed. Set to `null` to disable.
174
+ * @defaultValue 3000
175
+ */
176
+ aecWarmupDuration?: number | null;
177
+
178
+ /**
179
+ * Configuration for turn handling.
180
+ */
181
+ turnHandling?: Partial<TurnHandlingOptions>;
182
+
183
+ useTtsAlignedTranscript?: boolean;
184
+ };
185
+
186
+ type ActivityTransitionOptions = {
187
+ previousActivity?: 'close' | 'pause';
188
+ newActivity?: 'start' | 'resume';
189
+ blockedTasks?: Task<any>[];
190
+ waitOnEnter?: boolean;
94
191
  };
95
192
 
96
193
  export class AgentSession<
@@ -102,59 +199,116 @@ export class AgentSession<
102
199
  tts?: TTS;
103
200
  turnDetection?: TurnDetectionMode;
104
201
 
202
+ /** @deprecated use {@link sessionOptions } instead */
105
203
  readonly options: VoiceOptions;
106
204
 
205
+ readonly sessionOptions: InternalSessionOptions<UserData>;
206
+
207
+ private readonly activityLock = new Mutex();
208
+
107
209
  private agent?: Agent;
108
210
  private activity?: AgentActivity;
109
211
  private nextActivity?: AgentActivity;
212
+ private updateActivityTask?: Task<void>;
110
213
  private started = false;
111
- private userState: UserState = 'listening';
112
-
113
- private roomIO?: RoomIO;
114
- private logger = log();
214
+ private sessionHost?: SessionHost;
115
215
 
116
216
  private _chatCtx: ChatContext;
117
217
  private _userData: UserData | undefined;
218
+ private _userState: UserState = 'listening';
118
219
  private _agentState: AgentState = 'initializing';
119
220
 
120
221
  private _input: AgentInput;
121
222
  private _output: AgentOutput;
122
223
 
123
224
  private closingTask: Promise<void> | null = null;
225
+ private userAwayTimer: NodeJS.Timeout | null = null;
226
+
227
+ private _aecWarmupTimer: NodeJS.Timeout | null = null;
228
+
229
+ // Connection options for STT, LLM, and TTS
230
+ private _connOptions: ResolvedSessionConnectOptions;
231
+
232
+ // Unrecoverable error counts, reset after agent speaking
233
+ private llmErrorCounts = 0;
234
+ private ttsErrorCounts = 0;
235
+
236
+ private sessionSpan?: Span;
237
+ private agentSpeakingSpan?: Span;
238
+
239
+ private _interruptionDetection?: InterruptionOptions['mode'];
240
+
241
+ /** @internal */
242
+ _usageCollector: ModelUsageCollector = new ModelUsageCollector();
243
+
244
+ /** @internal */
245
+ _roomIO?: RoomIO;
246
+
247
+ /** @internal */
248
+ _aecWarmupRemaining = 0;
249
+
250
+ /** @internal */
251
+ _recorderIO?: RecorderIO;
252
+
253
+ /** @internal */
254
+ rootSpanContext?: Context;
255
+
256
+ /** @internal */
257
+ _recordedEvents: AgentEvent[] = [];
258
+
259
+ /** @internal */
260
+ _enableRecording = false;
261
+
262
+ /** @internal - Timestamp when the session started (milliseconds) */
263
+ _startedAt?: number;
264
+
265
+ /** @internal - Current run state for testing */
266
+ _globalRunState?: RunResult;
267
+
268
+ /** @internal */
269
+ _userSpeakingSpan?: Span;
270
+
271
+ private logger = log();
124
272
 
125
- constructor(opts: AgentSessionOptions<UserData>) {
273
+ constructor(options: AgentSessionOptions<UserData>) {
126
274
  super();
127
275
 
128
- const {
129
- vad,
130
- stt,
131
- llm,
132
- tts,
133
- turnDetection,
134
- userData,
135
- voiceOptions = defaultVoiceOptions,
136
- } = opts;
276
+ const { agentSessionOptions: opts, legacyVoiceOptions } =
277
+ migrateLegacyOptions<UserData>(options);
278
+
279
+ const { vad, stt, llm, tts, userData, connOptions, ...resolvedSessionOptions } = opts;
280
+ // Merge user-provided connOptions with defaults
281
+ this._connOptions = {
282
+ sttConnOptions: { ...DEFAULT_API_CONNECT_OPTIONS, ...connOptions?.sttConnOptions },
283
+ llmConnOptions: { ...DEFAULT_API_CONNECT_OPTIONS, ...connOptions?.llmConnOptions },
284
+ ttsConnOptions: { ...DEFAULT_API_CONNECT_OPTIONS, ...connOptions?.ttsConnOptions },
285
+ maxUnrecoverableErrors:
286
+ connOptions?.maxUnrecoverableErrors ??
287
+ DEFAULT_SESSION_CONNECT_OPTIONS.maxUnrecoverableErrors,
288
+ };
137
289
 
138
290
  this.vad = vad;
139
291
 
140
292
  if (typeof stt === 'string') {
141
- this.stt = new InferenceSTT({ model: stt });
293
+ this.stt = InferenceSTT.fromModelString(stt);
142
294
  } else {
143
295
  this.stt = stt;
144
296
  }
145
297
 
146
298
  if (typeof llm === 'string') {
147
- this.llm = new InferenceLLM({ model: llm });
299
+ this.llm = InferenceLLM.fromModelString(llm);
148
300
  } else {
149
301
  this.llm = llm;
150
302
  }
151
303
 
152
304
  if (typeof tts === 'string') {
153
- this.tts = new InferenceTTS({ model: tts });
305
+ this.tts = InferenceTTS.fromModelString(tts);
154
306
  } else {
155
307
  this.tts = tts;
156
308
  }
157
- this.turnDetection = turnDetection;
309
+
310
+ this.turnDetection = resolvedSessionOptions.turnHandling.turnDetection;
311
+ this._interruptionDetection = resolvedSessionOptions.turnHandling.interruption?.mode;
158
312
  this._userData = userData;
159
313
 
160
314
  // configurable IO
@@ -163,7 +317,21 @@ export class AgentSession<
163
317
 
164
318
  // This is the "global" chat context, it holds the entire conversation history
165
319
  this._chatCtx = ChatContext.empty();
166
- this.options = { ...defaultVoiceOptions, ...voiceOptions };
320
+ this.sessionOptions = resolvedSessionOptions;
321
+ this.options = legacyVoiceOptions;
322
+ this._aecWarmupRemaining = this.sessionOptions.aecWarmupDuration ?? 0;
323
+
324
+ this._onUserInputTranscribed = this._onUserInputTranscribed.bind(this);
325
+ this.on(AgentSessionEventTypes.UserInputTranscribed, this._onUserInputTranscribed);
326
+ }
327
+
328
+ emit<K extends keyof AgentSessionCallbacks>(
329
+ event: K,
330
+ ...args: Parameters<AgentSessionCallbacks[K]>
331
+ ): boolean {
332
+ const eventData = args[0] as AgentEvent;
333
+ this._recordedEvents.push(eventData);
334
+ return super.emit(event, ...args);
167
335
  }
168
336
 
169
337
  get input(): AgentInput {
@@ -186,64 +354,134 @@ export class AgentSession<
186
354
  return this._chatCtx;
187
355
  }
188
356
 
357
+ /** Connection options for STT, LLM, and TTS. */
358
+ get connOptions(): ResolvedSessionConnectOptions {
359
+ return this._connOptions;
360
+ }
361
+
362
+ get interruptionDetection() {
363
+ return this._interruptionDetection;
364
+ }
365
+
366
+ /**
367
+ * Returns usage summaries for this session, one per model/provider combination.
368
+ */
369
+ get usage(): AgentSessionUsage {
370
+ // Skip zero fields for more concise usage display (matches python behavior).
371
+ return { modelUsage: this._usageCollector.flatten().map(filterZeroValues) };
372
+ }
373
+
374
+ get useTtsAlignedTranscript(): boolean {
375
+ return this.sessionOptions.useTtsAlignedTranscript;
376
+ }
377
+
189
378
  set userData(value: UserData) {
190
379
  this._userData = value;
191
380
  }
192
381
 
193
- async start({
382
+ private async _startImpl({
194
383
  agent,
195
384
  room,
196
385
  inputOptions,
197
386
  outputOptions,
387
+ span,
198
388
  }: {
199
389
  agent: Agent;
200
- room: Room;
390
+ room?: Room;
201
391
  inputOptions?: Partial<RoomInputOptions>;
202
392
  outputOptions?: Partial<RoomOutputOptions>;
393
+ span: Span;
203
394
  }): Promise<void> {
204
- if (this.started) {
205
- return;
206
- }
395
+ span.setAttribute(traceTypes.ATTR_AGENT_LABEL, agent.id);
207
396
 
208
397
  this.agent = agent;
209
398
  this._updateAgentState('initializing');
210
399
 
211
400
  const tasks: Promise<void>[] = [];
212
- // Check for existing input/output configuration and warn if needed
213
- if (this.input.audio && inputOptions?.audioEnabled !== false) {
214
- this.logger.warn('RoomIO audio input is enabled but input.audio is already set, ignoring..');
215
- }
216
401
 
217
- if (this.output.audio && outputOptions?.audioEnabled !== false) {
218
- this.logger.warn(
219
- 'RoomIO audio output is enabled but output.audio is already set, ignoring..',
220
- );
402
+ if (room && !this._roomIO) {
403
+ // Check for existing input/output configuration and warn if needed
404
+ if (this.input.audio && inputOptions?.audioEnabled !== false) {
405
+ this.logger.warn(
406
+ 'RoomIO audio input is enabled but input.audio is already set, ignoring..',
407
+ );
408
+ }
409
+
410
+ if (this.output.audio && outputOptions?.audioEnabled !== false) {
411
+ this.logger.warn(
412
+ 'RoomIO audio output is enabled but output.audio is already set, ignoring..',
413
+ );
414
+ }
415
+
416
+ if (this.output.transcription && outputOptions?.transcriptionEnabled !== false) {
417
+ this.logger.warn(
418
+ 'RoomIO transcription output is enabled but output.transcription is already set, ignoring..',
419
+ );
420
+ }
421
+
422
+ this._roomIO = new RoomIO({
423
+ agentSession: this,
424
+ room,
425
+ inputOptions,
426
+ outputOptions,
427
+ });
428
+
429
+ this._roomIO.start();
430
+
431
+ const transport = new RoomSessionTransport(room, this._roomIO);
432
+ this.sessionHost = new SessionHost(transport);
433
+ this.sessionHost.registerSession(this);
434
+ if (inputOptions?.textEnabled !== false) {
435
+ this.sessionHost.registerTextInput(
436
+ inputOptions?.textInputCallback ?? DEFAULT_TEXT_INPUT_CALLBACK,
437
+ );
438
+ }
221
439
  }
222
440
 
223
- if (this.output.transcription && outputOptions?.transcriptionEnabled !== false) {
224
- this.logger.warn(
225
- 'RoomIO transcription output is enabled but output.transcription is already set, ignoring..',
226
- );
441
+ let ctx: JobContext | undefined = undefined;
442
+ try {
443
+ ctx = getJobContext();
444
+ } catch {
445
+ // JobContext is not available in evals
227
446
  }
228
447
 
229
- this.roomIO = new RoomIO({
230
- agentSession: this,
231
- room,
232
- inputOptions,
233
- outputOptions,
234
- });
235
- this.roomIO.start();
448
+ if (ctx) {
449
+ if (room && ctx.room === room && !room.isConnected) {
450
+ this.logger.debug('Auto-connecting to room via job context');
451
+ tasks.push(ctx.connect());
452
+ }
453
+
454
+ if (ctx._primaryAgentSession === undefined) {
455
+ ctx._primaryAgentSession = this;
456
+ } else if (this._enableRecording) {
457
+ throw new Error(
458
+ 'Only one `AgentSession` can be the primary at a time. If you want to ignore primary designation, use `session.start({ record: false })`.',
459
+ );
460
+ }
236
461
 
237
- const ctx = getJobContext();
238
- if (ctx && ctx.room === room && !room.isConnected) {
239
- this.logger.debug('Auto-connecting to room via job context');
240
- tasks.push(ctx.connect());
462
+ if (this.input.audio && this.output.audio && this._enableRecording) {
463
+ this._recorderIO = new RecorderIO({ agentSession: this });
464
+ this.input.audio = this._recorderIO.recordInput(this.input.audio);
465
+ this.output.audio = this._recorderIO.recordOutput(this.output.audio);
466
+
467
+ // Start recording to session directory
468
+ const sessionDir = ctx.sessionDirectory;
469
+ if (sessionDir) {
470
+ tasks.push(this._recorderIO.start(`${sessionDir}/audio.ogg`));
471
+ }
472
+ }
241
473
  }
474
+
242
475
  // TODO(AJS-265): add shutdown callback to job context
243
- tasks.push(this.updateActivity(this.agent));
476
+ // Initial start does not wait on onEnter
477
+ tasks.push(this._updateActivity(this.agent, { waitOnEnter: false }));
244
478
 
245
479
  await Promise.allSettled(tasks);
246
480
 
481
+ if (this.sessionHost) {
482
+ await this.sessionHost.start();
483
+ }
484
+
247
485
  // Log used IO configuration
248
486
  this.logger.debug(
249
487
  `using audio io: ${this.input.audio ? '`' + this.input.audio.constructor.name + '`' : '(none)'} -> \`AgentSession\` -> ${this.output.audio ? '`' + this.output.audio.constructor.name + '`' : '(none)'}`,
@@ -254,14 +492,94 @@ export class AgentSession<
254
492
  );
255
493
 
256
494
  this.started = true;
495
+ this._startedAt = Date.now();
257
496
  this._updateAgentState('listening');
258
497
  }
259
498
 
499
+ async start({
500
+ agent,
501
+ room,
502
+ inputOptions,
503
+ outputOptions,
504
+ record,
505
+ }: {
506
+ agent: Agent;
507
+ room?: Room;
508
+ inputOptions?: Partial<RoomInputOptions>;
509
+ outputOptions?: Partial<RoomOutputOptions>;
510
+ record?: boolean;
511
+ }): Promise<void> {
512
+ if (this.started) {
513
+ return;
514
+ }
515
+
516
+ this._usageCollector = new ModelUsageCollector();
517
+
518
+ let ctx: JobContext | undefined = undefined;
519
+ try {
520
+ ctx = getJobContext();
521
+
522
+ if (record === undefined) {
523
+ record = ctx.job.enableRecording;
524
+ }
525
+
526
+ this._enableRecording = record;
527
+
528
+ if (this._enableRecording) {
529
+ ctx.initRecording();
530
+ }
531
+ } catch (error) {
532
+ // JobContext is not available in evals
533
+ this.logger.warn('JobContext is not available');
534
+ }
535
+
536
+ this.sessionSpan = tracer.startSpan({
537
+ name: 'agent_session',
538
+ context: ROOT_CONTEXT,
539
+ });
540
+
541
+ this.rootSpanContext = trace.setSpan(ROOT_CONTEXT, this.sessionSpan);
542
+
543
+ await this._startImpl({
544
+ agent,
545
+ room,
546
+ inputOptions,
547
+ outputOptions,
548
+ span: this.sessionSpan,
549
+ });
550
+ }
551
+
260
552
  updateAgent(agent: Agent): void {
261
553
  this.agent = agent;
262
554
 
263
- if (this.started) {
264
- this.updateActivity(agent);
555
+ if (!this.started) {
556
+ return;
557
+ }
558
+
559
+ const _updateActivityTask = async (oldTask: Task<void> | undefined, agent: Agent) => {
560
+ if (oldTask) {
561
+ try {
562
+ await oldTask.result;
563
+ } catch (error) {
564
+ this.logger.error(error, 'previous updateAgent transition failed');
565
+ }
566
+ }
567
+
568
+ await this._updateActivity(agent);
569
+ };
570
+
571
+ const oldTask = this.updateActivityTask;
572
+ this.updateActivityTask = Task.from(
573
+ async () => _updateActivityTask(oldTask, agent),
574
+ undefined,
575
+ 'AgentSession_updateActivityTask',
576
+ );
577
+
578
+ const runState = this._globalRunState;
579
+ if (runState) {
580
+ // Don't mark the RunResult as done, if there is currently an agent transition happening.
581
+ // (used to make sure we're correctly adding the AgentHandoffResult before completion)
582
+ runState._watchHandle(this.updateActivityTask);
265
583
  }
266
584
  }
267
585
 
@@ -292,14 +610,42 @@ export class AgentSession<
292
610
  throw new Error('AgentSession is not running');
293
611
  }
294
612
 
295
- return this.activity.say(text, options);
613
+ const doSay = (activity: AgentActivity, nextActivity?: AgentActivity) => {
614
+ if (activity.schedulingPaused) {
615
+ if (!nextActivity) {
616
+ throw new Error('AgentSession is closing, cannot use say()');
617
+ }
618
+ return nextActivity.say(text, options);
619
+ }
620
+ return activity.say(text, options);
621
+ };
622
+
623
+ const runState = this._globalRunState;
624
+ let handle: SpeechHandle;
625
+
626
+ // attach to the session span if called outside of the AgentSession
627
+ const activeSpan = trace.getActiveSpan();
628
+ if (!activeSpan && this.rootSpanContext) {
629
+ handle = otelContext.with(this.rootSpanContext, () =>
630
+ doSay(this.activity!, this.nextActivity),
631
+ );
632
+ } else {
633
+ handle = doSay(this.activity, this.nextActivity);
634
+ }
635
+
636
+ if (runState) {
637
+ runState._watchHandle(handle);
638
+ }
639
+
640
+ return handle;
296
641
  }
297
642
 
298
- interrupt() {
643
+ interrupt(options?: { force?: boolean }) {
299
644
  if (!this.activity) {
300
645
  throw new Error('AgentSession is not running');
301
646
  }
302
- return this.activity.interrupt();
647
+
648
+ return this.activity.interrupt(options);
303
649
  }
304
650
 
305
651
  generateReply(options?: {
@@ -319,33 +665,178 @@ export class AgentSession<
319
665
  })
320
666
  : undefined;
321
667
 
322
- if (this.activity.draining) {
323
- if (!this.nextActivity) {
324
- throw new Error('AgentSession is closing, cannot use generateReply()');
668
+ const doGenerateReply = (activity: AgentActivity, nextActivity?: AgentActivity) => {
669
+ if (activity.schedulingPaused) {
670
+ if (!nextActivity) {
671
+ throw new Error('AgentSession is closing, cannot use generateReply()');
672
+ }
673
+ return nextActivity.generateReply({ userMessage, ...options });
325
674
  }
326
- return this.nextActivity.generateReply({ userMessage, ...options });
675
+ return activity.generateReply({ userMessage, ...options });
676
+ };
677
+
678
+ // attach to the session span if called outside of the AgentSession
679
+ const activeSpan = trace.getActiveSpan();
680
+ let handle: SpeechHandle;
681
+ if (!activeSpan && this.rootSpanContext) {
682
+ handle = otelContext.with(this.rootSpanContext, () =>
683
+ doGenerateReply(this.activity!, this.nextActivity),
684
+ );
685
+ } else {
686
+ handle = doGenerateReply(this.activity!, this.nextActivity);
327
687
  }
328
688
 
329
- return this.activity.generateReply({ userMessage, ...options });
330
- }
689
+ if (this._globalRunState) {
690
+ this._globalRunState._watchHandle(handle);
691
+ }
331
692
 
332
- private async updateActivity(agent: Agent): Promise<void> {
333
- // TODO(AJS-129): add lock to agent activity core lifecycle
334
- this.nextActivity = new AgentActivity(agent, this);
693
+ return handle;
694
+ }
335
695
 
336
- if (this.activity) {
337
- await this.activity.drain();
338
- await this.activity.close();
696
+ /**
697
+ * Run a test with user input and return a result for assertions.
698
+ *
699
+ * This method is primarily used for testing agent behavior without
700
+ * requiring a real room connection.
701
+ *
702
+ * @example
703
+ * ```typescript
704
+ * const result = await session.run({ userInput: 'Hello' });
705
+ * result.expect.nextEvent().isMessage({ role: 'assistant' });
706
+ * result.expect.noMoreEvents();
707
+ * ```
708
+ *
709
+ * @param options - Run options including user input and optional output type
710
+ * @returns A RunResult that resolves when the agent finishes responding
711
+ */
712
+ run<T = unknown>({
713
+ userInput,
714
+ outputType,
715
+ }: {
716
+ userInput: string;
717
+ outputType?: z.ZodType<T>;
718
+ }): RunResult<T> {
719
+ if (this._globalRunState && !this._globalRunState.done()) {
720
+ throw new Error('nested runs are not supported');
339
721
  }
340
722
 
341
- this.activity = this.nextActivity;
342
- this.nextActivity = undefined;
723
+ const runState = new RunResult<T>({
724
+ userInput,
725
+ outputType,
726
+ });
343
727
 
344
- await this.activity.start();
728
+ this._globalRunState = runState;
729
+
730
+ // Defer generateReply through the activityLock to ensure any in-progress
731
+ // activity transition (e.g. AgentTask started from onEnter) completes first.
732
+ // TS Task.from starts onEnter synchronously, so the transition may already be
733
+ // mid-flight by the time run() is called after session.start() resolves.
734
+ // Acquiring and immediately releasing the lock guarantees FIFO ordering:
735
+ // the transition's lock section finishes before we route generateReply.
736
+ (async () => {
737
+ try {
738
+ const unlock = await this.activityLock.lock();
739
+ unlock();
740
+ this.generateReply({ userInput });
741
+ } catch (e) {
742
+ runState._reject(e instanceof Error ? e : new Error(String(e)));
743
+ }
744
+ })();
345
745
 
346
- if (this._input.audio) {
347
- this.activity.attachAudioInput(this._input.audio.stream);
746
+ return runState;
747
+ }
748
+
749
+ /** @internal */
750
+ async _updateActivity(agent: Agent, options: ActivityTransitionOptions = {}): Promise<void> {
751
+ const { previousActivity = 'close', newActivity = 'start', blockedTasks = [] } = options;
752
+ const waitOnEnter = options.waitOnEnter ?? newActivity === 'start';
753
+
754
+ const runWithContext = async () => {
755
+ const unlock = await this.activityLock.lock();
756
+ let onEnterTask: Task<void> | undefined;
757
+
758
+ try {
759
+ this.agent = agent;
760
+ const prevActivityObj = this.activity;
761
+
762
+ if (newActivity === 'start') {
763
+ const prevAgent = prevActivityObj?.agent;
764
+ if (
765
+ agent._agentActivity &&
766
+ // allow updating the same agent that is running
767
+ (agent !== prevAgent || previousActivity !== 'close')
768
+ ) {
769
+ throw new Error('Cannot start agent: an activity is already running');
770
+ }
771
+ this.nextActivity = new AgentActivity(agent, this);
772
+ } else if (newActivity === 'resume') {
773
+ if (!agent._agentActivity) {
774
+ throw new Error('Cannot resume agent: no existing activity to resume');
775
+ }
776
+ this.nextActivity = agent._agentActivity;
777
+ }
778
+
779
+ if (prevActivityObj && prevActivityObj !== this.nextActivity) {
780
+ if (previousActivity === 'pause') {
781
+ await prevActivityObj.pause({ blockedTasks });
782
+ } else {
783
+ await prevActivityObj.drain();
784
+ await prevActivityObj.close();
785
+ }
786
+ }
787
+
788
+ this.activity = this.nextActivity;
789
+ this.nextActivity = undefined;
790
+
791
+ const runState = this._globalRunState;
792
+ const handoffItem = new AgentHandoffItem({
793
+ oldAgentId: prevActivityObj?.agent.id,
794
+ newAgentId: agent.id,
795
+ });
796
+
797
+ if (runState) {
798
+ runState._agentHandoff({
799
+ item: handoffItem,
800
+ oldAgent: prevActivityObj?.agent,
801
+ newAgent: this.activity!.agent,
802
+ });
803
+ }
804
+
805
+ this._chatCtx.insert(handoffItem);
806
+ this.logger.debug(
807
+ { previousAgentId: prevActivityObj?.agent.id, newAgentId: agent.id },
808
+ 'Agent handoff inserted into chat context',
809
+ );
810
+
811
+ if (newActivity === 'start') {
812
+ await this.activity!.start();
813
+ } else {
814
+ await this.activity!.resume();
815
+ }
816
+
817
+ onEnterTask = this.activity!._onEnterTask;
818
+
819
+ if (this._input.audio) {
820
+ this.activity!.attachAudioInput(this._input.audio.stream);
821
+ }
822
+ } finally {
823
+ unlock();
824
+ }
825
+
826
+ if (waitOnEnter) {
827
+ if (!onEnterTask) {
828
+ throw new Error('expected onEnter task to be available while waitOnEnter=true');
829
+ }
830
+ await onEnterTask.result;
831
+ }
832
+ };
833
+
834
+ // Run within session span context if available
835
+ if (this.rootSpanContext) {
836
+ return otelContext.with(this.rootSpanContext, runWithContext);
348
837
  }
838
+
839
+ return runWithContext();
349
840
  }
350
841
 
351
842
  get chatCtx(): ChatContext {
@@ -356,6 +847,10 @@ export class AgentSession<
356
847
  return this._agentState;
357
848
  }
358
849
 
850
+ get userState(): UserState {
851
+ return this._userState;
852
+ }
853
+
359
854
  get currentAgent(): Agent {
360
855
  if (!this.agent) {
361
856
  throw new Error('AgentSession is not running');
@@ -368,29 +863,58 @@ export class AgentSession<
368
863
  await this.closeImpl(CloseReason.USER_INITIATED);
369
864
  }
370
865
 
866
+ shutdown(options?: { drain?: boolean; reason?: ShutdownReason }): void {
867
+ const { drain = true, reason = CloseReason.USER_INITIATED } = options ?? {};
868
+
869
+ this._closeSoon({
870
+ reason,
871
+ drain,
872
+ });
873
+ }
874
+
371
875
  /** @internal */
372
876
  _closeSoon({
373
877
  reason,
374
878
  drain = false,
375
879
  error = null,
376
880
  }: {
377
- reason: CloseReason;
881
+ reason: ShutdownReason;
378
882
  drain?: boolean;
379
883
  error?: RealtimeModelError | STTError | TTSError | LLMError | null;
380
884
  }): void {
381
885
  if (this.closingTask) {
382
886
  return;
383
887
  }
384
- this.closeImpl(reason, error, drain);
888
+ this.closingTask = this.closeImpl(reason, error, drain).finally(() => {
889
+ this.closingTask = null;
890
+ });
385
891
  }
386
892
 
387
893
  /** @internal */
388
- _onError(error: RealtimeModelError | STTError | TTSError | LLMError): void {
894
+ _onError(
895
+ error: RealtimeModelError | STTError | TTSError | LLMError | InterruptionDetectionError,
896
+ ): void {
389
897
  if (this.closingTask || error.recoverable) {
390
898
  return;
391
899
  }
392
900
 
393
- this.logger.error(error, 'AgentSession is closing due to unrecoverable error');
901
+ // Track error counts per type to implement max_unrecoverable_errors logic
902
+ if (error.type === 'llm_error') {
903
+ this.llmErrorCounts += 1;
904
+ if (this.llmErrorCounts <= this._connOptions.maxUnrecoverableErrors) {
905
+ return;
906
+ }
907
+ } else if (error.type === 'tts_error') {
908
+ this.ttsErrorCounts += 1;
909
+ if (this.ttsErrorCounts <= this._connOptions.maxUnrecoverableErrors) {
910
+ return;
911
+ }
912
+ } else if (error.type === 'interruption_detection_error') {
913
+ this.logger.error(error.toString());
914
+ return;
915
+ }
916
+
917
+ this.logger.error(error, 'AgentSession is closing due to an unrecoverable error');
394
918
 
395
919
  this.closingTask = (async () => {
396
920
  await this.closeImpl(CloseReason.ERROR, error);
@@ -406,13 +930,56 @@ export class AgentSession<
406
930
  }
407
931
 
408
932
  /** @internal */
409
- _updateAgentState(state: AgentState) {
933
+ _toolItemsAdded(items: (FunctionCall | FunctionCallOutput)[]): void {
934
+ this._chatCtx.insert(items);
935
+ }
936
+
937
+ /** @internal */
938
+ _updateAgentState(state: AgentState, options?: { startTime?: number; otelContext?: Context }) {
410
939
  if (this._agentState === state) {
411
940
  return;
412
941
  }
413
942
 
943
+ if (state === 'speaking') {
944
+ this.llmErrorCounts = 0;
945
+ this.ttsErrorCounts = 0;
946
+
947
+ if (this.agentSpeakingSpan === undefined) {
948
+ this.agentSpeakingSpan = tracer.startSpan({
949
+ name: 'agent_speaking',
950
+ context: options?.otelContext ?? this.rootSpanContext,
951
+ startTime: options?.startTime,
952
+ });
953
+
954
+ const localParticipant = this._roomIO?.localParticipant;
955
+ if (localParticipant) {
956
+ setParticipantSpanAttributes(this.agentSpeakingSpan, localParticipant);
957
+ }
958
+ }
959
+ } else if (this.agentSpeakingSpan !== undefined) {
960
+ // TODO(brian): PR4 - Set ATTR_END_TIME attribute if available
961
+ this.agentSpeakingSpan.end();
962
+ this.agentSpeakingSpan = undefined;
963
+ }
964
+
965
+ if (state === 'speaking' && this._aecWarmupRemaining > 0 && this._aecWarmupTimer === null) {
966
+ this._aecWarmupTimer = setTimeout(() => this._onAecWarmupExpired(), this._aecWarmupRemaining);
967
+ this.logger.debug(
968
+ { warmupDurationMs: this._aecWarmupRemaining },
969
+ 'aec warmup active, disabling interruptions',
970
+ );
971
+ }
972
+
414
973
  const oldState = this._agentState;
415
974
  this._agentState = state;
975
+
976
+ // Handle user away timer based on state changes
977
+ if (state === 'listening' && this._userState === 'listening') {
978
+ this._setUserAwayTimer();
979
+ } else {
980
+ this._cancelUserAwayTimer();
981
+ }
982
+
416
983
  this.emit(
417
984
  AgentSessionEventTypes.AgentStateChanged,
418
985
  createAgentStateChangedEvent(oldState, state),
@@ -420,13 +987,40 @@ export class AgentSession<
420
987
  }
421
988
 
422
989
  /** @internal */
423
- _updateUserState(state: UserState) {
424
- if (this.userState === state) {
990
+ _updateUserState(
991
+ state: UserState,
992
+ options?: { lastSpeakingTime?: number; otelContext?: Context },
993
+ ) {
994
+ if (this._userState === state) {
425
995
  return;
426
996
  }
427
997
 
428
- const oldState = this.userState;
429
- this.userState = state;
998
+ if (state === 'speaking' && this._userSpeakingSpan === undefined) {
999
+ this._userSpeakingSpan = tracer.startSpan({
1000
+ name: 'user_speaking',
1001
+ context: options?.otelContext ?? this.rootSpanContext,
1002
+ startTime: options?.lastSpeakingTime,
1003
+ });
1004
+
1005
+ const linked = this._roomIO?.linkedParticipant;
1006
+ if (linked) {
1007
+ setParticipantSpanAttributes(this._userSpeakingSpan, linked);
1008
+ }
1009
+ } else if (this._userSpeakingSpan !== undefined) {
1010
+ this._userSpeakingSpan.end(options?.lastSpeakingTime);
1011
+ this._userSpeakingSpan = undefined;
1012
+ }
1013
+
1014
+ const oldState = this._userState;
1015
+ this._userState = state;
1016
+
1017
+ // Handle user away timer based on state changes
1018
+ if (state === 'listening' && this._agentState === 'listening') {
1019
+ this._setUserAwayTimer();
1020
+ } else {
1021
+ this._cancelUserAwayTimer();
1022
+ }
1023
+
430
1024
  this.emit(
431
1025
  AgentSessionEventTypes.UserStateChanged,
432
1026
  createUserStateChangedEvent(oldState, state),
@@ -448,29 +1042,119 @@ export class AgentSession<
448
1042
 
449
1043
  private onTextOutputChanged(): void {}
450
1044
 
1045
+ private _setUserAwayTimer(): void {
1046
+ this._cancelUserAwayTimer();
1047
+
1048
+ if (
1049
+ this.sessionOptions.userAwayTimeout === null ||
1050
+ this.sessionOptions.userAwayTimeout === undefined
1051
+ ) {
1052
+ return;
1053
+ }
1054
+
1055
+ if (this._roomIO && !this._roomIO.isParticipantAvailable) {
1056
+ return;
1057
+ }
1058
+
1059
+ this.userAwayTimer = setTimeout(() => {
1060
+ this.logger.debug('User away timeout triggered');
1061
+ this._updateUserState('away');
1062
+ }, this.sessionOptions.userAwayTimeout * 1000);
1063
+ }
1064
+
1065
+ private _cancelUserAwayTimer(): void {
1066
+ if (this.userAwayTimer !== null) {
1067
+ clearTimeout(this.userAwayTimer);
1068
+ this.userAwayTimer = null;
1069
+ }
1070
+ }
1071
+
1072
+ /** @internal */
1073
+ _onAecWarmupExpired(): void {
1074
+ if (this._aecWarmupRemaining > 0) {
1075
+ this.logger.debug('aec warmup expired, re-enabling interruptions');
1076
+ }
1077
+
1078
+ this._aecWarmupRemaining = 0;
1079
+ if (this._aecWarmupTimer !== null) {
1080
+ clearTimeout(this._aecWarmupTimer);
1081
+ this._aecWarmupTimer = null;
1082
+ }
1083
+ }
1084
+
1085
+ private _onUserInputTranscribed(ev: UserInputTranscribedEvent): void {
1086
+ if (this._userState === 'away' && ev.isFinal) {
1087
+ this.logger.debug('User returned from away state due to speech input');
1088
+ this._updateUserState('listening');
1089
+ }
1090
+ }
1091
+
451
1092
  private async closeImpl(
452
- reason: CloseReason,
453
- error: RealtimeModelError | LLMError | TTSError | STTError | null = null,
1093
+ reason: ShutdownReason,
1094
+ error:
1095
+ | RealtimeModelError
1096
+ | LLMError
1097
+ | TTSError
1098
+ | STTError
1099
+ | InterruptionDetectionError
1100
+ | null = null,
1101
+ drain: boolean = false,
1102
+ ): Promise<void> {
1103
+ if (this.rootSpanContext) {
1104
+ return otelContext.with(this.rootSpanContext, async () => {
1105
+ await this.closeImplInner(reason, error, drain);
1106
+ });
1107
+ }
1108
+
1109
+ return this.closeImplInner(reason, error, drain);
1110
+ }
1111
+
1112
+ private async closeImplInner(
1113
+ reason: ShutdownReason,
1114
+ error:
1115
+ | RealtimeModelError
1116
+ | LLMError
1117
+ | TTSError
1118
+ | STTError
1119
+ | InterruptionDetectionError
1120
+ | null = null,
454
1121
  drain: boolean = false,
455
1122
  ): Promise<void> {
456
1123
  if (!this.started) {
457
1124
  return;
458
1125
  }
459
1126
 
1127
+ this._cancelUserAwayTimer();
1128
+ this._onAecWarmupExpired();
1129
+ this.off(AgentSessionEventTypes.UserInputTranscribed, this._onUserInputTranscribed);
1130
+
460
1131
  if (this.activity) {
461
1132
  if (!drain) {
462
1133
  try {
463
- this.activity.interrupt();
1134
+ await this.activity.interrupt({ force: true }).await;
464
1135
  } catch (error) {
465
- // uninterruptible speech [copied from python]
466
- // TODO(shubhra): force interrupt or wait for it to finish?
467
- // it might be an audio played from the error callback
1136
+ this.logger.warn({ error }, 'Error interrupting activity');
468
1137
  }
469
1138
  }
1139
+
470
1140
  await this.activity.drain();
471
1141
  // wait any uninterruptible speech to finish
472
1142
  await this.activity.currentSpeech?.waitForPlayout();
473
- this.activity.detachAudioInput();
1143
+
1144
+ if (reason !== CloseReason.ERROR) {
1145
+ this.activity.commitUserTurn({ audioDetached: true, throwIfNotReady: false });
1146
+ }
1147
+
1148
+ try {
1149
+ this.activity.detachAudioInput();
1150
+ } catch (error) {
1151
+ // Ignore detach errors during cleanup - source may not have been set
1152
+ }
1153
+ }
1154
+
1155
+ // Close recorder before detaching inputs/outputs (keep reference for session report)
1156
+ if (this._recorderIO) {
1157
+ await this._recorderIO.close();
474
1158
  }
475
1159
 
476
1160
  // detach the inputs and outputs
@@ -478,18 +1162,39 @@ export class AgentSession<
478
1162
  this.output.audio = null;
479
1163
  this.output.transcription = null;
480
1164
 
481
- await this.roomIO?.close();
482
- this.roomIO = undefined;
1165
+ await this.sessionHost?.close();
1166
+ this.sessionHost = undefined;
1167
+
1168
+ await this._roomIO?.close();
1169
+ this._roomIO = undefined;
483
1170
 
484
1171
  await this.activity?.close();
485
1172
  this.activity = undefined;
486
1173
 
1174
+ if (this.sessionSpan) {
1175
+ this.sessionSpan.end();
1176
+ this.sessionSpan = undefined;
1177
+ }
1178
+
1179
+ if (this._userSpeakingSpan) {
1180
+ this._userSpeakingSpan.end();
1181
+ this._userSpeakingSpan = undefined;
1182
+ }
1183
+
1184
+ if (this.agentSpeakingSpan) {
1185
+ this.agentSpeakingSpan.end();
1186
+ this.agentSpeakingSpan = undefined;
1187
+ }
1188
+
487
1189
  this.started = false;
488
1190
 
489
1191
  this.emit(AgentSessionEventTypes.Close, createCloseEvent(reason, error));
490
1192
 
491
- this.userState = 'listening';
1193
+ this._userState = 'listening';
492
1194
  this._agentState = 'initializing';
1195
+ this.rootSpanContext = undefined;
1196
+ this.llmErrorCounts = 0;
1197
+ this.ttsErrorCounts = 0;
493
1198
 
494
1199
  this.logger.info({ reason, error }, 'AgentSession closed');
495
1200
  }