@livekit/agents 0.0.0-20260120144724

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (987):
  1. package/LICENSE +201 -0
  2. package/README.md +17 -0
  3. package/dist/_exceptions.cjs +109 -0
  4. package/dist/_exceptions.cjs.map +1 -0
  5. package/dist/_exceptions.d.cts +64 -0
  6. package/dist/_exceptions.d.ts +64 -0
  7. package/dist/_exceptions.d.ts.map +1 -0
  8. package/dist/_exceptions.js +80 -0
  9. package/dist/_exceptions.js.map +1 -0
  10. package/dist/audio.cjs +170 -0
  11. package/dist/audio.cjs.map +1 -0
  12. package/dist/audio.d.cts +46 -0
  13. package/dist/audio.d.ts +46 -0
  14. package/dist/audio.d.ts.map +1 -0
  15. package/dist/audio.js +133 -0
  16. package/dist/audio.js.map +1 -0
  17. package/dist/cli.cjs +171 -0
  18. package/dist/cli.cjs.map +1 -0
  19. package/dist/cli.d.cts +14 -0
  20. package/dist/cli.d.ts +14 -0
  21. package/dist/cli.d.ts.map +1 -0
  22. package/dist/cli.js +145 -0
  23. package/dist/cli.js.map +1 -0
  24. package/dist/connection_pool.cjs +242 -0
  25. package/dist/connection_pool.cjs.map +1 -0
  26. package/dist/connection_pool.d.cts +123 -0
  27. package/dist/connection_pool.d.ts +123 -0
  28. package/dist/connection_pool.d.ts.map +1 -0
  29. package/dist/connection_pool.js +218 -0
  30. package/dist/connection_pool.js.map +1 -0
  31. package/dist/connection_pool.test.cjs +256 -0
  32. package/dist/connection_pool.test.cjs.map +1 -0
  33. package/dist/connection_pool.test.js +255 -0
  34. package/dist/connection_pool.test.js.map +1 -0
  35. package/dist/constants.cjs +44 -0
  36. package/dist/constants.cjs.map +1 -0
  37. package/dist/constants.d.cts +7 -0
  38. package/dist/constants.d.ts +7 -0
  39. package/dist/constants.d.ts.map +1 -0
  40. package/dist/constants.js +15 -0
  41. package/dist/constants.js.map +1 -0
  42. package/dist/generator.cjs +36 -0
  43. package/dist/generator.cjs.map +1 -0
  44. package/dist/generator.d.cts +23 -0
  45. package/dist/generator.d.ts +23 -0
  46. package/dist/generator.d.ts.map +1 -0
  47. package/dist/generator.js +11 -0
  48. package/dist/generator.js.map +1 -0
  49. package/dist/http_server.cjs +75 -0
  50. package/dist/http_server.cjs.map +1 -0
  51. package/dist/http_server.d.cts +20 -0
  52. package/dist/http_server.d.ts +20 -0
  53. package/dist/http_server.d.ts.map +1 -0
  54. package/dist/http_server.js +51 -0
  55. package/dist/http_server.js.map +1 -0
  56. package/dist/index.cjs +100 -0
  57. package/dist/index.cjs.map +1 -0
  58. package/dist/index.d.cts +35 -0
  59. package/dist/index.d.ts +35 -0
  60. package/dist/index.d.ts.map +1 -0
  61. package/dist/index.js +40 -0
  62. package/dist/index.js.map +1 -0
  63. package/dist/inference/api_protos.cjs +104 -0
  64. package/dist/inference/api_protos.cjs.map +1 -0
  65. package/dist/inference/api_protos.d.cts +222 -0
  66. package/dist/inference/api_protos.d.ts +222 -0
  67. package/dist/inference/api_protos.d.ts.map +1 -0
  68. package/dist/inference/api_protos.js +70 -0
  69. package/dist/inference/api_protos.js.map +1 -0
  70. package/dist/inference/index.cjs +56 -0
  71. package/dist/inference/index.cjs.map +1 -0
  72. package/dist/inference/index.d.cts +8 -0
  73. package/dist/inference/index.d.ts +8 -0
  74. package/dist/inference/index.d.ts.map +1 -0
  75. package/dist/inference/index.js +23 -0
  76. package/dist/inference/index.js.map +1 -0
  77. package/dist/inference/interruption/AdaptiveInterruptionDetector.cjs +152 -0
  78. package/dist/inference/interruption/AdaptiveInterruptionDetector.cjs.map +1 -0
  79. package/dist/inference/interruption/AdaptiveInterruptionDetector.d.cts +50 -0
  80. package/dist/inference/interruption/AdaptiveInterruptionDetector.d.ts +50 -0
  81. package/dist/inference/interruption/AdaptiveInterruptionDetector.d.ts.map +1 -0
  82. package/dist/inference/interruption/AdaptiveInterruptionDetector.js +125 -0
  83. package/dist/inference/interruption/AdaptiveInterruptionDetector.js.map +1 -0
  84. package/dist/inference/interruption/InterruptionStream.cjs +310 -0
  85. package/dist/inference/interruption/InterruptionStream.cjs.map +1 -0
  86. package/dist/inference/interruption/InterruptionStream.d.cts +57 -0
  87. package/dist/inference/interruption/InterruptionStream.d.ts +57 -0
  88. package/dist/inference/interruption/InterruptionStream.d.ts.map +1 -0
  89. package/dist/inference/interruption/InterruptionStream.js +288 -0
  90. package/dist/inference/interruption/InterruptionStream.js.map +1 -0
  91. package/dist/inference/interruption/defaults.cjs +76 -0
  92. package/dist/inference/interruption/defaults.cjs.map +1 -0
  93. package/dist/inference/interruption/defaults.d.cts +14 -0
  94. package/dist/inference/interruption/defaults.d.ts +14 -0
  95. package/dist/inference/interruption/defaults.d.ts.map +1 -0
  96. package/dist/inference/interruption/defaults.js +42 -0
  97. package/dist/inference/interruption/defaults.js.map +1 -0
  98. package/dist/inference/interruption/errors.cjs +2 -0
  99. package/dist/inference/interruption/errors.cjs.map +1 -0
  100. package/dist/inference/interruption/errors.d.cts +2 -0
  101. package/dist/inference/interruption/errors.d.ts +2 -0
  102. package/dist/inference/interruption/errors.d.ts.map +1 -0
  103. package/dist/inference/interruption/errors.js +1 -0
  104. package/dist/inference/interruption/errors.js.map +1 -0
  105. package/dist/inference/interruption/http_transport.cjs +57 -0
  106. package/dist/inference/interruption/http_transport.cjs.map +1 -0
  107. package/dist/inference/interruption/http_transport.d.cts +23 -0
  108. package/dist/inference/interruption/http_transport.d.ts +23 -0
  109. package/dist/inference/interruption/http_transport.d.ts.map +1 -0
  110. package/dist/inference/interruption/http_transport.js +33 -0
  111. package/dist/inference/interruption/http_transport.js.map +1 -0
  112. package/dist/inference/interruption/index.cjs +34 -0
  113. package/dist/inference/interruption/index.cjs.map +1 -0
  114. package/dist/inference/interruption/index.d.cts +5 -0
  115. package/dist/inference/interruption/index.d.ts +5 -0
  116. package/dist/inference/interruption/index.d.ts.map +1 -0
  117. package/dist/inference/interruption/index.js +7 -0
  118. package/dist/inference/interruption/index.js.map +1 -0
  119. package/dist/inference/interruption/interruption.cjs +85 -0
  120. package/dist/inference/interruption/interruption.cjs.map +1 -0
  121. package/dist/inference/interruption/interruption.d.cts +48 -0
  122. package/dist/inference/interruption/interruption.d.ts +48 -0
  123. package/dist/inference/interruption/interruption.d.ts.map +1 -0
  124. package/dist/inference/interruption/interruption.js +59 -0
  125. package/dist/inference/interruption/interruption.js.map +1 -0
  126. package/dist/inference/llm.cjs +347 -0
  127. package/dist/inference/llm.cjs.map +1 -0
  128. package/dist/inference/llm.d.cts +114 -0
  129. package/dist/inference/llm.d.ts +114 -0
  130. package/dist/inference/llm.d.ts.map +1 -0
  131. package/dist/inference/llm.js +318 -0
  132. package/dist/inference/llm.js.map +1 -0
  133. package/dist/inference/stt.cjs +371 -0
  134. package/dist/inference/stt.cjs.map +1 -0
  135. package/dist/inference/stt.d.cts +91 -0
  136. package/dist/inference/stt.d.ts +91 -0
  137. package/dist/inference/stt.d.ts.map +1 -0
  138. package/dist/inference/stt.js +350 -0
  139. package/dist/inference/stt.js.map +1 -0
  140. package/dist/inference/tts.cjs +439 -0
  141. package/dist/inference/tts.cjs.map +1 -0
  142. package/dist/inference/tts.d.cts +80 -0
  143. package/dist/inference/tts.d.ts +80 -0
  144. package/dist/inference/tts.d.ts.map +1 -0
  145. package/dist/inference/tts.js +417 -0
  146. package/dist/inference/tts.js.map +1 -0
  147. package/dist/inference/utils.cjs +89 -0
  148. package/dist/inference/utils.cjs.map +1 -0
  149. package/dist/inference/utils.d.cts +6 -0
  150. package/dist/inference/utils.d.ts +6 -0
  151. package/dist/inference/utils.d.ts.map +1 -0
  152. package/dist/inference/utils.js +63 -0
  153. package/dist/inference/utils.js.map +1 -0
  154. package/dist/inference/utils.test.cjs +20 -0
  155. package/dist/inference/utils.test.cjs.map +1 -0
  156. package/dist/inference/utils.test.js +19 -0
  157. package/dist/inference/utils.test.js.map +1 -0
  158. package/dist/inference_runner.cjs +37 -0
  159. package/dist/inference_runner.cjs.map +1 -0
  160. package/dist/inference_runner.d.cts +11 -0
  161. package/dist/inference_runner.d.ts +11 -0
  162. package/dist/inference_runner.d.ts.map +1 -0
  163. package/dist/inference_runner.js +13 -0
  164. package/dist/inference_runner.js.map +1 -0
  165. package/dist/ipc/index.cjs +23 -0
  166. package/dist/ipc/index.cjs.map +1 -0
  167. package/dist/ipc/index.d.cts +2 -0
  168. package/dist/ipc/index.d.ts +2 -0
  169. package/dist/ipc/index.d.ts.map +1 -0
  170. package/dist/ipc/index.js +2 -0
  171. package/dist/ipc/index.js.map +1 -0
  172. package/dist/ipc/inference_executor.cjs +17 -0
  173. package/dist/ipc/inference_executor.cjs.map +1 -0
  174. package/dist/ipc/inference_executor.d.cts +4 -0
  175. package/dist/ipc/inference_executor.d.ts +4 -0
  176. package/dist/ipc/inference_executor.d.ts.map +1 -0
  177. package/dist/ipc/inference_executor.js +1 -0
  178. package/dist/ipc/inference_executor.js.map +1 -0
  179. package/dist/ipc/inference_proc_executor.cjs +101 -0
  180. package/dist/ipc/inference_proc_executor.cjs.map +1 -0
  181. package/dist/ipc/inference_proc_executor.d.cts +23 -0
  182. package/dist/ipc/inference_proc_executor.d.ts +23 -0
  183. package/dist/ipc/inference_proc_executor.d.ts.map +1 -0
  184. package/dist/ipc/inference_proc_executor.js +75 -0
  185. package/dist/ipc/inference_proc_executor.js.map +1 -0
  186. package/dist/ipc/inference_proc_lazy_main.cjs +86 -0
  187. package/dist/ipc/inference_proc_lazy_main.cjs.map +1 -0
  188. package/dist/ipc/inference_proc_lazy_main.d.cts +2 -0
  189. package/dist/ipc/inference_proc_lazy_main.d.ts +2 -0
  190. package/dist/ipc/inference_proc_lazy_main.d.ts.map +1 -0
  191. package/dist/ipc/inference_proc_lazy_main.js +85 -0
  192. package/dist/ipc/inference_proc_lazy_main.js.map +1 -0
  193. package/dist/ipc/job_executor.cjs +34 -0
  194. package/dist/ipc/job_executor.cjs.map +1 -0
  195. package/dist/ipc/job_executor.d.cts +18 -0
  196. package/dist/ipc/job_executor.d.ts +18 -0
  197. package/dist/ipc/job_executor.d.ts.map +1 -0
  198. package/dist/ipc/job_executor.js +10 -0
  199. package/dist/ipc/job_executor.js.map +1 -0
  200. package/dist/ipc/job_proc_executor.cjs +115 -0
  201. package/dist/ipc/job_proc_executor.cjs.map +1 -0
  202. package/dist/ipc/job_proc_executor.d.cts +19 -0
  203. package/dist/ipc/job_proc_executor.d.ts +19 -0
  204. package/dist/ipc/job_proc_executor.d.ts.map +1 -0
  205. package/dist/ipc/job_proc_executor.js +89 -0
  206. package/dist/ipc/job_proc_executor.js.map +1 -0
  207. package/dist/ipc/job_proc_lazy_main.cjs +210 -0
  208. package/dist/ipc/job_proc_lazy_main.cjs.map +1 -0
  209. package/dist/ipc/job_proc_lazy_main.d.cts +2 -0
  210. package/dist/ipc/job_proc_lazy_main.d.ts +2 -0
  211. package/dist/ipc/job_proc_lazy_main.d.ts.map +1 -0
  212. package/dist/ipc/job_proc_lazy_main.js +187 -0
  213. package/dist/ipc/job_proc_lazy_main.js.map +1 -0
  214. package/dist/ipc/message.cjs +17 -0
  215. package/dist/ipc/message.cjs.map +1 -0
  216. package/dist/ipc/message.d.cts +58 -0
  217. package/dist/ipc/message.d.ts +58 -0
  218. package/dist/ipc/message.d.ts.map +1 -0
  219. package/dist/ipc/message.js +1 -0
  220. package/dist/ipc/message.js.map +1 -0
  221. package/dist/ipc/proc_pool.cjs +164 -0
  222. package/dist/ipc/proc_pool.cjs.map +1 -0
  223. package/dist/ipc/proc_pool.d.cts +31 -0
  224. package/dist/ipc/proc_pool.d.ts +31 -0
  225. package/dist/ipc/proc_pool.d.ts.map +1 -0
  226. package/dist/ipc/proc_pool.js +140 -0
  227. package/dist/ipc/proc_pool.js.map +1 -0
  228. package/dist/ipc/supervised_proc.cjs +229 -0
  229. package/dist/ipc/supervised_proc.cjs.map +1 -0
  230. package/dist/ipc/supervised_proc.d.cts +32 -0
  231. package/dist/ipc/supervised_proc.d.ts +32 -0
  232. package/dist/ipc/supervised_proc.d.ts.map +1 -0
  233. package/dist/ipc/supervised_proc.js +195 -0
  234. package/dist/ipc/supervised_proc.js.map +1 -0
  235. package/dist/ipc/supervised_proc.test.cjs +145 -0
  236. package/dist/ipc/supervised_proc.test.cjs.map +1 -0
  237. package/dist/ipc/supervised_proc.test.js +122 -0
  238. package/dist/ipc/supervised_proc.test.js.map +1 -0
  239. package/dist/job.cjs +373 -0
  240. package/dist/job.cjs.map +1 -0
  241. package/dist/job.d.cts +141 -0
  242. package/dist/job.d.ts +141 -0
  243. package/dist/job.d.ts.map +1 -0
  244. package/dist/job.js +332 -0
  245. package/dist/job.js.map +1 -0
  246. package/dist/llm/chat_context.cjs +527 -0
  247. package/dist/llm/chat_context.cjs.map +1 -0
  248. package/dist/llm/chat_context.d.cts +223 -0
  249. package/dist/llm/chat_context.d.ts +223 -0
  250. package/dist/llm/chat_context.d.ts.map +1 -0
  251. package/dist/llm/chat_context.js +496 -0
  252. package/dist/llm/chat_context.js.map +1 -0
  253. package/dist/llm/chat_context.test.cjs +911 -0
  254. package/dist/llm/chat_context.test.cjs.map +1 -0
  255. package/dist/llm/chat_context.test.js +916 -0
  256. package/dist/llm/chat_context.test.js.map +1 -0
  257. package/dist/llm/fallback_adapter.cjs +278 -0
  258. package/dist/llm/fallback_adapter.cjs.map +1 -0
  259. package/dist/llm/fallback_adapter.d.cts +73 -0
  260. package/dist/llm/fallback_adapter.d.ts +73 -0
  261. package/dist/llm/fallback_adapter.d.ts.map +1 -0
  262. package/dist/llm/fallback_adapter.js +254 -0
  263. package/dist/llm/fallback_adapter.js.map +1 -0
  264. package/dist/llm/fallback_adapter.test.cjs +176 -0
  265. package/dist/llm/fallback_adapter.test.cjs.map +1 -0
  266. package/dist/llm/fallback_adapter.test.js +175 -0
  267. package/dist/llm/fallback_adapter.test.js.map +1 -0
  268. package/dist/llm/index.cjs +79 -0
  269. package/dist/llm/index.cjs.map +1 -0
  270. package/dist/llm/index.d.cts +9 -0
  271. package/dist/llm/index.d.ts +9 -0
  272. package/dist/llm/index.d.ts.map +1 -0
  273. package/dist/llm/index.js +61 -0
  274. package/dist/llm/index.js.map +1 -0
  275. package/dist/llm/llm.cjs +226 -0
  276. package/dist/llm/llm.cjs.map +1 -0
  277. package/dist/llm/llm.d.cts +94 -0
  278. package/dist/llm/llm.d.ts +94 -0
  279. package/dist/llm/llm.d.ts.map +1 -0
  280. package/dist/llm/llm.js +201 -0
  281. package/dist/llm/llm.js.map +1 -0
  282. package/dist/llm/provider_format/google.cjs +132 -0
  283. package/dist/llm/provider_format/google.cjs.map +1 -0
  284. package/dist/llm/provider_format/google.d.cts +6 -0
  285. package/dist/llm/provider_format/google.d.ts +6 -0
  286. package/dist/llm/provider_format/google.d.ts.map +1 -0
  287. package/dist/llm/provider_format/google.js +108 -0
  288. package/dist/llm/provider_format/google.js.map +1 -0
  289. package/dist/llm/provider_format/google.test.cjs +724 -0
  290. package/dist/llm/provider_format/google.test.cjs.map +1 -0
  291. package/dist/llm/provider_format/google.test.js +728 -0
  292. package/dist/llm/provider_format/google.test.js.map +1 -0
  293. package/dist/llm/provider_format/index.cjs +40 -0
  294. package/dist/llm/provider_format/index.cjs.map +1 -0
  295. package/dist/llm/provider_format/index.d.cts +4 -0
  296. package/dist/llm/provider_format/index.d.ts +4 -0
  297. package/dist/llm/provider_format/index.d.ts.map +1 -0
  298. package/dist/llm/provider_format/index.js +16 -0
  299. package/dist/llm/provider_format/index.js.map +1 -0
  300. package/dist/llm/provider_format/openai.cjs +138 -0
  301. package/dist/llm/provider_format/openai.cjs.map +1 -0
  302. package/dist/llm/provider_format/openai.d.cts +3 -0
  303. package/dist/llm/provider_format/openai.d.ts +3 -0
  304. package/dist/llm/provider_format/openai.d.ts.map +1 -0
  305. package/dist/llm/provider_format/openai.js +114 -0
  306. package/dist/llm/provider_format/openai.js.map +1 -0
  307. package/dist/llm/provider_format/openai.test.cjs +557 -0
  308. package/dist/llm/provider_format/openai.test.cjs.map +1 -0
  309. package/dist/llm/provider_format/openai.test.js +561 -0
  310. package/dist/llm/provider_format/openai.test.js.map +1 -0
  311. package/dist/llm/provider_format/utils.cjs +146 -0
  312. package/dist/llm/provider_format/utils.cjs.map +1 -0
  313. package/dist/llm/provider_format/utils.d.cts +38 -0
  314. package/dist/llm/provider_format/utils.d.ts +38 -0
  315. package/dist/llm/provider_format/utils.d.ts.map +1 -0
  316. package/dist/llm/provider_format/utils.js +122 -0
  317. package/dist/llm/provider_format/utils.js.map +1 -0
  318. package/dist/llm/realtime.cjs +77 -0
  319. package/dist/llm/realtime.cjs.map +1 -0
  320. package/dist/llm/realtime.d.cts +106 -0
  321. package/dist/llm/realtime.d.ts +106 -0
  322. package/dist/llm/realtime.d.ts.map +1 -0
  323. package/dist/llm/realtime.js +52 -0
  324. package/dist/llm/realtime.js.map +1 -0
  325. package/dist/llm/remote_chat_context.cjs +112 -0
  326. package/dist/llm/remote_chat_context.cjs.map +1 -0
  327. package/dist/llm/remote_chat_context.d.cts +25 -0
  328. package/dist/llm/remote_chat_context.d.ts +25 -0
  329. package/dist/llm/remote_chat_context.d.ts.map +1 -0
  330. package/dist/llm/remote_chat_context.js +88 -0
  331. package/dist/llm/remote_chat_context.js.map +1 -0
  332. package/dist/llm/remote_chat_context.test.cjs +225 -0
  333. package/dist/llm/remote_chat_context.test.cjs.map +1 -0
  334. package/dist/llm/remote_chat_context.test.js +224 -0
  335. package/dist/llm/remote_chat_context.test.js.map +1 -0
  336. package/dist/llm/tool_context.cjs +152 -0
  337. package/dist/llm/tool_context.cjs.map +1 -0
  338. package/dist/llm/tool_context.d.cts +153 -0
  339. package/dist/llm/tool_context.d.ts +153 -0
  340. package/dist/llm/tool_context.d.ts.map +1 -0
  341. package/dist/llm/tool_context.js +119 -0
  342. package/dist/llm/tool_context.js.map +1 -0
  343. package/dist/llm/tool_context.test.cjs +359 -0
  344. package/dist/llm/tool_context.test.cjs.map +1 -0
  345. package/dist/llm/tool_context.test.js +336 -0
  346. package/dist/llm/tool_context.test.js.map +1 -0
  347. package/dist/llm/tool_context.type.test.cjs +92 -0
  348. package/dist/llm/tool_context.type.test.cjs.map +1 -0
  349. package/dist/llm/tool_context.type.test.js +91 -0
  350. package/dist/llm/tool_context.type.test.js.map +1 -0
  351. package/dist/llm/utils.cjs +267 -0
  352. package/dist/llm/utils.cjs.map +1 -0
  353. package/dist/llm/utils.d.cts +41 -0
  354. package/dist/llm/utils.d.ts +41 -0
  355. package/dist/llm/utils.d.ts.map +1 -0
  356. package/dist/llm/utils.js +230 -0
  357. package/dist/llm/utils.js.map +1 -0
  358. package/dist/llm/utils.test.cjs +513 -0
  359. package/dist/llm/utils.test.cjs.map +1 -0
  360. package/dist/llm/utils.test.js +490 -0
  361. package/dist/llm/utils.test.js.map +1 -0
  362. package/dist/llm/zod-utils.cjs +102 -0
  363. package/dist/llm/zod-utils.cjs.map +1 -0
  364. package/dist/llm/zod-utils.d.cts +65 -0
  365. package/dist/llm/zod-utils.d.ts +65 -0
  366. package/dist/llm/zod-utils.d.ts.map +1 -0
  367. package/dist/llm/zod-utils.js +64 -0
  368. package/dist/llm/zod-utils.js.map +1 -0
  369. package/dist/llm/zod-utils.test.cjs +472 -0
  370. package/dist/llm/zod-utils.test.cjs.map +1 -0
  371. package/dist/llm/zod-utils.test.js +455 -0
  372. package/dist/llm/zod-utils.test.js.map +1 -0
  373. package/dist/log.cjs +81 -0
  374. package/dist/log.cjs.map +1 -0
  375. package/dist/log.d.cts +20 -0
  376. package/dist/log.d.ts +20 -0
  377. package/dist/log.d.ts.map +1 -0
  378. package/dist/log.js +54 -0
  379. package/dist/log.js.map +1 -0
  380. package/dist/metrics/base.cjs +17 -0
  381. package/dist/metrics/base.cjs.map +1 -0
  382. package/dist/metrics/base.d.cts +150 -0
  383. package/dist/metrics/base.d.ts +150 -0
  384. package/dist/metrics/base.d.ts.map +1 -0
  385. package/dist/metrics/base.js +1 -0
  386. package/dist/metrics/base.js.map +1 -0
  387. package/dist/metrics/index.cjs +32 -0
  388. package/dist/metrics/index.cjs.map +1 -0
  389. package/dist/metrics/index.d.cts +4 -0
  390. package/dist/metrics/index.d.ts +4 -0
  391. package/dist/metrics/index.d.ts.map +1 -0
  392. package/dist/metrics/index.js +7 -0
  393. package/dist/metrics/index.js.map +1 -0
  394. package/dist/metrics/usage_collector.cjs +58 -0
  395. package/dist/metrics/usage_collector.cjs.map +1 -0
  396. package/dist/metrics/usage_collector.d.cts +15 -0
  397. package/dist/metrics/usage_collector.d.ts +15 -0
  398. package/dist/metrics/usage_collector.d.ts.map +1 -0
  399. package/dist/metrics/usage_collector.js +34 -0
  400. package/dist/metrics/usage_collector.js.map +1 -0
  401. package/dist/metrics/utils.cjs +74 -0
  402. package/dist/metrics/utils.cjs.map +1 -0
  403. package/dist/metrics/utils.d.cts +3 -0
  404. package/dist/metrics/utils.d.ts +3 -0
  405. package/dist/metrics/utils.d.ts.map +1 -0
  406. package/dist/metrics/utils.js +50 -0
  407. package/dist/metrics/utils.js.map +1 -0
  408. package/dist/plugin.cjs +62 -0
  409. package/dist/plugin.cjs.map +1 -0
  410. package/dist/plugin.d.cts +24 -0
  411. package/dist/plugin.d.ts +24 -0
  412. package/dist/plugin.d.ts.map +1 -0
  413. package/dist/plugin.js +37 -0
  414. package/dist/plugin.js.map +1 -0
  415. package/dist/stream/deferred_stream.cjs +106 -0
  416. package/dist/stream/deferred_stream.cjs.map +1 -0
  417. package/dist/stream/deferred_stream.d.cts +32 -0
  418. package/dist/stream/deferred_stream.d.ts +32 -0
  419. package/dist/stream/deferred_stream.d.ts.map +1 -0
  420. package/dist/stream/deferred_stream.js +81 -0
  421. package/dist/stream/deferred_stream.js.map +1 -0
  422. package/dist/stream/deferred_stream.test.cjs +527 -0
  423. package/dist/stream/deferred_stream.test.cjs.map +1 -0
  424. package/dist/stream/deferred_stream.test.js +526 -0
  425. package/dist/stream/deferred_stream.test.js.map +1 -0
  426. package/dist/stream/identity_transform.cjs +42 -0
  427. package/dist/stream/identity_transform.cjs.map +1 -0
  428. package/dist/stream/identity_transform.d.cts +6 -0
  429. package/dist/stream/identity_transform.d.ts +6 -0
  430. package/dist/stream/identity_transform.d.ts.map +1 -0
  431. package/dist/stream/identity_transform.js +18 -0
  432. package/dist/stream/identity_transform.js.map +1 -0
  433. package/dist/stream/identity_transform.test.cjs +125 -0
  434. package/dist/stream/identity_transform.test.cjs.map +1 -0
  435. package/dist/stream/identity_transform.test.js +124 -0
  436. package/dist/stream/identity_transform.test.js.map +1 -0
  437. package/dist/stream/index.cjs +38 -0
  438. package/dist/stream/index.cjs.map +1 -0
  439. package/dist/stream/index.d.cts +5 -0
  440. package/dist/stream/index.d.ts +5 -0
  441. package/dist/stream/index.d.ts.map +1 -0
  442. package/dist/stream/index.js +11 -0
  443. package/dist/stream/index.js.map +1 -0
  444. package/dist/stream/merge_readable_streams.cjs +59 -0
  445. package/dist/stream/merge_readable_streams.cjs.map +1 -0
  446. package/dist/stream/merge_readable_streams.d.cts +4 -0
  447. package/dist/stream/merge_readable_streams.d.ts +4 -0
  448. package/dist/stream/merge_readable_streams.d.ts.map +1 -0
  449. package/dist/stream/merge_readable_streams.js +35 -0
  450. package/dist/stream/merge_readable_streams.js.map +1 -0
  451. package/dist/stream/stream_channel.cjs +57 -0
  452. package/dist/stream/stream_channel.cjs.map +1 -0
  453. package/dist/stream/stream_channel.d.cts +11 -0
  454. package/dist/stream/stream_channel.d.ts +11 -0
  455. package/dist/stream/stream_channel.d.ts.map +1 -0
  456. package/dist/stream/stream_channel.js +33 -0
  457. package/dist/stream/stream_channel.js.map +1 -0
  458. package/dist/stream/stream_channel.test.cjs +124 -0
  459. package/dist/stream/stream_channel.test.cjs.map +1 -0
  460. package/dist/stream/stream_channel.test.js +123 -0
  461. package/dist/stream/stream_channel.test.js.map +1 -0
  462. package/dist/stt/index.cjs +38 -0
  463. package/dist/stt/index.cjs.map +1 -0
  464. package/dist/stt/index.d.cts +3 -0
  465. package/dist/stt/index.d.ts +3 -0
  466. package/dist/stt/index.d.ts.map +1 -0
  467. package/dist/stt/index.js +14 -0
  468. package/dist/stt/index.js.map +1 -0
  469. package/dist/stt/stream_adapter.cjs +115 -0
  470. package/dist/stt/stream_adapter.cjs.map +1 -0
  471. package/dist/stt/stream_adapter.d.cts +23 -0
  472. package/dist/stt/stream_adapter.d.ts +23 -0
  473. package/dist/stt/stream_adapter.d.ts.map +1 -0
  474. package/dist/stt/stream_adapter.js +90 -0
  475. package/dist/stt/stream_adapter.js.map +1 -0
  476. package/dist/stt/stt.cjs +253 -0
  477. package/dist/stt/stt.cjs.map +1 -0
  478. package/dist/stt/stt.d.cts +158 -0
  479. package/dist/stt/stt.d.ts +158 -0
  480. package/dist/stt/stt.d.ts.map +1 -0
  481. package/dist/stt/stt.js +227 -0
  482. package/dist/stt/stt.js.map +1 -0
  483. package/dist/telemetry/index.cjs +72 -0
  484. package/dist/telemetry/index.cjs.map +1 -0
  485. package/dist/telemetry/index.d.cts +7 -0
  486. package/dist/telemetry/index.d.ts +7 -0
  487. package/dist/telemetry/index.d.ts.map +1 -0
  488. package/dist/telemetry/index.js +37 -0
  489. package/dist/telemetry/index.js.map +1 -0
  490. package/dist/telemetry/logging.cjs +65 -0
  491. package/dist/telemetry/logging.cjs.map +1 -0
  492. package/dist/telemetry/logging.d.cts +21 -0
  493. package/dist/telemetry/logging.d.ts +21 -0
  494. package/dist/telemetry/logging.d.ts.map +1 -0
  495. package/dist/telemetry/logging.js +40 -0
  496. package/dist/telemetry/logging.js.map +1 -0
  497. package/dist/telemetry/otel_http_exporter.cjs +147 -0
  498. package/dist/telemetry/otel_http_exporter.cjs.map +1 -0
  499. package/dist/telemetry/otel_http_exporter.d.cts +62 -0
  500. package/dist/telemetry/otel_http_exporter.d.ts +62 -0
  501. package/dist/telemetry/otel_http_exporter.d.ts.map +1 -0
  502. package/dist/telemetry/otel_http_exporter.js +123 -0
  503. package/dist/telemetry/otel_http_exporter.js.map +1 -0
  504. package/dist/telemetry/pino_otel_transport.cjs +217 -0
  505. package/dist/telemetry/pino_otel_transport.cjs.map +1 -0
  506. package/dist/telemetry/pino_otel_transport.d.cts +58 -0
  507. package/dist/telemetry/pino_otel_transport.d.ts +58 -0
  508. package/dist/telemetry/pino_otel_transport.d.ts.map +1 -0
  509. package/dist/telemetry/pino_otel_transport.js +189 -0
  510. package/dist/telemetry/pino_otel_transport.js.map +1 -0
  511. package/dist/telemetry/trace_types.cjs +206 -0
  512. package/dist/telemetry/trace_types.cjs.map +1 -0
  513. package/dist/telemetry/trace_types.d.cts +61 -0
  514. package/dist/telemetry/trace_types.d.ts +61 -0
  515. package/dist/telemetry/trace_types.d.ts.map +1 -0
  516. package/dist/telemetry/trace_types.js +123 -0
  517. package/dist/telemetry/trace_types.js.map +1 -0
  518. package/dist/telemetry/traces.cjs +444 -0
  519. package/dist/telemetry/traces.cjs.map +1 -0
  520. package/dist/telemetry/traces.d.cts +114 -0
  521. package/dist/telemetry/traces.d.ts +114 -0
  522. package/dist/telemetry/traces.d.ts.map +1 -0
  523. package/dist/telemetry/traces.js +409 -0
  524. package/dist/telemetry/traces.js.map +1 -0
  525. package/dist/telemetry/utils.cjs +86 -0
  526. package/dist/telemetry/utils.cjs.map +1 -0
  527. package/dist/telemetry/utils.d.cts +5 -0
  528. package/dist/telemetry/utils.d.ts +5 -0
  529. package/dist/telemetry/utils.d.ts.map +1 -0
  530. package/dist/telemetry/utils.js +51 -0
  531. package/dist/telemetry/utils.js.map +1 -0
  532. package/dist/tokenize/basic/basic.cjs +105 -0
  533. package/dist/tokenize/basic/basic.cjs.map +1 -0
  534. package/dist/tokenize/basic/basic.d.cts +24 -0
  535. package/dist/tokenize/basic/basic.d.ts +24 -0
  536. package/dist/tokenize/basic/basic.d.ts.map +1 -0
  537. package/dist/tokenize/basic/basic.js +67 -0
  538. package/dist/tokenize/basic/basic.js.map +1 -0
  539. package/dist/tokenize/basic/hyphenator.cjs +425 -0
  540. package/dist/tokenize/basic/hyphenator.cjs.map +1 -0
  541. package/dist/tokenize/basic/hyphenator.d.cts +17 -0
  542. package/dist/tokenize/basic/hyphenator.d.ts +17 -0
  543. package/dist/tokenize/basic/hyphenator.d.ts.map +1 -0
  544. package/dist/tokenize/basic/hyphenator.js +401 -0
  545. package/dist/tokenize/basic/hyphenator.js.map +1 -0
  546. package/dist/tokenize/basic/index.cjs +37 -0
  547. package/dist/tokenize/basic/index.cjs.map +1 -0
  548. package/dist/tokenize/basic/index.d.cts +2 -0
  549. package/dist/tokenize/basic/index.d.ts +2 -0
  550. package/dist/tokenize/basic/index.d.ts.map +1 -0
  551. package/dist/tokenize/basic/index.js +15 -0
  552. package/dist/tokenize/basic/index.js.map +1 -0
  553. package/dist/tokenize/basic/paragraph.cjs +57 -0
  554. package/dist/tokenize/basic/paragraph.cjs.map +1 -0
  555. package/dist/tokenize/basic/paragraph.d.cts +5 -0
  556. package/dist/tokenize/basic/paragraph.d.ts +5 -0
  557. package/dist/tokenize/basic/paragraph.d.ts.map +1 -0
  558. package/dist/tokenize/basic/paragraph.js +33 -0
  559. package/dist/tokenize/basic/paragraph.js.map +1 -0
  560. package/dist/tokenize/basic/sentence.cjs +97 -0
  561. package/dist/tokenize/basic/sentence.cjs.map +1 -0
  562. package/dist/tokenize/basic/sentence.d.cts +5 -0
  563. package/dist/tokenize/basic/sentence.d.ts +5 -0
  564. package/dist/tokenize/basic/sentence.d.ts.map +1 -0
  565. package/dist/tokenize/basic/sentence.js +73 -0
  566. package/dist/tokenize/basic/sentence.js.map +1 -0
  567. package/dist/tokenize/basic/word.cjs +44 -0
  568. package/dist/tokenize/basic/word.cjs.map +1 -0
  569. package/dist/tokenize/basic/word.d.cts +5 -0
  570. package/dist/tokenize/basic/word.d.ts +5 -0
  571. package/dist/tokenize/basic/word.d.ts.map +1 -0
  572. package/dist/tokenize/basic/word.js +20 -0
  573. package/dist/tokenize/basic/word.js.map +1 -0
  574. package/dist/tokenize/index.cjs +55 -0
  575. package/dist/tokenize/index.cjs.map +1 -0
  576. package/dist/tokenize/index.d.cts +5 -0
  577. package/dist/tokenize/index.d.ts +5 -0
  578. package/dist/tokenize/index.d.ts.map +1 -0
  579. package/dist/tokenize/index.js +19 -0
  580. package/dist/tokenize/index.js.map +1 -0
  581. package/dist/tokenize/token_stream.cjs +168 -0
  582. package/dist/tokenize/token_stream.cjs.map +1 -0
  583. package/dist/tokenize/token_stream.d.cts +40 -0
  584. package/dist/tokenize/token_stream.d.ts +40 -0
  585. package/dist/tokenize/token_stream.d.ts.map +1 -0
  586. package/dist/tokenize/token_stream.js +142 -0
  587. package/dist/tokenize/token_stream.js.map +1 -0
  588. package/dist/tokenize/tokenizer.cjs +184 -0
  589. package/dist/tokenize/tokenizer.cjs.map +1 -0
  590. package/dist/tokenize/tokenizer.d.cts +55 -0
  591. package/dist/tokenize/tokenizer.d.ts +55 -0
  592. package/dist/tokenize/tokenizer.d.ts.map +1 -0
  593. package/dist/tokenize/tokenizer.js +156 -0
  594. package/dist/tokenize/tokenizer.js.map +1 -0
  595. package/dist/tokenize/tokenizer.test.cjs +220 -0
  596. package/dist/tokenize/tokenizer.test.cjs.map +1 -0
  597. package/dist/tokenize/tokenizer.test.js +219 -0
  598. package/dist/tokenize/tokenizer.test.js.map +1 -0
  599. package/dist/transcription.cjs +247 -0
  600. package/dist/transcription.cjs.map +1 -0
  601. package/dist/transcription.d.cts +31 -0
  602. package/dist/transcription.d.ts +31 -0
  603. package/dist/transcription.d.ts.map +1 -0
  604. package/dist/transcription.js +222 -0
  605. package/dist/transcription.js.map +1 -0
  606. package/dist/tts/index.cjs +38 -0
  607. package/dist/tts/index.cjs.map +1 -0
  608. package/dist/tts/index.d.cts +3 -0
  609. package/dist/tts/index.d.ts +3 -0
  610. package/dist/tts/index.d.ts.map +1 -0
  611. package/dist/tts/index.js +14 -0
  612. package/dist/tts/index.js.map +1 -0
  613. package/dist/tts/stream_adapter.cjs +105 -0
  614. package/dist/tts/stream_adapter.cjs.map +1 -0
  615. package/dist/tts/stream_adapter.d.cts +20 -0
  616. package/dist/tts/stream_adapter.d.ts +20 -0
  617. package/dist/tts/stream_adapter.d.ts.map +1 -0
  618. package/dist/tts/stream_adapter.js +80 -0
  619. package/dist/tts/stream_adapter.js.map +1 -0
  620. package/dist/tts/tts.cjs +431 -0
  621. package/dist/tts/tts.cjs.map +1 -0
  622. package/dist/tts/tts.d.cts +161 -0
  623. package/dist/tts/tts.d.ts +161 -0
  624. package/dist/tts/tts.d.ts.map +1 -0
  625. package/dist/tts/tts.js +405 -0
  626. package/dist/tts/tts.js.map +1 -0
  627. package/dist/types.cjs +49 -0
  628. package/dist/types.cjs.map +1 -0
  629. package/dist/types.d.cts +44 -0
  630. package/dist/types.d.ts +44 -0
  631. package/dist/types.d.ts.map +1 -0
  632. package/dist/types.js +23 -0
  633. package/dist/types.js.map +1 -0
  634. package/dist/utils/ws_transport.cjs +51 -0
  635. package/dist/utils/ws_transport.cjs.map +1 -0
  636. package/dist/utils/ws_transport.d.cts +9 -0
  637. package/dist/utils/ws_transport.d.ts +9 -0
  638. package/dist/utils/ws_transport.d.ts.map +1 -0
  639. package/dist/utils/ws_transport.js +17 -0
  640. package/dist/utils/ws_transport.js.map +1 -0
  641. package/dist/utils/ws_transport.test.cjs +212 -0
  642. package/dist/utils/ws_transport.test.cjs.map +1 -0
  643. package/dist/utils/ws_transport.test.js +211 -0
  644. package/dist/utils/ws_transport.test.js.map +1 -0
  645. package/dist/utils.cjs +669 -0
  646. package/dist/utils.cjs.map +1 -0
  647. package/dist/utils.d.cts +244 -0
  648. package/dist/utils.d.ts +244 -0
  649. package/dist/utils.d.ts.map +1 -0
  650. package/dist/utils.js +617 -0
  651. package/dist/utils.js.map +1 -0
  652. package/dist/utils.test.cjs +492 -0
  653. package/dist/utils.test.cjs.map +1 -0
  654. package/dist/utils.test.js +491 -0
  655. package/dist/utils.test.js.map +1 -0
  656. package/dist/vad.cjs +211 -0
  657. package/dist/vad.cjs.map +1 -0
  658. package/dist/vad.d.cts +105 -0
  659. package/dist/vad.d.ts +105 -0
  660. package/dist/vad.d.ts.map +1 -0
  661. package/dist/vad.js +185 -0
  662. package/dist/vad.js.map +1 -0
  663. package/dist/version.cjs +29 -0
  664. package/dist/version.cjs.map +1 -0
  665. package/dist/version.d.cts +2 -0
  666. package/dist/version.d.ts +2 -0
  667. package/dist/version.d.ts.map +1 -0
  668. package/dist/version.js +5 -0
  669. package/dist/version.js.map +1 -0
  670. package/dist/voice/agent.cjs +308 -0
  671. package/dist/voice/agent.cjs.map +1 -0
  672. package/dist/voice/agent.d.cts +83 -0
  673. package/dist/voice/agent.d.ts +83 -0
  674. package/dist/voice/agent.d.ts.map +1 -0
  675. package/dist/voice/agent.js +287 -0
  676. package/dist/voice/agent.js.map +1 -0
  677. package/dist/voice/agent.test.cjs +61 -0
  678. package/dist/voice/agent.test.cjs.map +1 -0
  679. package/dist/voice/agent.test.js +60 -0
  680. package/dist/voice/agent.test.js.map +1 -0
  681. package/dist/voice/agent_activity.cjs +1784 -0
  682. package/dist/voice/agent_activity.cjs.map +1 -0
  683. package/dist/voice/agent_activity.d.cts +116 -0
  684. package/dist/voice/agent_activity.d.ts +116 -0
  685. package/dist/voice/agent_activity.d.ts.map +1 -0
  686. package/dist/voice/agent_activity.js +1780 -0
  687. package/dist/voice/agent_activity.js.map +1 -0
  688. package/dist/voice/agent_session.cjs +592 -0
  689. package/dist/voice/agent_session.cjs.map +1 -0
  690. package/dist/voice/agent_session.d.cts +165 -0
  691. package/dist/voice/agent_session.d.ts +165 -0
  692. package/dist/voice/agent_session.d.ts.map +1 -0
  693. package/dist/voice/agent_session.js +582 -0
  694. package/dist/voice/agent_session.js.map +1 -0
  695. package/dist/voice/audio_recognition.cjs +668 -0
  696. package/dist/voice/audio_recognition.cjs.map +1 -0
  697. package/dist/voice/audio_recognition.d.cts +127 -0
  698. package/dist/voice/audio_recognition.d.ts +127 -0
  699. package/dist/voice/audio_recognition.d.ts.map +1 -0
  700. package/dist/voice/audio_recognition.js +647 -0
  701. package/dist/voice/audio_recognition.js.map +1 -0
  702. package/dist/voice/avatar/datastream_io.cjs +204 -0
  703. package/dist/voice/avatar/datastream_io.cjs.map +1 -0
  704. package/dist/voice/avatar/datastream_io.d.cts +37 -0
  705. package/dist/voice/avatar/datastream_io.d.ts +37 -0
  706. package/dist/voice/avatar/datastream_io.d.ts.map +1 -0
  707. package/dist/voice/avatar/datastream_io.js +188 -0
  708. package/dist/voice/avatar/datastream_io.js.map +1 -0
  709. package/dist/voice/avatar/index.cjs +23 -0
  710. package/dist/voice/avatar/index.cjs.map +1 -0
  711. package/dist/voice/avatar/index.d.cts +2 -0
  712. package/dist/voice/avatar/index.d.ts +2 -0
  713. package/dist/voice/avatar/index.d.ts.map +1 -0
  714. package/dist/voice/avatar/index.js +2 -0
  715. package/dist/voice/avatar/index.js.map +1 -0
  716. package/dist/voice/background_audio.cjs +366 -0
  717. package/dist/voice/background_audio.cjs.map +1 -0
  718. package/dist/voice/background_audio.d.cts +121 -0
  719. package/dist/voice/background_audio.d.ts +121 -0
  720. package/dist/voice/background_audio.d.ts.map +1 -0
  721. package/dist/voice/background_audio.js +342 -0
  722. package/dist/voice/background_audio.js.map +1 -0
  723. package/dist/voice/events.cjs +147 -0
  724. package/dist/voice/events.cjs.map +1 -0
  725. package/dist/voice/events.d.cts +127 -0
  726. package/dist/voice/events.d.ts +127 -0
  727. package/dist/voice/events.d.ts.map +1 -0
  728. package/dist/voice/events.js +112 -0
  729. package/dist/voice/events.js.map +1 -0
  730. package/dist/voice/generation.cjs +747 -0
  731. package/dist/voice/generation.cjs.map +1 -0
  732. package/dist/voice/generation.d.cts +116 -0
  733. package/dist/voice/generation.d.ts +116 -0
  734. package/dist/voice/generation.d.ts.map +1 -0
  735. package/dist/voice/generation.js +719 -0
  736. package/dist/voice/generation.js.map +1 -0
  737. package/dist/voice/generation_tools.test.cjs +236 -0
  738. package/dist/voice/generation_tools.test.cjs.map +1 -0
  739. package/dist/voice/generation_tools.test.js +235 -0
  740. package/dist/voice/generation_tools.test.js.map +1 -0
  741. package/dist/voice/index.cjs +49 -0
  742. package/dist/voice/index.cjs.map +1 -0
  743. package/dist/voice/index.d.cts +10 -0
  744. package/dist/voice/index.d.ts +10 -0
  745. package/dist/voice/index.d.ts.map +1 -0
  746. package/dist/voice/index.js +16 -0
  747. package/dist/voice/index.js.map +1 -0
  748. package/dist/voice/interruption_detection.test.cjs +114 -0
  749. package/dist/voice/interruption_detection.test.cjs.map +1 -0
  750. package/dist/voice/interruption_detection.test.js +113 -0
  751. package/dist/voice/interruption_detection.test.js.map +1 -0
  752. package/dist/voice/io.cjs +270 -0
  753. package/dist/voice/io.cjs.map +1 -0
  754. package/dist/voice/io.d.cts +126 -0
  755. package/dist/voice/io.d.ts +126 -0
  756. package/dist/voice/io.d.ts.map +1 -0
  757. package/dist/voice/io.js +242 -0
  758. package/dist/voice/io.js.map +1 -0
  759. package/dist/voice/recorder_io/index.cjs +23 -0
  760. package/dist/voice/recorder_io/index.cjs.map +1 -0
  761. package/dist/voice/recorder_io/index.d.cts +2 -0
  762. package/dist/voice/recorder_io/index.d.ts +2 -0
  763. package/dist/voice/recorder_io/index.d.ts.map +1 -0
  764. package/dist/voice/recorder_io/index.js +2 -0
  765. package/dist/voice/recorder_io/index.js.map +1 -0
  766. package/dist/voice/recorder_io/recorder_io.cjs +542 -0
  767. package/dist/voice/recorder_io/recorder_io.cjs.map +1 -0
  768. package/dist/voice/recorder_io/recorder_io.d.cts +100 -0
  769. package/dist/voice/recorder_io/recorder_io.d.ts +100 -0
  770. package/dist/voice/recorder_io/recorder_io.d.ts.map +1 -0
  771. package/dist/voice/recorder_io/recorder_io.js +508 -0
  772. package/dist/voice/recorder_io/recorder_io.js.map +1 -0
  773. package/dist/voice/report.cjs +75 -0
  774. package/dist/voice/report.cjs.map +1 -0
  775. package/dist/voice/report.d.cts +42 -0
  776. package/dist/voice/report.d.ts +42 -0
  777. package/dist/voice/report.d.ts.map +1 -0
  778. package/dist/voice/report.js +50 -0
  779. package/dist/voice/report.js.map +1 -0
  780. package/dist/voice/room_io/_input.cjs +133 -0
  781. package/dist/voice/room_io/_input.cjs.map +1 -0
  782. package/dist/voice/room_io/_input.d.cts +24 -0
  783. package/dist/voice/room_io/_input.d.ts +24 -0
  784. package/dist/voice/room_io/_input.d.ts.map +1 -0
  785. package/dist/voice/room_io/_input.js +114 -0
  786. package/dist/voice/room_io/_input.js.map +1 -0
  787. package/dist/voice/room_io/_output.cjs +359 -0
  788. package/dist/voice/room_io/_output.cjs.map +1 -0
  789. package/dist/voice/room_io/_output.d.cts +77 -0
  790. package/dist/voice/room_io/_output.d.ts +77 -0
  791. package/dist/voice/room_io/_output.d.ts.map +1 -0
  792. package/dist/voice/room_io/_output.js +343 -0
  793. package/dist/voice/room_io/_output.js.map +1 -0
  794. package/dist/voice/room_io/index.cjs +25 -0
  795. package/dist/voice/room_io/index.cjs.map +1 -0
  796. package/dist/voice/room_io/index.d.cts +3 -0
  797. package/dist/voice/room_io/index.d.ts +3 -0
  798. package/dist/voice/room_io/index.d.ts.map +1 -0
  799. package/dist/voice/room_io/index.js +3 -0
  800. package/dist/voice/room_io/index.js.map +1 -0
  801. package/dist/voice/room_io/room_io.cjs +373 -0
  802. package/dist/voice/room_io/room_io.cjs.map +1 -0
  803. package/dist/voice/room_io/room_io.d.cts +94 -0
  804. package/dist/voice/room_io/room_io.d.ts +94 -0
  805. package/dist/voice/room_io/room_io.d.ts.map +1 -0
  806. package/dist/voice/room_io/room_io.js +364 -0
  807. package/dist/voice/room_io/room_io.js.map +1 -0
  808. package/dist/voice/run_context.cjs +51 -0
  809. package/dist/voice/run_context.cjs.map +1 -0
  810. package/dist/voice/run_context.d.cts +22 -0
  811. package/dist/voice/run_context.d.ts +22 -0
  812. package/dist/voice/run_context.d.ts.map +1 -0
  813. package/dist/voice/run_context.js +27 -0
  814. package/dist/voice/run_context.js.map +1 -0
  815. package/dist/voice/speech_handle.cjs +228 -0
  816. package/dist/voice/speech_handle.cjs.map +1 -0
  817. package/dist/voice/speech_handle.d.cts +97 -0
  818. package/dist/voice/speech_handle.d.ts +97 -0
  819. package/dist/voice/speech_handle.d.ts.map +1 -0
  820. package/dist/voice/speech_handle.js +204 -0
  821. package/dist/voice/speech_handle.js.map +1 -0
  822. package/dist/voice/transcription/_utils.cjs +45 -0
  823. package/dist/voice/transcription/_utils.cjs.map +1 -0
  824. package/dist/voice/transcription/_utils.d.cts +3 -0
  825. package/dist/voice/transcription/_utils.d.ts +3 -0
  826. package/dist/voice/transcription/_utils.d.ts.map +1 -0
  827. package/dist/voice/transcription/_utils.js +21 -0
  828. package/dist/voice/transcription/_utils.js.map +1 -0
  829. package/dist/voice/transcription/index.cjs +23 -0
  830. package/dist/voice/transcription/index.cjs.map +1 -0
  831. package/dist/voice/transcription/index.d.cts +2 -0
  832. package/dist/voice/transcription/index.d.ts +2 -0
  833. package/dist/voice/transcription/index.d.ts.map +1 -0
  834. package/dist/voice/transcription/index.js +2 -0
  835. package/dist/voice/transcription/index.js.map +1 -0
  836. package/dist/voice/transcription/synchronizer.cjs +379 -0
  837. package/dist/voice/transcription/synchronizer.cjs.map +1 -0
  838. package/dist/voice/transcription/synchronizer.d.cts +87 -0
  839. package/dist/voice/transcription/synchronizer.d.ts +87 -0
  840. package/dist/voice/transcription/synchronizer.d.ts.map +1 -0
  841. package/dist/voice/transcription/synchronizer.js +354 -0
  842. package/dist/voice/transcription/synchronizer.js.map +1 -0
  843. package/dist/worker.cjs +680 -0
  844. package/dist/worker.cjs.map +1 -0
  845. package/dist/worker.d.cts +119 -0
  846. package/dist/worker.d.ts +119 -0
  847. package/dist/worker.d.ts.map +1 -0
  848. package/dist/worker.js +645 -0
  849. package/dist/worker.js.map +1 -0
  850. package/package.json +86 -0
  851. package/resources/NOTICE +2 -0
  852. package/resources/keyboard-typing.ogg +0 -0
  853. package/resources/keyboard-typing2.ogg +0 -0
  854. package/resources/office-ambience.ogg +0 -0
  855. package/src/_exceptions.ts +137 -0
  856. package/src/audio.ts +205 -0
  857. package/src/cli.ts +224 -0
  858. package/src/connection_pool.test.ts +346 -0
  859. package/src/connection_pool.ts +307 -0
  860. package/src/constants.ts +9 -0
  861. package/src/generator.ts +38 -0
  862. package/src/http_server.ts +64 -0
  863. package/src/index.ts +41 -0
  864. package/src/inference/api_protos.ts +82 -0
  865. package/src/inference/index.ts +32 -0
  866. package/src/inference/interruption/AdaptiveInterruptionDetector.ts +166 -0
  867. package/src/inference/interruption/InterruptionStream.ts +397 -0
  868. package/src/inference/interruption/defaults.ts +33 -0
  869. package/src/inference/interruption/errors.ts +0 -0
  870. package/src/inference/interruption/http_transport.ts +61 -0
  871. package/src/inference/interruption/index.ts +4 -0
  872. package/src/inference/interruption/interruption.ts +88 -0
  873. package/src/inference/llm.ts +532 -0
  874. package/src/inference/stt.ts +524 -0
  875. package/src/inference/tts.ts +574 -0
  876. package/src/inference/utils.test.ts +31 -0
  877. package/src/inference/utils.ts +81 -0
  878. package/src/inference_runner.ts +19 -0
  879. package/src/ipc/index.ts +5 -0
  880. package/src/ipc/inference_executor.ts +7 -0
  881. package/src/ipc/inference_proc_executor.ts +101 -0
  882. package/src/ipc/inference_proc_lazy_main.ts +115 -0
  883. package/src/ipc/job_executor.ts +23 -0
  884. package/src/ipc/job_proc_executor.ts +122 -0
  885. package/src/ipc/job_proc_lazy_main.ts +247 -0
  886. package/src/ipc/message.ts +52 -0
  887. package/src/ipc/proc_pool.ts +164 -0
  888. package/src/ipc/supervised_proc.test.ts +153 -0
  889. package/src/ipc/supervised_proc.ts +242 -0
  890. package/src/job.ts +461 -0
  891. package/src/llm/__snapshots__/chat_context.test.ts.snap +527 -0
  892. package/src/llm/__snapshots__/tool_context.test.ts.snap +177 -0
  893. package/src/llm/__snapshots__/zod-utils.test.ts.snap +559 -0
  894. package/src/llm/chat_context.test.ts +1057 -0
  895. package/src/llm/chat_context.ts +759 -0
  896. package/src/llm/fallback_adapter.test.ts +238 -0
  897. package/src/llm/fallback_adapter.ts +391 -0
  898. package/src/llm/index.ts +74 -0
  899. package/src/llm/llm.ts +303 -0
  900. package/src/llm/provider_format/google.test.ts +843 -0
  901. package/src/llm/provider_format/google.ts +134 -0
  902. package/src/llm/provider_format/index.ts +23 -0
  903. package/src/llm/provider_format/openai.test.ts +675 -0
  904. package/src/llm/provider_format/openai.ts +146 -0
  905. package/src/llm/provider_format/utils.ts +187 -0
  906. package/src/llm/realtime.ts +163 -0
  907. package/src/llm/remote_chat_context.test.ts +290 -0
  908. package/src/llm/remote_chat_context.ts +114 -0
  909. package/src/llm/tool_context.test.ts +407 -0
  910. package/src/llm/tool_context.ts +343 -0
  911. package/src/llm/tool_context.type.test.ts +115 -0
  912. package/src/llm/utils.test.ts +670 -0
  913. package/src/llm/utils.ts +336 -0
  914. package/src/llm/zod-utils.test.ts +577 -0
  915. package/src/llm/zod-utils.ts +153 -0
  916. package/src/log.ts +83 -0
  917. package/src/metrics/base.ts +168 -0
  918. package/src/metrics/index.ts +15 -0
  919. package/src/metrics/usage_collector.ts +46 -0
  920. package/src/metrics/utils.ts +64 -0
  921. package/src/plugin.ts +46 -0
  922. package/src/stream/deferred_stream.test.ts +755 -0
  923. package/src/stream/deferred_stream.ts +127 -0
  924. package/src/stream/identity_transform.test.ts +179 -0
  925. package/src/stream/identity_transform.ts +18 -0
  926. package/src/stream/index.ts +7 -0
  927. package/src/stream/merge_readable_streams.ts +40 -0
  928. package/src/stream/stream_channel.test.ts +166 -0
  929. package/src/stream/stream_channel.ts +44 -0
  930. package/src/stt/index.ts +15 -0
  931. package/src/stt/stream_adapter.ts +107 -0
  932. package/src/stt/stt.ts +374 -0
  933. package/src/telemetry/index.ts +28 -0
  934. package/src/telemetry/logging.ts +55 -0
  935. package/src/telemetry/otel_http_exporter.ts +195 -0
  936. package/src/telemetry/pino_otel_transport.ts +265 -0
  937. package/src/telemetry/trace_types.ts +95 -0
  938. package/src/telemetry/traces.ts +612 -0
  939. package/src/telemetry/utils.ts +61 -0
  940. package/src/tokenize/basic/basic.ts +83 -0
  941. package/src/tokenize/basic/hyphenator.ts +434 -0
  942. package/src/tokenize/basic/index.ts +11 -0
  943. package/src/tokenize/basic/paragraph.ts +43 -0
  944. package/src/tokenize/basic/sentence.ts +89 -0
  945. package/src/tokenize/basic/word.ts +27 -0
  946. package/src/tokenize/index.ts +16 -0
  947. package/src/tokenize/token_stream.ts +180 -0
  948. package/src/tokenize/tokenizer.test.ts +255 -0
  949. package/src/tokenize/tokenizer.ts +152 -0
  950. package/src/transcription.ts +307 -0
  951. package/src/tts/index.ts +12 -0
  952. package/src/tts/stream_adapter.ts +110 -0
  953. package/src/tts/tts.ts +598 -0
  954. package/src/types.ts +66 -0
  955. package/src/utils/ws_transport.test.ts +282 -0
  956. package/src/utils/ws_transport.ts +22 -0
  957. package/src/utils.test.ts +651 -0
  958. package/src/utils.ts +871 -0
  959. package/src/vad.ts +262 -0
  960. package/src/version.ts +5 -0
  961. package/src/voice/agent.test.ts +80 -0
  962. package/src/voice/agent.ts +418 -0
  963. package/src/voice/agent_activity.ts +2375 -0
  964. package/src/voice/agent_session.ts +866 -0
  965. package/src/voice/audio_recognition.ts +877 -0
  966. package/src/voice/avatar/datastream_io.ts +247 -0
  967. package/src/voice/avatar/index.ts +4 -0
  968. package/src/voice/background_audio.ts +491 -0
  969. package/src/voice/events.ts +261 -0
  970. package/src/voice/generation.ts +946 -0
  971. package/src/voice/generation_tools.test.ts +268 -0
  972. package/src/voice/index.ts +12 -0
  973. package/src/voice/interruption_detection.test.ts +151 -0
  974. package/src/voice/io.ts +347 -0
  975. package/src/voice/recorder_io/index.ts +4 -0
  976. package/src/voice/recorder_io/recorder_io.ts +690 -0
  977. package/src/voice/report.ts +100 -0
  978. package/src/voice/room_io/_input.ts +162 -0
  979. package/src/voice/room_io/_output.ts +439 -0
  980. package/src/voice/room_io/index.ts +5 -0
  981. package/src/voice/room_io/room_io.ts +518 -0
  982. package/src/voice/run_context.ts +34 -0
  983. package/src/voice/speech_handle.ts +250 -0
  984. package/src/voice/transcription/_utils.ts +25 -0
  985. package/src/voice/transcription/index.ts +4 -0
  986. package/src/voice/transcription/synchronizer.ts +477 -0
  987. package/src/worker.ts +798 -0
@@ -0,0 +1,1780 @@
1
+ import { Mutex } from "@livekit/mutex";
2
+ import { ROOT_CONTEXT, trace } from "@opentelemetry/api";
3
+ import { Heap } from "heap-js";
4
+ import { AsyncLocalStorage } from "node:async_hooks";
5
+ import { ReadableStream } from "node:stream/web";
6
+ import { ChatMessage } from "../llm/chat_context.js";
7
+ import {
8
+ LLM,
9
+ RealtimeModel
10
+ } from "../llm/index.js";
11
+ import { isSameToolChoice, isSameToolContext } from "../llm/tool_context.js";
12
+ import { log } from "../log.js";
13
+ import { DeferredReadableStream } from "../stream/deferred_stream.js";
14
+ import { STT } from "../stt/stt.js";
15
+ import { recordRealtimeMetrics, traceTypes, tracer } from "../telemetry/index.js";
16
+ import { splitWords } from "../tokenize/basic/word.js";
17
+ import { TTS } from "../tts/tts.js";
18
+ import { Future, Task, cancelAndWait, waitFor } from "../utils.js";
19
+ import { InterruptionEventType } from "../inference/interruption/interruption.js";
20
+ import { VAD } from "../vad.js";
21
+ import { StopResponse, asyncLocalStorage } from "./agent.js";
22
+ import {} from "./agent_session.js";
23
+ import {
24
+ AudioRecognition
25
+ } from "./audio_recognition.js";
26
+ import {
27
+ AgentSessionEventTypes,
28
+ createErrorEvent,
29
+ createFunctionToolsExecutedEvent,
30
+ createMetricsCollectedEvent,
31
+ createSpeechCreatedEvent,
32
+ createUserInputTranscribedEvent
33
+ } from "./events.js";
34
+ import {
35
+ performAudioForwarding,
36
+ performLLMInference,
37
+ performTTSInference,
38
+ performTextForwarding,
39
+ performToolExecutions,
40
+ removeInstructions,
41
+ updateInstructions
42
+ } from "./generation.js";
43
+ import { SpeechHandle } from "./speech_handle.js";
44
+ const speechHandleStorage = new AsyncLocalStorage();
45
+ class AgentActivity {
46
+ static REPLY_TASK_CANCEL_TIMEOUT = 5e3;
47
+ started = false;
48
+ audioRecognition;
49
+ realtimeSession;
50
+ realtimeSpans;
51
+ // Maps response_id to OTEL span for metrics recording
52
+ turnDetectionMode;
53
+ logger = log();
54
+ _draining = false;
55
+ _currentSpeech;
56
+ speechQueue;
57
+ // [priority, timestamp, speechHandle]
58
+ q_updated;
59
+ speechTasks = /* @__PURE__ */ new Set();
60
+ lock = new Mutex();
61
+ audioStream = new DeferredReadableStream();
62
+ // default to null as None, which maps to the default provider tool choice value
63
+ toolChoice = null;
64
+ _preemptiveGeneration;
65
+ agent;
66
+ agentSession;
67
+ /** @internal */
68
+ _mainTask;
69
+ _userTurnCompletedTask;
70
+ /**
71
+ * Notify that agent started speaking.
72
+ * This enables interruption detection in AudioRecognition.
73
+ * @internal
74
+ */
75
+ notifyAgentSpeechStarted() {
76
+ var _a;
77
+ (_a = this.audioRecognition) == null ? void 0 : _a.onStartOfAgentSpeech();
78
+ }
79
+ /**
80
+ * Notify that agent stopped speaking.
81
+ * This disables interruption detection in AudioRecognition.
82
+ * @internal
83
+ */
84
+ notifyAgentSpeechEnded() {
85
+ var _a;
86
+ (_a = this.audioRecognition) == null ? void 0 : _a.onEndOfAgentSpeech();
87
+ }
88
+ constructor(agent, agentSession) {
89
+ this.agent = agent;
90
+ this.agentSession = agentSession;
91
+ this.speechQueue = new Heap(([p1, t1, _], [p2, t2, __]) => {
92
+ return p1 === p2 ? t1 - t2 : p2 - p1;
93
+ });
94
+ this.q_updated = new Future();
95
+ this.turnDetectionMode = typeof this.turnDetection === "string" ? this.turnDetection : void 0;
96
+ if (this.turnDetectionMode === "vad" && this.vad === void 0) {
97
+ this.logger.warn(
98
+ 'turnDetection is set to "vad", but no VAD model is provided, ignoring the turnDdetection setting'
99
+ );
100
+ this.turnDetectionMode = void 0;
101
+ }
102
+ if (this.turnDetectionMode === "stt" && this.stt === void 0) {
103
+ this.logger.warn(
104
+ 'turnDetection is set to "stt", but no STT model is provided, ignoring the turnDetection setting'
105
+ );
106
+ this.turnDetectionMode = void 0;
107
+ }
108
+ if (this.llm instanceof RealtimeModel) {
109
+ if (this.llm.capabilities.turnDetection && !this.allowInterruptions) {
110
+ this.logger.warn(
111
+ "the RealtimeModel uses a server-side turn detection, allowInterruptions cannot be false, disable turnDetection in the RealtimeModel and use VAD on the AgentSession instead"
112
+ );
113
+ }
114
+ if (this.turnDetectionMode === "realtime_llm" && !this.llm.capabilities.turnDetection) {
115
+ this.logger.warn(
116
+ 'turnDetection is set to "realtime_llm", but the LLM is not a RealtimeModel or the server-side turn detection is not supported/enabled, ignoring the turnDetection setting'
117
+ );
118
+ this.turnDetectionMode = void 0;
119
+ }
120
+ if (this.turnDetectionMode === "stt") {
121
+ this.logger.warn(
122
+ 'turnDetection is set to "stt", but the LLM is a RealtimeModel, ignoring the turnDetection setting'
123
+ );
124
+ this.turnDetectionMode = void 0;
125
+ }
126
+ if (this.turnDetectionMode && this.turnDetectionMode !== "realtime_llm" && this.llm.capabilities.turnDetection) {
127
+ this.logger.warn(
128
+ `turnDetection is set to "${this.turnDetectionMode}", but the LLM is a RealtimeModel and server-side turn detection enabled, ignoring the turnDetection setting`
129
+ );
130
+ this.turnDetectionMode = void 0;
131
+ }
132
+ if (!this.llm.capabilities.turnDetection && this.vad && this.turnDetectionMode === void 0) {
133
+ this.turnDetectionMode = "vad";
134
+ }
135
+ } else if (this.turnDetectionMode === "realtime_llm") {
136
+ this.logger.warn(
137
+ 'turnDetection is set to "realtime_llm", but the LLM is not a RealtimeModel'
138
+ );
139
+ this.turnDetectionMode = void 0;
140
+ }
141
+ if (!this.vad && this.stt && this.llm instanceof LLM && this.allowInterruptions && this.turnDetectionMode === void 0) {
142
+ this.logger.warn(
143
+ "VAD is not set. Enabling VAD is recommended when using LLM and STT for more responsive interruption handling."
144
+ );
145
+ }
146
+ }
147
+ async start() {
148
+ const unlock = await this.lock.lock();
149
+ try {
150
+ const startSpan = tracer.startSpan({
151
+ name: "start_agent_activity",
152
+ attributes: { [traceTypes.ATTR_AGENT_LABEL]: this.agent.id },
153
+ context: ROOT_CONTEXT
154
+ });
155
+ this.agent._agentActivity = this;
156
+ if (this.llm instanceof RealtimeModel) {
157
+ this.realtimeSession = this.llm.session();
158
+ this.realtimeSpans = /* @__PURE__ */ new Map();
159
+ this.realtimeSession.on("generation_created", (ev) => this.onGenerationCreated(ev));
160
+ this.realtimeSession.on("input_speech_started", (ev) => this.onInputSpeechStarted(ev));
161
+ this.realtimeSession.on("input_speech_stopped", (ev) => this.onInputSpeechStopped(ev));
162
+ this.realtimeSession.on(
163
+ "input_audio_transcription_completed",
164
+ (ev) => this.onInputAudioTranscriptionCompleted(ev)
165
+ );
166
+ this.realtimeSession.on("metrics_collected", (ev) => this.onMetricsCollected(ev));
167
+ this.realtimeSession.on("error", (ev) => this.onError(ev));
168
+ removeInstructions(this.agent._chatCtx);
169
+ try {
170
+ await this.realtimeSession.updateInstructions(this.agent.instructions);
171
+ } catch (error) {
172
+ this.logger.error(error, "failed to update the instructions");
173
+ }
174
+ try {
175
+ await this.realtimeSession.updateChatCtx(this.agent.chatCtx);
176
+ } catch (error) {
177
+ this.logger.error(error, "failed to update the chat context");
178
+ }
179
+ try {
180
+ await this.realtimeSession.updateTools(this.tools);
181
+ } catch (error) {
182
+ this.logger.error(error, "failed to update the tools");
183
+ }
184
+ if (!this.llm.capabilities.audioOutput && !this.tts && this.agentSession.output.audio) {
185
+ this.logger.error(
186
+ "audio output is enabled but RealtimeModel has no audio modality and no TTS is set. Either enable audio modality in the RealtimeModel or set a TTS model."
187
+ );
188
+ }
189
+ } else if (this.llm instanceof LLM) {
190
+ try {
191
+ updateInstructions({
192
+ chatCtx: this.agent._chatCtx,
193
+ instructions: this.agent.instructions,
194
+ addIfMissing: true
195
+ });
196
+ } catch (error) {
197
+ this.logger.error("failed to update the instructions", error);
198
+ }
199
+ }
200
+ if (this.llm instanceof LLM) {
201
+ this.llm.on("metrics_collected", (ev) => this.onMetricsCollected(ev));
202
+ this.llm.on("error", (ev) => this.onError(ev));
203
+ }
204
+ if (this.stt instanceof STT) {
205
+ this.stt.on("metrics_collected", (ev) => this.onMetricsCollected(ev));
206
+ this.stt.on("error", (ev) => this.onError(ev));
207
+ }
208
+ if (this.tts instanceof TTS) {
209
+ this.tts.on("metrics_collected", (ev) => this.onMetricsCollected(ev));
210
+ this.tts.on("error", (ev) => this.onError(ev));
211
+ }
212
+ if (this.vad instanceof VAD) {
213
+ this.vad.on("metrics_collected", (ev) => this.onMetricsCollected(ev));
214
+ }
215
+ this.audioRecognition = new AudioRecognition({
216
+ recognitionHooks: this,
217
+ // Disable stt node if stt is not provided
218
+ stt: this.stt ? (...args) => this.agent.sttNode(...args) : void 0,
219
+ vad: this.vad,
220
+ interruptionDetector: this.agentSession.interruptionDetector,
221
+ turnDetector: typeof this.turnDetection === "string" ? void 0 : this.turnDetection,
222
+ turnDetectionMode: this.turnDetectionMode,
223
+ minEndpointingDelay: this.agentSession.options.minEndpointingDelay,
224
+ maxEndpointingDelay: this.agentSession.options.maxEndpointingDelay,
225
+ rootSpanContext: this.agentSession.rootSpanContext
226
+ });
227
+ this.audioRecognition.start();
228
+ this.started = true;
229
+ this._mainTask = Task.from(({ signal }) => this.mainTask(signal));
230
+ const onEnterTask = tracer.startActiveSpan(async () => this.agent.onEnter(), {
231
+ name: "on_enter",
232
+ context: trace.setSpan(ROOT_CONTEXT, startSpan),
233
+ attributes: { [traceTypes.ATTR_AGENT_LABEL]: this.agent.id }
234
+ });
235
+ this.createSpeechTask({
236
+ task: Task.from(() => onEnterTask),
237
+ name: "AgentActivity_onEnter"
238
+ });
239
+ startSpan.end();
240
+ } finally {
241
+ unlock();
242
+ }
243
+ }
244
+ get currentSpeech() {
245
+ return this._currentSpeech;
246
+ }
247
+ get vad() {
248
+ return this.agent.vad || this.agentSession.vad;
249
+ }
250
+ get stt() {
251
+ return this.agent.stt || this.agentSession.stt;
252
+ }
253
+ get llm() {
254
+ return this.agent.llm || this.agentSession.llm;
255
+ }
256
+ get tts() {
257
+ return this.agent.tts || this.agentSession.tts;
258
+ }
259
+ get tools() {
260
+ return this.agent.toolCtx;
261
+ }
262
+ get draining() {
263
+ return this._draining;
264
+ }
265
+ get realtimeLLMSession() {
266
+ return this.realtimeSession;
267
+ }
268
+ get allowInterruptions() {
269
+ return this.agentSession.options.allowInterruptions;
270
+ }
271
+ get turnDetection() {
272
+ return this.agentSession.turnDetection;
273
+ }
274
+ get toolCtx() {
275
+ return this.agent.toolCtx;
276
+ }
277
+ async updateChatCtx(chatCtx) {
278
+ chatCtx = chatCtx.copy({ toolCtx: this.toolCtx });
279
+ this.agent._chatCtx = chatCtx;
280
+ if (this.realtimeSession) {
281
+ removeInstructions(chatCtx);
282
+ this.realtimeSession.updateChatCtx(chatCtx);
283
+ } else {
284
+ updateInstructions({
285
+ chatCtx,
286
+ instructions: this.agent.instructions,
287
+ addIfMissing: true
288
+ });
289
+ }
290
+ }
291
+ updateOptions({ toolChoice }) {
292
+ if (toolChoice !== void 0) {
293
+ this.toolChoice = toolChoice;
294
+ }
295
+ if (this.realtimeSession) {
296
+ this.realtimeSession.updateOptions({ toolChoice: this.toolChoice });
297
+ }
298
+ }
299
+ attachAudioInput(audioStream) {
300
+ if (this.audioStream.isSourceSet) {
301
+ this.logger.debug("detaching existing audio input in agent activity");
302
+ this.audioStream.detachSource();
303
+ }
304
+ this.audioStream.setSource(audioStream);
305
+ const [realtimeAudioStream, recognitionAudioStream] = this.audioStream.stream.tee();
306
+ if (this.realtimeSession) {
307
+ this.realtimeSession.setInputAudioStream(realtimeAudioStream);
308
+ }
309
+ if (this.audioRecognition) {
310
+ this.audioRecognition.setInputAudioStream(recognitionAudioStream);
311
+ }
312
+ }
313
+ detachAudioInput() {
314
+ this.audioStream.detachSource();
315
+ }
316
+ commitUserTurn() {
317
+ if (!this.audioRecognition) {
318
+ throw new Error("AudioRecognition is not initialized");
319
+ }
320
+ const audioDetached = false;
321
+ this.audioRecognition.commitUserTurn(audioDetached);
322
+ }
323
+ clearUserTurn() {
324
+ var _a, _b;
325
+ (_a = this.audioRecognition) == null ? void 0 : _a.clearUserTurn();
326
+ (_b = this.realtimeSession) == null ? void 0 : _b.clearAudio();
327
+ }
328
+ say(text, options) {
329
+ const {
330
+ audio,
331
+ allowInterruptions: defaultAllowInterruptions,
332
+ addToChatCtx = true
333
+ } = options ?? {};
334
+ let allowInterruptions = defaultAllowInterruptions;
335
+ if (!audio && !this.tts && this.agentSession.output.audio && this.agentSession.output.audioEnabled) {
336
+ throw new Error("trying to generate speech from text without a TTS model");
337
+ }
338
+ if (this.llm instanceof RealtimeModel && this.llm.capabilities.turnDetection && allowInterruptions === false) {
339
+ this.logger.warn(
340
+ "the RealtimeModel uses a server-side turn detection, allowInterruptions cannot be false when using VoiceAgent.say(), disable turnDetection in the RealtimeModel and use VAD on the AgentTask/VoiceAgent instead"
341
+ );
342
+ allowInterruptions = true;
343
+ }
344
+ const handle = SpeechHandle.create({
345
+ allowInterruptions: allowInterruptions ?? this.allowInterruptions
346
+ });
347
+ this.agentSession.emit(
348
+ AgentSessionEventTypes.SpeechCreated,
349
+ createSpeechCreatedEvent({
350
+ userInitiated: true,
351
+ source: "say",
352
+ speechHandle: handle
353
+ })
354
+ );
355
+ const task = this.createSpeechTask({
356
+ task: Task.from(
357
+ (abortController) => this.ttsTask(handle, text, addToChatCtx, {}, abortController, audio)
358
+ ),
359
+ ownedSpeechHandle: handle,
360
+ name: "AgentActivity.say_tts"
361
+ });
362
+ task.finally(() => this.onPipelineReplyDone());
363
+ this.scheduleSpeech(handle, SpeechHandle.SPEECH_PRIORITY_NORMAL);
364
+ return handle;
365
+ }
366
+ // -- Metrics and errors --
367
/**
 * Handles a metrics event and re-emits it on the agent session.
 *
 * - LLM and TTS metrics are tagged with the id of the speech handle found in
 *   `speechHandleStorage`, when there is one.
 * - Realtime model metrics are recorded on the span registered under the
 *   event's `requestId`, and that span entry is then removed.
 *
 * @param ev - Metrics event; mutated in place when a speech id is attached.
 */
onMetricsCollected = (ev) => {
  const activeSpeech = speechHandleStorage.getStore();
  if (activeSpeech) {
    if (ev.type === "llm_metrics" || ev.type === "tts_metrics") {
      ev.speechId = activeSpeech.id;
    }
  }

  if (ev.type === "realtime_model_metrics" && this.realtimeSpans) {
    const pendingSpan = this.realtimeSpans.get(ev.requestId);
    if (pendingSpan) {
      recordRealtimeMetrics(pendingSpan, ev);
      this.realtimeSpans.delete(ev.requestId);
    }
  }

  const metricsEvent = createMetricsCollectedEvent({ metrics: ev });
  this.agentSession.emit(AgentSessionEventTypes.MetricsCollected, metricsEvent);
};
384
+ onError(ev) {
385
+ if (ev.type === "realtime_model_error") {
386
+ const errorEvent = createErrorEvent(ev.error, this.llm);
387
+ this.agentSession.emit(AgentSessionEventTypes.Error, errorEvent);
388
+ } else if (ev.type === "stt_error") {
389
+ const errorEvent = createErrorEvent(ev.error, this.stt);
390
+ this.agentSession.emit(AgentSessionEventTypes.Error, errorEvent);
391
+ } else if (ev.type === "tts_error") {
392
+ const errorEvent = createErrorEvent(ev.error, this.tts);
393
+ this.agentSession.emit(AgentSessionEventTypes.Error, errorEvent);
394
+ } else if (ev.type === "llm_error") {
395
+ const errorEvent = createErrorEvent(ev.error, this.llm);
396
+ this.agentSession.emit(AgentSessionEventTypes.Error, errorEvent);
397
+ }
398
+ this.agentSession._onError(ev);
399
+ }
400
+ // -- Realtime Session events --
401
+ onInputSpeechStarted(_ev) {
402
+ this.logger.info("onInputSpeechStarted");
403
+ if (!this.vad) {
404
+ this.agentSession._updateUserState("speaking");
405
+ }
406
+ try {
407
+ this.interrupt();
408
+ } catch (error) {
409
+ this.logger.error(
410
+ "RealtimeAPI input_speech_started, but current speech is not interruptable, this should never happen!",
411
+ error
412
+ );
413
+ }
414
+ }
415
+ onInputSpeechStopped(ev) {
416
+ this.logger.info(ev, "onInputSpeechStopped");
417
+ if (!this.vad) {
418
+ this.agentSession._updateUserState("listening");
419
+ }
420
+ if (ev.userTranscriptionEnabled) {
421
+ this.agentSession.emit(
422
+ AgentSessionEventTypes.UserInputTranscribed,
423
+ createUserInputTranscribedEvent({
424
+ isFinal: false,
425
+ transcript: ""
426
+ })
427
+ );
428
+ }
429
+ }
430
+ onInputAudioTranscriptionCompleted(ev) {
431
+ this.agentSession.emit(
432
+ AgentSessionEventTypes.UserInputTranscribed,
433
+ createUserInputTranscribedEvent({
434
+ transcript: ev.transcript,
435
+ isFinal: ev.isFinal
436
+ })
437
+ );
438
+ if (ev.isFinal) {
439
+ const message = ChatMessage.create({
440
+ role: "user",
441
+ content: ev.transcript,
442
+ id: ev.itemId
443
+ });
444
+ this.agent._chatCtx.items.push(message);
445
+ this.agentSession._conversationItemAdded(message);
446
+ }
447
+ }
448
+ onGenerationCreated(ev) {
449
+ if (ev.userInitiated) {
450
+ return;
451
+ }
452
+ if (this.draining) {
453
+ this.logger.warn("skipping new realtime generation, the agent is draining");
454
+ return;
455
+ }
456
+ const handle = SpeechHandle.create({
457
+ allowInterruptions: this.allowInterruptions
458
+ });
459
+ this.agentSession.emit(
460
+ AgentSessionEventTypes.SpeechCreated,
461
+ createSpeechCreatedEvent({
462
+ userInitiated: false,
463
+ source: "generate_reply",
464
+ speechHandle: handle
465
+ })
466
+ );
467
+ this.logger.info({ speech_id: handle.id }, "Creating speech handle");
468
+ this.createSpeechTask({
469
+ task: Task.from(
470
+ (abortController) => this.realtimeGenerationTask(handle, ev, {}, abortController)
471
+ ),
472
+ ownedSpeechHandle: handle,
473
+ name: "AgentActivity.realtimeGeneration"
474
+ });
475
+ this.scheduleSpeech(handle, SpeechHandle.SPEECH_PRIORITY_NORMAL);
476
+ }
477
+ // recognition hooks
478
+ onStartOfSpeech(_ev) {
479
+ this.agentSession._updateUserState("speaking");
480
+ }
481
+ onEndOfSpeech(ev) {
482
+ let speechEndTime = Date.now();
483
+ if (ev) {
484
+ speechEndTime = speechEndTime - ev.silenceDuration;
485
+ }
486
+ this.agentSession._updateUserState("listening", speechEndTime);
487
+ }
488
+ onVADInferenceDone(ev) {
489
+ var _a, _b;
490
+ if (this.turnDetection === "manual" || this.turnDetection === "realtime_llm") {
491
+ return;
492
+ }
493
+ if (this.llm instanceof RealtimeModel && this.llm.capabilities.turnDetection) {
494
+ return;
495
+ }
496
+ if (ev.speechDuration < this.agentSession.options.minInterruptionDuration) {
497
+ return;
498
+ }
499
+ if (this.stt && this.agentSession.options.minInterruptionWords > 0 && this.audioRecognition) {
500
+ const text = this.audioRecognition.currentTranscript;
501
+ const normalizedText = text ?? "";
502
+ const wordCount = splitWords(normalizedText, true).length;
503
+ if (wordCount < this.agentSession.options.minInterruptionWords) {
504
+ return;
505
+ }
506
+ }
507
+ (_a = this.realtimeSession) == null ? void 0 : _a.startUserActivity();
508
+ if (this._currentSpeech && !this._currentSpeech.interrupted && this._currentSpeech.allowInterruptions) {
509
+ this.logger.info({ "speech id": this._currentSpeech.id }, "speech interrupted by VAD");
510
+ (_b = this.realtimeSession) == null ? void 0 : _b.interrupt();
511
+ this._currentSpeech.interrupt();
512
+ }
513
+ }
514
+ onInterruption(ev) {
515
+ var _a, _b;
516
+ if (ev.type !== InterruptionEventType.INTERRUPTION) {
517
+ return;
518
+ }
519
+ this.logger.info(
520
+ {
521
+ probability: ev.probability,
522
+ detectionDelay: ev.detectionDelay,
523
+ totalDuration: ev.totalDuration
524
+ },
525
+ "adaptive interruption detected"
526
+ );
527
+ if (this.turnDetection === "manual" || this.turnDetection === "realtime_llm") {
528
+ return;
529
+ }
530
+ if (this.llm instanceof RealtimeModel && this.llm.capabilities.turnDetection) {
531
+ return;
532
+ }
533
+ (_a = this.realtimeSession) == null ? void 0 : _a.startUserActivity();
534
+ if (this._currentSpeech && !this._currentSpeech.interrupted && this._currentSpeech.allowInterruptions) {
535
+ this.logger.info(
536
+ { "speech id": this._currentSpeech.id },
537
+ "speech interrupted by adaptive interruption detector"
538
+ );
539
+ (_b = this.realtimeSession) == null ? void 0 : _b.interrupt();
540
+ this._currentSpeech.interrupt();
541
+ }
542
+ }
543
+ onInterimTranscript(ev) {
544
+ if (this.llm instanceof RealtimeModel && this.llm.capabilities.userTranscription) {
545
+ return;
546
+ }
547
+ this.agentSession.emit(
548
+ AgentSessionEventTypes.UserInputTranscribed,
549
+ createUserInputTranscribedEvent({
550
+ transcript: ev.alternatives[0].text,
551
+ isFinal: false,
552
+ language: ev.alternatives[0].language
553
+ // TODO(AJS-106): add multi participant support
554
+ })
555
+ );
556
+ }
557
+ onFinalTranscript(ev) {
558
+ if (this.llm instanceof RealtimeModel && this.llm.capabilities.userTranscription) {
559
+ return;
560
+ }
561
+ this.agentSession.emit(
562
+ AgentSessionEventTypes.UserInputTranscribed,
563
+ createUserInputTranscribedEvent({
564
+ transcript: ev.alternatives[0].text,
565
+ isFinal: true,
566
+ language: ev.alternatives[0].language
567
+ // TODO(AJS-106): add multi participant support
568
+ })
569
+ );
570
+ }
571
+ onPreemptiveGeneration(info) {
572
+ if (!this.agentSession.options.preemptiveGeneration || this.draining || this._currentSpeech !== void 0 && !this._currentSpeech.interrupted || !(this.llm instanceof LLM)) {
573
+ return;
574
+ }
575
+ this.cancelPreemptiveGeneration();
576
+ this.logger.info(
577
+ {
578
+ newTranscript: info.newTranscript,
579
+ transcriptConfidence: info.transcriptConfidence
580
+ },
581
+ "starting preemptive generation"
582
+ );
583
+ const userMessage = ChatMessage.create({
584
+ role: "user",
585
+ content: info.newTranscript
586
+ });
587
+ const chatCtx = this.agent.chatCtx.copy();
588
+ const speechHandle = this.generateReply({
589
+ userMessage,
590
+ chatCtx,
591
+ scheduleSpeech: false
592
+ });
593
+ this._preemptiveGeneration = {
594
+ speechHandle,
595
+ userMessage,
596
+ info,
597
+ chatCtx: chatCtx.copy(),
598
+ tools: { ...this.tools },
599
+ toolChoice: this.toolChoice,
600
+ createdAt: Date.now()
601
+ };
602
+ }
603
+ cancelPreemptiveGeneration() {
604
+ if (this._preemptiveGeneration !== void 0) {
605
+ this._preemptiveGeneration.speechHandle._cancel();
606
+ this._preemptiveGeneration = void 0;
607
+ }
608
+ }
609
+ createSpeechTask(options) {
610
+ const { task, ownedSpeechHandle } = options;
611
+ this.speechTasks.add(task);
612
+ task.addDoneCallback(() => {
613
+ this.speechTasks.delete(task);
614
+ });
615
+ if (ownedSpeechHandle) {
616
+ ownedSpeechHandle._tasks.push(task);
617
+ task.addDoneCallback(() => {
618
+ if (ownedSpeechHandle._tasks.every((t) => t.done)) {
619
+ ownedSpeechHandle._markDone();
620
+ }
621
+ });
622
+ }
623
+ task.addDoneCallback(() => {
624
+ this.wakeupMainTask();
625
+ });
626
+ return task.result;
627
+ }
628
+ async onEndOfTurn(info) {
629
+ if (this.draining) {
630
+ this.cancelPreemptiveGeneration();
631
+ this.logger.warn({ user_input: info.newTranscript }, "skipping user input, task is draining");
632
+ return true;
633
+ }
634
+ if (this.stt && this.turnDetection !== "manual" && this._currentSpeech && this._currentSpeech.allowInterruptions && !this._currentSpeech.interrupted && this.agentSession.options.minInterruptionWords > 0) {
635
+ const wordCount = splitWords(info.newTranscript, true).length;
636
+ if (wordCount < this.agentSession.options.minInterruptionWords) {
637
+ this.cancelPreemptiveGeneration();
638
+ this.logger.info(
639
+ {
640
+ wordCount,
641
+ minInterruptionWords: this.agentSession.options.minInterruptionWords
642
+ },
643
+ "skipping user input, word count below minimum interruption threshold"
644
+ );
645
+ return false;
646
+ }
647
+ }
648
+ const oldTask = this._userTurnCompletedTask;
649
+ this._userTurnCompletedTask = this.createSpeechTask({
650
+ task: Task.from(() => this.userTurnCompleted(info, oldTask)),
651
+ name: "AgentActivity.userTurnCompleted"
652
+ });
653
+ return true;
654
+ }
655
+ retrieveChatCtx() {
656
+ return this.agentSession.chatCtx;
657
+ }
658
+ async mainTask(signal) {
659
+ const abortFuture = new Future();
660
+ const abortHandler = () => {
661
+ abortFuture.resolve();
662
+ signal.removeEventListener("abort", abortHandler);
663
+ };
664
+ signal.addEventListener("abort", abortHandler);
665
+ while (true) {
666
+ await Promise.race([this.q_updated.await, abortFuture.await]);
667
+ if (signal.aborted) break;
668
+ while (this.speechQueue.size() > 0) {
669
+ if (signal.aborted) break;
670
+ const heapItem = this.speechQueue.pop();
671
+ if (!heapItem) {
672
+ throw new Error("Speech queue is empty");
673
+ }
674
+ const speechHandle = heapItem[2];
675
+ this._currentSpeech = speechHandle;
676
+ speechHandle._authorizeGeneration();
677
+ await speechHandle._waitForGeneration();
678
+ this._currentSpeech = void 0;
679
+ }
680
+ if (this.draining && this.speechTasks.size === 0) {
681
+ this.logger.info("mainTask: draining and no more speech tasks");
682
+ break;
683
+ }
684
+ this.q_updated = new Future();
685
+ }
686
+ this.logger.info("AgentActivity mainTask: exiting");
687
+ }
688
+ wakeupMainTask() {
689
+ this.q_updated.resolve();
690
+ }
691
+ generateReply(options) {
692
+ var _a;
693
+ const {
694
+ userMessage,
695
+ chatCtx,
696
+ instructions: defaultInstructions,
697
+ toolChoice: defaultToolChoice,
698
+ allowInterruptions: defaultAllowInterruptions,
699
+ scheduleSpeech = true
700
+ } = options;
701
+ let instructions = defaultInstructions;
702
+ let toolChoice = defaultToolChoice;
703
+ let allowInterruptions = defaultAllowInterruptions;
704
+ if (this.llm instanceof RealtimeModel && this.llm.capabilities.turnDetection && allowInterruptions === false) {
705
+ this.logger.warn(
706
+ "the RealtimeModel uses a server-side turn detection, allowInterruptions cannot be false when using VoiceAgent.generateReply(), disable turnDetection in the RealtimeModel and use VAD on the AgentTask/VoiceAgent instead"
707
+ );
708
+ allowInterruptions = true;
709
+ }
710
+ if (this.llm === void 0) {
711
+ throw new Error("trying to generate reply without an LLM model");
712
+ }
713
+ const functionCall = (_a = asyncLocalStorage.getStore()) == null ? void 0 : _a.functionCall;
714
+ if (toolChoice === void 0 && functionCall !== void 0) {
715
+ toolChoice = "none";
716
+ }
717
+ const handle = SpeechHandle.create({
718
+ allowInterruptions: allowInterruptions ?? this.allowInterruptions
719
+ });
720
+ this.agentSession.emit(
721
+ AgentSessionEventTypes.SpeechCreated,
722
+ createSpeechCreatedEvent({
723
+ userInitiated: true,
724
+ source: "generate_reply",
725
+ speechHandle: handle
726
+ })
727
+ );
728
+ this.logger.info({ speech_id: handle.id }, "Creating speech handle");
729
+ if (this.llm instanceof RealtimeModel) {
730
+ this.createSpeechTask({
731
+ task: Task.from(
732
+ (abortController) => this.realtimeReplyTask({
733
+ speechHandle: handle,
734
+ // TODO(brian): support llm.ChatMessage for the realtime model
735
+ userInput: userMessage == null ? void 0 : userMessage.textContent,
736
+ instructions,
737
+ modelSettings: {
738
+ // isGiven(toolChoice) = toolChoice !== undefined
739
+ toolChoice: toOaiToolChoice(toolChoice !== void 0 ? toolChoice : this.toolChoice)
740
+ },
741
+ abortController
742
+ })
743
+ ),
744
+ ownedSpeechHandle: handle,
745
+ name: "AgentActivity.realtimeReply"
746
+ });
747
+ } else if (this.llm instanceof LLM) {
748
+ if (instructions) {
749
+ instructions = `${this.agent.instructions}
750
+ ${instructions}`;
751
+ }
752
+ const task = this.createSpeechTask({
753
+ task: Task.from(
754
+ (abortController) => this.pipelineReplyTask(
755
+ handle,
756
+ chatCtx ?? this.agent.chatCtx,
757
+ this.agent.toolCtx,
758
+ {
759
+ toolChoice: toOaiToolChoice(toolChoice !== void 0 ? toolChoice : this.toolChoice)
760
+ },
761
+ abortController,
762
+ instructions ? `${this.agent.instructions}
763
+ ${instructions}` : instructions,
764
+ userMessage
765
+ )
766
+ ),
767
+ ownedSpeechHandle: handle,
768
+ name: "AgentActivity.pipelineReply"
769
+ });
770
+ task.finally(() => this.onPipelineReplyDone());
771
+ }
772
+ if (scheduleSpeech) {
773
+ this.scheduleSpeech(handle, SpeechHandle.SPEECH_PRIORITY_NORMAL);
774
+ }
775
+ return handle;
776
+ }
777
+ interrupt() {
778
+ var _a;
779
+ const future = new Future();
780
+ const currentSpeech = this._currentSpeech;
781
+ currentSpeech == null ? void 0 : currentSpeech.interrupt();
782
+ for (const [_, __, speech] of this.speechQueue) {
783
+ speech.interrupt();
784
+ }
785
+ (_a = this.realtimeSession) == null ? void 0 : _a.interrupt();
786
+ if (currentSpeech === void 0) {
787
+ future.resolve();
788
+ } else {
789
+ currentSpeech.addDoneCallback(() => {
790
+ if (future.done) return;
791
+ future.resolve();
792
+ });
793
+ }
794
+ return future;
795
+ }
796
+ onPipelineReplyDone() {
797
+ if (!this.speechQueue.peek() && (!this._currentSpeech || this._currentSpeech.done())) {
798
+ this.agentSession._updateAgentState("listening");
799
+ }
800
+ }
801
+ async userTurnCompleted(info, oldTask) {
802
+ var _a, _b;
803
+ if (oldTask) {
804
+ await oldTask;
805
+ }
806
+ if (this.llm instanceof RealtimeModel) {
807
+ if (this.llm.capabilities.turnDetection) {
808
+ return;
809
+ }
810
+ (_a = this.realtimeSession) == null ? void 0 : _a.commitAudio();
811
+ }
812
+ if (this._currentSpeech) {
813
+ if (!this._currentSpeech.allowInterruptions) {
814
+ this.logger.warn(
815
+ { user_input: info.newTranscript },
816
+ "skipping user input, current speech generation cannot be interrupted"
817
+ );
818
+ return;
819
+ }
820
+ this.logger.info(
821
+ { "speech id": this._currentSpeech.id },
822
+ "speech interrupted, new user turn detected"
823
+ );
824
+ this._currentSpeech.interrupt();
825
+ (_b = this.realtimeSession) == null ? void 0 : _b.interrupt();
826
+ }
827
+ let userMessage = ChatMessage.create({
828
+ role: "user",
829
+ content: info.newTranscript
830
+ });
831
+ const chatCtx = this.agent.chatCtx.copy();
832
+ const startTime = Date.now();
833
+ try {
834
+ await this.agent.onUserTurnCompleted(chatCtx, userMessage);
835
+ } catch (e) {
836
+ if (e instanceof StopResponse) {
837
+ return;
838
+ }
839
+ this.logger.error({ error: e }, "error occurred during onUserTurnCompleted");
840
+ }
841
+ const callbackDuration = Date.now() - startTime;
842
+ if (this.llm instanceof RealtimeModel) {
843
+ userMessage = void 0;
844
+ } else if (this.llm === void 0) {
845
+ return;
846
+ }
847
+ let speechHandle;
848
+ if (this._preemptiveGeneration !== void 0) {
849
+ const preemptive = this._preemptiveGeneration;
850
+ if (preemptive.info.newTranscript === (userMessage == null ? void 0 : userMessage.textContent) && preemptive.chatCtx.isEquivalent(chatCtx) && isSameToolContext(preemptive.tools, this.tools) && isSameToolChoice(preemptive.toolChoice, this.toolChoice)) {
851
+ speechHandle = preemptive.speechHandle;
852
+ this.scheduleSpeech(speechHandle, SpeechHandle.SPEECH_PRIORITY_NORMAL);
853
+ this.logger.debug(
854
+ {
855
+ preemptiveLeadTime: Date.now() - preemptive.createdAt
856
+ },
857
+ "using preemptive generation"
858
+ );
859
+ } else {
860
+ this.logger.warn(
861
+ "preemptive generation enabled but chat context or tools have changed after `onUserTurnCompleted`"
862
+ );
863
+ preemptive.speechHandle._cancel();
864
+ }
865
+ this._preemptiveGeneration = void 0;
866
+ }
867
+ if (speechHandle === void 0) {
868
+ speechHandle = this.generateReply({ userMessage, chatCtx });
869
+ }
870
+ const eouMetrics = {
871
+ type: "eou_metrics",
872
+ timestamp: Date.now(),
873
+ endOfUtteranceDelayMs: info.endOfUtteranceDelay,
874
+ transcriptionDelayMs: info.transcriptionDelay,
875
+ onUserTurnCompletedDelayMs: callbackDuration,
876
+ lastSpeakingTimeMs: info.stoppedSpeakingAt ?? 0,
877
+ speechId: speechHandle.id
878
+ };
879
+ this.agentSession.emit(
880
+ AgentSessionEventTypes.MetricsCollected,
881
+ createMetricsCollectedEvent({ metrics: eouMetrics })
882
+ );
883
+ }
884
+ async ttsTask(speechHandle, text, addToChatCtx, modelSettings, replyAbortController, audio) {
885
+ speechHandleStorage.enterWith(speechHandle);
886
+ const transcriptionOutput = this.agentSession.output.transcriptionEnabled ? this.agentSession.output.transcription : null;
887
+ const audioOutput = this.agentSession.output.audioEnabled ? this.agentSession.output.audio : null;
888
+ await speechHandle.waitIfNotInterrupted([speechHandle._waitForAuthorization()]);
889
+ if (speechHandle.interrupted) {
890
+ return;
891
+ }
892
+ let baseStream;
893
+ if (text instanceof ReadableStream) {
894
+ baseStream = text;
895
+ } else {
896
+ baseStream = new ReadableStream({
897
+ start(controller) {
898
+ controller.enqueue(text);
899
+ controller.close();
900
+ }
901
+ });
902
+ }
903
+ const [textSource, audioSource] = baseStream.tee();
904
+ const tasks = [];
905
+ const trNode = await this.agent.transcriptionNode(textSource, {});
906
+ let textOut = null;
907
+ if (trNode) {
908
+ const [textForwardTask, _textOut] = performTextForwarding(
909
+ trNode,
910
+ replyAbortController,
911
+ transcriptionOutput
912
+ );
913
+ textOut = _textOut;
914
+ tasks.push(textForwardTask);
915
+ }
916
+ const onFirstFrame = () => {
917
+ this.agentSession._updateAgentState("speaking");
918
+ };
919
+ if (!audioOutput) {
920
+ if (textOut) {
921
+ textOut.firstTextFut.await.finally(onFirstFrame);
922
+ }
923
+ } else {
924
+ let audioOut = null;
925
+ if (!audio) {
926
+ const [ttsTask, ttsStream] = performTTSInference(
927
+ (...args) => this.agent.ttsNode(...args),
928
+ audioSource,
929
+ modelSettings,
930
+ replyAbortController
931
+ );
932
+ tasks.push(ttsTask);
933
+ const [forwardTask, _audioOut] = performAudioForwarding(
934
+ ttsStream,
935
+ audioOutput,
936
+ replyAbortController
937
+ );
938
+ tasks.push(forwardTask);
939
+ audioOut = _audioOut;
940
+ } else {
941
+ const [forwardTask, _audioOut] = performAudioForwarding(
942
+ audio,
943
+ audioOutput,
944
+ replyAbortController
945
+ );
946
+ tasks.push(forwardTask);
947
+ audioOut = _audioOut;
948
+ }
949
+ audioOut.firstFrameFut.await.finally(onFirstFrame);
950
+ }
951
+ await speechHandle.waitIfNotInterrupted(tasks.map((task) => task.result));
952
+ if (audioOutput) {
953
+ await speechHandle.waitIfNotInterrupted([audioOutput.waitForPlayout()]);
954
+ }
955
+ if (speechHandle.interrupted) {
956
+ replyAbortController.abort();
957
+ await cancelAndWait(tasks, AgentActivity.REPLY_TASK_CANCEL_TIMEOUT);
958
+ if (audioOutput) {
959
+ audioOutput.clearBuffer();
960
+ await audioOutput.waitForPlayout();
961
+ }
962
+ }
963
+ if (addToChatCtx) {
964
+ const message = ChatMessage.create({
965
+ role: "assistant",
966
+ content: (textOut == null ? void 0 : textOut.text) || "",
967
+ interrupted: speechHandle.interrupted
968
+ });
969
+ this.agent._chatCtx.insert(message);
970
+ this.agentSession._conversationItemAdded(message);
971
+ }
972
+ if (this.agentSession.agentState === "speaking") {
973
+ this.agentSession._updateAgentState("listening");
974
+ }
975
+ }
976
/**
 * Runs one step of a pipeline (LLM, then TTS) reply inside a tracing span.
 *
 * Outline:
 * 1. Copy the chat context, add the new user message and the instructions.
 * 2. Start LLM inference (and TTS inference when audio output is enabled).
 * 3. Wait until the handle is scheduled, then authorized; bail out when it is
 *    interrupted before being scheduled.
 * 4. Forward text and audio to the outputs and start tool execution.
 * 5. On interruption: abort everything and record what was actually played.
 * 6. Otherwise record the assistant message, wait for the tools, and either
 *    schedule a follow-up reply carrying the tool results or just record them.
 *
 * Fix compared with the previous implementation: while collecting tool
 * outputs, `newAgentTask` was reassigned on every output, so an output
 * without an agent task that came after one with an agent task silently
 * discarded the handoff (and hid a second agent task from the "only one agent
 * task" check). The agent task is now kept once seen.
 *
 * @param params - `speechHandle`, `chatCtx`, `toolCtx`, `modelSettings`,
 *   `replyAbortController`, `instructions`, `newMessage`, `toolsMessages`
 *   (tool messages of the previous step, recorded once this step has played)
 *   and the tracing `span`.
 * @throws {Error} When audio output is enabled but no TTS stream was created.
 */
_pipelineReplyTaskImpl = async ({
  speechHandle,
  chatCtx,
  toolCtx,
  modelSettings,
  replyAbortController,
  instructions,
  newMessage,
  toolsMessages,
  span
}) => {
  span.setAttribute(traceTypes.ATTR_SPEECH_ID, speechHandle.id);
  if (instructions) {
    span.setAttribute(traceTypes.ATTR_INSTRUCTIONS, instructions);
  }
  if (newMessage) {
    span.setAttribute(traceTypes.ATTR_USER_INPUT, newMessage.textContent || "");
  }
  speechHandleStorage.enterWith(speechHandle);

  const audioOutput = this.agentSession.output.audioEnabled ? this.agentSession.output.audio : null;
  const transcriptionOutput = this.agentSession.output.transcriptionEnabled ? this.agentSession.output.transcription : null;

  // Work on a private copy; the agent's own context is only updated at the
  // points below where something was actually scheduled or played.
  chatCtx = chatCtx.copy();
  if (newMessage) {
    chatCtx.insert(newMessage);
  }
  if (instructions) {
    try {
      updateInstructions({
        chatCtx,
        instructions,
        addIfMissing: true
      });
    } catch (e) {
      this.logger.error({ error: e }, "error occurred during updateInstructions");
    }
  }

  const tasks = [];
  const [llmTask, llmGenData] = performLLMInference(
    // preserve `this` context in llmNode
    (...args) => this.agent.llmNode(...args),
    chatCtx,
    toolCtx,
    modelSettings,
    replyAbortController
  );
  tasks.push(llmTask);

  const [ttsTextInput, llmOutput] = llmGenData.textStream.tee();
  let ttsTask = null;
  let ttsStream = null;
  if (audioOutput) {
    [ttsTask, ttsStream] = performTTSInference(
      (...args) => this.agent.ttsNode(...args),
      ttsTextInput,
      modelSettings,
      replyAbortController
    );
    tasks.push(ttsTask);
  }

  await speechHandle.waitIfNotInterrupted([speechHandle._waitForScheduled()]);
  if (newMessage && speechHandle.scheduled) {
    this.agent._chatCtx.insert(newMessage);
    this.agentSession._conversationItemAdded(newMessage);
  }
  if (speechHandle.interrupted) {
    replyAbortController.abort();
    await cancelAndWait(tasks, AgentActivity.REPLY_TASK_CANCEL_TIMEOUT);
    return;
  }

  this.agentSession._updateAgentState("thinking");
  await speechHandle.waitIfNotInterrupted([speechHandle._waitForAuthorization()]);
  speechHandle._clearAuthorization();
  const replyStartedAt = Date.now();

  const trNodeResult = await this.agent.transcriptionNode(llmOutput, modelSettings);
  let textOut = null;
  if (trNodeResult) {
    const [textForwardTask, _textOut] = performTextForwarding(
      trNodeResult,
      replyAbortController,
      transcriptionOutput
    );
    tasks.push(textForwardTask);
    textOut = _textOut;
  }

  const onFirstFrame = () => {
    this.agentSession._updateAgentState("speaking");
  };
  let audioOut = null;
  if (audioOutput) {
    if (ttsStream) {
      const [forwardTask, _audioOut] = performAudioForwarding(
        ttsStream,
        audioOutput,
        replyAbortController
      );
      audioOut = _audioOut;
      tasks.push(forwardTask);
      audioOut.firstFrameFut.await.finally(onFirstFrame);
    } else {
      throw new Error("ttsStream is null when audioOutput is enabled");
    }
  } else {
    textOut?.firstTextFut.await.finally(onFirstFrame);
  }

  // Intentionally empty hooks.
  const onToolExecutionStarted = (_) => {
  };
  const onToolExecutionCompleted = (_) => {
  };
  const [executeToolsTask, toolOutput] = performToolExecutions({
    session: this.agentSession,
    speechHandle,
    toolCtx,
    toolChoice: modelSettings.toolChoice,
    toolCallStream: llmGenData.toolCallStream,
    controller: replyAbortController,
    onToolExecutionStarted,
    onToolExecutionCompleted
  });

  await speechHandle.waitIfNotInterrupted(tasks.map((task) => task.result));
  if (audioOutput) {
    await speechHandle.waitIfNotInterrupted([audioOutput.waitForPlayout()]);
  }

  // Tool messages of the previous step are recorded with this step's start time.
  if (toolsMessages) {
    for (const msg of toolsMessages) {
      msg.createdAt = replyStartedAt;
    }
    this.agent._chatCtx.insert(toolsMessages);
    this.agentSession._toolItemsAdded(toolsMessages);
  }

  if (speechHandle.interrupted) {
    this.logger.debug(
      { speech_id: speechHandle.id },
      "Aborting all pipeline reply tasks due to interruption"
    );
    if (audioOutput) {
      audioOutput.clearBuffer();
    }
    replyAbortController.abort();
    await Promise.allSettled(
      tasks.map((task) => task.cancelAndWait(AgentActivity.REPLY_TASK_CANCEL_TIMEOUT))
    );

    let forwardedText = textOut?.text || "";
    if (audioOutput) {
      const playbackEv = await audioOutput.waitForPlayout();
      if (audioOut?.firstFrameFut.done) {
        this.logger.info(
          { speech_id: speechHandle.id, playbackPosition: playbackEv.playbackPosition },
          "playout interrupted"
        );
        if (playbackEv.synchronizedTranscript) {
          forwardedText = playbackEv.synchronizedTranscript;
        }
      } else {
        // No audio frame was forwarded, so nothing is recorded as spoken.
        forwardedText = "";
      }
    }

    if (forwardedText) {
      const message = ChatMessage.create({
        role: "assistant",
        content: forwardedText,
        id: llmGenData.id,
        interrupted: true,
        createdAt: replyStartedAt
      });
      chatCtx.insert(message);
      this.agent._chatCtx.insert(message);
      this.agentSession._conversationItemAdded(message);
    }
    if (this.agentSession.agentState === "speaking") {
      this.agentSession._updateAgentState("listening");
    }
    this.logger.info(
      { speech_id: speechHandle.id, message: forwardedText },
      "playout completed with interrupt"
    );
    speechHandle._markGenerationDone();
    await executeToolsTask.cancelAndWait(AgentActivity.REPLY_TASK_CANCEL_TIMEOUT);
    return;
  }

  if (textOut && textOut.text) {
    const message = ChatMessage.create({
      role: "assistant",
      id: llmGenData.id,
      interrupted: false,
      createdAt: replyStartedAt,
      content: textOut.text
    });
    chatCtx.insert(message);
    this.agent._chatCtx.insert(message);
    this.agentSession._conversationItemAdded(message);
    this.logger.info(
      { speech_id: speechHandle.id, message: textOut.text },
      "playout completed without interruption"
    );
  }

  if (toolOutput.output.length > 0) {
    this.agentSession._updateAgentState("thinking");
  } else if (this.agentSession.agentState === "speaking") {
    this.agentSession._updateAgentState("listening");
  }
  speechHandle._markGenerationDone();
  await executeToolsTask.result;
  if (toolOutput.output.length === 0) return;

  const { maxToolSteps } = this.agentSession.options;
  if (speechHandle.numSteps >= maxToolSteps) {
    this.logger.warn(
      { speech_id: speechHandle.id, max_tool_steps: maxToolSteps },
      "maximum number of function calls steps reached"
    );
    return;
  }

  const functionToolsExecutedEvent = createFunctionToolsExecutedEvent({
    functionCalls: [],
    functionCallOutputs: []
  });
  let shouldGenerateToolReply = false;
  let newAgentTask = null;
  let ignoreTaskSwitch = false;
  for (const sanitizedOut of toolOutput.output) {
    if (sanitizedOut.toolCallOutput !== undefined) {
      functionToolsExecutedEvent.functionCalls.push(sanitizedOut.toolCall);
      functionToolsExecutedEvent.functionCallOutputs.push(sanitizedOut.toolCallOutput);
      if (sanitizedOut.replyRequired) {
        shouldGenerateToolReply = true;
      }
    }

    const { agentTask } = sanitizedOut;
    if (newAgentTask !== null && agentTask !== undefined) {
      this.logger.error("expected to receive only one agent task from the tool executions");
      ignoreTaskSwitch = true;
    }
    // Keep an agent task once seen; outputs without one must not clear it.
    if (agentTask != null) {
      newAgentTask = agentTask;
    }

    this.logger.debug(
      {
        speechId: speechHandle.id,
        name: sanitizedOut.toolCall?.name,
        args: sanitizedOut.toolCall.args,
        output: sanitizedOut.toolCallOutput?.output,
        isError: sanitizedOut.toolCallOutput?.isError
      },
      "Tool call execution finished"
    );
  }
  this.agentSession.emit(
    AgentSessionEventTypes.FunctionToolsExecuted,
    functionToolsExecutedEvent
  );

  let draining = this.draining;
  if (!ignoreTaskSwitch && newAgentTask !== null) {
    this.agentSession.updateAgent(newAgentTask);
    // After a handoff the follow-up reply is generated with tools disabled.
    draining = true;
  }

  const toolMessages = [
    ...functionToolsExecutedEvent.functionCalls,
    ...functionToolsExecutedEvent.functionCallOutputs
  ];
  if (shouldGenerateToolReply) {
    chatCtx.insert(toolMessages);
    const handle = SpeechHandle.create({
      allowInterruptions: speechHandle.allowInterruptions,
      stepIndex: speechHandle._stepIndex + 1,
      parent: speechHandle
    });
    this.agentSession.emit(
      AgentSessionEventTypes.SpeechCreated,
      createSpeechCreatedEvent({
        userInitiated: false,
        source: "tool_response",
        speechHandle: handle
      })
    );
    const respondToolChoice = draining || modelSettings.toolChoice === "none" ? "none" : "auto";
    const toolResponseTask = this.createSpeechTask({
      task: Task.from(
        () => this.pipelineReplyTask(
          handle,
          chatCtx,
          toolCtx,
          { toolChoice: respondToolChoice },
          replyAbortController,
          instructions,
          undefined,
          toolMessages
        )
      ),
      ownedSpeechHandle: handle,
      name: "AgentActivity.pipelineReply"
    });
    toolResponseTask.finally(() => this.onPipelineReplyDone());
    this.scheduleSpeech(handle, SpeechHandle.SPEECH_PRIORITY_NORMAL, true);
  } else if (functionToolsExecutedEvent.functionCallOutputs.length > 0) {
    for (const msg of toolMessages) {
      msg.createdAt = replyStartedAt;
    }
    this.agent._chatCtx.insert(toolMessages);
    this.agentSession._toolItemsAdded(toolMessages);
  }
};
1273
/**
 * Runs one pipeline reply turn inside an "agent_turn" tracing span.
 *
 * All positional arguments are forwarded unchanged, together with the
 * active span, to `_pipelineReplyTaskImpl` as a single options object.
 * The span is parented to the session's root span context.
 *
 * @returns the promise produced by `tracer.startActiveSpan`.
 */
pipelineReplyTask = async (speechHandle, chatCtx, toolCtx, modelSettings, replyAbortController, instructions, newMessage, toolsMessages) => {
  const runTurn = async (span) => {
    const implArgs = {
      speechHandle,
      chatCtx,
      toolCtx,
      modelSettings,
      replyAbortController,
      instructions,
      newMessage,
      toolsMessages,
      span
    };
    return this._pipelineReplyTaskImpl(implArgs);
  };
  const spanOptions = {
    name: "agent_turn",
    context: this.agentSession.rootSpanContext
  };
  return tracer.startActiveSpan(runTurn, spanOptions);
};
1290
+ async realtimeGenerationTask(speechHandle, ev, modelSettings, replyAbortController) {
1291
+ return tracer.startActiveSpan(
1292
+ async (span) => this._realtimeGenerationTaskImpl({
1293
+ speechHandle,
1294
+ ev,
1295
+ modelSettings,
1296
+ replyAbortController,
1297
+ span
1298
+ }),
1299
+ {
1300
+ name: "agent_turn",
1301
+ context: this.agentSession.rootSpanContext
1302
+ }
1303
+ );
1304
+ }
1305
+ async _realtimeGenerationTaskImpl({
1306
+ speechHandle,
1307
+ ev,
1308
+ modelSettings,
1309
+ replyAbortController,
1310
+ span
1311
+ }) {
1312
+ var _a, _b, _c;
1313
+ span.setAttribute(traceTypes.ATTR_SPEECH_ID, speechHandle.id);
1314
+ speechHandleStorage.enterWith(speechHandle);
1315
+ if (!this.realtimeSession) {
1316
+ throw new Error("realtime session is not initialized");
1317
+ }
1318
+ if (!(this.llm instanceof RealtimeModel)) {
1319
+ throw new Error("llm is not a realtime model");
1320
+ }
1321
+ span.setAttribute(traceTypes.ATTR_GEN_AI_REQUEST_MODEL, this.llm.model);
1322
+ if (this.realtimeSpans && ev.responseId) {
1323
+ this.realtimeSpans.set(ev.responseId, span);
1324
+ }
1325
+ this.logger.debug(
1326
+ { speech_id: speechHandle.id, stepIndex: speechHandle.numSteps },
1327
+ "realtime generation started"
1328
+ );
1329
+ const audioOutput = this.agentSession.output.audioEnabled ? this.agentSession.output.audio : null;
1330
+ const textOutput = this.agentSession.output.transcriptionEnabled ? this.agentSession.output.transcription : null;
1331
+ const toolCtx = this.realtimeSession.tools;
1332
+ await speechHandle.waitIfNotInterrupted([speechHandle._waitForAuthorization()]);
1333
+ speechHandle._clearAuthorization();
1334
+ if (speechHandle.interrupted) {
1335
+ return;
1336
+ }
1337
+ const onFirstFrame = () => {
1338
+ this.agentSession._updateAgentState("speaking");
1339
+ };
1340
+ const readMessages = async (abortController, outputs) => {
1341
+ replyAbortController.signal.addEventListener("abort", () => abortController.abort(), {
1342
+ once: true
1343
+ });
1344
+ const forwardTasks = [];
1345
+ try {
1346
+ for await (const msg of ev.messageStream) {
1347
+ if (forwardTasks.length > 0) {
1348
+ this.logger.warn(
1349
+ "expected to receive only one message generation from the realtime API"
1350
+ );
1351
+ break;
1352
+ }
1353
+ const msgModalities = msg.modalities ? await msg.modalities : void 0;
1354
+ let ttsTextInput = null;
1355
+ let trTextInput;
1356
+ if (msgModalities && !msgModalities.includes("audio") && this.tts) {
1357
+ if (this.llm instanceof RealtimeModel && this.llm.capabilities.audioOutput) {
1358
+ this.logger.warn(
1359
+ "text response received from realtime API, falling back to use a TTS model."
1360
+ );
1361
+ }
1362
+ const [_ttsTextInput, _trTextInput] = msg.textStream.tee();
1363
+ ttsTextInput = _ttsTextInput;
1364
+ trTextInput = _trTextInput;
1365
+ } else {
1366
+ trTextInput = msg.textStream;
1367
+ }
1368
+ const trNodeResult = await this.agent.transcriptionNode(trTextInput, modelSettings);
1369
+ let textOut = null;
1370
+ if (trNodeResult) {
1371
+ const [textForwardTask, _textOut] = performTextForwarding(
1372
+ trNodeResult,
1373
+ abortController,
1374
+ textOutput
1375
+ );
1376
+ forwardTasks.push(textForwardTask);
1377
+ textOut = _textOut;
1378
+ }
1379
+ let audioOut = null;
1380
+ if (audioOutput) {
1381
+ let realtimeAudioResult = null;
1382
+ if (ttsTextInput) {
1383
+ const [ttsTask, ttsStream] = performTTSInference(
1384
+ (...args) => this.agent.ttsNode(...args),
1385
+ ttsTextInput,
1386
+ modelSettings,
1387
+ abortController
1388
+ );
1389
+ tasks.push(ttsTask);
1390
+ realtimeAudioResult = ttsStream;
1391
+ } else if (msgModalities && msgModalities.includes("audio")) {
1392
+ realtimeAudioResult = await this.agent.realtimeAudioOutputNode(
1393
+ msg.audioStream,
1394
+ modelSettings
1395
+ );
1396
+ } else if (this.llm instanceof RealtimeModel && this.llm.capabilities.audioOutput) {
1397
+ this.logger.error(
1398
+ "Text message received from Realtime API with audio modality. This usually happens when text chat context is synced to the API. Try to add a TTS model as fallback or use text modality with TTS instead."
1399
+ );
1400
+ } else {
1401
+ this.logger.warn(
1402
+ "audio output is enabled but neither tts nor realtime audio is available"
1403
+ );
1404
+ }
1405
+ if (realtimeAudioResult) {
1406
+ const [forwardTask, _audioOut] = performAudioForwarding(
1407
+ realtimeAudioResult,
1408
+ audioOutput,
1409
+ abortController
1410
+ );
1411
+ forwardTasks.push(forwardTask);
1412
+ audioOut = _audioOut;
1413
+ audioOut.firstFrameFut.await.finally(onFirstFrame);
1414
+ }
1415
+ } else if (textOut) {
1416
+ textOut.firstTextFut.await.finally(onFirstFrame);
1417
+ }
1418
+ outputs.push([msg.messageId, textOut, audioOut, msgModalities]);
1419
+ }
1420
+ await waitFor(forwardTasks);
1421
+ } catch (error) {
1422
+ this.logger.error(error, "error reading messages from the realtime API");
1423
+ } finally {
1424
+ await cancelAndWait(forwardTasks, AgentActivity.REPLY_TASK_CANCEL_TIMEOUT);
1425
+ }
1426
+ };
1427
+ const messageOutputs = [];
1428
+ const tasks = [
1429
+ Task.from(
1430
+ (controller) => readMessages(controller, messageOutputs),
1431
+ void 0,
1432
+ "AgentActivity.realtime_generation.read_messages"
1433
+ )
1434
+ ];
1435
+ const [toolCallStream, toolCallStreamForTracing] = ev.functionStream.tee();
1436
+ const toolCalls = [];
1437
+ const readToolStreamTask = async (controller, stream) => {
1438
+ const reader = stream.getReader();
1439
+ try {
1440
+ while (!controller.signal.aborted) {
1441
+ const { done, value } = await reader.read();
1442
+ if (done) break;
1443
+ this.logger.debug({ tool_call: value }, "received tool call from the realtime API");
1444
+ toolCalls.push(value);
1445
+ }
1446
+ } finally {
1447
+ reader.releaseLock();
1448
+ }
1449
+ };
1450
+ tasks.push(
1451
+ Task.from(
1452
+ (controller) => readToolStreamTask(controller, toolCallStreamForTracing),
1453
+ replyAbortController,
1454
+ "AgentActivity.realtime_generation.read_tool_stream"
1455
+ )
1456
+ );
1457
+ const onToolExecutionStarted = (f) => {
1458
+ speechHandle._itemAdded([f]);
1459
+ this.agent._chatCtx.items.push(f);
1460
+ this.agentSession._toolItemsAdded([f]);
1461
+ };
1462
+ const onToolExecutionCompleted = (out) => {
1463
+ if (out.toolCallOutput) {
1464
+ speechHandle._itemAdded([out.toolCallOutput]);
1465
+ }
1466
+ };
1467
+ const [executeToolsTask, toolOutput] = performToolExecutions({
1468
+ session: this.agentSession,
1469
+ speechHandle,
1470
+ toolCtx,
1471
+ toolCallStream,
1472
+ toolChoice: modelSettings.toolChoice,
1473
+ controller: replyAbortController,
1474
+ onToolExecutionStarted,
1475
+ onToolExecutionCompleted
1476
+ });
1477
+ await speechHandle.waitIfNotInterrupted(tasks.map((task) => task.result));
1478
+ if (audioOutput) {
1479
+ await speechHandle.waitIfNotInterrupted([audioOutput.waitForPlayout()]);
1480
+ this.agentSession._updateAgentState("listening");
1481
+ }
1482
+ if (speechHandle.interrupted) {
1483
+ this.logger.debug(
1484
+ { speech_id: speechHandle.id },
1485
+ "Aborting all realtime generation tasks due to interruption"
1486
+ );
1487
+ replyAbortController.abort();
1488
+ await cancelAndWait(tasks, AgentActivity.REPLY_TASK_CANCEL_TIMEOUT);
1489
+ if (messageOutputs.length > 0) {
1490
+ const [msgId, textOut, audioOut, msgModalities] = messageOutputs[0];
1491
+ let forwardedText = (textOut == null ? void 0 : textOut.text) || "";
1492
+ if (audioOutput) {
1493
+ audioOutput.clearBuffer();
1494
+ const playbackEv = await audioOutput.waitForPlayout();
1495
+ let playbackPosition = playbackEv.playbackPosition;
1496
+ if (audioOut == null ? void 0 : audioOut.firstFrameFut.done) {
1497
+ this.logger.info(
1498
+ { speech_id: speechHandle.id, playbackPosition: playbackEv.playbackPosition },
1499
+ "playout interrupted"
1500
+ );
1501
+ if (playbackEv.synchronizedTranscript) {
1502
+ forwardedText = playbackEv.synchronizedTranscript;
1503
+ }
1504
+ } else {
1505
+ forwardedText = "";
1506
+ playbackPosition = 0;
1507
+ }
1508
+ this.realtimeSession.truncate({
1509
+ messageId: msgId,
1510
+ audioEndMs: Math.floor(playbackPosition),
1511
+ modalities: msgModalities,
1512
+ audioTranscript: forwardedText
1513
+ });
1514
+ }
1515
+ if (forwardedText) {
1516
+ const message = ChatMessage.create({
1517
+ role: "assistant",
1518
+ content: forwardedText,
1519
+ id: msgId,
1520
+ interrupted: true
1521
+ });
1522
+ this.agent._chatCtx.insert(message);
1523
+ speechHandle._itemAdded([message]);
1524
+ this.agentSession._conversationItemAdded(message);
1525
+ }
1526
+ this.logger.info(
1527
+ { speech_id: speechHandle.id, message: forwardedText },
1528
+ "playout completed with interrupt"
1529
+ );
1530
+ }
1531
+ speechHandle._markGenerationDone();
1532
+ await executeToolsTask.cancelAndWait(AgentActivity.REPLY_TASK_CANCEL_TIMEOUT);
1533
+ return;
1534
+ }
1535
+ if (messageOutputs.length > 0) {
1536
+ const [msgId, textOut, _, __] = messageOutputs[0];
1537
+ const message = ChatMessage.create({
1538
+ role: "assistant",
1539
+ content: (textOut == null ? void 0 : textOut.text) || "",
1540
+ id: msgId,
1541
+ interrupted: false
1542
+ });
1543
+ this.agent._chatCtx.insert(message);
1544
+ speechHandle._itemAdded([message]);
1545
+ this.agentSession._conversationItemAdded(message);
1546
+ }
1547
+ speechHandle._markGenerationDone();
1548
+ toolOutput.firstToolStartedFuture.await.finally(() => {
1549
+ this.agentSession._updateAgentState("thinking");
1550
+ });
1551
+ await executeToolsTask.result;
1552
+ if (toolOutput.output.length === 0) {
1553
+ if (!speechHandle.interrupted) {
1554
+ this.agentSession._updateAgentState("listening");
1555
+ }
1556
+ return;
1557
+ }
1558
+ const { maxToolSteps } = this.agentSession.options;
1559
+ if (speechHandle.numSteps >= maxToolSteps) {
1560
+ this.logger.warn(
1561
+ { speech_id: speechHandle.id, max_tool_steps: maxToolSteps },
1562
+ "maximum number of function calls steps reached"
1563
+ );
1564
+ return;
1565
+ }
1566
+ const functionToolsExecutedEvent = createFunctionToolsExecutedEvent({
1567
+ functionCalls: [],
1568
+ functionCallOutputs: []
1569
+ });
1570
+ let shouldGenerateToolReply = false;
1571
+ let newAgentTask = null;
1572
+ let ignoreTaskSwitch = false;
1573
+ for (const sanitizedOut of toolOutput.output) {
1574
+ if (sanitizedOut.toolCallOutput !== void 0) {
1575
+ functionToolsExecutedEvent.functionCallOutputs.push(sanitizedOut.toolCallOutput);
1576
+ if (sanitizedOut.replyRequired) {
1577
+ shouldGenerateToolReply = true;
1578
+ }
1579
+ }
1580
+ if (newAgentTask !== null && sanitizedOut.agentTask !== void 0) {
1581
+ this.logger.error("expected to receive only one agent task from the tool executions");
1582
+ ignoreTaskSwitch = true;
1583
+ }
1584
+ newAgentTask = sanitizedOut.agentTask ?? null;
1585
+ this.logger.debug(
1586
+ {
1587
+ speechId: speechHandle.id,
1588
+ name: (_a = sanitizedOut.toolCall) == null ? void 0 : _a.name,
1589
+ args: sanitizedOut.toolCall.args,
1590
+ output: (_b = sanitizedOut.toolCallOutput) == null ? void 0 : _b.output,
1591
+ isError: (_c = sanitizedOut.toolCallOutput) == null ? void 0 : _c.isError
1592
+ },
1593
+ "Tool call execution finished"
1594
+ );
1595
+ }
1596
+ this.agentSession.emit(
1597
+ AgentSessionEventTypes.FunctionToolsExecuted,
1598
+ functionToolsExecutedEvent
1599
+ );
1600
+ let draining = this.draining;
1601
+ if (!ignoreTaskSwitch && newAgentTask !== null) {
1602
+ this.agentSession.updateAgent(newAgentTask);
1603
+ draining = true;
1604
+ }
1605
+ if (functionToolsExecutedEvent.functionCallOutputs.length > 0) {
1606
+ while (this.currentSpeech || this.speechQueue.size() > 0) {
1607
+ if (this.currentSpeech && !this.currentSpeech.done() && this.currentSpeech !== speechHandle) {
1608
+ await this.currentSpeech.waitForPlayout();
1609
+ } else {
1610
+ await new Promise((resolve) => setImmediate(resolve));
1611
+ }
1612
+ }
1613
+ const chatCtx = this.realtimeSession.chatCtx.copy();
1614
+ chatCtx.items.push(...functionToolsExecutedEvent.functionCallOutputs);
1615
+ this.agentSession._toolItemsAdded(
1616
+ functionToolsExecutedEvent.functionCallOutputs
1617
+ );
1618
+ try {
1619
+ await this.realtimeSession.updateChatCtx(chatCtx);
1620
+ } catch (error) {
1621
+ this.logger.warn(
1622
+ { error },
1623
+ "failed to update chat context before generating the function calls results"
1624
+ );
1625
+ }
1626
+ }
1627
+ if (!shouldGenerateToolReply || this.llm.capabilities.autoToolReplyGeneration) {
1628
+ return;
1629
+ }
1630
+ this.realtimeSession.interrupt();
1631
+ const replySpeechHandle = SpeechHandle.create({
1632
+ allowInterruptions: speechHandle.allowInterruptions,
1633
+ stepIndex: speechHandle.numSteps + 1,
1634
+ parent: speechHandle
1635
+ });
1636
+ this.agentSession.emit(
1637
+ AgentSessionEventTypes.SpeechCreated,
1638
+ createSpeechCreatedEvent({
1639
+ userInitiated: false,
1640
+ source: "tool_response",
1641
+ speechHandle: replySpeechHandle
1642
+ })
1643
+ );
1644
+ const toolChoice = draining || modelSettings.toolChoice === "none" ? "none" : "auto";
1645
+ this.createSpeechTask({
1646
+ task: Task.from(
1647
+ (abortController) => this.realtimeReplyTask({
1648
+ speechHandle: replySpeechHandle,
1649
+ modelSettings: { toolChoice },
1650
+ abortController
1651
+ })
1652
+ ),
1653
+ ownedSpeechHandle: replySpeechHandle,
1654
+ name: "AgentActivity.realtime_reply"
1655
+ });
1656
+ this.scheduleSpeech(replySpeechHandle, SpeechHandle.SPEECH_PRIORITY_NORMAL, true);
1657
+ }
1658
+ async realtimeReplyTask({
1659
+ speechHandle,
1660
+ modelSettings: { toolChoice },
1661
+ userInput,
1662
+ instructions,
1663
+ abortController
1664
+ }) {
1665
+ speechHandleStorage.enterWith(speechHandle);
1666
+ if (!this.realtimeSession) {
1667
+ throw new Error("realtime session is not available");
1668
+ }
1669
+ await speechHandle.waitIfNotInterrupted([speechHandle._waitForAuthorization()]);
1670
+ if (userInput) {
1671
+ const chatCtx = this.realtimeSession.chatCtx.copy();
1672
+ const message = chatCtx.addMessage({
1673
+ role: "user",
1674
+ content: userInput
1675
+ });
1676
+ await this.realtimeSession.updateChatCtx(chatCtx);
1677
+ this.agent._chatCtx.insert(message);
1678
+ this.agentSession._conversationItemAdded(message);
1679
+ }
1680
+ const originalToolChoice = this.toolChoice;
1681
+ if (toolChoice !== void 0) {
1682
+ this.realtimeSession.updateOptions({ toolChoice });
1683
+ }
1684
+ try {
1685
+ const generationEvent = await this.realtimeSession.generateReply(instructions);
1686
+ await this.realtimeGenerationTask(
1687
+ speechHandle,
1688
+ generationEvent,
1689
+ { toolChoice },
1690
+ abortController
1691
+ );
1692
+ } finally {
1693
+ if (toolChoice !== void 0 && toolChoice !== originalToolChoice) {
1694
+ this.realtimeSession.updateOptions({ toolChoice: originalToolChoice });
1695
+ }
1696
+ }
1697
+ }
1698
+ scheduleSpeech(speechHandle, priority, force = false) {
1699
+ if (this.draining && !force) {
1700
+ throw new Error("cannot schedule new speech, the agent is draining");
1701
+ }
1702
+ this.speechQueue.push([priority, Number(process.hrtime.bigint()), speechHandle]);
1703
+ speechHandle._markScheduled();
1704
+ this.wakeupMainTask();
1705
+ }
1706
+ async drain() {
1707
+ return tracer.startActiveSpan(async (span) => this._drainImpl(span), {
1708
+ name: "drain_agent_activity",
1709
+ context: ROOT_CONTEXT
1710
+ });
1711
+ }
1712
+ async _drainImpl(span) {
1713
+ var _a;
1714
+ span.setAttribute(traceTypes.ATTR_AGENT_LABEL, this.agent.id);
1715
+ const unlock = await this.lock.lock();
1716
+ try {
1717
+ if (this._draining) return;
1718
+ this.cancelPreemptiveGeneration();
1719
+ const onExitTask = tracer.startActiveSpan(async () => this.agent.onExit(), {
1720
+ name: "on_exit",
1721
+ attributes: { [traceTypes.ATTR_AGENT_LABEL]: this.agent.id }
1722
+ });
1723
+ this.createSpeechTask({
1724
+ task: Task.from(() => onExitTask),
1725
+ name: "AgentActivity_onExit"
1726
+ });
1727
+ this.wakeupMainTask();
1728
+ this._draining = true;
1729
+ await ((_a = this._mainTask) == null ? void 0 : _a.result);
1730
+ } finally {
1731
+ unlock();
1732
+ }
1733
+ }
1734
+ async close() {
1735
+ var _a, _b, _c, _d;
1736
+ const unlock = await this.lock.lock();
1737
+ try {
1738
+ if (!this._draining) {
1739
+ this.logger.warn("task closing without draining");
1740
+ }
1741
+ this.cancelPreemptiveGeneration();
1742
+ if (this.llm instanceof LLM) {
1743
+ this.llm.off("metrics_collected", this.onMetricsCollected);
1744
+ }
1745
+ if (this.realtimeSession) {
1746
+ this.realtimeSession.off("generation_created", this.onGenerationCreated);
1747
+ this.realtimeSession.off("input_speech_started", this.onInputSpeechStarted);
1748
+ this.realtimeSession.off("input_speech_stopped", this.onInputSpeechStopped);
1749
+ this.realtimeSession.off(
1750
+ "input_audio_transcription_completed",
1751
+ this.onInputAudioTranscriptionCompleted
1752
+ );
1753
+ this.realtimeSession.off("metrics_collected", this.onMetricsCollected);
1754
+ }
1755
+ if (this.stt instanceof STT) {
1756
+ this.stt.off("metrics_collected", this.onMetricsCollected);
1757
+ }
1758
+ if (this.tts instanceof TTS) {
1759
+ this.tts.off("metrics_collected", this.onMetricsCollected);
1760
+ }
1761
+ if (this.vad instanceof VAD) {
1762
+ this.vad.off("metrics_collected", this.onMetricsCollected);
1763
+ }
1764
+ this.detachAudioInput();
1765
+ (_a = this.realtimeSpans) == null ? void 0 : _a.clear();
1766
+ await ((_b = this.realtimeSession) == null ? void 0 : _b.close());
1767
+ await ((_c = this.audioRecognition) == null ? void 0 : _c.close());
1768
+ await ((_d = this._mainTask) == null ? void 0 : _d.cancelAndWait());
1769
+ } finally {
1770
+ unlock();
1771
+ }
1772
+ }
1773
+ }
1774
/**
 * Normalizes a tool-choice value by mapping `null` to `undefined`.
 * Every other value (including `undefined`) is returned unchanged.
 *
 * @param toolChoice - the tool choice, possibly `null`.
 * @returns `undefined` when `toolChoice` is `null`, otherwise `toolChoice`.
 */
function toOaiToolChoice(toolChoice) {
  if (toolChoice === null) {
    return undefined;
  }
  return toolChoice;
}
1777
+ export {
1778
+ AgentActivity
1779
+ };
1780
+ //# sourceMappingURL=agent_activity.js.map