@livekit/agents 0.0.0-20260120144724

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (987)
  1. package/LICENSE +201 -0
  2. package/README.md +17 -0
  3. package/dist/_exceptions.cjs +109 -0
  4. package/dist/_exceptions.cjs.map +1 -0
  5. package/dist/_exceptions.d.cts +64 -0
  6. package/dist/_exceptions.d.ts +64 -0
  7. package/dist/_exceptions.d.ts.map +1 -0
  8. package/dist/_exceptions.js +80 -0
  9. package/dist/_exceptions.js.map +1 -0
  10. package/dist/audio.cjs +170 -0
  11. package/dist/audio.cjs.map +1 -0
  12. package/dist/audio.d.cts +46 -0
  13. package/dist/audio.d.ts +46 -0
  14. package/dist/audio.d.ts.map +1 -0
  15. package/dist/audio.js +133 -0
  16. package/dist/audio.js.map +1 -0
  17. package/dist/cli.cjs +171 -0
  18. package/dist/cli.cjs.map +1 -0
  19. package/dist/cli.d.cts +14 -0
  20. package/dist/cli.d.ts +14 -0
  21. package/dist/cli.d.ts.map +1 -0
  22. package/dist/cli.js +145 -0
  23. package/dist/cli.js.map +1 -0
  24. package/dist/connection_pool.cjs +242 -0
  25. package/dist/connection_pool.cjs.map +1 -0
  26. package/dist/connection_pool.d.cts +123 -0
  27. package/dist/connection_pool.d.ts +123 -0
  28. package/dist/connection_pool.d.ts.map +1 -0
  29. package/dist/connection_pool.js +218 -0
  30. package/dist/connection_pool.js.map +1 -0
  31. package/dist/connection_pool.test.cjs +256 -0
  32. package/dist/connection_pool.test.cjs.map +1 -0
  33. package/dist/connection_pool.test.js +255 -0
  34. package/dist/connection_pool.test.js.map +1 -0
  35. package/dist/constants.cjs +44 -0
  36. package/dist/constants.cjs.map +1 -0
  37. package/dist/constants.d.cts +7 -0
  38. package/dist/constants.d.ts +7 -0
  39. package/dist/constants.d.ts.map +1 -0
  40. package/dist/constants.js +15 -0
  41. package/dist/constants.js.map +1 -0
  42. package/dist/generator.cjs +36 -0
  43. package/dist/generator.cjs.map +1 -0
  44. package/dist/generator.d.cts +23 -0
  45. package/dist/generator.d.ts +23 -0
  46. package/dist/generator.d.ts.map +1 -0
  47. package/dist/generator.js +11 -0
  48. package/dist/generator.js.map +1 -0
  49. package/dist/http_server.cjs +75 -0
  50. package/dist/http_server.cjs.map +1 -0
  51. package/dist/http_server.d.cts +20 -0
  52. package/dist/http_server.d.ts +20 -0
  53. package/dist/http_server.d.ts.map +1 -0
  54. package/dist/http_server.js +51 -0
  55. package/dist/http_server.js.map +1 -0
  56. package/dist/index.cjs +100 -0
  57. package/dist/index.cjs.map +1 -0
  58. package/dist/index.d.cts +35 -0
  59. package/dist/index.d.ts +35 -0
  60. package/dist/index.d.ts.map +1 -0
  61. package/dist/index.js +40 -0
  62. package/dist/index.js.map +1 -0
  63. package/dist/inference/api_protos.cjs +104 -0
  64. package/dist/inference/api_protos.cjs.map +1 -0
  65. package/dist/inference/api_protos.d.cts +222 -0
  66. package/dist/inference/api_protos.d.ts +222 -0
  67. package/dist/inference/api_protos.d.ts.map +1 -0
  68. package/dist/inference/api_protos.js +70 -0
  69. package/dist/inference/api_protos.js.map +1 -0
  70. package/dist/inference/index.cjs +56 -0
  71. package/dist/inference/index.cjs.map +1 -0
  72. package/dist/inference/index.d.cts +8 -0
  73. package/dist/inference/index.d.ts +8 -0
  74. package/dist/inference/index.d.ts.map +1 -0
  75. package/dist/inference/index.js +23 -0
  76. package/dist/inference/index.js.map +1 -0
  77. package/dist/inference/interruption/AdaptiveInterruptionDetector.cjs +152 -0
  78. package/dist/inference/interruption/AdaptiveInterruptionDetector.cjs.map +1 -0
  79. package/dist/inference/interruption/AdaptiveInterruptionDetector.d.cts +50 -0
  80. package/dist/inference/interruption/AdaptiveInterruptionDetector.d.ts +50 -0
  81. package/dist/inference/interruption/AdaptiveInterruptionDetector.d.ts.map +1 -0
  82. package/dist/inference/interruption/AdaptiveInterruptionDetector.js +125 -0
  83. package/dist/inference/interruption/AdaptiveInterruptionDetector.js.map +1 -0
  84. package/dist/inference/interruption/InterruptionStream.cjs +310 -0
  85. package/dist/inference/interruption/InterruptionStream.cjs.map +1 -0
  86. package/dist/inference/interruption/InterruptionStream.d.cts +57 -0
  87. package/dist/inference/interruption/InterruptionStream.d.ts +57 -0
  88. package/dist/inference/interruption/InterruptionStream.d.ts.map +1 -0
  89. package/dist/inference/interruption/InterruptionStream.js +288 -0
  90. package/dist/inference/interruption/InterruptionStream.js.map +1 -0
  91. package/dist/inference/interruption/defaults.cjs +76 -0
  92. package/dist/inference/interruption/defaults.cjs.map +1 -0
  93. package/dist/inference/interruption/defaults.d.cts +14 -0
  94. package/dist/inference/interruption/defaults.d.ts +14 -0
  95. package/dist/inference/interruption/defaults.d.ts.map +1 -0
  96. package/dist/inference/interruption/defaults.js +42 -0
  97. package/dist/inference/interruption/defaults.js.map +1 -0
  98. package/dist/inference/interruption/errors.cjs +2 -0
  99. package/dist/inference/interruption/errors.cjs.map +1 -0
  100. package/dist/inference/interruption/errors.d.cts +2 -0
  101. package/dist/inference/interruption/errors.d.ts +2 -0
  102. package/dist/inference/interruption/errors.d.ts.map +1 -0
  103. package/dist/inference/interruption/errors.js +1 -0
  104. package/dist/inference/interruption/errors.js.map +1 -0
  105. package/dist/inference/interruption/http_transport.cjs +57 -0
  106. package/dist/inference/interruption/http_transport.cjs.map +1 -0
  107. package/dist/inference/interruption/http_transport.d.cts +23 -0
  108. package/dist/inference/interruption/http_transport.d.ts +23 -0
  109. package/dist/inference/interruption/http_transport.d.ts.map +1 -0
  110. package/dist/inference/interruption/http_transport.js +33 -0
  111. package/dist/inference/interruption/http_transport.js.map +1 -0
  112. package/dist/inference/interruption/index.cjs +34 -0
  113. package/dist/inference/interruption/index.cjs.map +1 -0
  114. package/dist/inference/interruption/index.d.cts +5 -0
  115. package/dist/inference/interruption/index.d.ts +5 -0
  116. package/dist/inference/interruption/index.d.ts.map +1 -0
  117. package/dist/inference/interruption/index.js +7 -0
  118. package/dist/inference/interruption/index.js.map +1 -0
  119. package/dist/inference/interruption/interruption.cjs +85 -0
  120. package/dist/inference/interruption/interruption.cjs.map +1 -0
  121. package/dist/inference/interruption/interruption.d.cts +48 -0
  122. package/dist/inference/interruption/interruption.d.ts +48 -0
  123. package/dist/inference/interruption/interruption.d.ts.map +1 -0
  124. package/dist/inference/interruption/interruption.js +59 -0
  125. package/dist/inference/interruption/interruption.js.map +1 -0
  126. package/dist/inference/llm.cjs +347 -0
  127. package/dist/inference/llm.cjs.map +1 -0
  128. package/dist/inference/llm.d.cts +114 -0
  129. package/dist/inference/llm.d.ts +114 -0
  130. package/dist/inference/llm.d.ts.map +1 -0
  131. package/dist/inference/llm.js +318 -0
  132. package/dist/inference/llm.js.map +1 -0
  133. package/dist/inference/stt.cjs +371 -0
  134. package/dist/inference/stt.cjs.map +1 -0
  135. package/dist/inference/stt.d.cts +91 -0
  136. package/dist/inference/stt.d.ts +91 -0
  137. package/dist/inference/stt.d.ts.map +1 -0
  138. package/dist/inference/stt.js +350 -0
  139. package/dist/inference/stt.js.map +1 -0
  140. package/dist/inference/tts.cjs +439 -0
  141. package/dist/inference/tts.cjs.map +1 -0
  142. package/dist/inference/tts.d.cts +80 -0
  143. package/dist/inference/tts.d.ts +80 -0
  144. package/dist/inference/tts.d.ts.map +1 -0
  145. package/dist/inference/tts.js +417 -0
  146. package/dist/inference/tts.js.map +1 -0
  147. package/dist/inference/utils.cjs +89 -0
  148. package/dist/inference/utils.cjs.map +1 -0
  149. package/dist/inference/utils.d.cts +6 -0
  150. package/dist/inference/utils.d.ts +6 -0
  151. package/dist/inference/utils.d.ts.map +1 -0
  152. package/dist/inference/utils.js +63 -0
  153. package/dist/inference/utils.js.map +1 -0
  154. package/dist/inference/utils.test.cjs +20 -0
  155. package/dist/inference/utils.test.cjs.map +1 -0
  156. package/dist/inference/utils.test.js +19 -0
  157. package/dist/inference/utils.test.js.map +1 -0
  158. package/dist/inference_runner.cjs +37 -0
  159. package/dist/inference_runner.cjs.map +1 -0
  160. package/dist/inference_runner.d.cts +11 -0
  161. package/dist/inference_runner.d.ts +11 -0
  162. package/dist/inference_runner.d.ts.map +1 -0
  163. package/dist/inference_runner.js +13 -0
  164. package/dist/inference_runner.js.map +1 -0
  165. package/dist/ipc/index.cjs +23 -0
  166. package/dist/ipc/index.cjs.map +1 -0
  167. package/dist/ipc/index.d.cts +2 -0
  168. package/dist/ipc/index.d.ts +2 -0
  169. package/dist/ipc/index.d.ts.map +1 -0
  170. package/dist/ipc/index.js +2 -0
  171. package/dist/ipc/index.js.map +1 -0
  172. package/dist/ipc/inference_executor.cjs +17 -0
  173. package/dist/ipc/inference_executor.cjs.map +1 -0
  174. package/dist/ipc/inference_executor.d.cts +4 -0
  175. package/dist/ipc/inference_executor.d.ts +4 -0
  176. package/dist/ipc/inference_executor.d.ts.map +1 -0
  177. package/dist/ipc/inference_executor.js +1 -0
  178. package/dist/ipc/inference_executor.js.map +1 -0
  179. package/dist/ipc/inference_proc_executor.cjs +101 -0
  180. package/dist/ipc/inference_proc_executor.cjs.map +1 -0
  181. package/dist/ipc/inference_proc_executor.d.cts +23 -0
  182. package/dist/ipc/inference_proc_executor.d.ts +23 -0
  183. package/dist/ipc/inference_proc_executor.d.ts.map +1 -0
  184. package/dist/ipc/inference_proc_executor.js +75 -0
  185. package/dist/ipc/inference_proc_executor.js.map +1 -0
  186. package/dist/ipc/inference_proc_lazy_main.cjs +86 -0
  187. package/dist/ipc/inference_proc_lazy_main.cjs.map +1 -0
  188. package/dist/ipc/inference_proc_lazy_main.d.cts +2 -0
  189. package/dist/ipc/inference_proc_lazy_main.d.ts +2 -0
  190. package/dist/ipc/inference_proc_lazy_main.d.ts.map +1 -0
  191. package/dist/ipc/inference_proc_lazy_main.js +85 -0
  192. package/dist/ipc/inference_proc_lazy_main.js.map +1 -0
  193. package/dist/ipc/job_executor.cjs +34 -0
  194. package/dist/ipc/job_executor.cjs.map +1 -0
  195. package/dist/ipc/job_executor.d.cts +18 -0
  196. package/dist/ipc/job_executor.d.ts +18 -0
  197. package/dist/ipc/job_executor.d.ts.map +1 -0
  198. package/dist/ipc/job_executor.js +10 -0
  199. package/dist/ipc/job_executor.js.map +1 -0
  200. package/dist/ipc/job_proc_executor.cjs +115 -0
  201. package/dist/ipc/job_proc_executor.cjs.map +1 -0
  202. package/dist/ipc/job_proc_executor.d.cts +19 -0
  203. package/dist/ipc/job_proc_executor.d.ts +19 -0
  204. package/dist/ipc/job_proc_executor.d.ts.map +1 -0
  205. package/dist/ipc/job_proc_executor.js +89 -0
  206. package/dist/ipc/job_proc_executor.js.map +1 -0
  207. package/dist/ipc/job_proc_lazy_main.cjs +210 -0
  208. package/dist/ipc/job_proc_lazy_main.cjs.map +1 -0
  209. package/dist/ipc/job_proc_lazy_main.d.cts +2 -0
  210. package/dist/ipc/job_proc_lazy_main.d.ts +2 -0
  211. package/dist/ipc/job_proc_lazy_main.d.ts.map +1 -0
  212. package/dist/ipc/job_proc_lazy_main.js +187 -0
  213. package/dist/ipc/job_proc_lazy_main.js.map +1 -0
  214. package/dist/ipc/message.cjs +17 -0
  215. package/dist/ipc/message.cjs.map +1 -0
  216. package/dist/ipc/message.d.cts +58 -0
  217. package/dist/ipc/message.d.ts +58 -0
  218. package/dist/ipc/message.d.ts.map +1 -0
  219. package/dist/ipc/message.js +1 -0
  220. package/dist/ipc/message.js.map +1 -0
  221. package/dist/ipc/proc_pool.cjs +164 -0
  222. package/dist/ipc/proc_pool.cjs.map +1 -0
  223. package/dist/ipc/proc_pool.d.cts +31 -0
  224. package/dist/ipc/proc_pool.d.ts +31 -0
  225. package/dist/ipc/proc_pool.d.ts.map +1 -0
  226. package/dist/ipc/proc_pool.js +140 -0
  227. package/dist/ipc/proc_pool.js.map +1 -0
  228. package/dist/ipc/supervised_proc.cjs +229 -0
  229. package/dist/ipc/supervised_proc.cjs.map +1 -0
  230. package/dist/ipc/supervised_proc.d.cts +32 -0
  231. package/dist/ipc/supervised_proc.d.ts +32 -0
  232. package/dist/ipc/supervised_proc.d.ts.map +1 -0
  233. package/dist/ipc/supervised_proc.js +195 -0
  234. package/dist/ipc/supervised_proc.js.map +1 -0
  235. package/dist/ipc/supervised_proc.test.cjs +145 -0
  236. package/dist/ipc/supervised_proc.test.cjs.map +1 -0
  237. package/dist/ipc/supervised_proc.test.js +122 -0
  238. package/dist/ipc/supervised_proc.test.js.map +1 -0
  239. package/dist/job.cjs +373 -0
  240. package/dist/job.cjs.map +1 -0
  241. package/dist/job.d.cts +141 -0
  242. package/dist/job.d.ts +141 -0
  243. package/dist/job.d.ts.map +1 -0
  244. package/dist/job.js +332 -0
  245. package/dist/job.js.map +1 -0
  246. package/dist/llm/chat_context.cjs +527 -0
  247. package/dist/llm/chat_context.cjs.map +1 -0
  248. package/dist/llm/chat_context.d.cts +223 -0
  249. package/dist/llm/chat_context.d.ts +223 -0
  250. package/dist/llm/chat_context.d.ts.map +1 -0
  251. package/dist/llm/chat_context.js +496 -0
  252. package/dist/llm/chat_context.js.map +1 -0
  253. package/dist/llm/chat_context.test.cjs +911 -0
  254. package/dist/llm/chat_context.test.cjs.map +1 -0
  255. package/dist/llm/chat_context.test.js +916 -0
  256. package/dist/llm/chat_context.test.js.map +1 -0
  257. package/dist/llm/fallback_adapter.cjs +278 -0
  258. package/dist/llm/fallback_adapter.cjs.map +1 -0
  259. package/dist/llm/fallback_adapter.d.cts +73 -0
  260. package/dist/llm/fallback_adapter.d.ts +73 -0
  261. package/dist/llm/fallback_adapter.d.ts.map +1 -0
  262. package/dist/llm/fallback_adapter.js +254 -0
  263. package/dist/llm/fallback_adapter.js.map +1 -0
  264. package/dist/llm/fallback_adapter.test.cjs +176 -0
  265. package/dist/llm/fallback_adapter.test.cjs.map +1 -0
  266. package/dist/llm/fallback_adapter.test.js +175 -0
  267. package/dist/llm/fallback_adapter.test.js.map +1 -0
  268. package/dist/llm/index.cjs +79 -0
  269. package/dist/llm/index.cjs.map +1 -0
  270. package/dist/llm/index.d.cts +9 -0
  271. package/dist/llm/index.d.ts +9 -0
  272. package/dist/llm/index.d.ts.map +1 -0
  273. package/dist/llm/index.js +61 -0
  274. package/dist/llm/index.js.map +1 -0
  275. package/dist/llm/llm.cjs +226 -0
  276. package/dist/llm/llm.cjs.map +1 -0
  277. package/dist/llm/llm.d.cts +94 -0
  278. package/dist/llm/llm.d.ts +94 -0
  279. package/dist/llm/llm.d.ts.map +1 -0
  280. package/dist/llm/llm.js +201 -0
  281. package/dist/llm/llm.js.map +1 -0
  282. package/dist/llm/provider_format/google.cjs +132 -0
  283. package/dist/llm/provider_format/google.cjs.map +1 -0
  284. package/dist/llm/provider_format/google.d.cts +6 -0
  285. package/dist/llm/provider_format/google.d.ts +6 -0
  286. package/dist/llm/provider_format/google.d.ts.map +1 -0
  287. package/dist/llm/provider_format/google.js +108 -0
  288. package/dist/llm/provider_format/google.js.map +1 -0
  289. package/dist/llm/provider_format/google.test.cjs +724 -0
  290. package/dist/llm/provider_format/google.test.cjs.map +1 -0
  291. package/dist/llm/provider_format/google.test.js +728 -0
  292. package/dist/llm/provider_format/google.test.js.map +1 -0
  293. package/dist/llm/provider_format/index.cjs +40 -0
  294. package/dist/llm/provider_format/index.cjs.map +1 -0
  295. package/dist/llm/provider_format/index.d.cts +4 -0
  296. package/dist/llm/provider_format/index.d.ts +4 -0
  297. package/dist/llm/provider_format/index.d.ts.map +1 -0
  298. package/dist/llm/provider_format/index.js +16 -0
  299. package/dist/llm/provider_format/index.js.map +1 -0
  300. package/dist/llm/provider_format/openai.cjs +138 -0
  301. package/dist/llm/provider_format/openai.cjs.map +1 -0
  302. package/dist/llm/provider_format/openai.d.cts +3 -0
  303. package/dist/llm/provider_format/openai.d.ts +3 -0
  304. package/dist/llm/provider_format/openai.d.ts.map +1 -0
  305. package/dist/llm/provider_format/openai.js +114 -0
  306. package/dist/llm/provider_format/openai.js.map +1 -0
  307. package/dist/llm/provider_format/openai.test.cjs +557 -0
  308. package/dist/llm/provider_format/openai.test.cjs.map +1 -0
  309. package/dist/llm/provider_format/openai.test.js +561 -0
  310. package/dist/llm/provider_format/openai.test.js.map +1 -0
  311. package/dist/llm/provider_format/utils.cjs +146 -0
  312. package/dist/llm/provider_format/utils.cjs.map +1 -0
  313. package/dist/llm/provider_format/utils.d.cts +38 -0
  314. package/dist/llm/provider_format/utils.d.ts +38 -0
  315. package/dist/llm/provider_format/utils.d.ts.map +1 -0
  316. package/dist/llm/provider_format/utils.js +122 -0
  317. package/dist/llm/provider_format/utils.js.map +1 -0
  318. package/dist/llm/realtime.cjs +77 -0
  319. package/dist/llm/realtime.cjs.map +1 -0
  320. package/dist/llm/realtime.d.cts +106 -0
  321. package/dist/llm/realtime.d.ts +106 -0
  322. package/dist/llm/realtime.d.ts.map +1 -0
  323. package/dist/llm/realtime.js +52 -0
  324. package/dist/llm/realtime.js.map +1 -0
  325. package/dist/llm/remote_chat_context.cjs +112 -0
  326. package/dist/llm/remote_chat_context.cjs.map +1 -0
  327. package/dist/llm/remote_chat_context.d.cts +25 -0
  328. package/dist/llm/remote_chat_context.d.ts +25 -0
  329. package/dist/llm/remote_chat_context.d.ts.map +1 -0
  330. package/dist/llm/remote_chat_context.js +88 -0
  331. package/dist/llm/remote_chat_context.js.map +1 -0
  332. package/dist/llm/remote_chat_context.test.cjs +225 -0
  333. package/dist/llm/remote_chat_context.test.cjs.map +1 -0
  334. package/dist/llm/remote_chat_context.test.js +224 -0
  335. package/dist/llm/remote_chat_context.test.js.map +1 -0
  336. package/dist/llm/tool_context.cjs +152 -0
  337. package/dist/llm/tool_context.cjs.map +1 -0
  338. package/dist/llm/tool_context.d.cts +153 -0
  339. package/dist/llm/tool_context.d.ts +153 -0
  340. package/dist/llm/tool_context.d.ts.map +1 -0
  341. package/dist/llm/tool_context.js +119 -0
  342. package/dist/llm/tool_context.js.map +1 -0
  343. package/dist/llm/tool_context.test.cjs +359 -0
  344. package/dist/llm/tool_context.test.cjs.map +1 -0
  345. package/dist/llm/tool_context.test.js +336 -0
  346. package/dist/llm/tool_context.test.js.map +1 -0
  347. package/dist/llm/tool_context.type.test.cjs +92 -0
  348. package/dist/llm/tool_context.type.test.cjs.map +1 -0
  349. package/dist/llm/tool_context.type.test.js +91 -0
  350. package/dist/llm/tool_context.type.test.js.map +1 -0
  351. package/dist/llm/utils.cjs +267 -0
  352. package/dist/llm/utils.cjs.map +1 -0
  353. package/dist/llm/utils.d.cts +41 -0
  354. package/dist/llm/utils.d.ts +41 -0
  355. package/dist/llm/utils.d.ts.map +1 -0
  356. package/dist/llm/utils.js +230 -0
  357. package/dist/llm/utils.js.map +1 -0
  358. package/dist/llm/utils.test.cjs +513 -0
  359. package/dist/llm/utils.test.cjs.map +1 -0
  360. package/dist/llm/utils.test.js +490 -0
  361. package/dist/llm/utils.test.js.map +1 -0
  362. package/dist/llm/zod-utils.cjs +102 -0
  363. package/dist/llm/zod-utils.cjs.map +1 -0
  364. package/dist/llm/zod-utils.d.cts +65 -0
  365. package/dist/llm/zod-utils.d.ts +65 -0
  366. package/dist/llm/zod-utils.d.ts.map +1 -0
  367. package/dist/llm/zod-utils.js +64 -0
  368. package/dist/llm/zod-utils.js.map +1 -0
  369. package/dist/llm/zod-utils.test.cjs +472 -0
  370. package/dist/llm/zod-utils.test.cjs.map +1 -0
  371. package/dist/llm/zod-utils.test.js +455 -0
  372. package/dist/llm/zod-utils.test.js.map +1 -0
  373. package/dist/log.cjs +81 -0
  374. package/dist/log.cjs.map +1 -0
  375. package/dist/log.d.cts +20 -0
  376. package/dist/log.d.ts +20 -0
  377. package/dist/log.d.ts.map +1 -0
  378. package/dist/log.js +54 -0
  379. package/dist/log.js.map +1 -0
  380. package/dist/metrics/base.cjs +17 -0
  381. package/dist/metrics/base.cjs.map +1 -0
  382. package/dist/metrics/base.d.cts +150 -0
  383. package/dist/metrics/base.d.ts +150 -0
  384. package/dist/metrics/base.d.ts.map +1 -0
  385. package/dist/metrics/base.js +1 -0
  386. package/dist/metrics/base.js.map +1 -0
  387. package/dist/metrics/index.cjs +32 -0
  388. package/dist/metrics/index.cjs.map +1 -0
  389. package/dist/metrics/index.d.cts +4 -0
  390. package/dist/metrics/index.d.ts +4 -0
  391. package/dist/metrics/index.d.ts.map +1 -0
  392. package/dist/metrics/index.js +7 -0
  393. package/dist/metrics/index.js.map +1 -0
  394. package/dist/metrics/usage_collector.cjs +58 -0
  395. package/dist/metrics/usage_collector.cjs.map +1 -0
  396. package/dist/metrics/usage_collector.d.cts +15 -0
  397. package/dist/metrics/usage_collector.d.ts +15 -0
  398. package/dist/metrics/usage_collector.d.ts.map +1 -0
  399. package/dist/metrics/usage_collector.js +34 -0
  400. package/dist/metrics/usage_collector.js.map +1 -0
  401. package/dist/metrics/utils.cjs +74 -0
  402. package/dist/metrics/utils.cjs.map +1 -0
  403. package/dist/metrics/utils.d.cts +3 -0
  404. package/dist/metrics/utils.d.ts +3 -0
  405. package/dist/metrics/utils.d.ts.map +1 -0
  406. package/dist/metrics/utils.js +50 -0
  407. package/dist/metrics/utils.js.map +1 -0
  408. package/dist/plugin.cjs +62 -0
  409. package/dist/plugin.cjs.map +1 -0
  410. package/dist/plugin.d.cts +24 -0
  411. package/dist/plugin.d.ts +24 -0
  412. package/dist/plugin.d.ts.map +1 -0
  413. package/dist/plugin.js +37 -0
  414. package/dist/plugin.js.map +1 -0
  415. package/dist/stream/deferred_stream.cjs +106 -0
  416. package/dist/stream/deferred_stream.cjs.map +1 -0
  417. package/dist/stream/deferred_stream.d.cts +32 -0
  418. package/dist/stream/deferred_stream.d.ts +32 -0
  419. package/dist/stream/deferred_stream.d.ts.map +1 -0
  420. package/dist/stream/deferred_stream.js +81 -0
  421. package/dist/stream/deferred_stream.js.map +1 -0
  422. package/dist/stream/deferred_stream.test.cjs +527 -0
  423. package/dist/stream/deferred_stream.test.cjs.map +1 -0
  424. package/dist/stream/deferred_stream.test.js +526 -0
  425. package/dist/stream/deferred_stream.test.js.map +1 -0
  426. package/dist/stream/identity_transform.cjs +42 -0
  427. package/dist/stream/identity_transform.cjs.map +1 -0
  428. package/dist/stream/identity_transform.d.cts +6 -0
  429. package/dist/stream/identity_transform.d.ts +6 -0
  430. package/dist/stream/identity_transform.d.ts.map +1 -0
  431. package/dist/stream/identity_transform.js +18 -0
  432. package/dist/stream/identity_transform.js.map +1 -0
  433. package/dist/stream/identity_transform.test.cjs +125 -0
  434. package/dist/stream/identity_transform.test.cjs.map +1 -0
  435. package/dist/stream/identity_transform.test.js +124 -0
  436. package/dist/stream/identity_transform.test.js.map +1 -0
  437. package/dist/stream/index.cjs +38 -0
  438. package/dist/stream/index.cjs.map +1 -0
  439. package/dist/stream/index.d.cts +5 -0
  440. package/dist/stream/index.d.ts +5 -0
  441. package/dist/stream/index.d.ts.map +1 -0
  442. package/dist/stream/index.js +11 -0
  443. package/dist/stream/index.js.map +1 -0
  444. package/dist/stream/merge_readable_streams.cjs +59 -0
  445. package/dist/stream/merge_readable_streams.cjs.map +1 -0
  446. package/dist/stream/merge_readable_streams.d.cts +4 -0
  447. package/dist/stream/merge_readable_streams.d.ts +4 -0
  448. package/dist/stream/merge_readable_streams.d.ts.map +1 -0
  449. package/dist/stream/merge_readable_streams.js +35 -0
  450. package/dist/stream/merge_readable_streams.js.map +1 -0
  451. package/dist/stream/stream_channel.cjs +57 -0
  452. package/dist/stream/stream_channel.cjs.map +1 -0
  453. package/dist/stream/stream_channel.d.cts +11 -0
  454. package/dist/stream/stream_channel.d.ts +11 -0
  455. package/dist/stream/stream_channel.d.ts.map +1 -0
  456. package/dist/stream/stream_channel.js +33 -0
  457. package/dist/stream/stream_channel.js.map +1 -0
  458. package/dist/stream/stream_channel.test.cjs +124 -0
  459. package/dist/stream/stream_channel.test.cjs.map +1 -0
  460. package/dist/stream/stream_channel.test.js +123 -0
  461. package/dist/stream/stream_channel.test.js.map +1 -0
  462. package/dist/stt/index.cjs +38 -0
  463. package/dist/stt/index.cjs.map +1 -0
  464. package/dist/stt/index.d.cts +3 -0
  465. package/dist/stt/index.d.ts +3 -0
  466. package/dist/stt/index.d.ts.map +1 -0
  467. package/dist/stt/index.js +14 -0
  468. package/dist/stt/index.js.map +1 -0
  469. package/dist/stt/stream_adapter.cjs +115 -0
  470. package/dist/stt/stream_adapter.cjs.map +1 -0
  471. package/dist/stt/stream_adapter.d.cts +23 -0
  472. package/dist/stt/stream_adapter.d.ts +23 -0
  473. package/dist/stt/stream_adapter.d.ts.map +1 -0
  474. package/dist/stt/stream_adapter.js +90 -0
  475. package/dist/stt/stream_adapter.js.map +1 -0
  476. package/dist/stt/stt.cjs +253 -0
  477. package/dist/stt/stt.cjs.map +1 -0
  478. package/dist/stt/stt.d.cts +158 -0
  479. package/dist/stt/stt.d.ts +158 -0
  480. package/dist/stt/stt.d.ts.map +1 -0
  481. package/dist/stt/stt.js +227 -0
  482. package/dist/stt/stt.js.map +1 -0
  483. package/dist/telemetry/index.cjs +72 -0
  484. package/dist/telemetry/index.cjs.map +1 -0
  485. package/dist/telemetry/index.d.cts +7 -0
  486. package/dist/telemetry/index.d.ts +7 -0
  487. package/dist/telemetry/index.d.ts.map +1 -0
  488. package/dist/telemetry/index.js +37 -0
  489. package/dist/telemetry/index.js.map +1 -0
  490. package/dist/telemetry/logging.cjs +65 -0
  491. package/dist/telemetry/logging.cjs.map +1 -0
  492. package/dist/telemetry/logging.d.cts +21 -0
  493. package/dist/telemetry/logging.d.ts +21 -0
  494. package/dist/telemetry/logging.d.ts.map +1 -0
  495. package/dist/telemetry/logging.js +40 -0
  496. package/dist/telemetry/logging.js.map +1 -0
  497. package/dist/telemetry/otel_http_exporter.cjs +147 -0
  498. package/dist/telemetry/otel_http_exporter.cjs.map +1 -0
  499. package/dist/telemetry/otel_http_exporter.d.cts +62 -0
  500. package/dist/telemetry/otel_http_exporter.d.ts +62 -0
  501. package/dist/telemetry/otel_http_exporter.d.ts.map +1 -0
  502. package/dist/telemetry/otel_http_exporter.js +123 -0
  503. package/dist/telemetry/otel_http_exporter.js.map +1 -0
  504. package/dist/telemetry/pino_otel_transport.cjs +217 -0
  505. package/dist/telemetry/pino_otel_transport.cjs.map +1 -0
  506. package/dist/telemetry/pino_otel_transport.d.cts +58 -0
  507. package/dist/telemetry/pino_otel_transport.d.ts +58 -0
  508. package/dist/telemetry/pino_otel_transport.d.ts.map +1 -0
  509. package/dist/telemetry/pino_otel_transport.js +189 -0
  510. package/dist/telemetry/pino_otel_transport.js.map +1 -0
  511. package/dist/telemetry/trace_types.cjs +206 -0
  512. package/dist/telemetry/trace_types.cjs.map +1 -0
  513. package/dist/telemetry/trace_types.d.cts +61 -0
  514. package/dist/telemetry/trace_types.d.ts +61 -0
  515. package/dist/telemetry/trace_types.d.ts.map +1 -0
  516. package/dist/telemetry/trace_types.js +123 -0
  517. package/dist/telemetry/trace_types.js.map +1 -0
  518. package/dist/telemetry/traces.cjs +444 -0
  519. package/dist/telemetry/traces.cjs.map +1 -0
  520. package/dist/telemetry/traces.d.cts +114 -0
  521. package/dist/telemetry/traces.d.ts +114 -0
  522. package/dist/telemetry/traces.d.ts.map +1 -0
  523. package/dist/telemetry/traces.js +409 -0
  524. package/dist/telemetry/traces.js.map +1 -0
  525. package/dist/telemetry/utils.cjs +86 -0
  526. package/dist/telemetry/utils.cjs.map +1 -0
  527. package/dist/telemetry/utils.d.cts +5 -0
  528. package/dist/telemetry/utils.d.ts +5 -0
  529. package/dist/telemetry/utils.d.ts.map +1 -0
  530. package/dist/telemetry/utils.js +51 -0
  531. package/dist/telemetry/utils.js.map +1 -0
  532. package/dist/tokenize/basic/basic.cjs +105 -0
  533. package/dist/tokenize/basic/basic.cjs.map +1 -0
  534. package/dist/tokenize/basic/basic.d.cts +24 -0
  535. package/dist/tokenize/basic/basic.d.ts +24 -0
  536. package/dist/tokenize/basic/basic.d.ts.map +1 -0
  537. package/dist/tokenize/basic/basic.js +67 -0
  538. package/dist/tokenize/basic/basic.js.map +1 -0
  539. package/dist/tokenize/basic/hyphenator.cjs +425 -0
  540. package/dist/tokenize/basic/hyphenator.cjs.map +1 -0
  541. package/dist/tokenize/basic/hyphenator.d.cts +17 -0
  542. package/dist/tokenize/basic/hyphenator.d.ts +17 -0
  543. package/dist/tokenize/basic/hyphenator.d.ts.map +1 -0
  544. package/dist/tokenize/basic/hyphenator.js +401 -0
  545. package/dist/tokenize/basic/hyphenator.js.map +1 -0
  546. package/dist/tokenize/basic/index.cjs +37 -0
  547. package/dist/tokenize/basic/index.cjs.map +1 -0
  548. package/dist/tokenize/basic/index.d.cts +2 -0
  549. package/dist/tokenize/basic/index.d.ts +2 -0
  550. package/dist/tokenize/basic/index.d.ts.map +1 -0
  551. package/dist/tokenize/basic/index.js +15 -0
  552. package/dist/tokenize/basic/index.js.map +1 -0
  553. package/dist/tokenize/basic/paragraph.cjs +57 -0
  554. package/dist/tokenize/basic/paragraph.cjs.map +1 -0
  555. package/dist/tokenize/basic/paragraph.d.cts +5 -0
  556. package/dist/tokenize/basic/paragraph.d.ts +5 -0
  557. package/dist/tokenize/basic/paragraph.d.ts.map +1 -0
  558. package/dist/tokenize/basic/paragraph.js +33 -0
  559. package/dist/tokenize/basic/paragraph.js.map +1 -0
  560. package/dist/tokenize/basic/sentence.cjs +97 -0
  561. package/dist/tokenize/basic/sentence.cjs.map +1 -0
  562. package/dist/tokenize/basic/sentence.d.cts +5 -0
  563. package/dist/tokenize/basic/sentence.d.ts +5 -0
  564. package/dist/tokenize/basic/sentence.d.ts.map +1 -0
  565. package/dist/tokenize/basic/sentence.js +73 -0
  566. package/dist/tokenize/basic/sentence.js.map +1 -0
  567. package/dist/tokenize/basic/word.cjs +44 -0
  568. package/dist/tokenize/basic/word.cjs.map +1 -0
  569. package/dist/tokenize/basic/word.d.cts +5 -0
  570. package/dist/tokenize/basic/word.d.ts +5 -0
  571. package/dist/tokenize/basic/word.d.ts.map +1 -0
  572. package/dist/tokenize/basic/word.js +20 -0
  573. package/dist/tokenize/basic/word.js.map +1 -0
  574. package/dist/tokenize/index.cjs +55 -0
  575. package/dist/tokenize/index.cjs.map +1 -0
  576. package/dist/tokenize/index.d.cts +5 -0
  577. package/dist/tokenize/index.d.ts +5 -0
  578. package/dist/tokenize/index.d.ts.map +1 -0
  579. package/dist/tokenize/index.js +19 -0
  580. package/dist/tokenize/index.js.map +1 -0
  581. package/dist/tokenize/token_stream.cjs +168 -0
  582. package/dist/tokenize/token_stream.cjs.map +1 -0
  583. package/dist/tokenize/token_stream.d.cts +40 -0
  584. package/dist/tokenize/token_stream.d.ts +40 -0
  585. package/dist/tokenize/token_stream.d.ts.map +1 -0
  586. package/dist/tokenize/token_stream.js +142 -0
  587. package/dist/tokenize/token_stream.js.map +1 -0
  588. package/dist/tokenize/tokenizer.cjs +184 -0
  589. package/dist/tokenize/tokenizer.cjs.map +1 -0
  590. package/dist/tokenize/tokenizer.d.cts +55 -0
  591. package/dist/tokenize/tokenizer.d.ts +55 -0
  592. package/dist/tokenize/tokenizer.d.ts.map +1 -0
  593. package/dist/tokenize/tokenizer.js +156 -0
  594. package/dist/tokenize/tokenizer.js.map +1 -0
  595. package/dist/tokenize/tokenizer.test.cjs +220 -0
  596. package/dist/tokenize/tokenizer.test.cjs.map +1 -0
  597. package/dist/tokenize/tokenizer.test.js +219 -0
  598. package/dist/tokenize/tokenizer.test.js.map +1 -0
  599. package/dist/transcription.cjs +247 -0
  600. package/dist/transcription.cjs.map +1 -0
  601. package/dist/transcription.d.cts +31 -0
  602. package/dist/transcription.d.ts +31 -0
  603. package/dist/transcription.d.ts.map +1 -0
  604. package/dist/transcription.js +222 -0
  605. package/dist/transcription.js.map +1 -0
  606. package/dist/tts/index.cjs +38 -0
  607. package/dist/tts/index.cjs.map +1 -0
  608. package/dist/tts/index.d.cts +3 -0
  609. package/dist/tts/index.d.ts +3 -0
  610. package/dist/tts/index.d.ts.map +1 -0
  611. package/dist/tts/index.js +14 -0
  612. package/dist/tts/index.js.map +1 -0
  613. package/dist/tts/stream_adapter.cjs +105 -0
  614. package/dist/tts/stream_adapter.cjs.map +1 -0
  615. package/dist/tts/stream_adapter.d.cts +20 -0
  616. package/dist/tts/stream_adapter.d.ts +20 -0
  617. package/dist/tts/stream_adapter.d.ts.map +1 -0
  618. package/dist/tts/stream_adapter.js +80 -0
  619. package/dist/tts/stream_adapter.js.map +1 -0
  620. package/dist/tts/tts.cjs +431 -0
  621. package/dist/tts/tts.cjs.map +1 -0
  622. package/dist/tts/tts.d.cts +161 -0
  623. package/dist/tts/tts.d.ts +161 -0
  624. package/dist/tts/tts.d.ts.map +1 -0
  625. package/dist/tts/tts.js +405 -0
  626. package/dist/tts/tts.js.map +1 -0
  627. package/dist/types.cjs +49 -0
  628. package/dist/types.cjs.map +1 -0
  629. package/dist/types.d.cts +44 -0
  630. package/dist/types.d.ts +44 -0
  631. package/dist/types.d.ts.map +1 -0
  632. package/dist/types.js +23 -0
  633. package/dist/types.js.map +1 -0
  634. package/dist/utils/ws_transport.cjs +51 -0
  635. package/dist/utils/ws_transport.cjs.map +1 -0
  636. package/dist/utils/ws_transport.d.cts +9 -0
  637. package/dist/utils/ws_transport.d.ts +9 -0
  638. package/dist/utils/ws_transport.d.ts.map +1 -0
  639. package/dist/utils/ws_transport.js +17 -0
  640. package/dist/utils/ws_transport.js.map +1 -0
  641. package/dist/utils/ws_transport.test.cjs +212 -0
  642. package/dist/utils/ws_transport.test.cjs.map +1 -0
  643. package/dist/utils/ws_transport.test.js +211 -0
  644. package/dist/utils/ws_transport.test.js.map +1 -0
  645. package/dist/utils.cjs +669 -0
  646. package/dist/utils.cjs.map +1 -0
  647. package/dist/utils.d.cts +244 -0
  648. package/dist/utils.d.ts +244 -0
  649. package/dist/utils.d.ts.map +1 -0
  650. package/dist/utils.js +617 -0
  651. package/dist/utils.js.map +1 -0
  652. package/dist/utils.test.cjs +492 -0
  653. package/dist/utils.test.cjs.map +1 -0
  654. package/dist/utils.test.js +491 -0
  655. package/dist/utils.test.js.map +1 -0
  656. package/dist/vad.cjs +211 -0
  657. package/dist/vad.cjs.map +1 -0
  658. package/dist/vad.d.cts +105 -0
  659. package/dist/vad.d.ts +105 -0
  660. package/dist/vad.d.ts.map +1 -0
  661. package/dist/vad.js +185 -0
  662. package/dist/vad.js.map +1 -0
  663. package/dist/version.cjs +29 -0
  664. package/dist/version.cjs.map +1 -0
  665. package/dist/version.d.cts +2 -0
  666. package/dist/version.d.ts +2 -0
  667. package/dist/version.d.ts.map +1 -0
  668. package/dist/version.js +5 -0
  669. package/dist/version.js.map +1 -0
  670. package/dist/voice/agent.cjs +308 -0
  671. package/dist/voice/agent.cjs.map +1 -0
  672. package/dist/voice/agent.d.cts +83 -0
  673. package/dist/voice/agent.d.ts +83 -0
  674. package/dist/voice/agent.d.ts.map +1 -0
  675. package/dist/voice/agent.js +287 -0
  676. package/dist/voice/agent.js.map +1 -0
  677. package/dist/voice/agent.test.cjs +61 -0
  678. package/dist/voice/agent.test.cjs.map +1 -0
  679. package/dist/voice/agent.test.js +60 -0
  680. package/dist/voice/agent.test.js.map +1 -0
  681. package/dist/voice/agent_activity.cjs +1784 -0
  682. package/dist/voice/agent_activity.cjs.map +1 -0
  683. package/dist/voice/agent_activity.d.cts +116 -0
  684. package/dist/voice/agent_activity.d.ts +116 -0
  685. package/dist/voice/agent_activity.d.ts.map +1 -0
  686. package/dist/voice/agent_activity.js +1780 -0
  687. package/dist/voice/agent_activity.js.map +1 -0
  688. package/dist/voice/agent_session.cjs +592 -0
  689. package/dist/voice/agent_session.cjs.map +1 -0
  690. package/dist/voice/agent_session.d.cts +165 -0
  691. package/dist/voice/agent_session.d.ts +165 -0
  692. package/dist/voice/agent_session.d.ts.map +1 -0
  693. package/dist/voice/agent_session.js +582 -0
  694. package/dist/voice/agent_session.js.map +1 -0
  695. package/dist/voice/audio_recognition.cjs +668 -0
  696. package/dist/voice/audio_recognition.cjs.map +1 -0
  697. package/dist/voice/audio_recognition.d.cts +127 -0
  698. package/dist/voice/audio_recognition.d.ts +127 -0
  699. package/dist/voice/audio_recognition.d.ts.map +1 -0
  700. package/dist/voice/audio_recognition.js +647 -0
  701. package/dist/voice/audio_recognition.js.map +1 -0
  702. package/dist/voice/avatar/datastream_io.cjs +204 -0
  703. package/dist/voice/avatar/datastream_io.cjs.map +1 -0
  704. package/dist/voice/avatar/datastream_io.d.cts +37 -0
  705. package/dist/voice/avatar/datastream_io.d.ts +37 -0
  706. package/dist/voice/avatar/datastream_io.d.ts.map +1 -0
  707. package/dist/voice/avatar/datastream_io.js +188 -0
  708. package/dist/voice/avatar/datastream_io.js.map +1 -0
  709. package/dist/voice/avatar/index.cjs +23 -0
  710. package/dist/voice/avatar/index.cjs.map +1 -0
  711. package/dist/voice/avatar/index.d.cts +2 -0
  712. package/dist/voice/avatar/index.d.ts +2 -0
  713. package/dist/voice/avatar/index.d.ts.map +1 -0
  714. package/dist/voice/avatar/index.js +2 -0
  715. package/dist/voice/avatar/index.js.map +1 -0
  716. package/dist/voice/background_audio.cjs +366 -0
  717. package/dist/voice/background_audio.cjs.map +1 -0
  718. package/dist/voice/background_audio.d.cts +121 -0
  719. package/dist/voice/background_audio.d.ts +121 -0
  720. package/dist/voice/background_audio.d.ts.map +1 -0
  721. package/dist/voice/background_audio.js +342 -0
  722. package/dist/voice/background_audio.js.map +1 -0
  723. package/dist/voice/events.cjs +147 -0
  724. package/dist/voice/events.cjs.map +1 -0
  725. package/dist/voice/events.d.cts +127 -0
  726. package/dist/voice/events.d.ts +127 -0
  727. package/dist/voice/events.d.ts.map +1 -0
  728. package/dist/voice/events.js +112 -0
  729. package/dist/voice/events.js.map +1 -0
  730. package/dist/voice/generation.cjs +747 -0
  731. package/dist/voice/generation.cjs.map +1 -0
  732. package/dist/voice/generation.d.cts +116 -0
  733. package/dist/voice/generation.d.ts +116 -0
  734. package/dist/voice/generation.d.ts.map +1 -0
  735. package/dist/voice/generation.js +719 -0
  736. package/dist/voice/generation.js.map +1 -0
  737. package/dist/voice/generation_tools.test.cjs +236 -0
  738. package/dist/voice/generation_tools.test.cjs.map +1 -0
  739. package/dist/voice/generation_tools.test.js +235 -0
  740. package/dist/voice/generation_tools.test.js.map +1 -0
  741. package/dist/voice/index.cjs +49 -0
  742. package/dist/voice/index.cjs.map +1 -0
  743. package/dist/voice/index.d.cts +10 -0
  744. package/dist/voice/index.d.ts +10 -0
  745. package/dist/voice/index.d.ts.map +1 -0
  746. package/dist/voice/index.js +16 -0
  747. package/dist/voice/index.js.map +1 -0
  748. package/dist/voice/interruption_detection.test.cjs +114 -0
  749. package/dist/voice/interruption_detection.test.cjs.map +1 -0
  750. package/dist/voice/interruption_detection.test.js +113 -0
  751. package/dist/voice/interruption_detection.test.js.map +1 -0
  752. package/dist/voice/io.cjs +270 -0
  753. package/dist/voice/io.cjs.map +1 -0
  754. package/dist/voice/io.d.cts +126 -0
  755. package/dist/voice/io.d.ts +126 -0
  756. package/dist/voice/io.d.ts.map +1 -0
  757. package/dist/voice/io.js +242 -0
  758. package/dist/voice/io.js.map +1 -0
  759. package/dist/voice/recorder_io/index.cjs +23 -0
  760. package/dist/voice/recorder_io/index.cjs.map +1 -0
  761. package/dist/voice/recorder_io/index.d.cts +2 -0
  762. package/dist/voice/recorder_io/index.d.ts +2 -0
  763. package/dist/voice/recorder_io/index.d.ts.map +1 -0
  764. package/dist/voice/recorder_io/index.js +2 -0
  765. package/dist/voice/recorder_io/index.js.map +1 -0
  766. package/dist/voice/recorder_io/recorder_io.cjs +542 -0
  767. package/dist/voice/recorder_io/recorder_io.cjs.map +1 -0
  768. package/dist/voice/recorder_io/recorder_io.d.cts +100 -0
  769. package/dist/voice/recorder_io/recorder_io.d.ts +100 -0
  770. package/dist/voice/recorder_io/recorder_io.d.ts.map +1 -0
  771. package/dist/voice/recorder_io/recorder_io.js +508 -0
  772. package/dist/voice/recorder_io/recorder_io.js.map +1 -0
  773. package/dist/voice/report.cjs +75 -0
  774. package/dist/voice/report.cjs.map +1 -0
  775. package/dist/voice/report.d.cts +42 -0
  776. package/dist/voice/report.d.ts +42 -0
  777. package/dist/voice/report.d.ts.map +1 -0
  778. package/dist/voice/report.js +50 -0
  779. package/dist/voice/report.js.map +1 -0
  780. package/dist/voice/room_io/_input.cjs +133 -0
  781. package/dist/voice/room_io/_input.cjs.map +1 -0
  782. package/dist/voice/room_io/_input.d.cts +24 -0
  783. package/dist/voice/room_io/_input.d.ts +24 -0
  784. package/dist/voice/room_io/_input.d.ts.map +1 -0
  785. package/dist/voice/room_io/_input.js +114 -0
  786. package/dist/voice/room_io/_input.js.map +1 -0
  787. package/dist/voice/room_io/_output.cjs +359 -0
  788. package/dist/voice/room_io/_output.cjs.map +1 -0
  789. package/dist/voice/room_io/_output.d.cts +77 -0
  790. package/dist/voice/room_io/_output.d.ts +77 -0
  791. package/dist/voice/room_io/_output.d.ts.map +1 -0
  792. package/dist/voice/room_io/_output.js +343 -0
  793. package/dist/voice/room_io/_output.js.map +1 -0
  794. package/dist/voice/room_io/index.cjs +25 -0
  795. package/dist/voice/room_io/index.cjs.map +1 -0
  796. package/dist/voice/room_io/index.d.cts +3 -0
  797. package/dist/voice/room_io/index.d.ts +3 -0
  798. package/dist/voice/room_io/index.d.ts.map +1 -0
  799. package/dist/voice/room_io/index.js +3 -0
  800. package/dist/voice/room_io/index.js.map +1 -0
  801. package/dist/voice/room_io/room_io.cjs +373 -0
  802. package/dist/voice/room_io/room_io.cjs.map +1 -0
  803. package/dist/voice/room_io/room_io.d.cts +94 -0
  804. package/dist/voice/room_io/room_io.d.ts +94 -0
  805. package/dist/voice/room_io/room_io.d.ts.map +1 -0
  806. package/dist/voice/room_io/room_io.js +364 -0
  807. package/dist/voice/room_io/room_io.js.map +1 -0
  808. package/dist/voice/run_context.cjs +51 -0
  809. package/dist/voice/run_context.cjs.map +1 -0
  810. package/dist/voice/run_context.d.cts +22 -0
  811. package/dist/voice/run_context.d.ts +22 -0
  812. package/dist/voice/run_context.d.ts.map +1 -0
  813. package/dist/voice/run_context.js +27 -0
  814. package/dist/voice/run_context.js.map +1 -0
  815. package/dist/voice/speech_handle.cjs +228 -0
  816. package/dist/voice/speech_handle.cjs.map +1 -0
  817. package/dist/voice/speech_handle.d.cts +97 -0
  818. package/dist/voice/speech_handle.d.ts +97 -0
  819. package/dist/voice/speech_handle.d.ts.map +1 -0
  820. package/dist/voice/speech_handle.js +204 -0
  821. package/dist/voice/speech_handle.js.map +1 -0
  822. package/dist/voice/transcription/_utils.cjs +45 -0
  823. package/dist/voice/transcription/_utils.cjs.map +1 -0
  824. package/dist/voice/transcription/_utils.d.cts +3 -0
  825. package/dist/voice/transcription/_utils.d.ts +3 -0
  826. package/dist/voice/transcription/_utils.d.ts.map +1 -0
  827. package/dist/voice/transcription/_utils.js +21 -0
  828. package/dist/voice/transcription/_utils.js.map +1 -0
  829. package/dist/voice/transcription/index.cjs +23 -0
  830. package/dist/voice/transcription/index.cjs.map +1 -0
  831. package/dist/voice/transcription/index.d.cts +2 -0
  832. package/dist/voice/transcription/index.d.ts +2 -0
  833. package/dist/voice/transcription/index.d.ts.map +1 -0
  834. package/dist/voice/transcription/index.js +2 -0
  835. package/dist/voice/transcription/index.js.map +1 -0
  836. package/dist/voice/transcription/synchronizer.cjs +379 -0
  837. package/dist/voice/transcription/synchronizer.cjs.map +1 -0
  838. package/dist/voice/transcription/synchronizer.d.cts +87 -0
  839. package/dist/voice/transcription/synchronizer.d.ts +87 -0
  840. package/dist/voice/transcription/synchronizer.d.ts.map +1 -0
  841. package/dist/voice/transcription/synchronizer.js +354 -0
  842. package/dist/voice/transcription/synchronizer.js.map +1 -0
  843. package/dist/worker.cjs +680 -0
  844. package/dist/worker.cjs.map +1 -0
  845. package/dist/worker.d.cts +119 -0
  846. package/dist/worker.d.ts +119 -0
  847. package/dist/worker.d.ts.map +1 -0
  848. package/dist/worker.js +645 -0
  849. package/dist/worker.js.map +1 -0
  850. package/package.json +86 -0
  851. package/resources/NOTICE +2 -0
  852. package/resources/keyboard-typing.ogg +0 -0
  853. package/resources/keyboard-typing2.ogg +0 -0
  854. package/resources/office-ambience.ogg +0 -0
  855. package/src/_exceptions.ts +137 -0
  856. package/src/audio.ts +205 -0
  857. package/src/cli.ts +224 -0
  858. package/src/connection_pool.test.ts +346 -0
  859. package/src/connection_pool.ts +307 -0
  860. package/src/constants.ts +9 -0
  861. package/src/generator.ts +38 -0
  862. package/src/http_server.ts +64 -0
  863. package/src/index.ts +41 -0
  864. package/src/inference/api_protos.ts +82 -0
  865. package/src/inference/index.ts +32 -0
  866. package/src/inference/interruption/AdaptiveInterruptionDetector.ts +166 -0
  867. package/src/inference/interruption/InterruptionStream.ts +397 -0
  868. package/src/inference/interruption/defaults.ts +33 -0
  869. package/src/inference/interruption/errors.ts +0 -0
  870. package/src/inference/interruption/http_transport.ts +61 -0
  871. package/src/inference/interruption/index.ts +4 -0
  872. package/src/inference/interruption/interruption.ts +88 -0
  873. package/src/inference/llm.ts +532 -0
  874. package/src/inference/stt.ts +524 -0
  875. package/src/inference/tts.ts +574 -0
  876. package/src/inference/utils.test.ts +31 -0
  877. package/src/inference/utils.ts +81 -0
  878. package/src/inference_runner.ts +19 -0
  879. package/src/ipc/index.ts +5 -0
  880. package/src/ipc/inference_executor.ts +7 -0
  881. package/src/ipc/inference_proc_executor.ts +101 -0
  882. package/src/ipc/inference_proc_lazy_main.ts +115 -0
  883. package/src/ipc/job_executor.ts +23 -0
  884. package/src/ipc/job_proc_executor.ts +122 -0
  885. package/src/ipc/job_proc_lazy_main.ts +247 -0
  886. package/src/ipc/message.ts +52 -0
  887. package/src/ipc/proc_pool.ts +164 -0
  888. package/src/ipc/supervised_proc.test.ts +153 -0
  889. package/src/ipc/supervised_proc.ts +242 -0
  890. package/src/job.ts +461 -0
  891. package/src/llm/__snapshots__/chat_context.test.ts.snap +527 -0
  892. package/src/llm/__snapshots__/tool_context.test.ts.snap +177 -0
  893. package/src/llm/__snapshots__/zod-utils.test.ts.snap +559 -0
  894. package/src/llm/chat_context.test.ts +1057 -0
  895. package/src/llm/chat_context.ts +759 -0
  896. package/src/llm/fallback_adapter.test.ts +238 -0
  897. package/src/llm/fallback_adapter.ts +391 -0
  898. package/src/llm/index.ts +74 -0
  899. package/src/llm/llm.ts +303 -0
  900. package/src/llm/provider_format/google.test.ts +843 -0
  901. package/src/llm/provider_format/google.ts +134 -0
  902. package/src/llm/provider_format/index.ts +23 -0
  903. package/src/llm/provider_format/openai.test.ts +675 -0
  904. package/src/llm/provider_format/openai.ts +146 -0
  905. package/src/llm/provider_format/utils.ts +187 -0
  906. package/src/llm/realtime.ts +163 -0
  907. package/src/llm/remote_chat_context.test.ts +290 -0
  908. package/src/llm/remote_chat_context.ts +114 -0
  909. package/src/llm/tool_context.test.ts +407 -0
  910. package/src/llm/tool_context.ts +343 -0
  911. package/src/llm/tool_context.type.test.ts +115 -0
  912. package/src/llm/utils.test.ts +670 -0
  913. package/src/llm/utils.ts +336 -0
  914. package/src/llm/zod-utils.test.ts +577 -0
  915. package/src/llm/zod-utils.ts +153 -0
  916. package/src/log.ts +83 -0
  917. package/src/metrics/base.ts +168 -0
  918. package/src/metrics/index.ts +15 -0
  919. package/src/metrics/usage_collector.ts +46 -0
  920. package/src/metrics/utils.ts +64 -0
  921. package/src/plugin.ts +46 -0
  922. package/src/stream/deferred_stream.test.ts +755 -0
  923. package/src/stream/deferred_stream.ts +127 -0
  924. package/src/stream/identity_transform.test.ts +179 -0
  925. package/src/stream/identity_transform.ts +18 -0
  926. package/src/stream/index.ts +7 -0
  927. package/src/stream/merge_readable_streams.ts +40 -0
  928. package/src/stream/stream_channel.test.ts +166 -0
  929. package/src/stream/stream_channel.ts +44 -0
  930. package/src/stt/index.ts +15 -0
  931. package/src/stt/stream_adapter.ts +107 -0
  932. package/src/stt/stt.ts +374 -0
  933. package/src/telemetry/index.ts +28 -0
  934. package/src/telemetry/logging.ts +55 -0
  935. package/src/telemetry/otel_http_exporter.ts +195 -0
  936. package/src/telemetry/pino_otel_transport.ts +265 -0
  937. package/src/telemetry/trace_types.ts +95 -0
  938. package/src/telemetry/traces.ts +612 -0
  939. package/src/telemetry/utils.ts +61 -0
  940. package/src/tokenize/basic/basic.ts +83 -0
  941. package/src/tokenize/basic/hyphenator.ts +434 -0
  942. package/src/tokenize/basic/index.ts +11 -0
  943. package/src/tokenize/basic/paragraph.ts +43 -0
  944. package/src/tokenize/basic/sentence.ts +89 -0
  945. package/src/tokenize/basic/word.ts +27 -0
  946. package/src/tokenize/index.ts +16 -0
  947. package/src/tokenize/token_stream.ts +180 -0
  948. package/src/tokenize/tokenizer.test.ts +255 -0
  949. package/src/tokenize/tokenizer.ts +152 -0
  950. package/src/transcription.ts +307 -0
  951. package/src/tts/index.ts +12 -0
  952. package/src/tts/stream_adapter.ts +110 -0
  953. package/src/tts/tts.ts +598 -0
  954. package/src/types.ts +66 -0
  955. package/src/utils/ws_transport.test.ts +282 -0
  956. package/src/utils/ws_transport.ts +22 -0
  957. package/src/utils.test.ts +651 -0
  958. package/src/utils.ts +871 -0
  959. package/src/vad.ts +262 -0
  960. package/src/version.ts +5 -0
  961. package/src/voice/agent.test.ts +80 -0
  962. package/src/voice/agent.ts +418 -0
  963. package/src/voice/agent_activity.ts +2375 -0
  964. package/src/voice/agent_session.ts +866 -0
  965. package/src/voice/audio_recognition.ts +877 -0
  966. package/src/voice/avatar/datastream_io.ts +247 -0
  967. package/src/voice/avatar/index.ts +4 -0
  968. package/src/voice/background_audio.ts +491 -0
  969. package/src/voice/events.ts +261 -0
  970. package/src/voice/generation.ts +946 -0
  971. package/src/voice/generation_tools.test.ts +268 -0
  972. package/src/voice/index.ts +12 -0
  973. package/src/voice/interruption_detection.test.ts +151 -0
  974. package/src/voice/io.ts +347 -0
  975. package/src/voice/recorder_io/index.ts +4 -0
  976. package/src/voice/recorder_io/recorder_io.ts +690 -0
  977. package/src/voice/report.ts +100 -0
  978. package/src/voice/room_io/_input.ts +162 -0
  979. package/src/voice/room_io/_output.ts +439 -0
  980. package/src/voice/room_io/index.ts +5 -0
  981. package/src/voice/room_io/room_io.ts +518 -0
  982. package/src/voice/run_context.ts +34 -0
  983. package/src/voice/speech_handle.ts +250 -0
  984. package/src/voice/transcription/_utils.ts +25 -0
  985. package/src/voice/transcription/index.ts +4 -0
  986. package/src/voice/transcription/synchronizer.ts +477 -0
  987. package/src/worker.ts +798 -0
@@ -0,0 +1,1784 @@
1
+ "use strict"; // NOTE(review): lines below look like bundler-emitted CommonJS interop helpers — keep byte-identical
2
+ var __defProp = Object.defineProperty; // short aliases for the Object reflection helpers used below
3
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
+ var __getOwnPropNames = Object.getOwnPropertyNames;
5
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
6
+ var __export = (target, all) => { // installs every key of `all` on `target` as an enumerable getter
7
+ for (var name in all)
8
+ __defProp(target, name, { get: all[name], enumerable: true }); // all[name] itself is the getter function
9
+ };
10
+ var __copyProps = (to, from, except, desc) => { // mirrors own properties of `from` onto `to` as forwarding getters; returns `to`
11
+ if (from && typeof from === "object" || typeof from === "function") { // only objects and functions have properties to copy
12
+ for (let key of __getOwnPropNames(from))
13
+ if (!__hasOwnProp.call(to, key) && key !== except) // never overwrite an existing own key; skip the excluded key
14
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable }); // getter re-reads from[key] on each access; enumerable follows the source descriptor (true when none is found)
15
+ }
16
+ return to;
17
+ };
18
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod); // fresh object flagged __esModule, then populated from `mod`
19
+ var agent_activity_exports = {}; // export table for this module
20
+ __export(agent_activity_exports, {
21
+ AgentActivity: () => AgentActivity // getter defers the lookup until access, so the class may be declared further down
22
+ });
23
+ module.exports = __toCommonJS(agent_activity_exports); // the only export is AgentActivity
24
+ var import_mutex = require("@livekit/mutex");
25
+ var import_api = require("@opentelemetry/api");
26
+ var import_heap_js = require("heap-js");
27
+ var import_node_async_hooks = require("node:async_hooks");
28
+ var import_web = require("node:stream/web");
29
+ var import_chat_context = require("../llm/chat_context.cjs");
30
+ var import_llm = require("../llm/index.cjs");
31
+ var import_tool_context = require("../llm/tool_context.cjs");
32
+ var import_log = require("../log.cjs");
33
+ var import_deferred_stream = require("../stream/deferred_stream.cjs");
34
+ var import_stt = require("../stt/stt.cjs");
35
+ var import_telemetry = require("../telemetry/index.cjs");
36
+ var import_word = require("../tokenize/basic/word.cjs");
37
+ var import_tts = require("../tts/tts.cjs");
38
+ var import_utils = require("../utils.cjs");
39
+ var import_interruption = require("../inference/interruption/interruption.cjs");
40
+ var import_vad = require("../vad.cjs");
41
+ var import_agent = require("./agent.cjs");
42
+ var import_agent_session = require("./agent_session.cjs");
43
+ var import_audio_recognition = require("./audio_recognition.cjs");
44
+ var import_events = require("./events.cjs");
45
+ var import_generation = require("./generation.cjs");
46
+ var import_speech_handle = require("./speech_handle.cjs");
47
+ const speechHandleStorage = new import_node_async_hooks.AsyncLocalStorage(); // module-level async-context store; presumably carries the active SpeechHandle — its readers/writers are outside this view, TODO confirm
48
+ class AgentActivity {
49
+ static REPLY_TASK_CANCEL_TIMEOUT = 5e3; // presumably milliseconds — TODO confirm against the code that reads it
50
+ started = false; // set to true by start()
51
+ audioRecognition; // AudioRecognition instance; created in start(), undefined before that
52
+ realtimeSession; // assigned in start() only when the llm is a RealtimeModel
53
+ realtimeSpans; // Map created in start() together with realtimeSession
54
+ // (describes realtimeSpans, the field above) Maps response_id to OTEL span for metrics recording
55
+ turnDetectionMode; // resolved in the constructor: "vad", "stt", "realtime_llm" or undefined
56
+ logger = (0, import_log.log)();
57
+ _draining = false; // exposed read-only through the `draining` getter
58
+ _currentSpeech; // exposed read-only through the `currentSpeech` getter
59
+ speechQueue; // Heap built in the constructor
60
+ // (describes speechQueue, the field above) entries are [priority, timestamp, speechHandle]
61
+ q_updated; // Future created in the constructor
62
+ speechTasks = /* @__PURE__ */ new Set();
63
+ lock = new import_mutex.Mutex(); // held for the whole duration of start()
64
+ audioStream = new import_deferred_stream.DeferredReadableStream(); // source is attached/detached by attachAudioInput()/detachAudioInput()
65
+ // default to null as None, which maps to the default provider tool choice value
66
+ toolChoice = null; // overwritten by updateOptions() when a toolChoice is supplied
67
+ _preemptiveGeneration;
68
+ agent; // constructor argument
69
+ agentSession; // constructor argument
70
+ /** @internal */
71
+ _mainTask; // Task wrapping mainTask(signal); created in start()
72
+ _userTurnCompletedTask;
73
+ /**
74
+ * Notify that agent started speaking.
75
+ * This enables interruption detection in AudioRecognition.
76
+ * @internal
77
+ */
78
+ notifyAgentSpeechStarted() {
79
+ var _a;
80
+ (_a = this.audioRecognition) == null ? void 0 : _a.onStartOfAgentSpeech();
81
+ }
82
+ /**
83
+ * Notify that agent stopped speaking.
84
+ * This disables interruption detection in AudioRecognition.
85
+ * @internal
86
+ */
87
+ notifyAgentSpeechEnded() {
88
+ var _a;
89
+ (_a = this.audioRecognition) == null ? void 0 : _a.onEndOfAgentSpeech();
90
+ }
91
+ constructor(agent, agentSession) {
92
+ this.agent = agent;
93
+ this.agentSession = agentSession;
94
+ this.speechQueue = new import_heap_js.Heap(([p1, t1, _], [p2, t2, __]) => {
95
+ return p1 === p2 ? t1 - t2 : p2 - p1;
96
+ });
97
+ this.q_updated = new import_utils.Future();
98
+ this.turnDetectionMode = typeof this.turnDetection === "string" ? this.turnDetection : void 0;
99
+ if (this.turnDetectionMode === "vad" && this.vad === void 0) {
100
+ this.logger.warn(
101
+ 'turnDetection is set to "vad", but no VAD model is provided, ignoring the turnDdetection setting'
102
+ );
103
+ this.turnDetectionMode = void 0;
104
+ }
105
+ if (this.turnDetectionMode === "stt" && this.stt === void 0) {
106
+ this.logger.warn(
107
+ 'turnDetection is set to "stt", but no STT model is provided, ignoring the turnDetection setting'
108
+ );
109
+ this.turnDetectionMode = void 0;
110
+ }
111
+ if (this.llm instanceof import_llm.RealtimeModel) {
112
+ if (this.llm.capabilities.turnDetection && !this.allowInterruptions) {
113
+ this.logger.warn(
114
+ "the RealtimeModel uses a server-side turn detection, allowInterruptions cannot be false, disable turnDetection in the RealtimeModel and use VAD on the AgentSession instead"
115
+ );
116
+ }
117
+ if (this.turnDetectionMode === "realtime_llm" && !this.llm.capabilities.turnDetection) {
118
+ this.logger.warn(
119
+ 'turnDetection is set to "realtime_llm", but the LLM is not a RealtimeModel or the server-side turn detection is not supported/enabled, ignoring the turnDetection setting'
120
+ );
121
+ this.turnDetectionMode = void 0;
122
+ }
123
+ if (this.turnDetectionMode === "stt") {
124
+ this.logger.warn(
125
+ 'turnDetection is set to "stt", but the LLM is a RealtimeModel, ignoring the turnDetection setting'
126
+ );
127
+ this.turnDetectionMode = void 0;
128
+ }
129
+ if (this.turnDetectionMode && this.turnDetectionMode !== "realtime_llm" && this.llm.capabilities.turnDetection) {
130
+ this.logger.warn(
131
+ `turnDetection is set to "${this.turnDetectionMode}", but the LLM is a RealtimeModel and server-side turn detection enabled, ignoring the turnDetection setting`
132
+ );
133
+ this.turnDetectionMode = void 0;
134
+ }
135
+ if (!this.llm.capabilities.turnDetection && this.vad && this.turnDetectionMode === void 0) {
136
+ this.turnDetectionMode = "vad";
137
+ }
138
+ } else if (this.turnDetectionMode === "realtime_llm") {
139
+ this.logger.warn(
140
+ 'turnDetection is set to "realtime_llm", but the LLM is not a RealtimeModel'
141
+ );
142
+ this.turnDetectionMode = void 0;
143
+ }
144
+ if (!this.vad && this.stt && this.llm instanceof import_llm.LLM && this.allowInterruptions && this.turnDetectionMode === void 0) {
145
+ this.logger.warn(
146
+ "VAD is not set. Enabling VAD is recommended when using LLM and STT for more responsive interruption handling."
147
+ );
148
+ }
149
+ }
150
+ async start() {
151
+ const unlock = await this.lock.lock();
152
+ try {
153
+ const startSpan = import_telemetry.tracer.startSpan({
154
+ name: "start_agent_activity",
155
+ attributes: { [import_telemetry.traceTypes.ATTR_AGENT_LABEL]: this.agent.id },
156
+ context: import_api.ROOT_CONTEXT
157
+ });
158
+ this.agent._agentActivity = this;
159
+ if (this.llm instanceof import_llm.RealtimeModel) {
160
+ this.realtimeSession = this.llm.session();
161
+ this.realtimeSpans = /* @__PURE__ */ new Map();
162
+ this.realtimeSession.on("generation_created", (ev) => this.onGenerationCreated(ev));
163
+ this.realtimeSession.on("input_speech_started", (ev) => this.onInputSpeechStarted(ev));
164
+ this.realtimeSession.on("input_speech_stopped", (ev) => this.onInputSpeechStopped(ev));
165
+ this.realtimeSession.on(
166
+ "input_audio_transcription_completed",
167
+ (ev) => this.onInputAudioTranscriptionCompleted(ev)
168
+ );
169
+ this.realtimeSession.on("metrics_collected", (ev) => this.onMetricsCollected(ev));
170
+ this.realtimeSession.on("error", (ev) => this.onError(ev));
171
+ (0, import_generation.removeInstructions)(this.agent._chatCtx);
172
+ try {
173
+ await this.realtimeSession.updateInstructions(this.agent.instructions);
174
+ } catch (error) {
175
+ this.logger.error(error, "failed to update the instructions");
176
+ }
177
+ try {
178
+ await this.realtimeSession.updateChatCtx(this.agent.chatCtx);
179
+ } catch (error) {
180
+ this.logger.error(error, "failed to update the chat context");
181
+ }
182
+ try {
183
+ await this.realtimeSession.updateTools(this.tools);
184
+ } catch (error) {
185
+ this.logger.error(error, "failed to update the tools");
186
+ }
187
+ if (!this.llm.capabilities.audioOutput && !this.tts && this.agentSession.output.audio) {
188
+ this.logger.error(
189
+ "audio output is enabled but RealtimeModel has no audio modality and no TTS is set. Either enable audio modality in the RealtimeModel or set a TTS model."
190
+ );
191
+ }
192
+ } else if (this.llm instanceof import_llm.LLM) {
193
+ try {
194
+ (0, import_generation.updateInstructions)({
195
+ chatCtx: this.agent._chatCtx,
196
+ instructions: this.agent.instructions,
197
+ addIfMissing: true
198
+ });
199
+ } catch (error) {
200
+ this.logger.error("failed to update the instructions", error);
201
+ }
202
+ }
203
+ if (this.llm instanceof import_llm.LLM) {
204
+ this.llm.on("metrics_collected", (ev) => this.onMetricsCollected(ev));
205
+ this.llm.on("error", (ev) => this.onError(ev));
206
+ }
207
+ if (this.stt instanceof import_stt.STT) {
208
+ this.stt.on("metrics_collected", (ev) => this.onMetricsCollected(ev));
209
+ this.stt.on("error", (ev) => this.onError(ev));
210
+ }
211
+ if (this.tts instanceof import_tts.TTS) {
212
+ this.tts.on("metrics_collected", (ev) => this.onMetricsCollected(ev));
213
+ this.tts.on("error", (ev) => this.onError(ev));
214
+ }
215
+ if (this.vad instanceof import_vad.VAD) {
216
+ this.vad.on("metrics_collected", (ev) => this.onMetricsCollected(ev));
217
+ }
218
+ this.audioRecognition = new import_audio_recognition.AudioRecognition({
219
+ recognitionHooks: this,
220
+ // Disable stt node if stt is not provided
221
+ stt: this.stt ? (...args) => this.agent.sttNode(...args) : void 0,
222
+ vad: this.vad,
223
+ interruptionDetector: this.agentSession.interruptionDetector,
224
+ turnDetector: typeof this.turnDetection === "string" ? void 0 : this.turnDetection,
225
+ turnDetectionMode: this.turnDetectionMode,
226
+ minEndpointingDelay: this.agentSession.options.minEndpointingDelay,
227
+ maxEndpointingDelay: this.agentSession.options.maxEndpointingDelay,
228
+ rootSpanContext: this.agentSession.rootSpanContext
229
+ });
230
+ this.audioRecognition.start();
231
+ this.started = true;
232
+ this._mainTask = import_utils.Task.from(({ signal }) => this.mainTask(signal));
233
+ const onEnterTask = import_telemetry.tracer.startActiveSpan(async () => this.agent.onEnter(), {
234
+ name: "on_enter",
235
+ context: import_api.trace.setSpan(import_api.ROOT_CONTEXT, startSpan),
236
+ attributes: { [import_telemetry.traceTypes.ATTR_AGENT_LABEL]: this.agent.id }
237
+ });
238
+ this.createSpeechTask({
239
+ task: import_utils.Task.from(() => onEnterTask),
240
+ name: "AgentActivity_onEnter"
241
+ });
242
+ startSpan.end();
243
+ } finally {
244
+ unlock();
245
+ }
246
+ }
247
+ get currentSpeech() {
248
+ return this._currentSpeech;
249
+ }
250
+ get vad() {
251
+ return this.agent.vad || this.agentSession.vad;
252
+ }
253
+ get stt() {
254
+ return this.agent.stt || this.agentSession.stt;
255
+ }
256
+ get llm() {
257
+ return this.agent.llm || this.agentSession.llm;
258
+ }
259
+ get tts() {
260
+ return this.agent.tts || this.agentSession.tts;
261
+ }
262
+ get tools() {
263
+ return this.agent.toolCtx;
264
+ }
265
+ get draining() {
266
+ return this._draining;
267
+ }
268
+ get realtimeLLMSession() {
269
+ return this.realtimeSession;
270
+ }
271
+ get allowInterruptions() {
272
+ return this.agentSession.options.allowInterruptions;
273
+ }
274
+ get turnDetection() {
275
+ return this.agentSession.turnDetection;
276
+ }
277
+ get toolCtx() {
278
+ return this.agent.toolCtx;
279
+ }
280
+ async updateChatCtx(chatCtx) {
281
+ chatCtx = chatCtx.copy({ toolCtx: this.toolCtx });
282
+ this.agent._chatCtx = chatCtx;
283
+ if (this.realtimeSession) {
284
+ (0, import_generation.removeInstructions)(chatCtx);
285
+ this.realtimeSession.updateChatCtx(chatCtx);
286
+ } else {
287
+ (0, import_generation.updateInstructions)({
288
+ chatCtx,
289
+ instructions: this.agent.instructions,
290
+ addIfMissing: true
291
+ });
292
+ }
293
+ }
294
+ updateOptions({ toolChoice }) {
295
+ if (toolChoice !== void 0) {
296
+ this.toolChoice = toolChoice;
297
+ }
298
+ if (this.realtimeSession) {
299
+ this.realtimeSession.updateOptions({ toolChoice: this.toolChoice });
300
+ }
301
+ }
302
+ attachAudioInput(audioStream) {
303
+ if (this.audioStream.isSourceSet) {
304
+ this.logger.debug("detaching existing audio input in agent activity");
305
+ this.audioStream.detachSource();
306
+ }
307
+ this.audioStream.setSource(audioStream);
308
+ const [realtimeAudioStream, recognitionAudioStream] = this.audioStream.stream.tee();
309
+ if (this.realtimeSession) {
310
+ this.realtimeSession.setInputAudioStream(realtimeAudioStream);
311
+ }
312
+ if (this.audioRecognition) {
313
+ this.audioRecognition.setInputAudioStream(recognitionAudioStream);
314
+ }
315
+ }
316
+ detachAudioInput() {
317
+ this.audioStream.detachSource();
318
+ }
319
+ commitUserTurn() {
320
+ if (!this.audioRecognition) {
321
+ throw new Error("AudioRecognition is not initialized");
322
+ }
323
+ const audioDetached = false;
324
+ this.audioRecognition.commitUserTurn(audioDetached);
325
+ }
326
+ clearUserTurn() {
327
+ var _a, _b;
328
+ (_a = this.audioRecognition) == null ? void 0 : _a.clearUserTurn();
329
+ (_b = this.realtimeSession) == null ? void 0 : _b.clearAudio();
330
+ }
331
+ say(text, options) {
332
+ const {
333
+ audio,
334
+ allowInterruptions: defaultAllowInterruptions,
335
+ addToChatCtx = true
336
+ } = options ?? {};
337
+ let allowInterruptions = defaultAllowInterruptions;
338
+ if (!audio && !this.tts && this.agentSession.output.audio && this.agentSession.output.audioEnabled) {
339
+ throw new Error("trying to generate speech from text without a TTS model");
340
+ }
341
+ if (this.llm instanceof import_llm.RealtimeModel && this.llm.capabilities.turnDetection && allowInterruptions === false) {
342
+ this.logger.warn(
343
+ "the RealtimeModel uses a server-side turn detection, allowInterruptions cannot be false when using VoiceAgent.say(), disable turnDetection in the RealtimeModel and use VAD on the AgentTask/VoiceAgent instead"
344
+ );
345
+ allowInterruptions = true;
346
+ }
347
+ const handle = import_speech_handle.SpeechHandle.create({
348
+ allowInterruptions: allowInterruptions ?? this.allowInterruptions
349
+ });
350
+ this.agentSession.emit(
351
+ import_events.AgentSessionEventTypes.SpeechCreated,
352
+ (0, import_events.createSpeechCreatedEvent)({
353
+ userInitiated: true,
354
+ source: "say",
355
+ speechHandle: handle
356
+ })
357
+ );
358
+ const task = this.createSpeechTask({
359
+ task: import_utils.Task.from(
360
+ (abortController) => this.ttsTask(handle, text, addToChatCtx, {}, abortController, audio)
361
+ ),
362
+ ownedSpeechHandle: handle,
363
+ name: "AgentActivity.say_tts"
364
+ });
365
+ task.finally(() => this.onPipelineReplyDone());
366
+ this.scheduleSpeech(handle, import_speech_handle.SpeechHandle.SPEECH_PRIORITY_NORMAL);
367
+ return handle;
368
+ }
369
+ // -- Metrics and errors --
370
/**
 * Metrics hook. Tags LLM/TTS metrics with the id of the speech handle found in
 * `speechHandleStorage`, records realtime-model metrics on the span tracked
 * for their request id (then drops that span), and re-emits the metrics on the
 * session.
 */
onMetricsCollected = (ev) => {
  const activeSpeech = speechHandleStorage.getStore();
  if (activeSpeech) {
    const isSpeechMetric = ev.type === "llm_metrics" || ev.type === "tts_metrics";
    if (isSpeechMetric) {
      ev.speechId = activeSpeech.id;
    }
  }

  if (ev.type === "realtime_model_metrics" && this.realtimeSpans) {
    const requestSpan = this.realtimeSpans.get(ev.requestId);
    if (requestSpan) {
      (0, import_telemetry.recordRealtimeMetrics)(requestSpan, ev);
      this.realtimeSpans.delete(ev.requestId);
    }
  }

  const metricsEvent = (0, import_events.createMetricsCollectedEvent)({ metrics: ev });
  this.agentSession.emit(import_events.AgentSessionEventTypes.MetricsCollected, metricsEvent);
};
387
+ onError(ev) {
388
+ if (ev.type === "realtime_model_error") {
389
+ const errorEvent = (0, import_events.createErrorEvent)(ev.error, this.llm);
390
+ this.agentSession.emit(import_events.AgentSessionEventTypes.Error, errorEvent);
391
+ } else if (ev.type === "stt_error") {
392
+ const errorEvent = (0, import_events.createErrorEvent)(ev.error, this.stt);
393
+ this.agentSession.emit(import_events.AgentSessionEventTypes.Error, errorEvent);
394
+ } else if (ev.type === "tts_error") {
395
+ const errorEvent = (0, import_events.createErrorEvent)(ev.error, this.tts);
396
+ this.agentSession.emit(import_events.AgentSessionEventTypes.Error, errorEvent);
397
+ } else if (ev.type === "llm_error") {
398
+ const errorEvent = (0, import_events.createErrorEvent)(ev.error, this.llm);
399
+ this.agentSession.emit(import_events.AgentSessionEventTypes.Error, errorEvent);
400
+ }
401
+ this.agentSession._onError(ev);
402
+ }
403
+ // -- Realtime Session events --
404
+ onInputSpeechStarted(_ev) {
405
+ this.logger.info("onInputSpeechStarted");
406
+ if (!this.vad) {
407
+ this.agentSession._updateUserState("speaking");
408
+ }
409
+ try {
410
+ this.interrupt();
411
+ } catch (error) {
412
+ this.logger.error(
413
+ "RealtimeAPI input_speech_started, but current speech is not interruptable, this should never happen!",
414
+ error
415
+ );
416
+ }
417
+ }
418
+ onInputSpeechStopped(ev) {
419
+ this.logger.info(ev, "onInputSpeechStopped");
420
+ if (!this.vad) {
421
+ this.agentSession._updateUserState("listening");
422
+ }
423
+ if (ev.userTranscriptionEnabled) {
424
+ this.agentSession.emit(
425
+ import_events.AgentSessionEventTypes.UserInputTranscribed,
426
+ (0, import_events.createUserInputTranscribedEvent)({
427
+ isFinal: false,
428
+ transcript: ""
429
+ })
430
+ );
431
+ }
432
+ }
433
+ onInputAudioTranscriptionCompleted(ev) {
434
+ this.agentSession.emit(
435
+ import_events.AgentSessionEventTypes.UserInputTranscribed,
436
+ (0, import_events.createUserInputTranscribedEvent)({
437
+ transcript: ev.transcript,
438
+ isFinal: ev.isFinal
439
+ })
440
+ );
441
+ if (ev.isFinal) {
442
+ const message = import_chat_context.ChatMessage.create({
443
+ role: "user",
444
+ content: ev.transcript,
445
+ id: ev.itemId
446
+ });
447
+ this.agent._chatCtx.items.push(message);
448
+ this.agentSession._conversationItemAdded(message);
449
+ }
450
+ }
451
+ onGenerationCreated(ev) {
452
+ if (ev.userInitiated) {
453
+ return;
454
+ }
455
+ if (this.draining) {
456
+ this.logger.warn("skipping new realtime generation, the agent is draining");
457
+ return;
458
+ }
459
+ const handle = import_speech_handle.SpeechHandle.create({
460
+ allowInterruptions: this.allowInterruptions
461
+ });
462
+ this.agentSession.emit(
463
+ import_events.AgentSessionEventTypes.SpeechCreated,
464
+ (0, import_events.createSpeechCreatedEvent)({
465
+ userInitiated: false,
466
+ source: "generate_reply",
467
+ speechHandle: handle
468
+ })
469
+ );
470
+ this.logger.info({ speech_id: handle.id }, "Creating speech handle");
471
+ this.createSpeechTask({
472
+ task: import_utils.Task.from(
473
+ (abortController) => this.realtimeGenerationTask(handle, ev, {}, abortController)
474
+ ),
475
+ ownedSpeechHandle: handle,
476
+ name: "AgentActivity.realtimeGeneration"
477
+ });
478
+ this.scheduleSpeech(handle, import_speech_handle.SpeechHandle.SPEECH_PRIORITY_NORMAL);
479
+ }
480
+ // recognition hooks
481
+ onStartOfSpeech(_ev) {
482
+ this.agentSession._updateUserState("speaking");
483
+ }
484
+ onEndOfSpeech(ev) {
485
+ let speechEndTime = Date.now();
486
+ if (ev) {
487
+ speechEndTime = speechEndTime - ev.silenceDuration;
488
+ }
489
+ this.agentSession._updateUserState("listening", speechEndTime);
490
+ }
491
+ onVADInferenceDone(ev) {
492
+ var _a, _b;
493
+ if (this.turnDetection === "manual" || this.turnDetection === "realtime_llm") {
494
+ return;
495
+ }
496
+ if (this.llm instanceof import_llm.RealtimeModel && this.llm.capabilities.turnDetection) {
497
+ return;
498
+ }
499
+ if (ev.speechDuration < this.agentSession.options.minInterruptionDuration) {
500
+ return;
501
+ }
502
+ if (this.stt && this.agentSession.options.minInterruptionWords > 0 && this.audioRecognition) {
503
+ const text = this.audioRecognition.currentTranscript;
504
+ const normalizedText = text ?? "";
505
+ const wordCount = (0, import_word.splitWords)(normalizedText, true).length;
506
+ if (wordCount < this.agentSession.options.minInterruptionWords) {
507
+ return;
508
+ }
509
+ }
510
+ (_a = this.realtimeSession) == null ? void 0 : _a.startUserActivity();
511
+ if (this._currentSpeech && !this._currentSpeech.interrupted && this._currentSpeech.allowInterruptions) {
512
+ this.logger.info({ "speech id": this._currentSpeech.id }, "speech interrupted by VAD");
513
+ (_b = this.realtimeSession) == null ? void 0 : _b.interrupt();
514
+ this._currentSpeech.interrupt();
515
+ }
516
+ }
517
+ onInterruption(ev) {
518
+ var _a, _b;
519
+ if (ev.type !== import_interruption.InterruptionEventType.INTERRUPTION) {
520
+ return;
521
+ }
522
+ this.logger.info(
523
+ {
524
+ probability: ev.probability,
525
+ detectionDelay: ev.detectionDelay,
526
+ totalDuration: ev.totalDuration
527
+ },
528
+ "adaptive interruption detected"
529
+ );
530
+ if (this.turnDetection === "manual" || this.turnDetection === "realtime_llm") {
531
+ return;
532
+ }
533
+ if (this.llm instanceof import_llm.RealtimeModel && this.llm.capabilities.turnDetection) {
534
+ return;
535
+ }
536
+ (_a = this.realtimeSession) == null ? void 0 : _a.startUserActivity();
537
+ if (this._currentSpeech && !this._currentSpeech.interrupted && this._currentSpeech.allowInterruptions) {
538
+ this.logger.info(
539
+ { "speech id": this._currentSpeech.id },
540
+ "speech interrupted by adaptive interruption detector"
541
+ );
542
+ (_b = this.realtimeSession) == null ? void 0 : _b.interrupt();
543
+ this._currentSpeech.interrupt();
544
+ }
545
+ }
546
+ onInterimTranscript(ev) {
547
+ if (this.llm instanceof import_llm.RealtimeModel && this.llm.capabilities.userTranscription) {
548
+ return;
549
+ }
550
+ this.agentSession.emit(
551
+ import_events.AgentSessionEventTypes.UserInputTranscribed,
552
+ (0, import_events.createUserInputTranscribedEvent)({
553
+ transcript: ev.alternatives[0].text,
554
+ isFinal: false,
555
+ language: ev.alternatives[0].language
556
+ // TODO(AJS-106): add multi participant support
557
+ })
558
+ );
559
+ }
560
+ onFinalTranscript(ev) {
561
+ if (this.llm instanceof import_llm.RealtimeModel && this.llm.capabilities.userTranscription) {
562
+ return;
563
+ }
564
+ this.agentSession.emit(
565
+ import_events.AgentSessionEventTypes.UserInputTranscribed,
566
+ (0, import_events.createUserInputTranscribedEvent)({
567
+ transcript: ev.alternatives[0].text,
568
+ isFinal: true,
569
+ language: ev.alternatives[0].language
570
+ // TODO(AJS-106): add multi participant support
571
+ })
572
+ );
573
+ }
574
+ onPreemptiveGeneration(info) {
575
+ if (!this.agentSession.options.preemptiveGeneration || this.draining || this._currentSpeech !== void 0 && !this._currentSpeech.interrupted || !(this.llm instanceof import_llm.LLM)) {
576
+ return;
577
+ }
578
+ this.cancelPreemptiveGeneration();
579
+ this.logger.info(
580
+ {
581
+ newTranscript: info.newTranscript,
582
+ transcriptConfidence: info.transcriptConfidence
583
+ },
584
+ "starting preemptive generation"
585
+ );
586
+ const userMessage = import_chat_context.ChatMessage.create({
587
+ role: "user",
588
+ content: info.newTranscript
589
+ });
590
+ const chatCtx = this.agent.chatCtx.copy();
591
+ const speechHandle = this.generateReply({
592
+ userMessage,
593
+ chatCtx,
594
+ scheduleSpeech: false
595
+ });
596
+ this._preemptiveGeneration = {
597
+ speechHandle,
598
+ userMessage,
599
+ info,
600
+ chatCtx: chatCtx.copy(),
601
+ tools: { ...this.tools },
602
+ toolChoice: this.toolChoice,
603
+ createdAt: Date.now()
604
+ };
605
+ }
606
+ cancelPreemptiveGeneration() {
607
+ if (this._preemptiveGeneration !== void 0) {
608
+ this._preemptiveGeneration.speechHandle._cancel();
609
+ this._preemptiveGeneration = void 0;
610
+ }
611
+ }
612
+ createSpeechTask(options) {
613
+ const { task, ownedSpeechHandle } = options;
614
+ this.speechTasks.add(task);
615
+ task.addDoneCallback(() => {
616
+ this.speechTasks.delete(task);
617
+ });
618
+ if (ownedSpeechHandle) {
619
+ ownedSpeechHandle._tasks.push(task);
620
+ task.addDoneCallback(() => {
621
+ if (ownedSpeechHandle._tasks.every((t) => t.done)) {
622
+ ownedSpeechHandle._markDone();
623
+ }
624
+ });
625
+ }
626
+ task.addDoneCallback(() => {
627
+ this.wakeupMainTask();
628
+ });
629
+ return task.result;
630
+ }
631
+ async onEndOfTurn(info) {
632
+ if (this.draining) {
633
+ this.cancelPreemptiveGeneration();
634
+ this.logger.warn({ user_input: info.newTranscript }, "skipping user input, task is draining");
635
+ return true;
636
+ }
637
+ if (this.stt && this.turnDetection !== "manual" && this._currentSpeech && this._currentSpeech.allowInterruptions && !this._currentSpeech.interrupted && this.agentSession.options.minInterruptionWords > 0) {
638
+ const wordCount = (0, import_word.splitWords)(info.newTranscript, true).length;
639
+ if (wordCount < this.agentSession.options.minInterruptionWords) {
640
+ this.cancelPreemptiveGeneration();
641
+ this.logger.info(
642
+ {
643
+ wordCount,
644
+ minInterruptionWords: this.agentSession.options.minInterruptionWords
645
+ },
646
+ "skipping user input, word count below minimum interruption threshold"
647
+ );
648
+ return false;
649
+ }
650
+ }
651
+ const oldTask = this._userTurnCompletedTask;
652
+ this._userTurnCompletedTask = this.createSpeechTask({
653
+ task: import_utils.Task.from(() => this.userTurnCompleted(info, oldTask)),
654
+ name: "AgentActivity.userTurnCompleted"
655
+ });
656
+ return true;
657
+ }
658
+ retrieveChatCtx() {
659
+ return this.agentSession.chatCtx;
660
+ }
661
+ async mainTask(signal) {
662
+ const abortFuture = new import_utils.Future();
663
+ const abortHandler = () => {
664
+ abortFuture.resolve();
665
+ signal.removeEventListener("abort", abortHandler);
666
+ };
667
+ signal.addEventListener("abort", abortHandler);
668
+ while (true) {
669
+ await Promise.race([this.q_updated.await, abortFuture.await]);
670
+ if (signal.aborted) break;
671
+ while (this.speechQueue.size() > 0) {
672
+ if (signal.aborted) break;
673
+ const heapItem = this.speechQueue.pop();
674
+ if (!heapItem) {
675
+ throw new Error("Speech queue is empty");
676
+ }
677
+ const speechHandle = heapItem[2];
678
+ this._currentSpeech = speechHandle;
679
+ speechHandle._authorizeGeneration();
680
+ await speechHandle._waitForGeneration();
681
+ this._currentSpeech = void 0;
682
+ }
683
+ if (this.draining && this.speechTasks.size === 0) {
684
+ this.logger.info("mainTask: draining and no more speech tasks");
685
+ break;
686
+ }
687
+ this.q_updated = new import_utils.Future();
688
+ }
689
+ this.logger.info("AgentActivity mainTask: exiting");
690
+ }
691
+ wakeupMainTask() {
692
+ this.q_updated.resolve();
693
+ }
694
+ generateReply(options) {
695
+ var _a;
696
+ const {
697
+ userMessage,
698
+ chatCtx,
699
+ instructions: defaultInstructions,
700
+ toolChoice: defaultToolChoice,
701
+ allowInterruptions: defaultAllowInterruptions,
702
+ scheduleSpeech = true
703
+ } = options;
704
+ let instructions = defaultInstructions;
705
+ let toolChoice = defaultToolChoice;
706
+ let allowInterruptions = defaultAllowInterruptions;
707
+ if (this.llm instanceof import_llm.RealtimeModel && this.llm.capabilities.turnDetection && allowInterruptions === false) {
708
+ this.logger.warn(
709
+ "the RealtimeModel uses a server-side turn detection, allowInterruptions cannot be false when using VoiceAgent.generateReply(), disable turnDetection in the RealtimeModel and use VAD on the AgentTask/VoiceAgent instead"
710
+ );
711
+ allowInterruptions = true;
712
+ }
713
+ if (this.llm === void 0) {
714
+ throw new Error("trying to generate reply without an LLM model");
715
+ }
716
+ const functionCall = (_a = import_agent.asyncLocalStorage.getStore()) == null ? void 0 : _a.functionCall;
717
+ if (toolChoice === void 0 && functionCall !== void 0) {
718
+ toolChoice = "none";
719
+ }
720
+ const handle = import_speech_handle.SpeechHandle.create({
721
+ allowInterruptions: allowInterruptions ?? this.allowInterruptions
722
+ });
723
+ this.agentSession.emit(
724
+ import_events.AgentSessionEventTypes.SpeechCreated,
725
+ (0, import_events.createSpeechCreatedEvent)({
726
+ userInitiated: true,
727
+ source: "generate_reply",
728
+ speechHandle: handle
729
+ })
730
+ );
731
+ this.logger.info({ speech_id: handle.id }, "Creating speech handle");
732
+ if (this.llm instanceof import_llm.RealtimeModel) {
733
+ this.createSpeechTask({
734
+ task: import_utils.Task.from(
735
+ (abortController) => this.realtimeReplyTask({
736
+ speechHandle: handle,
737
+ // TODO(brian): support llm.ChatMessage for the realtime model
738
+ userInput: userMessage == null ? void 0 : userMessage.textContent,
739
+ instructions,
740
+ modelSettings: {
741
+ // isGiven(toolChoice) = toolChoice !== undefined
742
+ toolChoice: toOaiToolChoice(toolChoice !== void 0 ? toolChoice : this.toolChoice)
743
+ },
744
+ abortController
745
+ })
746
+ ),
747
+ ownedSpeechHandle: handle,
748
+ name: "AgentActivity.realtimeReply"
749
+ });
750
+ } else if (this.llm instanceof import_llm.LLM) {
751
+ if (instructions) {
752
+ instructions = `${this.agent.instructions}
753
+ ${instructions}`;
754
+ }
755
+ const task = this.createSpeechTask({
756
+ task: import_utils.Task.from(
757
+ (abortController) => this.pipelineReplyTask(
758
+ handle,
759
+ chatCtx ?? this.agent.chatCtx,
760
+ this.agent.toolCtx,
761
+ {
762
+ toolChoice: toOaiToolChoice(toolChoice !== void 0 ? toolChoice : this.toolChoice)
763
+ },
764
+ abortController,
765
+ instructions ? `${this.agent.instructions}
766
+ ${instructions}` : instructions,
767
+ userMessage
768
+ )
769
+ ),
770
+ ownedSpeechHandle: handle,
771
+ name: "AgentActivity.pipelineReply"
772
+ });
773
+ task.finally(() => this.onPipelineReplyDone());
774
+ }
775
+ if (scheduleSpeech) {
776
+ this.scheduleSpeech(handle, import_speech_handle.SpeechHandle.SPEECH_PRIORITY_NORMAL);
777
+ }
778
+ return handle;
779
+ }
780
+ interrupt() {
781
+ var _a;
782
+ const future = new import_utils.Future();
783
+ const currentSpeech = this._currentSpeech;
784
+ currentSpeech == null ? void 0 : currentSpeech.interrupt();
785
+ for (const [_, __, speech] of this.speechQueue) {
786
+ speech.interrupt();
787
+ }
788
+ (_a = this.realtimeSession) == null ? void 0 : _a.interrupt();
789
+ if (currentSpeech === void 0) {
790
+ future.resolve();
791
+ } else {
792
+ currentSpeech.addDoneCallback(() => {
793
+ if (future.done) return;
794
+ future.resolve();
795
+ });
796
+ }
797
+ return future;
798
+ }
799
+ onPipelineReplyDone() {
800
+ if (!this.speechQueue.peek() && (!this._currentSpeech || this._currentSpeech.done())) {
801
+ this.agentSession._updateAgentState("listening");
802
+ }
803
+ }
804
+ async userTurnCompleted(info, oldTask) {
805
+ var _a, _b;
806
+ if (oldTask) {
807
+ await oldTask;
808
+ }
809
+ if (this.llm instanceof import_llm.RealtimeModel) {
810
+ if (this.llm.capabilities.turnDetection) {
811
+ return;
812
+ }
813
+ (_a = this.realtimeSession) == null ? void 0 : _a.commitAudio();
814
+ }
815
+ if (this._currentSpeech) {
816
+ if (!this._currentSpeech.allowInterruptions) {
817
+ this.logger.warn(
818
+ { user_input: info.newTranscript },
819
+ "skipping user input, current speech generation cannot be interrupted"
820
+ );
821
+ return;
822
+ }
823
+ this.logger.info(
824
+ { "speech id": this._currentSpeech.id },
825
+ "speech interrupted, new user turn detected"
826
+ );
827
+ this._currentSpeech.interrupt();
828
+ (_b = this.realtimeSession) == null ? void 0 : _b.interrupt();
829
+ }
830
+ let userMessage = import_chat_context.ChatMessage.create({
831
+ role: "user",
832
+ content: info.newTranscript
833
+ });
834
+ const chatCtx = this.agent.chatCtx.copy();
835
+ const startTime = Date.now();
836
+ try {
837
+ await this.agent.onUserTurnCompleted(chatCtx, userMessage);
838
+ } catch (e) {
839
+ if (e instanceof import_agent.StopResponse) {
840
+ return;
841
+ }
842
+ this.logger.error({ error: e }, "error occurred during onUserTurnCompleted");
843
+ }
844
+ const callbackDuration = Date.now() - startTime;
845
+ if (this.llm instanceof import_llm.RealtimeModel) {
846
+ userMessage = void 0;
847
+ } else if (this.llm === void 0) {
848
+ return;
849
+ }
850
+ let speechHandle;
851
+ if (this._preemptiveGeneration !== void 0) {
852
+ const preemptive = this._preemptiveGeneration;
853
+ if (preemptive.info.newTranscript === (userMessage == null ? void 0 : userMessage.textContent) && preemptive.chatCtx.isEquivalent(chatCtx) && (0, import_tool_context.isSameToolContext)(preemptive.tools, this.tools) && (0, import_tool_context.isSameToolChoice)(preemptive.toolChoice, this.toolChoice)) {
854
+ speechHandle = preemptive.speechHandle;
855
+ this.scheduleSpeech(speechHandle, import_speech_handle.SpeechHandle.SPEECH_PRIORITY_NORMAL);
856
+ this.logger.debug(
857
+ {
858
+ preemptiveLeadTime: Date.now() - preemptive.createdAt
859
+ },
860
+ "using preemptive generation"
861
+ );
862
+ } else {
863
+ this.logger.warn(
864
+ "preemptive generation enabled but chat context or tools have changed after `onUserTurnCompleted`"
865
+ );
866
+ preemptive.speechHandle._cancel();
867
+ }
868
+ this._preemptiveGeneration = void 0;
869
+ }
870
+ if (speechHandle === void 0) {
871
+ speechHandle = this.generateReply({ userMessage, chatCtx });
872
+ }
873
+ const eouMetrics = {
874
+ type: "eou_metrics",
875
+ timestamp: Date.now(),
876
+ endOfUtteranceDelayMs: info.endOfUtteranceDelay,
877
+ transcriptionDelayMs: info.transcriptionDelay,
878
+ onUserTurnCompletedDelayMs: callbackDuration,
879
+ lastSpeakingTimeMs: info.stoppedSpeakingAt ?? 0,
880
+ speechId: speechHandle.id
881
+ };
882
+ this.agentSession.emit(
883
+ import_events.AgentSessionEventTypes.MetricsCollected,
884
+ (0, import_events.createMetricsCollectedEvent)({ metrics: eouMetrics })
885
+ );
886
+ }
887
+ async ttsTask(speechHandle, text, addToChatCtx, modelSettings, replyAbortController, audio) {
888
+ speechHandleStorage.enterWith(speechHandle);
889
+ const transcriptionOutput = this.agentSession.output.transcriptionEnabled ? this.agentSession.output.transcription : null;
890
+ const audioOutput = this.agentSession.output.audioEnabled ? this.agentSession.output.audio : null;
891
+ await speechHandle.waitIfNotInterrupted([speechHandle._waitForAuthorization()]);
892
+ if (speechHandle.interrupted) {
893
+ return;
894
+ }
895
+ let baseStream;
896
+ if (text instanceof import_web.ReadableStream) {
897
+ baseStream = text;
898
+ } else {
899
+ baseStream = new import_web.ReadableStream({
900
+ start(controller) {
901
+ controller.enqueue(text);
902
+ controller.close();
903
+ }
904
+ });
905
+ }
906
+ const [textSource, audioSource] = baseStream.tee();
907
+ const tasks = [];
908
+ const trNode = await this.agent.transcriptionNode(textSource, {});
909
+ let textOut = null;
910
+ if (trNode) {
911
+ const [textForwardTask, _textOut] = (0, import_generation.performTextForwarding)(
912
+ trNode,
913
+ replyAbortController,
914
+ transcriptionOutput
915
+ );
916
+ textOut = _textOut;
917
+ tasks.push(textForwardTask);
918
+ }
919
+ const onFirstFrame = () => {
920
+ this.agentSession._updateAgentState("speaking");
921
+ };
922
+ if (!audioOutput) {
923
+ if (textOut) {
924
+ textOut.firstTextFut.await.finally(onFirstFrame);
925
+ }
926
+ } else {
927
+ let audioOut = null;
928
+ if (!audio) {
929
+ const [ttsTask, ttsStream] = (0, import_generation.performTTSInference)(
930
+ (...args) => this.agent.ttsNode(...args),
931
+ audioSource,
932
+ modelSettings,
933
+ replyAbortController
934
+ );
935
+ tasks.push(ttsTask);
936
+ const [forwardTask, _audioOut] = (0, import_generation.performAudioForwarding)(
937
+ ttsStream,
938
+ audioOutput,
939
+ replyAbortController
940
+ );
941
+ tasks.push(forwardTask);
942
+ audioOut = _audioOut;
943
+ } else {
944
+ const [forwardTask, _audioOut] = (0, import_generation.performAudioForwarding)(
945
+ audio,
946
+ audioOutput,
947
+ replyAbortController
948
+ );
949
+ tasks.push(forwardTask);
950
+ audioOut = _audioOut;
951
+ }
952
+ audioOut.firstFrameFut.await.finally(onFirstFrame);
953
+ }
954
+ await speechHandle.waitIfNotInterrupted(tasks.map((task) => task.result));
955
+ if (audioOutput) {
956
+ await speechHandle.waitIfNotInterrupted([audioOutput.waitForPlayout()]);
957
+ }
958
+ if (speechHandle.interrupted) {
959
+ replyAbortController.abort();
960
+ await (0, import_utils.cancelAndWait)(tasks, AgentActivity.REPLY_TASK_CANCEL_TIMEOUT);
961
+ if (audioOutput) {
962
+ audioOutput.clearBuffer();
963
+ await audioOutput.waitForPlayout();
964
+ }
965
+ }
966
+ if (addToChatCtx) {
967
+ const message = import_chat_context.ChatMessage.create({
968
+ role: "assistant",
969
+ content: (textOut == null ? void 0 : textOut.text) || "",
970
+ interrupted: speechHandle.interrupted
971
+ });
972
+ this.agent._chatCtx.insert(message);
973
+ this.agentSession._conversationItemAdded(message);
974
+ }
975
+ if (this.agentSession.agentState === "speaking") {
976
+ this.agentSession._updateAgentState("listening");
977
+ }
978
+ }
979
+ _pipelineReplyTaskImpl = async ({
980
+ speechHandle,
981
+ chatCtx,
982
+ toolCtx,
983
+ modelSettings,
984
+ replyAbortController,
985
+ instructions,
986
+ newMessage,
987
+ toolsMessages,
988
+ span
989
+ }) => {
990
+ var _a, _b, _c;
991
+ span.setAttribute(import_telemetry.traceTypes.ATTR_SPEECH_ID, speechHandle.id);
992
+ if (instructions) {
993
+ span.setAttribute(import_telemetry.traceTypes.ATTR_INSTRUCTIONS, instructions);
994
+ }
995
+ if (newMessage) {
996
+ span.setAttribute(import_telemetry.traceTypes.ATTR_USER_INPUT, newMessage.textContent || "");
997
+ }
998
+ speechHandleStorage.enterWith(speechHandle);
999
+ const audioOutput = this.agentSession.output.audioEnabled ? this.agentSession.output.audio : null;
1000
+ const transcriptionOutput = this.agentSession.output.transcriptionEnabled ? this.agentSession.output.transcription : null;
1001
+ chatCtx = chatCtx.copy();
1002
+ if (newMessage) {
1003
+ chatCtx.insert(newMessage);
1004
+ }
1005
+ if (instructions) {
1006
+ try {
1007
+ (0, import_generation.updateInstructions)({
1008
+ chatCtx,
1009
+ instructions,
1010
+ addIfMissing: true
1011
+ });
1012
+ } catch (e) {
1013
+ this.logger.error({ error: e }, "error occurred during updateInstructions");
1014
+ }
1015
+ }
1016
+ const tasks = [];
1017
+ const [llmTask, llmGenData] = (0, import_generation.performLLMInference)(
1018
+ // preserve `this` context in llmNode
1019
+ (...args) => this.agent.llmNode(...args),
1020
+ chatCtx,
1021
+ toolCtx,
1022
+ modelSettings,
1023
+ replyAbortController
1024
+ );
1025
+ tasks.push(llmTask);
1026
+ const [ttsTextInput, llmOutput] = llmGenData.textStream.tee();
1027
+ let ttsTask = null;
1028
+ let ttsStream = null;
1029
+ if (audioOutput) {
1030
+ [ttsTask, ttsStream] = (0, import_generation.performTTSInference)(
1031
+ (...args) => this.agent.ttsNode(...args),
1032
+ ttsTextInput,
1033
+ modelSettings,
1034
+ replyAbortController
1035
+ );
1036
+ tasks.push(ttsTask);
1037
+ }
1038
+ await speechHandle.waitIfNotInterrupted([speechHandle._waitForScheduled()]);
1039
+ if (newMessage && speechHandle.scheduled) {
1040
+ this.agent._chatCtx.insert(newMessage);
1041
+ this.agentSession._conversationItemAdded(newMessage);
1042
+ }
1043
+ if (speechHandle.interrupted) {
1044
+ replyAbortController.abort();
1045
+ await (0, import_utils.cancelAndWait)(tasks, AgentActivity.REPLY_TASK_CANCEL_TIMEOUT);
1046
+ return;
1047
+ }
1048
+ this.agentSession._updateAgentState("thinking");
1049
+ await speechHandle.waitIfNotInterrupted([speechHandle._waitForAuthorization()]);
1050
+ speechHandle._clearAuthorization();
1051
+ const replyStartedAt = Date.now();
1052
+ const trNodeResult = await this.agent.transcriptionNode(llmOutput, modelSettings);
1053
+ let textOut = null;
1054
+ if (trNodeResult) {
1055
+ const [textForwardTask, _textOut] = (0, import_generation.performTextForwarding)(
1056
+ trNodeResult,
1057
+ replyAbortController,
1058
+ transcriptionOutput
1059
+ );
1060
+ tasks.push(textForwardTask);
1061
+ textOut = _textOut;
1062
+ }
1063
+ const onFirstFrame = () => {
1064
+ this.agentSession._updateAgentState("speaking");
1065
+ };
1066
+ let audioOut = null;
1067
+ if (audioOutput) {
1068
+ if (ttsStream) {
1069
+ const [forwardTask, _audioOut] = (0, import_generation.performAudioForwarding)(
1070
+ ttsStream,
1071
+ audioOutput,
1072
+ replyAbortController
1073
+ );
1074
+ audioOut = _audioOut;
1075
+ tasks.push(forwardTask);
1076
+ audioOut.firstFrameFut.await.finally(onFirstFrame);
1077
+ } else {
1078
+ throw Error("ttsStream is null when audioOutput is enabled");
1079
+ }
1080
+ } else {
1081
+ textOut == null ? void 0 : textOut.firstTextFut.await.finally(onFirstFrame);
1082
+ }
1083
+ const onToolExecutionStarted = (_) => {
1084
+ };
1085
+ const onToolExecutionCompleted = (_) => {
1086
+ };
1087
+ const [executeToolsTask, toolOutput] = (0, import_generation.performToolExecutions)({
1088
+ session: this.agentSession,
1089
+ speechHandle,
1090
+ toolCtx,
1091
+ toolChoice: modelSettings.toolChoice,
1092
+ toolCallStream: llmGenData.toolCallStream,
1093
+ controller: replyAbortController,
1094
+ onToolExecutionStarted,
1095
+ onToolExecutionCompleted
1096
+ });
1097
+ await speechHandle.waitIfNotInterrupted(tasks.map((task) => task.result));
1098
+ if (audioOutput) {
1099
+ await speechHandle.waitIfNotInterrupted([audioOutput.waitForPlayout()]);
1100
+ }
1101
+ if (toolsMessages) {
1102
+ for (const msg of toolsMessages) {
1103
+ msg.createdAt = replyStartedAt;
1104
+ }
1105
+ this.agent._chatCtx.insert(toolsMessages);
1106
+ this.agentSession._toolItemsAdded(toolsMessages);
1107
+ }
1108
+ if (speechHandle.interrupted) {
1109
+ this.logger.debug(
1110
+ { speech_id: speechHandle.id },
1111
+ "Aborting all pipeline reply tasks due to interruption"
1112
+ );
1113
+ if (audioOutput) {
1114
+ audioOutput.clearBuffer();
1115
+ }
1116
+ replyAbortController.abort();
1117
+ await Promise.allSettled(
1118
+ tasks.map((task) => task.cancelAndWait(AgentActivity.REPLY_TASK_CANCEL_TIMEOUT))
1119
+ );
1120
+ let forwardedText = (textOut == null ? void 0 : textOut.text) || "";
1121
+ if (audioOutput) {
1122
+ const playbackEv = await audioOutput.waitForPlayout();
1123
+ if (audioOut == null ? void 0 : audioOut.firstFrameFut.done) {
1124
+ this.logger.info(
1125
+ { speech_id: speechHandle.id, playbackPosition: playbackEv.playbackPosition },
1126
+ "playout interrupted"
1127
+ );
1128
+ if (playbackEv.synchronizedTranscript) {
1129
+ forwardedText = playbackEv.synchronizedTranscript;
1130
+ }
1131
+ } else {
1132
+ forwardedText = "";
1133
+ }
1134
+ }
1135
+ if (forwardedText) {
1136
+ const message = import_chat_context.ChatMessage.create({
1137
+ role: "assistant",
1138
+ content: forwardedText,
1139
+ id: llmGenData.id,
1140
+ interrupted: true,
1141
+ createdAt: replyStartedAt
1142
+ });
1143
+ chatCtx.insert(message);
1144
+ this.agent._chatCtx.insert(message);
1145
+ this.agentSession._conversationItemAdded(message);
1146
+ }
1147
+ if (this.agentSession.agentState === "speaking") {
1148
+ this.agentSession._updateAgentState("listening");
1149
+ }
1150
+ this.logger.info(
1151
+ { speech_id: speechHandle.id, message: forwardedText },
1152
+ "playout completed with interrupt"
1153
+ );
1154
+ speechHandle._markGenerationDone();
1155
+ await executeToolsTask.cancelAndWait(AgentActivity.REPLY_TASK_CANCEL_TIMEOUT);
1156
+ return;
1157
+ }
1158
+ if (textOut && textOut.text) {
1159
+ const message = import_chat_context.ChatMessage.create({
1160
+ role: "assistant",
1161
+ id: llmGenData.id,
1162
+ interrupted: false,
1163
+ createdAt: replyStartedAt,
1164
+ content: textOut.text
1165
+ });
1166
+ chatCtx.insert(message);
1167
+ this.agent._chatCtx.insert(message);
1168
+ this.agentSession._conversationItemAdded(message);
1169
+ this.logger.info(
1170
+ { speech_id: speechHandle.id, message: textOut.text },
1171
+ "playout completed without interruption"
1172
+ );
1173
+ }
1174
+ if (toolOutput.output.length > 0) {
1175
+ this.agentSession._updateAgentState("thinking");
1176
+ } else if (this.agentSession.agentState === "speaking") {
1177
+ this.agentSession._updateAgentState("listening");
1178
+ }
1179
+ speechHandle._markGenerationDone();
1180
+ await executeToolsTask.result;
1181
+ if (toolOutput.output.length === 0) return;
1182
+ const { maxToolSteps } = this.agentSession.options;
1183
+ if (speechHandle.numSteps >= maxToolSteps) {
1184
+ this.logger.warn(
1185
+ { speech_id: speechHandle.id, max_tool_steps: maxToolSteps },
1186
+ "maximum number of function calls steps reached"
1187
+ );
1188
+ return;
1189
+ }
1190
+ const functionToolsExecutedEvent = (0, import_events.createFunctionToolsExecutedEvent)({
1191
+ functionCalls: [],
1192
+ functionCallOutputs: []
1193
+ });
1194
+ let shouldGenerateToolReply = false;
1195
+ let newAgentTask = null;
1196
+ let ignoreTaskSwitch = false;
1197
+ for (const sanitizedOut of toolOutput.output) {
1198
+ if (sanitizedOut.toolCallOutput !== void 0) {
1199
+ functionToolsExecutedEvent.functionCalls.push(sanitizedOut.toolCall);
1200
+ functionToolsExecutedEvent.functionCallOutputs.push(sanitizedOut.toolCallOutput);
1201
+ if (sanitizedOut.replyRequired) {
1202
+ shouldGenerateToolReply = true;
1203
+ }
1204
+ }
1205
+ if (newAgentTask !== null && sanitizedOut.agentTask !== void 0) {
1206
+ this.logger.error("expected to receive only one agent task from the tool executions");
1207
+ ignoreTaskSwitch = true;
1208
+ }
1209
+ newAgentTask = sanitizedOut.agentTask ?? null;
1210
+ this.logger.debug(
1211
+ {
1212
+ speechId: speechHandle.id,
1213
+ name: (_a = sanitizedOut.toolCall) == null ? void 0 : _a.name,
1214
+ args: sanitizedOut.toolCall.args,
1215
+ output: (_b = sanitizedOut.toolCallOutput) == null ? void 0 : _b.output,
1216
+ isError: (_c = sanitizedOut.toolCallOutput) == null ? void 0 : _c.isError
1217
+ },
1218
+ "Tool call execution finished"
1219
+ );
1220
+ }
1221
+ this.agentSession.emit(
1222
+ import_events.AgentSessionEventTypes.FunctionToolsExecuted,
1223
+ functionToolsExecutedEvent
1224
+ );
1225
+ let draining = this.draining;
1226
+ if (!ignoreTaskSwitch && newAgentTask !== null) {
1227
+ this.agentSession.updateAgent(newAgentTask);
1228
+ draining = true;
1229
+ }
1230
+ const toolMessages = [
1231
+ ...functionToolsExecutedEvent.functionCalls,
1232
+ ...functionToolsExecutedEvent.functionCallOutputs
1233
+ ];
1234
+ if (shouldGenerateToolReply) {
1235
+ chatCtx.insert(toolMessages);
1236
+ const handle = import_speech_handle.SpeechHandle.create({
1237
+ allowInterruptions: speechHandle.allowInterruptions,
1238
+ stepIndex: speechHandle._stepIndex + 1,
1239
+ parent: speechHandle
1240
+ });
1241
+ this.agentSession.emit(
1242
+ import_events.AgentSessionEventTypes.SpeechCreated,
1243
+ (0, import_events.createSpeechCreatedEvent)({
1244
+ userInitiated: false,
1245
+ source: "tool_response",
1246
+ speechHandle: handle
1247
+ })
1248
+ );
1249
+ const respondToolChoice = draining || modelSettings.toolChoice === "none" ? "none" : "auto";
1250
+ const toolResponseTask = this.createSpeechTask({
1251
+ task: import_utils.Task.from(
1252
+ () => this.pipelineReplyTask(
1253
+ handle,
1254
+ chatCtx,
1255
+ toolCtx,
1256
+ { toolChoice: respondToolChoice },
1257
+ replyAbortController,
1258
+ instructions,
1259
+ void 0,
1260
+ toolMessages
1261
+ )
1262
+ ),
1263
+ ownedSpeechHandle: handle,
1264
+ name: "AgentActivity.pipelineReply"
1265
+ });
1266
+ toolResponseTask.finally(() => this.onPipelineReplyDone());
1267
+ this.scheduleSpeech(handle, import_speech_handle.SpeechHandle.SPEECH_PRIORITY_NORMAL, true);
1268
+ } else if (functionToolsExecutedEvent.functionCallOutputs.length > 0) {
1269
+ for (const msg of toolMessages) {
1270
+ msg.createdAt = replyStartedAt;
1271
+ }
1272
+ this.agent._chatCtx.insert(toolMessages);
1273
+ this.agentSession._toolItemsAdded(toolMessages);
1274
+ }
1275
+ };
1276
/**
 * Tracing wrapper around `_pipelineReplyTaskImpl`.
 *
 * Opens an "agent_turn" span (using the session's root span context) and
 * forwards every positional argument, plus the span itself, to the
 * implementation as a single options object.
 *
 * @returns whatever `tracer.startActiveSpan` resolves to for the wrapped call.
 */
pipelineReplyTask = async (speechHandle, chatCtx, toolCtx, modelSettings, replyAbortController, instructions, newMessage, toolsMessages) => {
  // Runs the actual reply pipeline inside the active span.
  const runTurn = async (turnSpan) => {
    const implArgs = {
      speechHandle,
      chatCtx,
      toolCtx,
      modelSettings,
      replyAbortController,
      instructions,
      newMessage,
      toolsMessages,
      span: turnSpan
    };
    return this._pipelineReplyTaskImpl(implArgs);
  };
  const spanOptions = {
    name: "agent_turn",
    context: this.agentSession.rootSpanContext
  };
  return import_telemetry.tracer.startActiveSpan(runTurn, spanOptions);
};
1293
+ async realtimeGenerationTask(speechHandle, ev, modelSettings, replyAbortController) {
1294
+ return import_telemetry.tracer.startActiveSpan(
1295
+ async (span) => this._realtimeGenerationTaskImpl({
1296
+ speechHandle,
1297
+ ev,
1298
+ modelSettings,
1299
+ replyAbortController,
1300
+ span
1301
+ }),
1302
+ {
1303
+ name: "agent_turn",
1304
+ context: this.agentSession.rootSpanContext
1305
+ }
1306
+ );
1307
+ }
1308
+ async _realtimeGenerationTaskImpl({
1309
+ speechHandle,
1310
+ ev,
1311
+ modelSettings,
1312
+ replyAbortController,
1313
+ span
1314
+ }) {
1315
+ var _a, _b, _c;
1316
+ span.setAttribute(import_telemetry.traceTypes.ATTR_SPEECH_ID, speechHandle.id);
1317
+ speechHandleStorage.enterWith(speechHandle);
1318
+ if (!this.realtimeSession) {
1319
+ throw new Error("realtime session is not initialized");
1320
+ }
1321
+ if (!(this.llm instanceof import_llm.RealtimeModel)) {
1322
+ throw new Error("llm is not a realtime model");
1323
+ }
1324
+ span.setAttribute(import_telemetry.traceTypes.ATTR_GEN_AI_REQUEST_MODEL, this.llm.model);
1325
+ if (this.realtimeSpans && ev.responseId) {
1326
+ this.realtimeSpans.set(ev.responseId, span);
1327
+ }
1328
+ this.logger.debug(
1329
+ { speech_id: speechHandle.id, stepIndex: speechHandle.numSteps },
1330
+ "realtime generation started"
1331
+ );
1332
+ const audioOutput = this.agentSession.output.audioEnabled ? this.agentSession.output.audio : null;
1333
+ const textOutput = this.agentSession.output.transcriptionEnabled ? this.agentSession.output.transcription : null;
1334
+ const toolCtx = this.realtimeSession.tools;
1335
+ await speechHandle.waitIfNotInterrupted([speechHandle._waitForAuthorization()]);
1336
+ speechHandle._clearAuthorization();
1337
+ if (speechHandle.interrupted) {
1338
+ return;
1339
+ }
1340
+ const onFirstFrame = () => {
1341
+ this.agentSession._updateAgentState("speaking");
1342
+ };
1343
+ const readMessages = async (abortController, outputs) => {
1344
+ replyAbortController.signal.addEventListener("abort", () => abortController.abort(), {
1345
+ once: true
1346
+ });
1347
+ const forwardTasks = [];
1348
+ try {
1349
+ for await (const msg of ev.messageStream) {
1350
+ if (forwardTasks.length > 0) {
1351
+ this.logger.warn(
1352
+ "expected to receive only one message generation from the realtime API"
1353
+ );
1354
+ break;
1355
+ }
1356
+ const msgModalities = msg.modalities ? await msg.modalities : void 0;
1357
+ let ttsTextInput = null;
1358
+ let trTextInput;
1359
+ if (msgModalities && !msgModalities.includes("audio") && this.tts) {
1360
+ if (this.llm instanceof import_llm.RealtimeModel && this.llm.capabilities.audioOutput) {
1361
+ this.logger.warn(
1362
+ "text response received from realtime API, falling back to use a TTS model."
1363
+ );
1364
+ }
1365
+ const [_ttsTextInput, _trTextInput] = msg.textStream.tee();
1366
+ ttsTextInput = _ttsTextInput;
1367
+ trTextInput = _trTextInput;
1368
+ } else {
1369
+ trTextInput = msg.textStream;
1370
+ }
1371
+ const trNodeResult = await this.agent.transcriptionNode(trTextInput, modelSettings);
1372
+ let textOut = null;
1373
+ if (trNodeResult) {
1374
+ const [textForwardTask, _textOut] = (0, import_generation.performTextForwarding)(
1375
+ trNodeResult,
1376
+ abortController,
1377
+ textOutput
1378
+ );
1379
+ forwardTasks.push(textForwardTask);
1380
+ textOut = _textOut;
1381
+ }
1382
+ let audioOut = null;
1383
+ if (audioOutput) {
1384
+ let realtimeAudioResult = null;
1385
+ if (ttsTextInput) {
1386
+ const [ttsTask, ttsStream] = (0, import_generation.performTTSInference)(
1387
+ (...args) => this.agent.ttsNode(...args),
1388
+ ttsTextInput,
1389
+ modelSettings,
1390
+ abortController
1391
+ );
1392
+ tasks.push(ttsTask);
1393
+ realtimeAudioResult = ttsStream;
1394
+ } else if (msgModalities && msgModalities.includes("audio")) {
1395
+ realtimeAudioResult = await this.agent.realtimeAudioOutputNode(
1396
+ msg.audioStream,
1397
+ modelSettings
1398
+ );
1399
+ } else if (this.llm instanceof import_llm.RealtimeModel && this.llm.capabilities.audioOutput) {
1400
+ this.logger.error(
1401
+ "Text message received from Realtime API with audio modality. This usually happens when text chat context is synced to the API. Try to add a TTS model as fallback or use text modality with TTS instead."
1402
+ );
1403
+ } else {
1404
+ this.logger.warn(
1405
+ "audio output is enabled but neither tts nor realtime audio is available"
1406
+ );
1407
+ }
1408
+ if (realtimeAudioResult) {
1409
+ const [forwardTask, _audioOut] = (0, import_generation.performAudioForwarding)(
1410
+ realtimeAudioResult,
1411
+ audioOutput,
1412
+ abortController
1413
+ );
1414
+ forwardTasks.push(forwardTask);
1415
+ audioOut = _audioOut;
1416
+ audioOut.firstFrameFut.await.finally(onFirstFrame);
1417
+ }
1418
+ } else if (textOut) {
1419
+ textOut.firstTextFut.await.finally(onFirstFrame);
1420
+ }
1421
+ outputs.push([msg.messageId, textOut, audioOut, msgModalities]);
1422
+ }
1423
+ await (0, import_utils.waitFor)(forwardTasks);
1424
+ } catch (error) {
1425
+ this.logger.error(error, "error reading messages from the realtime API");
1426
+ } finally {
1427
+ await (0, import_utils.cancelAndWait)(forwardTasks, AgentActivity.REPLY_TASK_CANCEL_TIMEOUT);
1428
+ }
1429
+ };
1430
+ const messageOutputs = [];
1431
+ const tasks = [
1432
+ import_utils.Task.from(
1433
+ (controller) => readMessages(controller, messageOutputs),
1434
+ void 0,
1435
+ "AgentActivity.realtime_generation.read_messages"
1436
+ )
1437
+ ];
1438
+ const [toolCallStream, toolCallStreamForTracing] = ev.functionStream.tee();
1439
+ const toolCalls = [];
1440
+ const readToolStreamTask = async (controller, stream) => {
1441
+ const reader = stream.getReader();
1442
+ try {
1443
+ while (!controller.signal.aborted) {
1444
+ const { done, value } = await reader.read();
1445
+ if (done) break;
1446
+ this.logger.debug({ tool_call: value }, "received tool call from the realtime API");
1447
+ toolCalls.push(value);
1448
+ }
1449
+ } finally {
1450
+ reader.releaseLock();
1451
+ }
1452
+ };
1453
+ tasks.push(
1454
+ import_utils.Task.from(
1455
+ (controller) => readToolStreamTask(controller, toolCallStreamForTracing),
1456
+ replyAbortController,
1457
+ "AgentActivity.realtime_generation.read_tool_stream"
1458
+ )
1459
+ );
1460
+ const onToolExecutionStarted = (f) => {
1461
+ speechHandle._itemAdded([f]);
1462
+ this.agent._chatCtx.items.push(f);
1463
+ this.agentSession._toolItemsAdded([f]);
1464
+ };
1465
+ const onToolExecutionCompleted = (out) => {
1466
+ if (out.toolCallOutput) {
1467
+ speechHandle._itemAdded([out.toolCallOutput]);
1468
+ }
1469
+ };
1470
+ const [executeToolsTask, toolOutput] = (0, import_generation.performToolExecutions)({
1471
+ session: this.agentSession,
1472
+ speechHandle,
1473
+ toolCtx,
1474
+ toolCallStream,
1475
+ toolChoice: modelSettings.toolChoice,
1476
+ controller: replyAbortController,
1477
+ onToolExecutionStarted,
1478
+ onToolExecutionCompleted
1479
+ });
1480
+ await speechHandle.waitIfNotInterrupted(tasks.map((task) => task.result));
1481
+ if (audioOutput) {
1482
+ await speechHandle.waitIfNotInterrupted([audioOutput.waitForPlayout()]);
1483
+ this.agentSession._updateAgentState("listening");
1484
+ }
1485
+ if (speechHandle.interrupted) {
1486
+ this.logger.debug(
1487
+ { speech_id: speechHandle.id },
1488
+ "Aborting all realtime generation tasks due to interruption"
1489
+ );
1490
+ replyAbortController.abort();
1491
+ await (0, import_utils.cancelAndWait)(tasks, AgentActivity.REPLY_TASK_CANCEL_TIMEOUT);
1492
+ if (messageOutputs.length > 0) {
1493
+ const [msgId, textOut, audioOut, msgModalities] = messageOutputs[0];
1494
+ let forwardedText = (textOut == null ? void 0 : textOut.text) || "";
1495
+ if (audioOutput) {
1496
+ audioOutput.clearBuffer();
1497
+ const playbackEv = await audioOutput.waitForPlayout();
1498
+ let playbackPosition = playbackEv.playbackPosition;
1499
+ if (audioOut == null ? void 0 : audioOut.firstFrameFut.done) {
1500
+ this.logger.info(
1501
+ { speech_id: speechHandle.id, playbackPosition: playbackEv.playbackPosition },
1502
+ "playout interrupted"
1503
+ );
1504
+ if (playbackEv.synchronizedTranscript) {
1505
+ forwardedText = playbackEv.synchronizedTranscript;
1506
+ }
1507
+ } else {
1508
+ forwardedText = "";
1509
+ playbackPosition = 0;
1510
+ }
1511
+ this.realtimeSession.truncate({
1512
+ messageId: msgId,
1513
+ audioEndMs: Math.floor(playbackPosition),
1514
+ modalities: msgModalities,
1515
+ audioTranscript: forwardedText
1516
+ });
1517
+ }
1518
+ if (forwardedText) {
1519
+ const message = import_chat_context.ChatMessage.create({
1520
+ role: "assistant",
1521
+ content: forwardedText,
1522
+ id: msgId,
1523
+ interrupted: true
1524
+ });
1525
+ this.agent._chatCtx.insert(message);
1526
+ speechHandle._itemAdded([message]);
1527
+ this.agentSession._conversationItemAdded(message);
1528
+ }
1529
+ this.logger.info(
1530
+ { speech_id: speechHandle.id, message: forwardedText },
1531
+ "playout completed with interrupt"
1532
+ );
1533
+ }
1534
+ speechHandle._markGenerationDone();
1535
+ await executeToolsTask.cancelAndWait(AgentActivity.REPLY_TASK_CANCEL_TIMEOUT);
1536
+ return;
1537
+ }
1538
+ if (messageOutputs.length > 0) {
1539
+ const [msgId, textOut, _, __] = messageOutputs[0];
1540
+ const message = import_chat_context.ChatMessage.create({
1541
+ role: "assistant",
1542
+ content: (textOut == null ? void 0 : textOut.text) || "",
1543
+ id: msgId,
1544
+ interrupted: false
1545
+ });
1546
+ this.agent._chatCtx.insert(message);
1547
+ speechHandle._itemAdded([message]);
1548
+ this.agentSession._conversationItemAdded(message);
1549
+ }
1550
+ speechHandle._markGenerationDone();
1551
+ toolOutput.firstToolStartedFuture.await.finally(() => {
1552
+ this.agentSession._updateAgentState("thinking");
1553
+ });
1554
+ await executeToolsTask.result;
1555
+ if (toolOutput.output.length === 0) {
1556
+ if (!speechHandle.interrupted) {
1557
+ this.agentSession._updateAgentState("listening");
1558
+ }
1559
+ return;
1560
+ }
1561
+ const { maxToolSteps } = this.agentSession.options;
1562
+ if (speechHandle.numSteps >= maxToolSteps) {
1563
+ this.logger.warn(
1564
+ { speech_id: speechHandle.id, max_tool_steps: maxToolSteps },
1565
+ "maximum number of function calls steps reached"
1566
+ );
1567
+ return;
1568
+ }
1569
+ const functionToolsExecutedEvent = (0, import_events.createFunctionToolsExecutedEvent)({
1570
+ functionCalls: [],
1571
+ functionCallOutputs: []
1572
+ });
1573
+ let shouldGenerateToolReply = false;
1574
+ let newAgentTask = null;
1575
+ let ignoreTaskSwitch = false;
1576
+ for (const sanitizedOut of toolOutput.output) {
1577
+ if (sanitizedOut.toolCallOutput !== void 0) {
1578
+ functionToolsExecutedEvent.functionCallOutputs.push(sanitizedOut.toolCallOutput);
1579
+ if (sanitizedOut.replyRequired) {
1580
+ shouldGenerateToolReply = true;
1581
+ }
1582
+ }
1583
+ if (newAgentTask !== null && sanitizedOut.agentTask !== void 0) {
1584
+ this.logger.error("expected to receive only one agent task from the tool executions");
1585
+ ignoreTaskSwitch = true;
1586
+ }
1587
+ newAgentTask = sanitizedOut.agentTask ?? null;
1588
+ this.logger.debug(
1589
+ {
1590
+ speechId: speechHandle.id,
1591
+ name: (_a = sanitizedOut.toolCall) == null ? void 0 : _a.name,
1592
+ args: sanitizedOut.toolCall.args,
1593
+ output: (_b = sanitizedOut.toolCallOutput) == null ? void 0 : _b.output,
1594
+ isError: (_c = sanitizedOut.toolCallOutput) == null ? void 0 : _c.isError
1595
+ },
1596
+ "Tool call execution finished"
1597
+ );
1598
+ }
1599
+ this.agentSession.emit(
1600
+ import_events.AgentSessionEventTypes.FunctionToolsExecuted,
1601
+ functionToolsExecutedEvent
1602
+ );
1603
+ let draining = this.draining;
1604
+ if (!ignoreTaskSwitch && newAgentTask !== null) {
1605
+ this.agentSession.updateAgent(newAgentTask);
1606
+ draining = true;
1607
+ }
1608
+ if (functionToolsExecutedEvent.functionCallOutputs.length > 0) {
1609
+ while (this.currentSpeech || this.speechQueue.size() > 0) {
1610
+ if (this.currentSpeech && !this.currentSpeech.done() && this.currentSpeech !== speechHandle) {
1611
+ await this.currentSpeech.waitForPlayout();
1612
+ } else {
1613
+ await new Promise((resolve) => setImmediate(resolve));
1614
+ }
1615
+ }
1616
+ const chatCtx = this.realtimeSession.chatCtx.copy();
1617
+ chatCtx.items.push(...functionToolsExecutedEvent.functionCallOutputs);
1618
+ this.agentSession._toolItemsAdded(
1619
+ functionToolsExecutedEvent.functionCallOutputs
1620
+ );
1621
+ try {
1622
+ await this.realtimeSession.updateChatCtx(chatCtx);
1623
+ } catch (error) {
1624
+ this.logger.warn(
1625
+ { error },
1626
+ "failed to update chat context before generating the function calls results"
1627
+ );
1628
+ }
1629
+ }
1630
+ if (!shouldGenerateToolReply || this.llm.capabilities.autoToolReplyGeneration) {
1631
+ return;
1632
+ }
1633
+ this.realtimeSession.interrupt();
1634
+ const replySpeechHandle = import_speech_handle.SpeechHandle.create({
1635
+ allowInterruptions: speechHandle.allowInterruptions,
1636
+ stepIndex: speechHandle.numSteps + 1,
1637
+ parent: speechHandle
1638
+ });
1639
+ this.agentSession.emit(
1640
+ import_events.AgentSessionEventTypes.SpeechCreated,
1641
+ (0, import_events.createSpeechCreatedEvent)({
1642
+ userInitiated: false,
1643
+ source: "tool_response",
1644
+ speechHandle: replySpeechHandle
1645
+ })
1646
+ );
1647
+ const toolChoice = draining || modelSettings.toolChoice === "none" ? "none" : "auto";
1648
+ this.createSpeechTask({
1649
+ task: import_utils.Task.from(
1650
+ (abortController) => this.realtimeReplyTask({
1651
+ speechHandle: replySpeechHandle,
1652
+ modelSettings: { toolChoice },
1653
+ abortController
1654
+ })
1655
+ ),
1656
+ ownedSpeechHandle: replySpeechHandle,
1657
+ name: "AgentActivity.realtime_reply"
1658
+ });
1659
+ this.scheduleSpeech(replySpeechHandle, import_speech_handle.SpeechHandle.SPEECH_PRIORITY_NORMAL, true);
1660
+ }
1661
+ async realtimeReplyTask({
1662
+ speechHandle,
1663
+ modelSettings: { toolChoice },
1664
+ userInput,
1665
+ instructions,
1666
+ abortController
1667
+ }) {
1668
+ speechHandleStorage.enterWith(speechHandle);
1669
+ if (!this.realtimeSession) {
1670
+ throw new Error("realtime session is not available");
1671
+ }
1672
+ await speechHandle.waitIfNotInterrupted([speechHandle._waitForAuthorization()]);
1673
+ if (userInput) {
1674
+ const chatCtx = this.realtimeSession.chatCtx.copy();
1675
+ const message = chatCtx.addMessage({
1676
+ role: "user",
1677
+ content: userInput
1678
+ });
1679
+ await this.realtimeSession.updateChatCtx(chatCtx);
1680
+ this.agent._chatCtx.insert(message);
1681
+ this.agentSession._conversationItemAdded(message);
1682
+ }
1683
+ const originalToolChoice = this.toolChoice;
1684
+ if (toolChoice !== void 0) {
1685
+ this.realtimeSession.updateOptions({ toolChoice });
1686
+ }
1687
+ try {
1688
+ const generationEvent = await this.realtimeSession.generateReply(instructions);
1689
+ await this.realtimeGenerationTask(
1690
+ speechHandle,
1691
+ generationEvent,
1692
+ { toolChoice },
1693
+ abortController
1694
+ );
1695
+ } finally {
1696
+ if (toolChoice !== void 0 && toolChoice !== originalToolChoice) {
1697
+ this.realtimeSession.updateOptions({ toolChoice: originalToolChoice });
1698
+ }
1699
+ }
1700
+ }
1701
+ scheduleSpeech(speechHandle, priority, force = false) {
1702
+ if (this.draining && !force) {
1703
+ throw new Error("cannot schedule new speech, the agent is draining");
1704
+ }
1705
+ this.speechQueue.push([priority, Number(process.hrtime.bigint()), speechHandle]);
1706
+ speechHandle._markScheduled();
1707
+ this.wakeupMainTask();
1708
+ }
1709
+ async drain() {
1710
+ return import_telemetry.tracer.startActiveSpan(async (span) => this._drainImpl(span), {
1711
+ name: "drain_agent_activity",
1712
+ context: import_api.ROOT_CONTEXT
1713
+ });
1714
+ }
1715
+ async _drainImpl(span) {
1716
+ var _a;
1717
+ span.setAttribute(import_telemetry.traceTypes.ATTR_AGENT_LABEL, this.agent.id);
1718
+ const unlock = await this.lock.lock();
1719
+ try {
1720
+ if (this._draining) return;
1721
+ this.cancelPreemptiveGeneration();
1722
+ const onExitTask = import_telemetry.tracer.startActiveSpan(async () => this.agent.onExit(), {
1723
+ name: "on_exit",
1724
+ attributes: { [import_telemetry.traceTypes.ATTR_AGENT_LABEL]: this.agent.id }
1725
+ });
1726
+ this.createSpeechTask({
1727
+ task: import_utils.Task.from(() => onExitTask),
1728
+ name: "AgentActivity_onExit"
1729
+ });
1730
+ this.wakeupMainTask();
1731
+ this._draining = true;
1732
+ await ((_a = this._mainTask) == null ? void 0 : _a.result);
1733
+ } finally {
1734
+ unlock();
1735
+ }
1736
+ }
1737
+ async close() {
1738
+ var _a, _b, _c, _d;
1739
+ const unlock = await this.lock.lock();
1740
+ try {
1741
+ if (!this._draining) {
1742
+ this.logger.warn("task closing without draining");
1743
+ }
1744
+ this.cancelPreemptiveGeneration();
1745
+ if (this.llm instanceof import_llm.LLM) {
1746
+ this.llm.off("metrics_collected", this.onMetricsCollected);
1747
+ }
1748
+ if (this.realtimeSession) {
1749
+ this.realtimeSession.off("generation_created", this.onGenerationCreated);
1750
+ this.realtimeSession.off("input_speech_started", this.onInputSpeechStarted);
1751
+ this.realtimeSession.off("input_speech_stopped", this.onInputSpeechStopped);
1752
+ this.realtimeSession.off(
1753
+ "input_audio_transcription_completed",
1754
+ this.onInputAudioTranscriptionCompleted
1755
+ );
1756
+ this.realtimeSession.off("metrics_collected", this.onMetricsCollected);
1757
+ }
1758
+ if (this.stt instanceof import_stt.STT) {
1759
+ this.stt.off("metrics_collected", this.onMetricsCollected);
1760
+ }
1761
+ if (this.tts instanceof import_tts.TTS) {
1762
+ this.tts.off("metrics_collected", this.onMetricsCollected);
1763
+ }
1764
+ if (this.vad instanceof import_vad.VAD) {
1765
+ this.vad.off("metrics_collected", this.onMetricsCollected);
1766
+ }
1767
+ this.detachAudioInput();
1768
+ (_a = this.realtimeSpans) == null ? void 0 : _a.clear();
1769
+ await ((_b = this.realtimeSession) == null ? void 0 : _b.close());
1770
+ await ((_c = this.audioRecognition) == null ? void 0 : _c.close());
1771
+ await ((_d = this._mainTask) == null ? void 0 : _d.cancelAndWait());
1772
+ } finally {
1773
+ unlock();
1774
+ }
1775
+ }
1776
+ }
1777
/**
 * Maps a `null` tool choice to `undefined`; every other value (including
 * `undefined` and falsy values such as `0` or `""`) is returned unchanged.
 *
 * @param toolChoice - the tool-choice value to normalize.
 * @returns `undefined` when `toolChoice` is `null`, otherwise `toolChoice`.
 */
function toOaiToolChoice(toolChoice) {
  if (toolChoice === null) {
    return undefined;
  }
  return toolChoice;
}
1780
+ // Annotate the CommonJS export names for ESM import in node:
1781
+ 0 && (module.exports = {
1782
+ AgentActivity
1783
+ });
1784
+ //# sourceMappingURL=agent_activity.cjs.map