@livekit/agents 0.7.9 → 1.0.0-next.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (627) hide show
  1. package/dist/_exceptions.cjs +109 -0
  2. package/dist/_exceptions.cjs.map +1 -0
  3. package/dist/_exceptions.d.cts +64 -0
  4. package/dist/_exceptions.d.ts +64 -0
  5. package/dist/_exceptions.d.ts.map +1 -0
  6. package/dist/_exceptions.js +80 -0
  7. package/dist/_exceptions.js.map +1 -0
  8. package/dist/audio.cjs +10 -3
  9. package/dist/audio.cjs.map +1 -1
  10. package/dist/audio.d.cts +2 -0
  11. package/dist/audio.d.ts +2 -0
  12. package/dist/audio.d.ts.map +1 -1
  13. package/dist/audio.js +8 -2
  14. package/dist/audio.js.map +1 -1
  15. package/dist/cli.cjs +25 -0
  16. package/dist/cli.cjs.map +1 -1
  17. package/dist/cli.d.ts.map +1 -1
  18. package/dist/cli.js +25 -0
  19. package/dist/cli.js.map +1 -1
  20. package/dist/constants.cjs +6 -3
  21. package/dist/constants.cjs.map +1 -1
  22. package/dist/constants.d.cts +2 -1
  23. package/dist/constants.d.ts +2 -1
  24. package/dist/constants.d.ts.map +1 -1
  25. package/dist/constants.js +4 -2
  26. package/dist/constants.js.map +1 -1
  27. package/dist/http_server.cjs.map +1 -1
  28. package/dist/http_server.d.cts +1 -0
  29. package/dist/http_server.d.ts +1 -0
  30. package/dist/http_server.d.ts.map +1 -1
  31. package/dist/http_server.js.map +1 -1
  32. package/dist/index.cjs +27 -20
  33. package/dist/index.cjs.map +1 -1
  34. package/dist/index.d.cts +13 -10
  35. package/dist/index.d.ts +13 -10
  36. package/dist/index.d.ts.map +1 -1
  37. package/dist/index.js +15 -11
  38. package/dist/index.js.map +1 -1
  39. package/dist/inference_runner.cjs +0 -1
  40. package/dist/inference_runner.cjs.map +1 -1
  41. package/dist/inference_runner.d.cts +2 -3
  42. package/dist/inference_runner.d.ts +2 -3
  43. package/dist/inference_runner.d.ts.map +1 -1
  44. package/dist/inference_runner.js +0 -1
  45. package/dist/inference_runner.js.map +1 -1
  46. package/dist/ipc/inference_proc_executor.cjs +2 -2
  47. package/dist/ipc/inference_proc_executor.cjs.map +1 -1
  48. package/dist/ipc/inference_proc_executor.js +2 -2
  49. package/dist/ipc/inference_proc_executor.js.map +1 -1
  50. package/dist/ipc/job_executor.cjs.map +1 -1
  51. package/dist/ipc/job_executor.js.map +1 -1
  52. package/dist/ipc/job_proc_executor.cjs +1 -0
  53. package/dist/ipc/job_proc_executor.cjs.map +1 -1
  54. package/dist/ipc/job_proc_executor.js +1 -0
  55. package/dist/ipc/job_proc_executor.js.map +1 -1
  56. package/dist/ipc/job_proc_lazy_main.cjs +1 -1
  57. package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
  58. package/dist/ipc/job_proc_lazy_main.js +1 -1
  59. package/dist/ipc/job_proc_lazy_main.js.map +1 -1
  60. package/dist/ipc/supervised_proc.d.cts +1 -1
  61. package/dist/ipc/supervised_proc.d.ts +1 -1
  62. package/dist/ipc/supervised_proc.d.ts.map +1 -1
  63. package/dist/job.cjs +14 -2
  64. package/dist/job.cjs.map +1 -1
  65. package/dist/job.d.cts +8 -0
  66. package/dist/job.d.ts +8 -0
  67. package/dist/job.d.ts.map +1 -1
  68. package/dist/job.js +12 -1
  69. package/dist/job.js.map +1 -1
  70. package/dist/llm/chat_context.cjs +332 -82
  71. package/dist/llm/chat_context.cjs.map +1 -1
  72. package/dist/llm/chat_context.d.cts +152 -48
  73. package/dist/llm/chat_context.d.ts +152 -48
  74. package/dist/llm/chat_context.d.ts.map +1 -1
  75. package/dist/llm/chat_context.js +327 -81
  76. package/dist/llm/chat_context.js.map +1 -1
  77. package/dist/llm/chat_context.test.cjs +380 -0
  78. package/dist/llm/chat_context.test.cjs.map +1 -0
  79. package/dist/llm/chat_context.test.js +385 -0
  80. package/dist/llm/chat_context.test.js.map +1 -0
  81. package/dist/llm/index.cjs +37 -8
  82. package/dist/llm/index.cjs.map +1 -1
  83. package/dist/llm/index.d.cts +7 -3
  84. package/dist/llm/index.d.ts +7 -3
  85. package/dist/llm/index.d.ts.map +1 -1
  86. package/dist/llm/index.js +39 -9
  87. package/dist/llm/index.js.map +1 -1
  88. package/dist/llm/llm.cjs +98 -33
  89. package/dist/llm/llm.cjs.map +1 -1
  90. package/dist/llm/llm.d.cts +50 -24
  91. package/dist/llm/llm.d.ts +50 -24
  92. package/dist/llm/llm.d.ts.map +1 -1
  93. package/dist/llm/llm.js +99 -33
  94. package/dist/llm/llm.js.map +1 -1
  95. package/dist/llm/provider_format/google.cjs +128 -0
  96. package/dist/llm/provider_format/google.cjs.map +1 -0
  97. package/dist/llm/provider_format/google.d.cts +6 -0
  98. package/dist/llm/provider_format/google.d.ts +6 -0
  99. package/dist/llm/provider_format/google.d.ts.map +1 -0
  100. package/dist/llm/provider_format/google.js +104 -0
  101. package/dist/llm/provider_format/google.js.map +1 -0
  102. package/dist/llm/provider_format/google.test.cjs +676 -0
  103. package/dist/llm/provider_format/google.test.cjs.map +1 -0
  104. package/dist/llm/provider_format/google.test.js +675 -0
  105. package/dist/llm/provider_format/google.test.js.map +1 -0
  106. package/dist/llm/provider_format/index.cjs +40 -0
  107. package/dist/llm/provider_format/index.cjs.map +1 -0
  108. package/dist/llm/provider_format/index.d.cts +4 -0
  109. package/dist/llm/provider_format/index.d.ts +4 -0
  110. package/dist/llm/provider_format/index.d.ts.map +1 -0
  111. package/dist/llm/provider_format/index.js +16 -0
  112. package/dist/llm/provider_format/index.js.map +1 -0
  113. package/dist/llm/provider_format/openai.cjs +116 -0
  114. package/dist/llm/provider_format/openai.cjs.map +1 -0
  115. package/dist/llm/provider_format/openai.d.cts +3 -0
  116. package/dist/llm/provider_format/openai.d.ts +3 -0
  117. package/dist/llm/provider_format/openai.d.ts.map +1 -0
  118. package/dist/llm/provider_format/openai.js +92 -0
  119. package/dist/llm/provider_format/openai.js.map +1 -0
  120. package/dist/llm/provider_format/openai.test.cjs +490 -0
  121. package/dist/llm/provider_format/openai.test.cjs.map +1 -0
  122. package/dist/llm/provider_format/openai.test.js +489 -0
  123. package/dist/llm/provider_format/openai.test.js.map +1 -0
  124. package/dist/llm/provider_format/utils.cjs +146 -0
  125. package/dist/llm/provider_format/utils.cjs.map +1 -0
  126. package/dist/llm/provider_format/utils.d.cts +38 -0
  127. package/dist/llm/provider_format/utils.d.ts +38 -0
  128. package/dist/llm/provider_format/utils.d.ts.map +1 -0
  129. package/dist/llm/provider_format/utils.js +122 -0
  130. package/dist/llm/provider_format/utils.js.map +1 -0
  131. package/dist/llm/realtime.cjs +77 -0
  132. package/dist/llm/realtime.cjs.map +1 -0
  133. package/dist/llm/realtime.d.cts +98 -0
  134. package/dist/llm/realtime.d.ts +98 -0
  135. package/dist/llm/realtime.d.ts.map +1 -0
  136. package/dist/llm/realtime.js +52 -0
  137. package/dist/llm/realtime.js.map +1 -0
  138. package/dist/llm/remote_chat_context.cjs +112 -0
  139. package/dist/llm/remote_chat_context.cjs.map +1 -0
  140. package/dist/llm/remote_chat_context.d.cts +23 -0
  141. package/dist/llm/remote_chat_context.d.ts +23 -0
  142. package/dist/llm/remote_chat_context.d.ts.map +1 -0
  143. package/dist/llm/remote_chat_context.js +88 -0
  144. package/dist/llm/remote_chat_context.js.map +1 -0
  145. package/dist/llm/remote_chat_context.test.cjs +225 -0
  146. package/dist/llm/remote_chat_context.test.cjs.map +1 -0
  147. package/dist/llm/remote_chat_context.test.js +224 -0
  148. package/dist/llm/remote_chat_context.test.js.map +1 -0
  149. package/dist/llm/tool_context.cjs +111 -0
  150. package/dist/llm/tool_context.cjs.map +1 -0
  151. package/dist/llm/tool_context.d.cts +125 -0
  152. package/dist/llm/tool_context.d.ts +125 -0
  153. package/dist/llm/tool_context.d.ts.map +1 -0
  154. package/dist/llm/tool_context.js +80 -0
  155. package/dist/llm/tool_context.js.map +1 -0
  156. package/dist/llm/tool_context.test.cjs +162 -0
  157. package/dist/llm/tool_context.test.cjs.map +1 -0
  158. package/dist/llm/tool_context.test.js +161 -0
  159. package/dist/llm/tool_context.test.js.map +1 -0
  160. package/dist/llm/tool_context.type.test.cjs +92 -0
  161. package/dist/llm/tool_context.type.test.cjs.map +1 -0
  162. package/dist/llm/tool_context.type.test.js +91 -0
  163. package/dist/llm/tool_context.type.test.js.map +1 -0
  164. package/dist/llm/utils.cjs +260 -0
  165. package/dist/llm/utils.cjs.map +1 -0
  166. package/dist/llm/utils.d.cts +42 -0
  167. package/dist/llm/utils.d.ts +42 -0
  168. package/dist/llm/utils.d.ts.map +1 -0
  169. package/dist/llm/utils.js +223 -0
  170. package/dist/llm/utils.js.map +1 -0
  171. package/dist/llm/utils.test.cjs +513 -0
  172. package/dist/llm/utils.test.cjs.map +1 -0
  173. package/dist/llm/utils.test.js +490 -0
  174. package/dist/llm/utils.test.js.map +1 -0
  175. package/dist/metrics/base.cjs +0 -27
  176. package/dist/metrics/base.cjs.map +1 -1
  177. package/dist/metrics/base.d.cts +105 -63
  178. package/dist/metrics/base.d.ts +105 -63
  179. package/dist/metrics/base.d.ts.map +1 -1
  180. package/dist/metrics/base.js +0 -19
  181. package/dist/metrics/base.js.map +1 -1
  182. package/dist/metrics/index.cjs +0 -3
  183. package/dist/metrics/index.cjs.map +1 -1
  184. package/dist/metrics/index.d.cts +2 -3
  185. package/dist/metrics/index.d.ts +2 -3
  186. package/dist/metrics/index.d.ts.map +1 -1
  187. package/dist/metrics/index.js +0 -2
  188. package/dist/metrics/index.js.map +1 -1
  189. package/dist/metrics/usage_collector.cjs +17 -12
  190. package/dist/metrics/usage_collector.cjs.map +1 -1
  191. package/dist/metrics/usage_collector.d.cts +3 -2
  192. package/dist/metrics/usage_collector.d.ts +3 -2
  193. package/dist/metrics/usage_collector.d.ts.map +1 -1
  194. package/dist/metrics/usage_collector.js +17 -12
  195. package/dist/metrics/usage_collector.js.map +1 -1
  196. package/dist/metrics/utils.cjs +22 -59
  197. package/dist/metrics/utils.cjs.map +1 -1
  198. package/dist/metrics/utils.d.cts +1 -8
  199. package/dist/metrics/utils.d.ts +1 -8
  200. package/dist/metrics/utils.d.ts.map +1 -1
  201. package/dist/metrics/utils.js +22 -52
  202. package/dist/metrics/utils.js.map +1 -1
  203. package/dist/multimodal/index.cjs +0 -2
  204. package/dist/multimodal/index.cjs.map +1 -1
  205. package/dist/multimodal/index.d.cts +0 -1
  206. package/dist/multimodal/index.d.ts +0 -1
  207. package/dist/multimodal/index.d.ts.map +1 -1
  208. package/dist/multimodal/index.js +0 -1
  209. package/dist/multimodal/index.js.map +1 -1
  210. package/dist/plugin.cjs +24 -8
  211. package/dist/plugin.cjs.map +1 -1
  212. package/dist/plugin.d.cts +18 -4
  213. package/dist/plugin.d.ts +18 -4
  214. package/dist/plugin.d.ts.map +1 -1
  215. package/dist/plugin.js +22 -7
  216. package/dist/plugin.js.map +1 -1
  217. package/dist/stream/deferred_stream.cjs +98 -0
  218. package/dist/stream/deferred_stream.cjs.map +1 -0
  219. package/dist/stream/deferred_stream.d.cts +27 -0
  220. package/dist/stream/deferred_stream.d.ts +27 -0
  221. package/dist/stream/deferred_stream.d.ts.map +1 -0
  222. package/dist/stream/deferred_stream.js +73 -0
  223. package/dist/stream/deferred_stream.js.map +1 -0
  224. package/dist/stream/deferred_stream.test.cjs +527 -0
  225. package/dist/stream/deferred_stream.test.cjs.map +1 -0
  226. package/dist/stream/deferred_stream.test.js +526 -0
  227. package/dist/stream/deferred_stream.test.js.map +1 -0
  228. package/dist/stream/identity_transform.cjs +42 -0
  229. package/dist/stream/identity_transform.cjs.map +1 -0
  230. package/dist/stream/identity_transform.d.cts +6 -0
  231. package/dist/stream/identity_transform.d.ts +6 -0
  232. package/dist/stream/identity_transform.d.ts.map +1 -0
  233. package/dist/stream/identity_transform.js +18 -0
  234. package/dist/stream/identity_transform.js.map +1 -0
  235. package/dist/stream/identity_transform.test.cjs +125 -0
  236. package/dist/stream/identity_transform.test.cjs.map +1 -0
  237. package/dist/stream/identity_transform.test.js +124 -0
  238. package/dist/stream/identity_transform.test.js.map +1 -0
  239. package/dist/stream/index.cjs +38 -0
  240. package/dist/stream/index.cjs.map +1 -0
  241. package/dist/stream/index.d.cts +5 -0
  242. package/dist/stream/index.d.ts +5 -0
  243. package/dist/stream/index.d.ts.map +1 -0
  244. package/dist/stream/index.js +11 -0
  245. package/dist/stream/index.js.map +1 -0
  246. package/dist/stream/merge_readable_streams.cjs +59 -0
  247. package/dist/stream/merge_readable_streams.cjs.map +1 -0
  248. package/dist/stream/merge_readable_streams.d.cts +4 -0
  249. package/dist/stream/merge_readable_streams.d.ts +4 -0
  250. package/dist/stream/merge_readable_streams.d.ts.map +1 -0
  251. package/dist/stream/merge_readable_streams.js +35 -0
  252. package/dist/stream/merge_readable_streams.js.map +1 -0
  253. package/dist/stream/stream_channel.cjs +47 -0
  254. package/dist/stream/stream_channel.cjs.map +1 -0
  255. package/dist/stream/stream_channel.d.cts +9 -0
  256. package/dist/stream/stream_channel.d.ts +9 -0
  257. package/dist/stream/stream_channel.d.ts.map +1 -0
  258. package/dist/stream/stream_channel.js +23 -0
  259. package/dist/stream/stream_channel.js.map +1 -0
  260. package/dist/stream/stream_channel.test.cjs +97 -0
  261. package/dist/stream/stream_channel.test.cjs.map +1 -0
  262. package/dist/stream/stream_channel.test.js +96 -0
  263. package/dist/stream/stream_channel.test.js.map +1 -0
  264. package/dist/stt/stream_adapter.cjs +3 -4
  265. package/dist/stt/stream_adapter.cjs.map +1 -1
  266. package/dist/stt/stream_adapter.d.cts +1 -0
  267. package/dist/stt/stream_adapter.d.ts +1 -0
  268. package/dist/stt/stream_adapter.d.ts.map +1 -1
  269. package/dist/stt/stream_adapter.js +3 -4
  270. package/dist/stt/stream_adapter.js.map +1 -1
  271. package/dist/stt/stt.cjs +101 -10
  272. package/dist/stt/stt.cjs.map +1 -1
  273. package/dist/stt/stt.d.cts +26 -5
  274. package/dist/stt/stt.d.ts +26 -5
  275. package/dist/stt/stt.d.ts.map +1 -1
  276. package/dist/stt/stt.js +102 -11
  277. package/dist/stt/stt.js.map +1 -1
  278. package/dist/tokenize/basic/basic.cjs +10 -5
  279. package/dist/tokenize/basic/basic.cjs.map +1 -1
  280. package/dist/tokenize/basic/basic.d.cts +7 -1
  281. package/dist/tokenize/basic/basic.d.ts +7 -1
  282. package/dist/tokenize/basic/basic.d.ts.map +1 -1
  283. package/dist/tokenize/basic/basic.js +10 -5
  284. package/dist/tokenize/basic/basic.js.map +1 -1
  285. package/dist/tokenize/basic/sentence.cjs +14 -6
  286. package/dist/tokenize/basic/sentence.cjs.map +1 -1
  287. package/dist/tokenize/basic/sentence.d.cts +1 -1
  288. package/dist/tokenize/basic/sentence.d.ts +1 -1
  289. package/dist/tokenize/basic/sentence.d.ts.map +1 -1
  290. package/dist/tokenize/basic/sentence.js +14 -6
  291. package/dist/tokenize/basic/sentence.js.map +1 -1
  292. package/dist/tokenize/token_stream.cjs +5 -3
  293. package/dist/tokenize/token_stream.cjs.map +1 -1
  294. package/dist/tokenize/token_stream.d.cts +1 -0
  295. package/dist/tokenize/token_stream.d.ts +1 -0
  296. package/dist/tokenize/token_stream.d.ts.map +1 -1
  297. package/dist/tokenize/token_stream.js +6 -4
  298. package/dist/tokenize/token_stream.js.map +1 -1
  299. package/dist/transcription.cjs +1 -2
  300. package/dist/transcription.cjs.map +1 -1
  301. package/dist/transcription.d.ts.map +1 -1
  302. package/dist/transcription.js +2 -3
  303. package/dist/transcription.js.map +1 -1
  304. package/dist/tts/index.cjs +2 -4
  305. package/dist/tts/index.cjs.map +1 -1
  306. package/dist/tts/index.d.cts +1 -1
  307. package/dist/tts/index.d.ts +1 -1
  308. package/dist/tts/index.d.ts.map +1 -1
  309. package/dist/tts/index.js +1 -3
  310. package/dist/tts/index.js.map +1 -1
  311. package/dist/tts/stream_adapter.cjs +26 -13
  312. package/dist/tts/stream_adapter.cjs.map +1 -1
  313. package/dist/tts/stream_adapter.d.cts +1 -1
  314. package/dist/tts/stream_adapter.d.ts +1 -1
  315. package/dist/tts/stream_adapter.d.ts.map +1 -1
  316. package/dist/tts/stream_adapter.js +27 -14
  317. package/dist/tts/stream_adapter.js.map +1 -1
  318. package/dist/tts/tts.cjs +157 -25
  319. package/dist/tts/tts.cjs.map +1 -1
  320. package/dist/tts/tts.d.cts +29 -5
  321. package/dist/tts/tts.d.ts +29 -5
  322. package/dist/tts/tts.d.ts.map +1 -1
  323. package/dist/tts/tts.js +157 -24
  324. package/dist/tts/tts.js.map +1 -1
  325. package/dist/types.cjs +60 -0
  326. package/dist/types.cjs.map +1 -0
  327. package/dist/types.d.cts +13 -0
  328. package/dist/types.d.ts +13 -0
  329. package/dist/types.d.ts.map +1 -0
  330. package/dist/types.js +35 -0
  331. package/dist/types.js.map +1 -0
  332. package/dist/utils.cjs +281 -27
  333. package/dist/utils.cjs.map +1 -1
  334. package/dist/utils.d.cts +134 -9
  335. package/dist/utils.d.ts +134 -9
  336. package/dist/utils.d.ts.map +1 -1
  337. package/dist/utils.js +265 -26
  338. package/dist/utils.js.map +1 -1
  339. package/dist/utils.test.cjs +492 -0
  340. package/dist/utils.test.cjs.map +1 -0
  341. package/dist/utils.test.js +498 -0
  342. package/dist/utils.test.js.map +1 -0
  343. package/dist/vad.cjs +76 -20
  344. package/dist/vad.cjs.map +1 -1
  345. package/dist/vad.d.cts +25 -5
  346. package/dist/vad.d.ts +25 -5
  347. package/dist/vad.d.ts.map +1 -1
  348. package/dist/vad.js +76 -20
  349. package/dist/vad.js.map +1 -1
  350. package/dist/voice/agent.cjs +245 -0
  351. package/dist/voice/agent.cjs.map +1 -0
  352. package/dist/voice/agent.d.cts +78 -0
  353. package/dist/voice/agent.d.ts +78 -0
  354. package/dist/voice/agent.d.ts.map +1 -0
  355. package/dist/voice/agent.js +220 -0
  356. package/dist/voice/agent.js.map +1 -0
  357. package/dist/voice/agent.test.cjs +61 -0
  358. package/dist/voice/agent.test.cjs.map +1 -0
  359. package/dist/voice/agent.test.js +60 -0
  360. package/dist/voice/agent.test.js.map +1 -0
  361. package/dist/voice/agent_activity.cjs +1453 -0
  362. package/dist/voice/agent_activity.cjs.map +1 -0
  363. package/dist/voice/agent_activity.d.cts +94 -0
  364. package/dist/voice/agent_activity.d.ts +94 -0
  365. package/dist/voice/agent_activity.d.ts.map +1 -0
  366. package/dist/voice/agent_activity.js +1449 -0
  367. package/dist/voice/agent_activity.js.map +1 -0
  368. package/dist/voice/agent_session.cjs +312 -0
  369. package/dist/voice/agent_session.cjs.map +1 -0
  370. package/dist/voice/agent_session.d.cts +121 -0
  371. package/dist/voice/agent_session.d.ts +121 -0
  372. package/dist/voice/agent_session.d.ts.map +1 -0
  373. package/dist/voice/agent_session.js +295 -0
  374. package/dist/voice/agent_session.js.map +1 -0
  375. package/dist/voice/audio_recognition.cjs +375 -0
  376. package/dist/voice/audio_recognition.cjs.map +1 -0
  377. package/dist/voice/audio_recognition.d.cts +80 -0
  378. package/dist/voice/audio_recognition.d.ts +80 -0
  379. package/dist/voice/audio_recognition.d.ts.map +1 -0
  380. package/dist/voice/audio_recognition.js +351 -0
  381. package/dist/voice/audio_recognition.js.map +1 -0
  382. package/dist/voice/events.cjs +145 -0
  383. package/dist/voice/events.cjs.map +1 -0
  384. package/dist/voice/events.d.cts +124 -0
  385. package/dist/voice/events.d.ts +124 -0
  386. package/dist/voice/events.d.ts.map +1 -0
  387. package/dist/voice/events.js +110 -0
  388. package/dist/voice/events.js.map +1 -0
  389. package/dist/voice/generation.cjs +700 -0
  390. package/dist/voice/generation.cjs.map +1 -0
  391. package/dist/voice/generation.d.cts +115 -0
  392. package/dist/voice/generation.d.ts +115 -0
  393. package/dist/voice/generation.d.ts.map +1 -0
  394. package/dist/voice/generation.js +672 -0
  395. package/dist/voice/generation.js.map +1 -0
  396. package/dist/voice/index.cjs +40 -0
  397. package/dist/voice/index.cjs.map +1 -0
  398. package/dist/voice/index.d.cts +5 -0
  399. package/dist/voice/index.d.ts +5 -0
  400. package/dist/voice/index.d.ts.map +1 -0
  401. package/dist/voice/index.js +11 -0
  402. package/dist/voice/index.js.map +1 -0
  403. package/dist/voice/io.cjs +245 -0
  404. package/dist/voice/io.cjs.map +1 -0
  405. package/dist/voice/io.d.cts +101 -0
  406. package/dist/voice/io.d.ts +101 -0
  407. package/dist/voice/io.d.ts.map +1 -0
  408. package/dist/voice/io.js +217 -0
  409. package/dist/voice/io.js.map +1 -0
  410. package/dist/voice/room_io/_input.cjs +121 -0
  411. package/dist/voice/room_io/_input.cjs.map +1 -0
  412. package/dist/voice/room_io/_input.d.cts +24 -0
  413. package/dist/voice/room_io/_input.d.ts +24 -0
  414. package/dist/voice/room_io/_input.d.ts.map +1 -0
  415. package/dist/voice/room_io/_input.js +102 -0
  416. package/dist/voice/room_io/_input.js.map +1 -0
  417. package/dist/voice/room_io/_output.cjs +358 -0
  418. package/dist/voice/room_io/_output.cjs.map +1 -0
  419. package/dist/voice/room_io/_output.d.cts +75 -0
  420. package/dist/voice/room_io/_output.d.ts +75 -0
  421. package/dist/voice/room_io/_output.d.ts.map +1 -0
  422. package/dist/voice/room_io/_output.js +342 -0
  423. package/dist/voice/room_io/_output.js.map +1 -0
  424. package/dist/voice/room_io/index.cjs +25 -0
  425. package/dist/voice/room_io/index.cjs.map +1 -0
  426. package/dist/voice/room_io/index.d.cts +3 -0
  427. package/dist/voice/room_io/index.d.ts +3 -0
  428. package/dist/voice/room_io/index.d.ts.map +1 -0
  429. package/dist/voice/room_io/index.js +3 -0
  430. package/dist/voice/room_io/index.js.map +1 -0
  431. package/dist/voice/room_io/room_io.cjs +370 -0
  432. package/dist/voice/room_io/room_io.cjs.map +1 -0
  433. package/dist/voice/room_io/room_io.d.cts +73 -0
  434. package/dist/voice/room_io/room_io.d.ts +73 -0
  435. package/dist/voice/room_io/room_io.d.ts.map +1 -0
  436. package/dist/voice/room_io/room_io.js +361 -0
  437. package/dist/voice/room_io/room_io.js.map +1 -0
  438. package/dist/{pipeline/index.cjs → voice/run_context.cjs} +16 -11
  439. package/dist/voice/run_context.cjs.map +1 -0
  440. package/dist/voice/run_context.d.cts +12 -0
  441. package/dist/voice/run_context.d.ts +12 -0
  442. package/dist/voice/run_context.d.ts.map +1 -0
  443. package/dist/voice/run_context.js +14 -0
  444. package/dist/voice/run_context.js.map +1 -0
  445. package/dist/voice/speech_handle.cjs +105 -0
  446. package/dist/voice/speech_handle.cjs.map +1 -0
  447. package/dist/voice/speech_handle.d.cts +46 -0
  448. package/dist/voice/speech_handle.d.ts +46 -0
  449. package/dist/voice/speech_handle.d.ts.map +1 -0
  450. package/dist/voice/speech_handle.js +81 -0
  451. package/dist/voice/speech_handle.js.map +1 -0
  452. package/dist/voice/transcription/_utils.cjs +45 -0
  453. package/dist/voice/transcription/_utils.cjs.map +1 -0
  454. package/dist/voice/transcription/_utils.d.cts +3 -0
  455. package/dist/voice/transcription/_utils.d.ts +3 -0
  456. package/dist/voice/transcription/_utils.d.ts.map +1 -0
  457. package/dist/voice/transcription/_utils.js +21 -0
  458. package/dist/voice/transcription/_utils.js.map +1 -0
  459. package/dist/voice/transcription/index.cjs +23 -0
  460. package/dist/voice/transcription/index.cjs.map +1 -0
  461. package/dist/voice/transcription/index.d.cts +2 -0
  462. package/dist/voice/transcription/index.d.ts +2 -0
  463. package/dist/voice/transcription/index.d.ts.map +1 -0
  464. package/dist/voice/transcription/index.js +2 -0
  465. package/dist/voice/transcription/index.js.map +1 -0
  466. package/dist/voice/transcription/synchronizer.cjs +380 -0
  467. package/dist/voice/transcription/synchronizer.cjs.map +1 -0
  468. package/dist/voice/transcription/synchronizer.d.cts +86 -0
  469. package/dist/voice/transcription/synchronizer.d.ts +86 -0
  470. package/dist/voice/transcription/synchronizer.d.ts.map +1 -0
  471. package/dist/voice/transcription/synchronizer.js +355 -0
  472. package/dist/voice/transcription/synchronizer.js.map +1 -0
  473. package/dist/worker.cjs +22 -4
  474. package/dist/worker.cjs.map +1 -1
  475. package/dist/worker.d.cts +1 -1
  476. package/dist/worker.d.ts +1 -1
  477. package/dist/worker.d.ts.map +1 -1
  478. package/dist/worker.js +22 -4
  479. package/dist/worker.js.map +1 -1
  480. package/package.json +9 -2
  481. package/src/_exceptions.ts +137 -0
  482. package/src/audio.ts +12 -1
  483. package/src/cli.ts +37 -0
  484. package/src/constants.ts +2 -1
  485. package/src/http_server.ts +1 -0
  486. package/src/index.ts +13 -10
  487. package/src/inference_runner.ts +2 -3
  488. package/src/ipc/inference_proc_executor.ts +2 -2
  489. package/src/ipc/job_executor.ts +1 -1
  490. package/src/ipc/job_proc_executor.ts +1 -1
  491. package/src/ipc/job_proc_lazy_main.ts +1 -1
  492. package/src/job.ts +18 -0
  493. package/src/llm/__snapshots__/chat_context.test.ts.snap +527 -0
  494. package/src/llm/__snapshots__/tool_context.test.ts.snap +177 -0
  495. package/src/llm/__snapshots__/utils.test.ts.snap +65 -0
  496. package/src/llm/chat_context.test.ts +450 -0
  497. package/src/llm/chat_context.ts +501 -103
  498. package/src/llm/index.ts +53 -18
  499. package/src/llm/llm.ts +149 -50
  500. package/src/llm/provider_format/google.test.ts +772 -0
  501. package/src/llm/provider_format/google.ts +130 -0
  502. package/src/llm/provider_format/index.ts +23 -0
  503. package/src/llm/provider_format/openai.test.ts +581 -0
  504. package/src/llm/provider_format/openai.ts +118 -0
  505. package/src/llm/provider_format/utils.ts +183 -0
  506. package/src/llm/realtime.ts +151 -0
  507. package/src/llm/remote_chat_context.test.ts +290 -0
  508. package/src/llm/remote_chat_context.ts +114 -0
  509. package/src/llm/tool_context.test.ts +198 -0
  510. package/src/llm/tool_context.ts +259 -0
  511. package/src/llm/tool_context.type.test.ts +115 -0
  512. package/src/llm/utils.test.ts +670 -0
  513. package/src/llm/utils.ts +324 -0
  514. package/src/metrics/base.ts +110 -78
  515. package/src/metrics/index.ts +3 -9
  516. package/src/metrics/usage_collector.ts +19 -13
  517. package/src/metrics/utils.ts +24 -69
  518. package/src/multimodal/index.ts +0 -1
  519. package/src/plugin.ts +26 -8
  520. package/src/stream/deferred_stream.test.ts +755 -0
  521. package/src/stream/deferred_stream.ts +110 -0
  522. package/src/stream/identity_transform.test.ts +179 -0
  523. package/src/stream/identity_transform.ts +18 -0
  524. package/src/stream/index.ts +7 -0
  525. package/src/stream/merge_readable_streams.ts +40 -0
  526. package/src/stream/stream_channel.test.ts +129 -0
  527. package/src/stream/stream_channel.ts +32 -0
  528. package/src/stt/stream_adapter.ts +3 -5
  529. package/src/stt/stt.ts +135 -17
  530. package/src/tokenize/basic/basic.ts +13 -5
  531. package/src/tokenize/basic/sentence.ts +20 -6
  532. package/src/tokenize/token_stream.ts +7 -4
  533. package/src/transcription.ts +2 -3
  534. package/src/tts/index.ts +0 -1
  535. package/src/tts/stream_adapter.ts +42 -16
  536. package/src/tts/tts.ts +203 -21
  537. package/src/types.ts +42 -0
  538. package/src/utils.test.ts +658 -0
  539. package/src/utils.ts +375 -44
  540. package/src/vad.ts +90 -22
  541. package/src/voice/agent.test.ts +80 -0
  542. package/src/voice/agent.ts +332 -0
  543. package/src/voice/agent_activity.ts +1913 -0
  544. package/src/voice/agent_session.ts +460 -0
  545. package/src/voice/audio_recognition.ts +474 -0
  546. package/src/voice/events.ts +252 -0
  547. package/src/voice/generation.ts +881 -0
  548. package/src/voice/index.ts +7 -0
  549. package/src/voice/io.ts +304 -0
  550. package/src/voice/room_io/_input.ts +144 -0
  551. package/src/voice/room_io/_output.ts +436 -0
  552. package/src/voice/room_io/index.ts +5 -0
  553. package/src/voice/room_io/room_io.ts +495 -0
  554. package/src/voice/run_context.ts +20 -0
  555. package/src/voice/speech_handle.ts +104 -0
  556. package/src/voice/transcription/_utils.ts +25 -0
  557. package/src/voice/transcription/index.ts +4 -0
  558. package/src/voice/transcription/synchronizer.ts +478 -0
  559. package/src/worker.ts +22 -2
  560. package/dist/llm/function_context.cjs +0 -103
  561. package/dist/llm/function_context.cjs.map +0 -1
  562. package/dist/llm/function_context.d.cts +0 -47
  563. package/dist/llm/function_context.d.ts +0 -47
  564. package/dist/llm/function_context.d.ts.map +0 -1
  565. package/dist/llm/function_context.js +0 -78
  566. package/dist/llm/function_context.js.map +0 -1
  567. package/dist/llm/function_context.test.cjs +0 -218
  568. package/dist/llm/function_context.test.cjs.map +0 -1
  569. package/dist/llm/function_context.test.js +0 -217
  570. package/dist/llm/function_context.test.js.map +0 -1
  571. package/dist/multimodal/multimodal_agent.cjs +0 -486
  572. package/dist/multimodal/multimodal_agent.cjs.map +0 -1
  573. package/dist/multimodal/multimodal_agent.d.cts +0 -48
  574. package/dist/multimodal/multimodal_agent.d.ts +0 -48
  575. package/dist/multimodal/multimodal_agent.d.ts.map +0 -1
  576. package/dist/multimodal/multimodal_agent.js +0 -461
  577. package/dist/multimodal/multimodal_agent.js.map +0 -1
  578. package/dist/pipeline/agent_output.cjs +0 -197
  579. package/dist/pipeline/agent_output.cjs.map +0 -1
  580. package/dist/pipeline/agent_output.d.cts +0 -33
  581. package/dist/pipeline/agent_output.d.ts +0 -33
  582. package/dist/pipeline/agent_output.d.ts.map +0 -1
  583. package/dist/pipeline/agent_output.js +0 -172
  584. package/dist/pipeline/agent_output.js.map +0 -1
  585. package/dist/pipeline/agent_playout.cjs +0 -175
  586. package/dist/pipeline/agent_playout.cjs.map +0 -1
  587. package/dist/pipeline/agent_playout.d.cts +0 -40
  588. package/dist/pipeline/agent_playout.d.ts +0 -40
  589. package/dist/pipeline/agent_playout.d.ts.map +0 -1
  590. package/dist/pipeline/agent_playout.js +0 -139
  591. package/dist/pipeline/agent_playout.js.map +0 -1
  592. package/dist/pipeline/human_input.cjs +0 -171
  593. package/dist/pipeline/human_input.cjs.map +0 -1
  594. package/dist/pipeline/human_input.d.cts +0 -30
  595. package/dist/pipeline/human_input.d.ts +0 -30
  596. package/dist/pipeline/human_input.d.ts.map +0 -1
  597. package/dist/pipeline/human_input.js +0 -146
  598. package/dist/pipeline/human_input.js.map +0 -1
  599. package/dist/pipeline/index.cjs.map +0 -1
  600. package/dist/pipeline/index.d.cts +0 -2
  601. package/dist/pipeline/index.d.ts +0 -2
  602. package/dist/pipeline/index.d.ts.map +0 -1
  603. package/dist/pipeline/index.js +0 -11
  604. package/dist/pipeline/index.js.map +0 -1
  605. package/dist/pipeline/pipeline_agent.cjs +0 -859
  606. package/dist/pipeline/pipeline_agent.cjs.map +0 -1
  607. package/dist/pipeline/pipeline_agent.d.cts +0 -150
  608. package/dist/pipeline/pipeline_agent.d.ts +0 -150
  609. package/dist/pipeline/pipeline_agent.d.ts.map +0 -1
  610. package/dist/pipeline/pipeline_agent.js +0 -837
  611. package/dist/pipeline/pipeline_agent.js.map +0 -1
  612. package/dist/pipeline/speech_handle.cjs +0 -176
  613. package/dist/pipeline/speech_handle.cjs.map +0 -1
  614. package/dist/pipeline/speech_handle.d.cts +0 -37
  615. package/dist/pipeline/speech_handle.d.ts +0 -37
  616. package/dist/pipeline/speech_handle.d.ts.map +0 -1
  617. package/dist/pipeline/speech_handle.js +0 -152
  618. package/dist/pipeline/speech_handle.js.map +0 -1
  619. package/src/llm/function_context.test.ts +0 -248
  620. package/src/llm/function_context.ts +0 -142
  621. package/src/multimodal/multimodal_agent.ts +0 -592
  622. package/src/pipeline/agent_output.ts +0 -219
  623. package/src/pipeline/agent_playout.ts +0 -192
  624. package/src/pipeline/human_input.ts +0 -188
  625. package/src/pipeline/index.ts +0 -15
  626. package/src/pipeline/pipeline_agent.ts +0 -1197
  627. package/src/pipeline/speech_handle.ts +0 -201
@@ -0,0 +1,324 @@
1
+ // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+ import { VideoBufferType, VideoFrame } from '@livekit/rtc-node';
5
+ import type { JSONSchema7 } from 'json-schema';
6
+ import sharp from 'sharp';
7
+ import { ZodObject } from 'zod';
8
+ import { zodToJsonSchema } from 'zod-to-json-schema';
9
+ import type { UnknownUserData } from '../voice/run_context.js';
10
+ import type { ChatContext } from './chat_context.js';
11
+ import {
12
+ type ChatItem,
13
+ FunctionCall,
14
+ FunctionCallOutput,
15
+ type ImageContent,
16
+ } from './chat_context.js';
17
+ import type { ToolContext, ToolInputSchema, ToolOptions } from './tool_context.js';
18
+
19
/**
 * Result of {@link serializeImage}: an image reduced to plain, transport-friendly fields.
 *
 * Inline images (data URLs and encoded video frames) populate `base64Data`;
 * images passed as any other string populate `externalUrl` instead.
 */
export interface SerializedImage {
  /** Inline image payload, base64-encoded. */
  base64Data?: string;
  /** Location of an image that was supplied as a non-data URL string. */
  externalUrl?: string;
  /** MIME type of the image, when known. */
  mimeType?: string;
  /** Detail level copied from the source image content. */
  inferenceDetail: 'auto' | 'high' | 'low';
}
25
+
26
/**
 * Number of interleaved channels per pixel for a packed RGB-family buffer type.
 *
 * @param type - The buffer type of the frame being encoded.
 * @returns `3` for RGB24, `4` for RGBA / ABGR / ARGB / BGRA.
 * @throws Error for every other buffer type.
 */
function getChannelsFromVideoBufferType(type: VideoBufferType): 3 | 4 {
  if (type === VideoBufferType.RGB24) {
    return 3;
  }

  const hasFourChannels =
    type === VideoBufferType.RGBA ||
    type === VideoBufferType.ABGR ||
    type === VideoBufferType.ARGB ||
    type === VideoBufferType.BGRA;
  if (hasFourChannels) {
    return 4;
  }

  // YUV formats (I420, I420A, I422, I444, I010, NV12) need conversion
  throw new Error(`Unsupported VideoBufferType: ${type}. Only RGB/RGBA formats are supported.`);
}
40
+
41
/**
 * Return a frame whose pixel data can be handed to sharp as raw input.
 *
 * Only RGBA and RGB24 frames are passed through untouched. Every other buffer
 * type (including the 4-channel BGRA / ARGB / ABGR layouts) is converted to
 * RGBA: sharp interprets raw 4-channel input as R, G, B, A in that order, so
 * forwarding a differently ordered buffer would produce swapped channels.
 *
 * @param frame - The source video frame.
 * @returns The same frame if it is already RGBA/RGB24, otherwise an RGBA copy.
 * @throws Error if the frame cannot be converted to RGBA.
 */
function ensureRGBCompatible(frame: VideoFrame): VideoFrame {
  // These two layouts already match the channel order sharp expects.
  if (frame.type === VideoBufferType.RGBA || frame.type === VideoBufferType.RGB24) {
    return frame;
  }

  // Everything else (reordered RGB layouts and YUV formats) goes through a conversion.
  try {
    return frame.convert(VideoBufferType.RGBA);
  } catch (error) {
    throw new Error(
      `Failed to convert format ${frame.type} to RGB: ${error}. ` +
        `Consider using RGB/RGBA formats or converting on the client side.`,
    );
  }
}
63
+
64
/**
 * Convert an {@link ImageContent} into a {@link SerializedImage}.
 *
 * - A `data:` URL is split into its MIME type and base64 payload. An explicitly
 *   provided `image.mimeType` wins over the one in the URL header (with a warning).
 * - Any other string is treated as an external URL and returned as-is.
 * - A `VideoFrame` is encoded to PNG with sharp, resized first when both
 *   `inferenceWidth` and `inferenceHeight` are set.
 *
 * @param image - The image content to serialize.
 * @returns The serialized image.
 * @throws Error if a data URL is malformed (no MIME type or no payload), if its
 *   MIME type is not jpeg/png/webp/gif, or if `image.image` is neither a string
 *   nor a `VideoFrame`.
 */
export async function serializeImage(image: ImageContent): Promise<SerializedImage> {
  if (typeof image.image === 'string') {
    // External URL
    if (!image.image.startsWith('data:')) {
      return {
        mimeType: image.mimeType,
        inferenceDetail: image.inferenceDetail,
        externalUrl: image.image,
      };
    }

    const [header = '', base64Data] = image.image.split(',', 2);
    // Without a comma there is no payload section at all; reject it instead of
    // returning a result whose base64Data is silently undefined.
    if (base64Data === undefined) {
      throw new Error('Invalid data URL format');
    }

    // header looks like "data:<mime>[;base64]"; the MIME type sits after the first ':'
    const headerMime = header.split(';')[0]?.split(':')[1];
    if (!headerMime) {
      throw new Error('Invalid data URL format');
    }

    let mimeType = headerMime;
    if (image.mimeType && image.mimeType !== headerMime) {
      console.warn(
        `Provided mimeType '${image.mimeType}' does not match data URL mime type '${headerMime}'. Using provided mimeType.`,
      );
      mimeType = image.mimeType;
    }

    const supportedTypes = new Set(['image/jpeg', 'image/png', 'image/webp', 'image/gif']);
    if (!supportedTypes.has(mimeType)) {
      throw new Error(`Unsupported mimeType ${mimeType}. Must be jpeg, png, webp, or gif`);
    }

    return {
      base64Data,
      mimeType,
      inferenceDetail: image.inferenceDetail,
    };
  }

  if (image.image instanceof VideoFrame) {
    const frame = ensureRGBCompatible(image.image);
    const channels = getChannelsFromVideoBufferType(frame.type);

    // Sharp needs to know the format of raw pixel data
    let encoded = sharp(frame.data, {
      raw: {
        width: frame.width,
        height: frame.height,
        channels,
      },
    });

    if (image.inferenceWidth && image.inferenceHeight) {
      encoded = encoded.resize(image.inferenceWidth, image.inferenceHeight);
    }

    const pngBuffer = await encoded.png().toBuffer();

    return {
      base64Data: pngBuffer.toString('base64'),
      mimeType: 'image/png',
      inferenceDetail: image.inferenceDetail,
    };
  }

  throw new Error('Unsupported image type');
}
135
+
136
/**
 * Raw OpenAI-adherent function parameters: an object schema with its
 * property definitions and the list of required property names.
 */
export type OpenAIFunctionParameters = {
  type: 'object';
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  properties: Record<string, any>;
  required: string[];
  additionalProperties?: boolean;
};
143
+
144
// TODO(brian): remove this helper once we have the real RunContext user data
/**
 * Build the options object handed to a tool's `execute` function.
 *
 * @param toolCallId - Identifier of the tool call being executed.
 * @param userData - User data exposed as `ctx.userData`; defaults to an empty object.
 * @returns An object of the shape `{ ctx: { userData }, toolCallId }`, cast to `ToolOptions`.
 */
export const createToolOptions = <UserData extends UnknownUserData>(
  toolCallId: string,
  userData: UserData = {} as UserData,
): ToolOptions<UserData> => {
  const ctx = { userData };
  const options = { ctx, toolCallId };
  return options as unknown as ToolOptions<UserData>;
};
151
+
152
/**
 * Convert a Zod object schema into OpenAI-style function parameters using
 * `zodToJsonSchema` with the `openAi` target.
 *
 * @internal
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
export const oaiParams = (p: ZodObject<any>): OpenAIFunctionParameters => {
  // TODO(AJS-162): make zod to JSON parsing able to handle z.optional(v.field())
  const jsonSchema = zodToJsonSchema(p, { target: 'openAi' }) as OpenAIFunctionParameters;

  // Keep only the fields that belong to the function-parameters shape.
  return {
    type: 'object',
    properties: jsonSchema.properties,
    required: jsonSchema.required,
    additionalProperties: jsonSchema.additionalProperties,
  };
};
167
+
168
/**
 * Create a {@link FunctionCall} for a tool invocation, after checking that the
 * named tool exists in the tool context. The raw argument string is stored unparsed.
 *
 * @throws Error if `toolName` is not present in `toolCtx`.
 * @internal
 */
export const oaiBuildFunctionInfo = (
  toolCtx: ToolContext,
  toolCallId: string,
  toolName: string,
  rawArgs: string,
): FunctionCall => {
  const isKnownTool = Boolean(toolCtx[toolName]);
  if (!isKnownTool) {
    throw new Error(`AI tool ${toolName} not found`);
  }

  const callInfo = { callId: toolCallId, name: toolName, args: rawArgs };
  return FunctionCall.create(callInfo);
};
186
+
187
/**
 * Execute the tool referenced by `toolCall` and wrap the outcome in a
 * {@link FunctionCallOutput}.
 *
 * Failures are reported through the returned output (`isError: true`) instead
 * of being thrown. This covers: an unknown tool name, arguments that are not
 * valid JSON, arguments rejected by the tool's Zod schema, and errors thrown
 * while the tool runs or while its result is serialized.
 *
 * @param toolCall - The function call to execute; `args` is a raw JSON string.
 * @param toolCtx - The tools available for execution, keyed by name.
 * @returns The tool result serialized with `JSON.stringify`, or an error description.
 */
export async function executeToolCall(
  toolCall: FunctionCall,
  toolCtx: ToolContext,
): Promise<FunctionCallOutput> {
  const { callId, name } = toolCall;
  const failure = (output: string): FunctionCallOutput =>
    FunctionCallOutput.create({ callId, output, isError: true });

  // Report an unknown tool explicitly rather than letting the property access
  // below fail with an unrelated TypeError.
  const tool = toolCtx[name];
  if (!tool) {
    return failure(`AI tool ${name} not found`);
  }

  // Ensure valid JSON
  let args: object | undefined;
  try {
    args = JSON.parse(toolCall.args);
  } catch (error) {
    return failure(`Invalid JSON: ${error}`);
  }

  // Ensure valid arguments schema; non-Zod schemas are passed through unvalidated
  let params: object | undefined;
  try {
    params = tool.parameters instanceof ZodObject ? tool.parameters.parse(args) : args;
  } catch (error) {
    return failure(`Arguments parsing failed: ${error}`);
  }

  try {
    const result = await tool.execute(params, createToolOptions(callId));
    return FunctionCallOutput.create({
      callId,
      // JSON.stringify yields undefined for a tool that returns nothing;
      // fall back to the JSON literal so `output` is always a string.
      output: JSON.stringify(result) ?? 'null',
      isError: false,
    });
  } catch (error) {
    return failure(`Tool execution failed: ${error}`);
  }
}
236
+
237
/**
 * Longest common subsequence of two ID lists, computed with the classic
 * dynamic-programming table and a backwards walk through it.
 *
 * @param oldIds - The old list of IDs.
 * @param newIds - The new list of IDs.
 * @returns The IDs (in order) that form the longest common subsequence of both lists.
 */
function computeLCS(oldIds: string[], newIds: string[]): string[] {
  const rowCount = oldIds.length;
  const colCount = newIds.length;

  // lengths[r][c] = LCS length of the first r old IDs and the first c new IDs
  const lengths: number[][] = Array.from({ length: rowCount + 1 }, () =>
    new Array<number>(colCount + 1).fill(0),
  );

  for (let r = 0; r < rowCount; r++) {
    const above = lengths[r]!;
    const current = lengths[r + 1]!;
    for (let c = 0; c < colCount; c++) {
      current[c + 1] =
        oldIds[r] === newIds[c] ? above[c]! + 1 : Math.max(above[c + 1]!, current[c]!);
    }
  }

  // Walk back from the bottom-right corner, collecting matches front-first
  const common: string[] = [];
  let r = rowCount;
  let c = colCount;
  while (r > 0 && c > 0) {
    const candidate = oldIds[r - 1]!;
    if (candidate === newIds[c - 1]) {
      common.unshift(candidate);
      r -= 1;
      c -= 1;
      continue;
    }
    // On ties, step left (drop a new ID) rather than up
    if (lengths[r - 1]![c]! > lengths[r]![c - 1]!) {
      r -= 1;
    } else {
      c -= 1;
    }
  }

  return common;
}
281
+
282
/** Operations produced by {@link computeChatCtxDiff}. */
interface DiffOps {
  /** IDs of items to remove. */
  toRemove: string[];
  /**
   * Items to create, as `[previousItemId, id]` pairs.
   * If `previousItemId` is null, add to the root.
   */
  toCreate: [string | null, string][];
}
286
+
287
/**
 * Work out which items must be removed from and created in `oldCtx` so that it
 * matches `newCtx`, keeping the items on the longest common subsequence of IDs.
 *
 * @param oldCtx - The old chat context.
 * @param newCtx - The new chat context.
 * @returns The IDs to remove, plus `[previousItemId, id]` pairs for the items to create
 *   (`previousItemId` is null when the item goes at the root).
 */
export function computeChatCtxDiff(oldCtx: ChatContext, newCtx: ChatContext): DiffOps {
  const idOf = (item: ChatItem) => item.id;
  const keptIds = new Set(computeLCS(oldCtx.items.map(idOf), newCtx.items.map(idOf)));

  // Old items that are not part of the common subsequence have to go
  const toRemove: string[] = [];
  for (const { id } of oldCtx.items) {
    if (!keptIds.has(id)) {
      toRemove.push(id);
    }
  }

  // Each new item outside the common subsequence is created right after the
  // item that precedes it in newCtx (null when it is the very first item)
  const toCreate: Array<[string | null, string]> = [];
  let previousId: string | null = null;
  for (const { id } of newCtx.items) {
    if (!keptIds.has(id)) {
      toCreate.push([previousId, id]);
    }
    previousId = id;
  }

  return { toRemove, toCreate };
}
318
+
319
/**
 * Normalize a tool input schema to JSON Schema: Zod object schemas are
 * converted through `oaiParams`, anything else is returned unchanged.
 *
 * @param schema - The tool input schema.
 * @returns The JSON Schema for the tool input.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
export function toJsonSchema(schema: ToolInputSchema<any>): JSONSchema7 {
  return schema instanceof ZodObject ? oaiParams(schema) : schema;
}
@@ -2,123 +2,155 @@
2
2
  //
3
3
  // SPDX-License-Identifier: Apache-2.0
4
4
 
5
- export interface LLMMetrics {
5
+ export type AgentMetrics =
6
+ | STTMetrics
7
+ | LLMMetrics
8
+ | TTSMetrics
9
+ | VADMetrics
10
+ | EOUMetrics
11
+ | RealtimeModelMetrics;
12
+
13
+ export type LLMMetrics = {
14
+ type: 'llm_metrics';
15
+ label: string;
6
16
  requestId: string;
7
17
  timestamp: number;
8
- ttft: number;
9
18
  duration: number;
10
- label: string;
19
+ ttft: number;
11
20
  cancelled: boolean;
12
21
  completionTokens: number;
13
22
  promptTokens: number;
23
+ promptCachedTokens: number;
14
24
  totalTokens: number;
15
25
  tokensPerSecond: number;
16
- error?: Error;
17
- }
26
+ speechId?: string;
27
+ };
18
28
 
19
- export interface STTMetrics {
29
+ export type STTMetrics = {
30
+ type: 'stt_metrics';
31
+ label: string;
20
32
  requestId: string;
21
33
  timestamp: number;
34
+ /**
35
+ * The request duration in seconds, 0.0 if the STT is streaming.
36
+ */
22
37
  duration: number;
23
- label: string;
38
+ /**
39
+ * The duration of the pushed audio in seconds.
40
+ */
24
41
  audioDuration: number;
42
+ /**
43
+ * Whether the STT is streaming (e.g using websocket).
44
+ */
25
45
  streamed: boolean;
26
- error?: Error;
27
- }
46
+ };
28
47
 
29
- export interface TTSMetrics {
48
+ export type TTSMetrics = {
49
+ type: 'tts_metrics';
50
+ label: string;
30
51
  requestId: string;
31
52
  timestamp: number;
32
53
  ttfb: number;
33
54
  duration: number;
34
- label: string;
35
55
  audioDuration: number;
36
56
  cancelled: boolean;
37
57
  charactersCount: number;
38
58
  streamed: boolean;
39
- error?: Error;
40
- }
59
+ segmentId?: string;
60
+ speechId?: string;
61
+ };
41
62
 
42
- export interface VADMetrics {
63
+ export type VADMetrics = {
64
+ type: 'vad_metrics';
65
+ label: string;
43
66
  timestamp: number;
44
67
  idleTime: number;
45
68
  inferenceDurationTotal: number;
46
69
  inferenceCount: number;
47
- label: string;
48
- }
70
+ };
49
71
 
50
- export interface PipelineEOUMetrics {
72
+ export type EOUMetrics = {
73
+ type: 'eou_metrics';
74
+ timestamp: number;
51
75
  /**
52
- * Unique identifier shared across different metrics to combine related STT, LLM, and TTS metrics
76
+ * Amount of time between the end of speech from VAD and the decision to end the user's turn.
77
+ * Set to 0.0 if the end of speech was not detected.
53
78
  */
54
- sequenceId: string;
55
- /** Timestamp of when the event was recorded */
56
- timestamp: number;
57
- /** Amount of time between the end of speech from VAD and the decision to end the user's turn */
58
79
  endOfUtteranceDelay: number;
59
80
  /**
60
81
  * Time taken to obtain the transcript after the end of the user's speech.
61
- *
62
- * @remarks
63
- * May be 0 if the transcript was already available.
82
+ * Set to 0.0 if the end of speech was not detected.
64
83
  */
65
84
  transcriptionDelay: number;
66
- }
67
-
68
- export interface PipelineLLMMetrics extends LLMMetrics {
69
- /**
70
- * Unique identifier shared across different metrics to combine related STT, LLM, and TTS metrics
71
- */
72
- sequenceId: string;
73
- }
74
-
75
- export interface PipelineTTSMetrics extends TTSMetrics {
76
85
  /**
77
- * Unique identifier shared across different metrics to combine related STT, LLM, and TTS metrics
86
+ * Time taken to invoke the user's `Agent.onUserTurnCompleted` callback.
78
87
  */
79
- sequenceId: string;
80
- }
88
+ onUserTurnCompletedDelay: number;
89
+ speechId?: string;
90
+ };
81
91
 
82
- export type PipelineSTTMetrics = STTMetrics;
83
- export type PipelineVADMetrics = VADMetrics;
92
+ export type RealtimeModelMetricsCachedTokenDetails = {
93
+ audioTokens: number;
94
+ textTokens: number;
95
+ imageTokens: number;
96
+ };
84
97
 
85
- export class MultimodalLLMError extends Error {
86
- type?: string;
87
- reason?: string;
88
- code?: string;
89
- constructor({
90
- type,
91
- reason,
92
- code,
93
- message,
94
- }: { type?: string; reason?: string; code?: string; message?: string } = {}) {
95
- super(message);
96
- this.type = type;
97
- this.reason = reason;
98
- this.code = code;
99
- }
100
- }
98
+ export type RealtimeModelMetricsInputTokenDetails = {
99
+ audioTokens: number;
100
+ textTokens: number;
101
+ imageTokens: number;
102
+ cachedTokens: number;
103
+ cachedTokensDetails?: RealtimeModelMetricsCachedTokenDetails;
104
+ };
101
105
 
102
- export interface MultimodalLLMMetrics extends LLMMetrics {
103
- inputTokenDetails: {
104
- cachedTokens: number;
105
- textTokens: number;
106
- audioTokens: number;
107
- };
108
- outputTokenDetails: {
109
- textTokens: number;
110
- audioTokens: number;
111
- };
112
- }
106
+ export type RealtimeModelMetricsOutputTokenDetails = {
107
+ textTokens: number;
108
+ audioTokens: number;
109
+ imageTokens: number;
110
+ };
113
111
 
114
- export type AgentMetrics =
115
- | STTMetrics
116
- | LLMMetrics
117
- | TTSMetrics
118
- | VADMetrics
119
- | PipelineSTTMetrics
120
- | PipelineEOUMetrics
121
- | PipelineLLMMetrics
122
- | PipelineTTSMetrics
123
- | PipelineVADMetrics
124
- | MultimodalLLMMetrics;
112
+ export type RealtimeModelMetrics = {
113
+ type: 'realtime_model_metrics';
114
+ label: string;
115
+ requestId: string;
116
+ /**
117
+ * The timestamp of the response creation.
118
+ */
119
+ timestamp: number;
120
+ /**
121
+ * The duration of the response from created to done in seconds.
122
+ */
123
+ duration: number;
124
+ /**
125
+ * Time to first audio token in seconds. -1 if no audio token was sent.
126
+ */
127
+ ttft: number;
128
+ /**
129
+ * Whether the request was cancelled.
130
+ */
131
+ cancelled: boolean;
132
+ /**
133
+ * The number of input tokens used in the Response, including text and audio tokens.
134
+ */
135
+ inputTokens: number;
136
+ /**
137
+ * The number of output tokens sent in the Response, including text and audio tokens.
138
+ */
139
+ outputTokens: number;
140
+ /**
141
+ * The total number of tokens in the Response.
142
+ */
143
+ totalTokens: number;
144
+ /**
145
+ * The number of tokens per second.
146
+ */
147
+ tokensPerSecond: number;
148
+ /**
149
+ * Details about the input tokens used in the Response.
150
+ */
151
+ inputTokenDetails: RealtimeModelMetricsInputTokenDetails;
152
+ /**
153
+ * Details about the output tokens used in the Response.
154
+ */
155
+ outputTokenDetails: RealtimeModelMetricsOutputTokenDetails;
156
+ };
@@ -4,17 +4,11 @@
4
4
 
5
5
  export type {
6
6
  AgentMetrics,
7
- STTMetrics,
8
7
  LLMMetrics,
8
+ RealtimeModelMetrics,
9
+ STTMetrics,
9
10
  TTSMetrics,
10
11
  VADMetrics,
11
- PipelineSTTMetrics,
12
- PipelineEOUMetrics,
13
- PipelineLLMMetrics,
14
- PipelineTTSMetrics,
15
- PipelineVADMetrics,
16
- MultimodalLLMMetrics,
17
12
  } from './base.js';
18
- export { MultimodalLLMError } from './base.js';
19
- export { type UsageSummary, UsageCollector } from './usage_collector.js';
13
+ export { UsageCollector, type UsageSummary } from './usage_collector.js';
20
14
  export { logMetrics } from './utils.js';
@@ -2,39 +2,45 @@
2
2
  //
3
3
  // SPDX-License-Identifier: Apache-2.0
4
4
  import type { AgentMetrics } from './base.js';
5
- import { isLLMMetrics, isSTTMetrics, isTTSMetrics } from './utils.js';
6
5
 
7
6
  export interface UsageSummary {
8
7
  llmPromptTokens: number;
8
+ llmPromptCachedTokens: number;
9
9
  llmCompletionTokens: number;
10
10
  ttsCharactersCount: number;
11
11
  sttAudioDuration: number;
12
12
  }
13
13
 
14
14
  export class UsageCollector {
15
- #summary: UsageSummary;
15
+ private summary: UsageSummary;
16
16
 
17
17
  constructor() {
18
- this.#summary = {
18
+ this.summary = {
19
19
  llmPromptTokens: 0,
20
+ llmPromptCachedTokens: 0,
20
21
  llmCompletionTokens: 0,
21
22
  ttsCharactersCount: 0,
22
23
  sttAudioDuration: 0,
23
24
  };
24
25
  }
25
26
 
26
- collect(metrics: AgentMetrics) {
27
- if (isLLMMetrics(metrics)) {
28
- this.#summary.llmPromptTokens += metrics.promptTokens;
29
- this.#summary.llmCompletionTokens += metrics.completionTokens;
30
- } else if (isTTSMetrics(metrics)) {
31
- this.#summary.ttsCharactersCount += metrics.charactersCount;
32
- } else if (isSTTMetrics(metrics)) {
33
- this.#summary.sttAudioDuration += metrics.audioDuration;
27
+ collect(metrics: AgentMetrics): void {
28
+ if (metrics.type === 'llm_metrics') {
29
+ this.summary.llmPromptTokens += metrics.promptTokens;
30
+ this.summary.llmPromptCachedTokens += metrics.promptCachedTokens;
31
+ this.summary.llmCompletionTokens += metrics.completionTokens;
32
+ } else if (metrics.type === 'realtime_model_metrics') {
33
+ this.summary.llmPromptTokens += metrics.inputTokens;
34
+ this.summary.llmPromptCachedTokens += metrics.inputTokenDetails.cachedTokens;
35
+ this.summary.llmCompletionTokens += metrics.outputTokens;
36
+ } else if (metrics.type === 'tts_metrics') {
37
+ this.summary.ttsCharactersCount += metrics.charactersCount;
38
+ } else if (metrics.type === 'stt_metrics') {
39
+ this.summary.sttAudioDuration += metrics.audioDuration;
34
40
  }
35
41
  }
36
42
 
37
- get summary(): UsageSummary {
38
- return { ...this.#summary };
43
+ getSummary(): UsageSummary {
44
+ return { ...this.summary };
39
45
  }
40
46
  }