@livekit/agents 0.7.8 → 1.0.0-next.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (627) hide show
  1. package/dist/_exceptions.cjs +109 -0
  2. package/dist/_exceptions.cjs.map +1 -0
  3. package/dist/_exceptions.d.cts +64 -0
  4. package/dist/_exceptions.d.ts +64 -0
  5. package/dist/_exceptions.d.ts.map +1 -0
  6. package/dist/_exceptions.js +80 -0
  7. package/dist/_exceptions.js.map +1 -0
  8. package/dist/audio.cjs +10 -3
  9. package/dist/audio.cjs.map +1 -1
  10. package/dist/audio.d.cts +2 -0
  11. package/dist/audio.d.ts +2 -0
  12. package/dist/audio.d.ts.map +1 -1
  13. package/dist/audio.js +8 -2
  14. package/dist/audio.js.map +1 -1
  15. package/dist/cli.cjs +25 -0
  16. package/dist/cli.cjs.map +1 -1
  17. package/dist/cli.d.ts.map +1 -1
  18. package/dist/cli.js +25 -0
  19. package/dist/cli.js.map +1 -1
  20. package/dist/constants.cjs +6 -0
  21. package/dist/constants.cjs.map +1 -1
  22. package/dist/constants.d.cts +2 -0
  23. package/dist/constants.d.ts +2 -0
  24. package/dist/constants.d.ts.map +1 -1
  25. package/dist/constants.js +4 -0
  26. package/dist/constants.js.map +1 -1
  27. package/dist/http_server.cjs.map +1 -1
  28. package/dist/http_server.d.cts +1 -0
  29. package/dist/http_server.d.ts +1 -0
  30. package/dist/http_server.d.ts.map +1 -1
  31. package/dist/http_server.js.map +1 -1
  32. package/dist/index.cjs +27 -20
  33. package/dist/index.cjs.map +1 -1
  34. package/dist/index.d.cts +13 -10
  35. package/dist/index.d.ts +13 -10
  36. package/dist/index.d.ts.map +1 -1
  37. package/dist/index.js +15 -11
  38. package/dist/index.js.map +1 -1
  39. package/dist/inference_runner.cjs +0 -1
  40. package/dist/inference_runner.cjs.map +1 -1
  41. package/dist/inference_runner.d.cts +2 -3
  42. package/dist/inference_runner.d.ts +2 -3
  43. package/dist/inference_runner.d.ts.map +1 -1
  44. package/dist/inference_runner.js +0 -1
  45. package/dist/inference_runner.js.map +1 -1
  46. package/dist/ipc/inference_proc_executor.cjs +2 -2
  47. package/dist/ipc/inference_proc_executor.cjs.map +1 -1
  48. package/dist/ipc/inference_proc_executor.js +2 -2
  49. package/dist/ipc/inference_proc_executor.js.map +1 -1
  50. package/dist/ipc/job_executor.cjs.map +1 -1
  51. package/dist/ipc/job_executor.js.map +1 -1
  52. package/dist/ipc/job_proc_executor.cjs +1 -0
  53. package/dist/ipc/job_proc_executor.cjs.map +1 -1
  54. package/dist/ipc/job_proc_executor.js +1 -0
  55. package/dist/ipc/job_proc_executor.js.map +1 -1
  56. package/dist/ipc/job_proc_lazy_main.cjs +1 -1
  57. package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
  58. package/dist/ipc/job_proc_lazy_main.js +1 -1
  59. package/dist/ipc/job_proc_lazy_main.js.map +1 -1
  60. package/dist/ipc/supervised_proc.d.cts +1 -1
  61. package/dist/ipc/supervised_proc.d.ts +1 -1
  62. package/dist/ipc/supervised_proc.d.ts.map +1 -1
  63. package/dist/job.cjs +14 -2
  64. package/dist/job.cjs.map +1 -1
  65. package/dist/job.d.cts +8 -0
  66. package/dist/job.d.ts +8 -0
  67. package/dist/job.d.ts.map +1 -1
  68. package/dist/job.js +12 -1
  69. package/dist/job.js.map +1 -1
  70. package/dist/llm/chat_context.cjs +332 -82
  71. package/dist/llm/chat_context.cjs.map +1 -1
  72. package/dist/llm/chat_context.d.cts +152 -48
  73. package/dist/llm/chat_context.d.ts +152 -48
  74. package/dist/llm/chat_context.d.ts.map +1 -1
  75. package/dist/llm/chat_context.js +327 -81
  76. package/dist/llm/chat_context.js.map +1 -1
  77. package/dist/llm/chat_context.test.cjs +380 -0
  78. package/dist/llm/chat_context.test.cjs.map +1 -0
  79. package/dist/llm/chat_context.test.js +385 -0
  80. package/dist/llm/chat_context.test.js.map +1 -0
  81. package/dist/llm/index.cjs +37 -8
  82. package/dist/llm/index.cjs.map +1 -1
  83. package/dist/llm/index.d.cts +7 -3
  84. package/dist/llm/index.d.ts +7 -3
  85. package/dist/llm/index.d.ts.map +1 -1
  86. package/dist/llm/index.js +39 -9
  87. package/dist/llm/index.js.map +1 -1
  88. package/dist/llm/llm.cjs +98 -33
  89. package/dist/llm/llm.cjs.map +1 -1
  90. package/dist/llm/llm.d.cts +50 -24
  91. package/dist/llm/llm.d.ts +50 -24
  92. package/dist/llm/llm.d.ts.map +1 -1
  93. package/dist/llm/llm.js +99 -33
  94. package/dist/llm/llm.js.map +1 -1
  95. package/dist/llm/provider_format/google.cjs +128 -0
  96. package/dist/llm/provider_format/google.cjs.map +1 -0
  97. package/dist/llm/provider_format/google.d.cts +6 -0
  98. package/dist/llm/provider_format/google.d.ts +6 -0
  99. package/dist/llm/provider_format/google.d.ts.map +1 -0
  100. package/dist/llm/provider_format/google.js +104 -0
  101. package/dist/llm/provider_format/google.js.map +1 -0
  102. package/dist/llm/provider_format/google.test.cjs +676 -0
  103. package/dist/llm/provider_format/google.test.cjs.map +1 -0
  104. package/dist/llm/provider_format/google.test.js +675 -0
  105. package/dist/llm/provider_format/google.test.js.map +1 -0
  106. package/dist/llm/provider_format/index.cjs +40 -0
  107. package/dist/llm/provider_format/index.cjs.map +1 -0
  108. package/dist/llm/provider_format/index.d.cts +4 -0
  109. package/dist/llm/provider_format/index.d.ts +4 -0
  110. package/dist/llm/provider_format/index.d.ts.map +1 -0
  111. package/dist/llm/provider_format/index.js +16 -0
  112. package/dist/llm/provider_format/index.js.map +1 -0
  113. package/dist/llm/provider_format/openai.cjs +116 -0
  114. package/dist/llm/provider_format/openai.cjs.map +1 -0
  115. package/dist/llm/provider_format/openai.d.cts +3 -0
  116. package/dist/llm/provider_format/openai.d.ts +3 -0
  117. package/dist/llm/provider_format/openai.d.ts.map +1 -0
  118. package/dist/llm/provider_format/openai.js +92 -0
  119. package/dist/llm/provider_format/openai.js.map +1 -0
  120. package/dist/llm/provider_format/openai.test.cjs +490 -0
  121. package/dist/llm/provider_format/openai.test.cjs.map +1 -0
  122. package/dist/llm/provider_format/openai.test.js +489 -0
  123. package/dist/llm/provider_format/openai.test.js.map +1 -0
  124. package/dist/llm/provider_format/utils.cjs +146 -0
  125. package/dist/llm/provider_format/utils.cjs.map +1 -0
  126. package/dist/llm/provider_format/utils.d.cts +38 -0
  127. package/dist/llm/provider_format/utils.d.ts +38 -0
  128. package/dist/llm/provider_format/utils.d.ts.map +1 -0
  129. package/dist/llm/provider_format/utils.js +122 -0
  130. package/dist/llm/provider_format/utils.js.map +1 -0
  131. package/dist/llm/realtime.cjs +77 -0
  132. package/dist/llm/realtime.cjs.map +1 -0
  133. package/dist/llm/realtime.d.cts +98 -0
  134. package/dist/llm/realtime.d.ts +98 -0
  135. package/dist/llm/realtime.d.ts.map +1 -0
  136. package/dist/llm/realtime.js +52 -0
  137. package/dist/llm/realtime.js.map +1 -0
  138. package/dist/llm/remote_chat_context.cjs +112 -0
  139. package/dist/llm/remote_chat_context.cjs.map +1 -0
  140. package/dist/llm/remote_chat_context.d.cts +23 -0
  141. package/dist/llm/remote_chat_context.d.ts +23 -0
  142. package/dist/llm/remote_chat_context.d.ts.map +1 -0
  143. package/dist/llm/remote_chat_context.js +88 -0
  144. package/dist/llm/remote_chat_context.js.map +1 -0
  145. package/dist/llm/remote_chat_context.test.cjs +225 -0
  146. package/dist/llm/remote_chat_context.test.cjs.map +1 -0
  147. package/dist/llm/remote_chat_context.test.js +224 -0
  148. package/dist/llm/remote_chat_context.test.js.map +1 -0
  149. package/dist/llm/tool_context.cjs +111 -0
  150. package/dist/llm/tool_context.cjs.map +1 -0
  151. package/dist/llm/tool_context.d.cts +125 -0
  152. package/dist/llm/tool_context.d.ts +125 -0
  153. package/dist/llm/tool_context.d.ts.map +1 -0
  154. package/dist/llm/tool_context.js +80 -0
  155. package/dist/llm/tool_context.js.map +1 -0
  156. package/dist/llm/tool_context.test.cjs +162 -0
  157. package/dist/llm/tool_context.test.cjs.map +1 -0
  158. package/dist/llm/tool_context.test.js +161 -0
  159. package/dist/llm/tool_context.test.js.map +1 -0
  160. package/dist/llm/tool_context.type.test.cjs +92 -0
  161. package/dist/llm/tool_context.type.test.cjs.map +1 -0
  162. package/dist/llm/tool_context.type.test.js +91 -0
  163. package/dist/llm/tool_context.type.test.js.map +1 -0
  164. package/dist/llm/utils.cjs +260 -0
  165. package/dist/llm/utils.cjs.map +1 -0
  166. package/dist/llm/utils.d.cts +42 -0
  167. package/dist/llm/utils.d.ts +42 -0
  168. package/dist/llm/utils.d.ts.map +1 -0
  169. package/dist/llm/utils.js +223 -0
  170. package/dist/llm/utils.js.map +1 -0
  171. package/dist/llm/utils.test.cjs +513 -0
  172. package/dist/llm/utils.test.cjs.map +1 -0
  173. package/dist/llm/utils.test.js +490 -0
  174. package/dist/llm/utils.test.js.map +1 -0
  175. package/dist/metrics/base.cjs +0 -27
  176. package/dist/metrics/base.cjs.map +1 -1
  177. package/dist/metrics/base.d.cts +105 -63
  178. package/dist/metrics/base.d.ts +105 -63
  179. package/dist/metrics/base.d.ts.map +1 -1
  180. package/dist/metrics/base.js +0 -19
  181. package/dist/metrics/base.js.map +1 -1
  182. package/dist/metrics/index.cjs +0 -3
  183. package/dist/metrics/index.cjs.map +1 -1
  184. package/dist/metrics/index.d.cts +2 -3
  185. package/dist/metrics/index.d.ts +2 -3
  186. package/dist/metrics/index.d.ts.map +1 -1
  187. package/dist/metrics/index.js +0 -2
  188. package/dist/metrics/index.js.map +1 -1
  189. package/dist/metrics/usage_collector.cjs +17 -12
  190. package/dist/metrics/usage_collector.cjs.map +1 -1
  191. package/dist/metrics/usage_collector.d.cts +3 -2
  192. package/dist/metrics/usage_collector.d.ts +3 -2
  193. package/dist/metrics/usage_collector.d.ts.map +1 -1
  194. package/dist/metrics/usage_collector.js +17 -12
  195. package/dist/metrics/usage_collector.js.map +1 -1
  196. package/dist/metrics/utils.cjs +22 -59
  197. package/dist/metrics/utils.cjs.map +1 -1
  198. package/dist/metrics/utils.d.cts +1 -8
  199. package/dist/metrics/utils.d.ts +1 -8
  200. package/dist/metrics/utils.d.ts.map +1 -1
  201. package/dist/metrics/utils.js +22 -52
  202. package/dist/metrics/utils.js.map +1 -1
  203. package/dist/multimodal/index.cjs +0 -2
  204. package/dist/multimodal/index.cjs.map +1 -1
  205. package/dist/multimodal/index.d.cts +0 -1
  206. package/dist/multimodal/index.d.ts +0 -1
  207. package/dist/multimodal/index.d.ts.map +1 -1
  208. package/dist/multimodal/index.js +0 -1
  209. package/dist/multimodal/index.js.map +1 -1
  210. package/dist/plugin.cjs +24 -8
  211. package/dist/plugin.cjs.map +1 -1
  212. package/dist/plugin.d.cts +18 -4
  213. package/dist/plugin.d.ts +18 -4
  214. package/dist/plugin.d.ts.map +1 -1
  215. package/dist/plugin.js +22 -7
  216. package/dist/plugin.js.map +1 -1
  217. package/dist/stream/deferred_stream.cjs +98 -0
  218. package/dist/stream/deferred_stream.cjs.map +1 -0
  219. package/dist/stream/deferred_stream.d.cts +27 -0
  220. package/dist/stream/deferred_stream.d.ts +27 -0
  221. package/dist/stream/deferred_stream.d.ts.map +1 -0
  222. package/dist/stream/deferred_stream.js +73 -0
  223. package/dist/stream/deferred_stream.js.map +1 -0
  224. package/dist/stream/deferred_stream.test.cjs +527 -0
  225. package/dist/stream/deferred_stream.test.cjs.map +1 -0
  226. package/dist/stream/deferred_stream.test.js +526 -0
  227. package/dist/stream/deferred_stream.test.js.map +1 -0
  228. package/dist/stream/identity_transform.cjs +42 -0
  229. package/dist/stream/identity_transform.cjs.map +1 -0
  230. package/dist/stream/identity_transform.d.cts +6 -0
  231. package/dist/stream/identity_transform.d.ts +6 -0
  232. package/dist/stream/identity_transform.d.ts.map +1 -0
  233. package/dist/stream/identity_transform.js +18 -0
  234. package/dist/stream/identity_transform.js.map +1 -0
  235. package/dist/stream/identity_transform.test.cjs +125 -0
  236. package/dist/stream/identity_transform.test.cjs.map +1 -0
  237. package/dist/stream/identity_transform.test.js +124 -0
  238. package/dist/stream/identity_transform.test.js.map +1 -0
  239. package/dist/stream/index.cjs +38 -0
  240. package/dist/stream/index.cjs.map +1 -0
  241. package/dist/stream/index.d.cts +5 -0
  242. package/dist/stream/index.d.ts +5 -0
  243. package/dist/stream/index.d.ts.map +1 -0
  244. package/dist/stream/index.js +11 -0
  245. package/dist/stream/index.js.map +1 -0
  246. package/dist/stream/merge_readable_streams.cjs +59 -0
  247. package/dist/stream/merge_readable_streams.cjs.map +1 -0
  248. package/dist/stream/merge_readable_streams.d.cts +4 -0
  249. package/dist/stream/merge_readable_streams.d.ts +4 -0
  250. package/dist/stream/merge_readable_streams.d.ts.map +1 -0
  251. package/dist/stream/merge_readable_streams.js +35 -0
  252. package/dist/stream/merge_readable_streams.js.map +1 -0
  253. package/dist/stream/stream_channel.cjs +47 -0
  254. package/dist/stream/stream_channel.cjs.map +1 -0
  255. package/dist/stream/stream_channel.d.cts +9 -0
  256. package/dist/stream/stream_channel.d.ts +9 -0
  257. package/dist/stream/stream_channel.d.ts.map +1 -0
  258. package/dist/stream/stream_channel.js +23 -0
  259. package/dist/stream/stream_channel.js.map +1 -0
  260. package/dist/stream/stream_channel.test.cjs +97 -0
  261. package/dist/stream/stream_channel.test.cjs.map +1 -0
  262. package/dist/stream/stream_channel.test.js +96 -0
  263. package/dist/stream/stream_channel.test.js.map +1 -0
  264. package/dist/stt/stream_adapter.cjs +3 -4
  265. package/dist/stt/stream_adapter.cjs.map +1 -1
  266. package/dist/stt/stream_adapter.d.cts +1 -0
  267. package/dist/stt/stream_adapter.d.ts +1 -0
  268. package/dist/stt/stream_adapter.d.ts.map +1 -1
  269. package/dist/stt/stream_adapter.js +3 -4
  270. package/dist/stt/stream_adapter.js.map +1 -1
  271. package/dist/stt/stt.cjs +101 -10
  272. package/dist/stt/stt.cjs.map +1 -1
  273. package/dist/stt/stt.d.cts +26 -5
  274. package/dist/stt/stt.d.ts +26 -5
  275. package/dist/stt/stt.d.ts.map +1 -1
  276. package/dist/stt/stt.js +102 -11
  277. package/dist/stt/stt.js.map +1 -1
  278. package/dist/tokenize/basic/basic.cjs +10 -5
  279. package/dist/tokenize/basic/basic.cjs.map +1 -1
  280. package/dist/tokenize/basic/basic.d.cts +7 -1
  281. package/dist/tokenize/basic/basic.d.ts +7 -1
  282. package/dist/tokenize/basic/basic.d.ts.map +1 -1
  283. package/dist/tokenize/basic/basic.js +10 -5
  284. package/dist/tokenize/basic/basic.js.map +1 -1
  285. package/dist/tokenize/basic/sentence.cjs +14 -6
  286. package/dist/tokenize/basic/sentence.cjs.map +1 -1
  287. package/dist/tokenize/basic/sentence.d.cts +1 -1
  288. package/dist/tokenize/basic/sentence.d.ts +1 -1
  289. package/dist/tokenize/basic/sentence.d.ts.map +1 -1
  290. package/dist/tokenize/basic/sentence.js +14 -6
  291. package/dist/tokenize/basic/sentence.js.map +1 -1
  292. package/dist/tokenize/token_stream.cjs +5 -3
  293. package/dist/tokenize/token_stream.cjs.map +1 -1
  294. package/dist/tokenize/token_stream.d.cts +1 -0
  295. package/dist/tokenize/token_stream.d.ts +1 -0
  296. package/dist/tokenize/token_stream.d.ts.map +1 -1
  297. package/dist/tokenize/token_stream.js +6 -4
  298. package/dist/tokenize/token_stream.js.map +1 -1
  299. package/dist/transcription.cjs +1 -2
  300. package/dist/transcription.cjs.map +1 -1
  301. package/dist/transcription.d.ts.map +1 -1
  302. package/dist/transcription.js +2 -3
  303. package/dist/transcription.js.map +1 -1
  304. package/dist/tts/index.cjs +2 -4
  305. package/dist/tts/index.cjs.map +1 -1
  306. package/dist/tts/index.d.cts +1 -1
  307. package/dist/tts/index.d.ts +1 -1
  308. package/dist/tts/index.d.ts.map +1 -1
  309. package/dist/tts/index.js +1 -3
  310. package/dist/tts/index.js.map +1 -1
  311. package/dist/tts/stream_adapter.cjs +26 -13
  312. package/dist/tts/stream_adapter.cjs.map +1 -1
  313. package/dist/tts/stream_adapter.d.cts +1 -1
  314. package/dist/tts/stream_adapter.d.ts +1 -1
  315. package/dist/tts/stream_adapter.d.ts.map +1 -1
  316. package/dist/tts/stream_adapter.js +27 -14
  317. package/dist/tts/stream_adapter.js.map +1 -1
  318. package/dist/tts/tts.cjs +157 -25
  319. package/dist/tts/tts.cjs.map +1 -1
  320. package/dist/tts/tts.d.cts +29 -5
  321. package/dist/tts/tts.d.ts +29 -5
  322. package/dist/tts/tts.d.ts.map +1 -1
  323. package/dist/tts/tts.js +157 -24
  324. package/dist/tts/tts.js.map +1 -1
  325. package/dist/types.cjs +60 -0
  326. package/dist/types.cjs.map +1 -0
  327. package/dist/types.d.cts +13 -0
  328. package/dist/types.d.ts +13 -0
  329. package/dist/types.d.ts.map +1 -0
  330. package/dist/types.js +35 -0
  331. package/dist/types.js.map +1 -0
  332. package/dist/utils.cjs +281 -27
  333. package/dist/utils.cjs.map +1 -1
  334. package/dist/utils.d.cts +134 -9
  335. package/dist/utils.d.ts +134 -9
  336. package/dist/utils.d.ts.map +1 -1
  337. package/dist/utils.js +265 -26
  338. package/dist/utils.js.map +1 -1
  339. package/dist/utils.test.cjs +492 -0
  340. package/dist/utils.test.cjs.map +1 -0
  341. package/dist/utils.test.js +498 -0
  342. package/dist/utils.test.js.map +1 -0
  343. package/dist/vad.cjs +76 -20
  344. package/dist/vad.cjs.map +1 -1
  345. package/dist/vad.d.cts +25 -5
  346. package/dist/vad.d.ts +25 -5
  347. package/dist/vad.d.ts.map +1 -1
  348. package/dist/vad.js +76 -20
  349. package/dist/vad.js.map +1 -1
  350. package/dist/voice/agent.cjs +245 -0
  351. package/dist/voice/agent.cjs.map +1 -0
  352. package/dist/voice/agent.d.cts +78 -0
  353. package/dist/voice/agent.d.ts +78 -0
  354. package/dist/voice/agent.d.ts.map +1 -0
  355. package/dist/voice/agent.js +220 -0
  356. package/dist/voice/agent.js.map +1 -0
  357. package/dist/voice/agent.test.cjs +61 -0
  358. package/dist/voice/agent.test.cjs.map +1 -0
  359. package/dist/voice/agent.test.js +60 -0
  360. package/dist/voice/agent.test.js.map +1 -0
  361. package/dist/voice/agent_activity.cjs +1453 -0
  362. package/dist/voice/agent_activity.cjs.map +1 -0
  363. package/dist/voice/agent_activity.d.cts +94 -0
  364. package/dist/voice/agent_activity.d.ts +94 -0
  365. package/dist/voice/agent_activity.d.ts.map +1 -0
  366. package/dist/voice/agent_activity.js +1449 -0
  367. package/dist/voice/agent_activity.js.map +1 -0
  368. package/dist/voice/agent_session.cjs +312 -0
  369. package/dist/voice/agent_session.cjs.map +1 -0
  370. package/dist/voice/agent_session.d.cts +121 -0
  371. package/dist/voice/agent_session.d.ts +121 -0
  372. package/dist/voice/agent_session.d.ts.map +1 -0
  373. package/dist/voice/agent_session.js +295 -0
  374. package/dist/voice/agent_session.js.map +1 -0
  375. package/dist/voice/audio_recognition.cjs +375 -0
  376. package/dist/voice/audio_recognition.cjs.map +1 -0
  377. package/dist/voice/audio_recognition.d.cts +80 -0
  378. package/dist/voice/audio_recognition.d.ts +80 -0
  379. package/dist/voice/audio_recognition.d.ts.map +1 -0
  380. package/dist/voice/audio_recognition.js +351 -0
  381. package/dist/voice/audio_recognition.js.map +1 -0
  382. package/dist/voice/events.cjs +145 -0
  383. package/dist/voice/events.cjs.map +1 -0
  384. package/dist/voice/events.d.cts +124 -0
  385. package/dist/voice/events.d.ts +124 -0
  386. package/dist/voice/events.d.ts.map +1 -0
  387. package/dist/voice/events.js +110 -0
  388. package/dist/voice/events.js.map +1 -0
  389. package/dist/voice/generation.cjs +700 -0
  390. package/dist/voice/generation.cjs.map +1 -0
  391. package/dist/voice/generation.d.cts +115 -0
  392. package/dist/voice/generation.d.ts +115 -0
  393. package/dist/voice/generation.d.ts.map +1 -0
  394. package/dist/voice/generation.js +672 -0
  395. package/dist/voice/generation.js.map +1 -0
  396. package/dist/voice/index.cjs +40 -0
  397. package/dist/voice/index.cjs.map +1 -0
  398. package/dist/voice/index.d.cts +5 -0
  399. package/dist/voice/index.d.ts +5 -0
  400. package/dist/voice/index.d.ts.map +1 -0
  401. package/dist/voice/index.js +11 -0
  402. package/dist/voice/index.js.map +1 -0
  403. package/dist/voice/io.cjs +245 -0
  404. package/dist/voice/io.cjs.map +1 -0
  405. package/dist/voice/io.d.cts +101 -0
  406. package/dist/voice/io.d.ts +101 -0
  407. package/dist/voice/io.d.ts.map +1 -0
  408. package/dist/voice/io.js +217 -0
  409. package/dist/voice/io.js.map +1 -0
  410. package/dist/voice/room_io/_input.cjs +121 -0
  411. package/dist/voice/room_io/_input.cjs.map +1 -0
  412. package/dist/voice/room_io/_input.d.cts +24 -0
  413. package/dist/voice/room_io/_input.d.ts +24 -0
  414. package/dist/voice/room_io/_input.d.ts.map +1 -0
  415. package/dist/voice/room_io/_input.js +102 -0
  416. package/dist/voice/room_io/_input.js.map +1 -0
  417. package/dist/voice/room_io/_output.cjs +358 -0
  418. package/dist/voice/room_io/_output.cjs.map +1 -0
  419. package/dist/voice/room_io/_output.d.cts +75 -0
  420. package/dist/voice/room_io/_output.d.ts +75 -0
  421. package/dist/voice/room_io/_output.d.ts.map +1 -0
  422. package/dist/voice/room_io/_output.js +342 -0
  423. package/dist/voice/room_io/_output.js.map +1 -0
  424. package/dist/voice/room_io/index.cjs +25 -0
  425. package/dist/voice/room_io/index.cjs.map +1 -0
  426. package/dist/voice/room_io/index.d.cts +3 -0
  427. package/dist/voice/room_io/index.d.ts +3 -0
  428. package/dist/voice/room_io/index.d.ts.map +1 -0
  429. package/dist/voice/room_io/index.js +3 -0
  430. package/dist/voice/room_io/index.js.map +1 -0
  431. package/dist/voice/room_io/room_io.cjs +370 -0
  432. package/dist/voice/room_io/room_io.cjs.map +1 -0
  433. package/dist/voice/room_io/room_io.d.cts +73 -0
  434. package/dist/voice/room_io/room_io.d.ts +73 -0
  435. package/dist/voice/room_io/room_io.d.ts.map +1 -0
  436. package/dist/voice/room_io/room_io.js +361 -0
  437. package/dist/voice/room_io/room_io.js.map +1 -0
  438. package/dist/{pipeline/index.cjs → voice/run_context.cjs} +16 -11
  439. package/dist/voice/run_context.cjs.map +1 -0
  440. package/dist/voice/run_context.d.cts +12 -0
  441. package/dist/voice/run_context.d.ts +12 -0
  442. package/dist/voice/run_context.d.ts.map +1 -0
  443. package/dist/voice/run_context.js +14 -0
  444. package/dist/voice/run_context.js.map +1 -0
  445. package/dist/voice/speech_handle.cjs +105 -0
  446. package/dist/voice/speech_handle.cjs.map +1 -0
  447. package/dist/voice/speech_handle.d.cts +46 -0
  448. package/dist/voice/speech_handle.d.ts +46 -0
  449. package/dist/voice/speech_handle.d.ts.map +1 -0
  450. package/dist/voice/speech_handle.js +81 -0
  451. package/dist/voice/speech_handle.js.map +1 -0
  452. package/dist/voice/transcription/_utils.cjs +45 -0
  453. package/dist/voice/transcription/_utils.cjs.map +1 -0
  454. package/dist/voice/transcription/_utils.d.cts +3 -0
  455. package/dist/voice/transcription/_utils.d.ts +3 -0
  456. package/dist/voice/transcription/_utils.d.ts.map +1 -0
  457. package/dist/voice/transcription/_utils.js +21 -0
  458. package/dist/voice/transcription/_utils.js.map +1 -0
  459. package/dist/voice/transcription/index.cjs +23 -0
  460. package/dist/voice/transcription/index.cjs.map +1 -0
  461. package/dist/voice/transcription/index.d.cts +2 -0
  462. package/dist/voice/transcription/index.d.ts +2 -0
  463. package/dist/voice/transcription/index.d.ts.map +1 -0
  464. package/dist/voice/transcription/index.js +2 -0
  465. package/dist/voice/transcription/index.js.map +1 -0
  466. package/dist/voice/transcription/synchronizer.cjs +380 -0
  467. package/dist/voice/transcription/synchronizer.cjs.map +1 -0
  468. package/dist/voice/transcription/synchronizer.d.cts +86 -0
  469. package/dist/voice/transcription/synchronizer.d.ts +86 -0
  470. package/dist/voice/transcription/synchronizer.d.ts.map +1 -0
  471. package/dist/voice/transcription/synchronizer.js +355 -0
  472. package/dist/voice/transcription/synchronizer.js.map +1 -0
  473. package/dist/worker.cjs +22 -4
  474. package/dist/worker.cjs.map +1 -1
  475. package/dist/worker.d.cts +1 -1
  476. package/dist/worker.d.ts +1 -1
  477. package/dist/worker.d.ts.map +1 -1
  478. package/dist/worker.js +22 -4
  479. package/dist/worker.js.map +1 -1
  480. package/package.json +9 -2
  481. package/src/_exceptions.ts +137 -0
  482. package/src/audio.ts +12 -1
  483. package/src/cli.ts +37 -0
  484. package/src/constants.ts +2 -0
  485. package/src/http_server.ts +1 -0
  486. package/src/index.ts +13 -10
  487. package/src/inference_runner.ts +2 -3
  488. package/src/ipc/inference_proc_executor.ts +2 -2
  489. package/src/ipc/job_executor.ts +1 -1
  490. package/src/ipc/job_proc_executor.ts +1 -1
  491. package/src/ipc/job_proc_lazy_main.ts +1 -1
  492. package/src/job.ts +18 -0
  493. package/src/llm/__snapshots__/chat_context.test.ts.snap +527 -0
  494. package/src/llm/__snapshots__/tool_context.test.ts.snap +177 -0
  495. package/src/llm/__snapshots__/utils.test.ts.snap +65 -0
  496. package/src/llm/chat_context.test.ts +450 -0
  497. package/src/llm/chat_context.ts +501 -103
  498. package/src/llm/index.ts +53 -18
  499. package/src/llm/llm.ts +149 -50
  500. package/src/llm/provider_format/google.test.ts +772 -0
  501. package/src/llm/provider_format/google.ts +130 -0
  502. package/src/llm/provider_format/index.ts +23 -0
  503. package/src/llm/provider_format/openai.test.ts +581 -0
  504. package/src/llm/provider_format/openai.ts +118 -0
  505. package/src/llm/provider_format/utils.ts +183 -0
  506. package/src/llm/realtime.ts +151 -0
  507. package/src/llm/remote_chat_context.test.ts +290 -0
  508. package/src/llm/remote_chat_context.ts +114 -0
  509. package/src/llm/tool_context.test.ts +198 -0
  510. package/src/llm/tool_context.ts +259 -0
  511. package/src/llm/tool_context.type.test.ts +115 -0
  512. package/src/llm/utils.test.ts +670 -0
  513. package/src/llm/utils.ts +324 -0
  514. package/src/metrics/base.ts +110 -78
  515. package/src/metrics/index.ts +3 -9
  516. package/src/metrics/usage_collector.ts +19 -13
  517. package/src/metrics/utils.ts +24 -69
  518. package/src/multimodal/index.ts +0 -1
  519. package/src/plugin.ts +26 -8
  520. package/src/stream/deferred_stream.test.ts +755 -0
  521. package/src/stream/deferred_stream.ts +110 -0
  522. package/src/stream/identity_transform.test.ts +179 -0
  523. package/src/stream/identity_transform.ts +18 -0
  524. package/src/stream/index.ts +7 -0
  525. package/src/stream/merge_readable_streams.ts +40 -0
  526. package/src/stream/stream_channel.test.ts +129 -0
  527. package/src/stream/stream_channel.ts +32 -0
  528. package/src/stt/stream_adapter.ts +3 -5
  529. package/src/stt/stt.ts +135 -17
  530. package/src/tokenize/basic/basic.ts +13 -5
  531. package/src/tokenize/basic/sentence.ts +20 -6
  532. package/src/tokenize/token_stream.ts +7 -4
  533. package/src/transcription.ts +2 -3
  534. package/src/tts/index.ts +0 -1
  535. package/src/tts/stream_adapter.ts +42 -16
  536. package/src/tts/tts.ts +203 -21
  537. package/src/types.ts +42 -0
  538. package/src/utils.test.ts +658 -0
  539. package/src/utils.ts +375 -44
  540. package/src/vad.ts +90 -22
  541. package/src/voice/agent.test.ts +80 -0
  542. package/src/voice/agent.ts +332 -0
  543. package/src/voice/agent_activity.ts +1913 -0
  544. package/src/voice/agent_session.ts +460 -0
  545. package/src/voice/audio_recognition.ts +474 -0
  546. package/src/voice/events.ts +252 -0
  547. package/src/voice/generation.ts +881 -0
  548. package/src/voice/index.ts +7 -0
  549. package/src/voice/io.ts +304 -0
  550. package/src/voice/room_io/_input.ts +144 -0
  551. package/src/voice/room_io/_output.ts +436 -0
  552. package/src/voice/room_io/index.ts +5 -0
  553. package/src/voice/room_io/room_io.ts +495 -0
  554. package/src/voice/run_context.ts +20 -0
  555. package/src/voice/speech_handle.ts +104 -0
  556. package/src/voice/transcription/_utils.ts +25 -0
  557. package/src/voice/transcription/index.ts +4 -0
  558. package/src/voice/transcription/synchronizer.ts +478 -0
  559. package/src/worker.ts +22 -2
  560. package/dist/llm/function_context.cjs +0 -103
  561. package/dist/llm/function_context.cjs.map +0 -1
  562. package/dist/llm/function_context.d.cts +0 -47
  563. package/dist/llm/function_context.d.ts +0 -47
  564. package/dist/llm/function_context.d.ts.map +0 -1
  565. package/dist/llm/function_context.js +0 -78
  566. package/dist/llm/function_context.js.map +0 -1
  567. package/dist/llm/function_context.test.cjs +0 -218
  568. package/dist/llm/function_context.test.cjs.map +0 -1
  569. package/dist/llm/function_context.test.js +0 -217
  570. package/dist/llm/function_context.test.js.map +0 -1
  571. package/dist/multimodal/multimodal_agent.cjs +0 -451
  572. package/dist/multimodal/multimodal_agent.cjs.map +0 -1
  573. package/dist/multimodal/multimodal_agent.d.cts +0 -48
  574. package/dist/multimodal/multimodal_agent.d.ts +0 -48
  575. package/dist/multimodal/multimodal_agent.d.ts.map +0 -1
  576. package/dist/multimodal/multimodal_agent.js +0 -425
  577. package/dist/multimodal/multimodal_agent.js.map +0 -1
  578. package/dist/pipeline/agent_output.cjs +0 -197
  579. package/dist/pipeline/agent_output.cjs.map +0 -1
  580. package/dist/pipeline/agent_output.d.cts +0 -33
  581. package/dist/pipeline/agent_output.d.ts +0 -33
  582. package/dist/pipeline/agent_output.d.ts.map +0 -1
  583. package/dist/pipeline/agent_output.js +0 -172
  584. package/dist/pipeline/agent_output.js.map +0 -1
  585. package/dist/pipeline/agent_playout.cjs +0 -175
  586. package/dist/pipeline/agent_playout.cjs.map +0 -1
  587. package/dist/pipeline/agent_playout.d.cts +0 -40
  588. package/dist/pipeline/agent_playout.d.ts +0 -40
  589. package/dist/pipeline/agent_playout.d.ts.map +0 -1
  590. package/dist/pipeline/agent_playout.js +0 -139
  591. package/dist/pipeline/agent_playout.js.map +0 -1
  592. package/dist/pipeline/human_input.cjs +0 -171
  593. package/dist/pipeline/human_input.cjs.map +0 -1
  594. package/dist/pipeline/human_input.d.cts +0 -30
  595. package/dist/pipeline/human_input.d.ts +0 -30
  596. package/dist/pipeline/human_input.d.ts.map +0 -1
  597. package/dist/pipeline/human_input.js +0 -146
  598. package/dist/pipeline/human_input.js.map +0 -1
  599. package/dist/pipeline/index.cjs.map +0 -1
  600. package/dist/pipeline/index.d.cts +0 -2
  601. package/dist/pipeline/index.d.ts +0 -2
  602. package/dist/pipeline/index.d.ts.map +0 -1
  603. package/dist/pipeline/index.js +0 -11
  604. package/dist/pipeline/index.js.map +0 -1
  605. package/dist/pipeline/pipeline_agent.cjs +0 -849
  606. package/dist/pipeline/pipeline_agent.cjs.map +0 -1
  607. package/dist/pipeline/pipeline_agent.d.cts +0 -150
  608. package/dist/pipeline/pipeline_agent.d.ts +0 -150
  609. package/dist/pipeline/pipeline_agent.d.ts.map +0 -1
  610. package/dist/pipeline/pipeline_agent.js +0 -826
  611. package/dist/pipeline/pipeline_agent.js.map +0 -1
  612. package/dist/pipeline/speech_handle.cjs +0 -176
  613. package/dist/pipeline/speech_handle.cjs.map +0 -1
  614. package/dist/pipeline/speech_handle.d.cts +0 -37
  615. package/dist/pipeline/speech_handle.d.ts +0 -37
  616. package/dist/pipeline/speech_handle.d.ts.map +0 -1
  617. package/dist/pipeline/speech_handle.js +0 -152
  618. package/dist/pipeline/speech_handle.js.map +0 -1
  619. package/src/llm/function_context.test.ts +0 -248
  620. package/src/llm/function_context.ts +0 -142
  621. package/src/multimodal/multimodal_agent.ts +0 -555
  622. package/src/pipeline/agent_output.ts +0 -219
  623. package/src/pipeline/agent_playout.ts +0 -192
  624. package/src/pipeline/human_input.ts +0 -188
  625. package/src/pipeline/index.ts +0 -15
  626. package/src/pipeline/pipeline_agent.ts +0 -1185
  627. package/src/pipeline/speech_handle.ts +0 -201
@@ -0,0 +1,670 @@
1
+ // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+ import { VideoBufferType, VideoFrame } from '@livekit/rtc-node';
5
+ import sharp from 'sharp';
6
+ import { beforeEach, describe, expect, it, vi } from 'vitest';
7
+ import { ChatContext, ChatMessage, type ImageContent } from './chat_context.js';
8
+ import { computeChatCtxDiff, serializeImage } from './utils.js';
9
+
10
/**
 * Test fixture: builds a chat message with a caller-chosen id.
 *
 * @param id - Identifier given to the message; the diff tests assert on these ids.
 * @param content - Text content of the message.
 * @param role - Speaker role; defaults to `'user'`.
 * @returns The message produced by `ChatMessage.create`.
 */
function createChatMessage(
  id: string,
  content: string,
  role: 'user' | 'assistant' | 'system' = 'user',
): ChatMessage {
  const fields = { id, content, role };
  return ChatMessage.create(fields);
}
21
+
22
/**
 * Test fixture: creates a fresh `ChatContext` and appends the given messages
 * to its `items` list in the order supplied.
 *
 * @param messages - Messages to place in the context (may be empty).
 * @returns The populated context.
 */
function createChatContext(messages: ChatMessage[]): ChatContext {
  const chatCtx = new ChatContext();
  messages.forEach((entry) => {
    chatCtx.items.push(entry);
  });
  return chatCtx;
}
29
+
30
/**
 * Test fixture: assembles an `ImageContent` object with a fixed id and an empty cache.
 *
 * @param image - Either a URL / data-URL string or a `VideoFrame`.
 * @param inferenceDetail - Detail hint stored on the content; defaults to `'auto'`.
 * @param options - Optional mime type and inference dimensions; omitted fields
 *   are stored as `undefined`.
 * @returns The image content object.
 */
function createImageContent(
  image: string | VideoFrame,
  inferenceDetail: 'auto' | 'high' | 'low' = 'auto',
  options?: {
    mimeType?: string;
    inferenceWidth?: number;
    inferenceHeight?: number;
  },
): ImageContent {
  const { mimeType, inferenceWidth, inferenceHeight } = options ?? {};

  return {
    id: 'test-id',
    type: 'image_content',
    image,
    inferenceDetail,
    mimeType,
    inferenceWidth,
    inferenceHeight,
    _cache: {},
  };
}
50
+
51
/**
 * Decodes a base64-encoded image with sharp so tests can inspect its pixels.
 *
 * @param base64Data - Base64 text of the encoded image.
 * @returns `imageBuffer`, the encoded bytes, and `decodedImage`, sharp's raw
 *   output object (the tests read its `data` and `info` fields).
 */
async function decodeImageToRaw(base64Data: string) {
  const encodedBytes = Buffer.from(base64Data, 'base64');
  const rawPipeline = sharp(encodedBytes).raw();
  const rawOutput = await rawPipeline.toBuffer({ resolveWithObject: true });

  return { imageBuffer: encodedBytes, decodedImage: rawOutput };
}
56
+
57
/**
 * Test fixture: builds a `VideoFrame` in which every pixel has the same color.
 *
 * Bytes are written per pixel in r, g, b, (a) order regardless of `bufferType`;
 * the buffer type only decides whether a pixel has 3 bytes (RGB24) or 4.
 *
 * @param width - Frame width in pixels.
 * @param height - Frame height in pixels.
 * @param color - Channel values. `a` is optional and defaults to 255 (opaque),
 *   so a frame requested without alpha is not silently fully transparent.
 * @param bufferType - Buffer layout tag passed to `VideoFrame`; defaults to RGBA.
 * @returns The filled frame.
 */
function createSolidColorFrame(
  width: number,
  height: number,
  color: { r: number; g: number; b: number; a?: number },
  bufferType: VideoBufferType = VideoBufferType.RGBA,
): VideoFrame {
  const channels = bufferType === VideoBufferType.RGB24 ? 3 : 4;
  const frameData = new Uint8Array(width * height * channels);

  // A zero-initialised alpha byte would make the "solid" frame invisible,
  // so an omitted alpha is treated as fully opaque.
  const alpha = color.a ?? 255;

  for (let i = 0; i < frameData.length; i += channels) {
    frameData[i] = color.r;
    frameData[i + 1] = color.g;
    frameData[i + 2] = color.b;
    if (channels === 4) {
      frameData[i + 3] = alpha;
    }
  }

  return new VideoFrame(frameData, width, height, bufferType);
}
77
+
78
/**
 * Test fixture: builds an RGBA `VideoFrame` whose red channel ramps from 0 to 255
 * left-to-right and whose green channel ramps from 0 to 255 top-to-bottom.
 * Blue is fixed at 128 and alpha at 255.
 *
 * @param width - Frame width in pixels.
 * @param height - Frame height in pixels.
 * @returns The gradient frame.
 */
function createGradientFrame(width: number, height: number): VideoFrame {
  const bytesPerPixel = 4;
  const pixels = new Uint8Array(width * height * bytesPerPixel);

  // Pixels are visited in row-major order, so a running offset tracks
  // (row * width + col) * bytesPerPixel without recomputing it.
  let offset = 0;
  for (let row = 0; row < height; row++) {
    const green = Math.floor((row / (height - 1)) * 255);
    for (let col = 0; col < width; col++) {
      pixels[offset] = Math.floor((col / (width - 1)) * 255);
      pixels[offset + 1] = green;
      pixels[offset + 2] = 128;
      pixels[offset + 3] = 255;
      offset += bytesPerPixel;
    }
  }

  return new VideoFrame(pixels, width, height, VideoBufferType.RGBA);
}
94
+
95
/**
 * Test fixture: builds an RGBA `VideoFrame` whose leading pixels are copied from
 * `patterns`, one `[r, g, b, a]` entry per pixel in buffer order. Pixels beyond
 * the supplied entries stay zero.
 *
 * @param width - Frame width in pixels.
 * @param height - Frame height in pixels.
 * @param patterns - Per-pixel channel values.
 * @returns The patterned frame.
 */
function createPatternFrame(width: number, height: number, patterns: number[][]): VideoFrame {
  const channels = 4;
  const pixels = new Uint8Array(width * height * channels);

  for (const [pixelIndex, pixel] of patterns.entries()) {
    const base = pixelIndex * channels;
    for (let channel = 0; channel < channels; channel++) {
      pixels[base + channel] = pixel[channel]!;
    }
  }

  return new VideoFrame(pixels, width, height, VideoBufferType.RGBA);
}
110
+
111
/**
 * Asserts that the buffer begins with the bytes 0x89 'P' 'N' 'G',
 * i.e. the first four bytes of the PNG signature.
 *
 * @param imageBuffer - Encoded image bytes to check.
 */
function verifyPngHeader(imageBuffer: Buffer) {
  const signaturePrefix = [0x89, 0x50, 0x4e, 0x47];

  signaturePrefix.forEach((expectedByte, position) => {
    expect(imageBuffer[position]).toBe(expectedByte);
  });
}
117
+
118
/**
 * Asserts that the four bytes starting at `index` equal the expected
 * r, g, b and a values, in that order.
 *
 * @param data - Raw pixel bytes.
 * @param index - Byte offset of the pixel's first channel.
 * @param expected - Channel values the pixel must have.
 */
function expectPixel(
  data: Buffer,
  index: number,
  expected: { r: number; g: number; b: number; a: number },
) {
  const { r, g, b, a } = expected;

  [r, g, b, a].forEach((channelValue, channelOffset) => {
    expect(data[index + channelOffset]).toBe(channelValue);
  });
}
128
+
129
describe('computeChatCtxDiff', () => {
  type DiffResult = ReturnType<typeof computeChatCtxDiff>;
  type CreateOp = [string | null, string];

  // Content and role of every message id used by the scenarios below.
  const MESSAGE_FIXTURES: Record<string, { content: string; role: 'user' | 'assistant' }> = {
    '1': { content: 'Hello', role: 'user' },
    '2': { content: 'Hi there', role: 'assistant' },
    '3': { content: 'How are you?', role: 'user' },
    '4': { content: 'Fine thanks', role: 'assistant' },
    '5': { content: 'Good to hear', role: 'user' },
    '6': { content: 'Anything else?', role: 'assistant' },
  };

  /**
   * Creates each referenced message once (in ascending id order), builds the
   * old and new contexts from the same message instances, and diffs them.
   */
  function diffContexts(oldIds: string[], newIds: string[]): DiffResult {
    const messagesById = new Map<string, ChatMessage>();
    for (const id of [...new Set([...oldIds, ...newIds])].sort()) {
      const { content, role } = MESSAGE_FIXTURES[id]!;
      messagesById.set(id, createChatMessage(id, content, role));
    }

    const toContext = (ids: string[]) =>
      createChatContext(ids.map((id) => messagesById.get(id)!));

    return computeChatCtxDiff(toContext(oldIds), toContext(newIds));
  }

  // Verifier that pins both operation lists exactly.
  const expectExactly = (toRemove: string[], toCreate: CreateOp[]) => (result: DiffResult) => {
    expect(result.toRemove).toEqual(toRemove);
    expect(result.toCreate).toEqual(toCreate);
  };

  const scenarios: Array<{
    name: string;
    oldIds: string[];
    newIds: string[];
    verify: (result: DiffResult) => void;
  }> = [
    {
      name: 'should return empty operations for identical contexts',
      oldIds: ['1', '2'],
      newIds: ['1', '2'],
      verify: expectExactly([], []),
    },
    {
      name: 'should handle empty old context',
      oldIds: [],
      newIds: ['1', '2'],
      verify: expectExactly(
        [],
        [
          [null, '1'], // first item goes to root
          ['1', '2'],
        ],
      ),
    },
    {
      name: 'should handle empty new context',
      oldIds: ['1', '2'],
      newIds: [],
      verify: expectExactly(['1', '2'], []),
    },
    {
      name: 'should handle adding items to the end',
      oldIds: ['1', '2'],
      newIds: ['1', '2', '3'],
      verify: expectExactly([], [['2', '3']]),
    },
    {
      name: 'should handle removing items from the end',
      oldIds: ['1', '2', '3'],
      newIds: ['1', '2'],
      verify: expectExactly(['3'], []),
    },
    {
      name: 'should handle adding items to the beginning',
      oldIds: ['2', '3'],
      newIds: ['1', '2', '3'],
      verify: expectExactly([], [[null, '1']]),
    },
    {
      name: 'should handle removing items from the beginning',
      oldIds: ['1', '2', '3'],
      newIds: ['2', '3'],
      verify: expectExactly(['1'], []),
    },
    {
      name: 'should handle adding items in the middle',
      oldIds: ['1', '3', '4'],
      newIds: ['1', '2', '3', '4'],
      verify: expectExactly([], [['1', '2']]),
    },
    {
      name: 'should handle removing items from the middle',
      oldIds: ['1', '2', '3', '4'],
      newIds: ['1', '3', '4'],
      verify: expectExactly(['2'], []),
    },
    {
      // Old: [1, 2, 3, 4]
      // New: [1, 5, 3, 6]
      // Remove: [2, 4]
      // Create: [5 after 1, 6 after 3]
      name: 'should handle complex mixed operations',
      oldIds: ['1', '2', '3', '4'],
      newIds: ['1', '5', '3', '6'],
      verify: expectExactly(
        ['2', '4'],
        [
          ['1', '5'],
          ['3', '6'],
        ],
      ),
    },
    {
      // Old: [1, 2, 3]
      // New: [3, 1, 2]
      // The exact operations are not pinned here; a changed order only has
      // to yield at least one removal or creation.
      name: 'should handle reordering items',
      oldIds: ['1', '2', '3'],
      newIds: ['3', '1', '2'],
      verify: (result) => {
        expect(result.toRemove.length + result.toCreate.length).toBeGreaterThan(0);
      },
    },
    {
      name: 'should handle identical single item contexts',
      oldIds: ['1'],
      newIds: ['1'],
      verify: expectExactly([], []),
    },
    {
      // Old: [1, 2, 3, 4, 5]
      // New: [1, 3, 5]
      // LCS should be [1, 3, 5], remove [2, 4]
      name: 'should handle longest common subsequence correctly',
      oldIds: ['1', '2', '3', '4', '5'],
      newIds: ['1', '3', '5'],
      verify: expectExactly(['2', '4'], []),
    },
    {
      // Old: [1, 3, 5]
      // New: [1, 2, 3, 4, 5, 6]
      // LCS: [1, 3, 5], add [2 after 1, 4 after 3, 6 after 5]
      name: 'should handle interleaved additions and common subsequence',
      oldIds: ['1', '3', '5'],
      newIds: ['1', '2', '3', '4', '5', '6'],
      verify: expectExactly(
        [],
        [
          ['1', '2'],
          ['3', '4'],
          ['5', '6'],
        ],
      ),
    },
  ];

  for (const { name, oldIds, newIds, verify } of scenarios) {
    it(name, () => {
      verify(diffContexts(oldIds, newIds));
    });
  }
});
358
+
359
+ describe('serializeImage', () => {
360
// Spy on console.warn, re-created for every test so assertions only see
// warnings emitted by the test that is currently running.
let consoleWarnSpy: ReturnType<typeof vi.spyOn>;

beforeEach(() => {
  // Silence console.warn and record its calls.
  consoleWarnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {});
  // Drop any call history recorded on mocks by earlier tests.
  vi.clearAllMocks();

  // Vitest runs a function returned from beforeEach as per-test cleanup.
  // Restoring here puts the real console.warn back, so the stub does not
  // outlive the test that installed it.
  return () => {
    consoleWarnSpy.mockRestore();
  };
});
366
+
367
describe('Data URL handling', () => {
  // Joins a mime type and a base64 payload into a data URL.
  const toDataUrl = (mimeType: string, base64: string) => `data:${mimeType};base64,${base64}`;

  it('should serialize a valid JPEG data URL', async () => {
    const originalBase64 = '/9j/4AAQSkZJRg==';

    const result = await serializeImage(
      createImageContent(toDataUrl('image/jpeg', originalBase64), 'high'),
    );

    expect(result).toEqual({
      base64Data: originalBase64,
      mimeType: 'image/jpeg',
      inferenceDetail: 'high',
    });

    expect(result.base64Data).toBe(originalBase64);
  });

  // Remaining supported formats: the payload and mime type must pass through untouched.
  const passthroughCases: Array<{
    label: string;
    mimeType: string;
    base64Data: string;
    inferenceDetail: 'auto' | 'high' | 'low';
  }> = [
    {
      label: 'PNG',
      mimeType: 'image/png',
      base64Data: 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAAB',
      inferenceDetail: 'low',
    },
    {
      label: 'WebP',
      mimeType: 'image/webp',
      base64Data: 'UklGRiQAAABXRUJQVlA4IBgAAAA',
      inferenceDetail: 'auto',
    },
    {
      label: 'GIF',
      mimeType: 'image/gif',
      base64Data: 'R0lGODlhAQABAIAAAP///wAAACH5BAEAAAAALAAAAAABAAEAAAICRAEAOw==',
      inferenceDetail: 'auto',
    },
  ];

  for (const { label, mimeType, base64Data, inferenceDetail } of passthroughCases) {
    it(`should serialize a valid ${label} data URL`, async () => {
      const result = await serializeImage(
        createImageContent(toDataUrl(mimeType, base64Data), inferenceDetail),
      );

      expect(result).toEqual({ base64Data, mimeType, inferenceDetail });
    });
  }

  it('should warn and use provided mimeType when it differs from data URL mime type', async () => {
    const jpegPayload = '/9j/4AAQSkZJRg==';
    const overriddenContent = createImageContent(toDataUrl('image/jpeg', jpegPayload), 'auto', {
      mimeType: 'image/png',
    });

    const result = await serializeImage(overriddenContent);

    expect(consoleWarnSpy).toHaveBeenCalledWith(
      "Provided mimeType 'image/png' does not match data URL mime type 'image/jpeg'. Using provided mimeType.",
    );
    expect(result).toEqual({
      base64Data: jpegPayload,
      mimeType: 'image/png',
      inferenceDetail: 'auto',
    });
  });

  // Inputs that must be rejected, with the message each rejection carries.
  const rejectionCases = [
    {
      name: 'should throw error for invalid data URL format',
      url: 'data:;base64,/9j/4AAQSkZJRg==',
      message: 'Invalid data URL format',
    },
    {
      name: 'should throw error for unsupported mime type',
      url: 'data:image/bmp;base64,Qk06AAAAAAAAADYAAAAoAAAA',
      message: 'Unsupported mimeType image/bmp. Must be jpeg, png, webp, or gif',
    },
  ];

  for (const { name, url, message } of rejectionCases) {
    it(name, async () => {
      const imageContent = createImageContent(url, 'auto');

      await expect(serializeImage(imageContent)).rejects.toThrow(message);
    });
  }
});
462
+
463
describe('External URL handling', () => {
  // Non-data URLs are expected to be passed through as `externalUrl`.
  const externalUrl = 'https://example.com/image.jpg';

  it('should serialize an external URL without mimeType', async () => {
    const serialized = await serializeImage(createImageContent(externalUrl, 'high'));

    expect(serialized).toEqual({
      mimeType: undefined,
      inferenceDetail: 'high',
      externalUrl,
    });
  });

  it('should serialize an external URL with mimeType', async () => {
    const mimeType = 'image/jpeg';

    const serialized = await serializeImage(createImageContent(externalUrl, 'low', { mimeType }));

    expect(serialized).toEqual({
      mimeType,
      inferenceDetail: 'low',
      externalUrl,
    });
  });
});
490
+
491
describe('VideoFrame handling', () => {
  const RGBA_STRIDE = 4;

  // Wraps a frame in image content and runs it through serializeImage.
  const serializeFrame = (
    frame: VideoFrame,
    detail: 'auto' | 'high' | 'low',
    options?: Parameters<typeof createImageContent>[2],
  ) => serializeImage(createImageContent(frame, detail, options));

  it('should serialize a VideoFrame without resize parameters', async () => {
    const side = 4;
    const red = { r: 255, g: 0, b: 0, a: 255 };

    const result = await serializeFrame(createSolidColorFrame(side, side, red), 'auto');

    expect(result).toMatchObject({
      mimeType: 'image/png',
      inferenceDetail: 'auto',
    });
    expect(result.base64Data).toBeDefined();
    expect(result.base64Data).toMatch(/^[A-Za-z0-9+/]+=*$/);
    expect(result.externalUrl).toBeUndefined();

    const { imageBuffer, decodedImage } = await decodeImageToRaw(result.base64Data!);

    verifyPngHeader(imageBuffer);

    const { info, data } = decodedImage;
    expect(info.width).toBe(side);
    expect(info.height).toBe(side);
    expect(info.channels).toBe(4);

    // Every decoded pixel must still be the solid input color.
    for (let offset = 0; offset < data.length; offset += RGBA_STRIDE) {
      expectPixel(data, offset, red);
    }
  });

  it('should serialize a VideoFrame with a gradient pattern', async () => {
    const width = 8;
    const height = 8;

    const result = await serializeFrame(createGradientFrame(width, height), 'high');

    expect(result).toMatchObject({
      mimeType: 'image/png',
      inferenceDetail: 'high',
    });

    const { decodedImage } = await decodeImageToRaw(result.base64Data!);

    expect(decodedImage.info.width).toBe(width);
    expect(decodedImage.info.height).toBe(height);

    const pixels = decodedImage.data;

    // Same ramp formula the gradient fixture uses to fill the frame.
    const ramp = (position: number, extent: number) =>
      Math.floor((position / (extent - 1)) * 255);

    for (let y = 0; y < height; y++) {
      for (let x = 0; x < width; x++) {
        const offset = (y * width + x) * RGBA_STRIDE;
        expectPixel(pixels, offset, { r: ramp(x, width), g: ramp(y, height), b: 128, a: 255 });
      }
    }
  });

  it('should preserve exact pixel data through serialization', async () => {
    const width = 2;
    const height = 2;

    const patterns = [
      [255, 0, 0, 255],
      [0, 255, 0, 255],
      [0, 0, 255, 255],
      [255, 255, 255, 255],
    ];

    const result = await serializeFrame(createPatternFrame(width, height, patterns), 'low');

    const { decodedImage } = await decodeImageToRaw(result.base64Data!);
    const decoded = decodedImage.data;

    // Expected pixels in buffer order: top-left, top-right, bottom-left, bottom-right.
    const expectedPixels = [
      { r: 255, g: 0, b: 0, a: 255 },
      { r: 0, g: 255, b: 0, a: 255 },
      { r: 0, g: 0, b: 255, a: 255 },
      { r: 255, g: 255, b: 255, a: 255 },
    ];

    expectedPixels.forEach((pixel, pixelIndex) => {
      expectPixel(decoded, pixelIndex * RGBA_STRIDE, pixel);
    });
  });

  it('should handle resize parameters correctly', async () => {
    const sourceSide = 2;
    const targetSide = 4;
    const grey = { r: 100, g: 100, b: 100, a: 255 };

    const result = await serializeFrame(
      createSolidColorFrame(sourceSide, sourceSide, grey),
      'auto',
      { inferenceWidth: targetSide, inferenceHeight: targetSide },
    );

    const { decodedImage } = await decodeImageToRaw(result.base64Data!);

    expect(decodedImage.info.width).toBe(4);
    expect(decodedImage.info.height).toBe(4);

    const pixels = decodedImage.data;
    for (let offset = 0; offset < pixels.length; offset += RGBA_STRIDE) {
      // Color channels are compared with a tolerance rather than exactly.
      for (const channel of [0, 1, 2]) {
        expect(pixels[offset + channel]).toBeCloseTo(100, -1);
      }
      expect(pixels[offset + 3]).toBe(255);
    }
  });

  it('should handle RGB24 VideoBufferType correctly', async () => {
    const width = 2;
    const height = 2;

    // With RGB24 the fixture writes three bytes per pixel and no alpha.
    const videoFrame = createSolidColorFrame(
      width,
      height,
      { r: 255, g: 128, b: 64 },
      VideoBufferType.RGB24,
    );

    const result = await serializeFrame(videoFrame, 'auto');

    expect(result.mimeType).toBe('image/png');

    const { decodedImage } = await decodeImageToRaw(result.base64Data!);
    const { data: pixels, info } = decodedImage;

    expect(info.channels).toBeGreaterThanOrEqual(3);

    const stride = info.channels;
    for (let offset = 0; offset < pixels.length; offset += stride) {
      expect(pixels[offset]).toBe(255);
      expect(pixels[offset + 1]).toBe(128);
      expect(pixels[offset + 2]).toBe(64);
      if (stride === 4) {
        expect(pixels[offset + 3]).toBe(255);
      }
    }
  });

  it('should handle different RGBA-like formats correctly', async () => {
    const fourChannelFormats = [
      VideoBufferType.RGBA,
      VideoBufferType.BGRA,
      VideoBufferType.ARGB,
      VideoBufferType.ABGR,
    ];

    for (const format of fourChannelFormats) {
      // A single 1x1 pixel is enough to exercise each buffer type.
      const onePixel = new VideoFrame(new Uint8Array([100, 150, 200, 250]), 1, 1, format);

      const result = await serializeFrame(onePixel, 'auto');

      expect(result.mimeType).toBe('image/png');
      expect(result.base64Data).toBeDefined();
    }
  });
});
662
+
663
describe('Error handling', () => {
  it('should throw error for unsupported image type', async () => {
    // A number is neither a URL string nor a VideoFrame; the cast bypasses the
    // compile-time check so the runtime guard can be exercised.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const notAnImage = 123 as any;

    const pending = serializeImage(createImageContent(notAnImage, 'auto'));

    await expect(pending).rejects.toThrow('Unsupported image type');
  });
});
670
+ });