@livekit/agents 1.0.47 → 1.1.0-dev.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (444)
  1. package/dist/beta/index.cjs +29 -0
  2. package/dist/beta/index.cjs.map +1 -0
  3. package/dist/beta/index.d.cts +2 -0
  4. package/dist/beta/index.d.ts +2 -0
  5. package/dist/beta/index.d.ts.map +1 -0
  6. package/dist/beta/index.js +7 -0
  7. package/dist/beta/index.js.map +1 -0
  8. package/dist/beta/workflows/index.cjs +29 -0
  9. package/dist/beta/workflows/index.cjs.map +1 -0
  10. package/dist/beta/workflows/index.d.cts +2 -0
  11. package/dist/beta/workflows/index.d.ts +2 -0
  12. package/dist/beta/workflows/index.d.ts.map +1 -0
  13. package/dist/beta/workflows/index.js +7 -0
  14. package/dist/beta/workflows/index.js.map +1 -0
  15. package/dist/beta/workflows/task_group.cjs +162 -0
  16. package/dist/beta/workflows/task_group.cjs.map +1 -0
  17. package/dist/beta/workflows/task_group.d.cts +32 -0
  18. package/dist/beta/workflows/task_group.d.ts +32 -0
  19. package/dist/beta/workflows/task_group.d.ts.map +1 -0
  20. package/dist/beta/workflows/task_group.js +138 -0
  21. package/dist/beta/workflows/task_group.js.map +1 -0
  22. package/dist/constants.cjs +27 -0
  23. package/dist/constants.cjs.map +1 -1
  24. package/dist/constants.d.cts +9 -0
  25. package/dist/constants.d.ts +9 -0
  26. package/dist/constants.d.ts.map +1 -1
  27. package/dist/constants.js +18 -0
  28. package/dist/constants.js.map +1 -1
  29. package/dist/index.cjs +3 -0
  30. package/dist/index.cjs.map +1 -1
  31. package/dist/index.d.cts +2 -1
  32. package/dist/index.d.ts +2 -1
  33. package/dist/index.d.ts.map +1 -1
  34. package/dist/index.js +2 -0
  35. package/dist/index.js.map +1 -1
  36. package/dist/inference/api_protos.d.cts +12 -12
  37. package/dist/inference/api_protos.d.ts +12 -12
  38. package/dist/inference/interruption/defaults.cjs +81 -0
  39. package/dist/inference/interruption/defaults.cjs.map +1 -0
  40. package/dist/inference/interruption/defaults.d.cts +19 -0
  41. package/dist/inference/interruption/defaults.d.ts +19 -0
  42. package/dist/inference/interruption/defaults.d.ts.map +1 -0
  43. package/dist/inference/interruption/defaults.js +46 -0
  44. package/dist/inference/interruption/defaults.js.map +1 -0
  45. package/dist/inference/interruption/errors.cjs +44 -0
  46. package/dist/inference/interruption/errors.cjs.map +1 -0
  47. package/dist/inference/interruption/errors.d.cts +12 -0
  48. package/dist/inference/interruption/errors.d.ts +12 -0
  49. package/dist/inference/interruption/errors.d.ts.map +1 -0
  50. package/dist/inference/interruption/errors.js +20 -0
  51. package/dist/inference/interruption/errors.js.map +1 -0
  52. package/dist/inference/interruption/http_transport.cjs +147 -0
  53. package/dist/inference/interruption/http_transport.cjs.map +1 -0
  54. package/dist/inference/interruption/http_transport.d.cts +63 -0
  55. package/dist/inference/interruption/http_transport.d.ts +63 -0
  56. package/dist/inference/interruption/http_transport.d.ts.map +1 -0
  57. package/dist/inference/interruption/http_transport.js +121 -0
  58. package/dist/inference/interruption/http_transport.js.map +1 -0
  59. package/dist/inference/interruption/interruption_cache_entry.cjs +58 -0
  60. package/dist/inference/interruption/interruption_cache_entry.cjs.map +1 -0
  61. package/dist/inference/interruption/interruption_cache_entry.d.cts +30 -0
  62. package/dist/inference/interruption/interruption_cache_entry.d.ts +30 -0
  63. package/dist/inference/interruption/interruption_cache_entry.d.ts.map +1 -0
  64. package/dist/inference/interruption/interruption_cache_entry.js +34 -0
  65. package/dist/inference/interruption/interruption_cache_entry.js.map +1 -0
  66. package/dist/inference/interruption/interruption_detector.cjs +181 -0
  67. package/dist/inference/interruption/interruption_detector.cjs.map +1 -0
  68. package/dist/inference/interruption/interruption_detector.d.cts +59 -0
  69. package/dist/inference/interruption/interruption_detector.d.ts +59 -0
  70. package/dist/inference/interruption/interruption_detector.d.ts.map +1 -0
  71. package/dist/inference/interruption/interruption_detector.js +147 -0
  72. package/dist/inference/interruption/interruption_detector.js.map +1 -0
  73. package/dist/inference/interruption/interruption_stream.cjs +368 -0
  74. package/dist/inference/interruption/interruption_stream.cjs.map +1 -0
  75. package/dist/inference/interruption/interruption_stream.d.cts +46 -0
  76. package/dist/inference/interruption/interruption_stream.d.ts +46 -0
  77. package/dist/inference/interruption/interruption_stream.d.ts.map +1 -0
  78. package/dist/inference/interruption/interruption_stream.js +344 -0
  79. package/dist/inference/interruption/interruption_stream.js.map +1 -0
  80. package/dist/inference/interruption/types.cjs +17 -0
  81. package/dist/inference/interruption/types.cjs.map +1 -0
  82. package/dist/inference/interruption/types.d.cts +66 -0
  83. package/dist/inference/interruption/types.d.ts +66 -0
  84. package/dist/inference/interruption/types.d.ts.map +1 -0
  85. package/dist/inference/interruption/types.js +1 -0
  86. package/dist/inference/interruption/types.js.map +1 -0
  87. package/dist/inference/interruption/utils.cjs +130 -0
  88. package/dist/inference/interruption/utils.cjs.map +1 -0
  89. package/dist/inference/interruption/utils.d.cts +41 -0
  90. package/dist/inference/interruption/utils.d.ts +41 -0
  91. package/dist/inference/interruption/utils.d.ts.map +1 -0
  92. package/dist/inference/interruption/utils.js +105 -0
  93. package/dist/inference/interruption/utils.js.map +1 -0
  94. package/dist/inference/interruption/utils.test.cjs +105 -0
  95. package/dist/inference/interruption/utils.test.cjs.map +1 -0
  96. package/dist/inference/interruption/utils.test.js +104 -0
  97. package/dist/inference/interruption/utils.test.js.map +1 -0
  98. package/dist/inference/interruption/ws_transport.cjs +329 -0
  99. package/dist/inference/interruption/ws_transport.cjs.map +1 -0
  100. package/dist/inference/interruption/ws_transport.d.cts +33 -0
  101. package/dist/inference/interruption/ws_transport.d.ts +33 -0
  102. package/dist/inference/interruption/ws_transport.d.ts.map +1 -0
  103. package/dist/inference/interruption/ws_transport.js +295 -0
  104. package/dist/inference/interruption/ws_transport.js.map +1 -0
  105. package/dist/inference/llm.cjs +14 -10
  106. package/dist/inference/llm.cjs.map +1 -1
  107. package/dist/inference/llm.d.cts +2 -1
  108. package/dist/inference/llm.d.ts +2 -1
  109. package/dist/inference/llm.d.ts.map +1 -1
  110. package/dist/inference/llm.js +8 -10
  111. package/dist/inference/llm.js.map +1 -1
  112. package/dist/inference/stt.cjs +7 -2
  113. package/dist/inference/stt.cjs.map +1 -1
  114. package/dist/inference/stt.d.cts +2 -0
  115. package/dist/inference/stt.d.ts +2 -0
  116. package/dist/inference/stt.d.ts.map +1 -1
  117. package/dist/inference/stt.js +8 -3
  118. package/dist/inference/stt.js.map +1 -1
  119. package/dist/inference/tts.cjs +7 -2
  120. package/dist/inference/tts.cjs.map +1 -1
  121. package/dist/inference/tts.d.cts +2 -0
  122. package/dist/inference/tts.d.ts +2 -0
  123. package/dist/inference/tts.d.ts.map +1 -1
  124. package/dist/inference/tts.js +8 -3
  125. package/dist/inference/tts.js.map +1 -1
  126. package/dist/inference/utils.cjs +26 -7
  127. package/dist/inference/utils.cjs.map +1 -1
  128. package/dist/inference/utils.d.cts +13 -0
  129. package/dist/inference/utils.d.ts +13 -0
  130. package/dist/inference/utils.d.ts.map +1 -1
  131. package/dist/inference/utils.js +18 -2
  132. package/dist/inference/utils.js.map +1 -1
  133. package/dist/llm/chat_context.cjs +108 -2
  134. package/dist/llm/chat_context.cjs.map +1 -1
  135. package/dist/llm/chat_context.d.cts +28 -1
  136. package/dist/llm/chat_context.d.ts +28 -1
  137. package/dist/llm/chat_context.d.ts.map +1 -1
  138. package/dist/llm/chat_context.js +108 -2
  139. package/dist/llm/chat_context.js.map +1 -1
  140. package/dist/llm/chat_context.test.cjs +43 -0
  141. package/dist/llm/chat_context.test.cjs.map +1 -1
  142. package/dist/llm/chat_context.test.js +43 -0
  143. package/dist/llm/chat_context.test.js.map +1 -1
  144. package/dist/llm/index.cjs +2 -0
  145. package/dist/llm/index.cjs.map +1 -1
  146. package/dist/llm/index.d.cts +2 -2
  147. package/dist/llm/index.d.ts +2 -2
  148. package/dist/llm/index.d.ts.map +1 -1
  149. package/dist/llm/index.js +3 -1
  150. package/dist/llm/index.js.map +1 -1
  151. package/dist/llm/llm.cjs +16 -1
  152. package/dist/llm/llm.cjs.map +1 -1
  153. package/dist/llm/llm.d.cts +9 -0
  154. package/dist/llm/llm.d.ts +9 -0
  155. package/dist/llm/llm.d.ts.map +1 -1
  156. package/dist/llm/llm.js +16 -1
  157. package/dist/llm/llm.js.map +1 -1
  158. package/dist/llm/provider_format/index.d.cts +1 -1
  159. package/dist/llm/provider_format/index.d.ts +1 -1
  160. package/dist/llm/realtime.cjs +3 -0
  161. package/dist/llm/realtime.cjs.map +1 -1
  162. package/dist/llm/realtime.d.cts +1 -0
  163. package/dist/llm/realtime.d.ts +1 -0
  164. package/dist/llm/realtime.d.ts.map +1 -1
  165. package/dist/llm/realtime.js +3 -0
  166. package/dist/llm/realtime.js.map +1 -1
  167. package/dist/llm/tool_context.cjs +7 -0
  168. package/dist/llm/tool_context.cjs.map +1 -1
  169. package/dist/llm/tool_context.d.cts +10 -2
  170. package/dist/llm/tool_context.d.ts +10 -2
  171. package/dist/llm/tool_context.d.ts.map +1 -1
  172. package/dist/llm/tool_context.js +6 -0
  173. package/dist/llm/tool_context.js.map +1 -1
  174. package/dist/metrics/base.cjs.map +1 -1
  175. package/dist/metrics/base.d.cts +45 -1
  176. package/dist/metrics/base.d.ts +45 -1
  177. package/dist/metrics/base.d.ts.map +1 -1
  178. package/dist/metrics/index.cjs +5 -0
  179. package/dist/metrics/index.cjs.map +1 -1
  180. package/dist/metrics/index.d.cts +2 -1
  181. package/dist/metrics/index.d.ts +2 -1
  182. package/dist/metrics/index.d.ts.map +1 -1
  183. package/dist/metrics/index.js +6 -0
  184. package/dist/metrics/index.js.map +1 -1
  185. package/dist/metrics/model_usage.cjs +189 -0
  186. package/dist/metrics/model_usage.cjs.map +1 -0
  187. package/dist/metrics/model_usage.d.cts +92 -0
  188. package/dist/metrics/model_usage.d.ts +92 -0
  189. package/dist/metrics/model_usage.d.ts.map +1 -0
  190. package/dist/metrics/model_usage.js +164 -0
  191. package/dist/metrics/model_usage.js.map +1 -0
  192. package/dist/metrics/model_usage.test.cjs +474 -0
  193. package/dist/metrics/model_usage.test.cjs.map +1 -0
  194. package/dist/metrics/model_usage.test.js +476 -0
  195. package/dist/metrics/model_usage.test.js.map +1 -0
  196. package/dist/metrics/usage_collector.cjs +3 -0
  197. package/dist/metrics/usage_collector.cjs.map +1 -1
  198. package/dist/metrics/usage_collector.d.cts +9 -0
  199. package/dist/metrics/usage_collector.d.ts +9 -0
  200. package/dist/metrics/usage_collector.d.ts.map +1 -1
  201. package/dist/metrics/usage_collector.js +3 -0
  202. package/dist/metrics/usage_collector.js.map +1 -1
  203. package/dist/metrics/utils.cjs +9 -0
  204. package/dist/metrics/utils.cjs.map +1 -1
  205. package/dist/metrics/utils.d.ts.map +1 -1
  206. package/dist/metrics/utils.js +9 -0
  207. package/dist/metrics/utils.js.map +1 -1
  208. package/dist/stream/multi_input_stream.test.cjs +4 -0
  209. package/dist/stream/multi_input_stream.test.cjs.map +1 -1
  210. package/dist/stream/multi_input_stream.test.js +5 -1
  211. package/dist/stream/multi_input_stream.test.js.map +1 -1
  212. package/dist/stream/stream_channel.cjs +31 -0
  213. package/dist/stream/stream_channel.cjs.map +1 -1
  214. package/dist/stream/stream_channel.d.cts +4 -2
  215. package/dist/stream/stream_channel.d.ts +4 -2
  216. package/dist/stream/stream_channel.d.ts.map +1 -1
  217. package/dist/stream/stream_channel.js +31 -0
  218. package/dist/stream/stream_channel.js.map +1 -1
  219. package/dist/stt/stt.cjs +34 -2
  220. package/dist/stt/stt.cjs.map +1 -1
  221. package/dist/stt/stt.d.cts +22 -0
  222. package/dist/stt/stt.d.ts +22 -0
  223. package/dist/stt/stt.d.ts.map +1 -1
  224. package/dist/stt/stt.js +34 -2
  225. package/dist/stt/stt.js.map +1 -1
  226. package/dist/telemetry/otel_http_exporter.cjs +24 -5
  227. package/dist/telemetry/otel_http_exporter.cjs.map +1 -1
  228. package/dist/telemetry/otel_http_exporter.d.cts +1 -0
  229. package/dist/telemetry/otel_http_exporter.d.ts +1 -0
  230. package/dist/telemetry/otel_http_exporter.d.ts.map +1 -1
  231. package/dist/telemetry/otel_http_exporter.js +24 -5
  232. package/dist/telemetry/otel_http_exporter.js.map +1 -1
  233. package/dist/telemetry/trace_types.cjs +5 -5
  234. package/dist/telemetry/trace_types.cjs.map +1 -1
  235. package/dist/telemetry/trace_types.d.cts +9 -5
  236. package/dist/telemetry/trace_types.d.ts +9 -5
  237. package/dist/telemetry/trace_types.d.ts.map +1 -1
  238. package/dist/telemetry/trace_types.js +5 -5
  239. package/dist/telemetry/trace_types.js.map +1 -1
  240. package/dist/telemetry/traces.cjs +47 -8
  241. package/dist/telemetry/traces.cjs.map +1 -1
  242. package/dist/telemetry/traces.d.ts.map +1 -1
  243. package/dist/telemetry/traces.js +47 -8
  244. package/dist/telemetry/traces.js.map +1 -1
  245. package/dist/tts/tts.cjs +64 -2
  246. package/dist/tts/tts.cjs.map +1 -1
  247. package/dist/tts/tts.d.cts +34 -0
  248. package/dist/tts/tts.d.ts +34 -0
  249. package/dist/tts/tts.d.ts.map +1 -1
  250. package/dist/tts/tts.js +64 -2
  251. package/dist/tts/tts.js.map +1 -1
  252. package/dist/utils.cjs +1 -0
  253. package/dist/utils.cjs.map +1 -1
  254. package/dist/utils.d.ts.map +1 -1
  255. package/dist/utils.js +1 -0
  256. package/dist/utils.js.map +1 -1
  257. package/dist/version.cjs +1 -1
  258. package/dist/version.js +1 -1
  259. package/dist/voice/agent.cjs +34 -4
  260. package/dist/voice/agent.cjs.map +1 -1
  261. package/dist/voice/agent.d.cts +11 -2
  262. package/dist/voice/agent.d.ts +11 -2
  263. package/dist/voice/agent.d.ts.map +1 -1
  264. package/dist/voice/agent.js +34 -4
  265. package/dist/voice/agent.js.map +1 -1
  266. package/dist/voice/agent_activity.cjs +292 -44
  267. package/dist/voice/agent_activity.cjs.map +1 -1
  268. package/dist/voice/agent_activity.d.cts +27 -6
  269. package/dist/voice/agent_activity.d.ts +27 -6
  270. package/dist/voice/agent_activity.d.ts.map +1 -1
  271. package/dist/voice/agent_activity.js +293 -45
  272. package/dist/voice/agent_activity.js.map +1 -1
  273. package/dist/voice/agent_session.cjs +105 -48
  274. package/dist/voice/agent_session.cjs.map +1 -1
  275. package/dist/voice/agent_session.d.cts +90 -20
  276. package/dist/voice/agent_session.d.ts +90 -20
  277. package/dist/voice/agent_session.d.ts.map +1 -1
  278. package/dist/voice/agent_session.js +105 -46
  279. package/dist/voice/agent_session.js.map +1 -1
  280. package/dist/voice/audio_recognition.cjs +287 -6
  281. package/dist/voice/audio_recognition.cjs.map +1 -1
  282. package/dist/voice/audio_recognition.d.cts +42 -3
  283. package/dist/voice/audio_recognition.d.ts +42 -3
  284. package/dist/voice/audio_recognition.d.ts.map +1 -1
  285. package/dist/voice/audio_recognition.js +289 -7
  286. package/dist/voice/audio_recognition.js.map +1 -1
  287. package/dist/voice/client_events.cjs +554 -0
  288. package/dist/voice/client_events.cjs.map +1 -0
  289. package/dist/voice/client_events.d.cts +195 -0
  290. package/dist/voice/client_events.d.ts +195 -0
  291. package/dist/voice/client_events.d.ts.map +1 -0
  292. package/dist/voice/client_events.js +548 -0
  293. package/dist/voice/client_events.js.map +1 -0
  294. package/dist/voice/events.cjs +1 -0
  295. package/dist/voice/events.cjs.map +1 -1
  296. package/dist/voice/events.d.cts +8 -5
  297. package/dist/voice/events.d.ts +8 -5
  298. package/dist/voice/events.d.ts.map +1 -1
  299. package/dist/voice/events.js +1 -0
  300. package/dist/voice/events.js.map +1 -1
  301. package/dist/voice/generation.cjs +43 -8
  302. package/dist/voice/generation.cjs.map +1 -1
  303. package/dist/voice/generation.d.cts +3 -3
  304. package/dist/voice/generation.d.ts +3 -3
  305. package/dist/voice/generation.d.ts.map +1 -1
  306. package/dist/voice/generation.js +43 -8
  307. package/dist/voice/generation.js.map +1 -1
  308. package/dist/voice/index.cjs +1 -0
  309. package/dist/voice/index.cjs.map +1 -1
  310. package/dist/voice/index.d.cts +1 -0
  311. package/dist/voice/index.d.ts +1 -0
  312. package/dist/voice/index.d.ts.map +1 -1
  313. package/dist/voice/index.js +1 -0
  314. package/dist/voice/index.js.map +1 -1
  315. package/dist/voice/report.cjs +20 -8
  316. package/dist/voice/report.cjs.map +1 -1
  317. package/dist/voice/report.d.cts +5 -0
  318. package/dist/voice/report.d.ts +5 -0
  319. package/dist/voice/report.d.ts.map +1 -1
  320. package/dist/voice/report.js +20 -8
  321. package/dist/voice/report.js.map +1 -1
  322. package/dist/voice/report.test.cjs +106 -0
  323. package/dist/voice/report.test.cjs.map +1 -0
  324. package/dist/voice/report.test.js +105 -0
  325. package/dist/voice/report.test.js.map +1 -0
  326. package/dist/voice/room_io/room_io.cjs +16 -41
  327. package/dist/voice/room_io/room_io.cjs.map +1 -1
  328. package/dist/voice/room_io/room_io.d.cts +4 -9
  329. package/dist/voice/room_io/room_io.d.ts +4 -9
  330. package/dist/voice/room_io/room_io.d.ts.map +1 -1
  331. package/dist/voice/room_io/room_io.js +17 -43
  332. package/dist/voice/room_io/room_io.js.map +1 -1
  333. package/dist/voice/testing/fake_llm.cjs +127 -0
  334. package/dist/voice/testing/fake_llm.cjs.map +1 -0
  335. package/dist/voice/testing/fake_llm.d.cts +30 -0
  336. package/dist/voice/testing/fake_llm.d.ts +30 -0
  337. package/dist/voice/testing/fake_llm.d.ts.map +1 -0
  338. package/dist/voice/testing/fake_llm.js +103 -0
  339. package/dist/voice/testing/fake_llm.js.map +1 -0
  340. package/dist/voice/testing/index.cjs +3 -0
  341. package/dist/voice/testing/index.cjs.map +1 -1
  342. package/dist/voice/testing/index.d.cts +1 -0
  343. package/dist/voice/testing/index.d.ts +1 -0
  344. package/dist/voice/testing/index.d.ts.map +1 -1
  345. package/dist/voice/testing/index.js +2 -0
  346. package/dist/voice/testing/index.js.map +1 -1
  347. package/dist/voice/turn_config/endpointing.cjs +33 -0
  348. package/dist/voice/turn_config/endpointing.cjs.map +1 -0
  349. package/dist/voice/turn_config/endpointing.d.cts +30 -0
  350. package/dist/voice/turn_config/endpointing.d.ts +30 -0
  351. package/dist/voice/turn_config/endpointing.d.ts.map +1 -0
  352. package/dist/voice/turn_config/endpointing.js +9 -0
  353. package/dist/voice/turn_config/endpointing.js.map +1 -0
  354. package/dist/voice/turn_config/interruption.cjs +37 -0
  355. package/dist/voice/turn_config/interruption.cjs.map +1 -0
  356. package/dist/voice/turn_config/interruption.d.cts +53 -0
  357. package/dist/voice/turn_config/interruption.d.ts +53 -0
  358. package/dist/voice/turn_config/interruption.d.ts.map +1 -0
  359. package/dist/voice/turn_config/interruption.js +13 -0
  360. package/dist/voice/turn_config/interruption.js.map +1 -0
  361. package/dist/voice/turn_config/turn_handling.cjs +35 -0
  362. package/dist/voice/turn_config/turn_handling.cjs.map +1 -0
  363. package/dist/voice/turn_config/turn_handling.d.cts +36 -0
  364. package/dist/voice/turn_config/turn_handling.d.ts +36 -0
  365. package/dist/voice/turn_config/turn_handling.d.ts.map +1 -0
  366. package/dist/voice/turn_config/turn_handling.js +11 -0
  367. package/dist/voice/turn_config/turn_handling.js.map +1 -0
  368. package/dist/voice/turn_config/utils.cjs +97 -0
  369. package/dist/voice/turn_config/utils.cjs.map +1 -0
  370. package/dist/voice/turn_config/utils.d.cts +25 -0
  371. package/dist/voice/turn_config/utils.d.ts +25 -0
  372. package/dist/voice/turn_config/utils.d.ts.map +1 -0
  373. package/dist/voice/turn_config/utils.js +73 -0
  374. package/dist/voice/turn_config/utils.js.map +1 -0
  375. package/dist/voice/turn_config/utils.test.cjs +86 -0
  376. package/dist/voice/turn_config/utils.test.cjs.map +1 -0
  377. package/dist/voice/turn_config/utils.test.js +85 -0
  378. package/dist/voice/turn_config/utils.test.js.map +1 -0
  379. package/dist/voice/wire_format.cjs +798 -0
  380. package/dist/voice/wire_format.cjs.map +1 -0
  381. package/dist/voice/wire_format.d.cts +5503 -0
  382. package/dist/voice/wire_format.d.ts +5503 -0
  383. package/dist/voice/wire_format.d.ts.map +1 -0
  384. package/dist/voice/wire_format.js +728 -0
  385. package/dist/voice/wire_format.js.map +1 -0
  386. package/package.json +2 -1
  387. package/src/beta/index.ts +9 -0
  388. package/src/beta/workflows/index.ts +9 -0
  389. package/src/beta/workflows/task_group.ts +194 -0
  390. package/src/constants.ts +13 -0
  391. package/src/index.ts +2 -1
  392. package/src/inference/interruption/defaults.ts +51 -0
  393. package/src/inference/interruption/errors.ts +25 -0
  394. package/src/inference/interruption/http_transport.ts +187 -0
  395. package/src/inference/interruption/interruption_cache_entry.ts +50 -0
  396. package/src/inference/interruption/interruption_detector.ts +188 -0
  397. package/src/inference/interruption/interruption_stream.ts +467 -0
  398. package/src/inference/interruption/types.ts +84 -0
  399. package/src/inference/interruption/utils.test.ts +132 -0
  400. package/src/inference/interruption/utils.ts +137 -0
  401. package/src/inference/interruption/ws_transport.ts +402 -0
  402. package/src/inference/llm.ts +9 -12
  403. package/src/inference/stt.ts +10 -3
  404. package/src/inference/tts.ts +10 -3
  405. package/src/inference/utils.ts +29 -1
  406. package/src/llm/chat_context.test.ts +48 -0
  407. package/src/llm/chat_context.ts +161 -0
  408. package/src/llm/index.ts +2 -0
  409. package/src/llm/llm.ts +16 -0
  410. package/src/llm/realtime.ts +4 -0
  411. package/src/llm/tool_context.ts +14 -0
  412. package/src/metrics/base.ts +48 -1
  413. package/src/metrics/index.ts +11 -0
  414. package/src/metrics/model_usage.test.ts +545 -0
  415. package/src/metrics/model_usage.ts +262 -0
  416. package/src/metrics/usage_collector.ts +11 -0
  417. package/src/metrics/utils.ts +11 -0
  418. package/src/stream/multi_input_stream.test.ts +6 -1
  419. package/src/stream/stream_channel.ts +34 -2
  420. package/src/stt/stt.ts +38 -0
  421. package/src/telemetry/otel_http_exporter.ts +28 -5
  422. package/src/telemetry/trace_types.ts +11 -8
  423. package/src/telemetry/traces.ts +111 -54
  424. package/src/tts/tts.ts +69 -1
  425. package/src/utils.ts +5 -0
  426. package/src/voice/agent.ts +41 -3
  427. package/src/voice/agent_activity.ts +371 -34
  428. package/src/voice/agent_session.ts +207 -59
  429. package/src/voice/audio_recognition.ts +385 -9
  430. package/src/voice/client_events.ts +838 -0
  431. package/src/voice/events.ts +14 -4
  432. package/src/voice/generation.ts +52 -9
  433. package/src/voice/index.ts +1 -0
  434. package/src/voice/report.test.ts +117 -0
  435. package/src/voice/report.ts +29 -6
  436. package/src/voice/room_io/room_io.ts +21 -64
  437. package/src/voice/testing/fake_llm.ts +138 -0
  438. package/src/voice/testing/index.ts +2 -0
  439. package/src/voice/turn_config/endpointing.ts +33 -0
  440. package/src/voice/turn_config/interruption.ts +56 -0
  441. package/src/voice/turn_config/turn_handling.ts +45 -0
  442. package/src/voice/turn_config/utils.test.ts +100 -0
  443. package/src/voice/turn_config/utils.ts +103 -0
  444. package/src/voice/wire_format.ts +827 -0
@@ -0,0 +1,827 @@
1
+ // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+ //
5
+ // Explicit wire-format converters that produce the exact JSON shape emitted by
6
+ // Python Pydantic models (snake_case keys, durations in seconds).
7
+ // The agents-playground frontend (types.ts / useClientEvents.ts) consumes this
8
+ // format directly via JSON.parse — any mismatch breaks the UI.
9
+ import { z } from 'zod';
10
+ import type {
11
+ AgentHandoffItem,
12
+ AudioContent,
13
+ ChatContent,
14
+ ChatItem,
15
+ ChatMessage,
16
+ FunctionCall,
17
+ FunctionCallOutput,
18
+ ImageContent,
19
+ MetricsReport,
20
+ } from '../llm/chat_context.js';
21
+ import type {
22
+ AgentMetrics,
23
+ EOUMetrics,
24
+ InterruptionMetrics,
25
+ LLMMetrics,
26
+ MetricsMetadata,
27
+ RealtimeModelMetrics,
28
+ RealtimeModelMetricsCachedTokenDetails,
29
+ RealtimeModelMetricsInputTokenDetails,
30
+ RealtimeModelMetricsOutputTokenDetails,
31
+ STTMetrics,
32
+ TTSMetrics,
33
+ VADMetrics,
34
+ } from '../metrics/base.js';
35
+ import type {
36
+ InterruptionModelUsage,
37
+ LLMModelUsage,
38
+ ModelUsage,
39
+ STTModelUsage,
40
+ TTSModelUsage,
41
+ } from '../metrics/model_usage.js';
42
+ import type { AgentSessionUsage } from './agent_session.js';
43
+
44
+ // ---------------------------------------------------------------------------
45
+ // Helpers
46
+ // ---------------------------------------------------------------------------
47
+
48
// Loosely typed JSON-style object (string keys, arbitrary values) used as the
// return type of the wire-format converters in this file.
+ type WireObject = Record<string, unknown>;
49
+
50
/**
 * Converts a value expressed in milliseconds into seconds.
 *
 * @param ms - Value in milliseconds.
 * @returns `ms` divided by 1000.
 */
export function msToS(ms: number): number {
  const seconds = ms / 1000;
  return seconds;
}
53
+
54
/**
 * Returns a shallow copy of `obj` without the entries whose value is exactly
 * `undefined`.
 *
 * Other "empty" values (`null`, `0`, `''`, `false`, `NaN`) are kept, and the
 * surviving keys keep the order in which `Object.entries` reports them.
 *
 * @param obj - Source object; it is not modified.
 * @returns A new object holding only the defined entries.
 */
function omitUndefined(obj: WireObject): WireObject {
  const defined: WireObject = {};
  Object.entries(obj).forEach(([key, value]) => {
    if (value === undefined) {
      return;
    }
    defined[key] = value;
  });
  return defined;
}
63
+
64
/**
 * Builds the snake_case wire object for an image content part.
 *
 * The `image` field is only forwarded when it is a string; any other value is
 * replaced by `undefined` and therefore dropped, together with every other
 * undefined field.
 *
 * @param img - Image content to convert.
 * @returns Wire object without undefined entries.
 */
function imageContentToWire(img: ImageContent): WireObject {
  const image = typeof img.image === 'string' ? img.image : undefined;
  const wire: WireObject = {
    id: img.id,
    type: img.type,
    image,
    inference_detail: img.inferenceDetail,
    inference_width: img.inferenceWidth,
    inference_height: img.inferenceHeight,
    mime_type: img.mimeType,
  };
  return omitUndefined(wire);
}
75
+
76
/**
 * Builds the wire object for an audio content part.
 *
 * Only `type` and `transcript` are emitted; an undefined value is omitted.
 *
 * @param audio - Audio content to convert.
 * @returns Wire object without undefined entries.
 */
function audioContentToWire(audio: AudioContent): WireObject {
  const { type, transcript } = audio;
  return omitUndefined({ type, transcript });
}
82
+
83
/**
 * Converts a single chat content part to its wire representation.
 *
 * Strings are returned unchanged. Parts tagged `'image_content'` go through
 * `imageContentToWire`; every other part is handled by `audioContentToWire`.
 *
 * @param content - Content part to convert.
 * @returns The string itself or the converted wire object.
 */
function chatContentToWire(content: ChatContent): unknown {
  if (typeof content === 'string') {
    return content;
  }
  return content.type === 'image_content'
    ? imageContentToWire(content)
    : audioContentToWire(content);
}
88
+
89
/**
 * Renames the fields of a metrics report to snake_case wire keys.
 *
 * Values are copied as-is (no unit conversion is applied here) and undefined
 * fields are left out of the result.
 *
 * @param m - Metrics report to convert.
 * @returns Wire object without undefined entries.
 */
function metricsReportToWire(m: MetricsReport): WireObject {
  const wire: WireObject = {
    started_speaking_at: m.startedSpeakingAt,
    stopped_speaking_at: m.stoppedSpeakingAt,
    transcription_delay: m.transcriptionDelay,
    end_of_turn_delay: m.endOfTurnDelay,
    on_user_turn_completed_delay: m.onUserTurnCompletedDelay,
    llm_node_ttft: m.llmNodeTtft,
    tts_node_ttfb: m.ttsNodeTtfb,
    e2e_latency: m.e2eLatency,
  };
  return omitUndefined(wire);
}
101
+
102
/**
 * Converts a chat message to its wire representation.
 *
 * Always emitted: `id`, `type`, `role`, `content` (each part converted with
 * `chatContentToWire`), `interrupted` and `created_at` (`createdAt` passed
 * through `msToS`).
 * Conditionally emitted, in this order: `transcript_confidence` when defined,
 * `metrics` and `extra` when they have at least one own enumerable key.
 *
 * @param msg - Message to convert.
 * @returns Wire object for the message.
 */
export function chatMessageToWire(msg: ChatMessage): WireObject {
  const wire: WireObject = {
    id: msg.id,
    type: msg.type,
    role: msg.role,
    content: msg.content.map((part) => chatContentToWire(part)),
    interrupted: msg.interrupted,
    created_at: msToS(msg.createdAt),
  };

  const confidence = msg.transcriptConfidence;
  if (confidence !== undefined) {
    wire.transcript_confidence = confidence;
  }

  const hasMetrics = Object.keys(msg.metrics).length > 0;
  if (hasMetrics) {
    wire.metrics = metricsReportToWire(msg.metrics);
  }

  const hasExtra = Object.keys(msg.extra).length > 0;
  if (hasExtra) {
    wire.extra = msg.extra;
  }

  return wire;
}
123
+
124
/**
 * Converts a function call item to its wire representation.
 *
 * `args` is emitted under the key `arguments` and `createdAt` is passed
 * through `msToS`. `extra` is appended only when it has at least one own
 * enumerable key, followed by `group_id` when `groupId` is defined.
 *
 * @param fc - Function call to convert.
 * @returns Wire object for the function call.
 */
export function functionCallToWire(fc: FunctionCall): WireObject {
  const wire: WireObject = {
    id: fc.id,
    type: fc.type,
    call_id: fc.callId,
    arguments: fc.args,
    name: fc.name,
    created_at: msToS(fc.createdAt),
  };

  const hasExtra = Object.keys(fc.extra).length > 0;
  if (hasExtra) {
    wire.extra = fc.extra;
  }

  const groupId = fc.groupId;
  if (groupId !== undefined) {
    wire.group_id = groupId;
  }

  return wire;
}
142
+
143
/**
 * Converts a function call output item to its wire representation.
 *
 * All seven fields are always present in the result (undefined values are not
 * stripped); `createdAt` is passed through `msToS`.
 *
 * @param fco - Function call output to convert.
 * @returns Wire object for the function call output.
 */
export function functionCallOutputToWire(fco: FunctionCallOutput): WireObject {
  const { id, type, name, callId, output, isError, createdAt } = fco;
  const wire: WireObject = {
    id,
    type,
    name,
    call_id: callId,
    output,
    is_error: isError,
    created_at: msToS(createdAt),
  };
  return wire;
}
154
+
155
/**
 * Converts an agent handoff item to its wire representation.
 *
 * `old_agent_id` is appended after the fixed fields only when `oldAgentId` is
 * defined; `createdAt` is passed through `msToS`.
 *
 * @param ah - Agent handoff item to convert.
 * @returns Wire object for the handoff.
 */
export function agentHandoffToWire(ah: AgentHandoffItem): WireObject {
  const wire: WireObject = {
    id: ah.id,
    type: ah.type,
    new_agent_id: ah.newAgentId,
    created_at: msToS(ah.createdAt),
  };

  const previousAgentId = ah.oldAgentId;
  if (previousAgentId !== undefined) {
    wire.old_agent_id = previousAgentId;
  }

  return wire;
}
167
+
168
/**
 * Dispatches a chat item to the converter matching its `type` tag.
 *
 * Handled tags: `'message'`, `'function_call'`, `'function_call_output'` and
 * `'agent_handoff'`. There is deliberately no `default` branch, so the switch
 * stays an exhaustive match over the `ChatItem` union.
 *
 * @param item - Chat item to convert.
 * @returns Wire object produced by the matching converter.
 */
export function chatItemToWire(item: ChatItem): WireObject {
  switch (item.type) {
    case 'message': {
      return chatMessageToWire(item);
    }
    case 'function_call': {
      return functionCallToWire(item);
    }
    case 'function_call_output': {
      return functionCallOutputToWire(item);
    }
    case 'agent_handoff': {
      return agentHandoffToWire(item);
    }
  }
}
180
+
181
/**
 * Converts optional metrics metadata to its wire representation.
 *
 * @param m - Metadata to convert, or `undefined`.
 * @returns `null` when `m` is falsy; otherwise an object with `model_name`
 *   and `model_provider`, each omitted when undefined.
 */
function metadataToWire(m: MetricsMetadata | undefined): WireObject | null {
  if (m) {
    const { modelName, modelProvider } = m;
    return omitUndefined({
      model_name: modelName,
      model_provider: modelProvider,
    });
  }
  return null;
}
188
+
189
/**
 * Converts LLM metrics to their wire representation.
 *
 * `timestamp`, `durationMs` and `ttftMs` are passed through `msToS`; the
 * remaining fields are copied under snake_case keys. Undefined entries are
 * dropped, while `metadata` is kept even when `metadataToWire` returns `null`.
 *
 * @param m - LLM metrics to convert.
 * @returns Wire object without undefined entries.
 */
function llmMetricsToWire(m: LLMMetrics): WireObject {
  const {
    type,
    label,
    requestId,
    timestamp,
    durationMs,
    ttftMs,
    cancelled,
    completionTokens,
    promptTokens,
    promptCachedTokens,
    totalTokens,
    tokensPerSecond,
    speechId,
    metadata,
  } = m;

  const wire: WireObject = {
    type,
    label,
    request_id: requestId,
    timestamp: msToS(timestamp),
    duration: msToS(durationMs),
    ttft: msToS(ttftMs),
    cancelled,
    completion_tokens: completionTokens,
    prompt_tokens: promptTokens,
    prompt_cached_tokens: promptCachedTokens,
    total_tokens: totalTokens,
    tokens_per_second: tokensPerSecond,
    speech_id: speechId,
    metadata: metadataToWire(metadata),
  };
  return omitUndefined(wire);
}
207
+
208
/**
 * Converts STT metrics to their wire representation.
 *
 * `timestamp`, `durationMs` and `audioDurationMs` are passed through `msToS`;
 * the remaining fields are copied under snake_case keys. Undefined entries
 * are dropped, while `metadata` is kept even when it converts to `null`.
 *
 * @param m - STT metrics to convert.
 * @returns Wire object without undefined entries.
 */
function sttMetricsToWire(m: STTMetrics): WireObject {
  const {
    type,
    label,
    requestId,
    timestamp,
    durationMs,
    audioDurationMs,
    inputTokens,
    outputTokens,
    streamed,
    metadata,
  } = m;

  const wire: WireObject = {
    type,
    label,
    request_id: requestId,
    timestamp: msToS(timestamp),
    duration: msToS(durationMs),
    audio_duration: msToS(audioDurationMs),
    input_tokens: inputTokens,
    output_tokens: outputTokens,
    streamed,
    metadata: metadataToWire(metadata),
  };
  return omitUndefined(wire);
}
222
+
223
/**
 * Converts TTS metrics to their wire representation.
 *
 * `timestamp`, `ttfbMs`, `durationMs` and `audioDurationMs` are passed
 * through `msToS`; the remaining fields are copied under snake_case keys.
 * Undefined entries are dropped, while `metadata` is kept even when it
 * converts to `null`.
 *
 * @param m - TTS metrics to convert.
 * @returns Wire object without undefined entries.
 */
function ttsMetricsToWire(m: TTSMetrics): WireObject {
  const {
    type,
    label,
    requestId,
    timestamp,
    ttfbMs,
    durationMs,
    audioDurationMs,
    cancelled,
    charactersCount,
    inputTokens,
    outputTokens,
    streamed,
    segmentId,
    speechId,
    metadata,
  } = m;

  const wire: WireObject = {
    type,
    label,
    request_id: requestId,
    timestamp: msToS(timestamp),
    ttfb: msToS(ttfbMs),
    duration: msToS(durationMs),
    audio_duration: msToS(audioDurationMs),
    cancelled,
    characters_count: charactersCount,
    input_tokens: inputTokens,
    output_tokens: outputTokens,
    streamed,
    segment_id: segmentId,
    speech_id: speechId,
    metadata: metadataToWire(metadata),
  };
  return omitUndefined(wire);
}
242
+
243
/**
 * Converts VAD metrics to their wire representation.
 *
 * `timestamp`, `idleTimeMs` and `inferenceDurationTotalMs` are passed through
 * `msToS`. Unlike most sibling converters, the result is returned as built:
 * undefined values are not stripped.
 *
 * @param m - VAD metrics to convert.
 * @returns Wire object with all six keys present.
 */
function vadMetricsToWire(m: VADMetrics): WireObject {
  const { type, label, timestamp, idleTimeMs, inferenceDurationTotalMs, inferenceCount } = m;
  const wire: WireObject = {
    type,
    label,
    timestamp: msToS(timestamp),
    idle_time: msToS(idleTimeMs),
    inference_duration_total: msToS(inferenceDurationTotalMs),
    inference_count: inferenceCount,
  };
  return wire;
}
253
+
254
/**
 * Converts end-of-utterance metrics to their wire representation.
 *
 * `timestamp` and the three `*DelayMs` fields are passed through `msToS`;
 * `speechId` is copied as `speech_id`. Undefined entries are dropped.
 *
 * @param m - EOU metrics to convert.
 * @returns Wire object without undefined entries.
 */
function eouMetricsToWire(m: EOUMetrics): WireObject {
  const {
    type,
    timestamp,
    endOfUtteranceDelayMs,
    transcriptionDelayMs,
    onUserTurnCompletedDelayMs,
    speechId,
  } = m;

  const wire: WireObject = {
    type,
    timestamp: msToS(timestamp),
    end_of_utterance_delay: msToS(endOfUtteranceDelayMs),
    transcription_delay: msToS(transcriptionDelayMs),
    on_user_turn_completed_delay: msToS(onUserTurnCompletedDelayMs),
    speech_id: speechId,
  };
  return omitUndefined(wire);
}
264
+
265
/**
 * Renames the cached-token breakdown to snake_case wire keys.
 *
 * Keys are emitted in the order `audio_tokens`, `text_tokens`,
 * `image_tokens`; undefined values are not stripped.
 *
 * @param d - Cached token details to convert.
 * @returns Wire object with the three token counts.
 */
function cachedTokenDetailsToWire(d: RealtimeModelMetricsCachedTokenDetails): WireObject {
  const { audioTokens, textTokens, imageTokens } = d;
  const wire: WireObject = {
    audio_tokens: audioTokens,
    text_tokens: textTokens,
    image_tokens: imageTokens,
  };
  return wire;
}
272
+
273
/**
 * Builds the snake_case wire object for the input-token breakdown.
 *
 * The nested cached-token breakdown is serialized with
 * `cachedTokenDetailsToWire` only when `d.cachedTokensDetails` is truthy;
 * otherwise the key is set to `undefined` and the object is handed to
 * `omitUndefined`.
 */
function inputTokenDetailsToWire(d: RealtimeModelMetricsInputTokenDetails): WireObject {
  const { audioTokens, textTokens, imageTokens, cachedTokens, cachedTokensDetails } = d;
  return omitUndefined({
    audio_tokens: audioTokens,
    text_tokens: textTokens,
    image_tokens: imageTokens,
    cached_tokens: cachedTokens,
    cached_tokens_details: cachedTokensDetails
      ? cachedTokenDetailsToWire(cachedTokensDetails)
      : undefined,
  });
}
284
+
285
/**
 * Renames the output-token breakdown fields to their snake_case wire keys.
 *
 * @param d - Output token details (`textTokens`, `audioTokens`, `imageTokens`).
 * @returns Object with `text_tokens`, `audio_tokens` and `image_tokens`.
 */
function outputTokenDetailsToWire(d: RealtimeModelMetricsOutputTokenDetails): WireObject {
  const { textTokens, audioTokens, imageTokens } = d;
  return {
    text_tokens: textTokens,
    audio_tokens: audioTokens,
    image_tokens: imageTokens,
  };
}
292
+
293
/**
 * Builds the snake_case wire object for a realtime-model metrics record.
 *
 * `timestamp`, `durationMs` and `ttftMs` are converted with `msToS`.
 * `sessionDurationMs` is converted only when it is not `undefined`. Token
 * breakdowns and metadata are delegated to their own serializers, and the
 * assembled object is passed through `omitUndefined`.
 */
function realtimeModelMetricsToWire(m: RealtimeModelMetrics): WireObject {
  const { type, label, requestId, cancelled, inputTokens, outputTokens, totalTokens } = m;
  return omitUndefined({
    type,
    label,
    request_id: requestId,
    timestamp: msToS(m.timestamp),
    duration: msToS(m.durationMs),
    session_duration: m.sessionDurationMs === undefined ? undefined : msToS(m.sessionDurationMs),
    ttft: msToS(m.ttftMs),
    cancelled,
    input_tokens: inputTokens,
    output_tokens: outputTokens,
    total_tokens: totalTokens,
    tokens_per_second: m.tokensPerSecond,
    input_token_details: inputTokenDetailsToWire(m.inputTokenDetails),
    output_token_details: outputTokenDetailsToWire(m.outputTokenDetails),
    metadata: metadataToWire(m.metadata),
  });
}
312
+
313
/**
 * Builds the snake_case wire object for an interruption metrics record.
 *
 * `timestamp`, `totalDuration`, `predictionDuration` and `detectionDelay` are
 * converted with `msToS`; counters are copied unchanged; the assembled object is
 * passed through `omitUndefined`.
 */
function interruptionMetricsToWire(m: InterruptionMetrics): WireObject {
  const { type, numInterruptions, numBackchannels, numRequests } = m;
  return omitUndefined({
    type,
    timestamp: msToS(m.timestamp),
    total_duration: msToS(m.totalDuration),
    prediction_duration: msToS(m.predictionDuration),
    detection_delay: msToS(m.detectionDelay),
    num_interruptions: numInterruptions,
    num_backchannels: numBackchannels,
    num_requests: numRequests,
    metadata: metadataToWire(m.metadata),
  });
}
326
+
327
/**
 * Converts any agent metrics object to its wire form by dispatching on the
 * `type` discriminant to the matching per-kind serializer.
 *
 * @param m - Metrics object; `m.type` selects the serializer.
 * @returns The wire object produced by the selected serializer.
 * @throws Error when `m.type` is not one of the handled metric kinds (only
 *   reachable with a value that bypassed type checking), instead of silently
 *   returning `undefined` from a function typed to return `WireObject`.
 */
export function agentMetricsToWire(m: AgentMetrics): WireObject {
  switch (m.type) {
    case 'llm_metrics':
      return llmMetricsToWire(m);
    case 'stt_metrics':
      return sttMetricsToWire(m);
    case 'tts_metrics':
      return ttsMetricsToWire(m);
    case 'vad_metrics':
      return vadMetricsToWire(m);
    case 'eou_metrics':
      return eouMetricsToWire(m);
    case 'realtime_model_metrics':
      return realtimeModelMetricsToWire(m);
    case 'interruption_metrics':
      return interruptionMetricsToWire(m);
    default: {
      // Compile-time exhaustiveness guard: this assignment stops type-checking
      // if a member is added to AgentMetrics without a matching case above.
      const unhandled: never = m;
      const unknownType = (unhandled as { type?: unknown }).type;
      throw new Error(`Unsupported agent metrics type: ${String(unknownType)}`);
    }
  }
}
345
+
346
/**
 * Builds the snake_case wire object for (possibly partial) LLM usage.
 *
 * `type` is copied as-is. Missing (`null`/`undefined`) string fields fall back
 * to `''` and missing numeric fields to `0`; `sessionDurationMs` (default `0`)
 * is converted with `msToS`.
 */
function llmModelUsageToWire(u: Partial<LLMModelUsage>): WireObject {
  const sessionDurationMs = u.sessionDurationMs ?? 0;
  const wire: WireObject = {
    type: u.type,
    provider: u.provider ?? '',
    model: u.model ?? '',
    input_tokens: u.inputTokens ?? 0,
    input_cached_tokens: u.inputCachedTokens ?? 0,
    input_audio_tokens: u.inputAudioTokens ?? 0,
    input_cached_audio_tokens: u.inputCachedAudioTokens ?? 0,
    input_text_tokens: u.inputTextTokens ?? 0,
    input_cached_text_tokens: u.inputCachedTextTokens ?? 0,
    input_image_tokens: u.inputImageTokens ?? 0,
    input_cached_image_tokens: u.inputCachedImageTokens ?? 0,
    output_tokens: u.outputTokens ?? 0,
    output_audio_tokens: u.outputAudioTokens ?? 0,
    output_text_tokens: u.outputTextTokens ?? 0,
    session_duration: msToS(sessionDurationMs),
  };
  return wire;
}
365
+
366
/**
 * Builds the snake_case wire object for (possibly partial) TTS usage.
 *
 * `type` is copied as-is. Missing string fields fall back to `''` and missing
 * numeric fields to `0`; `audioDurationMs` (default `0`) is converted with
 * `msToS`.
 */
function ttsModelUsageToWire(u: Partial<TTSModelUsage>): WireObject {
  const audioDurationMs = u.audioDurationMs ?? 0;
  const wire: WireObject = {
    type: u.type,
    provider: u.provider ?? '',
    model: u.model ?? '',
    input_tokens: u.inputTokens ?? 0,
    output_tokens: u.outputTokens ?? 0,
    characters_count: u.charactersCount ?? 0,
    audio_duration: msToS(audioDurationMs),
  };
  return wire;
}
377
+
378
/**
 * Builds the snake_case wire object for (possibly partial) STT usage.
 *
 * `type` is copied as-is. Missing string fields fall back to `''` and missing
 * numeric fields to `0`; `audioDurationMs` (default `0`) is converted with
 * `msToS`.
 */
function sttModelUsageToWire(u: Partial<STTModelUsage>): WireObject {
  const audioDurationMs = u.audioDurationMs ?? 0;
  const wire: WireObject = {
    type: u.type,
    provider: u.provider ?? '',
    model: u.model ?? '',
    input_tokens: u.inputTokens ?? 0,
    output_tokens: u.outputTokens ?? 0,
    audio_duration: msToS(audioDurationMs),
  };
  return wire;
}
388
+
389
/**
 * Builds the snake_case wire object for (possibly partial) interruption-model
 * usage.
 *
 * `type` is copied as-is; a missing `provider`/`model` becomes `''` and a
 * missing `totalRequests` becomes `0`.
 */
function interruptionModelUsageToWire(u: Partial<InterruptionModelUsage>): WireObject {
  const provider = u.provider ?? '';
  const model = u.model ?? '';
  const totalRequests = u.totalRequests ?? 0;
  return {
    type: u.type,
    provider,
    model,
    total_requests: totalRequests,
  };
}
397
+
398
/**
 * Converts a (possibly partial) model-usage record to its wire form by
 * dispatching on `u.type` to the matching per-kind serializer.
 *
 * When `u.type` matches none of the known usage kinds (including when it is
 * absent), the input object itself is returned unchanged.
 */
export function modelUsageToWire(u: Partial<ModelUsage>): WireObject {
  const kind = u.type;
  if (kind === 'llm_usage') {
    return llmModelUsageToWire(u as Partial<LLMModelUsage>);
  }
  if (kind === 'tts_usage') {
    return ttsModelUsageToWire(u as Partial<TTSModelUsage>);
  }
  if (kind === 'stt_usage') {
    return sttModelUsageToWire(u as Partial<STTModelUsage>);
  }
  if (kind === 'interruption_usage') {
    return interruptionModelUsageToWire(u as Partial<InterruptionModelUsage>);
  }
  return u as WireObject;
}
412
+
413
/**
 * Converts session-level usage to its wire form: each entry of `u.modelUsage`
 * is serialized with `modelUsageToWire` and the resulting list is returned
 * under the `model_usage` key.
 */
export function agentSessionUsageToWire(u: AgentSessionUsage): WireObject {
  const modelUsage = u.modelUsage.map((usage) => modelUsageToWire(usage));
  return { model_usage: modelUsage };
}
418
+
419
+ // ===========================================================================
420
+ // Zod wire-format schemas
421
+ // These validate the exact JSON shape that Python Pydantic emits on the wire.
422
+ // Inferred types via z.infer give fully typed parse results.
423
+ // ===========================================================================
424
/**
 * Wire schema for an image content part (`type: 'image_content'`).
 * `id` and `image` are required strings; the inference hints and `mime_type`
 * are optional.
 */
const imageContentWireSchema = z.object({
  id: z.string(),
  type: z.literal('image_content'),
  image: z.string(),
  inference_detail: z.enum(['auto', 'high', 'low']).optional(),
  inference_width: z.number().optional(),
  inference_height: z.number().optional(),
  mime_type: z.string().optional(),
});
433
+
434
/**
 * Wire schema for an audio content part (`type: 'audio_content'`).
 * `transcript` may be a string, `null`, or absent.
 */
const audioContentWireSchema = z.object({
  type: z.literal('audio_content'),
  transcript: z.string().nullish(),
});
438
+
439
/** A single chat content entry: a plain string, an image part, or an audio part. */
const chatContentWireSchema = z.union([
  z.string(),
  imageContentWireSchema,
  audioContentWireSchema,
]);
440
+
441
/**
 * Wire schema for the per-message metrics report. Every field is an optional
 * number, and the report object as a whole is optional too.
 */
const metricsReportWireSchema = z
  .object({
    started_speaking_at: z.number().optional(),
    stopped_speaking_at: z.number().optional(),
    transcription_delay: z.number().optional(),
    end_of_turn_delay: z.number().optional(),
    on_user_turn_completed_delay: z.number().optional(),
    llm_node_ttft: z.number().optional(),
    tts_node_ttfb: z.number().optional(),
    e2e_latency: z.number().optional(),
  })
  .optional();
453
+
454
/**
 * Wire schema for a chat message item (`type: 'message'`).
 * `content` is a list of chat content entries; `transcript_confidence`,
 * `metrics` and `extra` are optional.
 */
export const chatMessageWireSchema = z.object({
  id: z.string(),
  type: z.literal('message'),
  role: z.enum(['developer', 'system', 'user', 'assistant']),
  content: chatContentWireSchema.array(),
  interrupted: z.boolean(),
  created_at: z.number(),
  transcript_confidence: z.number().optional(),
  metrics: metricsReportWireSchema,
  extra: z.record(z.string(), z.unknown()).optional(),
});
465
+
466
/**
 * Wire schema for a function-call item (`type: 'function_call'`).
 * `arguments` is carried as a string; `extra` and `group_id` are optional.
 */
export const functionCallWireSchema = z.object({
  id: z.string(),
  type: z.literal('function_call'),
  call_id: z.string(),
  arguments: z.string(),
  name: z.string(),
  created_at: z.number(),
  extra: z.record(z.string(), z.unknown()).optional(),
  group_id: z.string().optional(),
});
476
+
477
/**
 * Wire schema for a function-call output item (`type: 'function_call_output'`).
 * All fields are required; `output` is carried as a string.
 */
export const functionCallOutputWireSchema = z.object({
  id: z.string(),
  type: z.literal('function_call_output'),
  name: z.string(),
  call_id: z.string(),
  output: z.string(),
  is_error: z.boolean(),
  created_at: z.number(),
});
486
+
487
/**
 * Wire schema for an agent handoff item (`type: 'agent_handoff'`).
 * `new_agent_id` is required; `old_agent_id` is optional.
 */
export const agentHandoffWireSchema = z.object({
  id: z.string(),
  type: z.literal('agent_handoff'),
  new_agent_id: z.string(),
  created_at: z.number(),
  old_agent_id: z.string().optional(),
});
494
+
495
/**
 * Any chat history item, discriminated on `type`: message, function call,
 * function call output, or agent handoff.
 */
export const chatItemWireSchema = z.discriminatedUnion('type', [
  chatMessageWireSchema,
  functionCallWireSchema,
  functionCallOutputWireSchema,
  agentHandoffWireSchema,
]);
501
+
502
/**
 * Wire schema for the metrics `metadata` field: an object with optional
 * `model_name` / `model_provider`, which itself may be `null` or absent.
 */
const metadataWireSchema = z
  .object({
    model_name: z.string().optional(),
    model_provider: z.string().optional(),
  })
  .nullish();
509
+
510
/**
 * Wire schema for LLM metrics (`type: 'llm_metrics'`).
 * `speech_id` may be a string, `null`, or absent; `metadata` follows
 * `metadataWireSchema`.
 */
export const llmMetricsWireSchema = z.object({
  type: z.literal('llm_metrics'),
  label: z.string(),
  request_id: z.string(),
  timestamp: z.number(),
  duration: z.number(),
  ttft: z.number(),
  cancelled: z.boolean(),
  completion_tokens: z.number(),
  prompt_tokens: z.number(),
  prompt_cached_tokens: z.number(),
  total_tokens: z.number(),
  tokens_per_second: z.number(),
  speech_id: z.string().nullish(),
  metadata: metadataWireSchema,
});
526
+
527
/**
 * Wire schema for STT metrics (`type: 'stt_metrics'`).
 * Token counts are optional; `metadata` follows `metadataWireSchema`.
 */
export const sttMetricsWireSchema = z.object({
  type: z.literal('stt_metrics'),
  label: z.string(),
  request_id: z.string(),
  timestamp: z.number(),
  duration: z.number(),
  audio_duration: z.number(),
  input_tokens: z.number().optional(),
  output_tokens: z.number().optional(),
  streamed: z.boolean(),
  metadata: metadataWireSchema,
});
539
+
540
/**
 * Wire schema for TTS metrics (`type: 'tts_metrics'`).
 * Token counts are optional; `segment_id` and `speech_id` may be a string,
 * `null`, or absent; `metadata` follows `metadataWireSchema`.
 */
export const ttsMetricsWireSchema = z.object({
  type: z.literal('tts_metrics'),
  label: z.string(),
  request_id: z.string(),
  timestamp: z.number(),
  ttfb: z.number(),
  duration: z.number(),
  audio_duration: z.number(),
  cancelled: z.boolean(),
  characters_count: z.number(),
  input_tokens: z.number().optional(),
  output_tokens: z.number().optional(),
  streamed: z.boolean(),
  segment_id: z.string().nullish(),
  speech_id: z.string().nullish(),
  metadata: metadataWireSchema,
});
557
+
558
/** Wire schema for VAD metrics (`type: 'vad_metrics'`); every field is required. */
export const vadMetricsWireSchema = z.object({
  type: z.literal('vad_metrics'),
  label: z.string(),
  timestamp: z.number(),
  idle_time: z.number(),
  inference_duration_total: z.number(),
  inference_count: z.number(),
});
566
+
567
/**
 * Wire schema for end-of-utterance metrics (`type: 'eou_metrics'`).
 * `speech_id` may be a string, `null`, or absent.
 */
export const eouMetricsWireSchema = z.object({
  type: z.literal('eou_metrics'),
  timestamp: z.number(),
  end_of_utterance_delay: z.number(),
  transcription_delay: z.number(),
  on_user_turn_completed_delay: z.number(),
  speech_id: z.string().nullish(),
});
575
+
576
/** Wire schema for the cached-token breakdown (audio / text / image counts). */
const cachedTokenDetailsWireSchema = z.object({
  audio_tokens: z.number(),
  text_tokens: z.number(),
  image_tokens: z.number(),
});
581
+
582
/**
 * Wire schema for the input-token breakdown. `cached_tokens_details` may be a
 * cached-token breakdown, `null`, or absent.
 */
const inputTokenDetailsWireSchema = z.object({
  audio_tokens: z.number(),
  text_tokens: z.number(),
  image_tokens: z.number(),
  cached_tokens: z.number(),
  cached_tokens_details: cachedTokenDetailsWireSchema.nullish(),
});
589
+
590
/** Wire schema for the output-token breakdown (text / audio / image counts). */
const outputTokenDetailsWireSchema = z.object({
  text_tokens: z.number(),
  audio_tokens: z.number(),
  image_tokens: z.number(),
});
595
+
596
/**
 * Wire schema for realtime-model metrics (`type: 'realtime_model_metrics'`).
 * `session_duration` is optional; both token-detail objects are required;
 * `metadata` follows `metadataWireSchema`.
 */
export const realtimeModelMetricsWireSchema = z.object({
  type: z.literal('realtime_model_metrics'),
  label: z.string(),
  request_id: z.string(),
  timestamp: z.number(),
  duration: z.number(),
  session_duration: z.number().optional(),
  ttft: z.number(),
  cancelled: z.boolean(),
  input_tokens: z.number(),
  output_tokens: z.number(),
  total_tokens: z.number(),
  tokens_per_second: z.number(),
  input_token_details: inputTokenDetailsWireSchema,
  output_token_details: outputTokenDetailsWireSchema,
  metadata: metadataWireSchema,
});
613
+
614
/**
 * Wire schema for interruption metrics (`type: 'interruption_metrics'`).
 * All numeric fields are required; `metadata` follows `metadataWireSchema`.
 */
export const interruptionMetricsWireSchema = z.object({
  type: z.literal('interruption_metrics'),
  timestamp: z.number(),
  total_duration: z.number(),
  prediction_duration: z.number(),
  detection_delay: z.number(),
  num_interruptions: z.number(),
  num_backchannels: z.number(),
  num_requests: z.number(),
  metadata: metadataWireSchema,
});
625
+
626
/**
 * Any agent metrics payload, discriminated on `type`: LLM, STT, TTS, VAD,
 * end-of-utterance, realtime-model, or interruption metrics.
 */
export const agentMetricsWireSchema = z.discriminatedUnion('type', [
  llmMetricsWireSchema,
  sttMetricsWireSchema,
  ttsMetricsWireSchema,
  vadMetricsWireSchema,
  eouMetricsWireSchema,
  realtimeModelMetricsWireSchema,
  interruptionMetricsWireSchema,
]);
635
+
636
+ // ---------------------------------------------------------------------------
637
+ // Model usage schemas
638
+ // ---------------------------------------------------------------------------
639
+
640
/**
 * Wire schema for LLM usage (`type: 'llm_usage'`). Only `type` is required;
 * provider, model, every token counter and `session_duration` are optional.
 */
export const llmModelUsageWireSchema = z.object({
  type: z.literal('llm_usage'),
  provider: z.string().optional(),
  model: z.string().optional(),
  input_tokens: z.number().optional(),
  input_cached_tokens: z.number().optional(),
  input_audio_tokens: z.number().optional(),
  input_cached_audio_tokens: z.number().optional(),
  input_text_tokens: z.number().optional(),
  input_cached_text_tokens: z.number().optional(),
  input_image_tokens: z.number().optional(),
  input_cached_image_tokens: z.number().optional(),
  output_tokens: z.number().optional(),
  output_audio_tokens: z.number().optional(),
  output_text_tokens: z.number().optional(),
  session_duration: z.number().optional(),
});
657
+
658
/**
 * Wire schema for TTS usage (`type: 'tts_usage'`). Only `type` is required;
 * all other fields are optional.
 */
export const ttsModelUsageWireSchema = z.object({
  type: z.literal('tts_usage'),
  provider: z.string().optional(),
  model: z.string().optional(),
  input_tokens: z.number().optional(),
  output_tokens: z.number().optional(),
  characters_count: z.number().optional(),
  audio_duration: z.number().optional(),
});
667
+
668
/**
 * Wire schema for STT usage (`type: 'stt_usage'`). Only `type` is required;
 * all other fields are optional.
 */
export const sttModelUsageWireSchema = z.object({
  type: z.literal('stt_usage'),
  provider: z.string().optional(),
  model: z.string().optional(),
  input_tokens: z.number().optional(),
  output_tokens: z.number().optional(),
  audio_duration: z.number().optional(),
});
676
+
677
/**
 * Wire schema for interruption-model usage (`type: 'interruption_usage'`).
 * Only `type` is required; provider, model and `total_requests` are optional.
 */
export const interruptionModelUsageWireSchema = z.object({
  type: z.literal('interruption_usage'),
  provider: z.string().optional(),
  model: z.string().optional(),
  total_requests: z.number().optional(),
});
683
+
684
/** Any model-usage record, discriminated on `type`: LLM, TTS, STT, or interruption usage. */
export const modelUsageWireSchema = z.discriminatedUnion('type', [
  llmModelUsageWireSchema,
  ttsModelUsageWireSchema,
  sttModelUsageWireSchema,
  interruptionModelUsageWireSchema,
]);
690
+
691
/** Wire schema for session-level usage: a `model_usage` list of model-usage records. */
export const agentSessionUsageWireSchema = z.object({
  model_usage: modelUsageWireSchema.array(),
});
694
+
695
+ // ---------------------------------------------------------------------------
696
+ // Client event schemas
697
+ // ---------------------------------------------------------------------------
698
+
699
/** Allowed agent state names on the wire. */
const agentStateSchema = z.enum([
  'initializing',
  'idle',
  'listening',
  'thinking',
  'speaking',
]);

/** Allowed user state names on the wire. */
const userStateSchema = z.enum(['speaking', 'listening', 'away']);
701
+
702
/** Client event `agent_state_changed`: previous and new agent state plus `created_at`. */
export const clientAgentStateChangedSchema = z.object({
  type: z.literal('agent_state_changed'),
  old_state: agentStateSchema,
  new_state: agentStateSchema,
  created_at: z.number(),
});
708
+
709
/** Client event `user_state_changed`: previous and new user state plus `created_at`. */
export const clientUserStateChangedSchema = z.object({
  type: z.literal('user_state_changed'),
  old_state: userStateSchema,
  new_state: userStateSchema,
  created_at: z.number(),
});
715
+
716
/**
 * Client event `conversation_item_added`. `item` is validated against
 * `chatMessageWireSchema`, i.e. only message items are accepted here.
 */
export const clientConversationItemAddedSchema = z.object({
  type: z.literal('conversation_item_added'),
  item: chatMessageWireSchema,
  created_at: z.number(),
});
721
+
722
/**
 * Client event `user_input_transcribed`. `language` must be present but may be
 * `null`.
 */
export const clientUserInputTranscribedSchema = z.object({
  type: z.literal('user_input_transcribed'),
  transcript: z.string(),
  is_final: z.boolean(),
  language: z.string().nullable(),
  created_at: z.number(),
});
729
+
730
/**
 * Client event `function_tools_executed`: the function calls and their outputs.
 * Individual entries of `function_call_outputs` may be `null`.
 */
export const clientFunctionToolsExecutedSchema = z.object({
  type: z.literal('function_tools_executed'),
  function_calls: functionCallWireSchema.array(),
  function_call_outputs: functionCallOutputWireSchema.nullable().array(),
  created_at: z.number(),
});
736
+
737
/** Client event `metrics_collected`: one agent metrics payload plus `created_at`. */
export const clientMetricsCollectedSchema = z.object({
  type: z.literal('metrics_collected'),
  metrics: agentMetricsWireSchema,
  created_at: z.number(),
});
742
+
743
/** Client event `error`: a message string plus `created_at`. */
export const clientErrorSchema = z.object({
  type: z.literal('error'),
  message: z.string(),
  created_at: z.number(),
});
748
+
749
/**
 * Client event `user_overlapping_speech`. `overlap_started_at` must be present
 * but may be `null`.
 */
export const clientUserOverlappingSpeechSchema = z.object({
  type: z.literal('user_overlapping_speech'),
  is_interruption: z.boolean(),
  created_at: z.number(),
  sent_at: z.number(),
  detection_delay: z.number(),
  overlap_started_at: z.number().nullable(),
});
757
+
758
/** Client event `session_usage`: session-level usage plus `created_at`. */
export const clientSessionUsageSchema = z.object({
  type: z.literal('session_usage'),
  usage: agentSessionUsageWireSchema,
  created_at: z.number(),
});
763
+
764
/** Any client event, discriminated on `type` across the nine event schemas below. */
export const clientEventSchema = z.discriminatedUnion('type', [
  clientAgentStateChangedSchema,
  clientUserStateChangedSchema,
  clientConversationItemAddedSchema,
  clientUserInputTranscribedSchema,
  clientFunctionToolsExecutedSchema,
  clientMetricsCollectedSchema,
  clientErrorSchema,
  clientUserOverlappingSpeechSchema,
  clientSessionUsageSchema,
]);
775
+
776
+ // ---------------------------------------------------------------------------
777
+ // RPC schemas
778
+ // ---------------------------------------------------------------------------
779
+
780
/** RPC request body for sending a message: a single `text` string. */
export const sendMessageRequestSchema = z.object({
  text: z.string(),
});
783
+
784
/** Stream request envelope: `request_id`, `method`, and a string `payload`. */
export const streamRequestSchema = z.object({
  request_id: z.string(),
  method: z.string(),
  payload: z.string(),
});
789
+
790
/**
 * Stream response envelope: `request_id` and a string `payload`; `error` may be
 * a string, `null`, or absent.
 */
export const streamResponseSchema = z.object({
  request_id: z.string(),
  payload: z.string(),
  error: z.string().nullish(),
});
795
+
796
/**
 * Response schema for the session-state RPC: current agent and user state, the
 * agent id, an open-ended `options` record, and `created_at`.
 */
export const getSessionStateResponseSchema = z.object({
  agent_state: agentStateSchema,
  user_state: userStateSchema,
  agent_id: z.string(),
  options: z.record(z.string(), z.unknown()),
  created_at: z.number(),
});
803
+
804
/** Response schema for the chat-history RPC: an `items` list of chat items. */
export const getChatHistoryResponseSchema = z.object({
  items: chatItemWireSchema.array(),
});
807
+
808
/**
 * Response schema for the agent-info RPC. `instructions` must be present but
 * may be `null`; `tools` is a list of strings; `chat_ctx` is a list of chat
 * items.
 */
export const getAgentInfoResponseSchema = z.object({
  id: z.string(),
  instructions: z.string().nullable(),
  tools: z.string().array(),
  chat_ctx: chatItemWireSchema.array(),
});
814
+
815
/** Response schema for the send-message RPC: an `items` list of chat items. */
export const sendMessageResponseSchema = z.object({
  items: chatItemWireSchema.array(),
});
818
+
819
/**
 * Response schema for the RTC-stats RPC: publisher and subscriber stats, each a
 * list of open-ended string-keyed records.
 */
export const getRTCStatsResponseSchema = z.object({
  publisher_stats: z.record(z.string(), z.unknown()).array(),
  subscriber_stats: z.record(z.string(), z.unknown()).array(),
});
823
+
824
/** Response schema for the session-usage RPC: session-level usage plus `created_at`. */
export const getSessionUsageResponseSchema = z.object({
  usage: agentSessionUsageWireSchema,
  created_at: z.number(),
});