@livekit/agents 1.0.47 → 1.1.0-dev.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (444):
  1. package/dist/beta/index.cjs +29 -0
  2. package/dist/beta/index.cjs.map +1 -0
  3. package/dist/beta/index.d.cts +2 -0
  4. package/dist/beta/index.d.ts +2 -0
  5. package/dist/beta/index.d.ts.map +1 -0
  6. package/dist/beta/index.js +7 -0
  7. package/dist/beta/index.js.map +1 -0
  8. package/dist/beta/workflows/index.cjs +29 -0
  9. package/dist/beta/workflows/index.cjs.map +1 -0
  10. package/dist/beta/workflows/index.d.cts +2 -0
  11. package/dist/beta/workflows/index.d.ts +2 -0
  12. package/dist/beta/workflows/index.d.ts.map +1 -0
  13. package/dist/beta/workflows/index.js +7 -0
  14. package/dist/beta/workflows/index.js.map +1 -0
  15. package/dist/beta/workflows/task_group.cjs +162 -0
  16. package/dist/beta/workflows/task_group.cjs.map +1 -0
  17. package/dist/beta/workflows/task_group.d.cts +32 -0
  18. package/dist/beta/workflows/task_group.d.ts +32 -0
  19. package/dist/beta/workflows/task_group.d.ts.map +1 -0
  20. package/dist/beta/workflows/task_group.js +138 -0
  21. package/dist/beta/workflows/task_group.js.map +1 -0
  22. package/dist/constants.cjs +27 -0
  23. package/dist/constants.cjs.map +1 -1
  24. package/dist/constants.d.cts +9 -0
  25. package/dist/constants.d.ts +9 -0
  26. package/dist/constants.d.ts.map +1 -1
  27. package/dist/constants.js +18 -0
  28. package/dist/constants.js.map +1 -1
  29. package/dist/index.cjs +3 -0
  30. package/dist/index.cjs.map +1 -1
  31. package/dist/index.d.cts +2 -1
  32. package/dist/index.d.ts +2 -1
  33. package/dist/index.d.ts.map +1 -1
  34. package/dist/index.js +2 -0
  35. package/dist/index.js.map +1 -1
  36. package/dist/inference/api_protos.d.cts +12 -12
  37. package/dist/inference/api_protos.d.ts +12 -12
  38. package/dist/inference/interruption/defaults.cjs +81 -0
  39. package/dist/inference/interruption/defaults.cjs.map +1 -0
  40. package/dist/inference/interruption/defaults.d.cts +19 -0
  41. package/dist/inference/interruption/defaults.d.ts +19 -0
  42. package/dist/inference/interruption/defaults.d.ts.map +1 -0
  43. package/dist/inference/interruption/defaults.js +46 -0
  44. package/dist/inference/interruption/defaults.js.map +1 -0
  45. package/dist/inference/interruption/errors.cjs +44 -0
  46. package/dist/inference/interruption/errors.cjs.map +1 -0
  47. package/dist/inference/interruption/errors.d.cts +12 -0
  48. package/dist/inference/interruption/errors.d.ts +12 -0
  49. package/dist/inference/interruption/errors.d.ts.map +1 -0
  50. package/dist/inference/interruption/errors.js +20 -0
  51. package/dist/inference/interruption/errors.js.map +1 -0
  52. package/dist/inference/interruption/http_transport.cjs +147 -0
  53. package/dist/inference/interruption/http_transport.cjs.map +1 -0
  54. package/dist/inference/interruption/http_transport.d.cts +63 -0
  55. package/dist/inference/interruption/http_transport.d.ts +63 -0
  56. package/dist/inference/interruption/http_transport.d.ts.map +1 -0
  57. package/dist/inference/interruption/http_transport.js +121 -0
  58. package/dist/inference/interruption/http_transport.js.map +1 -0
  59. package/dist/inference/interruption/interruption_cache_entry.cjs +58 -0
  60. package/dist/inference/interruption/interruption_cache_entry.cjs.map +1 -0
  61. package/dist/inference/interruption/interruption_cache_entry.d.cts +30 -0
  62. package/dist/inference/interruption/interruption_cache_entry.d.ts +30 -0
  63. package/dist/inference/interruption/interruption_cache_entry.d.ts.map +1 -0
  64. package/dist/inference/interruption/interruption_cache_entry.js +34 -0
  65. package/dist/inference/interruption/interruption_cache_entry.js.map +1 -0
  66. package/dist/inference/interruption/interruption_detector.cjs +181 -0
  67. package/dist/inference/interruption/interruption_detector.cjs.map +1 -0
  68. package/dist/inference/interruption/interruption_detector.d.cts +59 -0
  69. package/dist/inference/interruption/interruption_detector.d.ts +59 -0
  70. package/dist/inference/interruption/interruption_detector.d.ts.map +1 -0
  71. package/dist/inference/interruption/interruption_detector.js +147 -0
  72. package/dist/inference/interruption/interruption_detector.js.map +1 -0
  73. package/dist/inference/interruption/interruption_stream.cjs +368 -0
  74. package/dist/inference/interruption/interruption_stream.cjs.map +1 -0
  75. package/dist/inference/interruption/interruption_stream.d.cts +46 -0
  76. package/dist/inference/interruption/interruption_stream.d.ts +46 -0
  77. package/dist/inference/interruption/interruption_stream.d.ts.map +1 -0
  78. package/dist/inference/interruption/interruption_stream.js +344 -0
  79. package/dist/inference/interruption/interruption_stream.js.map +1 -0
  80. package/dist/inference/interruption/types.cjs +17 -0
  81. package/dist/inference/interruption/types.cjs.map +1 -0
  82. package/dist/inference/interruption/types.d.cts +66 -0
  83. package/dist/inference/interruption/types.d.ts +66 -0
  84. package/dist/inference/interruption/types.d.ts.map +1 -0
  85. package/dist/inference/interruption/types.js +1 -0
  86. package/dist/inference/interruption/types.js.map +1 -0
  87. package/dist/inference/interruption/utils.cjs +130 -0
  88. package/dist/inference/interruption/utils.cjs.map +1 -0
  89. package/dist/inference/interruption/utils.d.cts +41 -0
  90. package/dist/inference/interruption/utils.d.ts +41 -0
  91. package/dist/inference/interruption/utils.d.ts.map +1 -0
  92. package/dist/inference/interruption/utils.js +105 -0
  93. package/dist/inference/interruption/utils.js.map +1 -0
  94. package/dist/inference/interruption/utils.test.cjs +105 -0
  95. package/dist/inference/interruption/utils.test.cjs.map +1 -0
  96. package/dist/inference/interruption/utils.test.js +104 -0
  97. package/dist/inference/interruption/utils.test.js.map +1 -0
  98. package/dist/inference/interruption/ws_transport.cjs +329 -0
  99. package/dist/inference/interruption/ws_transport.cjs.map +1 -0
  100. package/dist/inference/interruption/ws_transport.d.cts +33 -0
  101. package/dist/inference/interruption/ws_transport.d.ts +33 -0
  102. package/dist/inference/interruption/ws_transport.d.ts.map +1 -0
  103. package/dist/inference/interruption/ws_transport.js +295 -0
  104. package/dist/inference/interruption/ws_transport.js.map +1 -0
  105. package/dist/inference/llm.cjs +14 -10
  106. package/dist/inference/llm.cjs.map +1 -1
  107. package/dist/inference/llm.d.cts +2 -1
  108. package/dist/inference/llm.d.ts +2 -1
  109. package/dist/inference/llm.d.ts.map +1 -1
  110. package/dist/inference/llm.js +8 -10
  111. package/dist/inference/llm.js.map +1 -1
  112. package/dist/inference/stt.cjs +7 -2
  113. package/dist/inference/stt.cjs.map +1 -1
  114. package/dist/inference/stt.d.cts +2 -0
  115. package/dist/inference/stt.d.ts +2 -0
  116. package/dist/inference/stt.d.ts.map +1 -1
  117. package/dist/inference/stt.js +8 -3
  118. package/dist/inference/stt.js.map +1 -1
  119. package/dist/inference/tts.cjs +7 -2
  120. package/dist/inference/tts.cjs.map +1 -1
  121. package/dist/inference/tts.d.cts +2 -0
  122. package/dist/inference/tts.d.ts +2 -0
  123. package/dist/inference/tts.d.ts.map +1 -1
  124. package/dist/inference/tts.js +8 -3
  125. package/dist/inference/tts.js.map +1 -1
  126. package/dist/inference/utils.cjs +26 -7
  127. package/dist/inference/utils.cjs.map +1 -1
  128. package/dist/inference/utils.d.cts +13 -0
  129. package/dist/inference/utils.d.ts +13 -0
  130. package/dist/inference/utils.d.ts.map +1 -1
  131. package/dist/inference/utils.js +18 -2
  132. package/dist/inference/utils.js.map +1 -1
  133. package/dist/llm/chat_context.cjs +108 -2
  134. package/dist/llm/chat_context.cjs.map +1 -1
  135. package/dist/llm/chat_context.d.cts +28 -1
  136. package/dist/llm/chat_context.d.ts +28 -1
  137. package/dist/llm/chat_context.d.ts.map +1 -1
  138. package/dist/llm/chat_context.js +108 -2
  139. package/dist/llm/chat_context.js.map +1 -1
  140. package/dist/llm/chat_context.test.cjs +43 -0
  141. package/dist/llm/chat_context.test.cjs.map +1 -1
  142. package/dist/llm/chat_context.test.js +43 -0
  143. package/dist/llm/chat_context.test.js.map +1 -1
  144. package/dist/llm/index.cjs +2 -0
  145. package/dist/llm/index.cjs.map +1 -1
  146. package/dist/llm/index.d.cts +2 -2
  147. package/dist/llm/index.d.ts +2 -2
  148. package/dist/llm/index.d.ts.map +1 -1
  149. package/dist/llm/index.js +3 -1
  150. package/dist/llm/index.js.map +1 -1
  151. package/dist/llm/llm.cjs +16 -1
  152. package/dist/llm/llm.cjs.map +1 -1
  153. package/dist/llm/llm.d.cts +9 -0
  154. package/dist/llm/llm.d.ts +9 -0
  155. package/dist/llm/llm.d.ts.map +1 -1
  156. package/dist/llm/llm.js +16 -1
  157. package/dist/llm/llm.js.map +1 -1
  158. package/dist/llm/provider_format/index.d.cts +1 -1
  159. package/dist/llm/provider_format/index.d.ts +1 -1
  160. package/dist/llm/realtime.cjs +3 -0
  161. package/dist/llm/realtime.cjs.map +1 -1
  162. package/dist/llm/realtime.d.cts +1 -0
  163. package/dist/llm/realtime.d.ts +1 -0
  164. package/dist/llm/realtime.d.ts.map +1 -1
  165. package/dist/llm/realtime.js +3 -0
  166. package/dist/llm/realtime.js.map +1 -1
  167. package/dist/llm/tool_context.cjs +7 -0
  168. package/dist/llm/tool_context.cjs.map +1 -1
  169. package/dist/llm/tool_context.d.cts +10 -2
  170. package/dist/llm/tool_context.d.ts +10 -2
  171. package/dist/llm/tool_context.d.ts.map +1 -1
  172. package/dist/llm/tool_context.js +6 -0
  173. package/dist/llm/tool_context.js.map +1 -1
  174. package/dist/metrics/base.cjs.map +1 -1
  175. package/dist/metrics/base.d.cts +45 -1
  176. package/dist/metrics/base.d.ts +45 -1
  177. package/dist/metrics/base.d.ts.map +1 -1
  178. package/dist/metrics/index.cjs +5 -0
  179. package/dist/metrics/index.cjs.map +1 -1
  180. package/dist/metrics/index.d.cts +2 -1
  181. package/dist/metrics/index.d.ts +2 -1
  182. package/dist/metrics/index.d.ts.map +1 -1
  183. package/dist/metrics/index.js +6 -0
  184. package/dist/metrics/index.js.map +1 -1
  185. package/dist/metrics/model_usage.cjs +189 -0
  186. package/dist/metrics/model_usage.cjs.map +1 -0
  187. package/dist/metrics/model_usage.d.cts +92 -0
  188. package/dist/metrics/model_usage.d.ts +92 -0
  189. package/dist/metrics/model_usage.d.ts.map +1 -0
  190. package/dist/metrics/model_usage.js +164 -0
  191. package/dist/metrics/model_usage.js.map +1 -0
  192. package/dist/metrics/model_usage.test.cjs +474 -0
  193. package/dist/metrics/model_usage.test.cjs.map +1 -0
  194. package/dist/metrics/model_usage.test.js +476 -0
  195. package/dist/metrics/model_usage.test.js.map +1 -0
  196. package/dist/metrics/usage_collector.cjs +3 -0
  197. package/dist/metrics/usage_collector.cjs.map +1 -1
  198. package/dist/metrics/usage_collector.d.cts +9 -0
  199. package/dist/metrics/usage_collector.d.ts +9 -0
  200. package/dist/metrics/usage_collector.d.ts.map +1 -1
  201. package/dist/metrics/usage_collector.js +3 -0
  202. package/dist/metrics/usage_collector.js.map +1 -1
  203. package/dist/metrics/utils.cjs +9 -0
  204. package/dist/metrics/utils.cjs.map +1 -1
  205. package/dist/metrics/utils.d.ts.map +1 -1
  206. package/dist/metrics/utils.js +9 -0
  207. package/dist/metrics/utils.js.map +1 -1
  208. package/dist/stream/multi_input_stream.test.cjs +4 -0
  209. package/dist/stream/multi_input_stream.test.cjs.map +1 -1
  210. package/dist/stream/multi_input_stream.test.js +5 -1
  211. package/dist/stream/multi_input_stream.test.js.map +1 -1
  212. package/dist/stream/stream_channel.cjs +31 -0
  213. package/dist/stream/stream_channel.cjs.map +1 -1
  214. package/dist/stream/stream_channel.d.cts +4 -2
  215. package/dist/stream/stream_channel.d.ts +4 -2
  216. package/dist/stream/stream_channel.d.ts.map +1 -1
  217. package/dist/stream/stream_channel.js +31 -0
  218. package/dist/stream/stream_channel.js.map +1 -1
  219. package/dist/stt/stt.cjs +34 -2
  220. package/dist/stt/stt.cjs.map +1 -1
  221. package/dist/stt/stt.d.cts +22 -0
  222. package/dist/stt/stt.d.ts +22 -0
  223. package/dist/stt/stt.d.ts.map +1 -1
  224. package/dist/stt/stt.js +34 -2
  225. package/dist/stt/stt.js.map +1 -1
  226. package/dist/telemetry/otel_http_exporter.cjs +24 -5
  227. package/dist/telemetry/otel_http_exporter.cjs.map +1 -1
  228. package/dist/telemetry/otel_http_exporter.d.cts +1 -0
  229. package/dist/telemetry/otel_http_exporter.d.ts +1 -0
  230. package/dist/telemetry/otel_http_exporter.d.ts.map +1 -1
  231. package/dist/telemetry/otel_http_exporter.js +24 -5
  232. package/dist/telemetry/otel_http_exporter.js.map +1 -1
  233. package/dist/telemetry/trace_types.cjs +5 -5
  234. package/dist/telemetry/trace_types.cjs.map +1 -1
  235. package/dist/telemetry/trace_types.d.cts +9 -5
  236. package/dist/telemetry/trace_types.d.ts +9 -5
  237. package/dist/telemetry/trace_types.d.ts.map +1 -1
  238. package/dist/telemetry/trace_types.js +5 -5
  239. package/dist/telemetry/trace_types.js.map +1 -1
  240. package/dist/telemetry/traces.cjs +47 -8
  241. package/dist/telemetry/traces.cjs.map +1 -1
  242. package/dist/telemetry/traces.d.ts.map +1 -1
  243. package/dist/telemetry/traces.js +47 -8
  244. package/dist/telemetry/traces.js.map +1 -1
  245. package/dist/tts/tts.cjs +64 -2
  246. package/dist/tts/tts.cjs.map +1 -1
  247. package/dist/tts/tts.d.cts +34 -0
  248. package/dist/tts/tts.d.ts +34 -0
  249. package/dist/tts/tts.d.ts.map +1 -1
  250. package/dist/tts/tts.js +64 -2
  251. package/dist/tts/tts.js.map +1 -1
  252. package/dist/utils.cjs +1 -0
  253. package/dist/utils.cjs.map +1 -1
  254. package/dist/utils.d.ts.map +1 -1
  255. package/dist/utils.js +1 -0
  256. package/dist/utils.js.map +1 -1
  257. package/dist/version.cjs +1 -1
  258. package/dist/version.js +1 -1
  259. package/dist/voice/agent.cjs +34 -4
  260. package/dist/voice/agent.cjs.map +1 -1
  261. package/dist/voice/agent.d.cts +11 -2
  262. package/dist/voice/agent.d.ts +11 -2
  263. package/dist/voice/agent.d.ts.map +1 -1
  264. package/dist/voice/agent.js +34 -4
  265. package/dist/voice/agent.js.map +1 -1
  266. package/dist/voice/agent_activity.cjs +292 -44
  267. package/dist/voice/agent_activity.cjs.map +1 -1
  268. package/dist/voice/agent_activity.d.cts +27 -6
  269. package/dist/voice/agent_activity.d.ts +27 -6
  270. package/dist/voice/agent_activity.d.ts.map +1 -1
  271. package/dist/voice/agent_activity.js +293 -45
  272. package/dist/voice/agent_activity.js.map +1 -1
  273. package/dist/voice/agent_session.cjs +105 -48
  274. package/dist/voice/agent_session.cjs.map +1 -1
  275. package/dist/voice/agent_session.d.cts +90 -20
  276. package/dist/voice/agent_session.d.ts +90 -20
  277. package/dist/voice/agent_session.d.ts.map +1 -1
  278. package/dist/voice/agent_session.js +105 -46
  279. package/dist/voice/agent_session.js.map +1 -1
  280. package/dist/voice/audio_recognition.cjs +287 -6
  281. package/dist/voice/audio_recognition.cjs.map +1 -1
  282. package/dist/voice/audio_recognition.d.cts +42 -3
  283. package/dist/voice/audio_recognition.d.ts +42 -3
  284. package/dist/voice/audio_recognition.d.ts.map +1 -1
  285. package/dist/voice/audio_recognition.js +289 -7
  286. package/dist/voice/audio_recognition.js.map +1 -1
  287. package/dist/voice/client_events.cjs +554 -0
  288. package/dist/voice/client_events.cjs.map +1 -0
  289. package/dist/voice/client_events.d.cts +195 -0
  290. package/dist/voice/client_events.d.ts +195 -0
  291. package/dist/voice/client_events.d.ts.map +1 -0
  292. package/dist/voice/client_events.js +548 -0
  293. package/dist/voice/client_events.js.map +1 -0
  294. package/dist/voice/events.cjs +1 -0
  295. package/dist/voice/events.cjs.map +1 -1
  296. package/dist/voice/events.d.cts +8 -5
  297. package/dist/voice/events.d.ts +8 -5
  298. package/dist/voice/events.d.ts.map +1 -1
  299. package/dist/voice/events.js +1 -0
  300. package/dist/voice/events.js.map +1 -1
  301. package/dist/voice/generation.cjs +43 -8
  302. package/dist/voice/generation.cjs.map +1 -1
  303. package/dist/voice/generation.d.cts +3 -3
  304. package/dist/voice/generation.d.ts +3 -3
  305. package/dist/voice/generation.d.ts.map +1 -1
  306. package/dist/voice/generation.js +43 -8
  307. package/dist/voice/generation.js.map +1 -1
  308. package/dist/voice/index.cjs +1 -0
  309. package/dist/voice/index.cjs.map +1 -1
  310. package/dist/voice/index.d.cts +1 -0
  311. package/dist/voice/index.d.ts +1 -0
  312. package/dist/voice/index.d.ts.map +1 -1
  313. package/dist/voice/index.js +1 -0
  314. package/dist/voice/index.js.map +1 -1
  315. package/dist/voice/report.cjs +20 -8
  316. package/dist/voice/report.cjs.map +1 -1
  317. package/dist/voice/report.d.cts +5 -0
  318. package/dist/voice/report.d.ts +5 -0
  319. package/dist/voice/report.d.ts.map +1 -1
  320. package/dist/voice/report.js +20 -8
  321. package/dist/voice/report.js.map +1 -1
  322. package/dist/voice/report.test.cjs +106 -0
  323. package/dist/voice/report.test.cjs.map +1 -0
  324. package/dist/voice/report.test.js +105 -0
  325. package/dist/voice/report.test.js.map +1 -0
  326. package/dist/voice/room_io/room_io.cjs +16 -41
  327. package/dist/voice/room_io/room_io.cjs.map +1 -1
  328. package/dist/voice/room_io/room_io.d.cts +4 -9
  329. package/dist/voice/room_io/room_io.d.ts +4 -9
  330. package/dist/voice/room_io/room_io.d.ts.map +1 -1
  331. package/dist/voice/room_io/room_io.js +17 -43
  332. package/dist/voice/room_io/room_io.js.map +1 -1
  333. package/dist/voice/testing/fake_llm.cjs +127 -0
  334. package/dist/voice/testing/fake_llm.cjs.map +1 -0
  335. package/dist/voice/testing/fake_llm.d.cts +30 -0
  336. package/dist/voice/testing/fake_llm.d.ts +30 -0
  337. package/dist/voice/testing/fake_llm.d.ts.map +1 -0
  338. package/dist/voice/testing/fake_llm.js +103 -0
  339. package/dist/voice/testing/fake_llm.js.map +1 -0
  340. package/dist/voice/testing/index.cjs +3 -0
  341. package/dist/voice/testing/index.cjs.map +1 -1
  342. package/dist/voice/testing/index.d.cts +1 -0
  343. package/dist/voice/testing/index.d.ts +1 -0
  344. package/dist/voice/testing/index.d.ts.map +1 -1
  345. package/dist/voice/testing/index.js +2 -0
  346. package/dist/voice/testing/index.js.map +1 -1
  347. package/dist/voice/turn_config/endpointing.cjs +33 -0
  348. package/dist/voice/turn_config/endpointing.cjs.map +1 -0
  349. package/dist/voice/turn_config/endpointing.d.cts +30 -0
  350. package/dist/voice/turn_config/endpointing.d.ts +30 -0
  351. package/dist/voice/turn_config/endpointing.d.ts.map +1 -0
  352. package/dist/voice/turn_config/endpointing.js +9 -0
  353. package/dist/voice/turn_config/endpointing.js.map +1 -0
  354. package/dist/voice/turn_config/interruption.cjs +37 -0
  355. package/dist/voice/turn_config/interruption.cjs.map +1 -0
  356. package/dist/voice/turn_config/interruption.d.cts +53 -0
  357. package/dist/voice/turn_config/interruption.d.ts +53 -0
  358. package/dist/voice/turn_config/interruption.d.ts.map +1 -0
  359. package/dist/voice/turn_config/interruption.js +13 -0
  360. package/dist/voice/turn_config/interruption.js.map +1 -0
  361. package/dist/voice/turn_config/turn_handling.cjs +35 -0
  362. package/dist/voice/turn_config/turn_handling.cjs.map +1 -0
  363. package/dist/voice/turn_config/turn_handling.d.cts +36 -0
  364. package/dist/voice/turn_config/turn_handling.d.ts +36 -0
  365. package/dist/voice/turn_config/turn_handling.d.ts.map +1 -0
  366. package/dist/voice/turn_config/turn_handling.js +11 -0
  367. package/dist/voice/turn_config/turn_handling.js.map +1 -0
  368. package/dist/voice/turn_config/utils.cjs +97 -0
  369. package/dist/voice/turn_config/utils.cjs.map +1 -0
  370. package/dist/voice/turn_config/utils.d.cts +25 -0
  371. package/dist/voice/turn_config/utils.d.ts +25 -0
  372. package/dist/voice/turn_config/utils.d.ts.map +1 -0
  373. package/dist/voice/turn_config/utils.js +73 -0
  374. package/dist/voice/turn_config/utils.js.map +1 -0
  375. package/dist/voice/turn_config/utils.test.cjs +86 -0
  376. package/dist/voice/turn_config/utils.test.cjs.map +1 -0
  377. package/dist/voice/turn_config/utils.test.js +85 -0
  378. package/dist/voice/turn_config/utils.test.js.map +1 -0
  379. package/dist/voice/wire_format.cjs +798 -0
  380. package/dist/voice/wire_format.cjs.map +1 -0
  381. package/dist/voice/wire_format.d.cts +5503 -0
  382. package/dist/voice/wire_format.d.ts +5503 -0
  383. package/dist/voice/wire_format.d.ts.map +1 -0
  384. package/dist/voice/wire_format.js +728 -0
  385. package/dist/voice/wire_format.js.map +1 -0
  386. package/package.json +2 -1
  387. package/src/beta/index.ts +9 -0
  388. package/src/beta/workflows/index.ts +9 -0
  389. package/src/beta/workflows/task_group.ts +194 -0
  390. package/src/constants.ts +13 -0
  391. package/src/index.ts +2 -1
  392. package/src/inference/interruption/defaults.ts +51 -0
  393. package/src/inference/interruption/errors.ts +25 -0
  394. package/src/inference/interruption/http_transport.ts +187 -0
  395. package/src/inference/interruption/interruption_cache_entry.ts +50 -0
  396. package/src/inference/interruption/interruption_detector.ts +188 -0
  397. package/src/inference/interruption/interruption_stream.ts +467 -0
  398. package/src/inference/interruption/types.ts +84 -0
  399. package/src/inference/interruption/utils.test.ts +132 -0
  400. package/src/inference/interruption/utils.ts +137 -0
  401. package/src/inference/interruption/ws_transport.ts +402 -0
  402. package/src/inference/llm.ts +9 -12
  403. package/src/inference/stt.ts +10 -3
  404. package/src/inference/tts.ts +10 -3
  405. package/src/inference/utils.ts +29 -1
  406. package/src/llm/chat_context.test.ts +48 -0
  407. package/src/llm/chat_context.ts +161 -0
  408. package/src/llm/index.ts +2 -0
  409. package/src/llm/llm.ts +16 -0
  410. package/src/llm/realtime.ts +4 -0
  411. package/src/llm/tool_context.ts +14 -0
  412. package/src/metrics/base.ts +48 -1
  413. package/src/metrics/index.ts +11 -0
  414. package/src/metrics/model_usage.test.ts +545 -0
  415. package/src/metrics/model_usage.ts +262 -0
  416. package/src/metrics/usage_collector.ts +11 -0
  417. package/src/metrics/utils.ts +11 -0
  418. package/src/stream/multi_input_stream.test.ts +6 -1
  419. package/src/stream/stream_channel.ts +34 -2
  420. package/src/stt/stt.ts +38 -0
  421. package/src/telemetry/otel_http_exporter.ts +28 -5
  422. package/src/telemetry/trace_types.ts +11 -8
  423. package/src/telemetry/traces.ts +111 -54
  424. package/src/tts/tts.ts +69 -1
  425. package/src/utils.ts +5 -0
  426. package/src/voice/agent.ts +41 -3
  427. package/src/voice/agent_activity.ts +371 -34
  428. package/src/voice/agent_session.ts +207 -59
  429. package/src/voice/audio_recognition.ts +385 -9
  430. package/src/voice/client_events.ts +838 -0
  431. package/src/voice/events.ts +14 -4
  432. package/src/voice/generation.ts +52 -9
  433. package/src/voice/index.ts +1 -0
  434. package/src/voice/report.test.ts +117 -0
  435. package/src/voice/report.ts +29 -6
  436. package/src/voice/room_io/room_io.ts +21 -64
  437. package/src/voice/testing/fake_llm.ts +138 -0
  438. package/src/voice/testing/index.ts +2 -0
  439. package/src/voice/turn_config/endpointing.ts +33 -0
  440. package/src/voice/turn_config/interruption.ts +56 -0
  441. package/src/voice/turn_config/turn_handling.ts +45 -0
  442. package/src/voice/turn_config/utils.test.ts +100 -0
  443. package/src/voice/turn_config/utils.ts +103 -0
  444. package/src/voice/wire_format.ts +827 -0
@@ -0,0 +1,188 @@
1
+ // SPDX-FileCopyrightText: 2026 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+ import type { TypedEventEmitter } from '@livekit/typed-emitter';
5
+ import EventEmitter from 'events';
6
+ import { log } from '../../log.js';
7
+ import type { InterruptionMetrics } from '../../metrics/base.js';
8
+ import { DEFAULT_INFERENCE_URL, STAGING_INFERENCE_URL, getDefaultInferenceUrl } from '../utils.js';
9
+ import { FRAMES_PER_SECOND, SAMPLE_RATE, interruptionOptionDefaults } from './defaults.js';
10
+ import type { InterruptionDetectionError } from './errors.js';
11
+ import { InterruptionStreamBase } from './interruption_stream.js';
12
+ import type { InterruptionOptions, OverlappingSpeechEvent } from './types.js';
13
+
14
/**
 * Event map used to type the detector's emitter: each key is an event name and
 * each value is the signature a listener for that event must have.
 */
type InterruptionCallbacks = {
  /** Listener is handed the detection error that was emitted. */
  error: (error: InterruptionDetectionError) => void;
  /** Listener is handed the interruption metrics that were collected. */
  metrics_collected: (metrics: InterruptionMetrics) => void;
  /** Listener is handed the overlapping-speech event. */
  user_overlapping_speech: (event: OverlappingSpeechEvent) => void;
};
19
+
20
/**
 * Options accepted by the `AdaptiveInterruptionDetector` constructor.
 *
 * Every field of `InterruptionOptions` is optional here. `useProxy` is left out
 * because the constructor computes it from the resolved base URL instead of
 * accepting it from the caller.
 */
export type AdaptiveInterruptionDetectorOptions = Partial<Omit<InterruptionOptions, 'useProxy'>>;
21
+
22
/**
 * Option holder and stream factory for adaptive interruption detection.
 *
 * The detector resolves its options once at construction time (endpoint,
 * credentials, thresholds), creates `InterruptionStreamBase` instances through
 * `createStream()`, and keeps track of them so that later option changes can be
 * pushed to every tracked stream.
 *
 * It is a typed event emitter for the `user_overlapping_speech`,
 * `metrics_collected` and `error` events.
 */
export class AdaptiveInterruptionDetector extends (EventEmitter as new () => TypedEventEmitter<InterruptionCallbacks>) {
  /** Fully resolved options; `updateOptions()` mutates this object in place. */
  options: InterruptionOptions;
  private readonly _label: string;
  private logger = log();
  // A plain Set (not a WeakSet) so tracked streams can be enumerated when options change.
  private streams: Set<InterruptionStreamBase> = new Set();

  /**
   * @param options - Overrides layered on top of `interruptionOptionDefaults`.
   * @throws RangeError when `maxAudioDurationInS` is greater than 3.0.
   * @throws TypeError when the resolved base URL is the default or staging
   *   inference URL and no API key / API secret is available from the arguments
   *   or the environment.
   */
  constructor(options: AdaptiveInterruptionDetectorOptions = {}) {
    super();

    // Caller-supplied values win over the packaged defaults.
    const resolved = { ...interruptionOptionDefaults, ...options };

    if (resolved.maxAudioDurationInS > 3.0) {
      throw new RangeError('maxAudioDurationInS must be less than or equal to 3.0 seconds');
    }

    const env = process.env;
    const endpoint = resolved.baseUrl ?? env.LIVEKIT_REMOTE_EOT_URL ?? getDefaultInferenceUrl();

    // Only the inference service (production or staging) uses LiveKit
    // credentials from the environment and enables `useProxy`.
    const targetsInferenceService =
      endpoint === DEFAULT_INFERENCE_URL || endpoint === STAGING_INFERENCE_URL;

    const key = targetsInferenceService
      ? (resolved.apiKey ?? env.LIVEKIT_INFERENCE_API_KEY ?? env.LIVEKIT_API_KEY ?? '')
      : (resolved.apiKey ?? '');
    if (targetsInferenceService && !key) {
      throw new TypeError(
        'apiKey is required, either as argument or set LIVEKIT_API_KEY environmental variable',
      );
    }

    const secret = targetsInferenceService
      ? (resolved.apiSecret ?? env.LIVEKIT_INFERENCE_API_SECRET ?? env.LIVEKIT_API_SECRET ?? '')
      : (resolved.apiSecret ?? '');
    if (targetsInferenceService && !secret) {
      throw new TypeError(
        'apiSecret is required, either as argument or set LIVEKIT_API_SECRET environmental variable',
      );
    }

    this.options = {
      sampleRate: SAMPLE_RATE,
      threshold: resolved.threshold,
      // The minimum interruption duration is also stored as a whole number of frames.
      minFrames: Math.ceil(resolved.minInterruptionDurationInS * FRAMES_PER_SECOND),
      maxAudioDurationInS: resolved.maxAudioDurationInS,
      audioPrefixDurationInS: resolved.audioPrefixDurationInS,
      detectionIntervalInS: resolved.detectionIntervalInS,
      inferenceTimeout: resolved.inferenceTimeout,
      baseUrl: endpoint,
      apiKey: key,
      apiSecret: secret,
      useProxy: targetsInferenceService,
      minInterruptionDurationInS: resolved.minInterruptionDurationInS,
    };

    this._label = `${this.constructor.name}`;

    // Credentials are deliberately not part of the logged payload.
    const {
      baseUrl,
      detectionIntervalInS,
      audioPrefixDurationInS,
      maxAudioDurationInS,
      minFrames,
      threshold,
      inferenceTimeout,
      useProxy,
    } = this.options;
    this.logger.debug(
      {
        baseUrl,
        detectionIntervalInS,
        audioPrefixDurationInS,
        maxAudioDurationInS,
        minFrames,
        threshold,
        inferenceTimeout,
        useProxy,
      },
      'adaptive interruption detector initialized',
    );
  }

  /**
   * Model identifier reported by this detector (a fixed string).
   */
  get model(): string {
    return 'adaptive interruption';
  }

  /**
   * Provider identifier reported by this detector (a fixed string).
   */
  get provider(): string {
    return 'livekit';
  }

  /**
   * Label of this instance; set in the constructor from the constructor's name.
   */
  get label(): string {
    return this._label;
  }

  /**
   * Sample rate stored in the resolved options.
   */
  get sampleRate(): number {
    return this.options.sampleRate;
  }

  /**
   * Emits `error` on this detector with the given error as payload.
   *
   * @param error - The detection error to hand to `error` listeners.
   */
  emitError(error: InterruptionDetectionError): void {
    this.emit('error', error);
  }

  /**
   * Creates a new `InterruptionStreamBase` bound to this detector and starts
   * tracking it so that option updates reach it.
   *
   * @returns The newly created stream.
   */
  createStream(): InterruptionStreamBase {
    const created = new InterruptionStreamBase(this, {});
    this.streams.add(created);
    return created;
  }

  /**
   * Stops tracking a stream; intended to be called when the stream is closed.
   *
   * @param stream - The stream to forget. Unknown streams are ignored.
   */
  removeStream(stream: InterruptionStreamBase): void {
    this.streams.delete(stream);
  }

  /**
   * Applies the given option changes to the detector and forwards the same
   * `options` object to every tracked stream, resolving once all of the stream
   * updates have resolved. WebSocket-backed streams are expected to reconnect
   * with the new settings.
   *
   * @param options - Fields left `undefined` are not changed on the detector.
   *   Setting `minInterruptionDurationInS` also recomputes `minFrames`.
   */
  async updateOptions(options: {
    threshold?: number;
    minInterruptionDurationInS?: number;
  }): Promise<void> {
    const { threshold, minInterruptionDurationInS } = options;

    if (threshold !== undefined) {
      this.options.threshold = threshold;
    }
    if (minInterruptionDurationInS !== undefined) {
      this.options.minInterruptionDurationInS = minInterruptionDurationInS;
      this.options.minFrames = Math.ceil(minInterruptionDurationInS * FRAMES_PER_SECOND);
    }

    // Start every stream update first, then wait for all of them together.
    const pending = Array.from(this.streams, (stream) => stream.updateOptions(options));
    await Promise.all(pending);
  }
}
@@ -0,0 +1,467 @@
1
+ // SPDX-FileCopyrightText: 2026 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+ import { AudioFrame, AudioResampler } from '@livekit/rtc-node';
5
+ import type { Span } from '@opentelemetry/api';
6
+ import { type ReadableStream, TransformStream } from 'stream/web';
7
+ import { log } from '../../log.js';
8
+ import type { InterruptionMetrics } from '../../metrics/base.js';
9
+ import { type StreamChannel, createStreamChannel } from '../../stream/stream_channel.js';
10
+ import { traceTypes } from '../../telemetry/index.js';
11
+ import { FRAMES_PER_SECOND, apiConnectDefaults } from './defaults.js';
12
+ import type { InterruptionDetectionError } from './errors.js';
13
+ import { createHttpTransport } from './http_transport.js';
14
+ import { InterruptionCacheEntry } from './interruption_cache_entry.js';
15
+ import type { AdaptiveInterruptionDetector } from './interruption_detector.js';
16
+ import {
17
+ type AgentSpeechEnded,
18
+ type AgentSpeechStarted,
19
+ type ApiConnectOptions,
20
+ type Flush,
21
+ type InterruptionOptions,
22
+ type InterruptionSentinel,
23
+ type OverlapSpeechEnded,
24
+ type OverlapSpeechStarted,
25
+ type OverlappingSpeechEvent,
26
+ } from './types.js';
27
+ import { BoundedCache } from './utils.js';
28
+ import { createWsTransport } from './ws_transport.js';
29
+
30
+ // Re-export sentinel types for backwards compatibility
31
+ export type {
32
+ AgentSpeechEnded,
33
+ AgentSpeechStarted,
34
+ ApiConnectOptions,
35
+ Flush,
36
+ InterruptionSentinel,
37
+ OverlapSpeechEnded,
38
+ OverlapSpeechStarted,
39
+ };
40
+
41
/**
 * Factory helpers for the control messages ("sentinels") that can be pushed into an
 * interruption stream alongside audio frames. Each helper returns a fresh plain object
 * tagged with its `type` discriminant.
 */
export class InterruptionStreamSentinel {
  /** Build the sentinel tagged `'agent-speech-started'`. */
  static agentSpeechStarted(): AgentSpeechStarted {
    const sentinel: AgentSpeechStarted = { type: 'agent-speech-started' };
    return sentinel;
  }

  /** Build the sentinel tagged `'agent-speech-ended'`. */
  static agentSpeechEnded(): AgentSpeechEnded {
    const sentinel: AgentSpeechEnded = { type: 'agent-speech-ended' };
    return sentinel;
  }

  /**
   * Build the sentinel tagged `'overlap-speech-started'`.
   *
   * @param speechDuration - Stored on the sentinel as `speechDuration`.
   * @param startedAt - Stored on the sentinel as `startedAt`.
   * @param userSpeakingSpan - Optional tracing span stored on the sentinel; the key is
   *   present (with value `undefined`) even when no span is given.
   */
  static overlapSpeechStarted(
    speechDuration: number,
    startedAt: number,
    userSpeakingSpan?: Span,
  ): OverlapSpeechStarted {
    const sentinel: OverlapSpeechStarted = {
      type: 'overlap-speech-started',
      speechDuration: speechDuration,
      startedAt: startedAt,
      userSpeakingSpan: userSpeakingSpan,
    };
    return sentinel;
  }

  /**
   * Build the sentinel tagged `'overlap-speech-ended'`.
   *
   * @param endedAt - Stored on the sentinel as `endedAt`.
   */
  static overlapSpeechEnded(endedAt: number): OverlapSpeechEnded {
    const sentinel: OverlapSpeechEnded = { type: 'overlap-speech-ended', endedAt: endedAt };
    return sentinel;
  }

  /** Build the sentinel tagged `'flush'`. */
  static flush(): Flush {
    const sentinel: Flush = { type: 'flush' };
    return sentinel;
  }
}
66
+
67
/**
 * Copy the prediction fields of a cache entry onto a tracing span as attributes.
 *
 * A missing `isInterruption` is recorded as the string `'false'`; the other fields are
 * passed to `setAttribute` as they are.
 *
 * @param span - Span that receives the attributes.
 * @param entry - Cache entry whose fields are recorded.
 */
function updateUserSpeakingSpan(span: Span, entry: InterruptionCacheEntry) {
  const isInterruptionText = String(entry.isInterruption ?? false).toLowerCase();

  // Attribute key/value pairs, applied in the listed order.
  const attributes = [
    [traceTypes.ATTR_IS_INTERRUPTION, isInterruptionText],
    [traceTypes.ATTR_INTERRUPTION_PROBABILITY, entry.probability],
    [traceTypes.ATTR_INTERRUPTION_TOTAL_DURATION, entry.totalDurationInS],
    [traceTypes.ATTR_INTERRUPTION_PREDICTION_DURATION, entry.predictionDurationInS],
    [traceTypes.ATTR_INTERRUPTION_DETECTION_DELAY, entry.detectionDelayInS],
  ] as const;

  for (const [key, value] of attributes) {
    span.setAttribute(key, value);
  }
}
77
+
78
/**
 * One interruption-detection stream bound to an {@link AdaptiveInterruptionDetector}.
 *
 * Audio frames and control sentinels are pushed in through {@link pushFrame}; the stream
 * returned by {@link stream} yields `OverlappingSpeechEvent`s. Internally the input flows
 * through three transforms: an audio accumulator, a transport (WebSocket when
 * `options.useProxy` is set, HTTP otherwise), and an emitter that re-publishes every event
 * on the detector together with a metrics record.
 */
export class InterruptionStreamBase {
  private readonly inputStream: StreamChannel<
    InterruptionSentinel | AudioFrame,
    InterruptionDetectionError
  >;

  private readonly eventStream: ReadableStream<OverlappingSpeechEvent>;

  // Created lazily by getResamplerFor() the first time a frame needs resampling.
  private resampler?: AudioResampler;

  // Incremented by the transport's onRequestSent callback; read-and-reset when an event is built.
  private numRequests = 0;

  private userSpeakingSpan: Span | undefined;

  private overlapSpeechStartedAt: number | undefined;

  // Per-stream copy of the detector options (see constructor).
  private readonly options: InterruptionOptions;

  private readonly apiOptions: ApiConnectOptions;

  private readonly model: AdaptiveInterruptionDetector;

  private readonly logger = log();

  // Store reconnect function for WebSocket transport
  private wsReconnect?: () => Promise<void>;

  // Mutable transport options that can be updated via updateOptions().
  // The same object instance is handed to the transport, so in-place edits are visible to it.
  private readonly transportOptions: {
    baseUrl: string;
    apiKey: string;
    apiSecret: string;
    sampleRate: number;
    threshold: number;
    minFrames: number;
    timeout: number;
    maxRetries: number;
  };

  /**
   * @param model - Detector that owns this stream; its options are shallow-copied and its
   *   `emit` / `removeStream` methods are used later.
   * @param apiOptions - Overrides merged over `apiConnectDefaults`.
   */
  constructor(model: AdaptiveInterruptionDetector, apiOptions: Partial<ApiConnectOptions>) {
    this.inputStream = createStreamChannel<
      InterruptionSentinel | AudioFrame,
      InterruptionDetectionError
    >();

    this.model = model;
    // Shallow copy: per-stream updates must not write through to the detector's own object.
    this.options = { ...model.options };
    this.apiOptions = { ...apiConnectDefaults, ...apiOptions };

    // Initialize mutable transport options
    this.transportOptions = {
      baseUrl: this.options.baseUrl,
      apiKey: this.options.apiKey,
      apiSecret: this.options.apiSecret,
      sampleRate: this.options.sampleRate,
      threshold: this.options.threshold,
      minFrames: this.options.minFrames,
      timeout: this.options.inferenceTimeout,
      maxRetries: this.apiOptions.maxRetries,
    };

    this.eventStream = this.setupTransform();
  }

  /**
   * Update stream options. For WebSocket transport, this triggers a reconnection.
   *
   * Only the fields that are not `undefined` are applied. `minFrames` is re-derived from
   * `minInterruptionDurationInS`, and both the stream options and the shared transport
   * options are updated.
   */
  async updateOptions(options: {
    threshold?: number;
    minInterruptionDurationInS?: number;
  }): Promise<void> {
    if (options.threshold !== undefined) {
      this.options.threshold = options.threshold;
      this.transportOptions.threshold = options.threshold;
    }
    if (options.minInterruptionDurationInS !== undefined) {
      this.options.minInterruptionDurationInS = options.minInterruptionDurationInS;
      this.options.minFrames = Math.ceil(options.minInterruptionDurationInS * FRAMES_PER_SECOND);
      this.transportOptions.minFrames = this.options.minFrames;
    }
    // Trigger WebSocket reconnection if using proxy (WebSocket transport)
    if (this.options.useProxy && this.wsReconnect) {
      await this.wsReconnect();
    }
  }

  /**
   * Build the processing pipeline
   * `input -> audioTransformer -> transport -> eventEmitter` and return its readable end.
   */
  private setupTransform(): ReadableStream<OverlappingSpeechEvent> {
    let agentSpeechStarted = false;
    let startIdx = 0;
    let accumulatedSamples = 0;
    let overlapSpeechStarted = false;
    let overlapCount = 0;
    const cache = new BoundedCache<number, InterruptionCacheEntry>(10);
    // Rolling mono buffer for inference audio; typed arrays start zero-filled.
    const inferenceS16Data = new Int16Array(
      Math.ceil(this.options.maxAudioDurationInS * this.options.sampleRate),
    );

    // Shared reset used when the agent starts or stops speaking.
    const resetTurnState = () => {
      overlapSpeechStarted = false;
      this.overlapSpeechStartedAt = undefined;
      accumulatedSamples = 0;
      overlapCount = 0;
      startIdx = 0;
      this.numRequests = 0;
      cache.clear();
    };

    // State accessors for transport
    const getState = () => ({
      overlapSpeechStarted,
      overlapSpeechStartedAt: this.overlapSpeechStartedAt,
      cache,
      overlapCount,
    });
    const setState = (partial: { overlapSpeechStarted?: boolean }) => {
      if (partial.overlapSpeechStarted !== undefined) {
        overlapSpeechStarted = partial.overlapSpeechStarted;
      }
    };
    // Annotate the pending span once, then drop the reference so it is not updated twice.
    const handleSpanUpdate = (entry: InterruptionCacheEntry) => {
      if (this.userSpeakingSpan) {
        updateUserSpeakingSpan(this.userSpeakingSpan, entry);
        this.userSpeakingSpan = undefined;
      }
    };

    const onRequestSent = () => {
      this.numRequests++;
    };

    const getAndResetNumRequests = (): number => {
      const n = this.numRequests;
      this.numRequests = 0;
      return n;
    };

    // First transform: process input frames/sentinels and output audio slices or events
    const audioTransformer = new TransformStream<
      InterruptionSentinel | AudioFrame,
      Int16Array | OverlappingSpeechEvent
    >(
      {
        transform: (chunk, controller) => {
          if (chunk instanceof AudioFrame) {
            // Audio is only buffered while the agent is speaking.
            if (!agentSpeechStarted) {
              return;
            }
            if (this.options.sampleRate !== chunk.sampleRate) {
              const message = 'the sample rate of the input frames must be consistent';
              // Error the stream with a real Error so consumers get a stack and a message.
              controller.error(new Error(message));
              this.logger.error(message);
              return;
            }
            const result = writeToInferenceS16Data(
              chunk,
              startIdx,
              inferenceS16Data,
              this.options.maxAudioDurationInS,
            );
            startIdx = result.startIdx;
            accumulatedSamples += result.samplesWritten;

            const intervalSamples = Math.floor(
              this.options.detectionIntervalInS * this.options.sampleRate,
            );
            if (overlapSpeechStarted && accumulatedSamples >= intervalSamples) {
              // slice() copies, so later writes to the rolling buffer do not alter this chunk.
              const audioSlice = inferenceS16Data.slice(0, startIdx);
              accumulatedSamples = 0;
              controller.enqueue(audioSlice);
            }
            return;
          }

          switch (chunk.type) {
            case 'agent-speech-started': {
              this.logger.debug('agent speech started');
              agentSpeechStarted = true;
              resetTurnState();
              break;
            }
            case 'agent-speech-ended': {
              this.logger.debug('agent speech ended');
              agentSpeechStarted = false;
              resetTurnState();
              break;
            }
            case 'overlap-speech-started': {
              // Overlap is only meaningful while the agent is speaking; otherwise ignore it.
              if (!agentSpeechStarted) {
                break;
              }
              this.overlapSpeechStartedAt = chunk.startedAt;
              this.userSpeakingSpan = chunk.userSpeakingSpan;
              this.logger.debug('overlap speech started, starting interruption inference');
              overlapSpeechStarted = true;
              accumulatedSamples = 0;
              overlapCount += 1;
              if (overlapCount <= 1) {
                // First overlap since the last reset: trim the buffer so it keeps only the
                // detected speech plus the configured prefix.
                // NOTE(review): speechDuration is divided by 1000, so presumably milliseconds.
                const keepSize =
                  Math.round((chunk.speechDuration / 1000) * this.options.sampleRate) +
                  Math.round(this.options.audioPrefixDurationInS * this.options.sampleRate);
                const shiftCount = Math.max(0, startIdx - keepSize);
                inferenceS16Data.copyWithin(0, shiftCount, startIdx);
                startIdx -= shiftCount;
              }
              cache.clear();
              break;
            }
            case 'overlap-speech-ended': {
              this.logger.debug('overlap speech ended');
              if (overlapSpeechStarted) {
                this.userSpeakingSpan = undefined;
                const cached = cache.pop(
                  (entry) => entry.totalDurationInS !== undefined && entry.totalDurationInS > 0,
                );
                if (!cached) {
                  this.logger.debug('no request made for overlap speech');
                }
                const e = cached ?? InterruptionCacheEntry.default();
                // Overlap ended without this branch having seen an interruption, so the
                // event is reported with isInterruption = false.
                const event: OverlappingSpeechEvent = {
                  type: 'user_overlapping_speech',
                  timestamp: chunk.endedAt,
                  isInterruption: false,
                  overlapStartedAt: this.overlapSpeechStartedAt,
                  speechInput: e.speechInput,
                  probabilities: e.probabilities,
                  totalDurationInS: e.totalDurationInS,
                  detectionDelayInS: e.detectionDelayInS,
                  predictionDurationInS: e.predictionDurationInS,
                  probability: e.probability,
                  numRequests: getAndResetNumRequests(),
                };
                controller.enqueue(event);
                overlapSpeechStarted = false;
                accumulatedSamples = 0;
              }
              this.overlapSpeechStartedAt = undefined;
              break;
            }
            case 'flush': {
              // no-op
              break;
            }
          }
        },
      },
      { highWaterMark: 32 },
      { highWaterMark: 32 },
    );

    // Second transform: transport layer (HTTP or WebSocket based on useProxy).
    // this.transportOptions is passed by reference so updateOptions() edits reach the transport.
    let transport: TransformStream<Int16Array | OverlappingSpeechEvent, OverlappingSpeechEvent>;
    if (this.options.useProxy) {
      const wsResult = createWsTransport(
        this.transportOptions,
        getState,
        setState,
        handleSpanUpdate,
        onRequestSent,
        getAndResetNumRequests,
      );
      transport = wsResult.transport;
      this.wsReconnect = wsResult.reconnect;
    } else {
      transport = createHttpTransport(
        this.transportOptions,
        getState,
        setState,
        handleSpanUpdate,
        getAndResetNumRequests,
      );
    }

    // Third transform: publish each event (and derived metrics) on the detector, then pass it on.
    const eventEmitter = new TransformStream<OverlappingSpeechEvent, OverlappingSpeechEvent>({
      transform: (chunk, controller) => {
        this.model.emit('user_overlapping_speech', chunk);

        // Durations on the event are suffixed InS; they are scaled by 1000 for the metrics.
        const metrics: InterruptionMetrics = {
          type: 'interruption_metrics',
          timestamp: chunk.timestamp,
          totalDuration: chunk.totalDurationInS * 1000,
          predictionDuration: chunk.predictionDurationInS * 1000,
          detectionDelay: chunk.detectionDelayInS * 1000,
          numInterruptions: chunk.isInterruption ? 1 : 0,
          numBackchannels: chunk.isInterruption ? 0 : 1,
          numRequests: chunk.numRequests,
          metadata: {
            modelProvider: this.model.provider,
            modelName: this.model.model,
          },
        };
        this.model.emit('metrics_collected', metrics);

        controller.enqueue(chunk);
      },
    });

    // Pipeline: input -> audioTransformer -> transport -> eventEmitter -> eventStream
    return this.inputStream
      .stream()
      .pipeThrough(audioTransformer)
      .pipeThrough(transport)
      .pipeThrough(eventEmitter);
  }

  /** @throws Error when the input channel has already been closed. */
  private ensureInputNotEnded() {
    if (this.inputStream.closed) {
      throw new Error('input stream is closed');
    }
  }

  /** Guard used by the public write methods; currently only checks the input channel. */
  private ensureStreamsNotEnded() {
    this.ensureInputNotEnded();
  }

  /**
   * Return the stream's resampler, creating it on first use for `inputSampleRate`.
   * Later calls return the same instance regardless of the rate passed in; callers must
   * check that its input rate still matches.
   */
  private getResamplerFor(inputSampleRate: number): AudioResampler {
    if (!this.resampler) {
      this.resampler = new AudioResampler(inputSampleRate, this.options.sampleRate);
    }
    return this.resampler;
  }

  /** The readable end of the pipeline, yielding overlapping-speech events. */
  stream(): ReadableStream<OverlappingSpeechEvent> {
    return this.eventStream;
  }

  /**
   * Push an audio frame or a control sentinel into the stream.
   *
   * Sentinels are written unchanged. Audio frames whose sample rate differs from
   * `options.sampleRate` are resampled first, and every resampled frame is written in order.
   *
   * @throws Error when the input is closed, or when a frame's sample rate differs from the
   *   rate the resampler was created for.
   */
  async pushFrame(frame: InterruptionSentinel | AudioFrame): Promise<void> {
    this.ensureStreamsNotEnded();
    if (!(frame instanceof AudioFrame)) {
      return this.inputStream.write(frame);
    }
    if (this.options.sampleRate === frame.sampleRate) {
      await this.inputStream.write(frame);
      return;
    }
    const resampler = this.getResamplerFor(frame.sampleRate);
    if (resampler.inputRate !== frame.sampleRate) {
      throw new Error('the sample rate of the input frames must be consistent');
    }
    for (const resampledFrame of resampler.push(frame)) {
      await this.inputStream.write(resampledFrame);
    }
  }

  /**
   * Write a flush sentinel into the input.
   *
   * @throws Error when the input is closed.
   */
  async flush(): Promise<void> {
    this.ensureStreamsNotEnded();
    await this.inputStream.write(InterruptionStreamSentinel.flush());
  }

  /** Flush, then close the input channel. */
  async endInput(): Promise<void> {
    await this.flush();
    await this.inputStream.close();
  }

  /**
   * Close the input channel (if still open) and unregister this stream from the detector.
   * Unregistering happens even when closing the input rejects, so a failed close cannot
   * leave the stream tracked by the detector.
   */
  async close(): Promise<void> {
    try {
      if (!this.inputStream.closed) await this.inputStream.close();
    } finally {
      this.model.removeStream(this);
    }
  }
}
420
+
421
/**
 * Append one audio frame to the rolling mono inference buffer.
 *
 * If the frame would not fit in the window, the oldest samples in `outData[0, startIdx)`
 * are dropped by shifting the retained tail to the front before writing. Multi-channel
 * frames are mixed down to mono by averaging the channels of each sample
 * (`Math.floor`, i.e. rounded toward negative infinity).
 *
 * @param frame - Frame to append; `frame.data` is read as interleaved 16-bit samples.
 * @param startIdx - Index in `outData` one past the last valid sample.
 * @param outData - Buffer that receives the samples; mutated in place.
 * @param maxAudioDuration - Window length; multiplied by `frame.sampleRate` to get a
 *   sample count (so presumably seconds).
 * @returns The new write index and the number of samples written per channel.
 * @throws Error when the frame holds more samples per channel than the effective window,
 *   which is the smaller of `floor(maxAudioDuration * frame.sampleRate)` and
 *   `outData.length`.
 */
function writeToInferenceS16Data(
  frame: AudioFrame,
  startIdx: number,
  outData: Int16Array,
  maxAudioDuration: number,
): { startIdx: number; samplesWritten: number } {
  const samples = frame.samplesPerChannel;

  // The window can never be larger than the buffer that backs it. Validating against the
  // effective window (not just outData.length) keeps the shift below from pushing the
  // write index negative when the buffer is slightly larger than the window.
  const windowSize = Math.min(Math.floor(maxAudioDuration * frame.sampleRate), outData.length);
  if (samples > windowSize) {
    throw new Error('frame samples are greater than the max window size');
  }

  // Shift the data to the left if the window would overflow
  let writeIdx = startIdx;
  const shift = writeIdx + samples - windowSize;
  if (shift > 0) {
    outData.copyWithin(0, shift, writeIdx);
    writeIdx -= shift;
  }

  // View the frame's bytes as interleaved Int16 samples without copying.
  const frameData = new Int16Array(
    frame.data.buffer,
    frame.data.byteOffset,
    samples * frame.channels,
  );

  if (frame.channels > 1) {
    // Mix down multiple channels to mono by averaging
    for (let i = 0; i < samples; i++) {
      let sum = 0;
      for (let ch = 0; ch < frame.channels; ch++) {
        sum += frameData[i * frame.channels + ch] ?? 0;
      }
      outData[writeIdx + i] = Math.floor(sum / frame.channels);
    }
  } else {
    // Single channel - copy directly
    outData.set(frameData, writeIdx);
  }

  return { startIdx: writeIdx + samples, samplesWritten: samples };
}