@livekit/agents 1.0.47 → 1.1.0-dev.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (444) hide show
  1. package/dist/beta/index.cjs +29 -0
  2. package/dist/beta/index.cjs.map +1 -0
  3. package/dist/beta/index.d.cts +2 -0
  4. package/dist/beta/index.d.ts +2 -0
  5. package/dist/beta/index.d.ts.map +1 -0
  6. package/dist/beta/index.js +7 -0
  7. package/dist/beta/index.js.map +1 -0
  8. package/dist/beta/workflows/index.cjs +29 -0
  9. package/dist/beta/workflows/index.cjs.map +1 -0
  10. package/dist/beta/workflows/index.d.cts +2 -0
  11. package/dist/beta/workflows/index.d.ts +2 -0
  12. package/dist/beta/workflows/index.d.ts.map +1 -0
  13. package/dist/beta/workflows/index.js +7 -0
  14. package/dist/beta/workflows/index.js.map +1 -0
  15. package/dist/beta/workflows/task_group.cjs +162 -0
  16. package/dist/beta/workflows/task_group.cjs.map +1 -0
  17. package/dist/beta/workflows/task_group.d.cts +32 -0
  18. package/dist/beta/workflows/task_group.d.ts +32 -0
  19. package/dist/beta/workflows/task_group.d.ts.map +1 -0
  20. package/dist/beta/workflows/task_group.js +138 -0
  21. package/dist/beta/workflows/task_group.js.map +1 -0
  22. package/dist/constants.cjs +27 -0
  23. package/dist/constants.cjs.map +1 -1
  24. package/dist/constants.d.cts +9 -0
  25. package/dist/constants.d.ts +9 -0
  26. package/dist/constants.d.ts.map +1 -1
  27. package/dist/constants.js +18 -0
  28. package/dist/constants.js.map +1 -1
  29. package/dist/index.cjs +3 -0
  30. package/dist/index.cjs.map +1 -1
  31. package/dist/index.d.cts +2 -1
  32. package/dist/index.d.ts +2 -1
  33. package/dist/index.d.ts.map +1 -1
  34. package/dist/index.js +2 -0
  35. package/dist/index.js.map +1 -1
  36. package/dist/inference/api_protos.d.cts +12 -12
  37. package/dist/inference/api_protos.d.ts +12 -12
  38. package/dist/inference/interruption/defaults.cjs +81 -0
  39. package/dist/inference/interruption/defaults.cjs.map +1 -0
  40. package/dist/inference/interruption/defaults.d.cts +19 -0
  41. package/dist/inference/interruption/defaults.d.ts +19 -0
  42. package/dist/inference/interruption/defaults.d.ts.map +1 -0
  43. package/dist/inference/interruption/defaults.js +46 -0
  44. package/dist/inference/interruption/defaults.js.map +1 -0
  45. package/dist/inference/interruption/errors.cjs +44 -0
  46. package/dist/inference/interruption/errors.cjs.map +1 -0
  47. package/dist/inference/interruption/errors.d.cts +12 -0
  48. package/dist/inference/interruption/errors.d.ts +12 -0
  49. package/dist/inference/interruption/errors.d.ts.map +1 -0
  50. package/dist/inference/interruption/errors.js +20 -0
  51. package/dist/inference/interruption/errors.js.map +1 -0
  52. package/dist/inference/interruption/http_transport.cjs +147 -0
  53. package/dist/inference/interruption/http_transport.cjs.map +1 -0
  54. package/dist/inference/interruption/http_transport.d.cts +63 -0
  55. package/dist/inference/interruption/http_transport.d.ts +63 -0
  56. package/dist/inference/interruption/http_transport.d.ts.map +1 -0
  57. package/dist/inference/interruption/http_transport.js +121 -0
  58. package/dist/inference/interruption/http_transport.js.map +1 -0
  59. package/dist/inference/interruption/interruption_cache_entry.cjs +58 -0
  60. package/dist/inference/interruption/interruption_cache_entry.cjs.map +1 -0
  61. package/dist/inference/interruption/interruption_cache_entry.d.cts +30 -0
  62. package/dist/inference/interruption/interruption_cache_entry.d.ts +30 -0
  63. package/dist/inference/interruption/interruption_cache_entry.d.ts.map +1 -0
  64. package/dist/inference/interruption/interruption_cache_entry.js +34 -0
  65. package/dist/inference/interruption/interruption_cache_entry.js.map +1 -0
  66. package/dist/inference/interruption/interruption_detector.cjs +181 -0
  67. package/dist/inference/interruption/interruption_detector.cjs.map +1 -0
  68. package/dist/inference/interruption/interruption_detector.d.cts +59 -0
  69. package/dist/inference/interruption/interruption_detector.d.ts +59 -0
  70. package/dist/inference/interruption/interruption_detector.d.ts.map +1 -0
  71. package/dist/inference/interruption/interruption_detector.js +147 -0
  72. package/dist/inference/interruption/interruption_detector.js.map +1 -0
  73. package/dist/inference/interruption/interruption_stream.cjs +368 -0
  74. package/dist/inference/interruption/interruption_stream.cjs.map +1 -0
  75. package/dist/inference/interruption/interruption_stream.d.cts +46 -0
  76. package/dist/inference/interruption/interruption_stream.d.ts +46 -0
  77. package/dist/inference/interruption/interruption_stream.d.ts.map +1 -0
  78. package/dist/inference/interruption/interruption_stream.js +344 -0
  79. package/dist/inference/interruption/interruption_stream.js.map +1 -0
  80. package/dist/inference/interruption/types.cjs +17 -0
  81. package/dist/inference/interruption/types.cjs.map +1 -0
  82. package/dist/inference/interruption/types.d.cts +66 -0
  83. package/dist/inference/interruption/types.d.ts +66 -0
  84. package/dist/inference/interruption/types.d.ts.map +1 -0
  85. package/dist/inference/interruption/types.js +1 -0
  86. package/dist/inference/interruption/types.js.map +1 -0
  87. package/dist/inference/interruption/utils.cjs +130 -0
  88. package/dist/inference/interruption/utils.cjs.map +1 -0
  89. package/dist/inference/interruption/utils.d.cts +41 -0
  90. package/dist/inference/interruption/utils.d.ts +41 -0
  91. package/dist/inference/interruption/utils.d.ts.map +1 -0
  92. package/dist/inference/interruption/utils.js +105 -0
  93. package/dist/inference/interruption/utils.js.map +1 -0
  94. package/dist/inference/interruption/utils.test.cjs +105 -0
  95. package/dist/inference/interruption/utils.test.cjs.map +1 -0
  96. package/dist/inference/interruption/utils.test.js +104 -0
  97. package/dist/inference/interruption/utils.test.js.map +1 -0
  98. package/dist/inference/interruption/ws_transport.cjs +329 -0
  99. package/dist/inference/interruption/ws_transport.cjs.map +1 -0
  100. package/dist/inference/interruption/ws_transport.d.cts +33 -0
  101. package/dist/inference/interruption/ws_transport.d.ts +33 -0
  102. package/dist/inference/interruption/ws_transport.d.ts.map +1 -0
  103. package/dist/inference/interruption/ws_transport.js +295 -0
  104. package/dist/inference/interruption/ws_transport.js.map +1 -0
  105. package/dist/inference/llm.cjs +14 -10
  106. package/dist/inference/llm.cjs.map +1 -1
  107. package/dist/inference/llm.d.cts +2 -1
  108. package/dist/inference/llm.d.ts +2 -1
  109. package/dist/inference/llm.d.ts.map +1 -1
  110. package/dist/inference/llm.js +8 -10
  111. package/dist/inference/llm.js.map +1 -1
  112. package/dist/inference/stt.cjs +7 -2
  113. package/dist/inference/stt.cjs.map +1 -1
  114. package/dist/inference/stt.d.cts +2 -0
  115. package/dist/inference/stt.d.ts +2 -0
  116. package/dist/inference/stt.d.ts.map +1 -1
  117. package/dist/inference/stt.js +8 -3
  118. package/dist/inference/stt.js.map +1 -1
  119. package/dist/inference/tts.cjs +7 -2
  120. package/dist/inference/tts.cjs.map +1 -1
  121. package/dist/inference/tts.d.cts +2 -0
  122. package/dist/inference/tts.d.ts +2 -0
  123. package/dist/inference/tts.d.ts.map +1 -1
  124. package/dist/inference/tts.js +8 -3
  125. package/dist/inference/tts.js.map +1 -1
  126. package/dist/inference/utils.cjs +26 -7
  127. package/dist/inference/utils.cjs.map +1 -1
  128. package/dist/inference/utils.d.cts +13 -0
  129. package/dist/inference/utils.d.ts +13 -0
  130. package/dist/inference/utils.d.ts.map +1 -1
  131. package/dist/inference/utils.js +18 -2
  132. package/dist/inference/utils.js.map +1 -1
  133. package/dist/llm/chat_context.cjs +108 -2
  134. package/dist/llm/chat_context.cjs.map +1 -1
  135. package/dist/llm/chat_context.d.cts +28 -1
  136. package/dist/llm/chat_context.d.ts +28 -1
  137. package/dist/llm/chat_context.d.ts.map +1 -1
  138. package/dist/llm/chat_context.js +108 -2
  139. package/dist/llm/chat_context.js.map +1 -1
  140. package/dist/llm/chat_context.test.cjs +43 -0
  141. package/dist/llm/chat_context.test.cjs.map +1 -1
  142. package/dist/llm/chat_context.test.js +43 -0
  143. package/dist/llm/chat_context.test.js.map +1 -1
  144. package/dist/llm/index.cjs +2 -0
  145. package/dist/llm/index.cjs.map +1 -1
  146. package/dist/llm/index.d.cts +2 -2
  147. package/dist/llm/index.d.ts +2 -2
  148. package/dist/llm/index.d.ts.map +1 -1
  149. package/dist/llm/index.js +3 -1
  150. package/dist/llm/index.js.map +1 -1
  151. package/dist/llm/llm.cjs +16 -1
  152. package/dist/llm/llm.cjs.map +1 -1
  153. package/dist/llm/llm.d.cts +9 -0
  154. package/dist/llm/llm.d.ts +9 -0
  155. package/dist/llm/llm.d.ts.map +1 -1
  156. package/dist/llm/llm.js +16 -1
  157. package/dist/llm/llm.js.map +1 -1
  158. package/dist/llm/provider_format/index.d.cts +1 -1
  159. package/dist/llm/provider_format/index.d.ts +1 -1
  160. package/dist/llm/realtime.cjs +3 -0
  161. package/dist/llm/realtime.cjs.map +1 -1
  162. package/dist/llm/realtime.d.cts +1 -0
  163. package/dist/llm/realtime.d.ts +1 -0
  164. package/dist/llm/realtime.d.ts.map +1 -1
  165. package/dist/llm/realtime.js +3 -0
  166. package/dist/llm/realtime.js.map +1 -1
  167. package/dist/llm/tool_context.cjs +7 -0
  168. package/dist/llm/tool_context.cjs.map +1 -1
  169. package/dist/llm/tool_context.d.cts +10 -2
  170. package/dist/llm/tool_context.d.ts +10 -2
  171. package/dist/llm/tool_context.d.ts.map +1 -1
  172. package/dist/llm/tool_context.js +6 -0
  173. package/dist/llm/tool_context.js.map +1 -1
  174. package/dist/metrics/base.cjs.map +1 -1
  175. package/dist/metrics/base.d.cts +45 -1
  176. package/dist/metrics/base.d.ts +45 -1
  177. package/dist/metrics/base.d.ts.map +1 -1
  178. package/dist/metrics/index.cjs +5 -0
  179. package/dist/metrics/index.cjs.map +1 -1
  180. package/dist/metrics/index.d.cts +2 -1
  181. package/dist/metrics/index.d.ts +2 -1
  182. package/dist/metrics/index.d.ts.map +1 -1
  183. package/dist/metrics/index.js +6 -0
  184. package/dist/metrics/index.js.map +1 -1
  185. package/dist/metrics/model_usage.cjs +189 -0
  186. package/dist/metrics/model_usage.cjs.map +1 -0
  187. package/dist/metrics/model_usage.d.cts +92 -0
  188. package/dist/metrics/model_usage.d.ts +92 -0
  189. package/dist/metrics/model_usage.d.ts.map +1 -0
  190. package/dist/metrics/model_usage.js +164 -0
  191. package/dist/metrics/model_usage.js.map +1 -0
  192. package/dist/metrics/model_usage.test.cjs +474 -0
  193. package/dist/metrics/model_usage.test.cjs.map +1 -0
  194. package/dist/metrics/model_usage.test.js +476 -0
  195. package/dist/metrics/model_usage.test.js.map +1 -0
  196. package/dist/metrics/usage_collector.cjs +3 -0
  197. package/dist/metrics/usage_collector.cjs.map +1 -1
  198. package/dist/metrics/usage_collector.d.cts +9 -0
  199. package/dist/metrics/usage_collector.d.ts +9 -0
  200. package/dist/metrics/usage_collector.d.ts.map +1 -1
  201. package/dist/metrics/usage_collector.js +3 -0
  202. package/dist/metrics/usage_collector.js.map +1 -1
  203. package/dist/metrics/utils.cjs +9 -0
  204. package/dist/metrics/utils.cjs.map +1 -1
  205. package/dist/metrics/utils.d.ts.map +1 -1
  206. package/dist/metrics/utils.js +9 -0
  207. package/dist/metrics/utils.js.map +1 -1
  208. package/dist/stream/multi_input_stream.test.cjs +4 -0
  209. package/dist/stream/multi_input_stream.test.cjs.map +1 -1
  210. package/dist/stream/multi_input_stream.test.js +5 -1
  211. package/dist/stream/multi_input_stream.test.js.map +1 -1
  212. package/dist/stream/stream_channel.cjs +31 -0
  213. package/dist/stream/stream_channel.cjs.map +1 -1
  214. package/dist/stream/stream_channel.d.cts +4 -2
  215. package/dist/stream/stream_channel.d.ts +4 -2
  216. package/dist/stream/stream_channel.d.ts.map +1 -1
  217. package/dist/stream/stream_channel.js +31 -0
  218. package/dist/stream/stream_channel.js.map +1 -1
  219. package/dist/stt/stt.cjs +34 -2
  220. package/dist/stt/stt.cjs.map +1 -1
  221. package/dist/stt/stt.d.cts +22 -0
  222. package/dist/stt/stt.d.ts +22 -0
  223. package/dist/stt/stt.d.ts.map +1 -1
  224. package/dist/stt/stt.js +34 -2
  225. package/dist/stt/stt.js.map +1 -1
  226. package/dist/telemetry/otel_http_exporter.cjs +24 -5
  227. package/dist/telemetry/otel_http_exporter.cjs.map +1 -1
  228. package/dist/telemetry/otel_http_exporter.d.cts +1 -0
  229. package/dist/telemetry/otel_http_exporter.d.ts +1 -0
  230. package/dist/telemetry/otel_http_exporter.d.ts.map +1 -1
  231. package/dist/telemetry/otel_http_exporter.js +24 -5
  232. package/dist/telemetry/otel_http_exporter.js.map +1 -1
  233. package/dist/telemetry/trace_types.cjs +5 -5
  234. package/dist/telemetry/trace_types.cjs.map +1 -1
  235. package/dist/telemetry/trace_types.d.cts +9 -5
  236. package/dist/telemetry/trace_types.d.ts +9 -5
  237. package/dist/telemetry/trace_types.d.ts.map +1 -1
  238. package/dist/telemetry/trace_types.js +5 -5
  239. package/dist/telemetry/trace_types.js.map +1 -1
  240. package/dist/telemetry/traces.cjs +47 -8
  241. package/dist/telemetry/traces.cjs.map +1 -1
  242. package/dist/telemetry/traces.d.ts.map +1 -1
  243. package/dist/telemetry/traces.js +47 -8
  244. package/dist/telemetry/traces.js.map +1 -1
  245. package/dist/tts/tts.cjs +64 -2
  246. package/dist/tts/tts.cjs.map +1 -1
  247. package/dist/tts/tts.d.cts +34 -0
  248. package/dist/tts/tts.d.ts +34 -0
  249. package/dist/tts/tts.d.ts.map +1 -1
  250. package/dist/tts/tts.js +64 -2
  251. package/dist/tts/tts.js.map +1 -1
  252. package/dist/utils.cjs +1 -0
  253. package/dist/utils.cjs.map +1 -1
  254. package/dist/utils.d.ts.map +1 -1
  255. package/dist/utils.js +1 -0
  256. package/dist/utils.js.map +1 -1
  257. package/dist/version.cjs +1 -1
  258. package/dist/version.js +1 -1
  259. package/dist/voice/agent.cjs +34 -4
  260. package/dist/voice/agent.cjs.map +1 -1
  261. package/dist/voice/agent.d.cts +11 -2
  262. package/dist/voice/agent.d.ts +11 -2
  263. package/dist/voice/agent.d.ts.map +1 -1
  264. package/dist/voice/agent.js +34 -4
  265. package/dist/voice/agent.js.map +1 -1
  266. package/dist/voice/agent_activity.cjs +292 -44
  267. package/dist/voice/agent_activity.cjs.map +1 -1
  268. package/dist/voice/agent_activity.d.cts +27 -6
  269. package/dist/voice/agent_activity.d.ts +27 -6
  270. package/dist/voice/agent_activity.d.ts.map +1 -1
  271. package/dist/voice/agent_activity.js +293 -45
  272. package/dist/voice/agent_activity.js.map +1 -1
  273. package/dist/voice/agent_session.cjs +105 -48
  274. package/dist/voice/agent_session.cjs.map +1 -1
  275. package/dist/voice/agent_session.d.cts +90 -20
  276. package/dist/voice/agent_session.d.ts +90 -20
  277. package/dist/voice/agent_session.d.ts.map +1 -1
  278. package/dist/voice/agent_session.js +105 -46
  279. package/dist/voice/agent_session.js.map +1 -1
  280. package/dist/voice/audio_recognition.cjs +287 -6
  281. package/dist/voice/audio_recognition.cjs.map +1 -1
  282. package/dist/voice/audio_recognition.d.cts +42 -3
  283. package/dist/voice/audio_recognition.d.ts +42 -3
  284. package/dist/voice/audio_recognition.d.ts.map +1 -1
  285. package/dist/voice/audio_recognition.js +289 -7
  286. package/dist/voice/audio_recognition.js.map +1 -1
  287. package/dist/voice/client_events.cjs +554 -0
  288. package/dist/voice/client_events.cjs.map +1 -0
  289. package/dist/voice/client_events.d.cts +195 -0
  290. package/dist/voice/client_events.d.ts +195 -0
  291. package/dist/voice/client_events.d.ts.map +1 -0
  292. package/dist/voice/client_events.js +548 -0
  293. package/dist/voice/client_events.js.map +1 -0
  294. package/dist/voice/events.cjs +1 -0
  295. package/dist/voice/events.cjs.map +1 -1
  296. package/dist/voice/events.d.cts +8 -5
  297. package/dist/voice/events.d.ts +8 -5
  298. package/dist/voice/events.d.ts.map +1 -1
  299. package/dist/voice/events.js +1 -0
  300. package/dist/voice/events.js.map +1 -1
  301. package/dist/voice/generation.cjs +43 -8
  302. package/dist/voice/generation.cjs.map +1 -1
  303. package/dist/voice/generation.d.cts +3 -3
  304. package/dist/voice/generation.d.ts +3 -3
  305. package/dist/voice/generation.d.ts.map +1 -1
  306. package/dist/voice/generation.js +43 -8
  307. package/dist/voice/generation.js.map +1 -1
  308. package/dist/voice/index.cjs +1 -0
  309. package/dist/voice/index.cjs.map +1 -1
  310. package/dist/voice/index.d.cts +1 -0
  311. package/dist/voice/index.d.ts +1 -0
  312. package/dist/voice/index.d.ts.map +1 -1
  313. package/dist/voice/index.js +1 -0
  314. package/dist/voice/index.js.map +1 -1
  315. package/dist/voice/report.cjs +20 -8
  316. package/dist/voice/report.cjs.map +1 -1
  317. package/dist/voice/report.d.cts +5 -0
  318. package/dist/voice/report.d.ts +5 -0
  319. package/dist/voice/report.d.ts.map +1 -1
  320. package/dist/voice/report.js +20 -8
  321. package/dist/voice/report.js.map +1 -1
  322. package/dist/voice/report.test.cjs +106 -0
  323. package/dist/voice/report.test.cjs.map +1 -0
  324. package/dist/voice/report.test.js +105 -0
  325. package/dist/voice/report.test.js.map +1 -0
  326. package/dist/voice/room_io/room_io.cjs +16 -41
  327. package/dist/voice/room_io/room_io.cjs.map +1 -1
  328. package/dist/voice/room_io/room_io.d.cts +4 -9
  329. package/dist/voice/room_io/room_io.d.ts +4 -9
  330. package/dist/voice/room_io/room_io.d.ts.map +1 -1
  331. package/dist/voice/room_io/room_io.js +17 -43
  332. package/dist/voice/room_io/room_io.js.map +1 -1
  333. package/dist/voice/testing/fake_llm.cjs +127 -0
  334. package/dist/voice/testing/fake_llm.cjs.map +1 -0
  335. package/dist/voice/testing/fake_llm.d.cts +30 -0
  336. package/dist/voice/testing/fake_llm.d.ts +30 -0
  337. package/dist/voice/testing/fake_llm.d.ts.map +1 -0
  338. package/dist/voice/testing/fake_llm.js +103 -0
  339. package/dist/voice/testing/fake_llm.js.map +1 -0
  340. package/dist/voice/testing/index.cjs +3 -0
  341. package/dist/voice/testing/index.cjs.map +1 -1
  342. package/dist/voice/testing/index.d.cts +1 -0
  343. package/dist/voice/testing/index.d.ts +1 -0
  344. package/dist/voice/testing/index.d.ts.map +1 -1
  345. package/dist/voice/testing/index.js +2 -0
  346. package/dist/voice/testing/index.js.map +1 -1
  347. package/dist/voice/turn_config/endpointing.cjs +33 -0
  348. package/dist/voice/turn_config/endpointing.cjs.map +1 -0
  349. package/dist/voice/turn_config/endpointing.d.cts +30 -0
  350. package/dist/voice/turn_config/endpointing.d.ts +30 -0
  351. package/dist/voice/turn_config/endpointing.d.ts.map +1 -0
  352. package/dist/voice/turn_config/endpointing.js +9 -0
  353. package/dist/voice/turn_config/endpointing.js.map +1 -0
  354. package/dist/voice/turn_config/interruption.cjs +37 -0
  355. package/dist/voice/turn_config/interruption.cjs.map +1 -0
  356. package/dist/voice/turn_config/interruption.d.cts +53 -0
  357. package/dist/voice/turn_config/interruption.d.ts +53 -0
  358. package/dist/voice/turn_config/interruption.d.ts.map +1 -0
  359. package/dist/voice/turn_config/interruption.js +13 -0
  360. package/dist/voice/turn_config/interruption.js.map +1 -0
  361. package/dist/voice/turn_config/turn_handling.cjs +35 -0
  362. package/dist/voice/turn_config/turn_handling.cjs.map +1 -0
  363. package/dist/voice/turn_config/turn_handling.d.cts +36 -0
  364. package/dist/voice/turn_config/turn_handling.d.ts +36 -0
  365. package/dist/voice/turn_config/turn_handling.d.ts.map +1 -0
  366. package/dist/voice/turn_config/turn_handling.js +11 -0
  367. package/dist/voice/turn_config/turn_handling.js.map +1 -0
  368. package/dist/voice/turn_config/utils.cjs +97 -0
  369. package/dist/voice/turn_config/utils.cjs.map +1 -0
  370. package/dist/voice/turn_config/utils.d.cts +25 -0
  371. package/dist/voice/turn_config/utils.d.ts +25 -0
  372. package/dist/voice/turn_config/utils.d.ts.map +1 -0
  373. package/dist/voice/turn_config/utils.js +73 -0
  374. package/dist/voice/turn_config/utils.js.map +1 -0
  375. package/dist/voice/turn_config/utils.test.cjs +86 -0
  376. package/dist/voice/turn_config/utils.test.cjs.map +1 -0
  377. package/dist/voice/turn_config/utils.test.js +85 -0
  378. package/dist/voice/turn_config/utils.test.js.map +1 -0
  379. package/dist/voice/wire_format.cjs +798 -0
  380. package/dist/voice/wire_format.cjs.map +1 -0
  381. package/dist/voice/wire_format.d.cts +5503 -0
  382. package/dist/voice/wire_format.d.ts +5503 -0
  383. package/dist/voice/wire_format.d.ts.map +1 -0
  384. package/dist/voice/wire_format.js +728 -0
  385. package/dist/voice/wire_format.js.map +1 -0
  386. package/package.json +2 -1
  387. package/src/beta/index.ts +9 -0
  388. package/src/beta/workflows/index.ts +9 -0
  389. package/src/beta/workflows/task_group.ts +194 -0
  390. package/src/constants.ts +13 -0
  391. package/src/index.ts +2 -1
  392. package/src/inference/interruption/defaults.ts +51 -0
  393. package/src/inference/interruption/errors.ts +25 -0
  394. package/src/inference/interruption/http_transport.ts +187 -0
  395. package/src/inference/interruption/interruption_cache_entry.ts +50 -0
  396. package/src/inference/interruption/interruption_detector.ts +188 -0
  397. package/src/inference/interruption/interruption_stream.ts +467 -0
  398. package/src/inference/interruption/types.ts +84 -0
  399. package/src/inference/interruption/utils.test.ts +132 -0
  400. package/src/inference/interruption/utils.ts +137 -0
  401. package/src/inference/interruption/ws_transport.ts +402 -0
  402. package/src/inference/llm.ts +9 -12
  403. package/src/inference/stt.ts +10 -3
  404. package/src/inference/tts.ts +10 -3
  405. package/src/inference/utils.ts +29 -1
  406. package/src/llm/chat_context.test.ts +48 -0
  407. package/src/llm/chat_context.ts +161 -0
  408. package/src/llm/index.ts +2 -0
  409. package/src/llm/llm.ts +16 -0
  410. package/src/llm/realtime.ts +4 -0
  411. package/src/llm/tool_context.ts +14 -0
  412. package/src/metrics/base.ts +48 -1
  413. package/src/metrics/index.ts +11 -0
  414. package/src/metrics/model_usage.test.ts +545 -0
  415. package/src/metrics/model_usage.ts +262 -0
  416. package/src/metrics/usage_collector.ts +11 -0
  417. package/src/metrics/utils.ts +11 -0
  418. package/src/stream/multi_input_stream.test.ts +6 -1
  419. package/src/stream/stream_channel.ts +34 -2
  420. package/src/stt/stt.ts +38 -0
  421. package/src/telemetry/otel_http_exporter.ts +28 -5
  422. package/src/telemetry/trace_types.ts +11 -8
  423. package/src/telemetry/traces.ts +111 -54
  424. package/src/tts/tts.ts +69 -1
  425. package/src/utils.ts +5 -0
  426. package/src/voice/agent.ts +41 -3
  427. package/src/voice/agent_activity.ts +371 -34
  428. package/src/voice/agent_session.ts +207 -59
  429. package/src/voice/audio_recognition.ts +385 -9
  430. package/src/voice/client_events.ts +838 -0
  431. package/src/voice/events.ts +14 -4
  432. package/src/voice/generation.ts +52 -9
  433. package/src/voice/index.ts +1 -0
  434. package/src/voice/report.test.ts +117 -0
  435. package/src/voice/report.ts +29 -6
  436. package/src/voice/room_io/room_io.ts +21 -64
  437. package/src/voice/testing/fake_llm.ts +138 -0
  438. package/src/voice/testing/index.ts +2 -0
  439. package/src/voice/turn_config/endpointing.ts +33 -0
  440. package/src/voice/turn_config/interruption.ts +56 -0
  441. package/src/voice/turn_config/turn_handling.ts +45 -0
  442. package/src/voice/turn_config/utils.test.ts +100 -0
  443. package/src/voice/turn_config/utils.ts +103 -0
  444. package/src/voice/wire_format.ts +827 -0
@@ -12,14 +12,22 @@ import {
12
12
  } from '@opentelemetry/api';
13
13
  import type { WritableStreamDefaultWriter } from 'node:stream/web';
14
14
  import { ReadableStream } from 'node:stream/web';
15
+ import { InterruptionDetectionError } from '../inference/interruption/errors.js';
16
+ import type { AdaptiveInterruptionDetector } from '../inference/interruption/interruption_detector.js';
17
+ import { InterruptionStreamSentinel } from '../inference/interruption/interruption_stream.js';
18
+ import {
19
+ type InterruptionSentinel,
20
+ type OverlappingSpeechEvent,
21
+ } from '../inference/interruption/types.js';
15
22
  import { type ChatContext } from '../llm/chat_context.js';
16
23
  import { log } from '../log.js';
17
24
  import { DeferredReadableStream, isStreamReaderReleaseError } from '../stream/deferred_stream.js';
18
25
  import { IdentityTransform } from '../stream/identity_transform.js';
19
26
  import { mergeReadableStreams } from '../stream/merge_readable_streams.js';
27
+ import { type StreamChannel, createStreamChannel } from '../stream/stream_channel.js';
20
28
  import { type SpeechEvent, SpeechEventType } from '../stt/stt.js';
21
29
  import { traceTypes, tracer } from '../telemetry/index.js';
22
- import { Task, delay } from '../utils.js';
30
+ import { Task, delay, waitForAbort } from '../utils.js';
23
31
  import { type VAD, type VADEvent, VADEventType } from '../vad.js';
24
32
  import type { TurnDetectionMode } from './agent_session.js';
25
33
  import type { STTNode } from './io.js';
@@ -46,6 +54,7 @@ export interface PreemptiveGenerationInfo {
46
54
  }
47
55
 
48
56
  export interface RecognitionHooks {
57
+ onInterruption: (ev: OverlappingSpeechEvent) => void;
49
58
  onStartOfSpeech: (ev: VADEvent) => void;
50
59
  onVADInferenceDone: (ev: VADEvent) => void;
51
60
  onEndOfSpeech: (ev: VADEvent) => void;
@@ -58,9 +67,13 @@ export interface RecognitionHooks {
58
67
  }
59
68
 
60
69
  export interface _TurnDetector {
70
+ /** The model name used by this turn detector. */
71
+ readonly model: string;
72
+ /** The provider name for this turn detector. */
73
+ readonly provider: string;
61
74
  unlikelyThreshold: (language?: string) => Promise<number | undefined>;
62
75
  supportsLanguage: (language?: string) => Promise<boolean>;
63
- predictEndOfTurn(chatCtx: ChatContext): Promise<number>;
76
+ predictEndOfTurn(chatCtx: ChatContext, timeout?: number): Promise<number>;
64
77
  }
65
78
 
66
79
  export interface AudioRecognitionOptions {
@@ -73,7 +86,8 @@ export interface AudioRecognitionOptions {
73
86
  /** Turn detector for end-of-turn prediction. */
74
87
  turnDetector?: _TurnDetector;
75
88
  /** Turn detection mode. */
76
- turnDetectionMode?: Exclude<TurnDetectionMode, _TurnDetector>;
89
+ turnDetectionMode?: TurnDetectionMode;
90
+ interruptionDetection?: AdaptiveInterruptionDetector;
77
91
  /** Minimum endpointing delay in milliseconds. */
78
92
  minEndpointingDelay: number;
79
93
  /** Maximum endpointing delay in milliseconds. */
@@ -98,12 +112,13 @@ export interface ParticipantLike {
98
112
  kind: ParticipantKind;
99
113
  }
100
114
 
115
+ // TODO add ability to update stt/vad/interruption-detection
101
116
  export class AudioRecognition {
102
117
  private hooks: RecognitionHooks;
103
118
  private stt?: STTNode;
104
119
  private vad?: VAD;
105
120
  private turnDetector?: _TurnDetector;
106
- private turnDetectionMode?: Exclude<TurnDetectionMode, _TurnDetector>;
121
+ private turnDetectionMode?: TurnDetectionMode;
107
122
  private minEndpointingDelay: number;
108
123
  private maxEndpointingDelay: number;
109
124
  private lastLanguage?: string;
@@ -137,6 +152,16 @@ export class AudioRecognition {
137
152
  private commitUserTurnTask?: Task<void>;
138
153
  private vadTask?: Task<void>;
139
154
  private sttTask?: Task<void>;
155
+ private interruptionTask?: Task<void>;
156
+
157
+ // interruption detection
158
+ private interruptionDetection?: AdaptiveInterruptionDetector;
159
+ private _inputStartedAt?: number;
160
+ private ignoreUserTranscriptUntil?: number;
161
+ private transcriptBuffer: SpeechEvent[];
162
+ private isInterruptionEnabled: boolean;
163
+ private isAgentSpeaking: boolean;
164
+ private interruptionStreamChannel?: StreamChannel<InterruptionSentinel | AudioFrame>;
140
165
 
141
166
  constructor(opts: AudioRecognitionOptions) {
142
167
  this.hooks = opts.recognitionHooks;
@@ -153,9 +178,29 @@ export class AudioRecognition {
153
178
  this.getLinkedParticipant = opts.getLinkedParticipant;
154
179
 
155
180
  this.deferredInputStream = new DeferredReadableStream<AudioFrame>();
156
- const [vadInputStream, sttInputStream] = this.deferredInputStream.stream.tee();
157
- this.vadInputStream = vadInputStream;
158
- this.sttInputStream = mergeReadableStreams(sttInputStream, this.silenceAudioTransform.readable);
181
+ this.interruptionDetection = opts.interruptionDetection;
182
+ this.transcriptBuffer = [];
183
+ this.isInterruptionEnabled = !!(opts.interruptionDetection && opts.vad);
184
+ this.isAgentSpeaking = false;
185
+
186
+ if (opts.interruptionDetection) {
187
+ const [vadInputStream, teedInput] = this.deferredInputStream.stream.tee();
188
+ const [inputStream, sttInputStream] = teedInput.tee();
189
+ this.vadInputStream = vadInputStream;
190
+ this.sttInputStream = mergeReadableStreams(
191
+ sttInputStream,
192
+ this.silenceAudioTransform.readable,
193
+ );
194
+ this.interruptionStreamChannel = createStreamChannel();
195
+ this.interruptionStreamChannel.addStreamInput(inputStream);
196
+ } else {
197
+ const [vadInputStream, sttInputStream] = this.deferredInputStream.stream.tee();
198
+ this.vadInputStream = vadInputStream;
199
+ this.sttInputStream = mergeReadableStreams(
200
+ sttInputStream,
201
+ this.silenceAudioTransform.readable,
202
+ );
203
+ }
159
204
  this.silenceAudioWriter = this.silenceAudioTransform.writable.getWriter();
160
205
  }
161
206
 
@@ -169,6 +214,16 @@ export class AudioRecognition {
169
214
  return this.audioTranscript;
170
215
  }
171
216
 
217
+ /** @internal */
218
+ get inputStartedAt() {
219
+ return this._inputStartedAt;
220
+ }
221
+
222
+ /** @internal */
223
+ updateOptions(options: { turnDetection: TurnDetectionMode | undefined }): void {
224
+ this.turnDetectionMode = options.turnDetection;
225
+ }
226
+
172
227
  async start() {
173
228
  this.vadTask = Task.from(({ signal }) => this.createVadTask(this.vad, signal));
174
229
  this.vadTask.result.catch((err) => {
@@ -179,6 +234,211 @@ export class AudioRecognition {
179
234
  this.sttTask.result.catch((err) => {
180
235
  this.logger.error(`Error running STT task: ${err}`);
181
236
  });
237
+
238
+ this.interruptionTask = Task.from(({ signal }) =>
239
+ this.createInterruptionTask(this.interruptionDetection, signal),
240
+ );
241
+ this.interruptionTask.result.catch((err) => {
242
+ this.logger.error(`Error running interruption task: ${err}`);
243
+ });
244
+ }
245
+
246
+ async stop() {
247
+ await this.sttTask?.cancelAndWait();
248
+ await this.vadTask?.cancelAndWait();
249
+ await this.interruptionTask?.cancelAndWait();
250
+ }
251
+
252
+ async onStartOfAgentSpeech() {
253
+ this.isAgentSpeaking = true;
254
+ return this.trySendInterruptionSentinel(InterruptionStreamSentinel.agentSpeechStarted());
255
+ }
256
+
257
+ async onEndOfAgentSpeech(ignoreUserTranscriptUntil: number) {
258
+ if (!this.isInterruptionEnabled) {
259
+ this.isAgentSpeaking = false;
260
+ return;
261
+ }
262
+
263
+ const inputOpen = await this.trySendInterruptionSentinel(
264
+ InterruptionStreamSentinel.agentSpeechEnded(),
265
+ );
266
+ if (!inputOpen) {
267
+ this.isAgentSpeaking = false;
268
+ return;
269
+ }
270
+
271
+ if (this.isAgentSpeaking) {
272
+ if (this.ignoreUserTranscriptUntil === undefined) {
273
+ this.onEndOfOverlapSpeech(Date.now());
274
+ }
275
+ this.ignoreUserTranscriptUntil = this.ignoreUserTranscriptUntil
276
+ ? Math.min(ignoreUserTranscriptUntil, this.ignoreUserTranscriptUntil)
277
+ : ignoreUserTranscriptUntil;
278
+
279
+ // flush held transcripts if possible
280
+ await this.flushHeldTranscripts();
281
+ }
282
+ this.isAgentSpeaking = false;
283
+ }
284
+
285
+ /** Start interruption inference when agent is speaking and overlap speech starts. */
286
+ async onStartOfOverlapSpeech(speechDuration: number, startedAt: number, userSpeakingSpan?: Span) {
287
+ if (this.isAgentSpeaking) {
288
+ this.trySendInterruptionSentinel(
289
+ InterruptionStreamSentinel.overlapSpeechStarted(
290
+ speechDuration,
291
+ startedAt,
292
+ userSpeakingSpan,
293
+ ),
294
+ );
295
+ }
296
+ }
297
+
298
+ /** End interruption inference when overlap speech ends. */
299
+ async onEndOfOverlapSpeech(endedAt: number, userSpeakingSpan?: Span) {
300
+ if (!this.isInterruptionEnabled) {
301
+ return;
302
+ }
303
+ if (userSpeakingSpan && userSpeakingSpan.isRecording()) {
304
+ userSpeakingSpan.setAttribute(traceTypes.ATTR_IS_INTERRUPTION, 'false');
305
+ }
306
+
307
+ return this.trySendInterruptionSentinel(InterruptionStreamSentinel.overlapSpeechEnded(endedAt));
308
+ }
309
+
310
+ /**
311
+ * Flush held transcripts whose *end time* is after the ignoreUserTranscriptUntil timestamp.
312
+ * If the event has no timestamps, we assume it is the same as the next valid event.
313
+ */
314
+ private async flushHeldTranscripts() {
315
+ if (
316
+ !this.isInterruptionEnabled ||
317
+ this.ignoreUserTranscriptUntil === undefined ||
318
+ this.transcriptBuffer.length === 0
319
+ ) {
320
+ return;
321
+ }
322
+
323
+ if (!this._inputStartedAt) {
324
+ this.transcriptBuffer = [];
325
+ this.ignoreUserTranscriptUntil = undefined;
326
+ return;
327
+ }
328
+
329
+ let emitFromIndex: number | null = null;
330
+ let shouldFlush = false;
331
+
332
+ for (let i = 0; i < this.transcriptBuffer.length; i++) {
333
+ const ev = this.transcriptBuffer[i];
334
+ if (!ev || !ev.alternatives || ev.alternatives.length === 0) {
335
+ emitFromIndex = Math.min(emitFromIndex ?? i, i);
336
+ continue;
337
+ }
338
+ const firstAlternative = ev.alternatives[0];
339
+ if (
340
+ firstAlternative.startTime === firstAlternative.endTime &&
341
+ firstAlternative.startTime === 0
342
+ ) {
343
+ this.transcriptBuffer = [];
344
+ this.ignoreUserTranscriptUntil = undefined;
345
+ return;
346
+ }
347
+
348
+ if (this.#alternativeEndsBeforeIgnoreWindow(firstAlternative)) {
349
+ emitFromIndex = null;
350
+ } else {
351
+ emitFromIndex = Math.min(emitFromIndex ?? i, i);
352
+ shouldFlush = true;
353
+ break;
354
+ }
355
+ }
356
+
357
+ const eventsToEmit =
358
+ emitFromIndex !== null && shouldFlush ? this.transcriptBuffer.slice(emitFromIndex) : [];
359
+
360
+ this.transcriptBuffer = [];
361
+ this.ignoreUserTranscriptUntil = undefined;
362
+
363
+ for (const event of eventsToEmit) {
364
+ this.logger.trace(
365
+ {
366
+ event: event.type,
367
+ },
368
+ 're-emitting held user transcript',
369
+ );
370
+ this.onSTTEvent(event);
371
+ }
372
+ }
373
+
374
+ #alternativeEndsBeforeIgnoreWindow(
375
+ alternative: NonNullable<SpeechEvent['alternatives']>[number],
376
+ ): boolean {
377
+ if (
378
+ this.ignoreUserTranscriptUntil === undefined ||
379
+ !this._inputStartedAt ||
380
+ alternative.startTime <= 0
381
+ ) {
382
+ return false;
383
+ }
384
+
385
+ // `SpeechData.startTime` is in seconds relative to audio start, while `inputStartedAt` and
386
+ // `ignoreUserTranscriptUntil` are epoch milliseconds.
387
+ return alternative.startTime * 1000 + this._inputStartedAt < this.ignoreUserTranscriptUntil;
388
+ }
389
+
390
+ private shouldHoldSttEvent(ev: SpeechEvent): boolean {
391
+ if (!this.isInterruptionEnabled) {
392
+ return false;
393
+ }
394
+ if (this.isAgentSpeaking) {
395
+ return true;
396
+ }
397
+
398
+ // reset when the user starts speaking after the agent speech
399
+ if (ev.type === SpeechEventType.START_OF_SPEECH) {
400
+ this.ignoreUserTranscriptUntil = undefined;
401
+ this.transcriptBuffer = [];
402
+ return false;
403
+ }
404
+
405
+ if (this.ignoreUserTranscriptUntil === undefined) {
406
+ return false;
407
+ }
408
+ // sentinel events are always held until we have something concrete to release them
409
+ if (!ev.alternatives || ev.alternatives.length === 0) {
410
+ return true;
411
+ }
412
+
413
+ const alternative = ev.alternatives[0];
414
+
415
+ if (
416
+ alternative.startTime !== alternative.endTime &&
417
+ this.#alternativeEndsBeforeIgnoreWindow(alternative)
418
+ ) {
419
+ return true;
420
+ }
421
+ return false;
422
+ }
423
+
424
+ private async trySendInterruptionSentinel(
425
+ frame: AudioFrame | InterruptionSentinel,
426
+ ): Promise<boolean> {
427
+ if (
428
+ this.isInterruptionEnabled &&
429
+ this.interruptionStreamChannel &&
430
+ !this.interruptionStreamChannel.closed
431
+ ) {
432
+ try {
433
+ await this.interruptionStreamChannel.write(frame);
434
+ return true;
435
+ } catch (e: unknown) {
436
+ this.logger.warn(
437
+ `could not forward interruption sentinel: ${e instanceof Error ? e.message : String(e)}`,
438
+ );
439
+ }
440
+ }
441
+ return false;
182
442
  }
183
443
 
184
444
  private ensureUserTurnSpan(startTime?: number): Span {
@@ -234,6 +494,25 @@ export class AudioRecognition {
234
494
  return;
235
495
  }
236
496
 
497
+ // handle interruption detection
498
+ // - hold the event until the ignore_user_transcript_until expires
499
+ // - release only relevant events
500
+ // - allow RECOGNITION_USAGE to pass through immediately
501
+
502
+ if (ev.type !== SpeechEventType.RECOGNITION_USAGE && this.isInterruptionEnabled) {
503
+ if (this.shouldHoldSttEvent(ev)) {
504
+ this.logger.trace(
505
+ { event: ev.type, ignoreUserTranscriptUntil: this.ignoreUserTranscriptUntil },
506
+ 'holding STT event until ignore_user_transcript_until expires',
507
+ );
508
+ this.transcriptBuffer.push(ev);
509
+ return;
510
+ } else {
511
+ await this.flushHeldTranscripts();
512
+ // no return here to allow the new event to be processed normally
513
+ }
514
+ }
515
+
237
516
  switch (ev.type) {
238
517
  case SpeechEventType.FINAL_TRANSCRIPT:
239
518
  const transcript = ev.alternatives?.[0]?.text;
@@ -417,6 +696,12 @@ export class AudioRecognition {
417
696
  }
418
697
  }
419
698
 
699
+ private onOverlapSpeechEvent(ev: OverlappingSpeechEvent) {
700
+ if (ev.isInterruption) {
701
+ this.hooks.onInterruption(ev);
702
+ }
703
+ }
704
+
420
705
  private runEOUDetection(chatCtx: ChatContext) {
421
706
  this.logger.debug(
422
707
  {
@@ -675,7 +960,9 @@ export class AudioRecognition {
675
960
  this.lastSpeakingTime = Date.now();
676
961
 
677
962
  if (this.speechStartTime === undefined) {
678
- this.speechStartTime = Date.now();
963
+ // Backdate speechStartTime to the actual start of accumulated speech.
964
+ // ev.rawAccumulatedSpeech is in ms (VADEvent durations are all ms in TS).
965
+ this.speechStartTime = Date.now() - ev.rawAccumulatedSpeech;
679
966
  }
680
967
  }
681
968
  break;
@@ -707,6 +994,85 @@ export class AudioRecognition {
707
994
  }
708
995
  }
709
996
 
997
+ private async createInterruptionTask(
998
+ interruptionDetection: AdaptiveInterruptionDetector | undefined,
999
+ signal: AbortSignal,
1000
+ ) {
1001
+ if (!interruptionDetection || !this.interruptionStreamChannel) return;
1002
+
1003
+ const stream = interruptionDetection.createStream();
1004
+ const inputReader = this.interruptionStreamChannel.stream().getReader();
1005
+
1006
+ const cleanup = async () => {
1007
+ try {
1008
+ signal.removeEventListener('abort', abortHandler);
1009
+ eventReader.releaseLock();
1010
+ await stream.close();
1011
+ } catch (e) {
1012
+ this.logger.debug('createInterruptionTask: error during abort handler:', e);
1013
+ }
1014
+ };
1015
+
1016
+ // Forward input frames/sentinels to the interruption stream
1017
+ const forwardTask = (async () => {
1018
+ try {
1019
+ const abortPromise = waitForAbort(signal);
1020
+ while (!signal.aborted) {
1021
+ const res = await Promise.race([inputReader.read(), abortPromise]);
1022
+ if (!res) break;
1023
+ const { value, done } = res;
1024
+ if (done) break;
1025
+ // Backdate to the actual start of the audio frame, not when it was received.
1026
+ if (value instanceof AudioFrame) {
1027
+ const frameDurationMs = (value.samplesPerChannel / value.sampleRate) * 1000;
1028
+ this._inputStartedAt ??= Date.now() - frameDurationMs;
1029
+ } else {
1030
+ this._inputStartedAt ??= Date.now();
1031
+ }
1032
+ await stream.pushFrame(value);
1033
+ }
1034
+ } finally {
1035
+ inputReader.releaseLock();
1036
+ }
1037
+ })();
1038
+
1039
+ // Read output events from the interruption stream
1040
+ const eventReader = stream.stream().getReader();
1041
+ const abortHandler = async () => {
1042
+ await cleanup();
1043
+ };
1044
+ signal.addEventListener('abort', abortHandler);
1045
+
1046
+ try {
1047
+ const abortPromise = waitForAbort(signal);
1048
+
1049
+ while (!signal.aborted) {
1050
+ const res = await Promise.race([eventReader.read(), abortPromise]);
1051
+ if (!res) break;
1052
+ const { done, value: ev } = res;
1053
+ if (done) break;
1054
+ this.onOverlapSpeechEvent(ev);
1055
+ }
1056
+ } catch (e) {
1057
+ if (!signal.aborted) {
1058
+ const cause = e instanceof Error ? e : new Error(String(e));
1059
+ interruptionDetection.emitError(
1060
+ new InterruptionDetectionError(
1061
+ cause.message,
1062
+ Date.now(),
1063
+ interruptionDetection.label,
1064
+ false,
1065
+ ),
1066
+ );
1067
+ this.logger.error(e, 'Error in interruption task');
1068
+ }
1069
+ } finally {
1070
+ await cleanup();
1071
+ await forwardTask;
1072
+ this.logger.debug('Interruption task closed');
1073
+ }
1074
+ }
1075
+
710
1076
  setInputAudioStream(audioStream: ReadableStream<AudioFrame>) {
711
1077
  this.deferredInputStream.setSource(audioStream);
712
1078
  }
@@ -783,6 +1149,8 @@ export class AudioRecognition {
783
1149
  await this.sttTask?.cancelAndWait();
784
1150
  await this.vadTask?.cancelAndWait();
785
1151
  await this.bounceEOUTask?.cancelAndWait();
1152
+ await this.interruptionTask?.cancelAndWait();
1153
+ await this.interruptionStreamChannel?.close();
786
1154
  }
787
1155
 
788
1156
  private _endUserTurnSpan({
@@ -809,6 +1177,14 @@ export class AudioRecognition {
809
1177
  }
810
1178
 
811
1179
  private get vadBaseTurnDetection() {
812
- return ['vad', undefined].includes(this.turnDetectionMode);
1180
+ if (typeof this.turnDetectionMode === 'object') {
1181
+ return false;
1182
+ }
1183
+
1184
+ if (this.turnDetectionMode === undefined || this.turnDetectionMode === 'vad') {
1185
+ return true;
1186
+ }
1187
+
1188
+ return false;
813
1189
  }
814
1190
  }