@livekit/agents 0.7.9 → 1.0.0-next.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (627):
  1. package/dist/_exceptions.cjs +109 -0
  2. package/dist/_exceptions.cjs.map +1 -0
  3. package/dist/_exceptions.d.cts +64 -0
  4. package/dist/_exceptions.d.ts +64 -0
  5. package/dist/_exceptions.d.ts.map +1 -0
  6. package/dist/_exceptions.js +80 -0
  7. package/dist/_exceptions.js.map +1 -0
  8. package/dist/audio.cjs +10 -3
  9. package/dist/audio.cjs.map +1 -1
  10. package/dist/audio.d.cts +2 -0
  11. package/dist/audio.d.ts +2 -0
  12. package/dist/audio.d.ts.map +1 -1
  13. package/dist/audio.js +8 -2
  14. package/dist/audio.js.map +1 -1
  15. package/dist/cli.cjs +25 -0
  16. package/dist/cli.cjs.map +1 -1
  17. package/dist/cli.d.ts.map +1 -1
  18. package/dist/cli.js +25 -0
  19. package/dist/cli.js.map +1 -1
  20. package/dist/constants.cjs +6 -3
  21. package/dist/constants.cjs.map +1 -1
  22. package/dist/constants.d.cts +2 -1
  23. package/dist/constants.d.ts +2 -1
  24. package/dist/constants.d.ts.map +1 -1
  25. package/dist/constants.js +4 -2
  26. package/dist/constants.js.map +1 -1
  27. package/dist/http_server.cjs.map +1 -1
  28. package/dist/http_server.d.cts +1 -0
  29. package/dist/http_server.d.ts +1 -0
  30. package/dist/http_server.d.ts.map +1 -1
  31. package/dist/http_server.js.map +1 -1
  32. package/dist/index.cjs +27 -20
  33. package/dist/index.cjs.map +1 -1
  34. package/dist/index.d.cts +13 -10
  35. package/dist/index.d.ts +13 -10
  36. package/dist/index.d.ts.map +1 -1
  37. package/dist/index.js +15 -11
  38. package/dist/index.js.map +1 -1
  39. package/dist/inference_runner.cjs +0 -1
  40. package/dist/inference_runner.cjs.map +1 -1
  41. package/dist/inference_runner.d.cts +2 -3
  42. package/dist/inference_runner.d.ts +2 -3
  43. package/dist/inference_runner.d.ts.map +1 -1
  44. package/dist/inference_runner.js +0 -1
  45. package/dist/inference_runner.js.map +1 -1
  46. package/dist/ipc/inference_proc_executor.cjs +2 -2
  47. package/dist/ipc/inference_proc_executor.cjs.map +1 -1
  48. package/dist/ipc/inference_proc_executor.js +2 -2
  49. package/dist/ipc/inference_proc_executor.js.map +1 -1
  50. package/dist/ipc/job_executor.cjs.map +1 -1
  51. package/dist/ipc/job_executor.js.map +1 -1
  52. package/dist/ipc/job_proc_executor.cjs +1 -0
  53. package/dist/ipc/job_proc_executor.cjs.map +1 -1
  54. package/dist/ipc/job_proc_executor.js +1 -0
  55. package/dist/ipc/job_proc_executor.js.map +1 -1
  56. package/dist/ipc/job_proc_lazy_main.cjs +1 -1
  57. package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
  58. package/dist/ipc/job_proc_lazy_main.js +1 -1
  59. package/dist/ipc/job_proc_lazy_main.js.map +1 -1
  60. package/dist/ipc/supervised_proc.d.cts +1 -1
  61. package/dist/ipc/supervised_proc.d.ts +1 -1
  62. package/dist/ipc/supervised_proc.d.ts.map +1 -1
  63. package/dist/job.cjs +14 -2
  64. package/dist/job.cjs.map +1 -1
  65. package/dist/job.d.cts +8 -0
  66. package/dist/job.d.ts +8 -0
  67. package/dist/job.d.ts.map +1 -1
  68. package/dist/job.js +12 -1
  69. package/dist/job.js.map +1 -1
  70. package/dist/llm/chat_context.cjs +332 -82
  71. package/dist/llm/chat_context.cjs.map +1 -1
  72. package/dist/llm/chat_context.d.cts +152 -48
  73. package/dist/llm/chat_context.d.ts +152 -48
  74. package/dist/llm/chat_context.d.ts.map +1 -1
  75. package/dist/llm/chat_context.js +327 -81
  76. package/dist/llm/chat_context.js.map +1 -1
  77. package/dist/llm/chat_context.test.cjs +380 -0
  78. package/dist/llm/chat_context.test.cjs.map +1 -0
  79. package/dist/llm/chat_context.test.js +385 -0
  80. package/dist/llm/chat_context.test.js.map +1 -0
  81. package/dist/llm/index.cjs +37 -8
  82. package/dist/llm/index.cjs.map +1 -1
  83. package/dist/llm/index.d.cts +7 -3
  84. package/dist/llm/index.d.ts +7 -3
  85. package/dist/llm/index.d.ts.map +1 -1
  86. package/dist/llm/index.js +39 -9
  87. package/dist/llm/index.js.map +1 -1
  88. package/dist/llm/llm.cjs +98 -33
  89. package/dist/llm/llm.cjs.map +1 -1
  90. package/dist/llm/llm.d.cts +50 -24
  91. package/dist/llm/llm.d.ts +50 -24
  92. package/dist/llm/llm.d.ts.map +1 -1
  93. package/dist/llm/llm.js +99 -33
  94. package/dist/llm/llm.js.map +1 -1
  95. package/dist/llm/provider_format/google.cjs +128 -0
  96. package/dist/llm/provider_format/google.cjs.map +1 -0
  97. package/dist/llm/provider_format/google.d.cts +6 -0
  98. package/dist/llm/provider_format/google.d.ts +6 -0
  99. package/dist/llm/provider_format/google.d.ts.map +1 -0
  100. package/dist/llm/provider_format/google.js +104 -0
  101. package/dist/llm/provider_format/google.js.map +1 -0
  102. package/dist/llm/provider_format/google.test.cjs +676 -0
  103. package/dist/llm/provider_format/google.test.cjs.map +1 -0
  104. package/dist/llm/provider_format/google.test.js +675 -0
  105. package/dist/llm/provider_format/google.test.js.map +1 -0
  106. package/dist/llm/provider_format/index.cjs +40 -0
  107. package/dist/llm/provider_format/index.cjs.map +1 -0
  108. package/dist/llm/provider_format/index.d.cts +4 -0
  109. package/dist/llm/provider_format/index.d.ts +4 -0
  110. package/dist/llm/provider_format/index.d.ts.map +1 -0
  111. package/dist/llm/provider_format/index.js +16 -0
  112. package/dist/llm/provider_format/index.js.map +1 -0
  113. package/dist/llm/provider_format/openai.cjs +116 -0
  114. package/dist/llm/provider_format/openai.cjs.map +1 -0
  115. package/dist/llm/provider_format/openai.d.cts +3 -0
  116. package/dist/llm/provider_format/openai.d.ts +3 -0
  117. package/dist/llm/provider_format/openai.d.ts.map +1 -0
  118. package/dist/llm/provider_format/openai.js +92 -0
  119. package/dist/llm/provider_format/openai.js.map +1 -0
  120. package/dist/llm/provider_format/openai.test.cjs +490 -0
  121. package/dist/llm/provider_format/openai.test.cjs.map +1 -0
  122. package/dist/llm/provider_format/openai.test.js +489 -0
  123. package/dist/llm/provider_format/openai.test.js.map +1 -0
  124. package/dist/llm/provider_format/utils.cjs +146 -0
  125. package/dist/llm/provider_format/utils.cjs.map +1 -0
  126. package/dist/llm/provider_format/utils.d.cts +38 -0
  127. package/dist/llm/provider_format/utils.d.ts +38 -0
  128. package/dist/llm/provider_format/utils.d.ts.map +1 -0
  129. package/dist/llm/provider_format/utils.js +122 -0
  130. package/dist/llm/provider_format/utils.js.map +1 -0
  131. package/dist/llm/realtime.cjs +77 -0
  132. package/dist/llm/realtime.cjs.map +1 -0
  133. package/dist/llm/realtime.d.cts +98 -0
  134. package/dist/llm/realtime.d.ts +98 -0
  135. package/dist/llm/realtime.d.ts.map +1 -0
  136. package/dist/llm/realtime.js +52 -0
  137. package/dist/llm/realtime.js.map +1 -0
  138. package/dist/llm/remote_chat_context.cjs +112 -0
  139. package/dist/llm/remote_chat_context.cjs.map +1 -0
  140. package/dist/llm/remote_chat_context.d.cts +23 -0
  141. package/dist/llm/remote_chat_context.d.ts +23 -0
  142. package/dist/llm/remote_chat_context.d.ts.map +1 -0
  143. package/dist/llm/remote_chat_context.js +88 -0
  144. package/dist/llm/remote_chat_context.js.map +1 -0
  145. package/dist/llm/remote_chat_context.test.cjs +225 -0
  146. package/dist/llm/remote_chat_context.test.cjs.map +1 -0
  147. package/dist/llm/remote_chat_context.test.js +224 -0
  148. package/dist/llm/remote_chat_context.test.js.map +1 -0
  149. package/dist/llm/tool_context.cjs +111 -0
  150. package/dist/llm/tool_context.cjs.map +1 -0
  151. package/dist/llm/tool_context.d.cts +125 -0
  152. package/dist/llm/tool_context.d.ts +125 -0
  153. package/dist/llm/tool_context.d.ts.map +1 -0
  154. package/dist/llm/tool_context.js +80 -0
  155. package/dist/llm/tool_context.js.map +1 -0
  156. package/dist/llm/tool_context.test.cjs +162 -0
  157. package/dist/llm/tool_context.test.cjs.map +1 -0
  158. package/dist/llm/tool_context.test.js +161 -0
  159. package/dist/llm/tool_context.test.js.map +1 -0
  160. package/dist/llm/tool_context.type.test.cjs +92 -0
  161. package/dist/llm/tool_context.type.test.cjs.map +1 -0
  162. package/dist/llm/tool_context.type.test.js +91 -0
  163. package/dist/llm/tool_context.type.test.js.map +1 -0
  164. package/dist/llm/utils.cjs +260 -0
  165. package/dist/llm/utils.cjs.map +1 -0
  166. package/dist/llm/utils.d.cts +42 -0
  167. package/dist/llm/utils.d.ts +42 -0
  168. package/dist/llm/utils.d.ts.map +1 -0
  169. package/dist/llm/utils.js +223 -0
  170. package/dist/llm/utils.js.map +1 -0
  171. package/dist/llm/utils.test.cjs +513 -0
  172. package/dist/llm/utils.test.cjs.map +1 -0
  173. package/dist/llm/utils.test.js +490 -0
  174. package/dist/llm/utils.test.js.map +1 -0
  175. package/dist/metrics/base.cjs +0 -27
  176. package/dist/metrics/base.cjs.map +1 -1
  177. package/dist/metrics/base.d.cts +105 -63
  178. package/dist/metrics/base.d.ts +105 -63
  179. package/dist/metrics/base.d.ts.map +1 -1
  180. package/dist/metrics/base.js +0 -19
  181. package/dist/metrics/base.js.map +1 -1
  182. package/dist/metrics/index.cjs +0 -3
  183. package/dist/metrics/index.cjs.map +1 -1
  184. package/dist/metrics/index.d.cts +2 -3
  185. package/dist/metrics/index.d.ts +2 -3
  186. package/dist/metrics/index.d.ts.map +1 -1
  187. package/dist/metrics/index.js +0 -2
  188. package/dist/metrics/index.js.map +1 -1
  189. package/dist/metrics/usage_collector.cjs +17 -12
  190. package/dist/metrics/usage_collector.cjs.map +1 -1
  191. package/dist/metrics/usage_collector.d.cts +3 -2
  192. package/dist/metrics/usage_collector.d.ts +3 -2
  193. package/dist/metrics/usage_collector.d.ts.map +1 -1
  194. package/dist/metrics/usage_collector.js +17 -12
  195. package/dist/metrics/usage_collector.js.map +1 -1
  196. package/dist/metrics/utils.cjs +22 -59
  197. package/dist/metrics/utils.cjs.map +1 -1
  198. package/dist/metrics/utils.d.cts +1 -8
  199. package/dist/metrics/utils.d.ts +1 -8
  200. package/dist/metrics/utils.d.ts.map +1 -1
  201. package/dist/metrics/utils.js +22 -52
  202. package/dist/metrics/utils.js.map +1 -1
  203. package/dist/multimodal/index.cjs +0 -2
  204. package/dist/multimodal/index.cjs.map +1 -1
  205. package/dist/multimodal/index.d.cts +0 -1
  206. package/dist/multimodal/index.d.ts +0 -1
  207. package/dist/multimodal/index.d.ts.map +1 -1
  208. package/dist/multimodal/index.js +0 -1
  209. package/dist/multimodal/index.js.map +1 -1
  210. package/dist/plugin.cjs +24 -8
  211. package/dist/plugin.cjs.map +1 -1
  212. package/dist/plugin.d.cts +18 -4
  213. package/dist/plugin.d.ts +18 -4
  214. package/dist/plugin.d.ts.map +1 -1
  215. package/dist/plugin.js +22 -7
  216. package/dist/plugin.js.map +1 -1
  217. package/dist/stream/deferred_stream.cjs +98 -0
  218. package/dist/stream/deferred_stream.cjs.map +1 -0
  219. package/dist/stream/deferred_stream.d.cts +27 -0
  220. package/dist/stream/deferred_stream.d.ts +27 -0
  221. package/dist/stream/deferred_stream.d.ts.map +1 -0
  222. package/dist/stream/deferred_stream.js +73 -0
  223. package/dist/stream/deferred_stream.js.map +1 -0
  224. package/dist/stream/deferred_stream.test.cjs +527 -0
  225. package/dist/stream/deferred_stream.test.cjs.map +1 -0
  226. package/dist/stream/deferred_stream.test.js +526 -0
  227. package/dist/stream/deferred_stream.test.js.map +1 -0
  228. package/dist/stream/identity_transform.cjs +42 -0
  229. package/dist/stream/identity_transform.cjs.map +1 -0
  230. package/dist/stream/identity_transform.d.cts +6 -0
  231. package/dist/stream/identity_transform.d.ts +6 -0
  232. package/dist/stream/identity_transform.d.ts.map +1 -0
  233. package/dist/stream/identity_transform.js +18 -0
  234. package/dist/stream/identity_transform.js.map +1 -0
  235. package/dist/stream/identity_transform.test.cjs +125 -0
  236. package/dist/stream/identity_transform.test.cjs.map +1 -0
  237. package/dist/stream/identity_transform.test.js +124 -0
  238. package/dist/stream/identity_transform.test.js.map +1 -0
  239. package/dist/stream/index.cjs +38 -0
  240. package/dist/stream/index.cjs.map +1 -0
  241. package/dist/stream/index.d.cts +5 -0
  242. package/dist/stream/index.d.ts +5 -0
  243. package/dist/stream/index.d.ts.map +1 -0
  244. package/dist/stream/index.js +11 -0
  245. package/dist/stream/index.js.map +1 -0
  246. package/dist/stream/merge_readable_streams.cjs +59 -0
  247. package/dist/stream/merge_readable_streams.cjs.map +1 -0
  248. package/dist/stream/merge_readable_streams.d.cts +4 -0
  249. package/dist/stream/merge_readable_streams.d.ts +4 -0
  250. package/dist/stream/merge_readable_streams.d.ts.map +1 -0
  251. package/dist/stream/merge_readable_streams.js +35 -0
  252. package/dist/stream/merge_readable_streams.js.map +1 -0
  253. package/dist/stream/stream_channel.cjs +47 -0
  254. package/dist/stream/stream_channel.cjs.map +1 -0
  255. package/dist/stream/stream_channel.d.cts +9 -0
  256. package/dist/stream/stream_channel.d.ts +9 -0
  257. package/dist/stream/stream_channel.d.ts.map +1 -0
  258. package/dist/stream/stream_channel.js +23 -0
  259. package/dist/stream/stream_channel.js.map +1 -0
  260. package/dist/stream/stream_channel.test.cjs +97 -0
  261. package/dist/stream/stream_channel.test.cjs.map +1 -0
  262. package/dist/stream/stream_channel.test.js +96 -0
  263. package/dist/stream/stream_channel.test.js.map +1 -0
  264. package/dist/stt/stream_adapter.cjs +3 -4
  265. package/dist/stt/stream_adapter.cjs.map +1 -1
  266. package/dist/stt/stream_adapter.d.cts +1 -0
  267. package/dist/stt/stream_adapter.d.ts +1 -0
  268. package/dist/stt/stream_adapter.d.ts.map +1 -1
  269. package/dist/stt/stream_adapter.js +3 -4
  270. package/dist/stt/stream_adapter.js.map +1 -1
  271. package/dist/stt/stt.cjs +101 -10
  272. package/dist/stt/stt.cjs.map +1 -1
  273. package/dist/stt/stt.d.cts +26 -5
  274. package/dist/stt/stt.d.ts +26 -5
  275. package/dist/stt/stt.d.ts.map +1 -1
  276. package/dist/stt/stt.js +102 -11
  277. package/dist/stt/stt.js.map +1 -1
  278. package/dist/tokenize/basic/basic.cjs +10 -5
  279. package/dist/tokenize/basic/basic.cjs.map +1 -1
  280. package/dist/tokenize/basic/basic.d.cts +7 -1
  281. package/dist/tokenize/basic/basic.d.ts +7 -1
  282. package/dist/tokenize/basic/basic.d.ts.map +1 -1
  283. package/dist/tokenize/basic/basic.js +10 -5
  284. package/dist/tokenize/basic/basic.js.map +1 -1
  285. package/dist/tokenize/basic/sentence.cjs +14 -6
  286. package/dist/tokenize/basic/sentence.cjs.map +1 -1
  287. package/dist/tokenize/basic/sentence.d.cts +1 -1
  288. package/dist/tokenize/basic/sentence.d.ts +1 -1
  289. package/dist/tokenize/basic/sentence.d.ts.map +1 -1
  290. package/dist/tokenize/basic/sentence.js +14 -6
  291. package/dist/tokenize/basic/sentence.js.map +1 -1
  292. package/dist/tokenize/token_stream.cjs +5 -3
  293. package/dist/tokenize/token_stream.cjs.map +1 -1
  294. package/dist/tokenize/token_stream.d.cts +1 -0
  295. package/dist/tokenize/token_stream.d.ts +1 -0
  296. package/dist/tokenize/token_stream.d.ts.map +1 -1
  297. package/dist/tokenize/token_stream.js +6 -4
  298. package/dist/tokenize/token_stream.js.map +1 -1
  299. package/dist/transcription.cjs +1 -2
  300. package/dist/transcription.cjs.map +1 -1
  301. package/dist/transcription.d.ts.map +1 -1
  302. package/dist/transcription.js +2 -3
  303. package/dist/transcription.js.map +1 -1
  304. package/dist/tts/index.cjs +2 -4
  305. package/dist/tts/index.cjs.map +1 -1
  306. package/dist/tts/index.d.cts +1 -1
  307. package/dist/tts/index.d.ts +1 -1
  308. package/dist/tts/index.d.ts.map +1 -1
  309. package/dist/tts/index.js +1 -3
  310. package/dist/tts/index.js.map +1 -1
  311. package/dist/tts/stream_adapter.cjs +26 -13
  312. package/dist/tts/stream_adapter.cjs.map +1 -1
  313. package/dist/tts/stream_adapter.d.cts +1 -1
  314. package/dist/tts/stream_adapter.d.ts +1 -1
  315. package/dist/tts/stream_adapter.d.ts.map +1 -1
  316. package/dist/tts/stream_adapter.js +27 -14
  317. package/dist/tts/stream_adapter.js.map +1 -1
  318. package/dist/tts/tts.cjs +157 -25
  319. package/dist/tts/tts.cjs.map +1 -1
  320. package/dist/tts/tts.d.cts +29 -5
  321. package/dist/tts/tts.d.ts +29 -5
  322. package/dist/tts/tts.d.ts.map +1 -1
  323. package/dist/tts/tts.js +157 -24
  324. package/dist/tts/tts.js.map +1 -1
  325. package/dist/types.cjs +60 -0
  326. package/dist/types.cjs.map +1 -0
  327. package/dist/types.d.cts +13 -0
  328. package/dist/types.d.ts +13 -0
  329. package/dist/types.d.ts.map +1 -0
  330. package/dist/types.js +35 -0
  331. package/dist/types.js.map +1 -0
  332. package/dist/utils.cjs +281 -27
  333. package/dist/utils.cjs.map +1 -1
  334. package/dist/utils.d.cts +134 -9
  335. package/dist/utils.d.ts +134 -9
  336. package/dist/utils.d.ts.map +1 -1
  337. package/dist/utils.js +265 -26
  338. package/dist/utils.js.map +1 -1
  339. package/dist/utils.test.cjs +492 -0
  340. package/dist/utils.test.cjs.map +1 -0
  341. package/dist/utils.test.js +498 -0
  342. package/dist/utils.test.js.map +1 -0
  343. package/dist/vad.cjs +76 -20
  344. package/dist/vad.cjs.map +1 -1
  345. package/dist/vad.d.cts +25 -5
  346. package/dist/vad.d.ts +25 -5
  347. package/dist/vad.d.ts.map +1 -1
  348. package/dist/vad.js +76 -20
  349. package/dist/vad.js.map +1 -1
  350. package/dist/voice/agent.cjs +245 -0
  351. package/dist/voice/agent.cjs.map +1 -0
  352. package/dist/voice/agent.d.cts +78 -0
  353. package/dist/voice/agent.d.ts +78 -0
  354. package/dist/voice/agent.d.ts.map +1 -0
  355. package/dist/voice/agent.js +220 -0
  356. package/dist/voice/agent.js.map +1 -0
  357. package/dist/voice/agent.test.cjs +61 -0
  358. package/dist/voice/agent.test.cjs.map +1 -0
  359. package/dist/voice/agent.test.js +60 -0
  360. package/dist/voice/agent.test.js.map +1 -0
  361. package/dist/voice/agent_activity.cjs +1453 -0
  362. package/dist/voice/agent_activity.cjs.map +1 -0
  363. package/dist/voice/agent_activity.d.cts +94 -0
  364. package/dist/voice/agent_activity.d.ts +94 -0
  365. package/dist/voice/agent_activity.d.ts.map +1 -0
  366. package/dist/voice/agent_activity.js +1449 -0
  367. package/dist/voice/agent_activity.js.map +1 -0
  368. package/dist/voice/agent_session.cjs +312 -0
  369. package/dist/voice/agent_session.cjs.map +1 -0
  370. package/dist/voice/agent_session.d.cts +121 -0
  371. package/dist/voice/agent_session.d.ts +121 -0
  372. package/dist/voice/agent_session.d.ts.map +1 -0
  373. package/dist/voice/agent_session.js +295 -0
  374. package/dist/voice/agent_session.js.map +1 -0
  375. package/dist/voice/audio_recognition.cjs +375 -0
  376. package/dist/voice/audio_recognition.cjs.map +1 -0
  377. package/dist/voice/audio_recognition.d.cts +80 -0
  378. package/dist/voice/audio_recognition.d.ts +80 -0
  379. package/dist/voice/audio_recognition.d.ts.map +1 -0
  380. package/dist/voice/audio_recognition.js +351 -0
  381. package/dist/voice/audio_recognition.js.map +1 -0
  382. package/dist/voice/events.cjs +145 -0
  383. package/dist/voice/events.cjs.map +1 -0
  384. package/dist/voice/events.d.cts +124 -0
  385. package/dist/voice/events.d.ts +124 -0
  386. package/dist/voice/events.d.ts.map +1 -0
  387. package/dist/voice/events.js +110 -0
  388. package/dist/voice/events.js.map +1 -0
  389. package/dist/voice/generation.cjs +700 -0
  390. package/dist/voice/generation.cjs.map +1 -0
  391. package/dist/voice/generation.d.cts +115 -0
  392. package/dist/voice/generation.d.ts +115 -0
  393. package/dist/voice/generation.d.ts.map +1 -0
  394. package/dist/voice/generation.js +672 -0
  395. package/dist/voice/generation.js.map +1 -0
  396. package/dist/voice/index.cjs +40 -0
  397. package/dist/voice/index.cjs.map +1 -0
  398. package/dist/voice/index.d.cts +5 -0
  399. package/dist/voice/index.d.ts +5 -0
  400. package/dist/voice/index.d.ts.map +1 -0
  401. package/dist/voice/index.js +11 -0
  402. package/dist/voice/index.js.map +1 -0
  403. package/dist/voice/io.cjs +245 -0
  404. package/dist/voice/io.cjs.map +1 -0
  405. package/dist/voice/io.d.cts +101 -0
  406. package/dist/voice/io.d.ts +101 -0
  407. package/dist/voice/io.d.ts.map +1 -0
  408. package/dist/voice/io.js +217 -0
  409. package/dist/voice/io.js.map +1 -0
  410. package/dist/voice/room_io/_input.cjs +121 -0
  411. package/dist/voice/room_io/_input.cjs.map +1 -0
  412. package/dist/voice/room_io/_input.d.cts +24 -0
  413. package/dist/voice/room_io/_input.d.ts +24 -0
  414. package/dist/voice/room_io/_input.d.ts.map +1 -0
  415. package/dist/voice/room_io/_input.js +102 -0
  416. package/dist/voice/room_io/_input.js.map +1 -0
  417. package/dist/voice/room_io/_output.cjs +358 -0
  418. package/dist/voice/room_io/_output.cjs.map +1 -0
  419. package/dist/voice/room_io/_output.d.cts +75 -0
  420. package/dist/voice/room_io/_output.d.ts +75 -0
  421. package/dist/voice/room_io/_output.d.ts.map +1 -0
  422. package/dist/voice/room_io/_output.js +342 -0
  423. package/dist/voice/room_io/_output.js.map +1 -0
  424. package/dist/voice/room_io/index.cjs +25 -0
  425. package/dist/voice/room_io/index.cjs.map +1 -0
  426. package/dist/voice/room_io/index.d.cts +3 -0
  427. package/dist/voice/room_io/index.d.ts +3 -0
  428. package/dist/voice/room_io/index.d.ts.map +1 -0
  429. package/dist/voice/room_io/index.js +3 -0
  430. package/dist/voice/room_io/index.js.map +1 -0
  431. package/dist/voice/room_io/room_io.cjs +370 -0
  432. package/dist/voice/room_io/room_io.cjs.map +1 -0
  433. package/dist/voice/room_io/room_io.d.cts +73 -0
  434. package/dist/voice/room_io/room_io.d.ts +73 -0
  435. package/dist/voice/room_io/room_io.d.ts.map +1 -0
  436. package/dist/voice/room_io/room_io.js +361 -0
  437. package/dist/voice/room_io/room_io.js.map +1 -0
  438. package/dist/{pipeline/index.cjs → voice/run_context.cjs} +16 -11
  439. package/dist/voice/run_context.cjs.map +1 -0
  440. package/dist/voice/run_context.d.cts +12 -0
  441. package/dist/voice/run_context.d.ts +12 -0
  442. package/dist/voice/run_context.d.ts.map +1 -0
  443. package/dist/voice/run_context.js +14 -0
  444. package/dist/voice/run_context.js.map +1 -0
  445. package/dist/voice/speech_handle.cjs +105 -0
  446. package/dist/voice/speech_handle.cjs.map +1 -0
  447. package/dist/voice/speech_handle.d.cts +46 -0
  448. package/dist/voice/speech_handle.d.ts +46 -0
  449. package/dist/voice/speech_handle.d.ts.map +1 -0
  450. package/dist/voice/speech_handle.js +81 -0
  451. package/dist/voice/speech_handle.js.map +1 -0
  452. package/dist/voice/transcription/_utils.cjs +45 -0
  453. package/dist/voice/transcription/_utils.cjs.map +1 -0
  454. package/dist/voice/transcription/_utils.d.cts +3 -0
  455. package/dist/voice/transcription/_utils.d.ts +3 -0
  456. package/dist/voice/transcription/_utils.d.ts.map +1 -0
  457. package/dist/voice/transcription/_utils.js +21 -0
  458. package/dist/voice/transcription/_utils.js.map +1 -0
  459. package/dist/voice/transcription/index.cjs +23 -0
  460. package/dist/voice/transcription/index.cjs.map +1 -0
  461. package/dist/voice/transcription/index.d.cts +2 -0
  462. package/dist/voice/transcription/index.d.ts +2 -0
  463. package/dist/voice/transcription/index.d.ts.map +1 -0
  464. package/dist/voice/transcription/index.js +2 -0
  465. package/dist/voice/transcription/index.js.map +1 -0
  466. package/dist/voice/transcription/synchronizer.cjs +380 -0
  467. package/dist/voice/transcription/synchronizer.cjs.map +1 -0
  468. package/dist/voice/transcription/synchronizer.d.cts +86 -0
  469. package/dist/voice/transcription/synchronizer.d.ts +86 -0
  470. package/dist/voice/transcription/synchronizer.d.ts.map +1 -0
  471. package/dist/voice/transcription/synchronizer.js +355 -0
  472. package/dist/voice/transcription/synchronizer.js.map +1 -0
  473. package/dist/worker.cjs +22 -4
  474. package/dist/worker.cjs.map +1 -1
  475. package/dist/worker.d.cts +1 -1
  476. package/dist/worker.d.ts +1 -1
  477. package/dist/worker.d.ts.map +1 -1
  478. package/dist/worker.js +22 -4
  479. package/dist/worker.js.map +1 -1
  480. package/package.json +9 -2
  481. package/src/_exceptions.ts +137 -0
  482. package/src/audio.ts +12 -1
  483. package/src/cli.ts +37 -0
  484. package/src/constants.ts +2 -1
  485. package/src/http_server.ts +1 -0
  486. package/src/index.ts +13 -10
  487. package/src/inference_runner.ts +2 -3
  488. package/src/ipc/inference_proc_executor.ts +2 -2
  489. package/src/ipc/job_executor.ts +1 -1
  490. package/src/ipc/job_proc_executor.ts +1 -1
  491. package/src/ipc/job_proc_lazy_main.ts +1 -1
  492. package/src/job.ts +18 -0
  493. package/src/llm/__snapshots__/chat_context.test.ts.snap +527 -0
  494. package/src/llm/__snapshots__/tool_context.test.ts.snap +177 -0
  495. package/src/llm/__snapshots__/utils.test.ts.snap +65 -0
  496. package/src/llm/chat_context.test.ts +450 -0
  497. package/src/llm/chat_context.ts +501 -103
  498. package/src/llm/index.ts +53 -18
  499. package/src/llm/llm.ts +149 -50
  500. package/src/llm/provider_format/google.test.ts +772 -0
  501. package/src/llm/provider_format/google.ts +130 -0
  502. package/src/llm/provider_format/index.ts +23 -0
  503. package/src/llm/provider_format/openai.test.ts +581 -0
  504. package/src/llm/provider_format/openai.ts +118 -0
  505. package/src/llm/provider_format/utils.ts +183 -0
  506. package/src/llm/realtime.ts +151 -0
  507. package/src/llm/remote_chat_context.test.ts +290 -0
  508. package/src/llm/remote_chat_context.ts +114 -0
  509. package/src/llm/tool_context.test.ts +198 -0
  510. package/src/llm/tool_context.ts +259 -0
  511. package/src/llm/tool_context.type.test.ts +115 -0
  512. package/src/llm/utils.test.ts +670 -0
  513. package/src/llm/utils.ts +324 -0
  514. package/src/metrics/base.ts +110 -78
  515. package/src/metrics/index.ts +3 -9
  516. package/src/metrics/usage_collector.ts +19 -13
  517. package/src/metrics/utils.ts +24 -69
  518. package/src/multimodal/index.ts +0 -1
  519. package/src/plugin.ts +26 -8
  520. package/src/stream/deferred_stream.test.ts +755 -0
  521. package/src/stream/deferred_stream.ts +110 -0
  522. package/src/stream/identity_transform.test.ts +179 -0
  523. package/src/stream/identity_transform.ts +18 -0
  524. package/src/stream/index.ts +7 -0
  525. package/src/stream/merge_readable_streams.ts +40 -0
  526. package/src/stream/stream_channel.test.ts +129 -0
  527. package/src/stream/stream_channel.ts +32 -0
  528. package/src/stt/stream_adapter.ts +3 -5
  529. package/src/stt/stt.ts +135 -17
  530. package/src/tokenize/basic/basic.ts +13 -5
  531. package/src/tokenize/basic/sentence.ts +20 -6
  532. package/src/tokenize/token_stream.ts +7 -4
  533. package/src/transcription.ts +2 -3
  534. package/src/tts/index.ts +0 -1
  535. package/src/tts/stream_adapter.ts +42 -16
  536. package/src/tts/tts.ts +203 -21
  537. package/src/types.ts +42 -0
  538. package/src/utils.test.ts +658 -0
  539. package/src/utils.ts +375 -44
  540. package/src/vad.ts +90 -22
  541. package/src/voice/agent.test.ts +80 -0
  542. package/src/voice/agent.ts +332 -0
  543. package/src/voice/agent_activity.ts +1913 -0
  544. package/src/voice/agent_session.ts +460 -0
  545. package/src/voice/audio_recognition.ts +474 -0
  546. package/src/voice/events.ts +252 -0
  547. package/src/voice/generation.ts +881 -0
  548. package/src/voice/index.ts +7 -0
  549. package/src/voice/io.ts +304 -0
  550. package/src/voice/room_io/_input.ts +144 -0
  551. package/src/voice/room_io/_output.ts +436 -0
  552. package/src/voice/room_io/index.ts +5 -0
  553. package/src/voice/room_io/room_io.ts +495 -0
  554. package/src/voice/run_context.ts +20 -0
  555. package/src/voice/speech_handle.ts +104 -0
  556. package/src/voice/transcription/_utils.ts +25 -0
  557. package/src/voice/transcription/index.ts +4 -0
  558. package/src/voice/transcription/synchronizer.ts +478 -0
  559. package/src/worker.ts +22 -2
  560. package/dist/llm/function_context.cjs +0 -103
  561. package/dist/llm/function_context.cjs.map +0 -1
  562. package/dist/llm/function_context.d.cts +0 -47
  563. package/dist/llm/function_context.d.ts +0 -47
  564. package/dist/llm/function_context.d.ts.map +0 -1
  565. package/dist/llm/function_context.js +0 -78
  566. package/dist/llm/function_context.js.map +0 -1
  567. package/dist/llm/function_context.test.cjs +0 -218
  568. package/dist/llm/function_context.test.cjs.map +0 -1
  569. package/dist/llm/function_context.test.js +0 -217
  570. package/dist/llm/function_context.test.js.map +0 -1
  571. package/dist/multimodal/multimodal_agent.cjs +0 -486
  572. package/dist/multimodal/multimodal_agent.cjs.map +0 -1
  573. package/dist/multimodal/multimodal_agent.d.cts +0 -48
  574. package/dist/multimodal/multimodal_agent.d.ts +0 -48
  575. package/dist/multimodal/multimodal_agent.d.ts.map +0 -1
  576. package/dist/multimodal/multimodal_agent.js +0 -461
  577. package/dist/multimodal/multimodal_agent.js.map +0 -1
  578. package/dist/pipeline/agent_output.cjs +0 -197
  579. package/dist/pipeline/agent_output.cjs.map +0 -1
  580. package/dist/pipeline/agent_output.d.cts +0 -33
  581. package/dist/pipeline/agent_output.d.ts +0 -33
  582. package/dist/pipeline/agent_output.d.ts.map +0 -1
  583. package/dist/pipeline/agent_output.js +0 -172
  584. package/dist/pipeline/agent_output.js.map +0 -1
  585. package/dist/pipeline/agent_playout.cjs +0 -175
  586. package/dist/pipeline/agent_playout.cjs.map +0 -1
  587. package/dist/pipeline/agent_playout.d.cts +0 -40
  588. package/dist/pipeline/agent_playout.d.ts +0 -40
  589. package/dist/pipeline/agent_playout.d.ts.map +0 -1
  590. package/dist/pipeline/agent_playout.js +0 -139
  591. package/dist/pipeline/agent_playout.js.map +0 -1
  592. package/dist/pipeline/human_input.cjs +0 -171
  593. package/dist/pipeline/human_input.cjs.map +0 -1
  594. package/dist/pipeline/human_input.d.cts +0 -30
  595. package/dist/pipeline/human_input.d.ts +0 -30
  596. package/dist/pipeline/human_input.d.ts.map +0 -1
  597. package/dist/pipeline/human_input.js +0 -146
  598. package/dist/pipeline/human_input.js.map +0 -1
  599. package/dist/pipeline/index.cjs.map +0 -1
  600. package/dist/pipeline/index.d.cts +0 -2
  601. package/dist/pipeline/index.d.ts +0 -2
  602. package/dist/pipeline/index.d.ts.map +0 -1
  603. package/dist/pipeline/index.js +0 -11
  604. package/dist/pipeline/index.js.map +0 -1
  605. package/dist/pipeline/pipeline_agent.cjs +0 -859
  606. package/dist/pipeline/pipeline_agent.cjs.map +0 -1
  607. package/dist/pipeline/pipeline_agent.d.cts +0 -150
  608. package/dist/pipeline/pipeline_agent.d.ts +0 -150
  609. package/dist/pipeline/pipeline_agent.d.ts.map +0 -1
  610. package/dist/pipeline/pipeline_agent.js +0 -837
  611. package/dist/pipeline/pipeline_agent.js.map +0 -1
  612. package/dist/pipeline/speech_handle.cjs +0 -176
  613. package/dist/pipeline/speech_handle.cjs.map +0 -1
  614. package/dist/pipeline/speech_handle.d.cts +0 -37
  615. package/dist/pipeline/speech_handle.d.ts +0 -37
  616. package/dist/pipeline/speech_handle.d.ts.map +0 -1
  617. package/dist/pipeline/speech_handle.js +0 -152
  618. package/dist/pipeline/speech_handle.js.map +0 -1
  619. package/src/llm/function_context.test.ts +0 -248
  620. package/src/llm/function_context.ts +0 -142
  621. package/src/multimodal/multimodal_agent.ts +0 -592
  622. package/src/pipeline/agent_output.ts +0 -219
  623. package/src/pipeline/agent_playout.ts +0 -192
  624. package/src/pipeline/human_input.ts +0 -188
  625. package/src/pipeline/index.ts +0 -15
  626. package/src/pipeline/pipeline_agent.ts +0 -1197
  627. package/src/pipeline/speech_handle.ts +0 -201
@@ -0,0 +1,97 @@
1
+ "use strict";
2
+ var import_vitest = require("vitest");
3
+ var import_stream_channel = require("./stream_channel.cjs");
4
+ (0, import_vitest.describe)("StreamChannel", () => {
5
+ (0, import_vitest.it)("should write and read a single value", async () => {
6
+ const channel = (0, import_stream_channel.createStreamChannel)();
7
+ const reader = channel.stream().getReader();
8
+ await channel.write("test value");
9
+ await channel.close();
10
+ const result = await reader.read();
11
+ (0, import_vitest.expect)(result.done).toBe(false);
12
+ (0, import_vitest.expect)(result.value).toBe("test value");
13
+ const nextResult = await reader.read();
14
+ (0, import_vitest.expect)(nextResult.done).toBe(true);
15
+ });
16
+ (0, import_vitest.it)("should write and read multiple values in sequence", async () => {
17
+ const channel = (0, import_stream_channel.createStreamChannel)();
18
+ const reader = channel.stream().getReader();
19
+ const testValues = ["first", "second", "third"];
20
+ for (const value of testValues) {
21
+ await channel.write(value);
22
+ }
23
+ await channel.close();
24
+ const results = [];
25
+ let result = await reader.read();
26
+ while (!result.done) {
27
+ results.push(result.value);
28
+ result = await reader.read();
29
+ }
30
+ (0, import_vitest.expect)(results).toEqual(testValues);
31
+ });
32
+ (0, import_vitest.it)("should handle arrays", async () => {
33
+ const channel = (0, import_stream_channel.createStreamChannel)();
34
+ const reader = channel.stream().getReader();
35
+ const testArray = [1, 2, 3, 4, 5];
36
+ await channel.write(testArray);
37
+ await channel.close();
38
+ const result = await reader.read();
39
+ (0, import_vitest.expect)(result.value).toEqual(testArray);
40
+ (0, import_vitest.expect)(result.value).toBe(testArray);
41
+ });
42
+ (0, import_vitest.it)("should work with concurrent writing and reading", async () => {
43
+ const channel = (0, import_stream_channel.createStreamChannel)();
44
+ const reader = channel.stream().getReader();
45
+ const testData = ["chunk1", "chunk2", "chunk3"];
46
+ const results = [];
47
+ const readPromise = (async () => {
48
+ let result = await reader.read();
49
+ while (!result.done) {
50
+ results.push(result.value);
51
+ result = await reader.read();
52
+ }
53
+ })();
54
+ for (const chunk of testData) {
55
+ await channel.write(chunk);
56
+ }
57
+ await channel.close();
58
+ await readPromise;
59
+ (0, import_vitest.expect)(results).toEqual(testData);
60
+ });
61
+ (0, import_vitest.it)("should handle empty stream", async () => {
62
+ const channel = (0, import_stream_channel.createStreamChannel)();
63
+ const reader = channel.stream().getReader();
64
+ await channel.close();
65
+ const result = await reader.read();
66
+ (0, import_vitest.expect)(result.done).toBe(true);
67
+ });
68
+ (0, import_vitest.it)("should handle non-awaited sequential writes", async () => {
69
+ const channel = (0, import_stream_channel.createStreamChannel)();
70
+ const reader = channel.stream().getReader();
71
+ const testNumbers = Array.from({ length: 100 }, (_, i) => i);
72
+ for (const num of testNumbers) {
73
+ channel.write(num);
74
+ }
75
+ channel.close();
76
+ const results = [];
77
+ let result = await reader.read();
78
+ while (!result.done) {
79
+ results.push(result.value);
80
+ result = await reader.read();
81
+ }
82
+ (0, import_vitest.expect)(results).toEqual(testNumbers);
83
+ });
84
+ (0, import_vitest.it)("should handle double closing without error", async () => {
85
+ const channel = (0, import_stream_channel.createStreamChannel)();
86
+ const reader = channel.stream().getReader();
87
+ await channel.write("test");
88
+ await channel.close();
89
+ await (0, import_vitest.expect)(channel.close()).resolves.toBeUndefined();
90
+ const result = await reader.read();
91
+ (0, import_vitest.expect)(result.done).toBe(false);
92
+ (0, import_vitest.expect)(result.value).toBe("test");
93
+ const nextResult = await reader.read();
94
+ (0, import_vitest.expect)(nextResult.done).toBe(true);
95
+ });
96
+ });
97
+ //# sourceMappingURL=stream_channel.test.cjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../src/stream/stream_channel.test.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { describe, expect, it } from 'vitest';\nimport { createStreamChannel } from './stream_channel.js';\n\ndescribe('StreamChannel', () => {\n it('should write and read a single value', async () => {\n const channel = createStreamChannel<string>();\n const reader = channel.stream().getReader();\n\n await channel.write('test value');\n await channel.close();\n\n const result = await reader.read();\n expect(result.done).toBe(false);\n expect(result.value).toBe('test value');\n\n const nextResult = await reader.read();\n expect(nextResult.done).toBe(true);\n });\n\n it('should write and read multiple values in sequence', async () => {\n const channel = createStreamChannel<string>();\n const reader = channel.stream().getReader();\n\n const testValues = ['first', 'second', 'third'];\n\n for (const value of testValues) {\n await channel.write(value);\n }\n await channel.close();\n\n const results: string[] = [];\n let result = await reader.read();\n while (!result.done) {\n results.push(result.value);\n result = await reader.read();\n }\n\n expect(results).toEqual(testValues);\n });\n\n it('should handle arrays', async () => {\n const channel = createStreamChannel<number[]>();\n const reader = channel.stream().getReader();\n\n const testArray = [1, 2, 3, 4, 5];\n await channel.write(testArray);\n await channel.close();\n\n const result = await reader.read();\n expect(result.value).toEqual(testArray);\n expect(result.value).toBe(testArray);\n });\n\n it('should work with concurrent writing and reading', async () => {\n const channel = createStreamChannel<string>();\n const reader = channel.stream().getReader();\n\n const testData = ['chunk1', 'chunk2', 'chunk3'];\n const results: string[] = [];\n\n const readPromise = (async () => {\n let result = await reader.read();\n while (!result.done) 
{\n results.push(result.value);\n result = await reader.read();\n }\n })();\n\n for (const chunk of testData) {\n await channel.write(chunk);\n }\n await channel.close();\n\n await readPromise;\n expect(results).toEqual(testData);\n });\n\n it('should handle empty stream', async () => {\n const channel = createStreamChannel<string>();\n const reader = channel.stream().getReader();\n\n await channel.close();\n\n const result = await reader.read();\n expect(result.done).toBe(true);\n });\n\n it('should handle non-awaited sequential writes', async () => {\n const channel = createStreamChannel<number>();\n const reader = channel.stream().getReader();\n\n const testNumbers = Array.from({ length: 100 }, (_, i) => i);\n\n for (const num of testNumbers) {\n channel.write(num);\n }\n channel.close();\n\n const results: number[] = [];\n let result = await reader.read();\n while (!result.done) {\n results.push(result.value);\n result = await reader.read();\n }\n\n expect(results).toEqual(testNumbers);\n });\n\n it('should handle double closing without error', async () => {\n const channel = createStreamChannel<string>();\n const reader = channel.stream().getReader();\n\n await channel.write('test');\n\n await channel.close();\n // Close again - should not throw\n await expect(channel.close()).resolves.toBeUndefined();\n\n const result = await reader.read();\n expect(result.done).toBe(false);\n expect(result.value).toBe('test');\n\n const nextResult = await reader.read();\n expect(nextResult.done).toBe(true);\n 
});\n});\n"],"mappings":";AAGA,oBAAqC;AACrC,4BAAoC;AAAA,IAEpC,wBAAS,iBAAiB,MAAM;AAC9B,wBAAG,wCAAwC,YAAY;AACrD,UAAM,cAAU,2CAA4B;AAC5C,UAAM,SAAS,QAAQ,OAAO,EAAE,UAAU;AAE1C,UAAM,QAAQ,MAAM,YAAY;AAChC,UAAM,QAAQ,MAAM;AAEpB,UAAM,SAAS,MAAM,OAAO,KAAK;AACjC,8BAAO,OAAO,IAAI,EAAE,KAAK,KAAK;AAC9B,8BAAO,OAAO,KAAK,EAAE,KAAK,YAAY;AAEtC,UAAM,aAAa,MAAM,OAAO,KAAK;AACrC,8BAAO,WAAW,IAAI,EAAE,KAAK,IAAI;AAAA,EACnC,CAAC;AAED,wBAAG,qDAAqD,YAAY;AAClE,UAAM,cAAU,2CAA4B;AAC5C,UAAM,SAAS,QAAQ,OAAO,EAAE,UAAU;AAE1C,UAAM,aAAa,CAAC,SAAS,UAAU,OAAO;AAE9C,eAAW,SAAS,YAAY;AAC9B,YAAM,QAAQ,MAAM,KAAK;AAAA,IAC3B;AACA,UAAM,QAAQ,MAAM;AAEpB,UAAM,UAAoB,CAAC;AAC3B,QAAI,SAAS,MAAM,OAAO,KAAK;AAC/B,WAAO,CAAC,OAAO,MAAM;AACnB,cAAQ,KAAK,OAAO,KAAK;AACzB,eAAS,MAAM,OAAO,KAAK;AAAA,IAC7B;AAEA,8BAAO,OAAO,EAAE,QAAQ,UAAU;AAAA,EACpC,CAAC;AAED,wBAAG,wBAAwB,YAAY;AACrC,UAAM,cAAU,2CAA8B;AAC9C,UAAM,SAAS,QAAQ,OAAO,EAAE,UAAU;AAE1C,UAAM,YAAY,CAAC,GAAG,GAAG,GAAG,GAAG,CAAC;AAChC,UAAM,QAAQ,MAAM,SAAS;AAC7B,UAAM,QAAQ,MAAM;AAEpB,UAAM,SAAS,MAAM,OAAO,KAAK;AACjC,8BAAO,OAAO,KAAK,EAAE,QAAQ,SAAS;AACtC,8BAAO,OAAO,KAAK,EAAE,KAAK,SAAS;AAAA,EACrC,CAAC;AAED,wBAAG,mDAAmD,YAAY;AAChE,UAAM,cAAU,2CAA4B;AAC5C,UAAM,SAAS,QAAQ,OAAO,EAAE,UAAU;AAE1C,UAAM,WAAW,CAAC,UAAU,UAAU,QAAQ;AAC9C,UAAM,UAAoB,CAAC;AAE3B,UAAM,eAAe,YAAY;AAC/B,UAAI,SAAS,MAAM,OAAO,KAAK;AAC/B,aAAO,CAAC,OAAO,MAAM;AACnB,gBAAQ,KAAK,OAAO,KAAK;AACzB,iBAAS,MAAM,OAAO,KAAK;AAAA,MAC7B;AAAA,IACF,GAAG;AAEH,eAAW,SAAS,UAAU;AAC5B,YAAM,QAAQ,MAAM,KAAK;AAAA,IAC3B;AACA,UAAM,QAAQ,MAAM;AAEpB,UAAM;AACN,8BAAO,OAAO,EAAE,QAAQ,QAAQ;AAAA,EAClC,CAAC;AAED,wBAAG,8BAA8B,YAAY;AAC3C,UAAM,cAAU,2CAA4B;AAC5C,UAAM,SAAS,QAAQ,OAAO,EAAE,UAAU;AAE1C,UAAM,QAAQ,MAAM;AAEpB,UAAM,SAAS,MAAM,OAAO,KAAK;AACjC,8BAAO,OAAO,IAAI,EAAE,KAAK,IAAI;AAAA,EAC/B,CAAC;AAED,wBAAG,+CAA+C,YAAY;AAC5D,UAAM,cAAU,2CAA4B;AAC5C,UAAM,SAAS,QAAQ,OAAO,EAAE,UAAU;AAE1C,UAAM,cAAc,MAAM,KAAK,EAAE,QAAQ,IAAI,GAAG,CAAC,GAAG,MAAM,CAAC;AAE3D,eAAW,OAAO,aAAa;AAC7B,cAAQ,MAAM,GAAG;AAAA,IACnB;AACA,YAAQ,MAAM;AAEd,UAAM,UAAoB,CAAC;AAC3B,QAAI,SAAS,MAAM,OAAO,KAAK;AAC/B,WAAO,CAAC,OAAO,MAAM;AACnB,
cAAQ,KAAK,OAAO,KAAK;AACzB,eAAS,MAAM,OAAO,KAAK;AAAA,IAC7B;AAEA,8BAAO,OAAO,EAAE,QAAQ,WAAW;AAAA,EACrC,CAAC;AAED,wBAAG,8CAA8C,YAAY;AAC3D,UAAM,cAAU,2CAA4B;AAC5C,UAAM,SAAS,QAAQ,OAAO,EAAE,UAAU;AAE1C,UAAM,QAAQ,MAAM,MAAM;AAE1B,UAAM,QAAQ,MAAM;AAEpB,cAAM,sBAAO,QAAQ,MAAM,CAAC,EAAE,SAAS,cAAc;AAErD,UAAM,SAAS,MAAM,OAAO,KAAK;AACjC,8BAAO,OAAO,IAAI,EAAE,KAAK,KAAK;AAC9B,8BAAO,OAAO,KAAK,EAAE,KAAK,MAAM;AAEhC,UAAM,aAAa,MAAM,OAAO,KAAK;AACrC,8BAAO,WAAW,IAAI,EAAE,KAAK,IAAI;AAAA,EACnC,CAAC;AACH,CAAC;","names":[]}
@@ -0,0 +1,96 @@
1
+ import { describe, expect, it } from "vitest";
2
+ import { createStreamChannel } from "./stream_channel.js";
3
+ describe("StreamChannel", () => {
4
+ it("should write and read a single value", async () => {
5
+ const channel = createStreamChannel();
6
+ const reader = channel.stream().getReader();
7
+ await channel.write("test value");
8
+ await channel.close();
9
+ const result = await reader.read();
10
+ expect(result.done).toBe(false);
11
+ expect(result.value).toBe("test value");
12
+ const nextResult = await reader.read();
13
+ expect(nextResult.done).toBe(true);
14
+ });
15
+ it("should write and read multiple values in sequence", async () => {
16
+ const channel = createStreamChannel();
17
+ const reader = channel.stream().getReader();
18
+ const testValues = ["first", "second", "third"];
19
+ for (const value of testValues) {
20
+ await channel.write(value);
21
+ }
22
+ await channel.close();
23
+ const results = [];
24
+ let result = await reader.read();
25
+ while (!result.done) {
26
+ results.push(result.value);
27
+ result = await reader.read();
28
+ }
29
+ expect(results).toEqual(testValues);
30
+ });
31
+ it("should handle arrays", async () => {
32
+ const channel = createStreamChannel();
33
+ const reader = channel.stream().getReader();
34
+ const testArray = [1, 2, 3, 4, 5];
35
+ await channel.write(testArray);
36
+ await channel.close();
37
+ const result = await reader.read();
38
+ expect(result.value).toEqual(testArray);
39
+ expect(result.value).toBe(testArray);
40
+ });
41
+ it("should work with concurrent writing and reading", async () => {
42
+ const channel = createStreamChannel();
43
+ const reader = channel.stream().getReader();
44
+ const testData = ["chunk1", "chunk2", "chunk3"];
45
+ const results = [];
46
+ const readPromise = (async () => {
47
+ let result = await reader.read();
48
+ while (!result.done) {
49
+ results.push(result.value);
50
+ result = await reader.read();
51
+ }
52
+ })();
53
+ for (const chunk of testData) {
54
+ await channel.write(chunk);
55
+ }
56
+ await channel.close();
57
+ await readPromise;
58
+ expect(results).toEqual(testData);
59
+ });
60
+ it("should handle empty stream", async () => {
61
+ const channel = createStreamChannel();
62
+ const reader = channel.stream().getReader();
63
+ await channel.close();
64
+ const result = await reader.read();
65
+ expect(result.done).toBe(true);
66
+ });
67
+ it("should handle non-awaited sequential writes", async () => {
68
+ const channel = createStreamChannel();
69
+ const reader = channel.stream().getReader();
70
+ const testNumbers = Array.from({ length: 100 }, (_, i) => i);
71
+ for (const num of testNumbers) {
72
+ channel.write(num);
73
+ }
74
+ channel.close();
75
+ const results = [];
76
+ let result = await reader.read();
77
+ while (!result.done) {
78
+ results.push(result.value);
79
+ result = await reader.read();
80
+ }
81
+ expect(results).toEqual(testNumbers);
82
+ });
83
+ it("should handle double closing without error", async () => {
84
+ const channel = createStreamChannel();
85
+ const reader = channel.stream().getReader();
86
+ await channel.write("test");
87
+ await channel.close();
88
+ await expect(channel.close()).resolves.toBeUndefined();
89
+ const result = await reader.read();
90
+ expect(result.done).toBe(false);
91
+ expect(result.value).toBe("test");
92
+ const nextResult = await reader.read();
93
+ expect(nextResult.done).toBe(true);
94
+ });
95
+ });
96
+ //# sourceMappingURL=stream_channel.test.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../src/stream/stream_channel.test.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { describe, expect, it } from 'vitest';\nimport { createStreamChannel } from './stream_channel.js';\n\ndescribe('StreamChannel', () => {\n it('should write and read a single value', async () => {\n const channel = createStreamChannel<string>();\n const reader = channel.stream().getReader();\n\n await channel.write('test value');\n await channel.close();\n\n const result = await reader.read();\n expect(result.done).toBe(false);\n expect(result.value).toBe('test value');\n\n const nextResult = await reader.read();\n expect(nextResult.done).toBe(true);\n });\n\n it('should write and read multiple values in sequence', async () => {\n const channel = createStreamChannel<string>();\n const reader = channel.stream().getReader();\n\n const testValues = ['first', 'second', 'third'];\n\n for (const value of testValues) {\n await channel.write(value);\n }\n await channel.close();\n\n const results: string[] = [];\n let result = await reader.read();\n while (!result.done) {\n results.push(result.value);\n result = await reader.read();\n }\n\n expect(results).toEqual(testValues);\n });\n\n it('should handle arrays', async () => {\n const channel = createStreamChannel<number[]>();\n const reader = channel.stream().getReader();\n\n const testArray = [1, 2, 3, 4, 5];\n await channel.write(testArray);\n await channel.close();\n\n const result = await reader.read();\n expect(result.value).toEqual(testArray);\n expect(result.value).toBe(testArray);\n });\n\n it('should work with concurrent writing and reading', async () => {\n const channel = createStreamChannel<string>();\n const reader = channel.stream().getReader();\n\n const testData = ['chunk1', 'chunk2', 'chunk3'];\n const results: string[] = [];\n\n const readPromise = (async () => {\n let result = await reader.read();\n while (!result.done) 
{\n results.push(result.value);\n result = await reader.read();\n }\n })();\n\n for (const chunk of testData) {\n await channel.write(chunk);\n }\n await channel.close();\n\n await readPromise;\n expect(results).toEqual(testData);\n });\n\n it('should handle empty stream', async () => {\n const channel = createStreamChannel<string>();\n const reader = channel.stream().getReader();\n\n await channel.close();\n\n const result = await reader.read();\n expect(result.done).toBe(true);\n });\n\n it('should handle non-awaited sequential writes', async () => {\n const channel = createStreamChannel<number>();\n const reader = channel.stream().getReader();\n\n const testNumbers = Array.from({ length: 100 }, (_, i) => i);\n\n for (const num of testNumbers) {\n channel.write(num);\n }\n channel.close();\n\n const results: number[] = [];\n let result = await reader.read();\n while (!result.done) {\n results.push(result.value);\n result = await reader.read();\n }\n\n expect(results).toEqual(testNumbers);\n });\n\n it('should handle double closing without error', async () => {\n const channel = createStreamChannel<string>();\n const reader = channel.stream().getReader();\n\n await channel.write('test');\n\n await channel.close();\n // Close again - should not throw\n await expect(channel.close()).resolves.toBeUndefined();\n\n const result = await reader.read();\n expect(result.done).toBe(false);\n expect(result.value).toBe('test');\n\n const nextResult = await reader.read();\n expect(nextResult.done).toBe(true);\n 
});\n});\n"],"mappings":"AAGA,SAAS,UAAU,QAAQ,UAAU;AACrC,SAAS,2BAA2B;AAEpC,SAAS,iBAAiB,MAAM;AAC9B,KAAG,wCAAwC,YAAY;AACrD,UAAM,UAAU,oBAA4B;AAC5C,UAAM,SAAS,QAAQ,OAAO,EAAE,UAAU;AAE1C,UAAM,QAAQ,MAAM,YAAY;AAChC,UAAM,QAAQ,MAAM;AAEpB,UAAM,SAAS,MAAM,OAAO,KAAK;AACjC,WAAO,OAAO,IAAI,EAAE,KAAK,KAAK;AAC9B,WAAO,OAAO,KAAK,EAAE,KAAK,YAAY;AAEtC,UAAM,aAAa,MAAM,OAAO,KAAK;AACrC,WAAO,WAAW,IAAI,EAAE,KAAK,IAAI;AAAA,EACnC,CAAC;AAED,KAAG,qDAAqD,YAAY;AAClE,UAAM,UAAU,oBAA4B;AAC5C,UAAM,SAAS,QAAQ,OAAO,EAAE,UAAU;AAE1C,UAAM,aAAa,CAAC,SAAS,UAAU,OAAO;AAE9C,eAAW,SAAS,YAAY;AAC9B,YAAM,QAAQ,MAAM,KAAK;AAAA,IAC3B;AACA,UAAM,QAAQ,MAAM;AAEpB,UAAM,UAAoB,CAAC;AAC3B,QAAI,SAAS,MAAM,OAAO,KAAK;AAC/B,WAAO,CAAC,OAAO,MAAM;AACnB,cAAQ,KAAK,OAAO,KAAK;AACzB,eAAS,MAAM,OAAO,KAAK;AAAA,IAC7B;AAEA,WAAO,OAAO,EAAE,QAAQ,UAAU;AAAA,EACpC,CAAC;AAED,KAAG,wBAAwB,YAAY;AACrC,UAAM,UAAU,oBAA8B;AAC9C,UAAM,SAAS,QAAQ,OAAO,EAAE,UAAU;AAE1C,UAAM,YAAY,CAAC,GAAG,GAAG,GAAG,GAAG,CAAC;AAChC,UAAM,QAAQ,MAAM,SAAS;AAC7B,UAAM,QAAQ,MAAM;AAEpB,UAAM,SAAS,MAAM,OAAO,KAAK;AACjC,WAAO,OAAO,KAAK,EAAE,QAAQ,SAAS;AACtC,WAAO,OAAO,KAAK,EAAE,KAAK,SAAS;AAAA,EACrC,CAAC;AAED,KAAG,mDAAmD,YAAY;AAChE,UAAM,UAAU,oBAA4B;AAC5C,UAAM,SAAS,QAAQ,OAAO,EAAE,UAAU;AAE1C,UAAM,WAAW,CAAC,UAAU,UAAU,QAAQ;AAC9C,UAAM,UAAoB,CAAC;AAE3B,UAAM,eAAe,YAAY;AAC/B,UAAI,SAAS,MAAM,OAAO,KAAK;AAC/B,aAAO,CAAC,OAAO,MAAM;AACnB,gBAAQ,KAAK,OAAO,KAAK;AACzB,iBAAS,MAAM,OAAO,KAAK;AAAA,MAC7B;AAAA,IACF,GAAG;AAEH,eAAW,SAAS,UAAU;AAC5B,YAAM,QAAQ,MAAM,KAAK;AAAA,IAC3B;AACA,UAAM,QAAQ,MAAM;AAEpB,UAAM;AACN,WAAO,OAAO,EAAE,QAAQ,QAAQ;AAAA,EAClC,CAAC;AAED,KAAG,8BAA8B,YAAY;AAC3C,UAAM,UAAU,oBAA4B;AAC5C,UAAM,SAAS,QAAQ,OAAO,EAAE,UAAU;AAE1C,UAAM,QAAQ,MAAM;AAEpB,UAAM,SAAS,MAAM,OAAO,KAAK;AACjC,WAAO,OAAO,IAAI,EAAE,KAAK,IAAI;AAAA,EAC/B,CAAC;AAED,KAAG,+CAA+C,YAAY;AAC5D,UAAM,UAAU,oBAA4B;AAC5C,UAAM,SAAS,QAAQ,OAAO,EAAE,UAAU;AAE1C,UAAM,cAAc,MAAM,KAAK,EAAE,QAAQ,IAAI,GAAG,CAAC,GAAG,MAAM,CAAC;AAE3D,eAAW,OAAO,aAAa;AAC7B,cAAQ,MAAM,GAAG;AAAA,IACnB;AACA,YAAQ,MAAM;AAEd,UAAM,UAAoB,CAAC;AAC3B,QAAI,SAAS,MAAM,OAAO,KAAK;AAC/B,WAAO,CAAC,OAAO,MAAM;AACnB,cAA
Q,KAAK,OAAO,KAAK;AACzB,eAAS,MAAM,OAAO,KAAK;AAAA,IAC7B;AAEA,WAAO,OAAO,EAAE,QAAQ,WAAW;AAAA,EACrC,CAAC;AAED,KAAG,8CAA8C,YAAY;AAC3D,UAAM,UAAU,oBAA4B;AAC5C,UAAM,SAAS,QAAQ,OAAO,EAAE,UAAU;AAE1C,UAAM,QAAQ,MAAM,MAAM;AAE1B,UAAM,QAAQ,MAAM;AAEpB,UAAM,OAAO,QAAQ,MAAM,CAAC,EAAE,SAAS,cAAc;AAErD,UAAM,SAAS,MAAM,OAAO,KAAK;AACjC,WAAO,OAAO,IAAI,EAAE,KAAK,KAAK;AAC9B,WAAO,OAAO,KAAK,EAAE,KAAK,MAAM;AAEhC,UAAM,aAAa,MAAM,OAAO,KAAK;AACrC,WAAO,WAAW,IAAI,EAAE,KAAK,IAAI;AAAA,EACnC,CAAC;AACH,CAAC;","names":[]}
@@ -34,8 +34,8 @@ class StreamAdapter extends import_stt.STT {
34
34
  this.#stt = stt;
35
35
  this.#vad = vad;
36
36
  this.label = `stt.StreamAdapter<${this.#stt.label}>`;
37
- this.#stt.on(import_stt.SpeechEventType.METRICS_COLLECTED, (metrics) => {
38
- this.emit(import_stt.SpeechEventType.METRICS_COLLECTED, metrics);
37
+ this.#stt.on("metrics_collected", (metrics) => {
38
+ this.emit("metrics_collected", metrics);
39
39
  });
40
40
  }
41
41
  _recognize(frame) {
@@ -54,12 +54,11 @@ class StreamAdapterWrapper extends import_stt.SpeechStream {
54
54
  this.#stt = stt;
55
55
  this.#vadStream = vad.stream();
56
56
  this.label = `stt.StreamAdapterWrapper<${this.#stt.label}>`;
57
- this.#run();
58
57
  }
59
58
  async monitorMetrics() {
60
59
  return;
61
60
  }
62
- async #run() {
61
+ async run() {
63
62
  const forwardInput = async () => {
64
63
  for await (const input of this.input) {
65
64
  if (input === import_stt.SpeechStream.FLUSH_SENTINEL) {
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/stt/stream_adapter.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { log } from '../log.js';\nimport type { VAD, VADStream } from '../vad.js';\nimport { VADEventType } from '../vad.js';\nimport type { SpeechEvent } from './stt.js';\nimport { STT, SpeechEventType, SpeechStream } from './stt.js';\n\nexport class StreamAdapter extends STT {\n #stt: STT;\n #vad: VAD;\n label: string;\n\n constructor(stt: STT, vad: VAD) {\n super({ streaming: true, interimResults: false });\n this.#stt = stt;\n this.#vad = vad;\n this.label = `stt.StreamAdapter<${this.#stt.label}>`;\n\n this.#stt.on(SpeechEventType.METRICS_COLLECTED, (metrics) => {\n this.emit(SpeechEventType.METRICS_COLLECTED, metrics);\n });\n }\n\n _recognize(frame: AudioFrame): Promise<SpeechEvent> {\n return this.#stt.recognize(frame);\n }\n\n stream(): StreamAdapterWrapper {\n return new StreamAdapterWrapper(this.#stt, this.#vad);\n }\n}\n\nexport class StreamAdapterWrapper extends SpeechStream {\n #stt: STT;\n #vadStream: VADStream;\n label: string;\n\n constructor(stt: STT, vad: VAD) {\n super(stt);\n this.#stt = stt;\n this.#vadStream = vad.stream();\n this.label = `stt.StreamAdapterWrapper<${this.#stt.label}>`;\n\n this.#run();\n }\n\n async monitorMetrics() {\n return; // do nothing\n }\n\n async #run() {\n const forwardInput = async () => {\n for await (const input of this.input) {\n if (input === SpeechStream.FLUSH_SENTINEL) {\n this.#vadStream.flush();\n } else {\n this.#vadStream.pushFrame(input);\n }\n }\n this.#vadStream.endInput();\n };\n\n const recognize = async () => {\n for await (const ev of this.#vadStream) {\n switch (ev.type) {\n case VADEventType.START_OF_SPEECH:\n this.output.put({ type: SpeechEventType.START_OF_SPEECH });\n break;\n case VADEventType.END_OF_SPEECH:\n this.output.put({ type: SpeechEventType.END_OF_SPEECH });\n\n 
try {\n const event = await this.#stt.recognize(ev.frames);\n if (!event.alternatives![0].text) {\n continue;\n }\n\n this.output.put(event);\n break;\n } catch (error) {\n let logger = log();\n if (error instanceof Error) {\n logger = logger.child({ error: error.message });\n } else {\n logger = logger.child({ error });\n }\n logger.error(`${this.label}: provider recognize task failed`);\n continue;\n }\n }\n }\n };\n\n Promise.all([forwardInput(), recognize()]);\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAIA,iBAAoB;AAEpB,iBAA6B;AAE7B,iBAAmD;AAE5C,MAAM,sBAAsB,eAAI;AAAA,EACrC;AAAA,EACA;AAAA,EACA;AAAA,EAEA,YAAY,KAAU,KAAU;AAC9B,UAAM,EAAE,WAAW,MAAM,gBAAgB,MAAM,CAAC;AAChD,SAAK,OAAO;AACZ,SAAK,OAAO;AACZ,SAAK,QAAQ,qBAAqB,KAAK,KAAK,KAAK;AAEjD,SAAK,KAAK,GAAG,2BAAgB,mBAAmB,CAAC,YAAY;AAC3D,WAAK,KAAK,2BAAgB,mBAAmB,OAAO;AAAA,IACtD,CAAC;AAAA,EACH;AAAA,EAEA,WAAW,OAAyC;AAClD,WAAO,KAAK,KAAK,UAAU,KAAK;AAAA,EAClC;AAAA,EAEA,SAA+B;AAC7B,WAAO,IAAI,qBAAqB,KAAK,MAAM,KAAK,IAAI;AAAA,EACtD;AACF;AAEO,MAAM,6BAA6B,wBAAa;AAAA,EACrD;AAAA,EACA;AAAA,EACA;AAAA,EAEA,YAAY,KAAU,KAAU;AAC9B,UAAM,GAAG;AACT,SAAK,OAAO;AACZ,SAAK,aAAa,IAAI,OAAO;AAC7B,SAAK,QAAQ,4BAA4B,KAAK,KAAK,KAAK;AAExD,SAAK,KAAK;AAAA,EACZ;AAAA,EAEA,MAAM,iBAAiB;AACrB;AAAA,EACF;AAAA,EAEA,MAAM,OAAO;AACX,UAAM,eAAe,YAAY;AAC/B,uBAAiB,SAAS,KAAK,OAAO;AACpC,YAAI,UAAU,wBAAa,gBAAgB;AACzC,eAAK,WAAW,MAAM;AAAA,QACxB,OAAO;AACL,eAAK,WAAW,UAAU,KAAK;AAAA,QACjC;AAAA,MACF;AACA,WAAK,WAAW,SAAS;AAAA,IAC3B;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,MAAM,KAAK,YAAY;AACtC,gBAAQ,GAAG,MAAM;AAAA,UACf,KAAK,wBAAa;AAChB,iBAAK,OAAO,IAAI,EAAE,MAAM,2BAAgB,gBAAgB,CAAC;AACzD;AAAA,UACF,KAAK,wBAAa;AAChB,iBAAK,OAAO,IAAI,EAAE,MAAM,2BAAgB,cAAc,CAAC;AAEvD,gBAAI;AACF,oBAAM,QAAQ,MAAM,KAAK,KAAK,UAAU,GAAG,MAAM;AACjD,kBAAI,CAAC,MAAM,aAAc,CAAC,EAAE,MAAM;AAChC;AAAA,cACF;AAEA,mBAAK,OAAO,IAAI,KAAK;AACrB;AAAA,YACF,SAAS,OAAO;AACd,kBAAI,aAAS,gBAAI;AACjB,kBAAI,iBAAiB,OAAO;AAC1B,yBAAS,OAAO,MAAM,EAAE,OAAO,MAAM,QAAQ,CAAC;AAAA,cAChD,OAAO;AACL,yBAAS,OAAO,MAAM,EAAE,MAAM,CAAC;AAAA,cACj
C;AACA,qBAAO,MAAM,GAAG,KAAK,KAAK,kCAAkC;AAC5D;AAAA,YACF;AAAA,QACJ;AAAA,MACF;AAAA,IACF;AAEA,YAAQ,IAAI,CAAC,aAAa,GAAG,UAAU,CAAC,CAAC;AAAA,EAC3C;AACF;","names":[]}
1
+ {"version":3,"sources":["../../src/stt/stream_adapter.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { log } from '../log.js';\nimport type { VAD, VADStream } from '../vad.js';\nimport { VADEventType } from '../vad.js';\nimport type { SpeechEvent } from './stt.js';\nimport { STT, SpeechEventType, SpeechStream } from './stt.js';\n\nexport class StreamAdapter extends STT {\n #stt: STT;\n #vad: VAD;\n label: string;\n\n constructor(stt: STT, vad: VAD) {\n super({ streaming: true, interimResults: false });\n this.#stt = stt;\n this.#vad = vad;\n this.label = `stt.StreamAdapter<${this.#stt.label}>`;\n\n this.#stt.on('metrics_collected', (metrics) => {\n this.emit('metrics_collected', metrics);\n });\n }\n\n _recognize(frame: AudioFrame): Promise<SpeechEvent> {\n return this.#stt.recognize(frame);\n }\n\n stream(): StreamAdapterWrapper {\n return new StreamAdapterWrapper(this.#stt, this.#vad);\n }\n}\n\nexport class StreamAdapterWrapper extends SpeechStream {\n #stt: STT;\n #vadStream: VADStream;\n label: string;\n\n constructor(stt: STT, vad: VAD) {\n super(stt);\n this.#stt = stt;\n this.#vadStream = vad.stream();\n this.label = `stt.StreamAdapterWrapper<${this.#stt.label}>`;\n }\n\n async monitorMetrics() {\n return; // do nothing\n }\n\n protected async run() {\n const forwardInput = async () => {\n for await (const input of this.input) {\n if (input === SpeechStream.FLUSH_SENTINEL) {\n this.#vadStream.flush();\n } else {\n this.#vadStream.pushFrame(input);\n }\n }\n this.#vadStream.endInput();\n };\n\n const recognize = async () => {\n for await (const ev of this.#vadStream) {\n switch (ev.type) {\n case VADEventType.START_OF_SPEECH:\n this.output.put({ type: SpeechEventType.START_OF_SPEECH });\n break;\n case VADEventType.END_OF_SPEECH:\n this.output.put({ type: SpeechEventType.END_OF_SPEECH });\n\n try {\n const event = await 
this.#stt.recognize(ev.frames);\n if (!event.alternatives![0].text) {\n continue;\n }\n\n this.output.put(event);\n break;\n } catch (error) {\n let logger = log();\n if (error instanceof Error) {\n logger = logger.child({ error: error.message });\n } else {\n logger = logger.child({ error });\n }\n logger.error(`${this.label}: provider recognize task failed`);\n continue;\n }\n }\n }\n };\n\n Promise.all([forwardInput(), recognize()]);\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAIA,iBAAoB;AAEpB,iBAA6B;AAE7B,iBAAmD;AAE5C,MAAM,sBAAsB,eAAI;AAAA,EACrC;AAAA,EACA;AAAA,EACA;AAAA,EAEA,YAAY,KAAU,KAAU;AAC9B,UAAM,EAAE,WAAW,MAAM,gBAAgB,MAAM,CAAC;AAChD,SAAK,OAAO;AACZ,SAAK,OAAO;AACZ,SAAK,QAAQ,qBAAqB,KAAK,KAAK,KAAK;AAEjD,SAAK,KAAK,GAAG,qBAAqB,CAAC,YAAY;AAC7C,WAAK,KAAK,qBAAqB,OAAO;AAAA,IACxC,CAAC;AAAA,EACH;AAAA,EAEA,WAAW,OAAyC;AAClD,WAAO,KAAK,KAAK,UAAU,KAAK;AAAA,EAClC;AAAA,EAEA,SAA+B;AAC7B,WAAO,IAAI,qBAAqB,KAAK,MAAM,KAAK,IAAI;AAAA,EACtD;AACF;AAEO,MAAM,6BAA6B,wBAAa;AAAA,EACrD;AAAA,EACA;AAAA,EACA;AAAA,EAEA,YAAY,KAAU,KAAU;AAC9B,UAAM,GAAG;AACT,SAAK,OAAO;AACZ,SAAK,aAAa,IAAI,OAAO;AAC7B,SAAK,QAAQ,4BAA4B,KAAK,KAAK,KAAK;AAAA,EAC1D;AAAA,EAEA,MAAM,iBAAiB;AACrB;AAAA,EACF;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,eAAe,YAAY;AAC/B,uBAAiB,SAAS,KAAK,OAAO;AACpC,YAAI,UAAU,wBAAa,gBAAgB;AACzC,eAAK,WAAW,MAAM;AAAA,QACxB,OAAO;AACL,eAAK,WAAW,UAAU,KAAK;AAAA,QACjC;AAAA,MACF;AACA,WAAK,WAAW,SAAS;AAAA,IAC3B;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,MAAM,KAAK,YAAY;AACtC,gBAAQ,GAAG,MAAM;AAAA,UACf,KAAK,wBAAa;AAChB,iBAAK,OAAO,IAAI,EAAE,MAAM,2BAAgB,gBAAgB,CAAC;AACzD;AAAA,UACF,KAAK,wBAAa;AAChB,iBAAK,OAAO,IAAI,EAAE,MAAM,2BAAgB,cAAc,CAAC;AAEvD,gBAAI;AACF,oBAAM,QAAQ,MAAM,KAAK,KAAK,UAAU,GAAG,MAAM;AACjD,kBAAI,CAAC,MAAM,aAAc,CAAC,EAAE,MAAM;AAChC;AAAA,cACF;AAEA,mBAAK,OAAO,IAAI,KAAK;AACrB;AAAA,YACF,SAAS,OAAO;AACd,kBAAI,aAAS,gBAAI;AACjB,kBAAI,iBAAiB,OAAO;AAC1B,yBAAS,OAAO,MAAM,EAAE,OAAO,MAAM,QAAQ,CAAC;AAAA,cAChD,OAAO;AACL,yBAAS,OAAO,MAAM,EAAE,MAAM,CAAC;AAAA,cACjC;AACA,qBAAO,MAAM,GAAG,KAAK,KAAK,kCAAkC;AAC5D;AAAA,YACF
;AAAA,QACJ;AAAA,MACF;AAAA,IACF;AAEA,YAAQ,IAAI,CAAC,aAAa,GAAG,UAAU,CAAC,CAAC;AAAA,EAC3C;AACF;","names":[]}
@@ -14,5 +14,6 @@ export declare class StreamAdapterWrapper extends SpeechStream {
14
14
  label: string;
15
15
  constructor(stt: STT, vad: VAD);
16
16
  monitorMetrics(): Promise<void>;
17
+ protected run(): Promise<void>;
17
18
  }
18
19
  //# sourceMappingURL=stream_adapter.d.ts.map
@@ -14,5 +14,6 @@ export declare class StreamAdapterWrapper extends SpeechStream {
14
14
  label: string;
15
15
  constructor(stt: STT, vad: VAD);
16
16
  monitorMetrics(): Promise<void>;
17
+ protected run(): Promise<void>;
17
18
  }
18
19
  //# sourceMappingURL=stream_adapter.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"stream_adapter.d.ts","sourceRoot":"","sources":["../../src/stt/stream_adapter.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAEpD,OAAO,KAAK,EAAE,GAAG,EAAa,MAAM,WAAW,CAAC;AAEhD,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,UAAU,CAAC;AAC5C,OAAO,EAAE,GAAG,EAAmB,YAAY,EAAE,MAAM,UAAU,CAAC;AAE9D,qBAAa,aAAc,SAAQ,GAAG;;IAGpC,KAAK,EAAE,MAAM,CAAC;gBAEF,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG;IAW9B,UAAU,CAAC,KAAK,EAAE,UAAU,GAAG,OAAO,CAAC,WAAW,CAAC;IAInD,MAAM,IAAI,oBAAoB;CAG/B;AAED,qBAAa,oBAAqB,SAAQ,YAAY;;IAGpD,KAAK,EAAE,MAAM,CAAC;gBAEF,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG;IASxB,cAAc;CAiDrB"}
1
+ {"version":3,"file":"stream_adapter.d.ts","sourceRoot":"","sources":["../../src/stt/stream_adapter.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,mBAAmB,CAAC;AAEpD,OAAO,KAAK,EAAE,GAAG,EAAa,MAAM,WAAW,CAAC;AAEhD,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,UAAU,CAAC;AAC5C,OAAO,EAAE,GAAG,EAAmB,YAAY,EAAE,MAAM,UAAU,CAAC;AAE9D,qBAAa,aAAc,SAAQ,GAAG;;IAGpC,KAAK,EAAE,MAAM,CAAC;gBAEF,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG;IAW9B,UAAU,CAAC,KAAK,EAAE,UAAU,GAAG,OAAO,CAAC,WAAW,CAAC;IAInD,MAAM,IAAI,oBAAoB;CAG/B;AAED,qBAAa,oBAAqB,SAAQ,YAAY;;IAGpD,KAAK,EAAE,MAAM,CAAC;gBAEF,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG;IAOxB,cAAc;cAIJ,GAAG;CA6CpB"}
@@ -10,8 +10,8 @@ class StreamAdapter extends STT {
10
10
  this.#stt = stt;
11
11
  this.#vad = vad;
12
12
  this.label = `stt.StreamAdapter<${this.#stt.label}>`;
13
- this.#stt.on(SpeechEventType.METRICS_COLLECTED, (metrics) => {
14
- this.emit(SpeechEventType.METRICS_COLLECTED, metrics);
13
+ this.#stt.on("metrics_collected", (metrics) => {
14
+ this.emit("metrics_collected", metrics);
15
15
  });
16
16
  }
17
17
  _recognize(frame) {
@@ -30,12 +30,11 @@ class StreamAdapterWrapper extends SpeechStream {
30
30
  this.#stt = stt;
31
31
  this.#vadStream = vad.stream();
32
32
  this.label = `stt.StreamAdapterWrapper<${this.#stt.label}>`;
33
- this.#run();
34
33
  }
35
34
  async monitorMetrics() {
36
35
  return;
37
36
  }
38
- async #run() {
37
+ async run() {
39
38
  const forwardInput = async () => {
40
39
  for await (const input of this.input) {
41
40
  if (input === SpeechStream.FLUSH_SENTINEL) {
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/stt/stream_adapter.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { log } from '../log.js';\nimport type { VAD, VADStream } from '../vad.js';\nimport { VADEventType } from '../vad.js';\nimport type { SpeechEvent } from './stt.js';\nimport { STT, SpeechEventType, SpeechStream } from './stt.js';\n\nexport class StreamAdapter extends STT {\n #stt: STT;\n #vad: VAD;\n label: string;\n\n constructor(stt: STT, vad: VAD) {\n super({ streaming: true, interimResults: false });\n this.#stt = stt;\n this.#vad = vad;\n this.label = `stt.StreamAdapter<${this.#stt.label}>`;\n\n this.#stt.on(SpeechEventType.METRICS_COLLECTED, (metrics) => {\n this.emit(SpeechEventType.METRICS_COLLECTED, metrics);\n });\n }\n\n _recognize(frame: AudioFrame): Promise<SpeechEvent> {\n return this.#stt.recognize(frame);\n }\n\n stream(): StreamAdapterWrapper {\n return new StreamAdapterWrapper(this.#stt, this.#vad);\n }\n}\n\nexport class StreamAdapterWrapper extends SpeechStream {\n #stt: STT;\n #vadStream: VADStream;\n label: string;\n\n constructor(stt: STT, vad: VAD) {\n super(stt);\n this.#stt = stt;\n this.#vadStream = vad.stream();\n this.label = `stt.StreamAdapterWrapper<${this.#stt.label}>`;\n\n this.#run();\n }\n\n async monitorMetrics() {\n return; // do nothing\n }\n\n async #run() {\n const forwardInput = async () => {\n for await (const input of this.input) {\n if (input === SpeechStream.FLUSH_SENTINEL) {\n this.#vadStream.flush();\n } else {\n this.#vadStream.pushFrame(input);\n }\n }\n this.#vadStream.endInput();\n };\n\n const recognize = async () => {\n for await (const ev of this.#vadStream) {\n switch (ev.type) {\n case VADEventType.START_OF_SPEECH:\n this.output.put({ type: SpeechEventType.START_OF_SPEECH });\n break;\n case VADEventType.END_OF_SPEECH:\n this.output.put({ type: SpeechEventType.END_OF_SPEECH });\n\n 
try {\n const event = await this.#stt.recognize(ev.frames);\n if (!event.alternatives![0].text) {\n continue;\n }\n\n this.output.put(event);\n break;\n } catch (error) {\n let logger = log();\n if (error instanceof Error) {\n logger = logger.child({ error: error.message });\n } else {\n logger = logger.child({ error });\n }\n logger.error(`${this.label}: provider recognize task failed`);\n continue;\n }\n }\n }\n };\n\n Promise.all([forwardInput(), recognize()]);\n }\n}\n"],"mappings":"AAIA,SAAS,WAAW;AAEpB,SAAS,oBAAoB;AAE7B,SAAS,KAAK,iBAAiB,oBAAoB;AAE5C,MAAM,sBAAsB,IAAI;AAAA,EACrC;AAAA,EACA;AAAA,EACA;AAAA,EAEA,YAAY,KAAU,KAAU;AAC9B,UAAM,EAAE,WAAW,MAAM,gBAAgB,MAAM,CAAC;AAChD,SAAK,OAAO;AACZ,SAAK,OAAO;AACZ,SAAK,QAAQ,qBAAqB,KAAK,KAAK,KAAK;AAEjD,SAAK,KAAK,GAAG,gBAAgB,mBAAmB,CAAC,YAAY;AAC3D,WAAK,KAAK,gBAAgB,mBAAmB,OAAO;AAAA,IACtD,CAAC;AAAA,EACH;AAAA,EAEA,WAAW,OAAyC;AAClD,WAAO,KAAK,KAAK,UAAU,KAAK;AAAA,EAClC;AAAA,EAEA,SAA+B;AAC7B,WAAO,IAAI,qBAAqB,KAAK,MAAM,KAAK,IAAI;AAAA,EACtD;AACF;AAEO,MAAM,6BAA6B,aAAa;AAAA,EACrD;AAAA,EACA;AAAA,EACA;AAAA,EAEA,YAAY,KAAU,KAAU;AAC9B,UAAM,GAAG;AACT,SAAK,OAAO;AACZ,SAAK,aAAa,IAAI,OAAO;AAC7B,SAAK,QAAQ,4BAA4B,KAAK,KAAK,KAAK;AAExD,SAAK,KAAK;AAAA,EACZ;AAAA,EAEA,MAAM,iBAAiB;AACrB;AAAA,EACF;AAAA,EAEA,MAAM,OAAO;AACX,UAAM,eAAe,YAAY;AAC/B,uBAAiB,SAAS,KAAK,OAAO;AACpC,YAAI,UAAU,aAAa,gBAAgB;AACzC,eAAK,WAAW,MAAM;AAAA,QACxB,OAAO;AACL,eAAK,WAAW,UAAU,KAAK;AAAA,QACjC;AAAA,MACF;AACA,WAAK,WAAW,SAAS;AAAA,IAC3B;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,MAAM,KAAK,YAAY;AACtC,gBAAQ,GAAG,MAAM;AAAA,UACf,KAAK,aAAa;AAChB,iBAAK,OAAO,IAAI,EAAE,MAAM,gBAAgB,gBAAgB,CAAC;AACzD;AAAA,UACF,KAAK,aAAa;AAChB,iBAAK,OAAO,IAAI,EAAE,MAAM,gBAAgB,cAAc,CAAC;AAEvD,gBAAI;AACF,oBAAM,QAAQ,MAAM,KAAK,KAAK,UAAU,GAAG,MAAM;AACjD,kBAAI,CAAC,MAAM,aAAc,CAAC,EAAE,MAAM;AAChC;AAAA,cACF;AAEA,mBAAK,OAAO,IAAI,KAAK;AACrB;AAAA,YACF,SAAS,OAAO;AACd,kBAAI,SAAS,IAAI;AACjB,kBAAI,iBAAiB,OAAO;AAC1B,yBAAS,OAAO,MAAM,EAAE,OAAO,MAAM,QAAQ,CAAC;AAAA,cAChD,OAAO;AACL,yBAAS,OAAO,MAAM,EAAE,MAAM,CAAC;AAAA,cACjC;AACA,qBAAO,MAAM,GAAG,KAAK,
KAAK,kCAAkC;AAC5D;AAAA,YACF;AAAA,QACJ;AAAA,MACF;AAAA,IACF;AAEA,YAAQ,IAAI,CAAC,aAAa,GAAG,UAAU,CAAC,CAAC;AAAA,EAC3C;AACF;","names":[]}
1
+ {"version":3,"sources":["../../src/stt/stream_adapter.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport { log } from '../log.js';\nimport type { VAD, VADStream } from '../vad.js';\nimport { VADEventType } from '../vad.js';\nimport type { SpeechEvent } from './stt.js';\nimport { STT, SpeechEventType, SpeechStream } from './stt.js';\n\nexport class StreamAdapter extends STT {\n #stt: STT;\n #vad: VAD;\n label: string;\n\n constructor(stt: STT, vad: VAD) {\n super({ streaming: true, interimResults: false });\n this.#stt = stt;\n this.#vad = vad;\n this.label = `stt.StreamAdapter<${this.#stt.label}>`;\n\n this.#stt.on('metrics_collected', (metrics) => {\n this.emit('metrics_collected', metrics);\n });\n }\n\n _recognize(frame: AudioFrame): Promise<SpeechEvent> {\n return this.#stt.recognize(frame);\n }\n\n stream(): StreamAdapterWrapper {\n return new StreamAdapterWrapper(this.#stt, this.#vad);\n }\n}\n\nexport class StreamAdapterWrapper extends SpeechStream {\n #stt: STT;\n #vadStream: VADStream;\n label: string;\n\n constructor(stt: STT, vad: VAD) {\n super(stt);\n this.#stt = stt;\n this.#vadStream = vad.stream();\n this.label = `stt.StreamAdapterWrapper<${this.#stt.label}>`;\n }\n\n async monitorMetrics() {\n return; // do nothing\n }\n\n protected async run() {\n const forwardInput = async () => {\n for await (const input of this.input) {\n if (input === SpeechStream.FLUSH_SENTINEL) {\n this.#vadStream.flush();\n } else {\n this.#vadStream.pushFrame(input);\n }\n }\n this.#vadStream.endInput();\n };\n\n const recognize = async () => {\n for await (const ev of this.#vadStream) {\n switch (ev.type) {\n case VADEventType.START_OF_SPEECH:\n this.output.put({ type: SpeechEventType.START_OF_SPEECH });\n break;\n case VADEventType.END_OF_SPEECH:\n this.output.put({ type: SpeechEventType.END_OF_SPEECH });\n\n try {\n const event = await 
this.#stt.recognize(ev.frames);\n if (!event.alternatives![0].text) {\n continue;\n }\n\n this.output.put(event);\n break;\n } catch (error) {\n let logger = log();\n if (error instanceof Error) {\n logger = logger.child({ error: error.message });\n } else {\n logger = logger.child({ error });\n }\n logger.error(`${this.label}: provider recognize task failed`);\n continue;\n }\n }\n }\n };\n\n Promise.all([forwardInput(), recognize()]);\n }\n}\n"],"mappings":"AAIA,SAAS,WAAW;AAEpB,SAAS,oBAAoB;AAE7B,SAAS,KAAK,iBAAiB,oBAAoB;AAE5C,MAAM,sBAAsB,IAAI;AAAA,EACrC;AAAA,EACA;AAAA,EACA;AAAA,EAEA,YAAY,KAAU,KAAU;AAC9B,UAAM,EAAE,WAAW,MAAM,gBAAgB,MAAM,CAAC;AAChD,SAAK,OAAO;AACZ,SAAK,OAAO;AACZ,SAAK,QAAQ,qBAAqB,KAAK,KAAK,KAAK;AAEjD,SAAK,KAAK,GAAG,qBAAqB,CAAC,YAAY;AAC7C,WAAK,KAAK,qBAAqB,OAAO;AAAA,IACxC,CAAC;AAAA,EACH;AAAA,EAEA,WAAW,OAAyC;AAClD,WAAO,KAAK,KAAK,UAAU,KAAK;AAAA,EAClC;AAAA,EAEA,SAA+B;AAC7B,WAAO,IAAI,qBAAqB,KAAK,MAAM,KAAK,IAAI;AAAA,EACtD;AACF;AAEO,MAAM,6BAA6B,aAAa;AAAA,EACrD;AAAA,EACA;AAAA,EACA;AAAA,EAEA,YAAY,KAAU,KAAU;AAC9B,UAAM,GAAG;AACT,SAAK,OAAO;AACZ,SAAK,aAAa,IAAI,OAAO;AAC7B,SAAK,QAAQ,4BAA4B,KAAK,KAAK,KAAK;AAAA,EAC1D;AAAA,EAEA,MAAM,iBAAiB;AACrB;AAAA,EACF;AAAA,EAEA,MAAgB,MAAM;AACpB,UAAM,eAAe,YAAY;AAC/B,uBAAiB,SAAS,KAAK,OAAO;AACpC,YAAI,UAAU,aAAa,gBAAgB;AACzC,eAAK,WAAW,MAAM;AAAA,QACxB,OAAO;AACL,eAAK,WAAW,UAAU,KAAK;AAAA,QACjC;AAAA,MACF;AACA,WAAK,WAAW,SAAS;AAAA,IAC3B;AAEA,UAAM,YAAY,YAAY;AAC5B,uBAAiB,MAAM,KAAK,YAAY;AACtC,gBAAQ,GAAG,MAAM;AAAA,UACf,KAAK,aAAa;AAChB,iBAAK,OAAO,IAAI,EAAE,MAAM,gBAAgB,gBAAgB,CAAC;AACzD;AAAA,UACF,KAAK,aAAa;AAChB,iBAAK,OAAO,IAAI,EAAE,MAAM,gBAAgB,cAAc,CAAC;AAEvD,gBAAI;AACF,oBAAM,QAAQ,MAAM,KAAK,KAAK,UAAU,GAAG,MAAM;AACjD,kBAAI,CAAC,MAAM,aAAc,CAAC,EAAE,MAAM;AAChC;AAAA,cACF;AAEA,mBAAK,OAAO,IAAI,KAAK;AACrB;AAAA,YACF,SAAS,OAAO;AACd,kBAAI,SAAS,IAAI;AACjB,kBAAI,iBAAiB,OAAO;AAC1B,yBAAS,OAAO,MAAM,EAAE,OAAO,MAAM,QAAQ,CAAC;AAAA,cAChD,OAAO;AACL,yBAAS,OAAO,MAAM,EAAE,MAAM,CAAC;AAAA,cACjC;AACA,qBAAO,MAAM,GAAG,KAAK,KAAK,kCAAkC;AAC5D;AAAA,YACF;AAAA,QACJ;AAAA,MACF;AAAA,IA
CF;AAEA,YAAQ,IAAI,CAAC,aAAa,GAAG,UAAU,CAAC,CAAC;AAAA,EAC3C;AACF;","names":[]}
package/dist/stt/stt.cjs CHANGED
@@ -23,7 +23,14 @@ __export(stt_exports, {
23
23
  SpeechStream: () => SpeechStream
24
24
  });
25
25
  module.exports = __toCommonJS(stt_exports);
26
+ var import_rtc_node = require("@livekit/rtc-node");
27
+ var import_delay = require("@std/async/delay");
26
28
  var import_node_events = require("node:events");
29
+ var import_exceptions = require("../_exceptions.cjs");
30
+ var import_audio = require("../audio.cjs");
31
+ var import_log = require("../log.cjs");
32
+ var import_deferred_stream = require("../stream/deferred_stream.cjs");
33
+ var import_types = require("../types.cjs");
27
34
  var import_utils = require("../utils.cjs");
28
35
  var SpeechEventType = /* @__PURE__ */ ((SpeechEventType2) => {
29
36
  SpeechEventType2[SpeechEventType2["START_OF_SPEECH"] = 0] = "START_OF_SPEECH";
@@ -31,7 +38,6 @@ var SpeechEventType = /* @__PURE__ */ ((SpeechEventType2) => {
31
38
  SpeechEventType2[SpeechEventType2["FINAL_TRANSCRIPT"] = 2] = "FINAL_TRANSCRIPT";
32
39
  SpeechEventType2[SpeechEventType2["END_OF_SPEECH"] = 3] = "END_OF_SPEECH";
33
40
  SpeechEventType2[SpeechEventType2["RECOGNITION_USAGE"] = 4] = "RECOGNITION_USAGE";
34
- SpeechEventType2[SpeechEventType2["METRICS_COLLECTED"] = 5] = "METRICS_COLLECTED";
35
41
  return SpeechEventType2;
36
42
  })(SpeechEventType || {});
37
43
  class STT extends import_node_events.EventEmitter {
@@ -49,12 +55,13 @@ class STT extends import_node_events.EventEmitter {
49
55
  const startTime = process.hrtime.bigint();
50
56
  const event = await this._recognize(frame);
51
57
  const duration = Number((process.hrtime.bigint() - startTime) / BigInt(1e6));
52
- this.emit(5 /* METRICS_COLLECTED */, {
58
+ this.emit("metrics_collected", {
59
+ type: "stt_metrics",
53
60
  requestId: event.requestId ?? "",
54
61
  timestamp: Date.now(),
55
62
  duration,
56
63
  label: this.label,
57
- audioDuration: Array.isArray(frame) ? frame.reduce((sum, a) => sum + a.samplesPerChannel / a.sampleRate, 0) : frame.samplesPerChannel / frame.sampleRate,
64
+ audioDuration: (0, import_audio.calculateAudioDuration)(frame),
58
65
  streamed: false
59
66
  });
60
67
  return event;
@@ -65,30 +72,102 @@ class SpeechStream {
65
72
  input = new import_utils.AsyncIterableQueue();
66
73
  output = new import_utils.AsyncIterableQueue();
67
74
  queue = new import_utils.AsyncIterableQueue();
75
+ neededSampleRate;
76
+ resampler;
68
77
  closed = false;
69
78
  #stt;
70
- constructor(stt) {
79
+ deferredInputStream;
80
+ logger = (0, import_log.log)();
81
+ _connOptions;
82
+ constructor(stt, sampleRate, connectionOptions = import_types.DEFAULT_API_CONNECT_OPTIONS) {
71
83
  this.#stt = stt;
84
+ this._connOptions = connectionOptions;
85
+ this.deferredInputStream = new import_deferred_stream.DeferredReadableStream();
86
+ this.neededSampleRate = sampleRate;
72
87
  this.monitorMetrics();
88
+ this.pumpInput();
89
+ (0, import_utils.startSoon)(() => this.mainTask().then(() => this.queue.close()));
90
+ }
91
+ async mainTask() {
92
+ for (let i = 0; i < this._connOptions.maxRetry + 1; i++) {
93
+ try {
94
+ return await this.run();
95
+ } catch (error) {
96
+ if (error instanceof import_exceptions.APIError) {
97
+ const retryInterval = this._connOptions._intervalForRetry(i);
98
+ if (this._connOptions.maxRetry === 0 || !error.retryable) {
99
+ this.emitError({ error, recoverable: false });
100
+ throw error;
101
+ } else if (i === this._connOptions.maxRetry) {
102
+ this.emitError({ error, recoverable: false });
103
+ throw new import_exceptions.APIConnectionError({
104
+ message: `failed to recognize speech after ${this._connOptions.maxRetry + 1} attempts`,
105
+ options: { retryable: false }
106
+ });
107
+ } else {
108
+ this.emitError({ error, recoverable: true });
109
+ this.logger.warn(
110
+ { tts: this.#stt.label, attempt: i + 1, error },
111
+ `failed to recognize speech, retrying in ${retryInterval}s`
112
+ );
113
+ }
114
+ if (retryInterval > 0) {
115
+ await (0, import_delay.delay)(retryInterval);
116
+ }
117
+ } else {
118
+ this.emitError({ error: (0, import_utils.toError)(error), recoverable: false });
119
+ throw error;
120
+ }
121
+ }
122
+ }
123
+ }
124
+ emitError({ error, recoverable }) {
125
+ this.#stt.emit("error", {
126
+ type: "stt_error",
127
+ timestamp: Date.now(),
128
+ label: this.#stt.label,
129
+ error,
130
+ recoverable
131
+ });
132
+ }
133
+ async pumpInput() {
134
+ const inputStream = this.deferredInputStream.stream;
135
+ const reader = inputStream.getReader();
136
+ try {
137
+ while (true) {
138
+ const { done, value } = await reader.read();
139
+ if (done) break;
140
+ this.pushFrame(value);
141
+ }
142
+ } catch (error) {
143
+ this.logger.error("Error in STTStream mainTask:", error);
144
+ } finally {
145
+ reader.releaseLock();
146
+ }
73
147
  }
74
148
  async monitorMetrics() {
75
- const startTime = process.hrtime.bigint();
76
149
  for await (const event of this.queue) {
77
150
  this.output.put(event);
78
151
  if (event.type !== 4 /* RECOGNITION_USAGE */) continue;
79
- const duration = process.hrtime.bigint() - startTime;
80
152
  const metrics = {
153
+ type: "stt_metrics",
81
154
  timestamp: Date.now(),
82
155
  requestId: event.requestId,
83
- duration: Math.trunc(Number(duration / BigInt(1e6))),
84
- label: this.label,
156
+ duration: 0,
157
+ label: this.#stt.label,
85
158
  audioDuration: event.recognitionUsage.audioDuration,
86
159
  streamed: true
87
160
  };
88
- this.#stt.emit(5 /* METRICS_COLLECTED */, metrics);
161
+ this.#stt.emit("metrics_collected", metrics);
89
162
  }
90
163
  this.output.close();
91
164
  }
165
+ updateInputStream(audioStream) {
166
+ this.deferredInputStream.setSource(audioStream);
167
+ }
168
+ detachInputStream() {
169
+ this.deferredInputStream.detachSource();
170
+ }
92
171
  /** Push an audio frame to the STT */
93
172
  pushFrame(frame) {
94
173
  if (this.input.closed) {
@@ -97,7 +176,19 @@ class SpeechStream {
97
176
  if (this.closed) {
98
177
  throw new Error("Stream is closed");
99
178
  }
100
- this.input.put(frame);
179
+ if (this.neededSampleRate && frame.sampleRate !== this.neededSampleRate) {
180
+ if (!this.resampler) {
181
+ this.resampler = new import_rtc_node.AudioResampler(frame.sampleRate, this.neededSampleRate);
182
+ }
183
+ }
184
+ if (this.resampler) {
185
+ const frames = this.resampler.push(frame);
186
+ for (const frame2 of frames) {
187
+ this.input.put(frame2);
188
+ }
189
+ } else {
190
+ this.input.put(frame);
191
+ }
101
192
  }
102
193
  /** Flush the STT, causing it to process all pending text */
103
194
  flush() {
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/stt/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport type { STTMetrics } from '../metrics/base.js';\nimport type { AudioBuffer } from '../utils.js';\nimport { AsyncIterableQueue } from '../utils.js';\n\n/** Indicates start/middle/end of speech */\nexport enum SpeechEventType {\n /**\n * Indicate the start of speech.\n * If the STT doesn't support this event, this will be emitted at the same time\n * as the first INTERIM_TRANSCRIPT.\n */\n START_OF_SPEECH = 0,\n /**\n * Interim transcript, useful for real-time transcription.\n */\n INTERIM_TRANSCRIPT = 1,\n /**\n * Final transcript, emitted when the STT is confident enough that a certain\n * portion of the speech will not change.\n */\n FINAL_TRANSCRIPT = 2,\n /**\n * Indicate the end of speech, emitted when the user stops speaking.\n * The first alternative is a combination of all the previous FINAL_TRANSCRIPT events.\n */\n END_OF_SPEECH = 3,\n /** Usage event, emitted periodically to indicate usage metrics. */\n RECOGNITION_USAGE = 4,\n METRICS_COLLECTED = 5,\n}\n\n/** SpeechData contains metadata about this {@link SpeechEvent}. */\nexport interface SpeechData {\n language: string;\n text: string;\n startTime: number;\n endTime: number;\n confidence: number;\n}\n\nexport interface RecognitionUsage {\n audioDuration: number;\n}\n\n/** SpeechEvent is a packet of speech-to-text data. 
*/\nexport interface SpeechEvent {\n type: SpeechEventType;\n alternatives?: [SpeechData, ...SpeechData[]];\n requestId?: string;\n recognitionUsage?: RecognitionUsage;\n}\n\n/**\n * Describes the capabilities of the STT provider.\n *\n * @remarks\n * At present, the framework only supports providers that have a streaming endpoint.\n */\nexport interface STTCapabilities {\n streaming: boolean;\n interimResults: boolean;\n}\n\nexport type STTCallbacks = {\n [SpeechEventType.METRICS_COLLECTED]: (metrics: STTMetrics) => void;\n};\n\n/**\n * An instance of a speech-to-text adapter.\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child STT class, which inherits this class's methods.\n */\nexport abstract class STT extends (EventEmitter as new () => TypedEmitter<STTCallbacks>) {\n abstract label: string;\n #capabilities: STTCapabilities;\n\n constructor(capabilities: STTCapabilities) {\n super();\n this.#capabilities = capabilities;\n }\n\n /** Returns this STT's capabilities */\n get capabilities(): STTCapabilities {\n return this.#capabilities;\n }\n\n /** Receives an audio buffer and returns transcription in the form of a {@link SpeechEvent} */\n async recognize(frame: AudioBuffer): Promise<SpeechEvent> {\n const startTime = process.hrtime.bigint();\n const event = await this._recognize(frame);\n const duration = Number((process.hrtime.bigint() - startTime) / BigInt(1000000));\n this.emit(SpeechEventType.METRICS_COLLECTED, {\n requestId: event.requestId ?? '',\n timestamp: Date.now(),\n duration,\n label: this.label,\n audioDuration: Array.isArray(frame)\n ? 
frame.reduce((sum, a) => sum + a.samplesPerChannel / a.sampleRate, 0)\n : frame.samplesPerChannel / frame.sampleRate,\n streamed: false,\n });\n return event;\n }\n\n protected abstract _recognize(frame: AudioBuffer): Promise<SpeechEvent>;\n\n /**\n * Returns a {@link SpeechStream} that can be used to push audio frames and receive\n * transcriptions\n */\n abstract stream(): SpeechStream;\n}\n\n/**\n * An instance of a speech-to-text stream, as an asynchronous iterable iterator.\n *\n * @example Looping through frames\n * ```ts\n * for await (const event of stream) {\n * if (event.type === SpeechEventType.FINAL_TRANSCRIPT) {\n * console.log(event.alternatives[0].text)\n * }\n * }\n * ```\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child SpeechStream class, which inherits this class's methods.\n */\nexport abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent> {\n protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n protected input = new AsyncIterableQueue<AudioFrame | typeof SpeechStream.FLUSH_SENTINEL>();\n protected output = new AsyncIterableQueue<SpeechEvent>();\n protected queue = new AsyncIterableQueue<SpeechEvent>();\n abstract label: string;\n protected closed = false;\n #stt: STT;\n\n constructor(stt: STT) {\n this.#stt = stt;\n this.monitorMetrics();\n }\n\n protected async monitorMetrics() {\n const startTime = process.hrtime.bigint();\n\n for await (const event of this.queue) {\n this.output.put(event);\n if (event.type !== SpeechEventType.RECOGNITION_USAGE) continue;\n const duration = process.hrtime.bigint() - startTime;\n const metrics: STTMetrics = {\n timestamp: Date.now(),\n requestId: event.requestId!,\n duration: Math.trunc(Number(duration / BigInt(1000000))),\n label: this.label,\n audioDuration: event.recognitionUsage!.audioDuration,\n streamed: true,\n };\n this.#stt.emit(SpeechEventType.METRICS_COLLECTED, 
metrics);\n }\n this.output.close();\n }\n\n /** Push an audio frame to the STT */\n pushFrame(frame: AudioFrame) {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.put(frame);\n }\n\n /** Flush the STT, causing it to process all pending text */\n flush() {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.put(SpeechStream.FLUSH_SENTINEL);\n }\n\n /** Mark the input as ended and forbid additional pushes */\n endInput() {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.close();\n }\n\n next(): Promise<IteratorResult<SpeechEvent>> {\n return this.output.next();\n }\n\n /** Close both the input and output of the STT stream */\n close() {\n this.input.close();\n this.queue.close();\n this.output.close();\n this.closed = true;\n }\n\n [Symbol.asyncIterator](): SpeechStream {\n return this;\n 
}\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAKA,yBAA6B;AAG7B,mBAAmC;AAG5B,IAAK,kBAAL,kBAAKA,qBAAL;AAML,EAAAA,kCAAA,qBAAkB,KAAlB;AAIA,EAAAA,kCAAA,wBAAqB,KAArB;AAKA,EAAAA,kCAAA,sBAAmB,KAAnB;AAKA,EAAAA,kCAAA,mBAAgB,KAAhB;AAEA,EAAAA,kCAAA,uBAAoB,KAApB;AACA,EAAAA,kCAAA,uBAAoB,KAApB;AAvBU,SAAAA;AAAA,GAAA;AAqEL,MAAe,YAAa,gCAAsD;AAAA,EAEvF;AAAA,EAEA,YAAY,cAA+B;AACzC,UAAM;AACN,SAAK,gBAAgB;AAAA,EACvB;AAAA;AAAA,EAGA,IAAI,eAAgC;AAClC,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,MAAM,UAAU,OAA0C;AACxD,UAAM,YAAY,QAAQ,OAAO,OAAO;AACxC,UAAM,QAAQ,MAAM,KAAK,WAAW,KAAK;AACzC,UAAM,WAAW,QAAQ,QAAQ,OAAO,OAAO,IAAI,aAAa,OAAO,GAAO,CAAC;AAC/E,SAAK,KAAK,2BAAmC;AAAA,MAC3C,WAAW,MAAM,aAAa;AAAA,MAC9B,WAAW,KAAK,IAAI;AAAA,MACpB;AAAA,MACA,OAAO,KAAK;AAAA,MACZ,eAAe,MAAM,QAAQ,KAAK,IAC9B,MAAM,OAAO,CAAC,KAAK,MAAM,MAAM,EAAE,oBAAoB,EAAE,YAAY,CAAC,IACpE,MAAM,oBAAoB,MAAM;AAAA,MACpC,UAAU;AAAA,IACZ,CAAC;AACD,WAAO;AAAA,EACT;AASF;AAkBO,MAAe,aAA2D;AAAA,EAC/E,OAA0B,iBAAiB,OAAO,gBAAgB;AAAA,EACxD,QAAQ,IAAI,gCAAoE;AAAA,EAChF,SAAS,IAAI,gCAAgC;AAAA,EAC7C,QAAQ,IAAI,gCAAgC;AAAA,EAE5C,SAAS;AAAA,EACnB;AAAA,EAEA,YAAY,KAAU;AACpB,SAAK,OAAO;AACZ,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,UAAM,YAAY,QAAQ,OAAO,OAAO;AAExC,qBAAiB,SAAS,KAAK,OAAO;AACpC,WAAK,OAAO,IAAI,KAAK;AACrB,UAAI,MAAM,SAAS,0BAAmC;AACtD,YAAM,WAAW,QAAQ,OAAO,OAAO,IAAI;AAC3C,YAAM,UAAsB;AAAA,QAC1B,WAAW,KAAK,IAAI;AAAA,QACpB,WAAW,MAAM;AAAA,QACjB,UAAU,KAAK,MAAM,OAAO,WAAW,OAAO,GAAO,CAAC,CAAC;AAAA,QACvD,OAAO,KAAK;AAAA,QACZ,eAAe,MAAM,iBAAkB;AAAA,QACvC,UAAU;AAAA,MACZ;AACA,WAAK,KAAK,KAAK,2BAAmC,OAAO;AAAA,IAC3D;AACA,SAAK,OAAO,MAAM;AAAA,EACpB;AAAA;AAAA,EAGA,UAAU,OAAmB;AAC3B,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,IAAI,KAAK;AAAA,EACtB;AAAA;AAAA,EAGA,QAAQ;AACN,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,IAAI,aAAa,cAAc;AAAA,EAC5C;AAAA;AAAA,EAGA,WAAW;AACT,QAAI,KAAK,MAAM,QAAQ;AACrB,Y
AAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,MAAM;AAAA,EACnB;AAAA,EAEA,OAA6C;AAC3C,WAAO,KAAK,OAAO,KAAK;AAAA,EAC1B;AAAA;AAAA,EAGA,QAAQ;AACN,SAAK,MAAM,MAAM;AACjB,SAAK,MAAM,MAAM;AACjB,SAAK,OAAO,MAAM;AAClB,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,CAAC,OAAO,aAAa,IAAkB;AACrC,WAAO;AAAA,EACT;AACF;","names":["SpeechEventType"]}
1
+ {"version":3,"sources":["../../src/stt/stt.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { type AudioFrame, AudioResampler } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { delay } from '@std/async/delay';\nimport { EventEmitter } from 'node:events';\nimport type { ReadableStream } from 'node:stream/web';\nimport { APIConnectionError, APIError } from '../_exceptions.js';\nimport { calculateAudioDuration } from '../audio.js';\nimport { log } from '../log.js';\nimport type { STTMetrics } from '../metrics/base.js';\nimport { DeferredReadableStream } from '../stream/deferred_stream.js';\nimport { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';\nimport type { AudioBuffer } from '../utils.js';\nimport { AsyncIterableQueue, startSoon, toError } from '../utils.js';\n\n/** Indicates start/middle/end of speech */\nexport enum SpeechEventType {\n /**\n * Indicate the start of speech.\n * If the STT doesn't support this event, this will be emitted at the same time\n * as the first INTERIM_TRANSCRIPT.\n */\n START_OF_SPEECH = 0,\n /**\n * Interim transcript, useful for real-time transcription.\n */\n INTERIM_TRANSCRIPT = 1,\n /**\n * Final transcript, emitted when the STT is confident enough that a certain\n * portion of the speech will not change.\n */\n FINAL_TRANSCRIPT = 2,\n /**\n * Indicate the end of speech, emitted when the user stops speaking.\n * The first alternative is a combination of all the previous FINAL_TRANSCRIPT events.\n */\n END_OF_SPEECH = 3,\n /** Usage event, emitted periodically to indicate usage metrics. */\n RECOGNITION_USAGE = 4,\n}\n\n/** SpeechData contains metadata about this {@link SpeechEvent}. 
*/\nexport interface SpeechData {\n language: string;\n text: string;\n startTime: number;\n endTime: number;\n confidence: number;\n}\n\nexport interface RecognitionUsage {\n audioDuration: number;\n}\n\n/** SpeechEvent is a packet of speech-to-text data. */\nexport interface SpeechEvent {\n type: SpeechEventType;\n alternatives?: [SpeechData, ...SpeechData[]];\n requestId?: string;\n recognitionUsage?: RecognitionUsage;\n}\n\n/**\n * Describes the capabilities of the STT provider.\n *\n * @remarks\n * At present, the framework only supports providers that have a streaming endpoint.\n */\nexport interface STTCapabilities {\n streaming: boolean;\n interimResults: boolean;\n}\n\nexport interface STTError {\n type: 'stt_error';\n timestamp: number;\n label: string;\n error: Error;\n recoverable: boolean;\n}\n\nexport type STTCallbacks = {\n ['metrics_collected']: (metrics: STTMetrics) => void;\n ['error']: (error: STTError) => void;\n};\n\n/**\n * An instance of a speech-to-text adapter.\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child STT class, which inherits this class's methods.\n */\nexport abstract class STT extends (EventEmitter as new () => TypedEmitter<STTCallbacks>) {\n abstract label: string;\n #capabilities: STTCapabilities;\n\n constructor(capabilities: STTCapabilities) {\n super();\n this.#capabilities = capabilities;\n }\n\n /** Returns this STT's capabilities */\n get capabilities(): STTCapabilities {\n return this.#capabilities;\n }\n\n /** Receives an audio buffer and returns transcription in the form of a {@link SpeechEvent} */\n async recognize(frame: AudioBuffer): Promise<SpeechEvent> {\n const startTime = process.hrtime.bigint();\n const event = await this._recognize(frame);\n const duration = Number((process.hrtime.bigint() - startTime) / BigInt(1000000));\n this.emit('metrics_collected', {\n type: 'stt_metrics',\n requestId: event.requestId ?? 
'',\n timestamp: Date.now(),\n duration,\n label: this.label,\n audioDuration: calculateAudioDuration(frame),\n streamed: false,\n });\n return event;\n }\n protected abstract _recognize(frame: AudioBuffer): Promise<SpeechEvent>;\n\n /**\n * Returns a {@link SpeechStream} that can be used to push audio frames and receive\n * transcriptions\n */\n abstract stream(): SpeechStream;\n}\n\n/**\n * An instance of a speech-to-text stream, as an asynchronous iterable iterator.\n *\n * @example Looping through frames\n * ```ts\n * for await (const event of stream) {\n * if (event.type === SpeechEventType.FINAL_TRANSCRIPT) {\n * console.log(event.alternatives[0].text)\n * }\n * }\n * ```\n *\n * @remarks\n * This class is abstract, and as such cannot be used directly. Instead, use a provider plugin that\n * exports its own child SpeechStream class, which inherits this class's methods.\n */\nexport abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent> {\n protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');\n protected input = new AsyncIterableQueue<AudioFrame | typeof SpeechStream.FLUSH_SENTINEL>();\n protected output = new AsyncIterableQueue<SpeechEvent>();\n protected queue = new AsyncIterableQueue<SpeechEvent>();\n protected neededSampleRate?: number;\n protected resampler?: AudioResampler;\n abstract label: string;\n protected closed = false;\n #stt: STT;\n private deferredInputStream: DeferredReadableStream<AudioFrame>;\n private logger = log();\n private _connOptions: APIConnectOptions;\n\n constructor(\n stt: STT,\n sampleRate?: number,\n connectionOptions: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,\n ) {\n this.#stt = stt;\n this._connOptions = connectionOptions;\n this.deferredInputStream = new DeferredReadableStream<AudioFrame>();\n this.neededSampleRate = sampleRate;\n this.monitorMetrics();\n this.pumpInput();\n\n // this is a hack to immitate asyncio.create_task so that mainTask\n // is run **after** the constructor has 
finished. Otherwise we get\n // runtime error when trying to access class variables in the\n // `run` method.\n startSoon(() => this.mainTask().then(() => this.queue.close()));\n }\n\n private async mainTask() {\n for (let i = 0; i < this._connOptions.maxRetry + 1; i++) {\n try {\n return await this.run();\n } catch (error) {\n if (error instanceof APIError) {\n const retryInterval = this._connOptions._intervalForRetry(i);\n\n if (this._connOptions.maxRetry === 0 || !error.retryable) {\n this.emitError({ error, recoverable: false });\n throw error;\n } else if (i === this._connOptions.maxRetry) {\n this.emitError({ error, recoverable: false });\n throw new APIConnectionError({\n message: `failed to recognize speech after ${this._connOptions.maxRetry + 1} attempts`,\n options: { retryable: false },\n });\n } else {\n this.emitError({ error, recoverable: true });\n this.logger.warn(\n { tts: this.#stt.label, attempt: i + 1, error },\n `failed to recognize speech, retrying in ${retryInterval}s`,\n );\n }\n\n if (retryInterval > 0) {\n await delay(retryInterval);\n }\n } else {\n this.emitError({ error: toError(error), recoverable: false });\n throw error;\n }\n }\n }\n }\n\n private emitError({ error, recoverable }: { error: Error; recoverable: boolean }) {\n this.#stt.emit('error', {\n type: 'stt_error',\n timestamp: Date.now(),\n label: this.#stt.label,\n error,\n recoverable,\n });\n }\n\n protected async pumpInput() {\n // TODO(AJS-35): Implement STT with webstreams API\n const inputStream = this.deferredInputStream.stream;\n const reader = inputStream.getReader();\n\n try {\n while (true) {\n const { done, value } = await reader.read();\n if (done) break;\n this.pushFrame(value);\n }\n } catch (error) {\n this.logger.error('Error in STTStream mainTask:', error);\n } finally {\n reader.releaseLock();\n }\n }\n\n protected async monitorMetrics() {\n for await (const event of this.queue) {\n this.output.put(event);\n if (event.type !== 
SpeechEventType.RECOGNITION_USAGE) continue;\n const metrics: STTMetrics = {\n type: 'stt_metrics',\n timestamp: Date.now(),\n requestId: event.requestId!,\n duration: 0,\n label: this.#stt.label,\n audioDuration: event.recognitionUsage!.audioDuration,\n streamed: true,\n };\n this.#stt.emit('metrics_collected', metrics);\n }\n this.output.close();\n }\n\n protected abstract run(): Promise<void>;\n\n updateInputStream(audioStream: ReadableStream<AudioFrame>) {\n this.deferredInputStream.setSource(audioStream);\n }\n\n detachInputStream() {\n this.deferredInputStream.detachSource();\n }\n\n /** Push an audio frame to the STT */\n pushFrame(frame: AudioFrame) {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n\n if (this.neededSampleRate && frame.sampleRate !== this.neededSampleRate) {\n if (!this.resampler) {\n this.resampler = new AudioResampler(frame.sampleRate, this.neededSampleRate);\n }\n }\n\n if (this.resampler) {\n const frames = this.resampler.push(frame);\n for (const frame of frames) {\n this.input.put(frame);\n }\n } else {\n this.input.put(frame);\n }\n }\n\n /** Flush the STT, causing it to process all pending text */\n flush() {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.put(SpeechStream.FLUSH_SENTINEL);\n }\n\n /** Mark the input as ended and forbid additional pushes */\n endInput() {\n if (this.input.closed) {\n throw new Error('Input is closed');\n }\n if (this.closed) {\n throw new Error('Stream is closed');\n }\n this.input.close();\n }\n\n next(): Promise<IteratorResult<SpeechEvent>> {\n return this.output.next();\n }\n\n /** Close both the input and output of the STT stream */\n close() {\n this.input.close();\n this.queue.close();\n this.output.close();\n this.closed = true;\n }\n\n [Symbol.asyncIterator](): SpeechStream {\n return this;\n 
}\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAGA,sBAAgD;AAEhD,mBAAsB;AACtB,yBAA6B;AAE7B,wBAA6C;AAC7C,mBAAuC;AACvC,iBAAoB;AAEpB,6BAAuC;AACvC,mBAAoE;AAEpE,mBAAuD;AAGhD,IAAK,kBAAL,kBAAKA,qBAAL;AAML,EAAAA,kCAAA,qBAAkB,KAAlB;AAIA,EAAAA,kCAAA,wBAAqB,KAArB;AAKA,EAAAA,kCAAA,sBAAmB,KAAnB;AAKA,EAAAA,kCAAA,mBAAgB,KAAhB;AAEA,EAAAA,kCAAA,uBAAoB,KAApB;AAtBU,SAAAA;AAAA,GAAA;AA6EL,MAAe,YAAa,gCAAsD;AAAA,EAEvF;AAAA,EAEA,YAAY,cAA+B;AACzC,UAAM;AACN,SAAK,gBAAgB;AAAA,EACvB;AAAA;AAAA,EAGA,IAAI,eAAgC;AAClC,WAAO,KAAK;AAAA,EACd;AAAA;AAAA,EAGA,MAAM,UAAU,OAA0C;AACxD,UAAM,YAAY,QAAQ,OAAO,OAAO;AACxC,UAAM,QAAQ,MAAM,KAAK,WAAW,KAAK;AACzC,UAAM,WAAW,QAAQ,QAAQ,OAAO,OAAO,IAAI,aAAa,OAAO,GAAO,CAAC;AAC/E,SAAK,KAAK,qBAAqB;AAAA,MAC7B,MAAM;AAAA,MACN,WAAW,MAAM,aAAa;AAAA,MAC9B,WAAW,KAAK,IAAI;AAAA,MACpB;AAAA,MACA,OAAO,KAAK;AAAA,MACZ,mBAAe,qCAAuB,KAAK;AAAA,MAC3C,UAAU;AAAA,IACZ,CAAC;AACD,WAAO;AAAA,EACT;AAQF;AAkBO,MAAe,aAA2D;AAAA,EAC/E,OAA0B,iBAAiB,OAAO,gBAAgB;AAAA,EACxD,QAAQ,IAAI,gCAAoE;AAAA,EAChF,SAAS,IAAI,gCAAgC;AAAA,EAC7C,QAAQ,IAAI,gCAAgC;AAAA,EAC5C;AAAA,EACA;AAAA,EAEA,SAAS;AAAA,EACnB;AAAA,EACQ;AAAA,EACA,aAAS,gBAAI;AAAA,EACb;AAAA,EAER,YACE,KACA,YACA,oBAAuC,0CACvC;AACA,SAAK,OAAO;AACZ,SAAK,eAAe;AACpB,SAAK,sBAAsB,IAAI,8CAAmC;AAClE,SAAK,mBAAmB;AACxB,SAAK,eAAe;AACpB,SAAK,UAAU;AAMf,gCAAU,MAAM,KAAK,SAAS,EAAE,KAAK,MAAM,KAAK,MAAM,MAAM,CAAC,CAAC;AAAA,EAChE;AAAA,EAEA,MAAc,WAAW;AACvB,aAAS,IAAI,GAAG,IAAI,KAAK,aAAa,WAAW,GAAG,KAAK;AACvD,UAAI;AACF,eAAO,MAAM,KAAK,IAAI;AAAA,MACxB,SAAS,OAAO;AACd,YAAI,iBAAiB,4BAAU;AAC7B,gBAAM,gBAAgB,KAAK,aAAa,kBAAkB,CAAC;AAE3D,cAAI,KAAK,aAAa,aAAa,KAAK,CAAC,MAAM,WAAW;AACxD,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM;AAAA,UACR,WAAW,MAAM,KAAK,aAAa,UAAU;AAC3C,iBAAK,UAAU,EAAE,OAAO,aAAa,MAAM,CAAC;AAC5C,kBAAM,IAAI,qCAAmB;AAAA,cAC3B,SAAS,oCAAoC,KAAK,aAAa,WAAW,CAAC;AAAA,cAC3E,SAAS,EAAE,WAAW,MAAM;AAAA,YAC9B,CAAC;AAAA,UACH,OAAO;AACL,iBAAK,UAAU,EAAE,OAAO,aAAa,KAAK,CAAC;AAC3C,iBAAK,OAAO;AAAA,cACV,EAAE,KAAK,KAAK,KAAK,OAAO,SAAS,IAAI,GAAG,MAAM;AAAA,cAC9C,2CAA2C,aAAa;AAAA,YAC1D;AA
AA,UACF;AAEA,cAAI,gBAAgB,GAAG;AACrB,sBAAM,oBAAM,aAAa;AAAA,UAC3B;AAAA,QACF,OAAO;AACL,eAAK,UAAU,EAAE,WAAO,sBAAQ,KAAK,GAAG,aAAa,MAAM,CAAC;AAC5D,gBAAM;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,UAAU,EAAE,OAAO,YAAY,GAA2C;AAChF,SAAK,KAAK,KAAK,SAAS;AAAA,MACtB,MAAM;AAAA,MACN,WAAW,KAAK,IAAI;AAAA,MACpB,OAAO,KAAK,KAAK;AAAA,MACjB;AAAA,MACA;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAgB,YAAY;AAE1B,UAAM,cAAc,KAAK,oBAAoB;AAC7C,UAAM,SAAS,YAAY,UAAU;AAErC,QAAI;AACF,aAAO,MAAM;AACX,cAAM,EAAE,MAAM,MAAM,IAAI,MAAM,OAAO,KAAK;AAC1C,YAAI,KAAM;AACV,aAAK,UAAU,KAAK;AAAA,MACtB;AAAA,IACF,SAAS,OAAO;AACd,WAAK,OAAO,MAAM,gCAAgC,KAAK;AAAA,IACzD,UAAE;AACA,aAAO,YAAY;AAAA,IACrB;AAAA,EACF;AAAA,EAEA,MAAgB,iBAAiB;AAC/B,qBAAiB,SAAS,KAAK,OAAO;AACpC,WAAK,OAAO,IAAI,KAAK;AACrB,UAAI,MAAM,SAAS,0BAAmC;AACtD,YAAM,UAAsB;AAAA,QAC1B,MAAM;AAAA,QACN,WAAW,KAAK,IAAI;AAAA,QACpB,WAAW,MAAM;AAAA,QACjB,UAAU;AAAA,QACV,OAAO,KAAK,KAAK;AAAA,QACjB,eAAe,MAAM,iBAAkB;AAAA,QACvC,UAAU;AAAA,MACZ;AACA,WAAK,KAAK,KAAK,qBAAqB,OAAO;AAAA,IAC7C;AACA,SAAK,OAAO,MAAM;AAAA,EACpB;AAAA,EAIA,kBAAkB,aAAyC;AACzD,SAAK,oBAAoB,UAAU,WAAW;AAAA,EAChD;AAAA,EAEA,oBAAoB;AAClB,SAAK,oBAAoB,aAAa;AAAA,EACxC;AAAA;AAAA,EAGA,UAAU,OAAmB;AAC3B,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AAEA,QAAI,KAAK,oBAAoB,MAAM,eAAe,KAAK,kBAAkB;AACvE,UAAI,CAAC,KAAK,WAAW;AACnB,aAAK,YAAY,IAAI,+BAAe,MAAM,YAAY,KAAK,gBAAgB;AAAA,MAC7E;AAAA,IACF;AAEA,QAAI,KAAK,WAAW;AAClB,YAAM,SAAS,KAAK,UAAU,KAAK,KAAK;AACxC,iBAAWC,UAAS,QAAQ;AAC1B,aAAK,MAAM,IAAIA,MAAK;AAAA,MACtB;AAAA,IACF,OAAO;AACL,WAAK,MAAM,IAAI,KAAK;AAAA,IACtB;AAAA,EACF;AAAA;AAAA,EAGA,QAAQ;AACN,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,IAAI,aAAa,cAAc;AAAA,EAC5C;AAAA;AAAA,EAGA,WAAW;AACT,QAAI,KAAK,MAAM,QAAQ;AACrB,YAAM,IAAI,MAAM,iBAAiB;AAAA,IACnC;AACA,QAAI,KAAK,QAAQ;AACf,YAAM,IAAI,MAAM,kBAAkB;AAAA,IACpC;AACA,SAAK,MAAM,MAAM;AAAA,EACnB;AAAA,EAEA,OAA6C;AAC3C,WAAO,KAAK,OAAO,KAAK;AAAA,EAC1B
;AAAA;AAAA,EAGA,QAAQ;AACN,SAAK,MAAM,MAAM;AACjB,SAAK,MAAM,MAAM;AACjB,SAAK,OAAO,MAAM;AAClB,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,CAAC,OAAO,aAAa,IAAkB;AACrC,WAAO;AAAA,EACT;AACF;","names":["SpeechEventType","frame"]}
@@ -1,6 +1,9 @@
1
- import type { AudioFrame } from '@livekit/rtc-node';
1
+ /// <reference types="node" resolution-mode="require"/>
2
+ import { type AudioFrame, AudioResampler } from '@livekit/rtc-node';
2
3
  import type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';
4
+ import type { ReadableStream } from 'node:stream/web';
3
5
  import type { STTMetrics } from '../metrics/base.js';
6
+ import { type APIConnectOptions } from '../types.js';
4
7
  import type { AudioBuffer } from '../utils.js';
5
8
  import { AsyncIterableQueue } from '../utils.js';
6
9
  /** Indicates start/middle/end of speech */
@@ -26,8 +29,7 @@ export declare enum SpeechEventType {
26
29
  */
27
30
  END_OF_SPEECH = 3,
28
31
  /** Usage event, emitted periodically to indicate usage metrics. */
29
- RECOGNITION_USAGE = 4,
30
- METRICS_COLLECTED = 5
32
+ RECOGNITION_USAGE = 4
31
33
  }
32
34
  /** SpeechData contains metadata about this {@link SpeechEvent}. */
33
35
  export interface SpeechData {
@@ -57,8 +59,16 @@ export interface STTCapabilities {
57
59
  streaming: boolean;
58
60
  interimResults: boolean;
59
61
  }
62
+ export interface STTError {
63
+ type: 'stt_error';
64
+ timestamp: number;
65
+ label: string;
66
+ error: Error;
67
+ recoverable: boolean;
68
+ }
60
69
  export type STTCallbacks = {
61
- [SpeechEventType.METRICS_COLLECTED]: (metrics: STTMetrics) => void;
70
+ ['metrics_collected']: (metrics: STTMetrics) => void;
71
+ ['error']: (error: STTError) => void;
62
72
  };
63
73
  declare const STT_base: new () => TypedEmitter<STTCallbacks>;
64
74
  /**
@@ -105,10 +115,21 @@ export declare abstract class SpeechStream implements AsyncIterableIterator<Spee
105
115
  protected input: AsyncIterableQueue<AudioFrame | typeof SpeechStream.FLUSH_SENTINEL>;
106
116
  protected output: AsyncIterableQueue<SpeechEvent>;
107
117
  protected queue: AsyncIterableQueue<SpeechEvent>;
118
+ protected neededSampleRate?: number;
119
+ protected resampler?: AudioResampler;
108
120
  abstract label: string;
109
121
  protected closed: boolean;
110
- constructor(stt: STT);
122
+ private deferredInputStream;
123
+ private logger;
124
+ private _connOptions;
125
+ constructor(stt: STT, sampleRate?: number, connectionOptions?: APIConnectOptions);
126
+ private mainTask;
127
+ private emitError;
128
+ protected pumpInput(): Promise<void>;
111
129
  protected monitorMetrics(): Promise<void>;
130
+ protected abstract run(): Promise<void>;
131
+ updateInputStream(audioStream: ReadableStream<AudioFrame>): void;
132
+ detachInputStream(): void;
112
133
  /** Push an audio frame to the STT */
113
134
  pushFrame(frame: AudioFrame): void;
114
135
  /** Flush the STT, causing it to process all pending text */