@livekit/agents 0.7.9 → 1.0.0-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (627) hide show
  1. package/dist/_exceptions.cjs +109 -0
  2. package/dist/_exceptions.cjs.map +1 -0
  3. package/dist/_exceptions.d.cts +64 -0
  4. package/dist/_exceptions.d.ts +64 -0
  5. package/dist/_exceptions.d.ts.map +1 -0
  6. package/dist/_exceptions.js +80 -0
  7. package/dist/_exceptions.js.map +1 -0
  8. package/dist/audio.cjs +10 -3
  9. package/dist/audio.cjs.map +1 -1
  10. package/dist/audio.d.cts +2 -0
  11. package/dist/audio.d.ts +2 -0
  12. package/dist/audio.d.ts.map +1 -1
  13. package/dist/audio.js +8 -2
  14. package/dist/audio.js.map +1 -1
  15. package/dist/cli.cjs +25 -0
  16. package/dist/cli.cjs.map +1 -1
  17. package/dist/cli.d.ts.map +1 -1
  18. package/dist/cli.js +25 -0
  19. package/dist/cli.js.map +1 -1
  20. package/dist/constants.cjs +6 -3
  21. package/dist/constants.cjs.map +1 -1
  22. package/dist/constants.d.cts +2 -1
  23. package/dist/constants.d.ts +2 -1
  24. package/dist/constants.d.ts.map +1 -1
  25. package/dist/constants.js +4 -2
  26. package/dist/constants.js.map +1 -1
  27. package/dist/http_server.cjs.map +1 -1
  28. package/dist/http_server.d.cts +1 -0
  29. package/dist/http_server.d.ts +1 -0
  30. package/dist/http_server.d.ts.map +1 -1
  31. package/dist/http_server.js.map +1 -1
  32. package/dist/index.cjs +27 -20
  33. package/dist/index.cjs.map +1 -1
  34. package/dist/index.d.cts +13 -10
  35. package/dist/index.d.ts +13 -10
  36. package/dist/index.d.ts.map +1 -1
  37. package/dist/index.js +15 -11
  38. package/dist/index.js.map +1 -1
  39. package/dist/inference_runner.cjs +0 -1
  40. package/dist/inference_runner.cjs.map +1 -1
  41. package/dist/inference_runner.d.cts +2 -3
  42. package/dist/inference_runner.d.ts +2 -3
  43. package/dist/inference_runner.d.ts.map +1 -1
  44. package/dist/inference_runner.js +0 -1
  45. package/dist/inference_runner.js.map +1 -1
  46. package/dist/ipc/inference_proc_executor.cjs +2 -2
  47. package/dist/ipc/inference_proc_executor.cjs.map +1 -1
  48. package/dist/ipc/inference_proc_executor.js +2 -2
  49. package/dist/ipc/inference_proc_executor.js.map +1 -1
  50. package/dist/ipc/job_executor.cjs.map +1 -1
  51. package/dist/ipc/job_executor.js.map +1 -1
  52. package/dist/ipc/job_proc_executor.cjs +1 -0
  53. package/dist/ipc/job_proc_executor.cjs.map +1 -1
  54. package/dist/ipc/job_proc_executor.js +1 -0
  55. package/dist/ipc/job_proc_executor.js.map +1 -1
  56. package/dist/ipc/job_proc_lazy_main.cjs +1 -1
  57. package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
  58. package/dist/ipc/job_proc_lazy_main.js +1 -1
  59. package/dist/ipc/job_proc_lazy_main.js.map +1 -1
  60. package/dist/ipc/supervised_proc.d.cts +1 -1
  61. package/dist/ipc/supervised_proc.d.ts +1 -1
  62. package/dist/ipc/supervised_proc.d.ts.map +1 -1
  63. package/dist/job.cjs +14 -2
  64. package/dist/job.cjs.map +1 -1
  65. package/dist/job.d.cts +8 -0
  66. package/dist/job.d.ts +8 -0
  67. package/dist/job.d.ts.map +1 -1
  68. package/dist/job.js +12 -1
  69. package/dist/job.js.map +1 -1
  70. package/dist/llm/chat_context.cjs +332 -82
  71. package/dist/llm/chat_context.cjs.map +1 -1
  72. package/dist/llm/chat_context.d.cts +152 -48
  73. package/dist/llm/chat_context.d.ts +152 -48
  74. package/dist/llm/chat_context.d.ts.map +1 -1
  75. package/dist/llm/chat_context.js +327 -81
  76. package/dist/llm/chat_context.js.map +1 -1
  77. package/dist/llm/chat_context.test.cjs +380 -0
  78. package/dist/llm/chat_context.test.cjs.map +1 -0
  79. package/dist/llm/chat_context.test.js +385 -0
  80. package/dist/llm/chat_context.test.js.map +1 -0
  81. package/dist/llm/index.cjs +37 -8
  82. package/dist/llm/index.cjs.map +1 -1
  83. package/dist/llm/index.d.cts +7 -3
  84. package/dist/llm/index.d.ts +7 -3
  85. package/dist/llm/index.d.ts.map +1 -1
  86. package/dist/llm/index.js +39 -9
  87. package/dist/llm/index.js.map +1 -1
  88. package/dist/llm/llm.cjs +97 -33
  89. package/dist/llm/llm.cjs.map +1 -1
  90. package/dist/llm/llm.d.cts +50 -24
  91. package/dist/llm/llm.d.ts +50 -24
  92. package/dist/llm/llm.d.ts.map +1 -1
  93. package/dist/llm/llm.js +98 -33
  94. package/dist/llm/llm.js.map +1 -1
  95. package/dist/llm/provider_format/google.cjs +128 -0
  96. package/dist/llm/provider_format/google.cjs.map +1 -0
  97. package/dist/llm/provider_format/google.d.cts +6 -0
  98. package/dist/llm/provider_format/google.d.ts +6 -0
  99. package/dist/llm/provider_format/google.d.ts.map +1 -0
  100. package/dist/llm/provider_format/google.js +104 -0
  101. package/dist/llm/provider_format/google.js.map +1 -0
  102. package/dist/llm/provider_format/google.test.cjs +676 -0
  103. package/dist/llm/provider_format/google.test.cjs.map +1 -0
  104. package/dist/llm/provider_format/google.test.js +675 -0
  105. package/dist/llm/provider_format/google.test.js.map +1 -0
  106. package/dist/llm/provider_format/index.cjs +40 -0
  107. package/dist/llm/provider_format/index.cjs.map +1 -0
  108. package/dist/llm/provider_format/index.d.cts +4 -0
  109. package/dist/llm/provider_format/index.d.ts +4 -0
  110. package/dist/llm/provider_format/index.d.ts.map +1 -0
  111. package/dist/llm/provider_format/index.js +16 -0
  112. package/dist/llm/provider_format/index.js.map +1 -0
  113. package/dist/llm/provider_format/openai.cjs +116 -0
  114. package/dist/llm/provider_format/openai.cjs.map +1 -0
  115. package/dist/llm/provider_format/openai.d.cts +3 -0
  116. package/dist/llm/provider_format/openai.d.ts +3 -0
  117. package/dist/llm/provider_format/openai.d.ts.map +1 -0
  118. package/dist/llm/provider_format/openai.js +92 -0
  119. package/dist/llm/provider_format/openai.js.map +1 -0
  120. package/dist/llm/provider_format/openai.test.cjs +490 -0
  121. package/dist/llm/provider_format/openai.test.cjs.map +1 -0
  122. package/dist/llm/provider_format/openai.test.js +489 -0
  123. package/dist/llm/provider_format/openai.test.js.map +1 -0
  124. package/dist/llm/provider_format/utils.cjs +146 -0
  125. package/dist/llm/provider_format/utils.cjs.map +1 -0
  126. package/dist/llm/provider_format/utils.d.cts +38 -0
  127. package/dist/llm/provider_format/utils.d.ts +38 -0
  128. package/dist/llm/provider_format/utils.d.ts.map +1 -0
  129. package/dist/llm/provider_format/utils.js +122 -0
  130. package/dist/llm/provider_format/utils.js.map +1 -0
  131. package/dist/llm/realtime.cjs +77 -0
  132. package/dist/llm/realtime.cjs.map +1 -0
  133. package/dist/llm/realtime.d.cts +98 -0
  134. package/dist/llm/realtime.d.ts +98 -0
  135. package/dist/llm/realtime.d.ts.map +1 -0
  136. package/dist/llm/realtime.js +52 -0
  137. package/dist/llm/realtime.js.map +1 -0
  138. package/dist/llm/remote_chat_context.cjs +112 -0
  139. package/dist/llm/remote_chat_context.cjs.map +1 -0
  140. package/dist/llm/remote_chat_context.d.cts +23 -0
  141. package/dist/llm/remote_chat_context.d.ts +23 -0
  142. package/dist/llm/remote_chat_context.d.ts.map +1 -0
  143. package/dist/llm/remote_chat_context.js +88 -0
  144. package/dist/llm/remote_chat_context.js.map +1 -0
  145. package/dist/llm/remote_chat_context.test.cjs +225 -0
  146. package/dist/llm/remote_chat_context.test.cjs.map +1 -0
  147. package/dist/llm/remote_chat_context.test.js +224 -0
  148. package/dist/llm/remote_chat_context.test.js.map +1 -0
  149. package/dist/llm/tool_context.cjs +111 -0
  150. package/dist/llm/tool_context.cjs.map +1 -0
  151. package/dist/llm/tool_context.d.cts +125 -0
  152. package/dist/llm/tool_context.d.ts +125 -0
  153. package/dist/llm/tool_context.d.ts.map +1 -0
  154. package/dist/llm/tool_context.js +80 -0
  155. package/dist/llm/tool_context.js.map +1 -0
  156. package/dist/llm/tool_context.test.cjs +162 -0
  157. package/dist/llm/tool_context.test.cjs.map +1 -0
  158. package/dist/llm/tool_context.test.js +161 -0
  159. package/dist/llm/tool_context.test.js.map +1 -0
  160. package/dist/llm/tool_context.type.test.cjs +92 -0
  161. package/dist/llm/tool_context.type.test.cjs.map +1 -0
  162. package/dist/llm/tool_context.type.test.js +91 -0
  163. package/dist/llm/tool_context.type.test.js.map +1 -0
  164. package/dist/llm/utils.cjs +260 -0
  165. package/dist/llm/utils.cjs.map +1 -0
  166. package/dist/llm/utils.d.cts +42 -0
  167. package/dist/llm/utils.d.ts +42 -0
  168. package/dist/llm/utils.d.ts.map +1 -0
  169. package/dist/llm/utils.js +223 -0
  170. package/dist/llm/utils.js.map +1 -0
  171. package/dist/llm/utils.test.cjs +513 -0
  172. package/dist/llm/utils.test.cjs.map +1 -0
  173. package/dist/llm/utils.test.js +490 -0
  174. package/dist/llm/utils.test.js.map +1 -0
  175. package/dist/metrics/base.cjs +0 -27
  176. package/dist/metrics/base.cjs.map +1 -1
  177. package/dist/metrics/base.d.cts +105 -63
  178. package/dist/metrics/base.d.ts +105 -63
  179. package/dist/metrics/base.d.ts.map +1 -1
  180. package/dist/metrics/base.js +0 -19
  181. package/dist/metrics/base.js.map +1 -1
  182. package/dist/metrics/index.cjs +0 -3
  183. package/dist/metrics/index.cjs.map +1 -1
  184. package/dist/metrics/index.d.cts +2 -3
  185. package/dist/metrics/index.d.ts +2 -3
  186. package/dist/metrics/index.d.ts.map +1 -1
  187. package/dist/metrics/index.js +0 -2
  188. package/dist/metrics/index.js.map +1 -1
  189. package/dist/metrics/usage_collector.cjs +17 -12
  190. package/dist/metrics/usage_collector.cjs.map +1 -1
  191. package/dist/metrics/usage_collector.d.cts +3 -2
  192. package/dist/metrics/usage_collector.d.ts +3 -2
  193. package/dist/metrics/usage_collector.d.ts.map +1 -1
  194. package/dist/metrics/usage_collector.js +17 -12
  195. package/dist/metrics/usage_collector.js.map +1 -1
  196. package/dist/metrics/utils.cjs +22 -59
  197. package/dist/metrics/utils.cjs.map +1 -1
  198. package/dist/metrics/utils.d.cts +1 -8
  199. package/dist/metrics/utils.d.ts +1 -8
  200. package/dist/metrics/utils.d.ts.map +1 -1
  201. package/dist/metrics/utils.js +22 -52
  202. package/dist/metrics/utils.js.map +1 -1
  203. package/dist/multimodal/index.cjs +0 -2
  204. package/dist/multimodal/index.cjs.map +1 -1
  205. package/dist/multimodal/index.d.cts +0 -1
  206. package/dist/multimodal/index.d.ts +0 -1
  207. package/dist/multimodal/index.d.ts.map +1 -1
  208. package/dist/multimodal/index.js +0 -1
  209. package/dist/multimodal/index.js.map +1 -1
  210. package/dist/plugin.cjs +24 -8
  211. package/dist/plugin.cjs.map +1 -1
  212. package/dist/plugin.d.cts +18 -4
  213. package/dist/plugin.d.ts +18 -4
  214. package/dist/plugin.d.ts.map +1 -1
  215. package/dist/plugin.js +22 -7
  216. package/dist/plugin.js.map +1 -1
  217. package/dist/stream/deferred_stream.cjs +98 -0
  218. package/dist/stream/deferred_stream.cjs.map +1 -0
  219. package/dist/stream/deferred_stream.d.cts +27 -0
  220. package/dist/stream/deferred_stream.d.ts +27 -0
  221. package/dist/stream/deferred_stream.d.ts.map +1 -0
  222. package/dist/stream/deferred_stream.js +73 -0
  223. package/dist/stream/deferred_stream.js.map +1 -0
  224. package/dist/stream/deferred_stream.test.cjs +527 -0
  225. package/dist/stream/deferred_stream.test.cjs.map +1 -0
  226. package/dist/stream/deferred_stream.test.js +526 -0
  227. package/dist/stream/deferred_stream.test.js.map +1 -0
  228. package/dist/stream/identity_transform.cjs +42 -0
  229. package/dist/stream/identity_transform.cjs.map +1 -0
  230. package/dist/stream/identity_transform.d.cts +6 -0
  231. package/dist/stream/identity_transform.d.ts +6 -0
  232. package/dist/stream/identity_transform.d.ts.map +1 -0
  233. package/dist/stream/identity_transform.js +18 -0
  234. package/dist/stream/identity_transform.js.map +1 -0
  235. package/dist/stream/identity_transform.test.cjs +125 -0
  236. package/dist/stream/identity_transform.test.cjs.map +1 -0
  237. package/dist/stream/identity_transform.test.js +124 -0
  238. package/dist/stream/identity_transform.test.js.map +1 -0
  239. package/dist/stream/index.cjs +38 -0
  240. package/dist/stream/index.cjs.map +1 -0
  241. package/dist/stream/index.d.cts +5 -0
  242. package/dist/stream/index.d.ts +5 -0
  243. package/dist/stream/index.d.ts.map +1 -0
  244. package/dist/stream/index.js +11 -0
  245. package/dist/stream/index.js.map +1 -0
  246. package/dist/stream/merge_readable_streams.cjs +59 -0
  247. package/dist/stream/merge_readable_streams.cjs.map +1 -0
  248. package/dist/stream/merge_readable_streams.d.cts +4 -0
  249. package/dist/stream/merge_readable_streams.d.ts +4 -0
  250. package/dist/stream/merge_readable_streams.d.ts.map +1 -0
  251. package/dist/stream/merge_readable_streams.js +35 -0
  252. package/dist/stream/merge_readable_streams.js.map +1 -0
  253. package/dist/stream/stream_channel.cjs +47 -0
  254. package/dist/stream/stream_channel.cjs.map +1 -0
  255. package/dist/stream/stream_channel.d.cts +9 -0
  256. package/dist/stream/stream_channel.d.ts +9 -0
  257. package/dist/stream/stream_channel.d.ts.map +1 -0
  258. package/dist/stream/stream_channel.js +23 -0
  259. package/dist/stream/stream_channel.js.map +1 -0
  260. package/dist/stream/stream_channel.test.cjs +97 -0
  261. package/dist/stream/stream_channel.test.cjs.map +1 -0
  262. package/dist/stream/stream_channel.test.js +96 -0
  263. package/dist/stream/stream_channel.test.js.map +1 -0
  264. package/dist/stt/stream_adapter.cjs +3 -4
  265. package/dist/stt/stream_adapter.cjs.map +1 -1
  266. package/dist/stt/stream_adapter.d.cts +1 -0
  267. package/dist/stt/stream_adapter.d.ts +1 -0
  268. package/dist/stt/stream_adapter.d.ts.map +1 -1
  269. package/dist/stt/stream_adapter.js +3 -4
  270. package/dist/stt/stream_adapter.js.map +1 -1
  271. package/dist/stt/stt.cjs +100 -10
  272. package/dist/stt/stt.cjs.map +1 -1
  273. package/dist/stt/stt.d.cts +26 -5
  274. package/dist/stt/stt.d.ts +26 -5
  275. package/dist/stt/stt.d.ts.map +1 -1
  276. package/dist/stt/stt.js +101 -11
  277. package/dist/stt/stt.js.map +1 -1
  278. package/dist/tokenize/basic/basic.cjs +10 -5
  279. package/dist/tokenize/basic/basic.cjs.map +1 -1
  280. package/dist/tokenize/basic/basic.d.cts +7 -1
  281. package/dist/tokenize/basic/basic.d.ts +7 -1
  282. package/dist/tokenize/basic/basic.d.ts.map +1 -1
  283. package/dist/tokenize/basic/basic.js +10 -5
  284. package/dist/tokenize/basic/basic.js.map +1 -1
  285. package/dist/tokenize/basic/sentence.cjs +14 -6
  286. package/dist/tokenize/basic/sentence.cjs.map +1 -1
  287. package/dist/tokenize/basic/sentence.d.cts +1 -1
  288. package/dist/tokenize/basic/sentence.d.ts +1 -1
  289. package/dist/tokenize/basic/sentence.d.ts.map +1 -1
  290. package/dist/tokenize/basic/sentence.js +14 -6
  291. package/dist/tokenize/basic/sentence.js.map +1 -1
  292. package/dist/tokenize/token_stream.cjs +5 -3
  293. package/dist/tokenize/token_stream.cjs.map +1 -1
  294. package/dist/tokenize/token_stream.d.cts +1 -0
  295. package/dist/tokenize/token_stream.d.ts +1 -0
  296. package/dist/tokenize/token_stream.d.ts.map +1 -1
  297. package/dist/tokenize/token_stream.js +6 -4
  298. package/dist/tokenize/token_stream.js.map +1 -1
  299. package/dist/transcription.cjs +1 -2
  300. package/dist/transcription.cjs.map +1 -1
  301. package/dist/transcription.d.ts.map +1 -1
  302. package/dist/transcription.js +2 -3
  303. package/dist/transcription.js.map +1 -1
  304. package/dist/tts/index.cjs +2 -4
  305. package/dist/tts/index.cjs.map +1 -1
  306. package/dist/tts/index.d.cts +1 -1
  307. package/dist/tts/index.d.ts +1 -1
  308. package/dist/tts/index.d.ts.map +1 -1
  309. package/dist/tts/index.js +1 -3
  310. package/dist/tts/index.js.map +1 -1
  311. package/dist/tts/stream_adapter.cjs +26 -13
  312. package/dist/tts/stream_adapter.cjs.map +1 -1
  313. package/dist/tts/stream_adapter.d.cts +1 -1
  314. package/dist/tts/stream_adapter.d.ts +1 -1
  315. package/dist/tts/stream_adapter.d.ts.map +1 -1
  316. package/dist/tts/stream_adapter.js +27 -14
  317. package/dist/tts/stream_adapter.js.map +1 -1
  318. package/dist/tts/tts.cjs +156 -25
  319. package/dist/tts/tts.cjs.map +1 -1
  320. package/dist/tts/tts.d.cts +29 -5
  321. package/dist/tts/tts.d.ts +29 -5
  322. package/dist/tts/tts.d.ts.map +1 -1
  323. package/dist/tts/tts.js +156 -24
  324. package/dist/tts/tts.js.map +1 -1
  325. package/dist/types.cjs +60 -0
  326. package/dist/types.cjs.map +1 -0
  327. package/dist/types.d.cts +13 -0
  328. package/dist/types.d.ts +13 -0
  329. package/dist/types.d.ts.map +1 -0
  330. package/dist/types.js +35 -0
  331. package/dist/types.js.map +1 -0
  332. package/dist/utils.cjs +298 -27
  333. package/dist/utils.cjs.map +1 -1
  334. package/dist/utils.d.cts +145 -9
  335. package/dist/utils.d.ts +145 -9
  336. package/dist/utils.d.ts.map +1 -1
  337. package/dist/utils.js +281 -26
  338. package/dist/utils.js.map +1 -1
  339. package/dist/utils.test.cjs +491 -0
  340. package/dist/utils.test.cjs.map +1 -0
  341. package/dist/utils.test.js +498 -0
  342. package/dist/utils.test.js.map +1 -0
  343. package/dist/vad.cjs +76 -20
  344. package/dist/vad.cjs.map +1 -1
  345. package/dist/vad.d.cts +25 -5
  346. package/dist/vad.d.ts +25 -5
  347. package/dist/vad.d.ts.map +1 -1
  348. package/dist/vad.js +76 -20
  349. package/dist/vad.js.map +1 -1
  350. package/dist/voice/agent.cjs +245 -0
  351. package/dist/voice/agent.cjs.map +1 -0
  352. package/dist/voice/agent.d.cts +78 -0
  353. package/dist/voice/agent.d.ts +78 -0
  354. package/dist/voice/agent.d.ts.map +1 -0
  355. package/dist/voice/agent.js +220 -0
  356. package/dist/voice/agent.js.map +1 -0
  357. package/dist/voice/agent.test.cjs +61 -0
  358. package/dist/voice/agent.test.cjs.map +1 -0
  359. package/dist/voice/agent.test.js +60 -0
  360. package/dist/voice/agent.test.js.map +1 -0
  361. package/dist/voice/agent_activity.cjs +1453 -0
  362. package/dist/voice/agent_activity.cjs.map +1 -0
  363. package/dist/voice/agent_activity.d.cts +94 -0
  364. package/dist/voice/agent_activity.d.ts +94 -0
  365. package/dist/voice/agent_activity.d.ts.map +1 -0
  366. package/dist/voice/agent_activity.js +1449 -0
  367. package/dist/voice/agent_activity.js.map +1 -0
  368. package/dist/voice/agent_session.cjs +312 -0
  369. package/dist/voice/agent_session.cjs.map +1 -0
  370. package/dist/voice/agent_session.d.cts +121 -0
  371. package/dist/voice/agent_session.d.ts +121 -0
  372. package/dist/voice/agent_session.d.ts.map +1 -0
  373. package/dist/voice/agent_session.js +295 -0
  374. package/dist/voice/agent_session.js.map +1 -0
  375. package/dist/voice/audio_recognition.cjs +374 -0
  376. package/dist/voice/audio_recognition.cjs.map +1 -0
  377. package/dist/voice/audio_recognition.d.cts +80 -0
  378. package/dist/voice/audio_recognition.d.ts +80 -0
  379. package/dist/voice/audio_recognition.d.ts.map +1 -0
  380. package/dist/voice/audio_recognition.js +350 -0
  381. package/dist/voice/audio_recognition.js.map +1 -0
  382. package/dist/voice/events.cjs +145 -0
  383. package/dist/voice/events.cjs.map +1 -0
  384. package/dist/voice/events.d.cts +124 -0
  385. package/dist/voice/events.d.ts +124 -0
  386. package/dist/voice/events.d.ts.map +1 -0
  387. package/dist/voice/events.js +110 -0
  388. package/dist/voice/events.js.map +1 -0
  389. package/dist/voice/generation.cjs +700 -0
  390. package/dist/voice/generation.cjs.map +1 -0
  391. package/dist/voice/generation.d.cts +115 -0
  392. package/dist/voice/generation.d.ts +115 -0
  393. package/dist/voice/generation.d.ts.map +1 -0
  394. package/dist/voice/generation.js +672 -0
  395. package/dist/voice/generation.js.map +1 -0
  396. package/dist/voice/index.cjs +40 -0
  397. package/dist/voice/index.cjs.map +1 -0
  398. package/dist/voice/index.d.cts +5 -0
  399. package/dist/voice/index.d.ts +5 -0
  400. package/dist/voice/index.d.ts.map +1 -0
  401. package/dist/voice/index.js +11 -0
  402. package/dist/voice/index.js.map +1 -0
  403. package/dist/voice/io.cjs +245 -0
  404. package/dist/voice/io.cjs.map +1 -0
  405. package/dist/voice/io.d.cts +101 -0
  406. package/dist/voice/io.d.ts +101 -0
  407. package/dist/voice/io.d.ts.map +1 -0
  408. package/dist/voice/io.js +217 -0
  409. package/dist/voice/io.js.map +1 -0
  410. package/dist/voice/room_io/_input.cjs +121 -0
  411. package/dist/voice/room_io/_input.cjs.map +1 -0
  412. package/dist/voice/room_io/_input.d.cts +24 -0
  413. package/dist/voice/room_io/_input.d.ts +24 -0
  414. package/dist/voice/room_io/_input.d.ts.map +1 -0
  415. package/dist/voice/room_io/_input.js +102 -0
  416. package/dist/voice/room_io/_input.js.map +1 -0
  417. package/dist/voice/room_io/_output.cjs +358 -0
  418. package/dist/voice/room_io/_output.cjs.map +1 -0
  419. package/dist/voice/room_io/_output.d.cts +75 -0
  420. package/dist/voice/room_io/_output.d.ts +75 -0
  421. package/dist/voice/room_io/_output.d.ts.map +1 -0
  422. package/dist/voice/room_io/_output.js +342 -0
  423. package/dist/voice/room_io/_output.js.map +1 -0
  424. package/dist/voice/room_io/index.cjs +25 -0
  425. package/dist/voice/room_io/index.cjs.map +1 -0
  426. package/dist/voice/room_io/index.d.cts +3 -0
  427. package/dist/voice/room_io/index.d.ts +3 -0
  428. package/dist/voice/room_io/index.d.ts.map +1 -0
  429. package/dist/voice/room_io/index.js +3 -0
  430. package/dist/voice/room_io/index.js.map +1 -0
  431. package/dist/voice/room_io/room_io.cjs +370 -0
  432. package/dist/voice/room_io/room_io.cjs.map +1 -0
  433. package/dist/voice/room_io/room_io.d.cts +73 -0
  434. package/dist/voice/room_io/room_io.d.ts +73 -0
  435. package/dist/voice/room_io/room_io.d.ts.map +1 -0
  436. package/dist/voice/room_io/room_io.js +361 -0
  437. package/dist/voice/room_io/room_io.js.map +1 -0
  438. package/dist/{pipeline/index.cjs → voice/run_context.cjs} +16 -11
  439. package/dist/voice/run_context.cjs.map +1 -0
  440. package/dist/voice/run_context.d.cts +12 -0
  441. package/dist/voice/run_context.d.ts +12 -0
  442. package/dist/voice/run_context.d.ts.map +1 -0
  443. package/dist/voice/run_context.js +14 -0
  444. package/dist/voice/run_context.js.map +1 -0
  445. package/dist/voice/speech_handle.cjs +105 -0
  446. package/dist/voice/speech_handle.cjs.map +1 -0
  447. package/dist/voice/speech_handle.d.cts +46 -0
  448. package/dist/voice/speech_handle.d.ts +46 -0
  449. package/dist/voice/speech_handle.d.ts.map +1 -0
  450. package/dist/voice/speech_handle.js +81 -0
  451. package/dist/voice/speech_handle.js.map +1 -0
  452. package/dist/voice/transcription/_utils.cjs +45 -0
  453. package/dist/voice/transcription/_utils.cjs.map +1 -0
  454. package/dist/voice/transcription/_utils.d.cts +3 -0
  455. package/dist/voice/transcription/_utils.d.ts +3 -0
  456. package/dist/voice/transcription/_utils.d.ts.map +1 -0
  457. package/dist/voice/transcription/_utils.js +21 -0
  458. package/dist/voice/transcription/_utils.js.map +1 -0
  459. package/dist/voice/transcription/index.cjs +23 -0
  460. package/dist/voice/transcription/index.cjs.map +1 -0
  461. package/dist/voice/transcription/index.d.cts +2 -0
  462. package/dist/voice/transcription/index.d.ts +2 -0
  463. package/dist/voice/transcription/index.d.ts.map +1 -0
  464. package/dist/voice/transcription/index.js +2 -0
  465. package/dist/voice/transcription/index.js.map +1 -0
  466. package/dist/voice/transcription/synchronizer.cjs +379 -0
  467. package/dist/voice/transcription/synchronizer.cjs.map +1 -0
  468. package/dist/voice/transcription/synchronizer.d.cts +86 -0
  469. package/dist/voice/transcription/synchronizer.d.ts +86 -0
  470. package/dist/voice/transcription/synchronizer.d.ts.map +1 -0
  471. package/dist/voice/transcription/synchronizer.js +354 -0
  472. package/dist/voice/transcription/synchronizer.js.map +1 -0
  473. package/dist/worker.cjs +22 -4
  474. package/dist/worker.cjs.map +1 -1
  475. package/dist/worker.d.cts +1 -1
  476. package/dist/worker.d.ts +1 -1
  477. package/dist/worker.d.ts.map +1 -1
  478. package/dist/worker.js +22 -4
  479. package/dist/worker.js.map +1 -1
  480. package/package.json +8 -2
  481. package/src/_exceptions.ts +137 -0
  482. package/src/audio.ts +12 -1
  483. package/src/cli.ts +37 -0
  484. package/src/constants.ts +2 -1
  485. package/src/http_server.ts +1 -0
  486. package/src/index.ts +13 -10
  487. package/src/inference_runner.ts +2 -3
  488. package/src/ipc/inference_proc_executor.ts +2 -2
  489. package/src/ipc/job_executor.ts +1 -1
  490. package/src/ipc/job_proc_executor.ts +1 -1
  491. package/src/ipc/job_proc_lazy_main.ts +1 -1
  492. package/src/job.ts +18 -0
  493. package/src/llm/__snapshots__/chat_context.test.ts.snap +527 -0
  494. package/src/llm/__snapshots__/tool_context.test.ts.snap +177 -0
  495. package/src/llm/__snapshots__/utils.test.ts.snap +65 -0
  496. package/src/llm/chat_context.test.ts +450 -0
  497. package/src/llm/chat_context.ts +501 -103
  498. package/src/llm/index.ts +53 -18
  499. package/src/llm/llm.ts +148 -50
  500. package/src/llm/provider_format/google.test.ts +772 -0
  501. package/src/llm/provider_format/google.ts +130 -0
  502. package/src/llm/provider_format/index.ts +23 -0
  503. package/src/llm/provider_format/openai.test.ts +581 -0
  504. package/src/llm/provider_format/openai.ts +118 -0
  505. package/src/llm/provider_format/utils.ts +183 -0
  506. package/src/llm/realtime.ts +151 -0
  507. package/src/llm/remote_chat_context.test.ts +290 -0
  508. package/src/llm/remote_chat_context.ts +114 -0
  509. package/src/llm/tool_context.test.ts +198 -0
  510. package/src/llm/tool_context.ts +259 -0
  511. package/src/llm/tool_context.type.test.ts +115 -0
  512. package/src/llm/utils.test.ts +670 -0
  513. package/src/llm/utils.ts +324 -0
  514. package/src/metrics/base.ts +110 -78
  515. package/src/metrics/index.ts +3 -9
  516. package/src/metrics/usage_collector.ts +19 -13
  517. package/src/metrics/utils.ts +24 -69
  518. package/src/multimodal/index.ts +0 -1
  519. package/src/plugin.ts +26 -8
  520. package/src/stream/deferred_stream.test.ts +755 -0
  521. package/src/stream/deferred_stream.ts +110 -0
  522. package/src/stream/identity_transform.test.ts +179 -0
  523. package/src/stream/identity_transform.ts +18 -0
  524. package/src/stream/index.ts +7 -0
  525. package/src/stream/merge_readable_streams.ts +40 -0
  526. package/src/stream/stream_channel.test.ts +129 -0
  527. package/src/stream/stream_channel.ts +32 -0
  528. package/src/stt/stream_adapter.ts +3 -5
  529. package/src/stt/stt.ts +134 -17
  530. package/src/tokenize/basic/basic.ts +13 -5
  531. package/src/tokenize/basic/sentence.ts +20 -6
  532. package/src/tokenize/token_stream.ts +7 -4
  533. package/src/transcription.ts +2 -3
  534. package/src/tts/index.ts +0 -1
  535. package/src/tts/stream_adapter.ts +42 -16
  536. package/src/tts/tts.ts +202 -21
  537. package/src/types.ts +42 -0
  538. package/src/utils.test.ts +658 -0
  539. package/src/utils.ts +402 -44
  540. package/src/vad.ts +90 -22
  541. package/src/voice/agent.test.ts +80 -0
  542. package/src/voice/agent.ts +332 -0
  543. package/src/voice/agent_activity.ts +1913 -0
  544. package/src/voice/agent_session.ts +460 -0
  545. package/src/voice/audio_recognition.ts +473 -0
  546. package/src/voice/events.ts +252 -0
  547. package/src/voice/generation.ts +881 -0
  548. package/src/voice/index.ts +7 -0
  549. package/src/voice/io.ts +304 -0
  550. package/src/voice/room_io/_input.ts +144 -0
  551. package/src/voice/room_io/_output.ts +436 -0
  552. package/src/voice/room_io/index.ts +5 -0
  553. package/src/voice/room_io/room_io.ts +495 -0
  554. package/src/voice/run_context.ts +20 -0
  555. package/src/voice/speech_handle.ts +104 -0
  556. package/src/voice/transcription/_utils.ts +25 -0
  557. package/src/voice/transcription/index.ts +4 -0
  558. package/src/voice/transcription/synchronizer.ts +477 -0
  559. package/src/worker.ts +22 -2
  560. package/dist/llm/function_context.cjs +0 -103
  561. package/dist/llm/function_context.cjs.map +0 -1
  562. package/dist/llm/function_context.d.cts +0 -47
  563. package/dist/llm/function_context.d.ts +0 -47
  564. package/dist/llm/function_context.d.ts.map +0 -1
  565. package/dist/llm/function_context.js +0 -78
  566. package/dist/llm/function_context.js.map +0 -1
  567. package/dist/llm/function_context.test.cjs +0 -218
  568. package/dist/llm/function_context.test.cjs.map +0 -1
  569. package/dist/llm/function_context.test.js +0 -217
  570. package/dist/llm/function_context.test.js.map +0 -1
  571. package/dist/multimodal/multimodal_agent.cjs +0 -486
  572. package/dist/multimodal/multimodal_agent.cjs.map +0 -1
  573. package/dist/multimodal/multimodal_agent.d.cts +0 -48
  574. package/dist/multimodal/multimodal_agent.d.ts +0 -48
  575. package/dist/multimodal/multimodal_agent.d.ts.map +0 -1
  576. package/dist/multimodal/multimodal_agent.js +0 -461
  577. package/dist/multimodal/multimodal_agent.js.map +0 -1
  578. package/dist/pipeline/agent_output.cjs +0 -197
  579. package/dist/pipeline/agent_output.cjs.map +0 -1
  580. package/dist/pipeline/agent_output.d.cts +0 -33
  581. package/dist/pipeline/agent_output.d.ts +0 -33
  582. package/dist/pipeline/agent_output.d.ts.map +0 -1
  583. package/dist/pipeline/agent_output.js +0 -172
  584. package/dist/pipeline/agent_output.js.map +0 -1
  585. package/dist/pipeline/agent_playout.cjs +0 -175
  586. package/dist/pipeline/agent_playout.cjs.map +0 -1
  587. package/dist/pipeline/agent_playout.d.cts +0 -40
  588. package/dist/pipeline/agent_playout.d.ts +0 -40
  589. package/dist/pipeline/agent_playout.d.ts.map +0 -1
  590. package/dist/pipeline/agent_playout.js +0 -139
  591. package/dist/pipeline/agent_playout.js.map +0 -1
  592. package/dist/pipeline/human_input.cjs +0 -171
  593. package/dist/pipeline/human_input.cjs.map +0 -1
  594. package/dist/pipeline/human_input.d.cts +0 -30
  595. package/dist/pipeline/human_input.d.ts +0 -30
  596. package/dist/pipeline/human_input.d.ts.map +0 -1
  597. package/dist/pipeline/human_input.js +0 -146
  598. package/dist/pipeline/human_input.js.map +0 -1
  599. package/dist/pipeline/index.cjs.map +0 -1
  600. package/dist/pipeline/index.d.cts +0 -2
  601. package/dist/pipeline/index.d.ts +0 -2
  602. package/dist/pipeline/index.d.ts.map +0 -1
  603. package/dist/pipeline/index.js +0 -11
  604. package/dist/pipeline/index.js.map +0 -1
  605. package/dist/pipeline/pipeline_agent.cjs +0 -859
  606. package/dist/pipeline/pipeline_agent.cjs.map +0 -1
  607. package/dist/pipeline/pipeline_agent.d.cts +0 -150
  608. package/dist/pipeline/pipeline_agent.d.ts +0 -150
  609. package/dist/pipeline/pipeline_agent.d.ts.map +0 -1
  610. package/dist/pipeline/pipeline_agent.js +0 -837
  611. package/dist/pipeline/pipeline_agent.js.map +0 -1
  612. package/dist/pipeline/speech_handle.cjs +0 -176
  613. package/dist/pipeline/speech_handle.cjs.map +0 -1
  614. package/dist/pipeline/speech_handle.d.cts +0 -37
  615. package/dist/pipeline/speech_handle.d.ts +0 -37
  616. package/dist/pipeline/speech_handle.d.ts.map +0 -1
  617. package/dist/pipeline/speech_handle.js +0 -152
  618. package/dist/pipeline/speech_handle.js.map +0 -1
  619. package/src/llm/function_context.test.ts +0 -248
  620. package/src/llm/function_context.ts +0 -142
  621. package/src/multimodal/multimodal_agent.ts +0 -592
  622. package/src/pipeline/agent_output.ts +0 -219
  623. package/src/pipeline/agent_playout.ts +0 -192
  624. package/src/pipeline/human_input.ts +0 -188
  625. package/src/pipeline/index.ts +0 -15
  626. package/src/pipeline/pipeline_agent.ts +0 -1197
  627. package/src/pipeline/speech_handle.ts +0 -201
@@ -1,40 +0,0 @@
1
- import type { AudioFrame, AudioSource } from '@livekit/rtc-node';
2
- import type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';
3
- import type { TextAudioSynchronizer } from '../transcription.js';
4
- import { Future } from '../utils.js';
5
- import { SynthesisHandle } from './agent_output.js';
6
- export declare enum AgentPlayoutEvent {
7
- PLAYOUT_STARTED = 0,
8
- PLAYOUT_STOPPED = 1
9
- }
10
- export type AgentPlayoutCallbacks = {
11
- [AgentPlayoutEvent.PLAYOUT_STARTED]: () => void;
12
- [AgentPlayoutEvent.PLAYOUT_STOPPED]: (interrupt: boolean) => void;
13
- };
14
- export declare class PlayoutHandle {
15
- #private;
16
- playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>;
17
- totalPlayedTime?: number;
18
- synchronizer: TextAudioSynchronizer;
19
- pushedDuration: number;
20
- intFut: Future;
21
- doneFut: Future;
22
- constructor(speechId: string, audioSource: AudioSource, playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>, synchronizer: TextAudioSynchronizer);
23
- get speechId(): string;
24
- get interrupted(): boolean;
25
- get timePlayed(): number;
26
- get done(): boolean;
27
- interrupt(): void;
28
- join(): Future;
29
- }
30
- declare const AgentPlayout_base: new () => TypedEmitter<AgentPlayoutCallbacks>;
31
- export declare class AgentPlayout extends AgentPlayout_base {
32
- #private;
33
- constructor(audioSource: AudioSource);
34
- get targetVolume(): number;
35
- set targetVolume(vol: number);
36
- play(speechId: string, playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>, synchronizer: TextAudioSynchronizer): PlayoutHandle;
37
- close(): Promise<void>;
38
- }
39
- export {};
40
- //# sourceMappingURL=agent_playout.d.ts.map
@@ -1,40 +0,0 @@
1
- import type { AudioFrame, AudioSource } from '@livekit/rtc-node';
2
- import type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';
3
- import type { TextAudioSynchronizer } from '../transcription.js';
4
- import { Future } from '../utils.js';
5
- import { SynthesisHandle } from './agent_output.js';
6
- export declare enum AgentPlayoutEvent {
7
- PLAYOUT_STARTED = 0,
8
- PLAYOUT_STOPPED = 1
9
- }
10
- export type AgentPlayoutCallbacks = {
11
- [AgentPlayoutEvent.PLAYOUT_STARTED]: () => void;
12
- [AgentPlayoutEvent.PLAYOUT_STOPPED]: (interrupt: boolean) => void;
13
- };
14
- export declare class PlayoutHandle {
15
- #private;
16
- playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>;
17
- totalPlayedTime?: number;
18
- synchronizer: TextAudioSynchronizer;
19
- pushedDuration: number;
20
- intFut: Future;
21
- doneFut: Future;
22
- constructor(speechId: string, audioSource: AudioSource, playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>, synchronizer: TextAudioSynchronizer);
23
- get speechId(): string;
24
- get interrupted(): boolean;
25
- get timePlayed(): number;
26
- get done(): boolean;
27
- interrupt(): void;
28
- join(): Future;
29
- }
30
- declare const AgentPlayout_base: new () => TypedEmitter<AgentPlayoutCallbacks>;
31
- export declare class AgentPlayout extends AgentPlayout_base {
32
- #private;
33
- constructor(audioSource: AudioSource);
34
- get targetVolume(): number;
35
- set targetVolume(vol: number);
36
- play(speechId: string, playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>, synchronizer: TextAudioSynchronizer): PlayoutHandle;
37
- close(): Promise<void>;
38
- }
39
- export {};
40
- //# sourceMappingURL=agent_playout.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"agent_playout.d.ts","sourceRoot":"","sources":["../../src/pipeline/agent_playout.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,UAAU,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACjE,OAAO,KAAK,EAAE,iBAAiB,IAAI,YAAY,EAAE,MAAM,wBAAwB,CAAC;AAGhF,OAAO,KAAK,EAAE,qBAAqB,EAAE,MAAM,qBAAqB,CAAC;AACjE,OAAO,EAAsB,MAAM,EAAoB,MAAM,aAAa,CAAC;AAC3E,OAAO,EAAE,eAAe,EAAE,MAAM,mBAAmB,CAAC;AAEpD,oBAAY,iBAAiB;IAC3B,eAAe,IAAA;IACf,eAAe,IAAA;CAChB;AAED,MAAM,MAAM,qBAAqB,GAAG;IAClC,CAAC,iBAAiB,CAAC,eAAe,CAAC,EAAE,MAAM,IAAI,CAAC;IAChD,CAAC,iBAAiB,CAAC,eAAe,CAAC,EAAE,CAAC,SAAS,EAAE,OAAO,KAAK,IAAI,CAAC;CACnE,CAAC;AAEF,qBAAa,aAAa;;IAGxB,aAAa,EAAE,aAAa,CAAC,UAAU,GAAG,OAAO,eAAe,CAAC,cAAc,CAAC,CAAC;IACjF,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,YAAY,EAAE,qBAAqB,CAAC;IAEpC,cAAc,SAAK;IACnB,MAAM,SAAgB;IACtB,OAAO,SAAgB;gBAGrB,QAAQ,EAAE,MAAM,EAChB,WAAW,EAAE,WAAW,EACxB,aAAa,EAAE,aAAa,CAAC,UAAU,GAAG,OAAO,eAAe,CAAC,cAAc,CAAC,EAChF,YAAY,EAAE,qBAAqB;IAQrC,IAAI,QAAQ,IAAI,MAAM,CAErB;IAED,IAAI,WAAW,IAAI,OAAO,CAEzB;IAED,IAAI,UAAU,IAAI,MAAM,CAEvB;IAED,IAAI,IAAI,IAAI,OAAO,CAElB;IAED,SAAS;IAST,IAAI,IAAI,MAAM;CAGf;2CAE4D,aAAa,qBAAqB,CAAC;AAAhG,qBAAa,YAAa,SAAQ,iBAA+D;;gBAOnF,WAAW,EAAE,WAAW;IAKpC,IAAI,YAAY,IAAI,MAAM,CAEzB;IAED,IAAI,YAAY,CAAC,GAAG,EAAE,MAAM,EAE3B;IAED,IAAI,CACF,QAAQ,EAAE,MAAM,EAChB,aAAa,EAAE,aAAa,CAAC,UAAU,GAAG,OAAO,eAAe,CAAC,cAAc,CAAC,EAChF,YAAY,EAAE,qBAAqB,GAClC,aAAa;IAyFV,KAAK;CAIZ"}
@@ -1,139 +0,0 @@
1
- import EventEmitter from "node:events";
2
- import { log } from "../log.js";
3
- import { CancellablePromise, Future, gracefullyCancel } from "../utils.js";
4
- import { SynthesisHandle } from "./agent_output.js";
5
- var AgentPlayoutEvent = /* @__PURE__ */ ((AgentPlayoutEvent2) => {
6
- AgentPlayoutEvent2[AgentPlayoutEvent2["PLAYOUT_STARTED"] = 0] = "PLAYOUT_STARTED";
7
- AgentPlayoutEvent2[AgentPlayoutEvent2["PLAYOUT_STOPPED"] = 1] = "PLAYOUT_STOPPED";
8
- return AgentPlayoutEvent2;
9
- })(AgentPlayoutEvent || {});
10
- class PlayoutHandle {
11
- #speechId;
12
- #audioSource;
13
- playoutSource;
14
- totalPlayedTime;
15
- synchronizer;
16
- #interrupted = false;
17
- pushedDuration = 0;
18
- intFut = new Future();
19
- doneFut = new Future();
20
- constructor(speechId, audioSource, playoutSource, synchronizer) {
21
- this.#speechId = speechId;
22
- this.#audioSource = audioSource;
23
- this.playoutSource = playoutSource;
24
- this.synchronizer = synchronizer;
25
- }
26
- get speechId() {
27
- return this.#speechId;
28
- }
29
- get interrupted() {
30
- return this.#interrupted;
31
- }
32
- get timePlayed() {
33
- return this.totalPlayedTime || this.pushedDuration - this.#audioSource.queuedDuration;
34
- }
35
- get done() {
36
- return this.doneFut.done || this.#interrupted;
37
- }
38
- interrupt() {
39
- if (this.done) {
40
- return;
41
- }
42
- this.intFut.resolve();
43
- this.#interrupted = true;
44
- }
45
- join() {
46
- return this.doneFut;
47
- }
48
- }
49
- class AgentPlayout extends EventEmitter {
50
- #closed = false;
51
- #audioSource;
52
- #targetVolume = 1;
53
- #playoutTask;
54
- #logger = log();
55
- constructor(audioSource) {
56
- super();
57
- this.#audioSource = audioSource;
58
- }
59
- get targetVolume() {
60
- return this.#targetVolume;
61
- }
62
- set targetVolume(vol) {
63
- this.#targetVolume = vol;
64
- }
65
- play(speechId, playoutSource, synchronizer) {
66
- if (this.#closed) {
67
- throw new Error("source closed");
68
- }
69
- const handle = new PlayoutHandle(speechId, this.#audioSource, playoutSource, synchronizer);
70
- this.#playoutTask = this.#playout(handle, this.#playoutTask);
71
- return handle;
72
- }
73
- #playout(handle, oldTask) {
74
- return new CancellablePromise(async (resolve, _, onCancel) => {
75
- const cancel = () => {
76
- captureTask.cancel();
77
- handle.totalPlayedTime = handle.pushedDuration - this.#audioSource.queuedDuration;
78
- if (handle.interrupted || captureTask.error) {
79
- handle.synchronizer.close(true);
80
- this.#audioSource.clearQueue();
81
- }
82
- if (!firstFrame) {
83
- this.emit(1 /* PLAYOUT_STOPPED */, handle.interrupted);
84
- }
85
- handle.doneFut.resolve();
86
- this.#logger.child({ speechId: handle.speechId, interrupted: handle.interrupted }).debug("playout finished");
87
- };
88
- onCancel(() => {
89
- cancel();
90
- });
91
- if (oldTask) {
92
- await gracefullyCancel(oldTask);
93
- }
94
- if (this.#audioSource.queuedDuration > 0) {
95
- this.#logger.child({ speechId: handle.speechId, queuedDuration: this.#audioSource.queuedDuration }).warn("new playout while the source is still playing");
96
- }
97
- let firstFrame = true;
98
- const captureTask = new CancellablePromise(async (resolve2, _2, onCancel2) => {
99
- let cancelled = false;
100
- onCancel2(() => {
101
- cancelled = true;
102
- });
103
- for await (const frame of handle.playoutSource) {
104
- if (cancelled || frame === SynthesisHandle.FLUSH_SENTINEL) {
105
- break;
106
- }
107
- if (firstFrame) {
108
- this.#logger.child({ speechId: handle.speechId }).debug("started playing the first time");
109
- this.emit(0 /* PLAYOUT_STARTED */);
110
- handle.synchronizer.segmentPlayoutStarted();
111
- firstFrame = false;
112
- }
113
- handle.pushedDuration += frame.samplesPerChannel / frame.sampleRate * 1e3;
114
- handle.synchronizer.pushAudio(frame);
115
- await this.#audioSource.captureFrame(frame);
116
- }
117
- await this.#audioSource.waitForPlayout();
118
- handle.synchronizer.close(false);
119
- resolve2();
120
- });
121
- try {
122
- await Promise.any([captureTask, handle.intFut.await]);
123
- } finally {
124
- cancel();
125
- resolve();
126
- }
127
- });
128
- }
129
- async close() {
130
- this.#closed = true;
131
- await this.#playoutTask;
132
- }
133
- }
134
- export {
135
- AgentPlayout,
136
- AgentPlayoutEvent,
137
- PlayoutHandle
138
- };
139
- //# sourceMappingURL=agent_playout.js.map
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../../src/pipeline/agent_playout.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame, AudioSource } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport EventEmitter from 'node:events';\nimport { log } from '../log.js';\nimport type { TextAudioSynchronizer } from '../transcription.js';\nimport { CancellablePromise, Future, gracefullyCancel } from '../utils.js';\nimport { SynthesisHandle } from './agent_output.js';\n\nexport enum AgentPlayoutEvent {\n PLAYOUT_STARTED,\n PLAYOUT_STOPPED,\n}\n\nexport type AgentPlayoutCallbacks = {\n [AgentPlayoutEvent.PLAYOUT_STARTED]: () => void;\n [AgentPlayoutEvent.PLAYOUT_STOPPED]: (interrupt: boolean) => void;\n};\n\nexport class PlayoutHandle {\n #speechId: string;\n #audioSource: AudioSource;\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>;\n totalPlayedTime?: number;\n synchronizer: TextAudioSynchronizer;\n #interrupted = false;\n pushedDuration = 0;\n intFut = new Future();\n doneFut = new Future();\n\n constructor(\n speechId: string,\n audioSource: AudioSource,\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>,\n synchronizer: TextAudioSynchronizer,\n ) {\n this.#speechId = speechId;\n this.#audioSource = audioSource;\n this.playoutSource = playoutSource;\n this.synchronizer = synchronizer;\n }\n\n get speechId(): string {\n return this.#speechId;\n }\n\n get interrupted(): boolean {\n return this.#interrupted;\n }\n\n get timePlayed(): number {\n return this.totalPlayedTime || this.pushedDuration - this.#audioSource.queuedDuration;\n }\n\n get done(): boolean {\n return this.doneFut.done || this.#interrupted;\n }\n\n interrupt() {\n if (this.done) {\n return;\n }\n\n this.intFut.resolve();\n this.#interrupted = true;\n }\n\n join(): Future {\n return this.doneFut;\n }\n}\n\nexport class AgentPlayout extends (EventEmitter as new () => TypedEmitter<AgentPlayoutCallbacks>) {\n #closed = false;\n #audioSource: AudioSource;\n #targetVolume = 1;\n #playoutTask?: CancellablePromise<void>;\n #logger = log();\n\n constructor(audioSource: AudioSource) {\n super();\n this.#audioSource = audioSource;\n }\n\n get targetVolume(): number {\n return this.#targetVolume;\n }\n\n set targetVolume(vol: number) {\n this.#targetVolume = vol;\n }\n\n play(\n speechId: string,\n playoutSource: AsyncIterable<AudioFrame | typeof SynthesisHandle.FLUSH_SENTINEL>,\n synchronizer: TextAudioSynchronizer,\n ): PlayoutHandle {\n if (this.#closed) {\n throw new Error('source closed');\n }\n\n const handle = new PlayoutHandle(speechId, this.#audioSource, playoutSource, synchronizer);\n\n this.#playoutTask = this.#playout(handle, this.#playoutTask);\n return handle;\n }\n\n #playout(handle: PlayoutHandle, oldTask?: CancellablePromise<void>): CancellablePromise<void> {\n return new CancellablePromise(async (resolve, _, onCancel) => {\n const cancel = () => {\n captureTask.cancel();\n handle.totalPlayedTime = handle.pushedDuration - this.#audioSource.queuedDuration;\n\n if (handle.interrupted || captureTask.error) {\n handle.synchronizer.close(true);\n this.#audioSource.clearQueue(); // make sure to remove any queued frames\n }\n\n if (!firstFrame) {\n this.emit(AgentPlayoutEvent.PLAYOUT_STOPPED, handle.interrupted);\n }\n\n handle.doneFut.resolve();\n\n this.#logger\n .child({ speechId: handle.speechId, interrupted: handle.interrupted })\n .debug('playout finished');\n };\n\n onCancel(() => {\n cancel();\n });\n\n if (oldTask) {\n await gracefullyCancel(oldTask);\n }\n\n if (this.#audioSource.queuedDuration > 0) {\n // this should not happen, but log it just in case\n this.#logger\n .child({ speechId: handle.speechId, queuedDuration: this.#audioSource.queuedDuration })\n .warn('new playout while the source is still playing');\n }\n\n let firstFrame = true;\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n const captureTask = new CancellablePromise<void>(async (resolve, _, onCancel) => {\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n\n for await (const frame of handle.playoutSource) {\n if (cancelled || frame === SynthesisHandle.FLUSH_SENTINEL) {\n break;\n }\n if (firstFrame) {\n this.#logger\n .child({ speechId: handle.speechId })\n .debug('started playing the first time');\n this.emit(AgentPlayoutEvent.PLAYOUT_STARTED);\n handle.synchronizer.segmentPlayoutStarted();\n firstFrame = false;\n }\n handle.pushedDuration += (frame.samplesPerChannel / frame.sampleRate) * 1000;\n handle.synchronizer.pushAudio(frame);\n await this.#audioSource.captureFrame(frame);\n }\n\n await this.#audioSource.waitForPlayout();\n\n handle.synchronizer.close(false);\n resolve();\n });\n\n try {\n await Promise.any([captureTask, handle.intFut.await]);\n } finally {\n cancel();\n resolve();\n }\n });\n }\n\n async close() {\n this.#closed = true;\n await this.#playoutTask;\n }\n}\n"],"mappings":"AAKA,OAAO,kBAAkB;AACzB,SAAS,WAAW;AAEpB,SAAS,oBAAoB,QAAQ,wBAAwB;AAC7D,SAAS,uBAAuB;AAEzB,IAAK,oBAAL,kBAAKA,uBAAL;AACL,EAAAA,sCAAA;AACA,EAAAA,sCAAA;AAFU,SAAAA;AAAA,GAAA;AAUL,MAAM,cAAc;AAAA,EACzB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,eAAe;AAAA,EACf,iBAAiB;AAAA,EACjB,SAAS,IAAI,OAAO;AAAA,EACpB,UAAU,IAAI,OAAO;AAAA,EAErB,YACE,UACA,aACA,eACA,cACA;AACA,SAAK,YAAY;AACjB,SAAK,eAAe;AACpB,SAAK,gBAAgB;AACrB,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,IAAI,WAAmB;AACrB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,cAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,aAAqB;AACvB,WAAO,KAAK,mBAAmB,KAAK,iBAAiB,KAAK,aAAa;AAAA,EACzE;AAAA,EAEA,IAAI,OAAgB;AAClB,WAAO,KAAK,QAAQ,QAAQ,KAAK;AAAA,EACnC;AAAA,EAEA,YAAY;AACV,QAAI,KAAK,MAAM;AACb;AAAA,IACF;AAEA,SAAK,OAAO,QAAQ;AACpB,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,OAAe;AACb,WAAO,KAAK;AAAA,EACd;AACF;AAEO,MAAM,qBAAsB,aAA+D;AAAA,EAChG,UAAU;AAAA,EACV;AAAA,EACA,gBAAgB;AAAA,EAChB;AAAA,EACA,UAAU,IAAI;AAAA,EAEd,YAAY,aAA0B;AACpC,UAAM;AACN,SAAK,eAAe;AAAA,EACtB;AAAA,EAEA,IAAI,eAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,aAAa,KAAa;AAC5B,SAAK,gBAAgB;AAAA,EACvB;AAAA,EAEA,KACE,UACA,eACA,cACe;AACf,QAAI,KAAK,SAAS;AAChB,YAAM,IAAI,MAAM,eAAe;AAAA,IACjC;AAEA,UAAM,SAAS,IAAI,cAAc,UAAU,KAAK,cAAc,eAAe,YAAY;AAEzF,SAAK,eAAe,KAAK,SAAS,QAAQ,KAAK,YAAY;AAC3D,WAAO;AAAA,EACT;AAAA,EAEA,SAAS,QAAuB,SAA8D;AAC5F,WAAO,IAAI,mBAAmB,OAAO,SAAS,GAAG,aAAa;AAC5D,YAAM,SAAS,MAAM;AACnB,oBAAY,OAAO;AACnB,eAAO,kBAAkB,OAAO,iBAAiB,KAAK,aAAa;AAEnE,YAAI,OAAO,eAAe,YAAY,OAAO;AAC3C,iBAAO,aAAa,MAAM,IAAI;AAC9B,eAAK,aAAa,WAAW;AAAA,QAC/B;AAEA,YAAI,CAAC,YAAY;AACf,eAAK,KAAK,yBAAmC,OAAO,WAAW;AAAA,QACjE;AAEA,eAAO,QAAQ,QAAQ;AAEvB,aAAK,QACF,MAAM,EAAE,UAAU,OAAO,UAAU,aAAa,OAAO,YAAY,CAAC,EACpE,MAAM,kBAAkB;AAAA,MAC7B;AAEA,eAAS,MAAM;AACb,eAAO;AAAA,MACT,CAAC;AAED,UAAI,SAAS;AACX,cAAM,iBAAiB,OAAO;AAAA,MAChC;AAEA,UAAI,KAAK,aAAa,iBAAiB,GAAG;AAExC,aAAK,QACF,MAAM,EAAE,UAAU,OAAO,UAAU,gBAAgB,KAAK,aAAa,eAAe,CAAC,EACrF,KAAK,+CAA+C;AAAA,MACzD;AAEA,UAAI,aAAa;AAGjB,YAAM,cAAc,IAAI,mBAAyB,OAAOC,UAASC,IAAGC,cAAa;AAC/E,YAAI,YAAY;AAChB,QAAAA,UAAS,MAAM;AACb,sBAAY;AAAA,QACd,CAAC;AAED,yBAAiB,SAAS,OAAO,eAAe;AAC9C,cAAI,aAAa,UAAU,gBAAgB,gBAAgB;AACzD;AAAA,UACF;AACA,cAAI,YAAY;AACd,iBAAK,QACF,MAAM,EAAE,UAAU,OAAO,SAAS,CAAC,EACnC,MAAM,gCAAgC;AACzC,iBAAK,KAAK,uBAAiC;AAC3C,mBAAO,aAAa,sBAAsB;AAC1C,yBAAa;AAAA,UACf;AACA,iBAAO,kBAAmB,MAAM,oBAAoB,MAAM,aAAc;AACxE,iBAAO,aAAa,UAAU,KAAK;AACnC,gBAAM,KAAK,aAAa,aAAa,KAAK;AAAA,QAC5C;AAEA,cAAM,KAAK,aAAa,eAAe;AAEvC,eAAO,aAAa,MAAM,KAAK;AAC/B,QAAAF,SAAQ;AAAA,MACV,CAAC;AAED,UAAI;AACF,cAAM,QAAQ,IAAI,CAAC,aAAa,OAAO,OAAO,KAAK,CAAC;AAAA,MACtD,UAAE;AACA,eAAO;AACP,gBAAQ;AAAA,MACV;AAAA,IACF,CAAC;AAAA,EACH;AAAA,EAEA,MAAM,QAAQ;AACZ,SAAK,UAAU;AACf,UAAM,KAAK;AAAA,EACb;AACF;","names":["AgentPlayoutEvent","resolve","_","onCancel"]}
@@ -1,171 +0,0 @@
1
- "use strict";
2
- var __defProp = Object.defineProperty;
3
- var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
- var __getOwnPropNames = Object.getOwnPropertyNames;
5
- var __hasOwnProp = Object.prototype.hasOwnProperty;
6
- var __export = (target, all) => {
7
- for (var name in all)
8
- __defProp(target, name, { get: all[name], enumerable: true });
9
- };
10
- var __copyProps = (to, from, except, desc) => {
11
- if (from && typeof from === "object" || typeof from === "function") {
12
- for (let key of __getOwnPropNames(from))
13
- if (!__hasOwnProp.call(to, key) && key !== except)
14
- __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
15
- }
16
- return to;
17
- };
18
- var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
- var human_input_exports = {};
20
- __export(human_input_exports, {
21
- HumanInput: () => HumanInput,
22
- HumanInputEvent: () => HumanInputEvent
23
- });
24
- module.exports = __toCommonJS(human_input_exports);
25
- var import_rtc_node = require("@livekit/rtc-node");
26
- var import_node_events = require("node:events");
27
- var import_log = require("../log.cjs");
28
- var import_stt = require("../stt/stt.cjs");
29
- var import_utils = require("../utils.cjs");
30
- var import_vad = require("../vad.cjs");
31
- var HumanInputEvent = /* @__PURE__ */ ((HumanInputEvent2) => {
32
- HumanInputEvent2[HumanInputEvent2["START_OF_SPEECH"] = 0] = "START_OF_SPEECH";
33
- HumanInputEvent2[HumanInputEvent2["VAD_INFERENCE_DONE"] = 1] = "VAD_INFERENCE_DONE";
34
- HumanInputEvent2[HumanInputEvent2["END_OF_SPEECH"] = 2] = "END_OF_SPEECH";
35
- HumanInputEvent2[HumanInputEvent2["FINAL_TRANSCRIPT"] = 3] = "FINAL_TRANSCRIPT";
36
- HumanInputEvent2[HumanInputEvent2["INTERIM_TRANSCRIPT"] = 4] = "INTERIM_TRANSCRIPT";
37
- return HumanInputEvent2;
38
- })(HumanInputEvent || {});
39
- class HumanInput extends import_node_events.EventEmitter {
40
- #closed = false;
41
- #room;
42
- #vad;
43
- #stt;
44
- #participant;
45
- #subscribedTrack;
46
- #recognizeTask;
47
- #speaking = false;
48
- #speechProbability = 0;
49
- #logger = (0, import_log.log)();
50
- #noiseCancellation;
51
- constructor(room, vad, stt, participant, noiseCancellation) {
52
- super();
53
- this.#room = room;
54
- this.#vad = vad;
55
- this.#stt = stt;
56
- this.#participant = participant;
57
- this.#noiseCancellation = noiseCancellation;
58
- this.#room.on(import_rtc_node.RoomEvent.TrackPublished, this.#subscribeToMicrophone.bind(this));
59
- this.#room.on(import_rtc_node.RoomEvent.TrackSubscribed, this.#subscribeToMicrophone.bind(this));
60
- this.#subscribeToMicrophone();
61
- }
62
- get participant() {
63
- return this.#participant;
64
- }
65
- get subscribedTrack() {
66
- return this.#subscribedTrack;
67
- }
68
- #subscribeToMicrophone() {
69
- if (!this.#participant) {
70
- this.#logger.error("Participant is not set");
71
- return;
72
- }
73
- let microphonePublication = void 0;
74
- for (const publication of this.#participant.trackPublications.values()) {
75
- if (publication.source === import_rtc_node.TrackSource.SOURCE_MICROPHONE) {
76
- microphonePublication = publication;
77
- break;
78
- }
79
- }
80
- if (!microphonePublication) {
81
- return;
82
- }
83
- if (!microphonePublication.subscribed) {
84
- microphonePublication.setSubscribed(true);
85
- }
86
- const track = microphonePublication.track;
87
- if (track && track !== this.#subscribedTrack) {
88
- this.#subscribedTrack = track;
89
- if (this.#recognizeTask) {
90
- this.#recognizeTask.cancel();
91
- }
92
- const audioStreamOptions = {
93
- sampleRate: 16e3,
94
- numChannels: 1,
95
- ...this.#noiseCancellation ? { noiseCancellation: this.#noiseCancellation } : {}
96
- };
97
- const audioStream = new import_rtc_node.AudioStream(track, audioStreamOptions);
98
- this.#recognizeTask = new import_utils.CancellablePromise(async (resolve, _, onCancel) => {
99
- let cancelled = false;
100
- onCancel(() => {
101
- cancelled = true;
102
- });
103
- const sttStream = this.#stt.stream();
104
- const vadStream = this.#vad.stream();
105
- const audioStreamCo = async () => {
106
- for await (const ev of audioStream) {
107
- if (cancelled) return;
108
- sttStream.pushFrame(ev);
109
- vadStream.pushFrame(ev);
110
- }
111
- };
112
- const vadStreamCo = async () => {
113
- for await (const ev of vadStream) {
114
- if (cancelled) return;
115
- switch (ev.type) {
116
- case import_vad.VADEventType.START_OF_SPEECH:
117
- this.#speaking = true;
118
- this.emit(0 /* START_OF_SPEECH */, ev);
119
- break;
120
- case import_vad.VADEventType.INFERENCE_DONE:
121
- this.#speechProbability = ev.probability;
122
- this.emit(1 /* VAD_INFERENCE_DONE */, ev);
123
- break;
124
- case import_vad.VADEventType.END_OF_SPEECH:
125
- this.#speaking = false;
126
- this.emit(2 /* END_OF_SPEECH */, ev);
127
- break;
128
- }
129
- }
130
- };
131
- const sttStreamCo = async () => {
132
- for await (const ev of sttStream) {
133
- if (cancelled) return;
134
- if (ev.type === import_stt.SpeechEventType.FINAL_TRANSCRIPT) {
135
- this.emit(3 /* FINAL_TRANSCRIPT */, ev);
136
- } else if (ev.type == import_stt.SpeechEventType.INTERIM_TRANSCRIPT) {
137
- this.emit(4 /* INTERIM_TRANSCRIPT */, ev);
138
- }
139
- }
140
- };
141
- await Promise.all([audioStreamCo(), vadStreamCo(), sttStreamCo()]);
142
- sttStream.close();
143
- vadStream.close();
144
- resolve();
145
- });
146
- }
147
- }
148
- get speaking() {
149
- return this.#speaking;
150
- }
151
- get speakingProbability() {
152
- return this.#speechProbability;
153
- }
154
- async close() {
155
- if (this.#closed) {
156
- throw new Error("HumanInput already closed");
157
- }
158
- this.#closed = true;
159
- this.#room.removeAllListeners();
160
- this.#speaking = false;
161
- if (this.#recognizeTask) {
162
- await (0, import_utils.gracefullyCancel)(this.#recognizeTask);
163
- }
164
- }
165
- }
166
- // Annotate the CommonJS export names for ESM import in node:
167
- 0 && (module.exports = {
168
- HumanInput,
169
- HumanInputEvent
170
- });
171
- //# sourceMappingURL=human_input.cjs.map
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../../src/pipeline/human_input.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type {\n NoiseCancellationOptions,\n RemoteAudioTrack,\n RemoteParticipant,\n RemoteTrackPublication,\n Room,\n} from '@livekit/rtc-node';\nimport { AudioStream, RoomEvent, TrackSource } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport { log } from '../log.js';\nimport type { STT, SpeechEvent } from '../stt/stt.js';\nimport { SpeechEventType } from '../stt/stt.js';\nimport { CancellablePromise, gracefullyCancel } from '../utils.js';\nimport type { VAD, VADEvent } from '../vad.js';\nimport { VADEventType } from '../vad.js';\n\nexport enum HumanInputEvent {\n START_OF_SPEECH,\n VAD_INFERENCE_DONE,\n END_OF_SPEECH,\n FINAL_TRANSCRIPT,\n INTERIM_TRANSCRIPT,\n}\n\nexport type HumanInputCallbacks = {\n [HumanInputEvent.START_OF_SPEECH]: (event: VADEvent) => void;\n [HumanInputEvent.VAD_INFERENCE_DONE]: (event: VADEvent) => void;\n [HumanInputEvent.END_OF_SPEECH]: (event: VADEvent) => void;\n [HumanInputEvent.FINAL_TRANSCRIPT]: (event: SpeechEvent) => void;\n [HumanInputEvent.INTERIM_TRANSCRIPT]: (event: SpeechEvent) => void;\n};\n\nexport class HumanInput extends (EventEmitter as new () => TypedEmitter<HumanInputCallbacks>) {\n #closed = false;\n #room: Room;\n #vad: VAD;\n #stt: STT;\n #participant: RemoteParticipant;\n #subscribedTrack?: RemoteAudioTrack;\n #recognizeTask?: CancellablePromise<void>;\n #speaking = false;\n #speechProbability = 0;\n #logger = log();\n #noiseCancellation?: NoiseCancellationOptions;\n\n constructor(\n room: Room,\n vad: VAD,\n stt: STT,\n participant: RemoteParticipant,\n noiseCancellation?: NoiseCancellationOptions,\n ) {\n super();\n this.#room = room;\n this.#vad = vad;\n this.#stt = stt;\n this.#participant = participant;\n this.#noiseCancellation = noiseCancellation;\n\n this.#room.on(RoomEvent.TrackPublished, this.#subscribeToMicrophone.bind(this));\n this.#room.on(RoomEvent.TrackSubscribed, this.#subscribeToMicrophone.bind(this));\n this.#subscribeToMicrophone();\n }\n\n get participant(): RemoteParticipant {\n return this.#participant;\n }\n\n get subscribedTrack(): RemoteAudioTrack | undefined {\n return this.#subscribedTrack;\n }\n\n #subscribeToMicrophone(): void {\n if (!this.#participant) {\n this.#logger.error('Participant is not set');\n return;\n }\n\n let microphonePublication: RemoteTrackPublication | undefined = undefined;\n for (const publication of this.#participant.trackPublications.values()) {\n if (publication.source === TrackSource.SOURCE_MICROPHONE) {\n microphonePublication = publication;\n break;\n }\n }\n if (!microphonePublication) {\n return;\n }\n\n if (!microphonePublication.subscribed) {\n microphonePublication.setSubscribed(true);\n }\n\n const track = microphonePublication.track;\n if (track && track !== this.#subscribedTrack) {\n this.#subscribedTrack = track;\n if (this.#recognizeTask) {\n this.#recognizeTask.cancel();\n }\n\n const audioStreamOptions = {\n sampleRate: 16000,\n numChannels: 1,\n ...(this.#noiseCancellation ? { noiseCancellation: this.#noiseCancellation } : {}),\n };\n const audioStream = new AudioStream(track, audioStreamOptions);\n\n // eslint-disable-next-line @typescript-eslint/no-unused-vars\n this.#recognizeTask = new CancellablePromise(async (resolve, _, onCancel) => {\n let cancelled = false;\n onCancel(() => {\n cancelled = true;\n });\n\n const sttStream = this.#stt.stream();\n const vadStream = this.#vad.stream();\n\n const audioStreamCo = async () => {\n for await (const ev of audioStream) {\n if (cancelled) return;\n sttStream.pushFrame(ev);\n vadStream.pushFrame(ev);\n }\n };\n\n const vadStreamCo = async () => {\n for await (const ev of vadStream) {\n if (cancelled) return;\n switch (ev.type) {\n case VADEventType.START_OF_SPEECH:\n this.#speaking = true;\n this.emit(HumanInputEvent.START_OF_SPEECH, ev);\n break;\n case VADEventType.INFERENCE_DONE:\n this.#speechProbability = ev.probability;\n this.emit(HumanInputEvent.VAD_INFERENCE_DONE, ev);\n break;\n case VADEventType.END_OF_SPEECH:\n this.#speaking = false;\n this.emit(HumanInputEvent.END_OF_SPEECH, ev);\n break;\n }\n }\n };\n\n const sttStreamCo = async () => {\n for await (const ev of sttStream) {\n if (cancelled) return;\n if (ev.type === SpeechEventType.FINAL_TRANSCRIPT) {\n this.emit(HumanInputEvent.FINAL_TRANSCRIPT, ev);\n } else if (ev.type == SpeechEventType.INTERIM_TRANSCRIPT) {\n this.emit(HumanInputEvent.INTERIM_TRANSCRIPT, ev);\n }\n }\n };\n\n await Promise.all([audioStreamCo(), vadStreamCo(), sttStreamCo()]);\n sttStream.close();\n vadStream.close();\n resolve();\n });\n }\n }\n\n get speaking(): boolean {\n return this.#speaking;\n }\n\n get speakingProbability(): number {\n return this.#speechProbability;\n }\n\n async close() {\n if (this.#closed) {\n throw new Error('HumanInput already closed');\n }\n this.#closed = true;\n this.#room.removeAllListeners();\n this.#speaking = false;\n if (this.#recognizeTask) {\n await gracefullyCancel(this.#recognizeTask);\n }\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAUA,sBAAoD;AAEpD,yBAA6B;AAC7B,iBAAoB;AAEpB,iBAAgC;AAChC,mBAAqD;AAErD,iBAA6B;AAEtB,IAAK,kBAAL,kBAAKA,qBAAL;AACL,EAAAA,kCAAA;AACA,EAAAA,kCAAA;AACA,EAAAA,kCAAA;AACA,EAAAA,kCAAA;AACA,EAAAA,kCAAA;AALU,SAAAA;AAAA,GAAA;AAgBL,MAAM,mBAAoB,gCAA6D;AAAA,EAC5F,UAAU;AAAA,EACV;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,YAAY;AAAA,EACZ,qBAAqB;AAAA,EACrB,cAAU,gBAAI;AAAA,EACd;AAAA,EAEA,YACE,MACA,KACA,KACA,aACA,mBACA;AACA,UAAM;AACN,SAAK,QAAQ;AACb,SAAK,OAAO;AACZ,SAAK,OAAO;AACZ,SAAK,eAAe;AACpB,SAAK,qBAAqB;AAE1B,SAAK,MAAM,GAAG,0BAAU,gBAAgB,KAAK,uBAAuB,KAAK,IAAI,CAAC;AAC9E,SAAK,MAAM,GAAG,0BAAU,iBAAiB,KAAK,uBAAuB,KAAK,IAAI,CAAC;AAC/E,SAAK,uBAAuB;AAAA,EAC9B;AAAA,EAEA,IAAI,cAAiC;AACnC,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,kBAAgD;AAClD,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,yBAA+B;AAC7B,QAAI,CAAC,KAAK,cAAc;AACtB,WAAK,QAAQ,MAAM,wBAAwB;AAC3C;AAAA,IACF;AAEA,QAAI,wBAA4D;AAChE,eAAW,eAAe,KAAK,aAAa,kBAAkB,OAAO,GAAG;AACtE,UAAI,YAAY,WAAW,4BAAY,mBAAmB;AACxD,gCAAwB;AACxB;AAAA,MACF;AAAA,IACF;AACA,QAAI,CAAC,uBAAuB;AAC1B;AAAA,IACF;AAEA,QAAI,CAAC,sBAAsB,YAAY;AACrC,4BAAsB,cAAc,IAAI;AAAA,IAC1C;AAEA,UAAM,QAAQ,sBAAsB;AACpC,QAAI,SAAS,UAAU,KAAK,kBAAkB;AAC5C,WAAK,mBAAmB;AACxB,UAAI,KAAK,gBAAgB;AACvB,aAAK,eAAe,OAAO;AAAA,MAC7B;AAEA,YAAM,qBAAqB;AAAA,QACzB,YAAY;AAAA,QACZ,aAAa;AAAA,QACb,GAAI,KAAK,qBAAqB,EAAE,mBAAmB,KAAK,mBAAmB,IAAI,CAAC;AAAA,MAClF;AACA,YAAM,cAAc,IAAI,4BAAY,OAAO,kBAAkB;AAG7D,WAAK,iBAAiB,IAAI,gCAAmB,OAAO,SAAS,GAAG,aAAa;AAC3E,YAAI,YAAY;AAChB,iBAAS,MAAM;AACb,sBAAY;AAAA,QACd,CAAC;AAED,cAAM,YAAY,KAAK,KAAK,OAAO;AACnC,cAAM,YAAY,KAAK,KAAK,OAAO;AAEnC,cAAM,gBAAgB,YAAY;AAChC,2BAAiB,MAAM,aAAa;AAClC,gBAAI,UAAW;AACf,sBAAU,UAAU,EAAE;AACtB,sBAAU,UAAU,EAAE;AAAA,UACxB;AAAA,QACF;AAEA,cAAM,cAAc,YAAY;AAC9B,2BAAiB,MAAM,WAAW;AAChC,gBAAI,UAAW;AACf,oBAAQ,GAAG,MAAM;AAAA,cACf,KAAK,wBAAa;AAChB,qBAAK,YAAY;AACjB,qBAAK,KAAK,yBAAiC,EAAE;AAC7C;AAAA,cACF,KAAK,wBAAa;AAChB,qBAAK,qBAAqB,GAAG;AAC7B,qBAAK,KAAK,4BAAoC,EAAE;AAChD;AAAA,cACF,KAAK,wBAAa;AAChB,qBAAK,YAAY;AACjB,qBAAK,KAAK,uBAA+B,EAAE;AAC3C;AAAA,YACJ;AAAA,UACF;AAAA,QACF;AAEA,cAAM,cAAc,YAAY;AAC9B,2BAAiB,MAAM,WAAW;AAChC,gBAAI,UAAW;AACf,gBAAI,GAAG,SAAS,2BAAgB,kBAAkB;AAChD,mBAAK,KAAK,0BAAkC,EAAE;AAAA,YAChD,WAAW,GAAG,QAAQ,2BAAgB,oBAAoB;AACxD,mBAAK,KAAK,4BAAoC,EAAE;AAAA,YAClD;AAAA,UACF;AAAA,QACF;AAEA,cAAM,QAAQ,IAAI,CAAC,cAAc,GAAG,YAAY,GAAG,YAAY,CAAC,CAAC;AACjE,kBAAU,MAAM;AAChB,kBAAU,MAAM;AAChB,gBAAQ;AAAA,MACV,CAAC;AAAA,IACH;AAAA,EACF;AAAA,EAEA,IAAI,WAAoB;AACtB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,sBAA8B;AAChC,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,QAAQ;AACZ,QAAI,KAAK,SAAS;AAChB,YAAM,IAAI,MAAM,2BAA2B;AAAA,IAC7C;AACA,SAAK,UAAU;AACf,SAAK,MAAM,mBAAmB;AAC9B,SAAK,YAAY;AACjB,QAAI,KAAK,gBAAgB;AACvB,gBAAM,+BAAiB,KAAK,cAAc;AAAA,IAC5C;AAAA,EACF;AACF;","names":["HumanInputEvent"]}
@@ -1,30 +0,0 @@
1
- import type { NoiseCancellationOptions, RemoteAudioTrack, RemoteParticipant, Room } from '@livekit/rtc-node';
2
- import type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';
3
- import type { STT, SpeechEvent } from '../stt/stt.js';
4
- import type { VAD, VADEvent } from '../vad.js';
5
- export declare enum HumanInputEvent {
6
- START_OF_SPEECH = 0,
7
- VAD_INFERENCE_DONE = 1,
8
- END_OF_SPEECH = 2,
9
- FINAL_TRANSCRIPT = 3,
10
- INTERIM_TRANSCRIPT = 4
11
- }
12
- export type HumanInputCallbacks = {
13
- [HumanInputEvent.START_OF_SPEECH]: (event: VADEvent) => void;
14
- [HumanInputEvent.VAD_INFERENCE_DONE]: (event: VADEvent) => void;
15
- [HumanInputEvent.END_OF_SPEECH]: (event: VADEvent) => void;
16
- [HumanInputEvent.FINAL_TRANSCRIPT]: (event: SpeechEvent) => void;
17
- [HumanInputEvent.INTERIM_TRANSCRIPT]: (event: SpeechEvent) => void;
18
- };
19
- declare const HumanInput_base: new () => TypedEmitter<HumanInputCallbacks>;
20
- export declare class HumanInput extends HumanInput_base {
21
- #private;
22
- constructor(room: Room, vad: VAD, stt: STT, participant: RemoteParticipant, noiseCancellation?: NoiseCancellationOptions);
23
- get participant(): RemoteParticipant;
24
- get subscribedTrack(): RemoteAudioTrack | undefined;
25
- get speaking(): boolean;
26
- get speakingProbability(): number;
27
- close(): Promise<void>;
28
- }
29
- export {};
30
- //# sourceMappingURL=human_input.d.ts.map
@@ -1,30 +0,0 @@
1
- import type { NoiseCancellationOptions, RemoteAudioTrack, RemoteParticipant, Room } from '@livekit/rtc-node';
2
- import type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';
3
- import type { STT, SpeechEvent } from '../stt/stt.js';
4
- import type { VAD, VADEvent } from '../vad.js';
5
- export declare enum HumanInputEvent {
6
- START_OF_SPEECH = 0,
7
- VAD_INFERENCE_DONE = 1,
8
- END_OF_SPEECH = 2,
9
- FINAL_TRANSCRIPT = 3,
10
- INTERIM_TRANSCRIPT = 4
11
- }
12
- export type HumanInputCallbacks = {
13
- [HumanInputEvent.START_OF_SPEECH]: (event: VADEvent) => void;
14
- [HumanInputEvent.VAD_INFERENCE_DONE]: (event: VADEvent) => void;
15
- [HumanInputEvent.END_OF_SPEECH]: (event: VADEvent) => void;
16
- [HumanInputEvent.FINAL_TRANSCRIPT]: (event: SpeechEvent) => void;
17
- [HumanInputEvent.INTERIM_TRANSCRIPT]: (event: SpeechEvent) => void;
18
- };
19
- declare const HumanInput_base: new () => TypedEmitter<HumanInputCallbacks>;
20
- export declare class HumanInput extends HumanInput_base {
21
- #private;
22
- constructor(room: Room, vad: VAD, stt: STT, participant: RemoteParticipant, noiseCancellation?: NoiseCancellationOptions);
23
- get participant(): RemoteParticipant;
24
- get subscribedTrack(): RemoteAudioTrack | undefined;
25
- get speaking(): boolean;
26
- get speakingProbability(): number;
27
- close(): Promise<void>;
28
- }
29
- export {};
30
- //# sourceMappingURL=human_input.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"human_input.d.ts","sourceRoot":"","sources":["../../src/pipeline/human_input.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EACV,wBAAwB,EACxB,gBAAgB,EAChB,iBAAiB,EAEjB,IAAI,EACL,MAAM,mBAAmB,CAAC;AAE3B,OAAO,KAAK,EAAE,iBAAiB,IAAI,YAAY,EAAE,MAAM,wBAAwB,CAAC;AAGhF,OAAO,KAAK,EAAE,GAAG,EAAE,WAAW,EAAE,MAAM,eAAe,CAAC;AAGtD,OAAO,KAAK,EAAE,GAAG,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAG/C,oBAAY,eAAe;IACzB,eAAe,IAAA;IACf,kBAAkB,IAAA;IAClB,aAAa,IAAA;IACb,gBAAgB,IAAA;IAChB,kBAAkB,IAAA;CACnB;AAED,MAAM,MAAM,mBAAmB,GAAG;IAChC,CAAC,eAAe,CAAC,eAAe,CAAC,EAAE,CAAC,KAAK,EAAE,QAAQ,KAAK,IAAI,CAAC;IAC7D,CAAC,eAAe,CAAC,kBAAkB,CAAC,EAAE,CAAC,KAAK,EAAE,QAAQ,KAAK,IAAI,CAAC;IAChE,CAAC,eAAe,CAAC,aAAa,CAAC,EAAE,CAAC,KAAK,EAAE,QAAQ,KAAK,IAAI,CAAC;IAC3D,CAAC,eAAe,CAAC,gBAAgB,CAAC,EAAE,CAAC,KAAK,EAAE,WAAW,KAAK,IAAI,CAAC;IACjE,CAAC,eAAe,CAAC,kBAAkB,CAAC,EAAE,CAAC,KAAK,EAAE,WAAW,KAAK,IAAI,CAAC;CACpE,CAAC;yCAEyD,aAAa,mBAAmB,CAAC;AAA5F,qBAAa,UAAW,SAAQ,eAA6D;;gBAczF,IAAI,EAAE,IAAI,EACV,GAAG,EAAE,GAAG,EACR,GAAG,EAAE,GAAG,EACR,WAAW,EAAE,iBAAiB,EAC9B,iBAAiB,CAAC,EAAE,wBAAwB;IAc9C,IAAI,WAAW,IAAI,iBAAiB,CAEnC;IAED,IAAI,eAAe,IAAI,gBAAgB,GAAG,SAAS,CAElD;IA8FD,IAAI,QAAQ,IAAI,OAAO,CAEtB;IAED,IAAI,mBAAmB,IAAI,MAAM,CAEhC;IAEK,KAAK;CAWZ"}
@@ -1,146 +0,0 @@
1
- import { AudioStream, RoomEvent, TrackSource } from "@livekit/rtc-node";
2
- import { EventEmitter } from "node:events";
3
- import { log } from "../log.js";
4
- import { SpeechEventType } from "../stt/stt.js";
5
- import { CancellablePromise, gracefullyCancel } from "../utils.js";
6
- import { VADEventType } from "../vad.js";
7
- var HumanInputEvent = /* @__PURE__ */ ((HumanInputEvent2) => {
8
- HumanInputEvent2[HumanInputEvent2["START_OF_SPEECH"] = 0] = "START_OF_SPEECH";
9
- HumanInputEvent2[HumanInputEvent2["VAD_INFERENCE_DONE"] = 1] = "VAD_INFERENCE_DONE";
10
- HumanInputEvent2[HumanInputEvent2["END_OF_SPEECH"] = 2] = "END_OF_SPEECH";
11
- HumanInputEvent2[HumanInputEvent2["FINAL_TRANSCRIPT"] = 3] = "FINAL_TRANSCRIPT";
12
- HumanInputEvent2[HumanInputEvent2["INTERIM_TRANSCRIPT"] = 4] = "INTERIM_TRANSCRIPT";
13
- return HumanInputEvent2;
14
- })(HumanInputEvent || {});
15
- class HumanInput extends EventEmitter {
16
- #closed = false;
17
- #room;
18
- #vad;
19
- #stt;
20
- #participant;
21
- #subscribedTrack;
22
- #recognizeTask;
23
- #speaking = false;
24
- #speechProbability = 0;
25
- #logger = log();
26
- #noiseCancellation;
27
- constructor(room, vad, stt, participant, noiseCancellation) {
28
- super();
29
- this.#room = room;
30
- this.#vad = vad;
31
- this.#stt = stt;
32
- this.#participant = participant;
33
- this.#noiseCancellation = noiseCancellation;
34
- this.#room.on(RoomEvent.TrackPublished, this.#subscribeToMicrophone.bind(this));
35
- this.#room.on(RoomEvent.TrackSubscribed, this.#subscribeToMicrophone.bind(this));
36
- this.#subscribeToMicrophone();
37
- }
38
- get participant() {
39
- return this.#participant;
40
- }
41
- get subscribedTrack() {
42
- return this.#subscribedTrack;
43
- }
44
- #subscribeToMicrophone() {
45
- if (!this.#participant) {
46
- this.#logger.error("Participant is not set");
47
- return;
48
- }
49
- let microphonePublication = void 0;
50
- for (const publication of this.#participant.trackPublications.values()) {
51
- if (publication.source === TrackSource.SOURCE_MICROPHONE) {
52
- microphonePublication = publication;
53
- break;
54
- }
55
- }
56
- if (!microphonePublication) {
57
- return;
58
- }
59
- if (!microphonePublication.subscribed) {
60
- microphonePublication.setSubscribed(true);
61
- }
62
- const track = microphonePublication.track;
63
- if (track && track !== this.#subscribedTrack) {
64
- this.#subscribedTrack = track;
65
- if (this.#recognizeTask) {
66
- this.#recognizeTask.cancel();
67
- }
68
- const audioStreamOptions = {
69
- sampleRate: 16e3,
70
- numChannels: 1,
71
- ...this.#noiseCancellation ? { noiseCancellation: this.#noiseCancellation } : {}
72
- };
73
- const audioStream = new AudioStream(track, audioStreamOptions);
74
- this.#recognizeTask = new CancellablePromise(async (resolve, _, onCancel) => {
75
- let cancelled = false;
76
- onCancel(() => {
77
- cancelled = true;
78
- });
79
- const sttStream = this.#stt.stream();
80
- const vadStream = this.#vad.stream();
81
- const audioStreamCo = async () => {
82
- for await (const ev of audioStream) {
83
- if (cancelled) return;
84
- sttStream.pushFrame(ev);
85
- vadStream.pushFrame(ev);
86
- }
87
- };
88
- const vadStreamCo = async () => {
89
- for await (const ev of vadStream) {
90
- if (cancelled) return;
91
- switch (ev.type) {
92
- case VADEventType.START_OF_SPEECH:
93
- this.#speaking = true;
94
- this.emit(0 /* START_OF_SPEECH */, ev);
95
- break;
96
- case VADEventType.INFERENCE_DONE:
97
- this.#speechProbability = ev.probability;
98
- this.emit(1 /* VAD_INFERENCE_DONE */, ev);
99
- break;
100
- case VADEventType.END_OF_SPEECH:
101
- this.#speaking = false;
102
- this.emit(2 /* END_OF_SPEECH */, ev);
103
- break;
104
- }
105
- }
106
- };
107
- const sttStreamCo = async () => {
108
- for await (const ev of sttStream) {
109
- if (cancelled) return;
110
- if (ev.type === SpeechEventType.FINAL_TRANSCRIPT) {
111
- this.emit(3 /* FINAL_TRANSCRIPT */, ev);
112
- } else if (ev.type == SpeechEventType.INTERIM_TRANSCRIPT) {
113
- this.emit(4 /* INTERIM_TRANSCRIPT */, ev);
114
- }
115
- }
116
- };
117
- await Promise.all([audioStreamCo(), vadStreamCo(), sttStreamCo()]);
118
- sttStream.close();
119
- vadStream.close();
120
- resolve();
121
- });
122
- }
123
- }
124
- get speaking() {
125
- return this.#speaking;
126
- }
127
- get speakingProbability() {
128
- return this.#speechProbability;
129
- }
130
- async close() {
131
- if (this.#closed) {
132
- throw new Error("HumanInput already closed");
133
- }
134
- this.#closed = true;
135
- this.#room.removeAllListeners();
136
- this.#speaking = false;
137
- if (this.#recognizeTask) {
138
- await gracefullyCancel(this.#recognizeTask);
139
- }
140
- }
141
- }
142
- export {
143
- HumanInput,
144
- HumanInputEvent
145
- };
146
- //# sourceMappingURL=human_input.js.map