@livekit/agents 0.7.9 → 1.0.0-next.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (627) hide show
  1. package/dist/_exceptions.cjs +109 -0
  2. package/dist/_exceptions.cjs.map +1 -0
  3. package/dist/_exceptions.d.cts +64 -0
  4. package/dist/_exceptions.d.ts +64 -0
  5. package/dist/_exceptions.d.ts.map +1 -0
  6. package/dist/_exceptions.js +80 -0
  7. package/dist/_exceptions.js.map +1 -0
  8. package/dist/audio.cjs +10 -3
  9. package/dist/audio.cjs.map +1 -1
  10. package/dist/audio.d.cts +2 -0
  11. package/dist/audio.d.ts +2 -0
  12. package/dist/audio.d.ts.map +1 -1
  13. package/dist/audio.js +8 -2
  14. package/dist/audio.js.map +1 -1
  15. package/dist/cli.cjs +25 -0
  16. package/dist/cli.cjs.map +1 -1
  17. package/dist/cli.d.ts.map +1 -1
  18. package/dist/cli.js +25 -0
  19. package/dist/cli.js.map +1 -1
  20. package/dist/constants.cjs +6 -3
  21. package/dist/constants.cjs.map +1 -1
  22. package/dist/constants.d.cts +2 -1
  23. package/dist/constants.d.ts +2 -1
  24. package/dist/constants.d.ts.map +1 -1
  25. package/dist/constants.js +4 -2
  26. package/dist/constants.js.map +1 -1
  27. package/dist/http_server.cjs.map +1 -1
  28. package/dist/http_server.d.cts +1 -0
  29. package/dist/http_server.d.ts +1 -0
  30. package/dist/http_server.d.ts.map +1 -1
  31. package/dist/http_server.js.map +1 -1
  32. package/dist/index.cjs +27 -20
  33. package/dist/index.cjs.map +1 -1
  34. package/dist/index.d.cts +13 -10
  35. package/dist/index.d.ts +13 -10
  36. package/dist/index.d.ts.map +1 -1
  37. package/dist/index.js +15 -11
  38. package/dist/index.js.map +1 -1
  39. package/dist/inference_runner.cjs +0 -1
  40. package/dist/inference_runner.cjs.map +1 -1
  41. package/dist/inference_runner.d.cts +2 -3
  42. package/dist/inference_runner.d.ts +2 -3
  43. package/dist/inference_runner.d.ts.map +1 -1
  44. package/dist/inference_runner.js +0 -1
  45. package/dist/inference_runner.js.map +1 -1
  46. package/dist/ipc/inference_proc_executor.cjs +2 -2
  47. package/dist/ipc/inference_proc_executor.cjs.map +1 -1
  48. package/dist/ipc/inference_proc_executor.js +2 -2
  49. package/dist/ipc/inference_proc_executor.js.map +1 -1
  50. package/dist/ipc/job_executor.cjs.map +1 -1
  51. package/dist/ipc/job_executor.js.map +1 -1
  52. package/dist/ipc/job_proc_executor.cjs +1 -0
  53. package/dist/ipc/job_proc_executor.cjs.map +1 -1
  54. package/dist/ipc/job_proc_executor.js +1 -0
  55. package/dist/ipc/job_proc_executor.js.map +1 -1
  56. package/dist/ipc/job_proc_lazy_main.cjs +1 -1
  57. package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
  58. package/dist/ipc/job_proc_lazy_main.js +1 -1
  59. package/dist/ipc/job_proc_lazy_main.js.map +1 -1
  60. package/dist/ipc/supervised_proc.d.cts +1 -1
  61. package/dist/ipc/supervised_proc.d.ts +1 -1
  62. package/dist/ipc/supervised_proc.d.ts.map +1 -1
  63. package/dist/job.cjs +14 -2
  64. package/dist/job.cjs.map +1 -1
  65. package/dist/job.d.cts +8 -0
  66. package/dist/job.d.ts +8 -0
  67. package/dist/job.d.ts.map +1 -1
  68. package/dist/job.js +12 -1
  69. package/dist/job.js.map +1 -1
  70. package/dist/llm/chat_context.cjs +332 -82
  71. package/dist/llm/chat_context.cjs.map +1 -1
  72. package/dist/llm/chat_context.d.cts +152 -48
  73. package/dist/llm/chat_context.d.ts +152 -48
  74. package/dist/llm/chat_context.d.ts.map +1 -1
  75. package/dist/llm/chat_context.js +327 -81
  76. package/dist/llm/chat_context.js.map +1 -1
  77. package/dist/llm/chat_context.test.cjs +380 -0
  78. package/dist/llm/chat_context.test.cjs.map +1 -0
  79. package/dist/llm/chat_context.test.js +385 -0
  80. package/dist/llm/chat_context.test.js.map +1 -0
  81. package/dist/llm/index.cjs +37 -8
  82. package/dist/llm/index.cjs.map +1 -1
  83. package/dist/llm/index.d.cts +7 -3
  84. package/dist/llm/index.d.ts +7 -3
  85. package/dist/llm/index.d.ts.map +1 -1
  86. package/dist/llm/index.js +39 -9
  87. package/dist/llm/index.js.map +1 -1
  88. package/dist/llm/llm.cjs +98 -33
  89. package/dist/llm/llm.cjs.map +1 -1
  90. package/dist/llm/llm.d.cts +50 -24
  91. package/dist/llm/llm.d.ts +50 -24
  92. package/dist/llm/llm.d.ts.map +1 -1
  93. package/dist/llm/llm.js +99 -33
  94. package/dist/llm/llm.js.map +1 -1
  95. package/dist/llm/provider_format/google.cjs +128 -0
  96. package/dist/llm/provider_format/google.cjs.map +1 -0
  97. package/dist/llm/provider_format/google.d.cts +6 -0
  98. package/dist/llm/provider_format/google.d.ts +6 -0
  99. package/dist/llm/provider_format/google.d.ts.map +1 -0
  100. package/dist/llm/provider_format/google.js +104 -0
  101. package/dist/llm/provider_format/google.js.map +1 -0
  102. package/dist/llm/provider_format/google.test.cjs +676 -0
  103. package/dist/llm/provider_format/google.test.cjs.map +1 -0
  104. package/dist/llm/provider_format/google.test.js +675 -0
  105. package/dist/llm/provider_format/google.test.js.map +1 -0
  106. package/dist/llm/provider_format/index.cjs +40 -0
  107. package/dist/llm/provider_format/index.cjs.map +1 -0
  108. package/dist/llm/provider_format/index.d.cts +4 -0
  109. package/dist/llm/provider_format/index.d.ts +4 -0
  110. package/dist/llm/provider_format/index.d.ts.map +1 -0
  111. package/dist/llm/provider_format/index.js +16 -0
  112. package/dist/llm/provider_format/index.js.map +1 -0
  113. package/dist/llm/provider_format/openai.cjs +116 -0
  114. package/dist/llm/provider_format/openai.cjs.map +1 -0
  115. package/dist/llm/provider_format/openai.d.cts +3 -0
  116. package/dist/llm/provider_format/openai.d.ts +3 -0
  117. package/dist/llm/provider_format/openai.d.ts.map +1 -0
  118. package/dist/llm/provider_format/openai.js +92 -0
  119. package/dist/llm/provider_format/openai.js.map +1 -0
  120. package/dist/llm/provider_format/openai.test.cjs +490 -0
  121. package/dist/llm/provider_format/openai.test.cjs.map +1 -0
  122. package/dist/llm/provider_format/openai.test.js +489 -0
  123. package/dist/llm/provider_format/openai.test.js.map +1 -0
  124. package/dist/llm/provider_format/utils.cjs +146 -0
  125. package/dist/llm/provider_format/utils.cjs.map +1 -0
  126. package/dist/llm/provider_format/utils.d.cts +38 -0
  127. package/dist/llm/provider_format/utils.d.ts +38 -0
  128. package/dist/llm/provider_format/utils.d.ts.map +1 -0
  129. package/dist/llm/provider_format/utils.js +122 -0
  130. package/dist/llm/provider_format/utils.js.map +1 -0
  131. package/dist/llm/realtime.cjs +77 -0
  132. package/dist/llm/realtime.cjs.map +1 -0
  133. package/dist/llm/realtime.d.cts +98 -0
  134. package/dist/llm/realtime.d.ts +98 -0
  135. package/dist/llm/realtime.d.ts.map +1 -0
  136. package/dist/llm/realtime.js +52 -0
  137. package/dist/llm/realtime.js.map +1 -0
  138. package/dist/llm/remote_chat_context.cjs +112 -0
  139. package/dist/llm/remote_chat_context.cjs.map +1 -0
  140. package/dist/llm/remote_chat_context.d.cts +23 -0
  141. package/dist/llm/remote_chat_context.d.ts +23 -0
  142. package/dist/llm/remote_chat_context.d.ts.map +1 -0
  143. package/dist/llm/remote_chat_context.js +88 -0
  144. package/dist/llm/remote_chat_context.js.map +1 -0
  145. package/dist/llm/remote_chat_context.test.cjs +225 -0
  146. package/dist/llm/remote_chat_context.test.cjs.map +1 -0
  147. package/dist/llm/remote_chat_context.test.js +224 -0
  148. package/dist/llm/remote_chat_context.test.js.map +1 -0
  149. package/dist/llm/tool_context.cjs +111 -0
  150. package/dist/llm/tool_context.cjs.map +1 -0
  151. package/dist/llm/tool_context.d.cts +125 -0
  152. package/dist/llm/tool_context.d.ts +125 -0
  153. package/dist/llm/tool_context.d.ts.map +1 -0
  154. package/dist/llm/tool_context.js +80 -0
  155. package/dist/llm/tool_context.js.map +1 -0
  156. package/dist/llm/tool_context.test.cjs +162 -0
  157. package/dist/llm/tool_context.test.cjs.map +1 -0
  158. package/dist/llm/tool_context.test.js +161 -0
  159. package/dist/llm/tool_context.test.js.map +1 -0
  160. package/dist/llm/tool_context.type.test.cjs +92 -0
  161. package/dist/llm/tool_context.type.test.cjs.map +1 -0
  162. package/dist/llm/tool_context.type.test.js +91 -0
  163. package/dist/llm/tool_context.type.test.js.map +1 -0
  164. package/dist/llm/utils.cjs +260 -0
  165. package/dist/llm/utils.cjs.map +1 -0
  166. package/dist/llm/utils.d.cts +42 -0
  167. package/dist/llm/utils.d.ts +42 -0
  168. package/dist/llm/utils.d.ts.map +1 -0
  169. package/dist/llm/utils.js +223 -0
  170. package/dist/llm/utils.js.map +1 -0
  171. package/dist/llm/utils.test.cjs +513 -0
  172. package/dist/llm/utils.test.cjs.map +1 -0
  173. package/dist/llm/utils.test.js +490 -0
  174. package/dist/llm/utils.test.js.map +1 -0
  175. package/dist/metrics/base.cjs +0 -27
  176. package/dist/metrics/base.cjs.map +1 -1
  177. package/dist/metrics/base.d.cts +105 -63
  178. package/dist/metrics/base.d.ts +105 -63
  179. package/dist/metrics/base.d.ts.map +1 -1
  180. package/dist/metrics/base.js +0 -19
  181. package/dist/metrics/base.js.map +1 -1
  182. package/dist/metrics/index.cjs +0 -3
  183. package/dist/metrics/index.cjs.map +1 -1
  184. package/dist/metrics/index.d.cts +2 -3
  185. package/dist/metrics/index.d.ts +2 -3
  186. package/dist/metrics/index.d.ts.map +1 -1
  187. package/dist/metrics/index.js +0 -2
  188. package/dist/metrics/index.js.map +1 -1
  189. package/dist/metrics/usage_collector.cjs +17 -12
  190. package/dist/metrics/usage_collector.cjs.map +1 -1
  191. package/dist/metrics/usage_collector.d.cts +3 -2
  192. package/dist/metrics/usage_collector.d.ts +3 -2
  193. package/dist/metrics/usage_collector.d.ts.map +1 -1
  194. package/dist/metrics/usage_collector.js +17 -12
  195. package/dist/metrics/usage_collector.js.map +1 -1
  196. package/dist/metrics/utils.cjs +22 -59
  197. package/dist/metrics/utils.cjs.map +1 -1
  198. package/dist/metrics/utils.d.cts +1 -8
  199. package/dist/metrics/utils.d.ts +1 -8
  200. package/dist/metrics/utils.d.ts.map +1 -1
  201. package/dist/metrics/utils.js +22 -52
  202. package/dist/metrics/utils.js.map +1 -1
  203. package/dist/multimodal/index.cjs +0 -2
  204. package/dist/multimodal/index.cjs.map +1 -1
  205. package/dist/multimodal/index.d.cts +0 -1
  206. package/dist/multimodal/index.d.ts +0 -1
  207. package/dist/multimodal/index.d.ts.map +1 -1
  208. package/dist/multimodal/index.js +0 -1
  209. package/dist/multimodal/index.js.map +1 -1
  210. package/dist/plugin.cjs +24 -8
  211. package/dist/plugin.cjs.map +1 -1
  212. package/dist/plugin.d.cts +18 -4
  213. package/dist/plugin.d.ts +18 -4
  214. package/dist/plugin.d.ts.map +1 -1
  215. package/dist/plugin.js +22 -7
  216. package/dist/plugin.js.map +1 -1
  217. package/dist/stream/deferred_stream.cjs +98 -0
  218. package/dist/stream/deferred_stream.cjs.map +1 -0
  219. package/dist/stream/deferred_stream.d.cts +27 -0
  220. package/dist/stream/deferred_stream.d.ts +27 -0
  221. package/dist/stream/deferred_stream.d.ts.map +1 -0
  222. package/dist/stream/deferred_stream.js +73 -0
  223. package/dist/stream/deferred_stream.js.map +1 -0
  224. package/dist/stream/deferred_stream.test.cjs +527 -0
  225. package/dist/stream/deferred_stream.test.cjs.map +1 -0
  226. package/dist/stream/deferred_stream.test.js +526 -0
  227. package/dist/stream/deferred_stream.test.js.map +1 -0
  228. package/dist/stream/identity_transform.cjs +42 -0
  229. package/dist/stream/identity_transform.cjs.map +1 -0
  230. package/dist/stream/identity_transform.d.cts +6 -0
  231. package/dist/stream/identity_transform.d.ts +6 -0
  232. package/dist/stream/identity_transform.d.ts.map +1 -0
  233. package/dist/stream/identity_transform.js +18 -0
  234. package/dist/stream/identity_transform.js.map +1 -0
  235. package/dist/stream/identity_transform.test.cjs +125 -0
  236. package/dist/stream/identity_transform.test.cjs.map +1 -0
  237. package/dist/stream/identity_transform.test.js +124 -0
  238. package/dist/stream/identity_transform.test.js.map +1 -0
  239. package/dist/stream/index.cjs +38 -0
  240. package/dist/stream/index.cjs.map +1 -0
  241. package/dist/stream/index.d.cts +5 -0
  242. package/dist/stream/index.d.ts +5 -0
  243. package/dist/stream/index.d.ts.map +1 -0
  244. package/dist/stream/index.js +11 -0
  245. package/dist/stream/index.js.map +1 -0
  246. package/dist/stream/merge_readable_streams.cjs +59 -0
  247. package/dist/stream/merge_readable_streams.cjs.map +1 -0
  248. package/dist/stream/merge_readable_streams.d.cts +4 -0
  249. package/dist/stream/merge_readable_streams.d.ts +4 -0
  250. package/dist/stream/merge_readable_streams.d.ts.map +1 -0
  251. package/dist/stream/merge_readable_streams.js +35 -0
  252. package/dist/stream/merge_readable_streams.js.map +1 -0
  253. package/dist/stream/stream_channel.cjs +47 -0
  254. package/dist/stream/stream_channel.cjs.map +1 -0
  255. package/dist/stream/stream_channel.d.cts +9 -0
  256. package/dist/stream/stream_channel.d.ts +9 -0
  257. package/dist/stream/stream_channel.d.ts.map +1 -0
  258. package/dist/stream/stream_channel.js +23 -0
  259. package/dist/stream/stream_channel.js.map +1 -0
  260. package/dist/stream/stream_channel.test.cjs +97 -0
  261. package/dist/stream/stream_channel.test.cjs.map +1 -0
  262. package/dist/stream/stream_channel.test.js +96 -0
  263. package/dist/stream/stream_channel.test.js.map +1 -0
  264. package/dist/stt/stream_adapter.cjs +3 -4
  265. package/dist/stt/stream_adapter.cjs.map +1 -1
  266. package/dist/stt/stream_adapter.d.cts +1 -0
  267. package/dist/stt/stream_adapter.d.ts +1 -0
  268. package/dist/stt/stream_adapter.d.ts.map +1 -1
  269. package/dist/stt/stream_adapter.js +3 -4
  270. package/dist/stt/stream_adapter.js.map +1 -1
  271. package/dist/stt/stt.cjs +101 -10
  272. package/dist/stt/stt.cjs.map +1 -1
  273. package/dist/stt/stt.d.cts +26 -5
  274. package/dist/stt/stt.d.ts +26 -5
  275. package/dist/stt/stt.d.ts.map +1 -1
  276. package/dist/stt/stt.js +102 -11
  277. package/dist/stt/stt.js.map +1 -1
  278. package/dist/tokenize/basic/basic.cjs +10 -5
  279. package/dist/tokenize/basic/basic.cjs.map +1 -1
  280. package/dist/tokenize/basic/basic.d.cts +7 -1
  281. package/dist/tokenize/basic/basic.d.ts +7 -1
  282. package/dist/tokenize/basic/basic.d.ts.map +1 -1
  283. package/dist/tokenize/basic/basic.js +10 -5
  284. package/dist/tokenize/basic/basic.js.map +1 -1
  285. package/dist/tokenize/basic/sentence.cjs +14 -6
  286. package/dist/tokenize/basic/sentence.cjs.map +1 -1
  287. package/dist/tokenize/basic/sentence.d.cts +1 -1
  288. package/dist/tokenize/basic/sentence.d.ts +1 -1
  289. package/dist/tokenize/basic/sentence.d.ts.map +1 -1
  290. package/dist/tokenize/basic/sentence.js +14 -6
  291. package/dist/tokenize/basic/sentence.js.map +1 -1
  292. package/dist/tokenize/token_stream.cjs +5 -3
  293. package/dist/tokenize/token_stream.cjs.map +1 -1
  294. package/dist/tokenize/token_stream.d.cts +1 -0
  295. package/dist/tokenize/token_stream.d.ts +1 -0
  296. package/dist/tokenize/token_stream.d.ts.map +1 -1
  297. package/dist/tokenize/token_stream.js +6 -4
  298. package/dist/tokenize/token_stream.js.map +1 -1
  299. package/dist/transcription.cjs +1 -2
  300. package/dist/transcription.cjs.map +1 -1
  301. package/dist/transcription.d.ts.map +1 -1
  302. package/dist/transcription.js +2 -3
  303. package/dist/transcription.js.map +1 -1
  304. package/dist/tts/index.cjs +2 -4
  305. package/dist/tts/index.cjs.map +1 -1
  306. package/dist/tts/index.d.cts +1 -1
  307. package/dist/tts/index.d.ts +1 -1
  308. package/dist/tts/index.d.ts.map +1 -1
  309. package/dist/tts/index.js +1 -3
  310. package/dist/tts/index.js.map +1 -1
  311. package/dist/tts/stream_adapter.cjs +26 -13
  312. package/dist/tts/stream_adapter.cjs.map +1 -1
  313. package/dist/tts/stream_adapter.d.cts +1 -1
  314. package/dist/tts/stream_adapter.d.ts +1 -1
  315. package/dist/tts/stream_adapter.d.ts.map +1 -1
  316. package/dist/tts/stream_adapter.js +27 -14
  317. package/dist/tts/stream_adapter.js.map +1 -1
  318. package/dist/tts/tts.cjs +157 -25
  319. package/dist/tts/tts.cjs.map +1 -1
  320. package/dist/tts/tts.d.cts +29 -5
  321. package/dist/tts/tts.d.ts +29 -5
  322. package/dist/tts/tts.d.ts.map +1 -1
  323. package/dist/tts/tts.js +157 -24
  324. package/dist/tts/tts.js.map +1 -1
  325. package/dist/types.cjs +60 -0
  326. package/dist/types.cjs.map +1 -0
  327. package/dist/types.d.cts +13 -0
  328. package/dist/types.d.ts +13 -0
  329. package/dist/types.d.ts.map +1 -0
  330. package/dist/types.js +35 -0
  331. package/dist/types.js.map +1 -0
  332. package/dist/utils.cjs +281 -27
  333. package/dist/utils.cjs.map +1 -1
  334. package/dist/utils.d.cts +134 -9
  335. package/dist/utils.d.ts +134 -9
  336. package/dist/utils.d.ts.map +1 -1
  337. package/dist/utils.js +265 -26
  338. package/dist/utils.js.map +1 -1
  339. package/dist/utils.test.cjs +492 -0
  340. package/dist/utils.test.cjs.map +1 -0
  341. package/dist/utils.test.js +498 -0
  342. package/dist/utils.test.js.map +1 -0
  343. package/dist/vad.cjs +76 -20
  344. package/dist/vad.cjs.map +1 -1
  345. package/dist/vad.d.cts +25 -5
  346. package/dist/vad.d.ts +25 -5
  347. package/dist/vad.d.ts.map +1 -1
  348. package/dist/vad.js +76 -20
  349. package/dist/vad.js.map +1 -1
  350. package/dist/voice/agent.cjs +245 -0
  351. package/dist/voice/agent.cjs.map +1 -0
  352. package/dist/voice/agent.d.cts +78 -0
  353. package/dist/voice/agent.d.ts +78 -0
  354. package/dist/voice/agent.d.ts.map +1 -0
  355. package/dist/voice/agent.js +220 -0
  356. package/dist/voice/agent.js.map +1 -0
  357. package/dist/voice/agent.test.cjs +61 -0
  358. package/dist/voice/agent.test.cjs.map +1 -0
  359. package/dist/voice/agent.test.js +60 -0
  360. package/dist/voice/agent.test.js.map +1 -0
  361. package/dist/voice/agent_activity.cjs +1453 -0
  362. package/dist/voice/agent_activity.cjs.map +1 -0
  363. package/dist/voice/agent_activity.d.cts +94 -0
  364. package/dist/voice/agent_activity.d.ts +94 -0
  365. package/dist/voice/agent_activity.d.ts.map +1 -0
  366. package/dist/voice/agent_activity.js +1449 -0
  367. package/dist/voice/agent_activity.js.map +1 -0
  368. package/dist/voice/agent_session.cjs +312 -0
  369. package/dist/voice/agent_session.cjs.map +1 -0
  370. package/dist/voice/agent_session.d.cts +121 -0
  371. package/dist/voice/agent_session.d.ts +121 -0
  372. package/dist/voice/agent_session.d.ts.map +1 -0
  373. package/dist/voice/agent_session.js +295 -0
  374. package/dist/voice/agent_session.js.map +1 -0
  375. package/dist/voice/audio_recognition.cjs +375 -0
  376. package/dist/voice/audio_recognition.cjs.map +1 -0
  377. package/dist/voice/audio_recognition.d.cts +80 -0
  378. package/dist/voice/audio_recognition.d.ts +80 -0
  379. package/dist/voice/audio_recognition.d.ts.map +1 -0
  380. package/dist/voice/audio_recognition.js +351 -0
  381. package/dist/voice/audio_recognition.js.map +1 -0
  382. package/dist/voice/events.cjs +145 -0
  383. package/dist/voice/events.cjs.map +1 -0
  384. package/dist/voice/events.d.cts +124 -0
  385. package/dist/voice/events.d.ts +124 -0
  386. package/dist/voice/events.d.ts.map +1 -0
  387. package/dist/voice/events.js +110 -0
  388. package/dist/voice/events.js.map +1 -0
  389. package/dist/voice/generation.cjs +700 -0
  390. package/dist/voice/generation.cjs.map +1 -0
  391. package/dist/voice/generation.d.cts +115 -0
  392. package/dist/voice/generation.d.ts +115 -0
  393. package/dist/voice/generation.d.ts.map +1 -0
  394. package/dist/voice/generation.js +672 -0
  395. package/dist/voice/generation.js.map +1 -0
  396. package/dist/voice/index.cjs +40 -0
  397. package/dist/voice/index.cjs.map +1 -0
  398. package/dist/voice/index.d.cts +5 -0
  399. package/dist/voice/index.d.ts +5 -0
  400. package/dist/voice/index.d.ts.map +1 -0
  401. package/dist/voice/index.js +11 -0
  402. package/dist/voice/index.js.map +1 -0
  403. package/dist/voice/io.cjs +245 -0
  404. package/dist/voice/io.cjs.map +1 -0
  405. package/dist/voice/io.d.cts +101 -0
  406. package/dist/voice/io.d.ts +101 -0
  407. package/dist/voice/io.d.ts.map +1 -0
  408. package/dist/voice/io.js +217 -0
  409. package/dist/voice/io.js.map +1 -0
  410. package/dist/voice/room_io/_input.cjs +121 -0
  411. package/dist/voice/room_io/_input.cjs.map +1 -0
  412. package/dist/voice/room_io/_input.d.cts +24 -0
  413. package/dist/voice/room_io/_input.d.ts +24 -0
  414. package/dist/voice/room_io/_input.d.ts.map +1 -0
  415. package/dist/voice/room_io/_input.js +102 -0
  416. package/dist/voice/room_io/_input.js.map +1 -0
  417. package/dist/voice/room_io/_output.cjs +358 -0
  418. package/dist/voice/room_io/_output.cjs.map +1 -0
  419. package/dist/voice/room_io/_output.d.cts +75 -0
  420. package/dist/voice/room_io/_output.d.ts +75 -0
  421. package/dist/voice/room_io/_output.d.ts.map +1 -0
  422. package/dist/voice/room_io/_output.js +342 -0
  423. package/dist/voice/room_io/_output.js.map +1 -0
  424. package/dist/voice/room_io/index.cjs +25 -0
  425. package/dist/voice/room_io/index.cjs.map +1 -0
  426. package/dist/voice/room_io/index.d.cts +3 -0
  427. package/dist/voice/room_io/index.d.ts +3 -0
  428. package/dist/voice/room_io/index.d.ts.map +1 -0
  429. package/dist/voice/room_io/index.js +3 -0
  430. package/dist/voice/room_io/index.js.map +1 -0
  431. package/dist/voice/room_io/room_io.cjs +370 -0
  432. package/dist/voice/room_io/room_io.cjs.map +1 -0
  433. package/dist/voice/room_io/room_io.d.cts +73 -0
  434. package/dist/voice/room_io/room_io.d.ts +73 -0
  435. package/dist/voice/room_io/room_io.d.ts.map +1 -0
  436. package/dist/voice/room_io/room_io.js +361 -0
  437. package/dist/voice/room_io/room_io.js.map +1 -0
  438. package/dist/{pipeline/index.cjs → voice/run_context.cjs} +16 -11
  439. package/dist/voice/run_context.cjs.map +1 -0
  440. package/dist/voice/run_context.d.cts +12 -0
  441. package/dist/voice/run_context.d.ts +12 -0
  442. package/dist/voice/run_context.d.ts.map +1 -0
  443. package/dist/voice/run_context.js +14 -0
  444. package/dist/voice/run_context.js.map +1 -0
  445. package/dist/voice/speech_handle.cjs +105 -0
  446. package/dist/voice/speech_handle.cjs.map +1 -0
  447. package/dist/voice/speech_handle.d.cts +46 -0
  448. package/dist/voice/speech_handle.d.ts +46 -0
  449. package/dist/voice/speech_handle.d.ts.map +1 -0
  450. package/dist/voice/speech_handle.js +81 -0
  451. package/dist/voice/speech_handle.js.map +1 -0
  452. package/dist/voice/transcription/_utils.cjs +45 -0
  453. package/dist/voice/transcription/_utils.cjs.map +1 -0
  454. package/dist/voice/transcription/_utils.d.cts +3 -0
  455. package/dist/voice/transcription/_utils.d.ts +3 -0
  456. package/dist/voice/transcription/_utils.d.ts.map +1 -0
  457. package/dist/voice/transcription/_utils.js +21 -0
  458. package/dist/voice/transcription/_utils.js.map +1 -0
  459. package/dist/voice/transcription/index.cjs +23 -0
  460. package/dist/voice/transcription/index.cjs.map +1 -0
  461. package/dist/voice/transcription/index.d.cts +2 -0
  462. package/dist/voice/transcription/index.d.ts +2 -0
  463. package/dist/voice/transcription/index.d.ts.map +1 -0
  464. package/dist/voice/transcription/index.js +2 -0
  465. package/dist/voice/transcription/index.js.map +1 -0
  466. package/dist/voice/transcription/synchronizer.cjs +380 -0
  467. package/dist/voice/transcription/synchronizer.cjs.map +1 -0
  468. package/dist/voice/transcription/synchronizer.d.cts +86 -0
  469. package/dist/voice/transcription/synchronizer.d.ts +86 -0
  470. package/dist/voice/transcription/synchronizer.d.ts.map +1 -0
  471. package/dist/voice/transcription/synchronizer.js +355 -0
  472. package/dist/voice/transcription/synchronizer.js.map +1 -0
  473. package/dist/worker.cjs +22 -4
  474. package/dist/worker.cjs.map +1 -1
  475. package/dist/worker.d.cts +1 -1
  476. package/dist/worker.d.ts +1 -1
  477. package/dist/worker.d.ts.map +1 -1
  478. package/dist/worker.js +22 -4
  479. package/dist/worker.js.map +1 -1
  480. package/package.json +9 -2
  481. package/src/_exceptions.ts +137 -0
  482. package/src/audio.ts +12 -1
  483. package/src/cli.ts +37 -0
  484. package/src/constants.ts +2 -1
  485. package/src/http_server.ts +1 -0
  486. package/src/index.ts +13 -10
  487. package/src/inference_runner.ts +2 -3
  488. package/src/ipc/inference_proc_executor.ts +2 -2
  489. package/src/ipc/job_executor.ts +1 -1
  490. package/src/ipc/job_proc_executor.ts +1 -1
  491. package/src/ipc/job_proc_lazy_main.ts +1 -1
  492. package/src/job.ts +18 -0
  493. package/src/llm/__snapshots__/chat_context.test.ts.snap +527 -0
  494. package/src/llm/__snapshots__/tool_context.test.ts.snap +177 -0
  495. package/src/llm/__snapshots__/utils.test.ts.snap +65 -0
  496. package/src/llm/chat_context.test.ts +450 -0
  497. package/src/llm/chat_context.ts +501 -103
  498. package/src/llm/index.ts +53 -18
  499. package/src/llm/llm.ts +149 -50
  500. package/src/llm/provider_format/google.test.ts +772 -0
  501. package/src/llm/provider_format/google.ts +130 -0
  502. package/src/llm/provider_format/index.ts +23 -0
  503. package/src/llm/provider_format/openai.test.ts +581 -0
  504. package/src/llm/provider_format/openai.ts +118 -0
  505. package/src/llm/provider_format/utils.ts +183 -0
  506. package/src/llm/realtime.ts +151 -0
  507. package/src/llm/remote_chat_context.test.ts +290 -0
  508. package/src/llm/remote_chat_context.ts +114 -0
  509. package/src/llm/tool_context.test.ts +198 -0
  510. package/src/llm/tool_context.ts +259 -0
  511. package/src/llm/tool_context.type.test.ts +115 -0
  512. package/src/llm/utils.test.ts +670 -0
  513. package/src/llm/utils.ts +324 -0
  514. package/src/metrics/base.ts +110 -78
  515. package/src/metrics/index.ts +3 -9
  516. package/src/metrics/usage_collector.ts +19 -13
  517. package/src/metrics/utils.ts +24 -69
  518. package/src/multimodal/index.ts +0 -1
  519. package/src/plugin.ts +26 -8
  520. package/src/stream/deferred_stream.test.ts +755 -0
  521. package/src/stream/deferred_stream.ts +110 -0
  522. package/src/stream/identity_transform.test.ts +179 -0
  523. package/src/stream/identity_transform.ts +18 -0
  524. package/src/stream/index.ts +7 -0
  525. package/src/stream/merge_readable_streams.ts +40 -0
  526. package/src/stream/stream_channel.test.ts +129 -0
  527. package/src/stream/stream_channel.ts +32 -0
  528. package/src/stt/stream_adapter.ts +3 -5
  529. package/src/stt/stt.ts +135 -17
  530. package/src/tokenize/basic/basic.ts +13 -5
  531. package/src/tokenize/basic/sentence.ts +20 -6
  532. package/src/tokenize/token_stream.ts +7 -4
  533. package/src/transcription.ts +2 -3
  534. package/src/tts/index.ts +0 -1
  535. package/src/tts/stream_adapter.ts +42 -16
  536. package/src/tts/tts.ts +203 -21
  537. package/src/types.ts +42 -0
  538. package/src/utils.test.ts +658 -0
  539. package/src/utils.ts +375 -44
  540. package/src/vad.ts +90 -22
  541. package/src/voice/agent.test.ts +80 -0
  542. package/src/voice/agent.ts +332 -0
  543. package/src/voice/agent_activity.ts +1913 -0
  544. package/src/voice/agent_session.ts +460 -0
  545. package/src/voice/audio_recognition.ts +474 -0
  546. package/src/voice/events.ts +252 -0
  547. package/src/voice/generation.ts +881 -0
  548. package/src/voice/index.ts +7 -0
  549. package/src/voice/io.ts +304 -0
  550. package/src/voice/room_io/_input.ts +144 -0
  551. package/src/voice/room_io/_output.ts +436 -0
  552. package/src/voice/room_io/index.ts +5 -0
  553. package/src/voice/room_io/room_io.ts +495 -0
  554. package/src/voice/run_context.ts +20 -0
  555. package/src/voice/speech_handle.ts +104 -0
  556. package/src/voice/transcription/_utils.ts +25 -0
  557. package/src/voice/transcription/index.ts +4 -0
  558. package/src/voice/transcription/synchronizer.ts +478 -0
  559. package/src/worker.ts +22 -2
  560. package/dist/llm/function_context.cjs +0 -103
  561. package/dist/llm/function_context.cjs.map +0 -1
  562. package/dist/llm/function_context.d.cts +0 -47
  563. package/dist/llm/function_context.d.ts +0 -47
  564. package/dist/llm/function_context.d.ts.map +0 -1
  565. package/dist/llm/function_context.js +0 -78
  566. package/dist/llm/function_context.js.map +0 -1
  567. package/dist/llm/function_context.test.cjs +0 -218
  568. package/dist/llm/function_context.test.cjs.map +0 -1
  569. package/dist/llm/function_context.test.js +0 -217
  570. package/dist/llm/function_context.test.js.map +0 -1
  571. package/dist/multimodal/multimodal_agent.cjs +0 -486
  572. package/dist/multimodal/multimodal_agent.cjs.map +0 -1
  573. package/dist/multimodal/multimodal_agent.d.cts +0 -48
  574. package/dist/multimodal/multimodal_agent.d.ts +0 -48
  575. package/dist/multimodal/multimodal_agent.d.ts.map +0 -1
  576. package/dist/multimodal/multimodal_agent.js +0 -461
  577. package/dist/multimodal/multimodal_agent.js.map +0 -1
  578. package/dist/pipeline/agent_output.cjs +0 -197
  579. package/dist/pipeline/agent_output.cjs.map +0 -1
  580. package/dist/pipeline/agent_output.d.cts +0 -33
  581. package/dist/pipeline/agent_output.d.ts +0 -33
  582. package/dist/pipeline/agent_output.d.ts.map +0 -1
  583. package/dist/pipeline/agent_output.js +0 -172
  584. package/dist/pipeline/agent_output.js.map +0 -1
  585. package/dist/pipeline/agent_playout.cjs +0 -175
  586. package/dist/pipeline/agent_playout.cjs.map +0 -1
  587. package/dist/pipeline/agent_playout.d.cts +0 -40
  588. package/dist/pipeline/agent_playout.d.ts +0 -40
  589. package/dist/pipeline/agent_playout.d.ts.map +0 -1
  590. package/dist/pipeline/agent_playout.js +0 -139
  591. package/dist/pipeline/agent_playout.js.map +0 -1
  592. package/dist/pipeline/human_input.cjs +0 -171
  593. package/dist/pipeline/human_input.cjs.map +0 -1
  594. package/dist/pipeline/human_input.d.cts +0 -30
  595. package/dist/pipeline/human_input.d.ts +0 -30
  596. package/dist/pipeline/human_input.d.ts.map +0 -1
  597. package/dist/pipeline/human_input.js +0 -146
  598. package/dist/pipeline/human_input.js.map +0 -1
  599. package/dist/pipeline/index.cjs.map +0 -1
  600. package/dist/pipeline/index.d.cts +0 -2
  601. package/dist/pipeline/index.d.ts +0 -2
  602. package/dist/pipeline/index.d.ts.map +0 -1
  603. package/dist/pipeline/index.js +0 -11
  604. package/dist/pipeline/index.js.map +0 -1
  605. package/dist/pipeline/pipeline_agent.cjs +0 -859
  606. package/dist/pipeline/pipeline_agent.cjs.map +0 -1
  607. package/dist/pipeline/pipeline_agent.d.cts +0 -150
  608. package/dist/pipeline/pipeline_agent.d.ts +0 -150
  609. package/dist/pipeline/pipeline_agent.d.ts.map +0 -1
  610. package/dist/pipeline/pipeline_agent.js +0 -837
  611. package/dist/pipeline/pipeline_agent.js.map +0 -1
  612. package/dist/pipeline/speech_handle.cjs +0 -176
  613. package/dist/pipeline/speech_handle.cjs.map +0 -1
  614. package/dist/pipeline/speech_handle.d.cts +0 -37
  615. package/dist/pipeline/speech_handle.d.ts +0 -37
  616. package/dist/pipeline/speech_handle.d.ts.map +0 -1
  617. package/dist/pipeline/speech_handle.js +0 -152
  618. package/dist/pipeline/speech_handle.js.map +0 -1
  619. package/src/llm/function_context.test.ts +0 -248
  620. package/src/llm/function_context.ts +0 -142
  621. package/src/multimodal/multimodal_agent.ts +0 -592
  622. package/src/pipeline/agent_output.ts +0 -219
  623. package/src/pipeline/agent_playout.ts +0 -192
  624. package/src/pipeline/human_input.ts +0 -188
  625. package/src/pipeline/index.ts +0 -15
  626. package/src/pipeline/pipeline_agent.ts +0 -1197
  627. package/src/pipeline/speech_handle.ts +0 -201
package/src/stt/stt.ts CHANGED
@@ -1,12 +1,19 @@
1
1
  // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
2
2
  //
3
3
  // SPDX-License-Identifier: Apache-2.0
4
- import type { AudioFrame } from '@livekit/rtc-node';
4
+ import { type AudioFrame, AudioResampler } from '@livekit/rtc-node';
5
5
  import type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';
6
+ import { delay } from '@std/async/delay';
6
7
  import { EventEmitter } from 'node:events';
8
+ import type { ReadableStream } from 'node:stream/web';
9
+ import { APIConnectionError, APIError } from '../_exceptions.js';
10
+ import { calculateAudioDuration } from '../audio.js';
11
+ import { log } from '../log.js';
7
12
  import type { STTMetrics } from '../metrics/base.js';
13
+ import { DeferredReadableStream } from '../stream/deferred_stream.js';
14
+ import { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';
8
15
  import type { AudioBuffer } from '../utils.js';
9
- import { AsyncIterableQueue } from '../utils.js';
16
+ import { AsyncIterableQueue, startSoon, toError } from '../utils.js';
10
17
 
11
18
  /** Indicates start/middle/end of speech */
12
19
  export enum SpeechEventType {
@@ -32,7 +39,6 @@ export enum SpeechEventType {
32
39
  END_OF_SPEECH = 3,
33
40
  /** Usage event, emitted periodically to indicate usage metrics. */
34
41
  RECOGNITION_USAGE = 4,
35
- METRICS_COLLECTED = 5,
36
42
  }
37
43
 
38
44
  /** SpeechData contains metadata about this {@link SpeechEvent}. */
@@ -67,8 +73,17 @@ export interface STTCapabilities {
67
73
  interimResults: boolean;
68
74
  }
69
75
 
76
+ export interface STTError {
77
+ type: 'stt_error';
78
+ timestamp: number;
79
+ label: string;
80
+ error: Error;
81
+ recoverable: boolean;
82
+ }
83
+
70
84
  export type STTCallbacks = {
71
- [SpeechEventType.METRICS_COLLECTED]: (metrics: STTMetrics) => void;
85
+ ['metrics_collected']: (metrics: STTMetrics) => void;
86
+ ['error']: (error: STTError) => void;
72
87
  };
73
88
 
74
89
  /**
@@ -97,19 +112,17 @@ export abstract class STT extends (EventEmitter as new () => TypedEmitter<STTCal
97
112
  const startTime = process.hrtime.bigint();
98
113
  const event = await this._recognize(frame);
99
114
  const duration = Number((process.hrtime.bigint() - startTime) / BigInt(1000000));
100
- this.emit(SpeechEventType.METRICS_COLLECTED, {
115
+ this.emit('metrics_collected', {
116
+ type: 'stt_metrics',
101
117
  requestId: event.requestId ?? '',
102
118
  timestamp: Date.now(),
103
119
  duration,
104
120
  label: this.label,
105
- audioDuration: Array.isArray(frame)
106
- ? frame.reduce((sum, a) => sum + a.samplesPerChannel / a.sampleRate, 0)
107
- : frame.samplesPerChannel / frame.sampleRate,
121
+ audioDuration: calculateAudioDuration(frame),
108
122
  streamed: false,
109
123
  });
110
124
  return event;
111
125
  }
112
-
113
126
  protected abstract _recognize(frame: AudioBuffer): Promise<SpeechEvent>;
114
127
 
115
128
  /**
@@ -140,35 +153,126 @@ export abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent>
140
153
  protected input = new AsyncIterableQueue<AudioFrame | typeof SpeechStream.FLUSH_SENTINEL>();
141
154
  protected output = new AsyncIterableQueue<SpeechEvent>();
142
155
  protected queue = new AsyncIterableQueue<SpeechEvent>();
156
+ protected neededSampleRate?: number;
157
+ protected resampler?: AudioResampler;
143
158
  abstract label: string;
144
159
  protected closed = false;
145
160
  #stt: STT;
161
+ private deferredInputStream: DeferredReadableStream<AudioFrame>;
162
+ private logger = log();
163
+ private _connOptions: APIConnectOptions;
146
164
 
147
- constructor(stt: STT) {
165
+ constructor(
166
+ stt: STT,
167
+ sampleRate?: number,
168
+ connectionOptions: APIConnectOptions = DEFAULT_API_CONNECT_OPTIONS,
169
+ ) {
148
170
  this.#stt = stt;
171
+ this._connOptions = connectionOptions;
172
+ this.deferredInputStream = new DeferredReadableStream<AudioFrame>();
173
+ this.neededSampleRate = sampleRate;
149
174
  this.monitorMetrics();
175
+ this.pumpInput();
176
+
177
+ // this is a hack to imitate asyncio.create_task so that mainTask
178
+ // is run **after** the constructor has finished. Otherwise we get
179
+ // runtime error when trying to access class variables in the
180
+ // `run` method.
181
+ startSoon(() => this.mainTask().then(() => this.queue.close()));
150
182
  }
151
183
 
152
- protected async monitorMetrics() {
153
- const startTime = process.hrtime.bigint();
184
+ private async mainTask() {
185
+ for (let i = 0; i < this._connOptions.maxRetry + 1; i++) {
186
+ try {
187
+ return await this.run();
188
+ } catch (error) {
189
+ if (error instanceof APIError) {
190
+ const retryInterval = this._connOptions._intervalForRetry(i);
191
+
192
+ if (this._connOptions.maxRetry === 0 || !error.retryable) {
193
+ this.emitError({ error, recoverable: false });
194
+ throw error;
195
+ } else if (i === this._connOptions.maxRetry) {
196
+ this.emitError({ error, recoverable: false });
197
+ throw new APIConnectionError({
198
+ message: `failed to recognize speech after ${this._connOptions.maxRetry + 1} attempts`,
199
+ options: { retryable: false },
200
+ });
201
+ } else {
202
+ this.emitError({ error, recoverable: true });
203
+ this.logger.warn(
204
+ { tts: this.#stt.label, attempt: i + 1, error },
205
+ `failed to recognize speech, retrying in ${retryInterval}s`,
206
+ );
207
+ }
208
+
209
+ if (retryInterval > 0) {
210
+ await delay(retryInterval);
211
+ }
212
+ } else {
213
+ this.emitError({ error: toError(error), recoverable: false });
214
+ throw error;
215
+ }
216
+ }
217
+ }
218
+ }
219
+
220
+ private emitError({ error, recoverable }: { error: Error; recoverable: boolean }) {
221
+ this.#stt.emit('error', {
222
+ type: 'stt_error',
223
+ timestamp: Date.now(),
224
+ label: this.#stt.label,
225
+ error,
226
+ recoverable,
227
+ });
228
+ }
154
229
 
230
+ protected async pumpInput() {
231
+ // TODO(AJS-35): Implement STT with webstreams API
232
+ const inputStream = this.deferredInputStream.stream;
233
+ const reader = inputStream.getReader();
234
+
235
+ try {
236
+ while (true) {
237
+ const { done, value } = await reader.read();
238
+ if (done) break;
239
+ this.pushFrame(value);
240
+ }
241
+ } catch (error) {
242
+ this.logger.error('Error in STTStream mainTask:', error);
243
+ } finally {
244
+ reader.releaseLock();
245
+ }
246
+ }
247
+
248
+ protected async monitorMetrics() {
155
249
  for await (const event of this.queue) {
156
250
  this.output.put(event);
157
251
  if (event.type !== SpeechEventType.RECOGNITION_USAGE) continue;
158
- const duration = process.hrtime.bigint() - startTime;
159
252
  const metrics: STTMetrics = {
253
+ type: 'stt_metrics',
160
254
  timestamp: Date.now(),
161
255
  requestId: event.requestId!,
162
- duration: Math.trunc(Number(duration / BigInt(1000000))),
163
- label: this.label,
256
+ duration: 0,
257
+ label: this.#stt.label,
164
258
  audioDuration: event.recognitionUsage!.audioDuration,
165
259
  streamed: true,
166
260
  };
167
- this.#stt.emit(SpeechEventType.METRICS_COLLECTED, metrics);
261
+ this.#stt.emit('metrics_collected', metrics);
168
262
  }
169
263
  this.output.close();
170
264
  }
171
265
 
266
+ protected abstract run(): Promise<void>;
267
+
268
+ updateInputStream(audioStream: ReadableStream<AudioFrame>) {
269
+ this.deferredInputStream.setSource(audioStream);
270
+ }
271
+
272
+ detachInputStream() {
273
+ this.deferredInputStream.detachSource();
274
+ }
275
+
172
276
  /** Push an audio frame to the STT */
173
277
  pushFrame(frame: AudioFrame) {
174
278
  if (this.input.closed) {
@@ -177,7 +281,21 @@ export abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent>
177
281
  if (this.closed) {
178
282
  throw new Error('Stream is closed');
179
283
  }
180
- this.input.put(frame);
284
+
285
+ if (this.neededSampleRate && frame.sampleRate !== this.neededSampleRate) {
286
+ if (!this.resampler) {
287
+ this.resampler = new AudioResampler(frame.sampleRate, this.neededSampleRate);
288
+ }
289
+ }
290
+
291
+ if (this.resampler) {
292
+ const frames = this.resampler.push(frame);
293
+ for (const frame of frames) {
294
+ this.input.put(frame);
295
+ }
296
+ } else {
297
+ this.input.put(frame);
298
+ }
181
299
  }
182
300
 
183
301
  /** Flush the STT, causing it to process all pending audio */
@@ -12,17 +12,24 @@ interface TokenizerOptions {
12
12
  language: string;
13
13
  minSentenceLength: number;
14
14
  streamContextLength: number;
15
+ retainFormat: boolean;
15
16
  }
16
17
 
18
+ const defaultTokenizerOptions: TokenizerOptions = {
19
+ language: 'en-US',
20
+ minSentenceLength: 20,
21
+ streamContextLength: 10,
22
+ retainFormat: false,
23
+ };
24
+
17
25
  export class SentenceTokenizer extends tokenizer.SentenceTokenizer {
18
26
  #config: TokenizerOptions;
19
27
 
20
- constructor(language = 'en-US', minSentenceLength = 20, streamContextLength = 10) {
28
+ constructor(options?: Partial<TokenizerOptions>) {
21
29
  super();
22
30
  this.#config = {
23
- language,
24
- minSentenceLength,
25
- streamContextLength,
31
+ ...defaultTokenizerOptions,
32
+ ...options,
26
33
  };
27
34
  }
28
35
 
@@ -34,7 +41,8 @@ export class SentenceTokenizer extends tokenizer.SentenceTokenizer {
34
41
  // eslint-disable-next-line @typescript-eslint/no-unused-vars
35
42
  stream(language?: string): tokenizer.SentenceStream {
36
43
  return new BufferedSentenceStream(
37
- (text: string) => splitSentences(text, this.#config.minSentenceLength),
44
+ (text: string) =>
45
+ splitSentences(text, this.#config.minSentenceLength, this.#config.retainFormat),
38
46
  this.#config.minSentenceLength,
39
47
  this.#config.streamContextLength,
40
48
  );
@@ -5,7 +5,11 @@
5
5
  /**
6
6
  * Split the text into sentences.
7
7
  */
8
- export const splitSentences = (text: string, minLength = 20): [string, number, number][] => {
8
+ export const splitSentences = (
9
+ text: string,
10
+ minLength = 20,
11
+ retainFormat: boolean = false,
12
+ ): [string, number, number][] => {
9
13
  const alphabets = /([A-Za-z])/g;
10
14
  const prefixes = /(Mr|St|Mrs|Ms|Dr)[.]/g;
11
15
  const suffixes = /(Inc|Ltd|Jr|Sr|Co)/g;
@@ -16,7 +20,12 @@ export const splitSentences = (text: string, minLength = 20): [string, number, n
16
20
  const digits = /([0-9])/g;
17
21
  const dots = /\.{2,}/g;
18
22
 
19
- text = text.replaceAll('\n', ' ');
23
+ if (retainFormat) {
24
+ text = text.replaceAll('\n', '<nel><stop>');
25
+ } else {
26
+ text = text.replaceAll('\n', ' ');
27
+ }
28
+
20
29
  text = text.replaceAll(prefixes, '$1<prd>');
21
30
  text = text.replaceAll(websites, '<prd>$2');
22
31
  text = text.replaceAll(new RegExp(`${digits.source}[.]${digits.source}`, 'g'), '$1<prd>$2');
@@ -47,6 +56,10 @@ export const splitSentences = (text: string, minLength = 20): [string, number, n
47
56
  text = text.replaceAll('!', '!<stop>');
48
57
  text = text.replaceAll('<prd>', '.');
49
58
 
59
+ if (retainFormat) {
60
+ text = text.replaceAll('<nel>', '\n');
61
+ }
62
+
50
63
  const split = text.split('<stop>');
51
64
  text = text.replaceAll('<stop>', '');
52
65
 
@@ -54,21 +67,22 @@ export const splitSentences = (text: string, minLength = 20): [string, number, n
54
67
  let buf = '';
55
68
  let start = 0;
56
69
  let end = 0;
70
+ const prePad = retainFormat ? '' : ' ';
57
71
  for (const match of split) {
58
- const sentence = match.trim();
72
+ const sentence = retainFormat ? match : match.trim();
59
73
  if (!sentence) continue;
60
74
 
61
- buf += ' ' + sentence;
75
+ buf += prePad + sentence;
62
76
  end += match.length;
63
77
  if (buf.length > minLength) {
64
- sentences.push([buf.slice(1), start, end]);
78
+ sentences.push([buf.slice(prePad.length), start, end]);
65
79
  start = end;
66
80
  buf = '';
67
81
  }
68
82
  }
69
83
 
70
84
  if (buf) {
71
- sentences.push([buf.slice(1), start, text.length - 1]);
85
+ sentences.push([buf.slice(prePad.length), start, text.length - 1]);
72
86
  }
73
87
 
74
88
  return sentences;
@@ -1,8 +1,7 @@
1
1
  // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
2
2
  //
3
3
  // SPDX-License-Identifier: Apache-2.0
4
- import { randomUUID } from 'node:crypto';
5
- import { AsyncIterableQueue } from '../utils.js';
4
+ import { AsyncIterableQueue, shortuuid } from '../utils.js';
6
5
  import type { TokenData } from './tokenizer.js';
7
6
  import { SentenceStream, WordStream } from './tokenizer.js';
8
7
 
@@ -25,7 +24,7 @@ export class BufferedTokenStream implements AsyncIterableIterator<TokenData> {
25
24
  this.#minTokenLength = minTokenLength;
26
25
  this.#minContextLength = minContextLength;
27
26
 
28
- this.#currentSegmentId = randomUUID();
27
+ this.#currentSegmentId = shortuuid();
29
28
  }
30
29
 
31
30
  /** Push a string of text into the token stream */
@@ -90,7 +89,7 @@ export class BufferedTokenStream implements AsyncIterableIterator<TokenData> {
90
89
  this.queue.put({ token: this.#outBuf, segmentId: this.#currentSegmentId });
91
90
  }
92
91
 
93
- this.#currentSegmentId = randomUUID();
92
+ this.#currentSegmentId = shortuuid();
94
93
  }
95
94
 
96
95
  this.#inBuf = '';
@@ -142,6 +141,10 @@ export class BufferedSentenceStream extends SentenceStream {
142
141
  this.#stream.close();
143
142
  }
144
143
 
144
+ endInput() {
145
+ this.#stream.endInput();
146
+ }
147
+
145
148
  next(): Promise<IteratorResult<TokenData>> {
146
149
  return this.#stream.next();
147
150
  }
@@ -4,11 +4,10 @@
4
4
  import { TranscriptionSegment } from '@livekit/protocol';
5
5
  import { AudioFrame } from '@livekit/rtc-node';
6
6
  import type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';
7
- import { randomUUID } from 'node:crypto';
8
7
  import { EventEmitter } from 'node:events';
9
8
  import { basic } from './tokenize/index.js';
10
9
  import type { SentenceStream, SentenceTokenizer } from './tokenize/tokenizer.js';
11
- import { AsyncIterableQueue, Future } from './utils.js';
10
+ import { AsyncIterableQueue, Future, shortuuid } from './utils.js';
12
11
 
13
12
  // standard speech rate in hyphens/ms
14
13
  const STANDARD_SPEECH_RATE = 3830;
@@ -215,7 +214,7 @@ export class TextAudioSynchronizer extends (EventEmitter as new () => TypedEmitt
215
214
  realSpeed = this.#calcHyphens(textData.pushedText).length / audioData.pushedDuration;
216
215
  }
217
216
 
218
- const segId = 'SG_' + randomUUID();
217
+ const segId = shortuuid('SG_');
219
218
  const words = this.#opts.splitWords(sentence);
220
219
  const processedWords: string[] = [];
221
220
 
package/src/tts/index.ts CHANGED
@@ -6,7 +6,6 @@ export {
6
6
  type TTSCapabilities,
7
7
  type TTSCallbacks,
8
8
  TTS,
9
- TTSEvent,
10
9
  SynthesizeStream,
11
10
  ChunkedStream,
12
11
  } from './tts.js';
@@ -2,8 +2,9 @@
2
2
  //
3
3
  // SPDX-License-Identifier: Apache-2.0
4
4
  import type { SentenceStream, SentenceTokenizer } from '../tokenize/index.js';
5
+ import { Task } from '../utils.js';
5
6
  import type { ChunkedStream } from './tts.js';
6
- import { SynthesizeStream, TTS, TTSEvent } from './tts.js';
7
+ import { SynthesizeStream, TTS } from './tts.js';
7
8
 
8
9
  export class StreamAdapter extends TTS {
9
10
  #tts: TTS;
@@ -17,8 +18,8 @@ export class StreamAdapter extends TTS {
17
18
  this.label = this.#tts.label;
18
19
  this.label = `tts.StreamAdapter<${this.#tts.label}>`;
19
20
 
20
- this.#tts.on(TTSEvent.METRICS_COLLECTED, (metrics) => {
21
- this.emit(TTSEvent.METRICS_COLLECTED, metrics);
21
+ this.#tts.on('metrics_collected', (metrics) => {
22
+ this.emit('metrics_collected', metrics);
22
23
  });
23
24
  }
24
25
 
@@ -41,17 +42,13 @@ export class StreamAdapterWrapper extends SynthesizeStream {
41
42
  this.#tts = tts;
42
43
  this.#sentenceStream = sentenceTokenizer.stream();
43
44
  this.label = `tts.StreamAdapterWrapper<${this.#tts.label}>`;
44
-
45
- this.#run();
46
- }
47
-
48
- async monitorMetrics() {
49
- return; // do nothing
50
45
  }
51
46
 
52
- async #run() {
47
+ protected async run() {
53
48
  const forwardInput = async () => {
54
49
  for await (const input of this.input) {
50
+ if (this.abortController.signal.aborted) break;
51
+
55
52
  if (input === SynthesizeStream.FLUSH_SENTINEL) {
56
53
  this.#sentenceStream.flush();
57
54
  } else {
@@ -62,15 +59,44 @@ export class StreamAdapterWrapper extends SynthesizeStream {
62
59
  this.#sentenceStream.close();
63
60
  };
64
61
 
65
- const synthesize = async () => {
62
+ const synthesizeSentenceStream = async () => {
63
+ let task: Task<void> | undefined;
64
+ const tokenCompletionTasks: Task<void>[] = [];
65
+
66
66
  for await (const ev of this.#sentenceStream) {
67
- for await (const audio of this.#tts.synthesize(ev.token)) {
68
- this.output.put(audio);
69
- }
67
+ if (this.abortController.signal.aborted) break;
68
+
69
+ // this will enable non-blocking synthesis of the stream of tokens
70
+ task = Task.from(
71
+ (controller) => synthesize(ev.token, task, controller),
72
+ this.abortController,
73
+ );
74
+
75
+ tokenCompletionTasks.push(task);
76
+ }
77
+
78
+ await Promise.all(tokenCompletionTasks.map((t) => t.result));
79
+ this.queue.put(SynthesizeStream.END_OF_STREAM);
80
+ };
81
+
82
+ const synthesize = async (
83
+ token: string,
84
+ prevTask: Task<void> | undefined,
85
+ controller: AbortController,
86
+ ) => {
87
+ const audioStream = this.#tts.synthesize(token);
88
+
89
+ // wait for the previous token's synthesis task to complete before starting
90
+ // to queuing audio frames of the current token
91
+ await prevTask?.result;
92
+ if (controller.signal.aborted) return;
93
+
94
+ for await (const audio of audioStream) {
95
+ if (controller.signal.aborted) break;
96
+ this.queue.put(audio);
70
97
  }
71
- this.output.put(SynthesizeStream.END_OF_STREAM);
72
98
  };
73
99
 
74
- Promise.all([forwardInput(), synthesize()]);
100
+ await Promise.all([forwardInput(), synthesizeSentenceStream()]);
75
101
  }
76
102
  }