@livekit/agents 1.1.0-dev.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (292)
  1. package/dist/cli.cjs +2 -0
  2. package/dist/cli.cjs.map +1 -1
  3. package/dist/cli.d.ts.map +1 -1
  4. package/dist/cli.js +2 -0
  5. package/dist/cli.js.map +1 -1
  6. package/dist/constants.cjs +3 -0
  7. package/dist/constants.cjs.map +1 -1
  8. package/dist/constants.d.cts +1 -0
  9. package/dist/constants.d.ts +1 -0
  10. package/dist/constants.d.ts.map +1 -1
  11. package/dist/constants.js +2 -0
  12. package/dist/constants.js.map +1 -1
  13. package/dist/cpu.cjs +189 -0
  14. package/dist/cpu.cjs.map +1 -0
  15. package/dist/cpu.d.cts +24 -0
  16. package/dist/cpu.d.ts +24 -0
  17. package/dist/cpu.d.ts.map +1 -0
  18. package/dist/cpu.js +152 -0
  19. package/dist/cpu.js.map +1 -0
  20. package/dist/cpu.test.cjs +227 -0
  21. package/dist/cpu.test.cjs.map +1 -0
  22. package/dist/cpu.test.js +204 -0
  23. package/dist/cpu.test.js.map +1 -0
  24. package/dist/index.cjs +12 -10
  25. package/dist/index.cjs.map +1 -1
  26. package/dist/index.d.cts +13 -13
  27. package/dist/index.d.ts +13 -13
  28. package/dist/index.d.ts.map +1 -1
  29. package/dist/index.js +11 -10
  30. package/dist/index.js.map +1 -1
  31. package/dist/inference/interruption/defaults.cjs +1 -1
  32. package/dist/inference/interruption/defaults.cjs.map +1 -1
  33. package/dist/inference/interruption/defaults.d.cts +1 -1
  34. package/dist/inference/interruption/defaults.d.ts +1 -1
  35. package/dist/inference/interruption/defaults.d.ts.map +1 -1
  36. package/dist/inference/interruption/defaults.js +1 -1
  37. package/dist/inference/interruption/defaults.js.map +1 -1
  38. package/dist/inference/interruption/http_transport.cjs +44 -28
  39. package/dist/inference/interruption/http_transport.cjs.map +1 -1
  40. package/dist/inference/interruption/http_transport.d.ts.map +1 -1
  41. package/dist/inference/interruption/http_transport.js +45 -29
  42. package/dist/inference/interruption/http_transport.js.map +1 -1
  43. package/dist/inference/interruption/interruption_detector.cjs +22 -5
  44. package/dist/inference/interruption/interruption_detector.cjs.map +1 -1
  45. package/dist/inference/interruption/interruption_detector.d.cts +2 -2
  46. package/dist/inference/interruption/interruption_detector.d.ts +2 -2
  47. package/dist/inference/interruption/interruption_detector.d.ts.map +1 -1
  48. package/dist/inference/interruption/interruption_detector.js +22 -5
  49. package/dist/inference/interruption/interruption_detector.js.map +1 -1
  50. package/dist/inference/interruption/interruption_stream.cjs +4 -4
  51. package/dist/inference/interruption/interruption_stream.cjs.map +1 -1
  52. package/dist/inference/interruption/interruption_stream.js +4 -4
  53. package/dist/inference/interruption/interruption_stream.js.map +1 -1
  54. package/dist/inference/interruption/types.cjs.map +1 -1
  55. package/dist/inference/interruption/types.d.cts +2 -2
  56. package/dist/inference/interruption/types.d.ts +2 -2
  57. package/dist/inference/interruption/types.d.ts.map +1 -1
  58. package/dist/inference/interruption/ws_transport.cjs +60 -47
  59. package/dist/inference/interruption/ws_transport.cjs.map +1 -1
  60. package/dist/inference/interruption/ws_transport.d.ts.map +1 -1
  61. package/dist/inference/interruption/ws_transport.js +60 -47
  62. package/dist/inference/interruption/ws_transport.js.map +1 -1
  63. package/dist/inference/llm.cjs.map +1 -1
  64. package/dist/inference/llm.d.cts +1 -1
  65. package/dist/inference/llm.d.ts +1 -1
  66. package/dist/inference/llm.d.ts.map +1 -1
  67. package/dist/inference/llm.js.map +1 -1
  68. package/dist/inference/stt.cjs +20 -12
  69. package/dist/inference/stt.cjs.map +1 -1
  70. package/dist/inference/stt.d.cts +3 -2
  71. package/dist/inference/stt.d.ts +3 -2
  72. package/dist/inference/stt.d.ts.map +1 -1
  73. package/dist/inference/stt.js +20 -12
  74. package/dist/inference/stt.js.map +1 -1
  75. package/dist/inference/stt.test.cjs +14 -0
  76. package/dist/inference/stt.test.cjs.map +1 -1
  77. package/dist/inference/stt.test.js +14 -0
  78. package/dist/inference/stt.test.js.map +1 -1
  79. package/dist/inference/tts.cjs +13 -4
  80. package/dist/inference/tts.cjs.map +1 -1
  81. package/dist/inference/tts.d.cts +8 -1
  82. package/dist/inference/tts.d.ts +8 -1
  83. package/dist/inference/tts.d.ts.map +1 -1
  84. package/dist/inference/tts.js +13 -4
  85. package/dist/inference/tts.js.map +1 -1
  86. package/dist/inference/tts.test.cjs +10 -0
  87. package/dist/inference/tts.test.cjs.map +1 -1
  88. package/dist/inference/tts.test.js +10 -0
  89. package/dist/inference/tts.test.js.map +1 -1
  90. package/dist/ipc/job_proc_lazy_main.cjs +41 -23
  91. package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
  92. package/dist/ipc/job_proc_lazy_main.js +41 -23
  93. package/dist/ipc/job_proc_lazy_main.js.map +1 -1
  94. package/dist/job.cjs +1 -1
  95. package/dist/job.cjs.map +1 -1
  96. package/dist/job.js +1 -1
  97. package/dist/job.js.map +1 -1
  98. package/dist/language.cjs +394 -0
  99. package/dist/language.cjs.map +1 -0
  100. package/dist/language.d.cts +15 -0
  101. package/dist/language.d.ts +15 -0
  102. package/dist/language.d.ts.map +1 -0
  103. package/dist/language.js +363 -0
  104. package/dist/language.js.map +1 -0
  105. package/dist/language.test.cjs +43 -0
  106. package/dist/language.test.cjs.map +1 -0
  107. package/dist/language.test.js +49 -0
  108. package/dist/language.test.js.map +1 -0
  109. package/dist/llm/index.cjs +2 -0
  110. package/dist/llm/index.cjs.map +1 -1
  111. package/dist/llm/index.d.cts +1 -1
  112. package/dist/llm/index.d.ts +1 -1
  113. package/dist/llm/index.d.ts.map +1 -1
  114. package/dist/llm/index.js +2 -0
  115. package/dist/llm/index.js.map +1 -1
  116. package/dist/stream/deferred_stream.cjs +6 -2
  117. package/dist/stream/deferred_stream.cjs.map +1 -1
  118. package/dist/stream/deferred_stream.d.ts.map +1 -1
  119. package/dist/stream/deferred_stream.js +6 -2
  120. package/dist/stream/deferred_stream.js.map +1 -1
  121. package/dist/stt/stt.cjs.map +1 -1
  122. package/dist/stt/stt.d.cts +2 -1
  123. package/dist/stt/stt.d.ts +2 -1
  124. package/dist/stt/stt.d.ts.map +1 -1
  125. package/dist/stt/stt.js.map +1 -1
  126. package/dist/utils.cjs +15 -0
  127. package/dist/utils.cjs.map +1 -1
  128. package/dist/utils.d.cts +8 -0
  129. package/dist/utils.d.ts +8 -0
  130. package/dist/utils.d.ts.map +1 -1
  131. package/dist/utils.js +13 -0
  132. package/dist/utils.js.map +1 -1
  133. package/dist/version.cjs +1 -1
  134. package/dist/version.js +1 -1
  135. package/dist/voice/agent.cjs +14 -17
  136. package/dist/voice/agent.cjs.map +1 -1
  137. package/dist/voice/agent.d.cts +10 -11
  138. package/dist/voice/agent.d.ts +10 -11
  139. package/dist/voice/agent.d.ts.map +1 -1
  140. package/dist/voice/agent.js +15 -18
  141. package/dist/voice/agent.js.map +1 -1
  142. package/dist/voice/agent.test.cjs +194 -0
  143. package/dist/voice/agent.test.cjs.map +1 -1
  144. package/dist/voice/agent.test.js +195 -1
  145. package/dist/voice/agent.test.js.map +1 -1
  146. package/dist/voice/agent_activity.cjs +116 -39
  147. package/dist/voice/agent_activity.cjs.map +1 -1
  148. package/dist/voice/agent_activity.d.cts +2 -0
  149. package/dist/voice/agent_activity.d.ts +2 -0
  150. package/dist/voice/agent_activity.d.ts.map +1 -1
  151. package/dist/voice/agent_activity.js +117 -40
  152. package/dist/voice/agent_activity.js.map +1 -1
  153. package/dist/voice/agent_activity.test.cjs +135 -0
  154. package/dist/voice/agent_activity.test.cjs.map +1 -0
  155. package/dist/voice/agent_activity.test.js +134 -0
  156. package/dist/voice/agent_activity.test.js.map +1 -0
  157. package/dist/voice/agent_session.cjs +38 -38
  158. package/dist/voice/agent_session.cjs.map +1 -1
  159. package/dist/voice/agent_session.d.cts +65 -56
  160. package/dist/voice/agent_session.d.ts +65 -56
  161. package/dist/voice/agent_session.d.ts.map +1 -1
  162. package/dist/voice/agent_session.js +37 -37
  163. package/dist/voice/agent_session.js.map +1 -1
  164. package/dist/voice/audio_recognition.cjs +106 -52
  165. package/dist/voice/audio_recognition.cjs.map +1 -1
  166. package/dist/voice/audio_recognition.d.cts +4 -2
  167. package/dist/voice/audio_recognition.d.ts +4 -2
  168. package/dist/voice/audio_recognition.d.ts.map +1 -1
  169. package/dist/voice/audio_recognition.js +106 -52
  170. package/dist/voice/audio_recognition.js.map +1 -1
  171. package/dist/voice/audio_recognition_span.test.cjs +84 -22
  172. package/dist/voice/audio_recognition_span.test.cjs.map +1 -1
  173. package/dist/voice/audio_recognition_span.test.js +90 -23
  174. package/dist/voice/audio_recognition_span.test.js.map +1 -1
  175. package/dist/voice/events.cjs +1 -1
  176. package/dist/voice/events.cjs.map +1 -1
  177. package/dist/voice/events.d.cts +4 -3
  178. package/dist/voice/events.d.ts +4 -3
  179. package/dist/voice/events.d.ts.map +1 -1
  180. package/dist/voice/events.js +1 -1
  181. package/dist/voice/events.js.map +1 -1
  182. package/dist/voice/index.cjs +9 -1
  183. package/dist/voice/index.cjs.map +1 -1
  184. package/dist/voice/index.d.cts +1 -1
  185. package/dist/voice/index.d.ts +1 -1
  186. package/dist/voice/index.d.ts.map +1 -1
  187. package/dist/voice/index.js +10 -1
  188. package/dist/voice/index.js.map +1 -1
  189. package/dist/voice/remote_session.cjs +922 -0
  190. package/dist/voice/remote_session.cjs.map +1 -0
  191. package/dist/voice/remote_session.d.cts +108 -0
  192. package/dist/voice/remote_session.d.ts +108 -0
  193. package/dist/voice/remote_session.d.ts.map +1 -0
  194. package/dist/voice/remote_session.js +887 -0
  195. package/dist/voice/remote_session.js.map +1 -0
  196. package/dist/voice/report.cjs +11 -10
  197. package/dist/voice/report.cjs.map +1 -1
  198. package/dist/voice/report.d.cts +5 -3
  199. package/dist/voice/report.d.ts +5 -3
  200. package/dist/voice/report.d.ts.map +1 -1
  201. package/dist/voice/report.js +11 -10
  202. package/dist/voice/report.js.map +1 -1
  203. package/dist/voice/report.test.cjs +15 -0
  204. package/dist/voice/report.test.cjs.map +1 -1
  205. package/dist/voice/report.test.js +15 -0
  206. package/dist/voice/report.test.js.map +1 -1
  207. package/dist/voice/room_io/room_io.cjs +39 -0
  208. package/dist/voice/room_io/room_io.cjs.map +1 -1
  209. package/dist/voice/room_io/room_io.d.cts +3 -1
  210. package/dist/voice/room_io/room_io.d.ts +3 -1
  211. package/dist/voice/room_io/room_io.d.ts.map +1 -1
  212. package/dist/voice/room_io/room_io.js +40 -1
  213. package/dist/voice/room_io/room_io.js.map +1 -1
  214. package/dist/voice/turn_config/interruption.cjs.map +1 -1
  215. package/dist/voice/turn_config/interruption.d.cts +1 -1
  216. package/dist/voice/turn_config/interruption.d.ts +1 -1
  217. package/dist/voice/turn_config/interruption.d.ts.map +1 -1
  218. package/dist/voice/turn_config/interruption.js.map +1 -1
  219. package/dist/voice/turn_config/utils.cjs +95 -35
  220. package/dist/voice/turn_config/utils.cjs.map +1 -1
  221. package/dist/voice/turn_config/utils.d.cts +17 -5
  222. package/dist/voice/turn_config/utils.d.ts +17 -5
  223. package/dist/voice/turn_config/utils.d.ts.map +1 -1
  224. package/dist/voice/turn_config/utils.js +93 -35
  225. package/dist/voice/turn_config/utils.js.map +1 -1
  226. package/dist/voice/turn_config/utils.test.cjs +83 -41
  227. package/dist/voice/turn_config/utils.test.cjs.map +1 -1
  228. package/dist/voice/turn_config/utils.test.js +84 -42
  229. package/dist/voice/turn_config/utils.test.js.map +1 -1
  230. package/dist/worker.cjs +6 -29
  231. package/dist/worker.cjs.map +1 -1
  232. package/dist/worker.d.ts.map +1 -1
  233. package/dist/worker.js +6 -19
  234. package/dist/worker.js.map +1 -1
  235. package/package.json +3 -2
  236. package/src/cli.ts +2 -0
  237. package/src/constants.ts +1 -0
  238. package/src/cpu.test.ts +239 -0
  239. package/src/cpu.ts +173 -0
  240. package/src/index.ts +13 -15
  241. package/src/inference/interruption/defaults.ts +1 -1
  242. package/src/inference/interruption/http_transport.ts +49 -30
  243. package/src/inference/interruption/interruption_detector.ts +22 -6
  244. package/src/inference/interruption/interruption_stream.ts +4 -4
  245. package/src/inference/interruption/types.ts +2 -2
  246. package/src/inference/interruption/ws_transport.ts +63 -59
  247. package/src/inference/llm.ts +3 -1
  248. package/src/inference/stt.test.ts +17 -0
  249. package/src/inference/stt.ts +22 -14
  250. package/src/inference/tts.test.ts +12 -0
  251. package/src/inference/tts.ts +22 -6
  252. package/src/ipc/job_proc_lazy_main.ts +44 -24
  253. package/src/job.ts +1 -1
  254. package/src/language.test.ts +62 -0
  255. package/src/language.ts +380 -0
  256. package/src/llm/index.ts +2 -0
  257. package/src/stream/deferred_stream.ts +5 -1
  258. package/src/stt/stt.ts +2 -1
  259. package/src/utils.ts +20 -0
  260. package/src/voice/agent.test.ts +208 -1
  261. package/src/voice/agent.ts +21 -22
  262. package/src/voice/agent_activity.test.ts +194 -0
  263. package/src/voice/agent_activity.ts +161 -43
  264. package/src/voice/agent_session.ts +103 -92
  265. package/src/voice/audio_recognition.ts +124 -61
  266. package/src/voice/audio_recognition_span.test.ts +115 -35
  267. package/src/voice/events.ts +4 -3
  268. package/src/voice/index.ts +10 -1
  269. package/src/voice/remote_session.ts +1083 -0
  270. package/src/voice/report.test.ts +22 -3
  271. package/src/voice/report.ts +31 -14
  272. package/src/voice/room_io/room_io.ts +52 -2
  273. package/src/voice/turn_config/interruption.ts +1 -1
  274. package/src/voice/turn_config/utils.test.ts +91 -43
  275. package/src/voice/turn_config/utils.ts +120 -56
  276. package/src/worker.ts +34 -50
  277. package/dist/voice/client_events.cjs +0 -554
  278. package/dist/voice/client_events.cjs.map +0 -1
  279. package/dist/voice/client_events.d.cts +0 -195
  280. package/dist/voice/client_events.d.ts +0 -195
  281. package/dist/voice/client_events.d.ts.map +0 -1
  282. package/dist/voice/client_events.js +0 -548
  283. package/dist/voice/client_events.js.map +0 -1
  284. package/dist/voice/wire_format.cjs +0 -798
  285. package/dist/voice/wire_format.cjs.map +0 -1
  286. package/dist/voice/wire_format.d.cts +0 -5503
  287. package/dist/voice/wire_format.d.ts +0 -5503
  288. package/dist/voice/wire_format.d.ts.map +0 -1
  289. package/dist/voice/wire_format.js +0 -728
  290. package/dist/voice/wire_format.js.map +0 -1
  291. package/src/voice/client_events.ts +0 -838
  292. package/src/voice/wire_format.ts +0 -827
package/src/index.ts CHANGED
@@ -9,33 +9,31 @@
9
9
  * @see {@link https://docs.livekit.io/agents/overview | LiveKit Agents documentation}
10
10
  * @packageDocumentation
11
11
  */
12
- import * as beta from './beta/index.js';
13
- import * as cli from './cli.js';
14
- import * as inference from './inference/index.js';
15
- import * as ipc from './ipc/index.js';
16
- import * as llm from './llm/index.js';
17
- import * as metrics from './metrics/index.js';
18
- import * as stream from './stream/index.js';
19
- import * as stt from './stt/index.js';
20
- import * as telemetry from './telemetry/index.js';
21
- import * as tokenize from './tokenize/index.js';
22
- import * as tts from './tts/index.js';
23
- import * as voice from './voice/index.js';
24
-
25
12
  export * from './_exceptions.js';
26
13
  export * from './audio.js';
14
+ export * as beta from './beta/index.js';
15
+ export * as cli from './cli.js';
27
16
  export * from './connection_pool.js';
28
17
  export * from './generator.js';
18
+ export * as inference from './inference/index.js';
29
19
  export * from './inference_runner.js';
20
+ export * as ipc from './ipc/index.js';
30
21
  export * from './job.js';
22
+ export * from './language.js';
23
+ export * as llm from './llm/index.js';
31
24
  export * from './log.js';
25
+ export * as metrics from './metrics/index.js';
32
26
  export * from './plugin.js';
27
+ export * as stream from './stream/index.js';
28
+ export * as stt from './stt/index.js';
29
+ export * as telemetry from './telemetry/index.js';
30
+ export * as tokenize from './tokenize/index.js';
33
31
  export * from './transcription.js';
32
+ export * as tts from './tts/index.js';
34
33
  export * from './types.js';
35
34
  export * from './utils.js';
36
35
  export * from './vad.js';
37
36
  export * from './version.js';
37
+ export * as voice from './voice/index.js';
38
38
  export { createTimedString, isTimedString, type TimedString } from './voice/io.js';
39
39
  export * from './worker.js';
40
-
41
- export { beta, cli, inference, ipc, llm, metrics, stream, stt, telemetry, tokenize, tts, voice };
@@ -9,7 +9,7 @@ export const THRESHOLD = 0.5;
9
9
  export const MAX_AUDIO_DURATION_IN_S = 3.0;
10
10
  export const AUDIO_PREFIX_DURATION_IN_S = 0.5;
11
11
  export const DETECTION_INTERVAL_IN_S = 0.1;
12
- export const REMOTE_INFERENCE_TIMEOUT_IN_S = 1.0;
12
+ export const REMOTE_INFERENCE_TIMEOUT_IN_S = 0.7;
13
13
  export const SAMPLE_RATE = 16000;
14
14
  export const FRAMES_PER_SECOND = 40;
15
15
  export const FRAME_DURATION_IN_S = 0.025; // 25ms per frame
@@ -1,12 +1,12 @@
1
1
  // SPDX-FileCopyrightText: 2026 LiveKit, Inc.
2
2
  //
3
3
  // SPDX-License-Identifier: Apache-2.0
4
- import { ofetch } from 'ofetch';
4
+ import { FetchError, ofetch } from 'ofetch';
5
5
  import { TransformStream } from 'stream/web';
6
6
  import { z } from 'zod';
7
+ import { APIConnectionError, APIError, APIStatusError, isAPIError } from '../../_exceptions.js';
7
8
  import { log } from '../../log.js';
8
9
  import { createAccessToken } from '../utils.js';
9
- import { intervalForRetry } from './defaults.js';
10
10
  import { InterruptionCacheEntry } from './interruption_cache_entry.js';
11
11
  import type { OverlappingSpeechEvent } from './types.js';
12
12
  import type { BoundedCache } from './utils.js';
@@ -50,31 +50,50 @@ export async function predictHTTP(
50
50
  url.searchParams.append('min_frames', predictOptions.minFrames.toFixed());
51
51
  url.searchParams.append('created_at', createdAt.toFixed());
52
52
 
53
- let retryCount = 0;
54
- const response = await ofetch(url.toString(), {
55
- retry: options.maxRetries ?? 3,
56
- retryDelay: () => {
57
- const delay = intervalForRetry(retryCount);
58
- retryCount++;
59
- return delay;
60
- },
61
- headers: {
62
- 'Content-Type': 'application/octet-stream',
63
- Authorization: `Bearer ${options.token}`,
64
- },
65
- signal: options.signal,
66
- timeout: options.timeout,
67
- method: 'POST',
68
- body: data,
69
- });
70
- const { created_at, is_bargein, probabilities } = predictEndpointResponseSchema.parse(response);
71
-
72
- return {
73
- createdAt: created_at,
74
- isBargein: is_bargein,
75
- probabilities,
76
- predictionDurationInS: (performance.now() - createdAt) / 1000,
77
- };
53
+ try {
54
+ const response = await ofetch(url.toString(), {
55
+ retry: 0,
56
+ headers: {
57
+ 'Content-Type': 'application/octet-stream',
58
+ Authorization: `Bearer ${options.token}`,
59
+ },
60
+ signal: options.signal,
61
+ timeout: options.timeout,
62
+ method: 'POST',
63
+ body: data,
64
+ });
65
+ const { created_at, is_bargein, probabilities } = predictEndpointResponseSchema.parse(response);
66
+
67
+ return {
68
+ createdAt: created_at,
69
+ isBargein: is_bargein,
70
+ probabilities,
71
+ predictionDurationInS: (performance.now() - createdAt) / 1000,
72
+ };
73
+ } catch (err) {
74
+ if (isAPIError(err)) throw err;
75
+ if (err instanceof FetchError) {
76
+ if (err.statusCode) {
77
+ throw new APIStatusError({
78
+ message: `error during interruption prediction: ${err.message}`,
79
+ options: { statusCode: err.statusCode, body: err.data },
80
+ });
81
+ }
82
+ if (
83
+ err.cause instanceof Error &&
84
+ (err.cause.name === 'TimeoutError' || err.cause.name === 'AbortError')
85
+ ) {
86
+ throw new APIStatusError({
87
+ message: `interruption inference timeout: ${err.message}`,
88
+ options: { statusCode: 408, retryable: false },
89
+ });
90
+ }
91
+ throw new APIConnectionError({
92
+ message: `interruption inference connection error: ${err.message}`,
93
+ });
94
+ }
95
+ throw new APIError(`error during interruption prediction: ${err}`);
96
+ }
78
97
  }
79
98
 
80
99
  export interface HttpTransportOptions {
@@ -154,8 +173,8 @@ export function createHttpTransport(
154
173
  updateUserSpeakingSpan(entry);
155
174
  }
156
175
  const event: OverlappingSpeechEvent = {
157
- type: 'user_overlapping_speech',
158
- timestamp: Date.now(),
176
+ type: 'overlapping_speech',
177
+ detectedAt: Date.now(),
159
178
  overlapStartedAt: overlapSpeechStartedAt,
160
179
  isInterruption: entry.isInterruption,
161
180
  speechInput: entry.speechInput,
@@ -177,7 +196,7 @@ export function createHttpTransport(
177
196
  controller.enqueue(event);
178
197
  }
179
198
  } catch (err) {
180
- logger.error({ err }, 'Failed to send audio data over HTTP');
199
+ controller.error(err);
181
200
  }
182
201
  },
183
202
  },
@@ -7,12 +7,12 @@ import { log } from '../../log.js';
7
7
  import type { InterruptionMetrics } from '../../metrics/base.js';
8
8
  import { DEFAULT_INFERENCE_URL, STAGING_INFERENCE_URL, getDefaultInferenceUrl } from '../utils.js';
9
9
  import { FRAMES_PER_SECOND, SAMPLE_RATE, interruptionOptionDefaults } from './defaults.js';
10
- import type { InterruptionDetectionError } from './errors.js';
10
+ import { InterruptionDetectionError } from './errors.js';
11
11
  import { InterruptionStreamBase } from './interruption_stream.js';
12
12
  import type { InterruptionOptions, OverlappingSpeechEvent } from './types.js';
13
13
 
14
14
  type InterruptionCallbacks = {
15
- user_overlapping_speech: (event: OverlappingSpeechEvent) => void;
15
+ overlapping_speech: (event: OverlappingSpeechEvent) => void;
16
16
  metrics_collected: (metrics: InterruptionMetrics) => void;
17
17
  error: (error: InterruptionDetectionError) => void;
18
18
  };
@@ -76,6 +76,15 @@ export class AdaptiveInterruptionDetector extends (EventEmitter as new () => Typ
76
76
  } else {
77
77
  useProxy = false;
78
78
  }
79
+ const transport = useProxy ? 'websocket' : 'http';
80
+ this.logger.debug(
81
+ {
82
+ baseUrl: lkBaseUrl,
83
+ useProxy,
84
+ transport,
85
+ },
86
+ '=== Resolved interruption detector transport configuration',
87
+ );
79
88
 
80
89
  this.options = {
81
90
  sampleRate: SAMPLE_RATE,
@@ -104,8 +113,9 @@ export class AdaptiveInterruptionDetector extends (EventEmitter as new () => Typ
104
113
  threshold: this.options.threshold,
105
114
  inferenceTimeout: this.options.inferenceTimeout,
106
115
  useProxy: this.options.useProxy,
116
+ transport,
107
117
  },
108
- 'adaptive interruption detector initialized',
118
+ '=== Adaptive interruption detector initialized',
109
119
  );
110
120
  }
111
121
 
@@ -150,9 +160,15 @@ export class AdaptiveInterruptionDetector extends (EventEmitter as new () => Typ
150
160
  * Use this when you need direct access to the stream for pushing frames.
151
161
  */
152
162
  createStream(): InterruptionStreamBase {
153
- const streamBase = new InterruptionStreamBase(this, {});
154
- this.streams.add(streamBase);
155
- return streamBase;
163
+ try {
164
+ const streamBase = new InterruptionStreamBase(this, {});
165
+ this.streams.add(streamBase);
166
+ return streamBase;
167
+ } catch (e) {
168
+ const cause = e instanceof Error ? e : new Error(String(e));
169
+ this.emitError(new InterruptionDetectionError(cause.message, Date.now(), this._label, false));
170
+ throw e;
171
+ }
156
172
  }
157
173
 
158
174
  /**
@@ -281,8 +281,8 @@ export class InterruptionStreamBase {
281
281
  }
282
282
  const e = latestEntry ?? InterruptionCacheEntry.default();
283
283
  const event: OverlappingSpeechEvent = {
284
- type: 'user_overlapping_speech',
285
- timestamp: chunk.endedAt,
284
+ type: 'overlapping_speech',
285
+ detectedAt: chunk.endedAt,
286
286
  isInterruption: false,
287
287
  overlapStartedAt: this.overlapSpeechStartedAt,
288
288
  speechInput: e.speechInput,
@@ -334,11 +334,11 @@ export class InterruptionStreamBase {
334
334
 
335
335
  const eventEmitter = new TransformStream<OverlappingSpeechEvent, OverlappingSpeechEvent>({
336
336
  transform: (chunk, controller) => {
337
- this.model.emit('user_overlapping_speech', chunk);
337
+ this.model.emit('overlapping_speech', chunk);
338
338
 
339
339
  const metrics: InterruptionMetrics = {
340
340
  type: 'interruption_metrics',
341
- timestamp: chunk.timestamp,
341
+ timestamp: chunk.detectedAt,
342
342
  totalDuration: chunk.totalDurationInS * 1000,
343
343
  predictionDuration: chunk.predictionDurationInS * 1000,
344
344
  detectionDelay: chunk.detectionDelayInS * 1000,
@@ -4,8 +4,8 @@
4
4
  import type { Span } from '@opentelemetry/api';
5
5
 
6
6
  export interface OverlappingSpeechEvent {
7
- type: 'user_overlapping_speech';
8
- timestamp: number;
7
+ type: 'overlapping_speech';
8
+ detectedAt: number;
9
9
  isInterruption: boolean;
10
10
  totalDurationInS: number;
11
11
  predictionDurationInS: number;
@@ -4,9 +4,9 @@
4
4
  import { TransformStream } from 'stream/web';
5
5
  import WebSocket from 'ws';
6
6
  import { z } from 'zod';
7
+ import { APIConnectionError, APIStatusError, APITimeoutError } from '../../_exceptions.js';
7
8
  import { log } from '../../log.js';
8
9
  import { createAccessToken } from '../utils.js';
9
- import { intervalForRetry } from './defaults.js';
10
10
  import { InterruptionCacheEntry } from './interruption_cache_entry.js';
11
11
  import type { OverlappingSpeechEvent } from './types.js';
12
12
  import type { BoundedCache } from './utils.js';
@@ -82,16 +82,32 @@ async function connectWebSocket(options: WsTransportOptions): Promise<WebSocket>
82
82
  await new Promise<void>((resolve, reject) => {
83
83
  const timeout = setTimeout(() => {
84
84
  ws.terminate();
85
- reject(new Error('WebSocket connection timeout'));
85
+ reject(
86
+ new APITimeoutError({
87
+ message: 'WebSocket connection timeout',
88
+ options: { retryable: false },
89
+ }),
90
+ );
86
91
  }, options.timeout);
87
92
  ws.once('open', () => {
88
93
  clearTimeout(timeout);
89
94
  resolve();
90
95
  });
96
+ ws.once('unexpected-response', (_req, res) => {
97
+ clearTimeout(timeout);
98
+ ws.terminate();
99
+ const statusCode = res.statusCode ?? -1;
100
+ reject(
101
+ new APIStatusError({
102
+ message: `WebSocket connection rejected with status ${statusCode}`,
103
+ options: { statusCode, retryable: false },
104
+ }),
105
+ );
106
+ });
91
107
  ws.once('error', (err: Error) => {
92
108
  clearTimeout(timeout);
93
109
  ws.terminate();
94
- reject(err);
110
+ reject(new APIConnectionError({ message: `WebSocket connection error: ${err.message}` }));
95
111
  });
96
112
  });
97
113
 
@@ -133,7 +149,9 @@ export function createWsTransport(
133
149
  });
134
150
 
135
151
  socket.on('error', (err: Error) => {
136
- logger.error({ err }, 'WebSocket error');
152
+ outputController?.error(
153
+ new APIConnectionError({ message: `WebSocket error: ${err.message}` }),
154
+ );
137
155
  });
138
156
 
139
157
  socket.on('close', (code: number, reason: Buffer) => {
@@ -144,41 +162,20 @@ export function createWsTransport(
144
162
  async function ensureConnection(): Promise<void> {
145
163
  if (ws && ws.readyState === WebSocket.OPEN) return;
146
164
 
147
- const maxRetries = options.maxRetries ?? 3;
148
- let lastError: Error | null = null;
149
-
150
- for (let attempt = 0; attempt <= maxRetries; attempt++) {
151
- try {
152
- ws = await connectWebSocket(options);
153
- setupMessageHandler(ws);
154
-
155
- // Send session.create message
156
- const sessionCreateMsg = JSON.stringify({
157
- type: MSG_SESSION_CREATE,
158
- settings: {
159
- sample_rate: options.sampleRate,
160
- num_channels: 1,
161
- threshold: options.threshold,
162
- min_frames: options.minFrames,
163
- encoding: 's16le',
164
- },
165
- });
166
- ws.send(sessionCreateMsg);
167
- return;
168
- } catch (err) {
169
- lastError = err instanceof Error ? err : new Error(String(err));
170
- if (attempt < maxRetries) {
171
- const delay = intervalForRetry(attempt);
172
- logger.debug(
173
- { attempt, delay, err: lastError.message },
174
- 'WebSocket connection failed, retrying',
175
- );
176
- await new Promise((resolve) => setTimeout(resolve, delay));
177
- }
178
- }
179
- }
180
-
181
- throw lastError ?? new Error('Failed to connect to WebSocket after retries');
165
+ ws = await connectWebSocket(options);
166
+ setupMessageHandler(ws);
167
+
168
+ const sessionCreateMsg = JSON.stringify({
169
+ type: MSG_SESSION_CREATE,
170
+ settings: {
171
+ sample_rate: options.sampleRate,
172
+ num_channels: 1,
173
+ threshold: options.threshold,
174
+ min_frames: options.minFrames,
175
+ encoding: 's16le',
176
+ },
177
+ });
178
+ ws.send(sessionCreateMsg);
182
179
  }
183
180
 
184
181
  function handleMessage(message: WsMessage): void {
@@ -229,8 +226,8 @@ export function createWsTransport(
229
226
  );
230
227
 
231
228
  const event: OverlappingSpeechEvent = {
232
- type: 'user_overlapping_speech',
233
- timestamp: Date.now(),
229
+ type: 'overlapping_speech',
230
+ detectedAt: Date.now(),
234
231
  isInterruption: true,
235
232
  totalDurationInS: entry.totalDurationInS,
236
233
  predictionDurationInS: entry.predictionDurationInS,
@@ -288,11 +285,10 @@ export function createWsTransport(
288
285
 
289
286
  case MSG_ERROR:
290
287
  outputController?.error(
291
- new Error(
292
- `LiveKit Adaptive Interruption error${
293
- message.code !== undefined ? ` (${message.code})` : ''
294
- }: ${message.message}`,
295
- ),
288
+ new APIStatusError({
289
+ message: `LiveKit Adaptive Interruption error: ${message.message}`,
290
+ options: { statusCode: message.code ?? -1 },
291
+ }),
296
292
  );
297
293
  break;
298
294
  }
@@ -300,15 +296,12 @@ export function createWsTransport(
300
296
 
301
297
  function sendAudioData(audioSlice: Int16Array): void {
302
298
  if (!ws || ws.readyState !== WebSocket.OPEN) {
303
- throw new Error('WebSocket not connected');
299
+ throw new APIConnectionError({ message: 'WebSocket not connected' });
304
300
  }
305
301
 
306
302
  const state = getState();
307
- // Use truncated timestamp consistently for both cache key and header
308
- // This ensures the server's response created_at matches our cache key
309
303
  const createdAt = Math.floor(performance.now());
310
304
 
311
- // Store the audio data in cache with truncated timestamp
312
305
  state.cache.set(
313
306
  createdAt,
314
307
  new InterruptionCacheEntry({
@@ -318,13 +311,11 @@ export function createWsTransport(
318
311
  }),
319
312
  );
320
313
 
321
- // Create header: 8-byte little-endian uint64 timestamp (milliseconds as integer)
322
314
  const header = new ArrayBuffer(8);
323
315
  const view = new DataView(header);
324
316
  view.setUint32(0, createdAt >>> 0, true);
325
317
  view.setUint32(4, Math.floor(createdAt / 0x100000000) >>> 0, true);
326
318
 
327
- // Combine header and audio data
328
319
  const audioBytes = new Uint8Array(
329
320
  audioSlice.buffer,
330
321
  audioSlice.byteOffset,
@@ -334,12 +325,8 @@ export function createWsTransport(
334
325
  combined.set(new Uint8Array(header), 0);
335
326
  combined.set(audioBytes, 8);
336
327
 
337
- try {
338
- ws.send(combined);
339
- onRequestSent?.();
340
- } catch (e: unknown) {
341
- logger.error(e, `failed to send audio via websocket`);
342
- }
328
+ ws.send(combined);
329
+ onRequestSent?.();
343
330
  }
344
331
 
345
332
  function close(): void {
@@ -383,10 +370,27 @@ export function createWsTransport(
383
370
  const state = getState();
384
371
  if (!state.overlapSpeechStartedAt || !state.overlapSpeechStarted) return;
385
372
 
373
+ if (options.timeout > 0) {
374
+ const now = performance.now();
375
+ for (const [, entry] of state.cache.entries()) {
376
+ if (entry.totalDurationInS !== 0) continue;
377
+ if (now - entry.createdAt > options.timeout) {
378
+ controller.error(
379
+ new APIStatusError({
380
+ message: `interruption inference timed out after ${((now - entry.createdAt) / 1000).toFixed(1)}s (ws)`,
381
+ options: { statusCode: 408, retryable: false },
382
+ }),
383
+ );
384
+ return;
385
+ }
386
+ break;
387
+ }
388
+ }
389
+
386
390
  try {
387
391
  sendAudioData(chunk);
388
392
  } catch (err) {
389
- logger.error({ err }, 'Failed to send audio data over WebSocket');
393
+ controller.error(err);
390
394
  }
391
395
  },
392
396
 
@@ -4,12 +4,14 @@
4
4
  import OpenAI from 'openai';
5
5
  import { APIConnectionError, APIStatusError, APITimeoutError } from '../_exceptions.js';
6
6
  import * as llm from '../llm/index.js';
7
- import { DEFAULT_API_CONNECT_OPTIONS } from '../types.js';
8
7
  import type { APIConnectOptions } from '../types.js';
8
+ import { DEFAULT_API_CONNECT_OPTIONS } from '../types.js';
9
9
  import { type Expand, toError } from '../utils.js';
10
10
  import { type AnyString, createAccessToken, getDefaultInferenceUrl } from './utils.js';
11
11
 
12
12
  export type OpenAIModels =
13
+ | 'openai/gpt-5.4'
14
+ | 'openai/gpt-5.3-chat-latest'
13
15
  | 'openai/gpt-5.2'
14
16
  | 'openai/gpt-5.2-chat-latest'
15
17
  | 'openai/gpt-5.1'
@@ -2,6 +2,7 @@
2
2
  //
3
3
  // SPDX-License-Identifier: Apache-2.0
4
4
  import { beforeAll, describe, expect, it } from 'vitest';
5
+ import { normalizeLanguage } from '../language.js';
5
6
  import { initializeLogger } from '../log.js';
6
7
  import { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';
7
8
  import { STT, type STTFallbackModel, normalizeSTTFallback, parseSTTModelString } from './stt.js';
@@ -34,6 +35,12 @@ describe('parseSTTModelString', () => {
34
35
  expect(language).toBe('en');
35
36
  });
36
37
 
38
+ it('normalizes language suffixes', () => {
39
+ const [model, language] = parseSTTModelString('deepgram:english');
40
+ expect(model).toBe('deepgram');
41
+ expect(language).toBe('en');
42
+ });
43
+
37
44
  it('provider/model format without language', () => {
38
45
  const [model, language] = parseSTTModelString('deepgram/nova-3');
39
46
  expect(model).toBe('deepgram/nova-3');
@@ -151,6 +158,16 @@ describe('normalizeSTTFallback', () => {
151
158
  });
152
159
 
153
160
  describe('STT constructor fallback and connOptions', () => {
161
+ it('normalizes language in constructor and model string', () => {
162
+ const stt = makeStt({ model: 'deepgram/nova-3:english' });
163
+ expect(stt['opts'].language).toBe('en');
164
+ });
165
+
166
+ it('prefers explicit normalized language over model suffix', () => {
167
+ const stt = makeStt({ model: 'deepgram/nova-3:english', language: 'en_US' });
168
+ expect(stt['opts'].language).toBe(normalizeLanguage('en_US'));
169
+ });
170
+
154
171
  it('fallback not given defaults to undefined', () => {
155
172
  const stt = makeStt();
156
173
  expect(stt['opts'].fallback).toBeUndefined();
@@ -5,6 +5,7 @@ import { type AudioFrame } from '@livekit/rtc-node';
5
5
  import type { WebSocket } from 'ws';
6
6
  import { APIError, APIStatusError } from '../_exceptions.js';
7
7
  import { AudioByteStream } from '../audio.js';
8
+ import { type LanguageCode, areLanguagesEquivalent, normalizeLanguage } from '../language.js';
8
9
  import { log } from '../log.js';
9
10
  import { createStreamChannel } from '../stream/stream_channel.js';
10
11
  import {
@@ -121,10 +122,10 @@ export interface STTFallbackModel {
121
122
  export type STTFallbackModelType = STTFallbackModel | string;
122
123
 
123
124
  /** Parse a model string into [model, language]. Language is undefined if not specified. */
124
- export function parseSTTModelString(model: string): [string, string | undefined] {
125
+ export function parseSTTModelString(model: string): [string, LanguageCode | undefined] {
125
126
  const idx = model.lastIndexOf(':');
126
127
  if (idx !== -1) {
127
- return [model.slice(0, idx), model.slice(idx + 1)];
128
+ return [model.slice(0, idx), normalizeLanguage(model.slice(idx + 1))];
128
129
  }
129
130
  return [model, undefined];
130
131
  }
@@ -155,7 +156,7 @@ const DEFAULT_CANCEL_TIMEOUT = 5000;
155
156
 
156
157
  export interface InferenceSTTOptions<TModel extends STTModels> {
157
158
  model?: TModel;
158
- language?: STTLanguages;
159
+ language?: LanguageCode;
159
160
  encoding: STTEncoding;
160
161
  sampleRate: number;
161
162
  baseURL: string;
@@ -218,25 +219,24 @@ export class STT<TModel extends STTModels> extends BaseSTT {
218
219
  let nextModel = model;
219
220
  let nextLanguage = language;
220
221
  if (typeof nextModel === 'string') {
221
- const idx = nextModel.lastIndexOf(':');
222
- if (idx !== -1) {
223
- const languageFromModel = nextModel.slice(idx + 1) as STTLanguages;
224
- if (nextLanguage && nextLanguage !== languageFromModel) {
222
+ const [parsedModel, parsedLanguage] = parseSTTModelString(nextModel);
223
+ if (parsedLanguage !== undefined) {
224
+ if (nextLanguage && !areLanguagesEquivalent(nextLanguage, parsedLanguage)) {
225
225
  this.#logger.warn(
226
226
  '`language` is provided via both argument and model, using the one from the argument',
227
227
  { language: nextLanguage, model: nextModel },
228
228
  );
229
229
  } else {
230
- nextLanguage = languageFromModel;
230
+ nextLanguage = parsedLanguage as STTLanguages;
231
231
  }
232
- nextModel = nextModel.slice(0, idx) as TModel;
232
+ nextModel = parsedModel as TModel;
233
233
  }
234
234
  }
235
235
  const normalizedFallback = fallback ? normalizeSTTFallback(fallback) : undefined;
236
236
 
237
237
  this.opts = {
238
238
  model: nextModel as TModel,
239
- language: nextLanguage,
239
+ language: nextLanguage ? normalizeLanguage(nextLanguage) : undefined,
240
240
  encoding,
241
241
  sampleRate,
242
242
  baseURL: lkBaseURL,
@@ -270,7 +270,11 @@ export class STT<TModel extends STTModels> extends BaseSTT {
270
270
  }
271
271
 
272
272
  updateOptions(opts: Partial<Pick<InferenceSTTOptions<TModel>, 'model' | 'language'>>): void {
273
- this.opts = { ...this.opts, ...opts };
273
+ this.opts = {
274
+ ...this.opts,
275
+ ...opts,
276
+ language: opts.language !== undefined ? normalizeLanguage(opts.language) : this.opts.language,
277
+ };
274
278
 
275
279
  for (const stream of this.streams) {
276
280
  stream.updateOptions(opts);
@@ -285,7 +289,7 @@ export class STT<TModel extends STTModels> extends BaseSTT {
285
289
  options || {};
286
290
  const streamOpts = {
287
291
  ...this.opts,
288
- language: language ?? this.opts.language,
292
+ language: language !== undefined ? normalizeLanguage(language) : this.opts.language,
289
293
  } as InferenceSTTOptions<TModel>;
290
294
 
291
295
  const stream = new SpeechStream(this, streamOpts, connOptions);
@@ -371,7 +375,11 @@ export class SpeechStream<TModel extends STTModels> extends BaseSpeechStream {
371
375
  }
372
376
 
373
377
  updateOptions(opts: Partial<Pick<InferenceSTTOptions<TModel>, 'model' | 'language'>>): void {
374
- this.opts = { ...this.opts, ...opts };
378
+ this.opts = {
379
+ ...this.opts,
380
+ ...opts,
381
+ language: opts.language !== undefined ? normalizeLanguage(opts.language) : this.opts.language,
382
+ };
375
383
  this.reconnectEvent.set();
376
384
  }
377
385
 
@@ -576,7 +584,7 @@ export class SpeechStream<TModel extends STTModels> extends BaseSpeechStream {
576
584
 
577
585
  const requestId = data.session_id || this.requestId;
578
586
  const text = data.transcript;
579
- const language = data.language || this.opts.language || 'en';
587
+ const language = normalizeLanguage(data.language || this.opts.language || 'en');
580
588
 
581
589
  if (!text && !isFinal) return;
582
590
 
@@ -2,6 +2,7 @@
2
2
  //
3
3
  // SPDX-License-Identifier: Apache-2.0
4
4
  import { beforeAll, describe, expect, it } from 'vitest';
5
+ import { normalizeLanguage } from '../language.js';
5
6
  import { initializeLogger } from '../log.js';
6
7
  import { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';
7
8
  import { TTS, type TTSFallbackModel, normalizeTTSFallback, parseTTSModelString } from './tts.js';
@@ -165,6 +166,17 @@ describe('normalizeTTSFallback', () => {
165
166
  });
166
167
 
167
168
  describe('TTS constructor fallback and connOptions', () => {
169
+ it('normalizes language in constructor', () => {
170
+ const tts = makeTts({ language: 'english' });
171
+ expect(tts['opts'].language).toBe('en');
172
+ });
173
+
174
+ it('normalizes updated language values', () => {
175
+ const tts = makeTts();
176
+ tts.updateOptions({ language: 'en_US' });
177
+ expect(tts['opts'].language).toBe(normalizeLanguage('en_US'));
178
+ });
179
+
168
180
  it('fallback not given defaults to undefined', () => {
169
181
  const tts = makeTts();
170
182
  expect(tts['opts'].fallback).toBeUndefined();