@livekit/agents 1.1.0-dev.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (292)
  1. package/dist/cli.cjs +2 -0
  2. package/dist/cli.cjs.map +1 -1
  3. package/dist/cli.d.ts.map +1 -1
  4. package/dist/cli.js +2 -0
  5. package/dist/cli.js.map +1 -1
  6. package/dist/constants.cjs +3 -0
  7. package/dist/constants.cjs.map +1 -1
  8. package/dist/constants.d.cts +1 -0
  9. package/dist/constants.d.ts +1 -0
  10. package/dist/constants.d.ts.map +1 -1
  11. package/dist/constants.js +2 -0
  12. package/dist/constants.js.map +1 -1
  13. package/dist/cpu.cjs +189 -0
  14. package/dist/cpu.cjs.map +1 -0
  15. package/dist/cpu.d.cts +24 -0
  16. package/dist/cpu.d.ts +24 -0
  17. package/dist/cpu.d.ts.map +1 -0
  18. package/dist/cpu.js +152 -0
  19. package/dist/cpu.js.map +1 -0
  20. package/dist/cpu.test.cjs +227 -0
  21. package/dist/cpu.test.cjs.map +1 -0
  22. package/dist/cpu.test.js +204 -0
  23. package/dist/cpu.test.js.map +1 -0
  24. package/dist/index.cjs +12 -10
  25. package/dist/index.cjs.map +1 -1
  26. package/dist/index.d.cts +13 -13
  27. package/dist/index.d.ts +13 -13
  28. package/dist/index.d.ts.map +1 -1
  29. package/dist/index.js +11 -10
  30. package/dist/index.js.map +1 -1
  31. package/dist/inference/interruption/defaults.cjs +1 -1
  32. package/dist/inference/interruption/defaults.cjs.map +1 -1
  33. package/dist/inference/interruption/defaults.d.cts +1 -1
  34. package/dist/inference/interruption/defaults.d.ts +1 -1
  35. package/dist/inference/interruption/defaults.d.ts.map +1 -1
  36. package/dist/inference/interruption/defaults.js +1 -1
  37. package/dist/inference/interruption/defaults.js.map +1 -1
  38. package/dist/inference/interruption/http_transport.cjs +44 -28
  39. package/dist/inference/interruption/http_transport.cjs.map +1 -1
  40. package/dist/inference/interruption/http_transport.d.ts.map +1 -1
  41. package/dist/inference/interruption/http_transport.js +45 -29
  42. package/dist/inference/interruption/http_transport.js.map +1 -1
  43. package/dist/inference/interruption/interruption_detector.cjs +22 -5
  44. package/dist/inference/interruption/interruption_detector.cjs.map +1 -1
  45. package/dist/inference/interruption/interruption_detector.d.cts +2 -2
  46. package/dist/inference/interruption/interruption_detector.d.ts +2 -2
  47. package/dist/inference/interruption/interruption_detector.d.ts.map +1 -1
  48. package/dist/inference/interruption/interruption_detector.js +22 -5
  49. package/dist/inference/interruption/interruption_detector.js.map +1 -1
  50. package/dist/inference/interruption/interruption_stream.cjs +4 -4
  51. package/dist/inference/interruption/interruption_stream.cjs.map +1 -1
  52. package/dist/inference/interruption/interruption_stream.js +4 -4
  53. package/dist/inference/interruption/interruption_stream.js.map +1 -1
  54. package/dist/inference/interruption/types.cjs.map +1 -1
  55. package/dist/inference/interruption/types.d.cts +2 -2
  56. package/dist/inference/interruption/types.d.ts +2 -2
  57. package/dist/inference/interruption/types.d.ts.map +1 -1
  58. package/dist/inference/interruption/ws_transport.cjs +60 -47
  59. package/dist/inference/interruption/ws_transport.cjs.map +1 -1
  60. package/dist/inference/interruption/ws_transport.d.ts.map +1 -1
  61. package/dist/inference/interruption/ws_transport.js +60 -47
  62. package/dist/inference/interruption/ws_transport.js.map +1 -1
  63. package/dist/inference/llm.cjs.map +1 -1
  64. package/dist/inference/llm.d.cts +1 -1
  65. package/dist/inference/llm.d.ts +1 -1
  66. package/dist/inference/llm.d.ts.map +1 -1
  67. package/dist/inference/llm.js.map +1 -1
  68. package/dist/inference/stt.cjs +20 -12
  69. package/dist/inference/stt.cjs.map +1 -1
  70. package/dist/inference/stt.d.cts +3 -2
  71. package/dist/inference/stt.d.ts +3 -2
  72. package/dist/inference/stt.d.ts.map +1 -1
  73. package/dist/inference/stt.js +20 -12
  74. package/dist/inference/stt.js.map +1 -1
  75. package/dist/inference/stt.test.cjs +14 -0
  76. package/dist/inference/stt.test.cjs.map +1 -1
  77. package/dist/inference/stt.test.js +14 -0
  78. package/dist/inference/stt.test.js.map +1 -1
  79. package/dist/inference/tts.cjs +13 -4
  80. package/dist/inference/tts.cjs.map +1 -1
  81. package/dist/inference/tts.d.cts +8 -1
  82. package/dist/inference/tts.d.ts +8 -1
  83. package/dist/inference/tts.d.ts.map +1 -1
  84. package/dist/inference/tts.js +13 -4
  85. package/dist/inference/tts.js.map +1 -1
  86. package/dist/inference/tts.test.cjs +10 -0
  87. package/dist/inference/tts.test.cjs.map +1 -1
  88. package/dist/inference/tts.test.js +10 -0
  89. package/dist/inference/tts.test.js.map +1 -1
  90. package/dist/ipc/job_proc_lazy_main.cjs +41 -23
  91. package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
  92. package/dist/ipc/job_proc_lazy_main.js +41 -23
  93. package/dist/ipc/job_proc_lazy_main.js.map +1 -1
  94. package/dist/job.cjs +1 -1
  95. package/dist/job.cjs.map +1 -1
  96. package/dist/job.js +1 -1
  97. package/dist/job.js.map +1 -1
  98. package/dist/language.cjs +394 -0
  99. package/dist/language.cjs.map +1 -0
  100. package/dist/language.d.cts +15 -0
  101. package/dist/language.d.ts +15 -0
  102. package/dist/language.d.ts.map +1 -0
  103. package/dist/language.js +363 -0
  104. package/dist/language.js.map +1 -0
  105. package/dist/language.test.cjs +43 -0
  106. package/dist/language.test.cjs.map +1 -0
  107. package/dist/language.test.js +49 -0
  108. package/dist/language.test.js.map +1 -0
  109. package/dist/llm/index.cjs +2 -0
  110. package/dist/llm/index.cjs.map +1 -1
  111. package/dist/llm/index.d.cts +1 -1
  112. package/dist/llm/index.d.ts +1 -1
  113. package/dist/llm/index.d.ts.map +1 -1
  114. package/dist/llm/index.js +2 -0
  115. package/dist/llm/index.js.map +1 -1
  116. package/dist/stream/deferred_stream.cjs +6 -2
  117. package/dist/stream/deferred_stream.cjs.map +1 -1
  118. package/dist/stream/deferred_stream.d.ts.map +1 -1
  119. package/dist/stream/deferred_stream.js +6 -2
  120. package/dist/stream/deferred_stream.js.map +1 -1
  121. package/dist/stt/stt.cjs.map +1 -1
  122. package/dist/stt/stt.d.cts +2 -1
  123. package/dist/stt/stt.d.ts +2 -1
  124. package/dist/stt/stt.d.ts.map +1 -1
  125. package/dist/stt/stt.js.map +1 -1
  126. package/dist/utils.cjs +15 -0
  127. package/dist/utils.cjs.map +1 -1
  128. package/dist/utils.d.cts +8 -0
  129. package/dist/utils.d.ts +8 -0
  130. package/dist/utils.d.ts.map +1 -1
  131. package/dist/utils.js +13 -0
  132. package/dist/utils.js.map +1 -1
  133. package/dist/version.cjs +1 -1
  134. package/dist/version.js +1 -1
  135. package/dist/voice/agent.cjs +14 -17
  136. package/dist/voice/agent.cjs.map +1 -1
  137. package/dist/voice/agent.d.cts +10 -11
  138. package/dist/voice/agent.d.ts +10 -11
  139. package/dist/voice/agent.d.ts.map +1 -1
  140. package/dist/voice/agent.js +15 -18
  141. package/dist/voice/agent.js.map +1 -1
  142. package/dist/voice/agent.test.cjs +194 -0
  143. package/dist/voice/agent.test.cjs.map +1 -1
  144. package/dist/voice/agent.test.js +195 -1
  145. package/dist/voice/agent.test.js.map +1 -1
  146. package/dist/voice/agent_activity.cjs +116 -39
  147. package/dist/voice/agent_activity.cjs.map +1 -1
  148. package/dist/voice/agent_activity.d.cts +2 -0
  149. package/dist/voice/agent_activity.d.ts +2 -0
  150. package/dist/voice/agent_activity.d.ts.map +1 -1
  151. package/dist/voice/agent_activity.js +117 -40
  152. package/dist/voice/agent_activity.js.map +1 -1
  153. package/dist/voice/agent_activity.test.cjs +135 -0
  154. package/dist/voice/agent_activity.test.cjs.map +1 -0
  155. package/dist/voice/agent_activity.test.js +134 -0
  156. package/dist/voice/agent_activity.test.js.map +1 -0
  157. package/dist/voice/agent_session.cjs +38 -38
  158. package/dist/voice/agent_session.cjs.map +1 -1
  159. package/dist/voice/agent_session.d.cts +65 -56
  160. package/dist/voice/agent_session.d.ts +65 -56
  161. package/dist/voice/agent_session.d.ts.map +1 -1
  162. package/dist/voice/agent_session.js +37 -37
  163. package/dist/voice/agent_session.js.map +1 -1
  164. package/dist/voice/audio_recognition.cjs +106 -52
  165. package/dist/voice/audio_recognition.cjs.map +1 -1
  166. package/dist/voice/audio_recognition.d.cts +4 -2
  167. package/dist/voice/audio_recognition.d.ts +4 -2
  168. package/dist/voice/audio_recognition.d.ts.map +1 -1
  169. package/dist/voice/audio_recognition.js +106 -52
  170. package/dist/voice/audio_recognition.js.map +1 -1
  171. package/dist/voice/audio_recognition_span.test.cjs +84 -22
  172. package/dist/voice/audio_recognition_span.test.cjs.map +1 -1
  173. package/dist/voice/audio_recognition_span.test.js +90 -23
  174. package/dist/voice/audio_recognition_span.test.js.map +1 -1
  175. package/dist/voice/events.cjs +1 -1
  176. package/dist/voice/events.cjs.map +1 -1
  177. package/dist/voice/events.d.cts +4 -3
  178. package/dist/voice/events.d.ts +4 -3
  179. package/dist/voice/events.d.ts.map +1 -1
  180. package/dist/voice/events.js +1 -1
  181. package/dist/voice/events.js.map +1 -1
  182. package/dist/voice/index.cjs +9 -1
  183. package/dist/voice/index.cjs.map +1 -1
  184. package/dist/voice/index.d.cts +1 -1
  185. package/dist/voice/index.d.ts +1 -1
  186. package/dist/voice/index.d.ts.map +1 -1
  187. package/dist/voice/index.js +10 -1
  188. package/dist/voice/index.js.map +1 -1
  189. package/dist/voice/remote_session.cjs +922 -0
  190. package/dist/voice/remote_session.cjs.map +1 -0
  191. package/dist/voice/remote_session.d.cts +108 -0
  192. package/dist/voice/remote_session.d.ts +108 -0
  193. package/dist/voice/remote_session.d.ts.map +1 -0
  194. package/dist/voice/remote_session.js +887 -0
  195. package/dist/voice/remote_session.js.map +1 -0
  196. package/dist/voice/report.cjs +11 -10
  197. package/dist/voice/report.cjs.map +1 -1
  198. package/dist/voice/report.d.cts +5 -3
  199. package/dist/voice/report.d.ts +5 -3
  200. package/dist/voice/report.d.ts.map +1 -1
  201. package/dist/voice/report.js +11 -10
  202. package/dist/voice/report.js.map +1 -1
  203. package/dist/voice/report.test.cjs +15 -0
  204. package/dist/voice/report.test.cjs.map +1 -1
  205. package/dist/voice/report.test.js +15 -0
  206. package/dist/voice/report.test.js.map +1 -1
  207. package/dist/voice/room_io/room_io.cjs +39 -0
  208. package/dist/voice/room_io/room_io.cjs.map +1 -1
  209. package/dist/voice/room_io/room_io.d.cts +3 -1
  210. package/dist/voice/room_io/room_io.d.ts +3 -1
  211. package/dist/voice/room_io/room_io.d.ts.map +1 -1
  212. package/dist/voice/room_io/room_io.js +40 -1
  213. package/dist/voice/room_io/room_io.js.map +1 -1
  214. package/dist/voice/turn_config/interruption.cjs.map +1 -1
  215. package/dist/voice/turn_config/interruption.d.cts +1 -1
  216. package/dist/voice/turn_config/interruption.d.ts +1 -1
  217. package/dist/voice/turn_config/interruption.d.ts.map +1 -1
  218. package/dist/voice/turn_config/interruption.js.map +1 -1
  219. package/dist/voice/turn_config/utils.cjs +95 -35
  220. package/dist/voice/turn_config/utils.cjs.map +1 -1
  221. package/dist/voice/turn_config/utils.d.cts +17 -5
  222. package/dist/voice/turn_config/utils.d.ts +17 -5
  223. package/dist/voice/turn_config/utils.d.ts.map +1 -1
  224. package/dist/voice/turn_config/utils.js +93 -35
  225. package/dist/voice/turn_config/utils.js.map +1 -1
  226. package/dist/voice/turn_config/utils.test.cjs +83 -41
  227. package/dist/voice/turn_config/utils.test.cjs.map +1 -1
  228. package/dist/voice/turn_config/utils.test.js +84 -42
  229. package/dist/voice/turn_config/utils.test.js.map +1 -1
  230. package/dist/worker.cjs +6 -29
  231. package/dist/worker.cjs.map +1 -1
  232. package/dist/worker.d.ts.map +1 -1
  233. package/dist/worker.js +6 -19
  234. package/dist/worker.js.map +1 -1
  235. package/package.json +3 -2
  236. package/src/cli.ts +2 -0
  237. package/src/constants.ts +1 -0
  238. package/src/cpu.test.ts +239 -0
  239. package/src/cpu.ts +173 -0
  240. package/src/index.ts +13 -15
  241. package/src/inference/interruption/defaults.ts +1 -1
  242. package/src/inference/interruption/http_transport.ts +49 -30
  243. package/src/inference/interruption/interruption_detector.ts +22 -6
  244. package/src/inference/interruption/interruption_stream.ts +4 -4
  245. package/src/inference/interruption/types.ts +2 -2
  246. package/src/inference/interruption/ws_transport.ts +63 -59
  247. package/src/inference/llm.ts +3 -1
  248. package/src/inference/stt.test.ts +17 -0
  249. package/src/inference/stt.ts +22 -14
  250. package/src/inference/tts.test.ts +12 -0
  251. package/src/inference/tts.ts +22 -6
  252. package/src/ipc/job_proc_lazy_main.ts +44 -24
  253. package/src/job.ts +1 -1
  254. package/src/language.test.ts +62 -0
  255. package/src/language.ts +380 -0
  256. package/src/llm/index.ts +2 -0
  257. package/src/stream/deferred_stream.ts +5 -1
  258. package/src/stt/stt.ts +2 -1
  259. package/src/utils.ts +20 -0
  260. package/src/voice/agent.test.ts +208 -1
  261. package/src/voice/agent.ts +21 -22
  262. package/src/voice/agent_activity.test.ts +194 -0
  263. package/src/voice/agent_activity.ts +161 -43
  264. package/src/voice/agent_session.ts +103 -92
  265. package/src/voice/audio_recognition.ts +124 -61
  266. package/src/voice/audio_recognition_span.test.ts +115 -35
  267. package/src/voice/events.ts +4 -3
  268. package/src/voice/index.ts +10 -1
  269. package/src/voice/remote_session.ts +1083 -0
  270. package/src/voice/report.test.ts +22 -3
  271. package/src/voice/report.ts +31 -14
  272. package/src/voice/room_io/room_io.ts +52 -2
  273. package/src/voice/turn_config/interruption.ts +1 -1
  274. package/src/voice/turn_config/utils.test.ts +91 -43
  275. package/src/voice/turn_config/utils.ts +120 -56
  276. package/src/worker.ts +34 -50
  277. package/dist/voice/client_events.cjs +0 -554
  278. package/dist/voice/client_events.cjs.map +0 -1
  279. package/dist/voice/client_events.d.cts +0 -195
  280. package/dist/voice/client_events.d.ts +0 -195
  281. package/dist/voice/client_events.d.ts.map +0 -1
  282. package/dist/voice/client_events.js +0 -548
  283. package/dist/voice/client_events.js.map +0 -1
  284. package/dist/voice/wire_format.cjs +0 -798
  285. package/dist/voice/wire_format.cjs.map +0 -1
  286. package/dist/voice/wire_format.d.cts +0 -5503
  287. package/dist/voice/wire_format.d.ts +0 -5503
  288. package/dist/voice/wire_format.d.ts.map +0 -1
  289. package/dist/voice/wire_format.js +0 -728
  290. package/dist/voice/wire_format.js.map +0 -1
  291. package/src/voice/client_events.ts +0 -838
  292. package/src/voice/wire_format.ts +0 -827
@@ -3,10 +3,12 @@
3
3
  // SPDX-License-Identifier: Apache-2.0
4
4
  import { describe, expect, it } from 'vitest';
5
5
  import { ChatContext } from '../llm/chat_context.js';
6
- import type { VoiceOptions } from './agent_session.js';
6
+ import type { AgentSessionOptions, VoiceOptions } from './agent_session.js';
7
7
  import { createSessionReport, sessionReportToJSON } from './report.js';
8
8
 
9
- function baseOptions(): VoiceOptions {
9
+ type ReportOptions = AgentSessionOptions & Partial<VoiceOptions>;
10
+
11
+ function baseOptions(): ReportOptions {
10
12
  return {
11
13
  maxToolSteps: 3,
12
14
  preemptiveGeneration: false,
@@ -16,7 +18,7 @@ function baseOptions(): VoiceOptions {
16
18
  };
17
19
  }
18
20
 
19
- function serializeOptions(options: VoiceOptions) {
21
+ function serializeOptions(options: ReportOptions) {
20
22
  const report = createSessionReport({
21
23
  jobId: 'job',
22
24
  roomId: 'room-id',
@@ -94,6 +96,23 @@ describe('sessionReportToJSON', () => {
94
96
  });
95
97
  });
96
98
 
99
+ it('serializes allow_interruptions from interruption.enabled when present', () => {
100
+ const options = baseOptions();
101
+ options.allowInterruptions = true;
102
+ options.turnHandling = {
103
+ interruption: {
104
+ enabled: false,
105
+ mode: 'adaptive',
106
+ },
107
+ };
108
+
109
+ const serialized = serializeOptions(options);
110
+ expect(serialized).toMatchObject({
111
+ allow_interruptions: false,
112
+ max_tool_steps: 3,
113
+ });
114
+ });
115
+
97
116
  it('falls back to deprecated flat fields when turnHandling values are absent', () => {
98
117
  const options = baseOptions();
99
118
  options.allowInterruptions = false;
@@ -3,14 +3,16 @@
3
3
  // SPDX-License-Identifier: Apache-2.0
4
4
  import type { ChatContext } from '../llm/chat_context.js';
5
5
  import { type ModelUsage, filterZeroValues } from '../metrics/model_usage.js';
6
- import type { VoiceOptions } from './agent_session.js';
6
+ import type { AgentSessionOptions, VoiceOptions } from './agent_session.js';
7
7
  import type { AgentEvent } from './events.js';
8
8
 
9
+ type ReportOptions = AgentSessionOptions & Partial<VoiceOptions>;
10
+
9
11
  export interface SessionReport {
10
12
  jobId: string;
11
13
  roomId: string;
12
14
  room: string;
13
- options: VoiceOptions;
15
+ options: ReportOptions;
14
16
  events: AgentEvent[];
15
17
  chatHistory: ChatContext;
16
18
  enableRecording: boolean;
@@ -32,7 +34,7 @@ export interface SessionReportOptions {
32
34
  jobId: string;
33
35
  roomId: string;
34
36
  room: string;
35
- options: VoiceOptions;
37
+ options: ReportOptions;
36
38
  events: AgentEvent[];
37
39
  chatHistory: ChatContext;
38
40
  enableRecording?: boolean;
@@ -76,22 +78,37 @@ export function createSessionReport(opts: SessionReportOptions): SessionReport {
76
78
  // - Uploads to LiveKit Cloud observability endpoint with JWT auth
77
79
  export function sessionReportToJSON(report: SessionReport): Record<string, unknown> {
78
80
  const events: Record<string, unknown>[] = [];
79
- const interruptionConfig = report.options.turnHandling?.interruption;
80
- const endpointingConfig = report.options.turnHandling?.endpointing;
81
+ const options = report.options;
82
+ const interruptionConfig = options.turnHandling?.interruption;
83
+ const endpointingConfig = options.turnHandling?.endpointing;
81
84
 
82
85
  // Keep backwards compatibility with deprecated fields
83
86
  const allowInterruptions =
84
- interruptionConfig?.mode !== undefined
85
- ? interruptionConfig.mode !== false
86
- : report.options.allowInterruptions;
87
+ interruptionConfig?.enabled !== undefined
88
+ ? interruptionConfig.enabled
89
+ : interruptionConfig?.mode !== undefined
90
+ ? true
91
+ : options.allowInterruptions ?? options.voiceOptions?.allowInterruptions;
87
92
  const discardAudioIfUninterruptible =
88
93
  interruptionConfig?.discardAudioIfUninterruptible ??
89
- report.options.discardAudioIfUninterruptible;
94
+ options.discardAudioIfUninterruptible ??
95
+ options.voiceOptions?.discardAudioIfUninterruptible;
90
96
  const minInterruptionDuration =
91
- interruptionConfig?.minDuration ?? report.options.minInterruptionDuration;
92
- const minInterruptionWords = interruptionConfig?.minWords ?? report.options.minInterruptionWords;
93
- const minEndpointingDelay = endpointingConfig?.minDelay ?? report.options.minEndpointingDelay;
94
- const maxEndpointingDelay = endpointingConfig?.maxDelay ?? report.options.maxEndpointingDelay;
97
+ interruptionConfig?.minDuration ??
98
+ options.minInterruptionDuration ??
99
+ options.voiceOptions?.minInterruptionDuration;
100
+ const minInterruptionWords =
101
+ interruptionConfig?.minWords ??
102
+ options.minInterruptionWords ??
103
+ options.voiceOptions?.minInterruptionWords;
104
+ const minEndpointingDelay =
105
+ endpointingConfig?.minDelay ??
106
+ options.minEndpointingDelay ??
107
+ options.voiceOptions?.minEndpointingDelay;
108
+ const maxEndpointingDelay =
109
+ endpointingConfig?.maxDelay ??
110
+ options.maxEndpointingDelay ??
111
+ options.voiceOptions?.maxEndpointingDelay;
95
112
 
96
113
  for (const event of report.events) {
97
114
  if (event.type === 'metrics_collected') {
@@ -113,7 +130,7 @@ export function sessionReportToJSON(report: SessionReport): Record<string, unkno
113
130
  min_interruption_words: minInterruptionWords,
114
131
  min_endpointing_delay: minEndpointingDelay,
115
132
  max_endpointing_delay: maxEndpointingDelay,
116
- max_tool_steps: report.options.maxToolSteps,
133
+ max_tool_steps: options.maxToolSteps,
117
134
  },
118
135
  chat_history: report.chatHistory.toJSON({ excludeTimestamp: false }),
119
136
  enable_user_data_training: report.enableRecording,
@@ -1,6 +1,7 @@
1
1
  // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
2
2
  //
3
3
  // SPDX-License-Identifier: Apache-2.0
4
+ import type { TextStreamReader } from '@livekit/rtc-node';
4
5
  import {
5
6
  type AudioFrame,
6
7
  ConnectionState,
@@ -16,12 +17,11 @@ import {
16
17
  TrackSource,
17
18
  } from '@livekit/rtc-node';
18
19
  import type { WritableStreamDefaultWriter } from 'node:stream/web';
19
- import { ATTRIBUTE_PUBLISH_ON_BEHALF } from '../../constants.js';
20
+ import { ATTRIBUTE_PUBLISH_ON_BEHALF, TOPIC_CHAT } from '../../constants.js';
20
21
  import { log } from '../../log.js';
21
22
  import { IdentityTransform } from '../../stream/identity_transform.js';
22
23
  import { Future, Task, waitForAbort } from '../../utils.js';
23
24
  import { type AgentSession } from '../agent_session.js';
24
- import type { TextInputCallback } from '../client_events.js';
25
25
  import {
26
26
  AgentSessionEventTypes,
27
27
  type AgentStateChangedEvent,
@@ -29,6 +29,7 @@ import {
29
29
  type UserInputTranscribedEvent,
30
30
  } from '../events.js';
31
31
  import type { AudioOutput, TextOutput } from '../io.js';
32
+ import type { TextInputCallback } from '../remote_session.js';
32
33
  import { TranscriptionSynchronizer } from '../transcription/synchronizer.js';
33
34
  import { ParticipantAudioInputStream } from './_input.js';
34
35
  import {
@@ -127,6 +128,7 @@ export class RoomIO {
127
128
  private agentTranscriptOutput?: ParalellTextOutput;
128
129
  private transcriptionSynchronizer?: TranscriptionSynchronizer;
129
130
  private participantIdentity: string | null = null;
131
+ private textStreamHandlerRegistered = false;
130
132
 
131
133
  private participantAvailableFuture: Future<RemoteParticipant> = new Future();
132
134
  private roomConnectedFuture: Future<void> = new Future();
@@ -271,6 +273,37 @@ export class RoomIO {
271
273
  }
272
274
  };
273
275
 
276
+ private onUserTextInput = (reader: TextStreamReader, participantInfo: { identity: string }) => {
277
+ if (this.participantIdentity && participantInfo.identity !== this.participantIdentity) {
278
+ return;
279
+ }
280
+
281
+ const participant = this.room.remoteParticipants.get(participantInfo.identity);
282
+ if (!participant) {
283
+ this.logger.warn('participant not found, ignoring text input');
284
+ return;
285
+ }
286
+
287
+ const readText = async () => {
288
+ const text = await reader.readAll();
289
+
290
+ const textInputResult = this.inputOptions.textInputCallback!(this.agentSession, {
291
+ text,
292
+ info: reader.info,
293
+ participantIdentity: participantInfo.identity,
294
+ });
295
+
296
+ // check if callback is a Promise
297
+ if (textInputResult instanceof Promise) {
298
+ await textInputResult;
299
+ }
300
+ };
301
+
302
+ readText().catch((error) => {
303
+ this.logger.error({ error }, 'Error reading text input');
304
+ });
305
+ };
306
+
274
307
  private async forwardUserTranscript(signal: AbortSignal): Promise<void> {
275
308
  const reader = this.userTranscriptStream.readable.getReader();
276
309
  try {
@@ -402,6 +435,18 @@ export class RoomIO {
402
435
 
403
436
  start() {
404
437
  // -- create inputs --
438
+
439
+ if (this.inputOptions.textEnabled) {
440
+ try {
441
+ this.room.registerTextStreamHandler(TOPIC_CHAT, this.onUserTextInput);
442
+ this.textStreamHandlerRegistered = true;
443
+ } catch (error) {
444
+ if (this.inputOptions.textEnabled) {
445
+ this.logger.warn(`text stream handler for topic "${TOPIC_CHAT}" already set, ignoring`);
446
+ }
447
+ }
448
+ }
449
+
405
450
  if (this.inputOptions.audioEnabled) {
406
451
  this.audioInput = new ParticipantAudioInputStream({
407
452
  room: this.room,
@@ -476,6 +521,11 @@ export class RoomIO {
476
521
  this.agentSession.off(AgentSessionEventTypes.UserInputTranscribed, this.onUserInputTranscribed);
477
522
  this.agentSession.off(AgentSessionEventTypes.AgentStateChanged, this.onAgentStateChanged);
478
523
 
524
+ if (this.textStreamHandlerRegistered) {
525
+ this.room.unregisterTextStreamHandler(TOPIC_CHAT);
526
+ this.textStreamHandlerRegistered = false;
527
+ }
528
+
479
529
  await this.initTask?.cancelAndWait();
480
530
 
481
531
  // Close stream FIRST so reader.read() in forwardUserTranscript can exit.
@@ -15,7 +15,7 @@ export interface InterruptionOptions {
15
15
  * voice-activity detection. `undefined` means auto-detect.
16
16
  * @defaultValue undefined
17
17
  */
18
- mode: 'adaptive' | 'vad' | false | undefined;
18
+ mode: 'adaptive' | 'vad' | undefined;
19
19
  /**
20
20
  * When `true`, buffered audio is dropped while the agent is speaking and cannot be interrupted.
21
21
  * @defaultValue true
@@ -3,10 +3,11 @@
3
3
  // SPDX-License-Identifier: Apache-2.0
4
4
  import { beforeAll, describe, expect, it } from 'vitest';
5
5
  import { initializeLogger } from '../../log.js';
6
+ import { defaultAgentSessionOptions } from '../agent_session.js';
6
7
  import { defaultEndpointingOptions } from './endpointing.js';
7
8
  import { defaultInterruptionOptions } from './interruption.js';
8
9
  import { defaultTurnHandlingOptions } from './turn_handling.js';
9
- import { migrateLegacyOptions } from './utils.js';
10
+ import { migrateLegacyOptions, migrateTurnHandling } from './utils.js';
10
11
 
11
12
  beforeAll(() => {
12
13
  initializeLogger({ pretty: true, level: 'info' });
@@ -14,20 +15,20 @@ beforeAll(() => {
14
15
 
15
16
  describe('migrateLegacyOptions', () => {
16
17
  it('should return all defaults when no options are provided', () => {
17
- const result = migrateLegacyOptions({});
18
+ const { agentSessionOptions: result } = migrateLegacyOptions({});
18
19
 
19
- expect(result.options.turnHandling).toEqual({
20
+ expect(result.turnHandling).toEqual({
20
21
  turnDetection: defaultTurnHandlingOptions.turnDetection,
21
22
  endpointing: defaultEndpointingOptions,
22
23
  interruption: defaultInterruptionOptions,
23
24
  });
24
- expect(result.options.maxToolSteps).toBe(3);
25
- expect(result.options.preemptiveGeneration).toBe(false);
26
- expect(result.options.userAwayTimeout).toBe(15.0);
25
+ expect(result.maxToolSteps).toBe(defaultAgentSessionOptions.maxToolSteps);
26
+ expect(result.preemptiveGeneration).toBe(defaultAgentSessionOptions.preemptiveGeneration);
27
+ expect(result.userAwayTimeout).toBe(defaultAgentSessionOptions.userAwayTimeout);
27
28
  });
28
29
 
29
30
  it('should migrate legacy flat fields into nested turnHandling config', () => {
30
- const result = migrateLegacyOptions({
31
+ const { agentSessionOptions: result } = migrateLegacyOptions({
31
32
  voiceOptions: {
32
33
  minInterruptionDuration: 1000,
33
34
  minInterruptionWords: 3,
@@ -37,64 +38,111 @@ describe('migrateLegacyOptions', () => {
37
38
  },
38
39
  });
39
40
 
40
- expect(result.options.turnHandling.interruption!.minDuration).toBe(1000);
41
- expect(result.options.turnHandling.interruption!.minWords).toBe(3);
42
- expect(result.options.turnHandling.interruption!.discardAudioIfUninterruptible).toBe(false);
43
- expect(result.options.turnHandling.endpointing!.minDelay).toBe(800);
44
- expect(result.options.turnHandling.endpointing!.maxDelay).toBe(5000);
41
+ expect(result.turnHandling.interruption!.minDuration).toBe(1000);
42
+ expect(result.turnHandling.interruption!.minWords).toBe(3);
43
+ expect(result.turnHandling.interruption!.discardAudioIfUninterruptible).toBe(false);
44
+ expect(result.turnHandling.endpointing!.minDelay).toBe(800);
45
+ expect(result.turnHandling.endpointing!.maxDelay).toBe(5000);
45
46
  });
46
47
 
47
48
  it('should set interruption.enabled to false when allowInterruptions is false', () => {
48
- const result = migrateLegacyOptions({
49
- options: {
50
- allowInterruptions: false,
51
- },
49
+ const { agentSessionOptions: result } = migrateLegacyOptions({
50
+ voiceOptions: { allowInterruptions: false },
52
51
  });
53
52
 
54
- expect(result.options.turnHandling.interruption!.enabled).toBe(false);
53
+ expect(result.turnHandling.interruption!.enabled).toBe(false);
55
54
  });
56
55
 
57
- it('should give options precedence over voiceOptions when both are provided', () => {
58
- const result = migrateLegacyOptions({
56
+ it('should give top-level fields precedence over voiceOptions', () => {
57
+ const { agentSessionOptions: result } = migrateLegacyOptions({
59
58
  voiceOptions: {
60
59
  minInterruptionDuration: 1000,
61
60
  maxEndpointingDelay: 5000,
62
61
  maxToolSteps: 10,
63
62
  },
64
- options: {
65
- minInterruptionDuration: 2000,
66
- maxEndpointingDelay: 8000,
67
- maxToolSteps: 5,
68
- },
69
- });
70
-
71
- expect(result.options.turnHandling.interruption!.minDuration).toBe(2000);
72
- expect(result.options.turnHandling.endpointing!.maxDelay).toBe(8000);
73
- expect(result.options.maxToolSteps).toBe(5);
74
- });
75
-
76
- it('should let explicit turnHandling override legacy flat fields', () => {
77
- const result = migrateLegacyOptions({
78
- options: {
79
- minInterruptionDuration: 1000,
80
- minEndpointingDelay: 800,
81
- turnHandling: {
82
- interruption: { minDuration: 3000 },
83
- endpointing: { minDelay: 2000 },
63
+ turnHandling: {
64
+ interruption: {
65
+ minDuration: 2000,
66
+ },
67
+ endpointing: {
68
+ maxDelay: 8000,
84
69
  },
85
70
  },
71
+ maxToolSteps: 5,
86
72
  });
87
73
 
88
- expect(result.options.turnHandling.interruption!.minDuration).toBe(3000);
89
- expect(result.options.turnHandling.endpointing!.minDelay).toBe(2000);
74
+ expect(result.turnHandling.interruption!.minDuration).toBe(2000);
75
+ expect(result.turnHandling.endpointing!.maxDelay).toBe(8000);
76
+ expect(result.maxToolSteps).toBe(5);
90
77
  });
91
78
 
92
79
  it('should preserve top-level turnDetection in the result', () => {
93
- const result = migrateLegacyOptions({
80
+ const { agentSessionOptions: result } = migrateLegacyOptions({
94
81
  turnDetection: 'vad',
95
82
  });
96
83
 
84
+ expect(result.turnHandling.turnDetection).toBe('vad');
85
+ });
86
+ });
87
+
88
+ describe('migrateTurnHandling', () => {
89
+ it('should return empty partial when no deprecated Agent fields are given', () => {
90
+ const result = migrateTurnHandling({});
91
+ expect(result).toEqual({});
92
+ });
93
+
94
+ it('should set interruption.enabled to false when allowInterruptions is false', () => {
95
+ const result = migrateTurnHandling({ allowInterruptions: false });
96
+ expect(result.interruption).toEqual({ enabled: false });
97
+ expect(result.endpointing).toBeUndefined();
98
+ expect(result.turnDetection).toBeUndefined();
99
+ });
100
+
101
+ it('should not set interruption when allowInterruptions is true or undefined', () => {
102
+ expect(migrateTurnHandling({ allowInterruptions: true })).toEqual({});
103
+ expect(migrateTurnHandling({ allowInterruptions: undefined })).toEqual({});
104
+ });
105
+
106
+ it('should map minEndpointingDelay to endpointing.minDelay', () => {
107
+ const result = migrateTurnHandling({ minEndpointingDelay: 800 });
108
+ expect(result.endpointing).toEqual({ minDelay: 800 });
109
+ });
110
+
111
+ it('should map maxEndpointingDelay to endpointing.maxDelay', () => {
112
+ const result = migrateTurnHandling({ maxEndpointingDelay: 5000 });
113
+ expect(result.endpointing).toEqual({ maxDelay: 5000 });
114
+ });
115
+
116
+ it('should pass through turnDetection', () => {
117
+ const result = migrateTurnHandling({ turnDetection: 'vad' });
97
118
  expect(result.turnDetection).toBe('vad');
98
- expect(result.options.turnHandling.turnDetection).toBe('vad');
119
+ });
120
+
121
+ it('should combine all deprecated Agent fields', () => {
122
+ const result = migrateTurnHandling({
123
+ turnDetection: 'stt',
124
+ allowInterruptions: false,
125
+ minEndpointingDelay: 400,
126
+ maxEndpointingDelay: 3000,
127
+ });
128
+ expect(result.turnDetection).toBe('stt');
129
+ expect(result.interruption).toEqual({ enabled: false });
130
+ expect(result.endpointing).toEqual({ minDelay: 400, maxDelay: 3000 });
131
+ });
132
+
133
+ it('should ignore deprecated Agent fields when explicit turnHandling is provided', () => {
134
+ const turnHandling = {
135
+ endpointing: { minDelay: 999, maxDelay: 4000 },
136
+ interruption: { enabled: true },
137
+ turnDetection: 'vad' as const,
138
+ };
139
+ const result = migrateTurnHandling({
140
+ turnHandling,
141
+ turnDetection: 'stt',
142
+ allowInterruptions: false,
143
+ minEndpointingDelay: 100,
144
+ maxEndpointingDelay: 200,
145
+ });
146
+ expect(result).toEqual(turnHandling);
99
147
  });
100
148
  });
@@ -5,88 +5,105 @@ import { log } from '../../log.js';
5
5
  import {
6
6
  type AgentSessionOptions,
7
7
  type InternalSessionOptions,
8
- defaultSessionOptions,
8
+ type TurnDetectionMode,
9
+ type VoiceOptions,
9
10
  } from '../agent_session.js';
10
11
  import { defaultEndpointingOptions } from './endpointing.js';
11
12
  import { defaultInterruptionOptions } from './interruption.js';
12
13
  import { type TurnHandlingOptions, defaultTurnHandlingOptions } from './turn_handling.js';
13
14
 
14
- export function migrateLegacyOptions<UserData>(
15
- legacyOptions: AgentSessionOptions<UserData>,
16
- ): AgentSessionOptions<UserData> & { options: InternalSessionOptions } {
15
+ const defaultSessionOptions = {
16
+ maxToolSteps: 3,
17
+ preemptiveGeneration: true,
18
+ userAwayTimeout: 15.0,
19
+ aecWarmupDuration: 3000,
20
+ turnHandling: {},
21
+ useTtsAlignedTranscript: true,
22
+ } as const satisfies AgentSessionOptions;
23
+
24
+ const defaultLegacyVoiceOptions: VoiceOptions = {
25
+ minEndpointingDelay: defaultTurnHandlingOptions.endpointing.minDelay,
26
+ maxEndpointingDelay: defaultTurnHandlingOptions.endpointing.maxDelay,
27
+ maxToolSteps: defaultSessionOptions.maxToolSteps,
28
+ preemptiveGeneration: defaultSessionOptions.preemptiveGeneration,
29
+ };
30
+
31
+ export function migrateLegacyOptions<UserData>(legacyOptions: AgentSessionOptions<UserData>): {
32
+ agentSessionOptions: InternalSessionOptions<UserData>;
33
+ legacyVoiceOptions: VoiceOptions;
34
+ } {
17
35
  const logger = log();
18
- const { voiceOptions, turnDetection, options: sessionOptions, ...rest } = legacyOptions;
36
+ const {
37
+ voiceOptions,
38
+ turnDetection,
39
+ stt,
40
+ vad,
41
+ llm,
42
+ tts,
43
+ userData,
44
+ connOptions,
45
+ ...sessionOptions
46
+ } = legacyOptions;
19
47
 
20
- if (voiceOptions !== undefined && sessionOptions !== undefined) {
48
+ if (voiceOptions !== undefined) {
21
49
  logger.warn(
22
- 'Both voiceOptions and options have been supplied as part of the AgentSessionOptions, voiceOptions will be merged with options taking precedence',
50
+ 'voiceOptions is deprecated, use top-level SessionOptions fields on AgentSessionOptions instead',
23
51
  );
24
52
  }
25
53
 
26
- // Preserve turnDetection before cloning since structuredClone converts class instances to plain objects
27
- const originalTurnDetection =
28
- sessionOptions?.turnHandling?.turnDetection ??
29
- voiceOptions?.turnHandling?.turnDetection ??
30
- turnDetection;
31
-
32
- // Exclude potentially non-cloneable turnDetection objects before structuredClone.
33
- // They are restored from originalTurnDetection below.
34
- const cloneableVoiceOptions = voiceOptions
35
- ? {
36
- ...voiceOptions,
37
- turnHandling: voiceOptions.turnHandling
38
- ? { ...voiceOptions.turnHandling, turnDetection: undefined }
39
- : voiceOptions.turnHandling,
40
- }
41
- : voiceOptions;
42
- const cloneableSessionOptions = sessionOptions
43
- ? {
44
- ...sessionOptions,
45
- turnHandling: sessionOptions.turnHandling
46
- ? { ...sessionOptions.turnHandling, turnDetection: undefined }
47
- : sessionOptions.turnHandling,
48
- }
49
- : sessionOptions;
50
-
51
- const mergedOptions = structuredClone({ ...cloneableVoiceOptions, ...cloneableSessionOptions });
52
-
53
54
  const turnHandling: TurnHandlingOptions = {
54
55
  interruption: {
55
- discardAudioIfUninterruptible: mergedOptions?.discardAudioIfUninterruptible,
56
- minDuration: mergedOptions?.minInterruptionDuration,
57
- minWords: mergedOptions?.minInterruptionWords,
56
+ discardAudioIfUninterruptible: voiceOptions?.discardAudioIfUninterruptible,
57
+ minDuration: voiceOptions?.minInterruptionDuration,
58
+ minWords: voiceOptions?.minInterruptionWords,
59
+ ...sessionOptions.turnHandling?.interruption,
58
60
  },
59
61
  endpointing: {
60
- minDelay: mergedOptions?.minEndpointingDelay,
61
- maxDelay: mergedOptions?.maxEndpointingDelay,
62
+ minDelay: voiceOptions?.minEndpointingDelay,
63
+ maxDelay: voiceOptions?.maxEndpointingDelay,
64
+ ...sessionOptions.turnHandling?.endpointing,
62
65
  },
63
66
 
64
- ...mergedOptions.turnHandling,
65
- // Restore original turnDetection after spread to preserve class instance with methods
66
- // (structuredClone converts class instances to plain objects, losing prototype methods)
67
- turnDetection: originalTurnDetection,
67
+ turnDetection: sessionOptions?.turnHandling?.turnDetection ?? turnDetection,
68
68
  } as const;
69
69
 
70
- if (mergedOptions?.allowInterruptions === false) {
70
+ if (
71
+ voiceOptions?.allowInterruptions === false &&
72
+ turnHandling.interruption.enabled === undefined
73
+ ) {
71
74
  turnHandling.interruption.enabled = false;
72
75
  }
73
76
 
74
- const optionsWithDefaults = {
77
+ const migratedVoiceOptions: AgentSessionOptions<UserData> = {};
78
+
79
+ if (voiceOptions?.maxToolSteps !== undefined) {
80
+ migratedVoiceOptions.maxToolSteps = voiceOptions.maxToolSteps;
81
+ }
82
+ if (voiceOptions?.preemptiveGeneration !== undefined) {
83
+ migratedVoiceOptions.preemptiveGeneration = voiceOptions.preemptiveGeneration;
84
+ }
85
+ if (voiceOptions?.userAwayTimeout !== undefined) {
86
+ migratedVoiceOptions.userAwayTimeout = voiceOptions.userAwayTimeout;
87
+ }
88
+
89
+ const legacyVoiceOptions = { ...defaultLegacyVoiceOptions, ...voiceOptions };
90
+
91
+ const agentSessionOptions = {
92
+ stt,
93
+ vad,
94
+ llm,
95
+ tts,
96
+ userData,
97
+ connOptions,
75
98
  ...defaultSessionOptions,
76
- ...mergedOptions,
99
+ ...migratedVoiceOptions,
100
+ ...sessionOptions,
77
101
  turnHandling: mergeWithDefaults(turnHandling),
102
+ // repopulate the deprecated voice options with migrated options for backwards compatibility
103
+ voiceOptions: legacyVoiceOptions,
78
104
  };
79
105
 
80
- const newAgentSessionOptions: AgentSessionOptions<UserData> & {
81
- options: InternalSessionOptions;
82
- } = {
83
- ...rest,
84
- options: optionsWithDefaults,
85
- voiceOptions: optionsWithDefaults,
86
- turnDetection: turnHandling.turnDetection,
87
- };
88
-
89
- return newAgentSessionOptions;
106
+ return { agentSessionOptions, legacyVoiceOptions };
90
107
  }
91
108
 
92
109
  /** Remove keys whose value is `undefined` so they don't shadow defaults when spread. */
@@ -101,3 +118,50 @@ export function mergeWithDefaults(config: TurnHandlingOptions) {
101
118
  interruption: { ...defaultInterruptionOptions, ...stripUndefined(config.interruption) },
102
119
  } as const;
103
120
  }
121
+
122
+ /**
123
+ * Build a partial {@link TurnHandlingOptions} from deprecated Agent constructor fields.
124
+ * Mirrors the Python Agent compatibility path, but keeps the JS API surface explicit.
125
+ */
126
+ export function migrateTurnHandling(opts: {
127
+ turnDetection?: TurnDetectionMode;
128
+ allowInterruptions?: boolean;
129
+ minEndpointingDelay?: number;
130
+ maxEndpointingDelay?: number;
131
+ turnHandling?: TurnHandlingOptions;
132
+ }): Partial<TurnHandlingOptions> {
133
+ if (opts.turnHandling !== undefined) {
134
+ return opts.turnHandling;
135
+ }
136
+
137
+ const migrated: Partial<TurnHandlingOptions> = {};
138
+
139
+ const endpointing: Partial<TurnHandlingOptions['endpointing']> = {};
140
+ if (opts.minEndpointingDelay !== undefined) {
141
+ endpointing.minDelay = opts.minEndpointingDelay;
142
+ }
143
+ if (opts.maxEndpointingDelay !== undefined) {
144
+ endpointing.maxDelay = opts.maxEndpointingDelay;
145
+ }
146
+ if (Object.keys(endpointing).length > 0) {
147
+ migrated.endpointing = endpointing;
148
+ }
149
+
150
+ const interruption: Partial<TurnHandlingOptions['interruption']> = {};
151
+ if (opts.allowInterruptions === false) {
152
+ interruption.enabled = false;
153
+ }
154
+ if (Object.keys(interruption).length > 0) {
155
+ migrated.interruption = interruption;
156
+ }
157
+
158
+ if (opts.turnDetection !== undefined) {
159
+ migrated.turnDetection = opts.turnDetection;
160
+ }
161
+
162
+ return {
163
+ ...(migrated.endpointing ? { endpointing: migrated.endpointing } : {}),
164
+ ...(migrated.interruption ? { interruption: migrated.interruption } : {}),
165
+ ...(migrated.turnDetection !== undefined ? { turnDetection: migrated.turnDetection } : {}),
166
+ };
167
+ }