@livekit/agents 1.0.35 → 1.0.36-dev.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. package/dist/index.cjs +3 -1
  2. package/dist/index.cjs.map +1 -1
  3. package/dist/index.d.cts +1 -0
  4. package/dist/index.d.ts +1 -0
  5. package/dist/index.d.ts.map +1 -1
  6. package/dist/index.js +1 -0
  7. package/dist/index.js.map +1 -1
  8. package/dist/inference/interruption/AdaptiveInterruptionDetector.cjs +152 -0
  9. package/dist/inference/interruption/AdaptiveInterruptionDetector.cjs.map +1 -0
  10. package/dist/inference/interruption/AdaptiveInterruptionDetector.d.cts +50 -0
  11. package/dist/inference/interruption/AdaptiveInterruptionDetector.d.ts +50 -0
  12. package/dist/inference/interruption/AdaptiveInterruptionDetector.d.ts.map +1 -0
  13. package/dist/inference/interruption/AdaptiveInterruptionDetector.js +125 -0
  14. package/dist/inference/interruption/AdaptiveInterruptionDetector.js.map +1 -0
  15. package/dist/inference/interruption/InterruptionStream.cjs +310 -0
  16. package/dist/inference/interruption/InterruptionStream.cjs.map +1 -0
  17. package/dist/inference/interruption/InterruptionStream.d.cts +57 -0
  18. package/dist/inference/interruption/InterruptionStream.d.ts +57 -0
  19. package/dist/inference/interruption/InterruptionStream.d.ts.map +1 -0
  20. package/dist/inference/interruption/InterruptionStream.js +288 -0
  21. package/dist/inference/interruption/InterruptionStream.js.map +1 -0
  22. package/dist/inference/interruption/defaults.cjs +76 -0
  23. package/dist/inference/interruption/defaults.cjs.map +1 -0
  24. package/dist/inference/interruption/defaults.d.cts +14 -0
  25. package/dist/inference/interruption/defaults.d.ts +14 -0
  26. package/dist/inference/interruption/defaults.d.ts.map +1 -0
  27. package/dist/inference/interruption/defaults.js +42 -0
  28. package/dist/inference/interruption/defaults.js.map +1 -0
  29. package/dist/inference/interruption/errors.cjs +2 -0
  30. package/dist/inference/interruption/errors.cjs.map +1 -0
  31. package/dist/inference/interruption/errors.d.cts +2 -0
  32. package/dist/inference/interruption/errors.d.ts +2 -0
  33. package/dist/inference/interruption/errors.d.ts.map +1 -0
  34. package/dist/inference/interruption/errors.js +1 -0
  35. package/dist/inference/interruption/errors.js.map +1 -0
  36. package/dist/inference/interruption/http_transport.cjs +57 -0
  37. package/dist/inference/interruption/http_transport.cjs.map +1 -0
  38. package/dist/inference/interruption/http_transport.d.cts +23 -0
  39. package/dist/inference/interruption/http_transport.d.ts +23 -0
  40. package/dist/inference/interruption/http_transport.d.ts.map +1 -0
  41. package/dist/inference/interruption/http_transport.js +33 -0
  42. package/dist/inference/interruption/http_transport.js.map +1 -0
  43. package/dist/inference/interruption/index.cjs +34 -0
  44. package/dist/inference/interruption/index.cjs.map +1 -0
  45. package/dist/inference/interruption/index.d.cts +5 -0
  46. package/dist/inference/interruption/index.d.ts +5 -0
  47. package/dist/inference/interruption/index.d.ts.map +1 -0
  48. package/dist/inference/interruption/index.js +7 -0
  49. package/dist/inference/interruption/index.js.map +1 -0
  50. package/dist/inference/interruption/interruption.cjs +85 -0
  51. package/dist/inference/interruption/interruption.cjs.map +1 -0
  52. package/dist/inference/interruption/interruption.d.cts +48 -0
  53. package/dist/inference/interruption/interruption.d.ts +48 -0
  54. package/dist/inference/interruption/interruption.d.ts.map +1 -0
  55. package/dist/inference/interruption/interruption.js +59 -0
  56. package/dist/inference/interruption/interruption.js.map +1 -0
  57. package/dist/inference/utils.cjs +15 -2
  58. package/dist/inference/utils.cjs.map +1 -1
  59. package/dist/inference/utils.d.cts +1 -0
  60. package/dist/inference/utils.d.ts +1 -0
  61. package/dist/inference/utils.d.ts.map +1 -1
  62. package/dist/inference/utils.js +13 -1
  63. package/dist/inference/utils.js.map +1 -1
  64. package/dist/inference/utils.test.cjs +20 -0
  65. package/dist/inference/utils.test.cjs.map +1 -0
  66. package/dist/inference/utils.test.js +19 -0
  67. package/dist/inference/utils.test.js.map +1 -0
  68. package/dist/stream/stream_channel.cjs +3 -0
  69. package/dist/stream/stream_channel.cjs.map +1 -1
  70. package/dist/stream/stream_channel.d.cts +3 -2
  71. package/dist/stream/stream_channel.d.ts +3 -2
  72. package/dist/stream/stream_channel.d.ts.map +1 -1
  73. package/dist/stream/stream_channel.js +3 -0
  74. package/dist/stream/stream_channel.js.map +1 -1
  75. package/dist/telemetry/trace_types.cjs +15 -0
  76. package/dist/telemetry/trace_types.cjs.map +1 -1
  77. package/dist/telemetry/trace_types.d.cts +5 -0
  78. package/dist/telemetry/trace_types.d.ts +5 -0
  79. package/dist/telemetry/trace_types.d.ts.map +1 -1
  80. package/dist/telemetry/trace_types.js +10 -0
  81. package/dist/telemetry/trace_types.js.map +1 -1
  82. package/dist/utils/ws_transport.cjs +51 -0
  83. package/dist/utils/ws_transport.cjs.map +1 -0
  84. package/dist/utils/ws_transport.d.cts +9 -0
  85. package/dist/utils/ws_transport.d.ts +9 -0
  86. package/dist/utils/ws_transport.d.ts.map +1 -0
  87. package/dist/utils/ws_transport.js +17 -0
  88. package/dist/utils/ws_transport.js.map +1 -0
  89. package/dist/utils/ws_transport.test.cjs +212 -0
  90. package/dist/utils/ws_transport.test.cjs.map +1 -0
  91. package/dist/utils/ws_transport.test.js +211 -0
  92. package/dist/utils/ws_transport.test.js.map +1 -0
  93. package/dist/voice/agent_activity.cjs +49 -0
  94. package/dist/voice/agent_activity.cjs.map +1 -1
  95. package/dist/voice/agent_activity.d.cts +14 -0
  96. package/dist/voice/agent_activity.d.ts +14 -0
  97. package/dist/voice/agent_activity.d.ts.map +1 -1
  98. package/dist/voice/agent_activity.js +49 -0
  99. package/dist/voice/agent_activity.js.map +1 -1
  100. package/dist/voice/agent_session.cjs +12 -1
  101. package/dist/voice/agent_session.cjs.map +1 -1
  102. package/dist/voice/agent_session.d.cts +3 -0
  103. package/dist/voice/agent_session.d.ts +3 -0
  104. package/dist/voice/agent_session.d.ts.map +1 -1
  105. package/dist/voice/agent_session.js +12 -1
  106. package/dist/voice/agent_session.js.map +1 -1
  107. package/dist/voice/audio_recognition.cjs +124 -2
  108. package/dist/voice/audio_recognition.cjs.map +1 -1
  109. package/dist/voice/audio_recognition.d.cts +32 -1
  110. package/dist/voice/audio_recognition.d.ts +32 -1
  111. package/dist/voice/audio_recognition.d.ts.map +1 -1
  112. package/dist/voice/audio_recognition.js +127 -2
  113. package/dist/voice/audio_recognition.js.map +1 -1
  114. package/package.json +2 -1
  115. package/src/index.ts +2 -0
  116. package/src/inference/interruption/AdaptiveInterruptionDetector.ts +166 -0
  117. package/src/inference/interruption/InterruptionStream.ts +397 -0
  118. package/src/inference/interruption/defaults.ts +33 -0
  119. package/src/inference/interruption/errors.ts +0 -0
  120. package/src/inference/interruption/http_transport.ts +61 -0
  121. package/src/inference/interruption/index.ts +4 -0
  122. package/src/inference/interruption/interruption.ts +88 -0
  123. package/src/inference/utils.test.ts +31 -0
  124. package/src/inference/utils.ts +15 -0
  125. package/src/stream/stream_channel.ts +6 -2
  126. package/src/telemetry/trace_types.ts +7 -0
  127. package/src/utils/ws_transport.test.ts +282 -0
  128. package/src/utils/ws_transport.ts +22 -0
  129. package/src/voice/agent_activity.ts +61 -0
  130. package/src/voice/agent_session.ts +22 -2
  131. package/src/voice/audio_recognition.ts +161 -1
@@ -0,0 +1,282 @@
1
+ // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+ import { describe, expect, it } from 'vitest';
5
+ import { WebSocket, WebSocketServer } from 'ws';
6
+ import { webSocketStream } from './ws_transport.js';
7
+
8
/** The web ReadableStream type produced by `webSocketStream`. */
type StreamReadable = ReturnType<typeof webSocketStream>['readable'];

/**
 * Start a WebSocketServer on an OS-assigned port, pass it (and its ws:// URL)
 * to `run`, and close the server afterwards.
 *
 * The server is closed in a `finally` so that a failing assertion inside `run`
 * does not leave a listening server behind for the rest of the test run.
 */
async function withServer(
  run: (wss: WebSocketServer, url: string) => Promise<void>,
): Promise<void> {
  const wss = await new Promise<WebSocketServer>((resolve) => {
    const server: WebSocketServer = new WebSocketServer({ port: 0 }, () => resolve(server));
  });
  const { port } = wss.address() as { port: number };

  try {
    await run(wss, `ws://localhost:${port}`);
  } finally {
    wss.close();
  }
}

/**
 * Read `readable` until it reports `done` and return a copy of every chunk.
 * The reader lock is released even if a read rejects.
 */
async function readAll(readable: StreamReadable): Promise<Uint8Array[]> {
  const reader = readable.getReader();
  const chunks: Uint8Array[] = [];
  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      chunks.push(new Uint8Array(value));
    }
  } finally {
    reader.releaseLock();
  }
  return chunks;
}

/**
 * Record every message received on connections accepted by `wss`.
 *
 * @returns `received` - the message payloads in arrival order;
 *          `closed` - resolves the first time an accepted connection closes.
 */
function recordMessages(wss: WebSocketServer): { received: Buffer[]; closed: Promise<void> } {
  const received: Buffer[] = [];
  const closed = new Promise<void>((resolve) => {
    wss.on('connection', (serverWs: WebSocket) => {
      serverWs.on('message', (data) => {
        received.push(Buffer.from(data as Buffer));
      });
      serverWs.on('close', () => resolve());
    });
  });
  return { received, closed };
}

/** Decode a received chunk as text. */
const toText = (chunk: Uint8Array): string => Buffer.from(chunk).toString();

describe('webSocketStream', () => {
  describe('readable stream', () => {
    it('receives messages from the WebSocket', async () => {
      await withServer(async (wss, url) => {
        wss.on('connection', (serverWs) => {
          serverWs.send('hello');
          serverWs.send('world');
          serverWs.close();
        });

        const { readable } = webSocketStream(url);
        const messages = (await readAll(readable)).map(toText);

        expect(messages).toEqual(['hello', 'world']);
      });
    });

    it('handles binary messages', async () => {
      await withServer(async (wss, url) => {
        const binaryData = new Uint8Array([1, 2, 3, 4, 5]);

        wss.on('connection', (serverWs) => {
          serverWs.send(binaryData);
          serverWs.close();
        });

        const { readable } = webSocketStream(url);
        const chunks = await readAll(readable);

        expect(chunks).toHaveLength(1);
        expect(Array.from(chunks[0]!)).toEqual([1, 2, 3, 4, 5]);
      });
    });

    it('handles empty stream when connection closes immediately', async () => {
      await withServer(async (wss, url) => {
        wss.on('connection', (serverWs) => {
          serverWs.close();
        });

        const { readable } = webSocketStream(url);
        const chunks = await readAll(readable);

        expect(chunks).toEqual([]);
      });
    });
  });

  describe('writable stream', () => {
    it('sends messages through the WebSocket', async () => {
      await withServer(async (wss, url) => {
        // Only the stream under test connects to the server; no auxiliary
        // client is opened, so nothing is left connected after the test.
        const { received, closed } = recordMessages(wss);

        const { writable } = webSocketStream(url);
        const writer = writable.getWriter();

        await writer.write(new TextEncoder().encode('hello'));
        await writer.write(new TextEncoder().encode('world'));
        await writer.close();

        await closed;

        expect(received.map(toText)).toEqual(['hello', 'world']);
      });
    });

    it('sends binary data through the WebSocket', async () => {
      await withServer(async (wss, url) => {
        const { received, closed } = recordMessages(wss);

        const { writable } = webSocketStream(url);
        const writer = writable.getWriter();

        const binaryData = new Uint8Array([10, 20, 30, 40, 50]);
        await writer.write(binaryData);
        await writer.close();

        await closed;

        expect(received).toHaveLength(1);
        expect(Array.from(received[0]!)).toEqual([10, 20, 30, 40, 50]);
      });
    });

    it('buffers writes if readyState is CONNECTING', async () => {
      await withServer(async (wss, url) => {
        const { writable } = webSocketStream(url);
        const writer = writable.getWriter();

        const { received, closed } = recordMessages(wss);

        // These writes should be buffered
        await writer.write(new TextEncoder().encode('buffered message'));
        await writer.close();

        await closed;

        expect(received.map(toText)).toEqual(['buffered message']);
      });
    });
  });

  describe('bidirectional communication', () => {
    it('supports echo pattern with readable and writable', async () => {
      await withServer(async (wss, url) => {
        // Server echoes messages back
        wss.on('connection', (serverWs) => {
          serverWs.on('message', (data) => {
            serverWs.send(data);
          });
        });

        const { readable, writable } = webSocketStream(url);
        const writer = writable.getWriter();
        const reader = readable.getReader();

        // Send messages
        await writer.write(new TextEncoder().encode('ping1'));
        await writer.write(new TextEncoder().encode('ping2'));

        // Read echoed responses
        const { value: response1 } = await reader.read();
        const { value: response2 } = await reader.read();

        expect(Buffer.from(response1!).toString()).toBe('ping1');
        expect(Buffer.from(response2!).toString()).toBe('ping2');

        reader.releaseLock();
        await writer.close();
      });
    });
  });

  describe('error handling', () => {
    it('readable stream ends when WebSocket closes unexpectedly', async () => {
      await withServer(async (wss, url) => {
        wss.on('connection', (serverWs) => {
          serverWs.send('before close');
          // Terminate connection abruptly
          serverWs.terminate();
        });

        const { readable } = webSocketStream(url);
        const reader = readable.getReader();

        // Read inline (not via readAll) so chunks received before a possible
        // stream error are still available for the assertion below.
        const chunks: string[] = [];
        try {
          while (true) {
            const { done, value } = await reader.read();
            if (done) break;
            chunks.push(Buffer.from(value).toString());
          }
        } catch (error) {
          console.error(error);
          // Connection terminated, stream may error
        } finally {
          reader.releaseLock();
        }

        // Should have received the message sent before termination
        expect(chunks).toContain('before close');
      });
    });
  });
});
@@ -0,0 +1,22 @@
1
+ import { Readable, Writable } from 'node:stream';
2
+ import WebSocket, { createWebSocketStream } from 'ws';
3
+
4
/**
 * Open a WebSocket to `wsUrl` and expose it as a pair of web streams.
 *
 * Errors emitted by the underlying duplex stream are logged with
 * `console.error`.
 *
 * @param wsUrl - URL of the WebSocket endpoint to connect to.
 * @returns `readable` / `writable` - web streams over the socket's duplex stream;
 *          `close` - closes the underlying WebSocket (same arguments as
 *          `WebSocket#close`).
 */
export function webSocketStream(wsUrl: string) {
  const ws = new WebSocket(wsUrl);
  const duplex = createWebSocketStream(ws);
  duplex.on('error', console.error);

  // End the write side when the read side ends to properly close the stream.
  // This is needed because Readable.toWeb() waits for both sides of the duplex
  // to close before signaling done on the ReadableStream.
  duplex.on('end', () => {
    duplex.end();
  });

  // Convert the writable side
  const writable = Writable.toWeb(duplex);
  // Convert the readable side
  const readable = Readable.toWeb(duplex);

  // Bind `close` to the socket: handing out the bare method would invoke it
  // with the wrong `this` (the returned object, or undefined when destructured).
  return { readable, writable, close: ws.close.bind(ws) };
}
@@ -41,6 +41,8 @@ import { recordRealtimeMetrics, traceTypes, tracer } from '../telemetry/index.js
41
41
  import { splitWords } from '../tokenize/basic/word.js';
42
42
  import { TTS, type TTSError } from '../tts/tts.js';
43
43
  import { Future, Task, cancelAndWait, waitFor } from '../utils.js';
44
+ import type { InterruptionEvent } from '../inference/interruption/interruption.js';
45
+ import { InterruptionEventType } from '../inference/interruption/interruption.js';
44
46
  import { VAD, type VADEvent } from '../vad.js';
45
47
  import type { Agent, ModelSettings } from './agent.js';
46
48
  import { StopResponse, asyncLocalStorage } from './agent.js';
@@ -112,6 +114,24 @@ export class AgentActivity implements RecognitionHooks {
112
114
  _mainTask?: Task<void>;
113
115
  _userTurnCompletedTask?: Promise<void>;
114
116
 
117
/**
 * Signal that the agent has begun speaking.
 * Forwards to `onStartOfAgentSpeech` on the audio recognition instance, if one
 * exists, which turns interruption detection on in AudioRecognition.
 * @internal
 */
notifyAgentSpeechStarted(): void {
  const recognition = this.audioRecognition;
  if (!recognition) {
    return;
  }
  recognition.onStartOfAgentSpeech();
}
125
+
126
/**
 * Signal that the agent has finished speaking.
 * Forwards to `onEndOfAgentSpeech` on the audio recognition instance, if one
 * exists, which turns interruption detection off in AudioRecognition.
 * @internal
 */
notifyAgentSpeechEnded(): void {
  const recognition = this.audioRecognition;
  if (!recognition) {
    return;
  }
  recognition.onEndOfAgentSpeech();
}
134
+
115
135
  constructor(agent: Agent, agentSession: AgentSession) {
116
136
  this.agent = agent;
117
137
  this.agentSession = agentSession;
@@ -292,6 +312,7 @@ export class AgentActivity implements RecognitionHooks {
292
312
  // Disable stt node if stt is not provided
293
313
  stt: this.stt ? (...args) => this.agent.sttNode(...args) : undefined,
294
314
  vad: this.vad,
315
+ interruptionDetector: this.agentSession.interruptionDetector,
295
316
  turnDetector: typeof this.turnDetection === 'string' ? undefined : this.turnDetection,
296
317
  turnDetectionMode: this.turnDetectionMode,
297
318
  minEndpointingDelay: this.agentSession.options.minEndpointingDelay,
@@ -697,6 +718,46 @@ export class AgentActivity implements RecognitionHooks {
697
718
  }
698
719
  }
699
720
 
721
/**
 * Handle an event from the adaptive interruption detector.
 *
 * Only `InterruptionEventType.INTERRUPTION` events are acted on. The event is
 * logged, then nothing further happens when turn detection is 'manual' or
 * 'realtime_llm', or when the LLM is a RealtimeModel whose capabilities report
 * turn detection. Otherwise user activity is signalled to the realtime session
 * (if any) and the current speech is interrupted, provided it exists, is not
 * already interrupted, and allows interruptions.
 *
 * @param ev - the interruption event to handle.
 */
onInterruption(ev: InterruptionEvent): void {
  // Anything other than an actual interruption (e.g. overlap_speech_ended) is ignored
  const isInterruption = ev.type === InterruptionEventType.INTERRUPTION;
  if (!isInterruption) return;

  this.logger.info(
    {
      probability: ev.probability,
      detectionDelay: ev.detectionDelay,
      totalDuration: ev.totalDuration,
    },
    'adaptive interruption detected',
  );

  // Mirrors onVADInferenceDone, but driven by the adaptive interruption detector
  switch (this.turnDetection) {
    case 'manual':
    case 'realtime_llm':
      return;
    default:
      break;
  }

  if (this.llm instanceof RealtimeModel && this.llm.capabilities.turnDetection) return;

  this.realtimeSession?.startUserActivity();

  // Bail out unless there is an uninterrupted, interruptible speech in progress
  if (
    !this._currentSpeech ||
    this._currentSpeech.interrupted ||
    !this._currentSpeech.allowInterruptions
  ) {
    return;
  }

  this.logger.info(
    { 'speech id': this._currentSpeech.id },
    'speech interrupted by adaptive interruption detector',
  );
  this.realtimeSession?.interrupt();
  this._currentSpeech.interrupt();
}
760
+
700
761
  onInterimTranscript(ev: SpeechEvent): void {
701
762
  if (this.llm instanceof RealtimeModel && this.llm.capabilities.userTranscription) {
702
763
  // skip stt transcription if userTranscription is enabled on the realtime model
@@ -15,6 +15,7 @@ import {
15
15
  type STTModelString,
16
16
  type TTSModelString,
17
17
  } from '../inference/index.js';
18
+ import type { AdaptiveInterruptionDetector } from '../inference/interruption/AdaptiveInterruptionDetector.js';
18
19
  import { type JobContext, getJobContext } from '../job.js';
19
20
  import type { FunctionCall, FunctionCallOutput } from '../llm/chat_context.js';
20
21
  import { AgentHandoffItem, ChatContext, ChatMessage } from '../llm/chat_context.js';
@@ -106,6 +107,7 @@ export type AgentSessionOptions<UserData = UnknownUserData> = {
106
107
  vad?: VAD;
107
108
  llm?: LLM | RealtimeModel | LLMModels;
108
109
  tts?: TTS | TTSModelString;
110
+ interruptionDetector?: AdaptiveInterruptionDetector;
109
111
  userData?: UserData;
110
112
  voiceOptions?: Partial<VoiceOptions>;
111
113
  connOptions?: SessionConnectOptions;
@@ -167,6 +169,8 @@ export class AgentSession<
167
169
  /** @internal - Timestamp when the session started (milliseconds) */
168
170
  _startedAt?: number;
169
171
 
172
+ interruptionDetector?: AdaptiveInterruptionDetector;
173
+
170
174
  constructor(opts: AgentSessionOptions<UserData>) {
171
175
  super();
172
176
 
@@ -176,6 +180,7 @@ export class AgentSession<
176
180
  llm,
177
181
  tts,
178
182
  turnDetection,
183
+ interruptionDetector,
179
184
  userData,
180
185
  voiceOptions = defaultVoiceOptions,
181
186
  connOptions,
@@ -212,6 +217,7 @@ export class AgentSession<
212
217
  }
213
218
 
214
219
  this.turnDetection = turnDetection;
220
+ this.interruptionDetector = interruptionDetector;
215
221
  this._userData = userData;
216
222
 
217
223
  // configurable IO
@@ -637,6 +643,8 @@ export class AgentSession<
637
643
  return;
638
644
  }
639
645
 
646
+ const oldState = this._agentState;
647
+
640
648
  if (state === 'speaking') {
641
649
  // Reset error counts when agent starts speaking
642
650
  this.llmErrorCounts = 0;
@@ -651,13 +659,25 @@ export class AgentSession<
651
659
  // TODO(brian): PR4 - Set participant attributes if roomIO.room.localParticipant is available
652
660
  // (Ref: Python agent_session.py line 1161-1164)
653
661
  }
662
+
663
+ // Notify AudioRecognition that agent started speaking (for interruption detection)
664
+ this.activity?.notifyAgentSpeechStarted();
665
+ } else if (oldState === 'speaking') {
666
+ // Agent stopped speaking
667
+ if (this.agentSpeakingSpan !== undefined) {
668
+ // TODO(brian): PR4 - Set ATTR_END_TIME attribute if available
669
+ this.agentSpeakingSpan.end();
670
+ this.agentSpeakingSpan = undefined;
671
+ }
672
+
673
+ // Notify AudioRecognition that agent stopped speaking (for interruption detection)
674
+ this.activity?.notifyAgentSpeechEnded();
654
675
  } else if (this.agentSpeakingSpan !== undefined) {
655
- // TODO(brian): PR4 - Set ATTR_END_TIME attribute if available
676
+ // Non-speaking to non-speaking transition but span is still open
656
677
  this.agentSpeakingSpan.end();
657
678
  this.agentSpeakingSpan = undefined;
658
679
  }
659
680
 
660
- const oldState = this._agentState;
661
681
  this._agentState = state;
662
682
 
663
683
  // Handle user away timer based on state changes