@livekit/agents 1.0.35 → 1.0.36-dev.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +3 -1
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -0
- package/dist/index.js.map +1 -1
- package/dist/inference/interruption/AdaptiveInterruptionDetector.cjs +152 -0
- package/dist/inference/interruption/AdaptiveInterruptionDetector.cjs.map +1 -0
- package/dist/inference/interruption/AdaptiveInterruptionDetector.d.cts +50 -0
- package/dist/inference/interruption/AdaptiveInterruptionDetector.d.ts +50 -0
- package/dist/inference/interruption/AdaptiveInterruptionDetector.d.ts.map +1 -0
- package/dist/inference/interruption/AdaptiveInterruptionDetector.js +125 -0
- package/dist/inference/interruption/AdaptiveInterruptionDetector.js.map +1 -0
- package/dist/inference/interruption/InterruptionStream.cjs +310 -0
- package/dist/inference/interruption/InterruptionStream.cjs.map +1 -0
- package/dist/inference/interruption/InterruptionStream.d.cts +57 -0
- package/dist/inference/interruption/InterruptionStream.d.ts +57 -0
- package/dist/inference/interruption/InterruptionStream.d.ts.map +1 -0
- package/dist/inference/interruption/InterruptionStream.js +288 -0
- package/dist/inference/interruption/InterruptionStream.js.map +1 -0
- package/dist/inference/interruption/defaults.cjs +76 -0
- package/dist/inference/interruption/defaults.cjs.map +1 -0
- package/dist/inference/interruption/defaults.d.cts +14 -0
- package/dist/inference/interruption/defaults.d.ts +14 -0
- package/dist/inference/interruption/defaults.d.ts.map +1 -0
- package/dist/inference/interruption/defaults.js +42 -0
- package/dist/inference/interruption/defaults.js.map +1 -0
- package/dist/inference/interruption/errors.cjs +2 -0
- package/dist/inference/interruption/errors.cjs.map +1 -0
- package/dist/inference/interruption/errors.d.cts +2 -0
- package/dist/inference/interruption/errors.d.ts +2 -0
- package/dist/inference/interruption/errors.d.ts.map +1 -0
- package/dist/inference/interruption/errors.js +1 -0
- package/dist/inference/interruption/errors.js.map +1 -0
- package/dist/inference/interruption/http_transport.cjs +57 -0
- package/dist/inference/interruption/http_transport.cjs.map +1 -0
- package/dist/inference/interruption/http_transport.d.cts +23 -0
- package/dist/inference/interruption/http_transport.d.ts +23 -0
- package/dist/inference/interruption/http_transport.d.ts.map +1 -0
- package/dist/inference/interruption/http_transport.js +33 -0
- package/dist/inference/interruption/http_transport.js.map +1 -0
- package/dist/inference/interruption/index.cjs +34 -0
- package/dist/inference/interruption/index.cjs.map +1 -0
- package/dist/inference/interruption/index.d.cts +5 -0
- package/dist/inference/interruption/index.d.ts +5 -0
- package/dist/inference/interruption/index.d.ts.map +1 -0
- package/dist/inference/interruption/index.js +7 -0
- package/dist/inference/interruption/index.js.map +1 -0
- package/dist/inference/interruption/interruption.cjs +85 -0
- package/dist/inference/interruption/interruption.cjs.map +1 -0
- package/dist/inference/interruption/interruption.d.cts +48 -0
- package/dist/inference/interruption/interruption.d.ts +48 -0
- package/dist/inference/interruption/interruption.d.ts.map +1 -0
- package/dist/inference/interruption/interruption.js +59 -0
- package/dist/inference/interruption/interruption.js.map +1 -0
- package/dist/inference/utils.cjs +15 -2
- package/dist/inference/utils.cjs.map +1 -1
- package/dist/inference/utils.d.cts +1 -0
- package/dist/inference/utils.d.ts +1 -0
- package/dist/inference/utils.d.ts.map +1 -1
- package/dist/inference/utils.js +13 -1
- package/dist/inference/utils.js.map +1 -1
- package/dist/inference/utils.test.cjs +20 -0
- package/dist/inference/utils.test.cjs.map +1 -0
- package/dist/inference/utils.test.js +19 -0
- package/dist/inference/utils.test.js.map +1 -0
- package/dist/stream/stream_channel.cjs +3 -0
- package/dist/stream/stream_channel.cjs.map +1 -1
- package/dist/stream/stream_channel.d.cts +3 -2
- package/dist/stream/stream_channel.d.ts +3 -2
- package/dist/stream/stream_channel.d.ts.map +1 -1
- package/dist/stream/stream_channel.js +3 -0
- package/dist/stream/stream_channel.js.map +1 -1
- package/dist/telemetry/trace_types.cjs +15 -0
- package/dist/telemetry/trace_types.cjs.map +1 -1
- package/dist/telemetry/trace_types.d.cts +5 -0
- package/dist/telemetry/trace_types.d.ts +5 -0
- package/dist/telemetry/trace_types.d.ts.map +1 -1
- package/dist/telemetry/trace_types.js +10 -0
- package/dist/telemetry/trace_types.js.map +1 -1
- package/dist/utils/ws_transport.cjs +51 -0
- package/dist/utils/ws_transport.cjs.map +1 -0
- package/dist/utils/ws_transport.d.cts +9 -0
- package/dist/utils/ws_transport.d.ts +9 -0
- package/dist/utils/ws_transport.d.ts.map +1 -0
- package/dist/utils/ws_transport.js +17 -0
- package/dist/utils/ws_transport.js.map +1 -0
- package/dist/utils/ws_transport.test.cjs +212 -0
- package/dist/utils/ws_transport.test.cjs.map +1 -0
- package/dist/utils/ws_transport.test.js +211 -0
- package/dist/utils/ws_transport.test.js.map +1 -0
- package/dist/voice/agent_activity.cjs +49 -0
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.d.cts +14 -0
- package/dist/voice/agent_activity.d.ts +14 -0
- package/dist/voice/agent_activity.d.ts.map +1 -1
- package/dist/voice/agent_activity.js +49 -0
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/agent_session.cjs +12 -1
- package/dist/voice/agent_session.cjs.map +1 -1
- package/dist/voice/agent_session.d.cts +3 -0
- package/dist/voice/agent_session.d.ts +3 -0
- package/dist/voice/agent_session.d.ts.map +1 -1
- package/dist/voice/agent_session.js +12 -1
- package/dist/voice/agent_session.js.map +1 -1
- package/dist/voice/audio_recognition.cjs +124 -2
- package/dist/voice/audio_recognition.cjs.map +1 -1
- package/dist/voice/audio_recognition.d.cts +32 -1
- package/dist/voice/audio_recognition.d.ts +32 -1
- package/dist/voice/audio_recognition.d.ts.map +1 -1
- package/dist/voice/audio_recognition.js +127 -2
- package/dist/voice/audio_recognition.js.map +1 -1
- package/package.json +2 -1
- package/src/index.ts +2 -0
- package/src/inference/interruption/AdaptiveInterruptionDetector.ts +166 -0
- package/src/inference/interruption/InterruptionStream.ts +397 -0
- package/src/inference/interruption/defaults.ts +33 -0
- package/src/inference/interruption/errors.ts +0 -0
- package/src/inference/interruption/http_transport.ts +61 -0
- package/src/inference/interruption/index.ts +4 -0
- package/src/inference/interruption/interruption.ts +88 -0
- package/src/inference/utils.test.ts +31 -0
- package/src/inference/utils.ts +15 -0
- package/src/stream/stream_channel.ts +6 -2
- package/src/telemetry/trace_types.ts +7 -0
- package/src/utils/ws_transport.test.ts +282 -0
- package/src/utils/ws_transport.ts +22 -0
- package/src/voice/agent_activity.ts +61 -0
- package/src/voice/agent_session.ts +22 -2
- package/src/voice/audio_recognition.ts +161 -1
|
@@ -0,0 +1,282 @@
|
|
|
1
|
+
// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
|
|
2
|
+
//
|
|
3
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
import { describe, expect, it } from 'vitest';
|
|
5
|
+
import { WebSocket, WebSocketServer } from 'ws';
|
|
6
|
+
import { webSocketStream } from './ws_transport.js';
|
|
7
|
+
|
|
8
|
+
describe('webSocketStream', () => {
  /**
   * Starts a WebSocketServer on an OS-assigned port (port 0) and resolves once
   * the server reports that it is listening.
   *
   * @returns the server together with the port it was bound to
   */
  async function startServer(): Promise<{ wss: WebSocketServer; port: number }> {
    const wss = await new Promise<WebSocketServer>((resolve) => {
      const server: WebSocketServer = new WebSocketServer({ port: 0 }, () => resolve(server));
    });
    const port = (wss.address() as { port: number }).port;
    return { wss, port };
  }

  describe('readable stream', () => {
    it('receives messages from the WebSocket', async () => {
      const { wss, port } = await startServer();

      // The server is closed in `finally` so a failing assertion cannot leak it.
      try {
        wss.on('connection', (serverWs) => {
          serverWs.send('hello');
          serverWs.send('world');
          serverWs.close();
        });

        const { readable } = webSocketStream(`ws://localhost:${port}`);
        const reader = readable.getReader();

        const messages: string[] = [];
        try {
          while (true) {
            const { done, value } = await reader.read();
            if (done) break;
            messages.push(Buffer.from(value).toString());
          }
        } finally {
          reader.releaseLock();
        }

        expect(messages).toEqual(['hello', 'world']);
      } finally {
        wss.close();
      }
    });

    it('handles binary messages', async () => {
      const { wss, port } = await startServer();

      try {
        const binaryData = new Uint8Array([1, 2, 3, 4, 5]);

        wss.on('connection', (serverWs) => {
          serverWs.send(binaryData);
          serverWs.close();
        });

        const { readable } = webSocketStream(`ws://localhost:${port}`);
        const reader = readable.getReader();

        const chunks: Uint8Array[] = [];
        try {
          while (true) {
            const { done, value } = await reader.read();
            if (done) break;
            chunks.push(new Uint8Array(value));
          }
        } finally {
          reader.releaseLock();
        }

        expect(chunks).toHaveLength(1);
        expect(Array.from(chunks[0]!)).toEqual([1, 2, 3, 4, 5]);
      } finally {
        wss.close();
      }
    });

    it('handles empty stream when connection closes immediately', async () => {
      const { wss, port } = await startServer();

      try {
        wss.on('connection', (serverWs) => {
          serverWs.close();
        });

        const { readable } = webSocketStream(`ws://localhost:${port}`);
        const reader = readable.getReader();

        const chunks: Uint8Array[] = [];
        try {
          while (true) {
            const { done, value } = await reader.read();
            if (done) break;
            chunks.push(value);
          }
        } finally {
          reader.releaseLock();
        }

        expect(chunks).toEqual([]);
      } finally {
        wss.close();
      }
    });
  });

  describe('writable stream', () => {
    it('sends messages through the WebSocket', async () => {
      const { wss, port } = await startServer();

      // Extra raw client: awaiting its 'open' event below confirms the server
      // accepts connections before the stream under test is created.
      const ws = new WebSocket(`ws://localhost:${port}`);

      try {
        const connected = new Promise<void>((resolve) => {
          ws.on('open', resolve);
        });

        const messagesReceived: string[] = [];
        const serverClosed = new Promise<void>((resolve) => {
          wss.on('connection', (serverWs) => {
            serverWs.on('message', (data) => {
              messagesReceived.push(data.toString());
            });
            serverWs.on('close', resolve);
          });
        });

        await connected;
        const { writable } = webSocketStream(`ws://localhost:${port}`);
        const writer = writable.getWriter();

        await writer.write(new TextEncoder().encode('hello'));
        await writer.write(new TextEncoder().encode('world'));
        await writer.close();

        await serverClosed;

        expect(messagesReceived).toEqual(['hello', 'world']);
      } finally {
        // The raw client was previously left open for the rest of the run.
        ws.terminate();
        wss.close();
      }
    });

    it('sends binary data through the WebSocket', async () => {
      const { wss, port } = await startServer();

      try {
        const chunksReceived: Buffer[] = [];
        const serverClosed = new Promise<void>((resolve) => {
          wss.on('connection', (serverWs) => {
            serverWs.on('message', (data) => {
              chunksReceived.push(Buffer.from(data as Buffer));
            });
            serverWs.on('close', resolve);
          });
        });

        const { writable } = webSocketStream(`ws://localhost:${port}`);
        const writer = writable.getWriter();

        const binaryData = new Uint8Array([10, 20, 30, 40, 50]);
        await writer.write(binaryData);
        await writer.close();

        await serverClosed;

        expect(chunksReceived).toHaveLength(1);
        expect(Array.from(chunksReceived[0]!)).toEqual([10, 20, 30, 40, 50]);
      } finally {
        wss.close();
      }
    });

    it('buffers writes if readyState is CONNECTING', async () => {
      const { wss, port } = await startServer();

      try {
        const { writable } = webSocketStream(`ws://localhost:${port}`);
        const writer = writable.getWriter();

        const messagesReceived: string[] = [];
        const serverClosed = new Promise<void>((resolve) => {
          wss.on('connection', (serverWs) => {
            serverWs.on('message', (data) => {
              messagesReceived.push(data.toString());
            });
            serverWs.on('close', resolve);
          });
        });

        // These writes should be buffered
        await writer.write(new TextEncoder().encode('buffered message'));
        await writer.close();

        await serverClosed;

        expect(messagesReceived).toEqual(['buffered message']);
      } finally {
        wss.close();
      }
    });
  });

  describe('bidirectional communication', () => {
    it('supports echo pattern with readable and writable', async () => {
      const { wss, port } = await startServer();

      try {
        // Server echoes messages back
        wss.on('connection', (serverWs) => {
          serverWs.on('message', (data) => {
            serverWs.send(data);
          });
        });

        const { readable, writable } = webSocketStream(`ws://localhost:${port}`);
        const writer = writable.getWriter();
        const reader = readable.getReader();

        // Send messages
        await writer.write(new TextEncoder().encode('ping1'));
        await writer.write(new TextEncoder().encode('ping2'));

        // Read echoed responses
        const { value: response1 } = await reader.read();
        const { value: response2 } = await reader.read();

        expect(Buffer.from(response1!).toString()).toBe('ping1');
        expect(Buffer.from(response2!).toString()).toBe('ping2');

        reader.releaseLock();
        await writer.close();
      } finally {
        wss.close();
      }
    });
  });

  describe('error handling', () => {
    it('readable stream ends when WebSocket closes unexpectedly', async () => {
      const { wss, port } = await startServer();

      try {
        wss.on('connection', (serverWs) => {
          serverWs.send('before close');
          // Terminate connection abruptly
          serverWs.terminate();
        });

        const { readable } = webSocketStream(`ws://localhost:${port}`);
        const reader = readable.getReader();

        const chunks: string[] = [];
        try {
          while (true) {
            const { done, value } = await reader.read();
            if (done) break;
            chunks.push(Buffer.from(value).toString());
          }
        } catch (error) {
          console.error(error);
          // Connection terminated, stream may error
        } finally {
          reader.releaseLock();
        }

        // Should have received the message sent before termination
        expect(chunks).toContain('before close');
      } finally {
        wss.close();
      }
    });
  });
});
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import { Readable, Writable } from 'node:stream';
|
|
2
|
+
import WebSocket, { createWebSocketStream } from 'ws';
|
|
3
|
+
|
|
4
|
+
/**
 * Opens a WebSocket connection to `wsUrl` and exposes it as web streams.
 *
 * The socket is wrapped in a Node duplex via `createWebSocketStream`, and the
 * two halves of that duplex are converted with `Readable.toWeb` /
 * `Writable.toWeb`. Errors emitted by the duplex are logged with
 * `console.error`.
 *
 * @param wsUrl - URL of the WebSocket endpoint to connect to
 * @returns `readable` and `writable` web streams backed by the socket, plus
 *   `close`, which closes the underlying WebSocket (same arguments as
 *   `WebSocket#close`)
 */
export function webSocketStream(wsUrl: string) {
  const ws = new WebSocket(wsUrl);
  const duplex = createWebSocketStream(ws);
  duplex.on('error', console.error);

  // End the write side when the read side ends to properly close the stream.
  // This is needed because Readable.toWeb() waits for both sides of the duplex
  // to close before signaling done on the ReadableStream.
  duplex.on('end', () => {
    duplex.end();
  });

  // Convert the writable side
  const writable = Writable.toWeb(duplex);
  // Convert the readable side
  const readable = Readable.toWeb(duplex);

  // `close` must be bound to the socket: handing out the bare method would make
  // `result.close()` (or a destructured `close()`) run with the wrong `this`.
  return { readable, writable, close: ws.close.bind(ws) };
}
|
|
@@ -41,6 +41,8 @@ import { recordRealtimeMetrics, traceTypes, tracer } from '../telemetry/index.js
|
|
|
41
41
|
import { splitWords } from '../tokenize/basic/word.js';
|
|
42
42
|
import { TTS, type TTSError } from '../tts/tts.js';
|
|
43
43
|
import { Future, Task, cancelAndWait, waitFor } from '../utils.js';
|
|
44
|
+
import type { InterruptionEvent } from '../inference/interruption/interruption.js';
|
|
45
|
+
import { InterruptionEventType } from '../inference/interruption/interruption.js';
|
|
44
46
|
import { VAD, type VADEvent } from '../vad.js';
|
|
45
47
|
import type { Agent, ModelSettings } from './agent.js';
|
|
46
48
|
import { StopResponse, asyncLocalStorage } from './agent.js';
|
|
@@ -112,6 +114,24 @@ export class AgentActivity implements RecognitionHooks {
|
|
|
112
114
|
_mainTask?: Task<void>;
|
|
113
115
|
_userTurnCompletedTask?: Promise<void>;
|
|
114
116
|
|
|
117
|
+
/**
 * Signals that the agent has begun speaking.
 * Forwards to `onStartOfAgentSpeech()` on the audio recognition instance, if
 * one exists, which turns interruption detection on in AudioRecognition.
 * @internal
 */
notifyAgentSpeechStarted(): void {
  const recognition = this.audioRecognition;
  if (recognition == null) {
    // No audio recognition set: nothing to notify.
    return;
  }
  recognition.onStartOfAgentSpeech();
}
|
|
125
|
+
|
|
126
|
+
/**
 * Signals that the agent has finished speaking.
 * Forwards to `onEndOfAgentSpeech()` on the audio recognition instance, if
 * one exists, which turns interruption detection off in AudioRecognition.
 * @internal
 */
notifyAgentSpeechEnded(): void {
  const recognition = this.audioRecognition;
  if (recognition == null) {
    // No audio recognition set: nothing to notify.
    return;
  }
  recognition.onEndOfAgentSpeech();
}
|
|
134
|
+
|
|
115
135
|
constructor(agent: Agent, agentSession: AgentSession) {
|
|
116
136
|
this.agent = agent;
|
|
117
137
|
this.agentSession = agentSession;
|
|
@@ -292,6 +312,7 @@ export class AgentActivity implements RecognitionHooks {
|
|
|
292
312
|
// Disable stt node if stt is not provided
|
|
293
313
|
stt: this.stt ? (...args) => this.agent.sttNode(...args) : undefined,
|
|
294
314
|
vad: this.vad,
|
|
315
|
+
interruptionDetector: this.agentSession.interruptionDetector,
|
|
295
316
|
turnDetector: typeof this.turnDetection === 'string' ? undefined : this.turnDetection,
|
|
296
317
|
turnDetectionMode: this.turnDetectionMode,
|
|
297
318
|
minEndpointingDelay: this.agentSession.options.minEndpointingDelay,
|
|
@@ -697,6 +718,46 @@ export class AgentActivity implements RecognitionHooks {
|
|
|
697
718
|
}
|
|
698
719
|
}
|
|
699
720
|
|
|
721
|
+
/**
 * Handles an event from the adaptive interruption detector.
 *
 * Events whose type is not `INTERRUPTION` are ignored. For an interruption the
 * event metrics are logged; then, unless turn detection is 'manual' or
 * 'realtime_llm', or the realtime model reports its own turn detection, user
 * activity is started on the realtime session and the current speech is
 * interrupted when it allows interruptions and is not already interrupted.
 *
 * @param ev - event emitted by the interruption detector
 */
onInterruption(ev: InterruptionEvent): void {
  // Only actual interruptions are handled, not overlap_speech_ended events
  if (ev.type !== InterruptionEventType.INTERRUPTION) return;

  const { probability, detectionDelay, totalDuration } = ev;
  this.logger.info(
    { probability, detectionDelay, totalDuration },
    'adaptive interruption detected',
  );

  // Similar to onVADInferenceDone, but driven by the adaptive interruption
  // detector: bail out when turn handling is manual or owned by the realtime model.
  if (
    this.turnDetection === 'manual' ||
    this.turnDetection === 'realtime_llm' ||
    (this.llm instanceof RealtimeModel && this.llm.capabilities.turnDetection)
  ) {
    return;
  }

  this.realtimeSession?.startUserActivity();

  // Nothing to interrupt: no speech, already interrupted, or not interruptible.
  if (
    !this._currentSpeech ||
    this._currentSpeech.interrupted ||
    !this._currentSpeech.allowInterruptions
  ) {
    return;
  }

  this.logger.info(
    { 'speech id': this._currentSpeech.id },
    'speech interrupted by adaptive interruption detector',
  );
  this.realtimeSession?.interrupt();
  this._currentSpeech.interrupt();
}
|
|
760
|
+
|
|
700
761
|
onInterimTranscript(ev: SpeechEvent): void {
|
|
701
762
|
if (this.llm instanceof RealtimeModel && this.llm.capabilities.userTranscription) {
|
|
702
763
|
// skip stt transcription if userTranscription is enabled on the realtime model
|
|
@@ -15,6 +15,7 @@ import {
|
|
|
15
15
|
type STTModelString,
|
|
16
16
|
type TTSModelString,
|
|
17
17
|
} from '../inference/index.js';
|
|
18
|
+
import type { AdaptiveInterruptionDetector } from '../inference/interruption/AdaptiveInterruptionDetector.js';
|
|
18
19
|
import { type JobContext, getJobContext } from '../job.js';
|
|
19
20
|
import type { FunctionCall, FunctionCallOutput } from '../llm/chat_context.js';
|
|
20
21
|
import { AgentHandoffItem, ChatContext, ChatMessage } from '../llm/chat_context.js';
|
|
@@ -106,6 +107,7 @@ export type AgentSessionOptions<UserData = UnknownUserData> = {
|
|
|
106
107
|
vad?: VAD;
|
|
107
108
|
llm?: LLM | RealtimeModel | LLMModels;
|
|
108
109
|
tts?: TTS | TTSModelString;
|
|
110
|
+
interruptionDetector?: AdaptiveInterruptionDetector;
|
|
109
111
|
userData?: UserData;
|
|
110
112
|
voiceOptions?: Partial<VoiceOptions>;
|
|
111
113
|
connOptions?: SessionConnectOptions;
|
|
@@ -167,6 +169,8 @@ export class AgentSession<
|
|
|
167
169
|
/** @internal - Timestamp when the session started (milliseconds) */
|
|
168
170
|
_startedAt?: number;
|
|
169
171
|
|
|
172
|
+
interruptionDetector?: AdaptiveInterruptionDetector;
|
|
173
|
+
|
|
170
174
|
constructor(opts: AgentSessionOptions<UserData>) {
|
|
171
175
|
super();
|
|
172
176
|
|
|
@@ -176,6 +180,7 @@ export class AgentSession<
|
|
|
176
180
|
llm,
|
|
177
181
|
tts,
|
|
178
182
|
turnDetection,
|
|
183
|
+
interruptionDetector,
|
|
179
184
|
userData,
|
|
180
185
|
voiceOptions = defaultVoiceOptions,
|
|
181
186
|
connOptions,
|
|
@@ -212,6 +217,7 @@ export class AgentSession<
|
|
|
212
217
|
}
|
|
213
218
|
|
|
214
219
|
this.turnDetection = turnDetection;
|
|
220
|
+
this.interruptionDetector = interruptionDetector;
|
|
215
221
|
this._userData = userData;
|
|
216
222
|
|
|
217
223
|
// configurable IO
|
|
@@ -637,6 +643,8 @@ export class AgentSession<
|
|
|
637
643
|
return;
|
|
638
644
|
}
|
|
639
645
|
|
|
646
|
+
const oldState = this._agentState;
|
|
647
|
+
|
|
640
648
|
if (state === 'speaking') {
|
|
641
649
|
// Reset error counts when agent starts speaking
|
|
642
650
|
this.llmErrorCounts = 0;
|
|
@@ -651,13 +659,25 @@ export class AgentSession<
|
|
|
651
659
|
// TODO(brian): PR4 - Set participant attributes if roomIO.room.localParticipant is available
|
|
652
660
|
// (Ref: Python agent_session.py line 1161-1164)
|
|
653
661
|
}
|
|
662
|
+
|
|
663
|
+
// Notify AudioRecognition that agent started speaking (for interruption detection)
|
|
664
|
+
this.activity?.notifyAgentSpeechStarted();
|
|
665
|
+
} else if (oldState === 'speaking') {
|
|
666
|
+
// Agent stopped speaking
|
|
667
|
+
if (this.agentSpeakingSpan !== undefined) {
|
|
668
|
+
// TODO(brian): PR4 - Set ATTR_END_TIME attribute if available
|
|
669
|
+
this.agentSpeakingSpan.end();
|
|
670
|
+
this.agentSpeakingSpan = undefined;
|
|
671
|
+
}
|
|
672
|
+
|
|
673
|
+
// Notify AudioRecognition that agent stopped speaking (for interruption detection)
|
|
674
|
+
this.activity?.notifyAgentSpeechEnded();
|
|
654
675
|
} else if (this.agentSpeakingSpan !== undefined) {
|
|
655
|
-
//
|
|
676
|
+
// Non-speaking to non-speaking transition but span is still open
|
|
656
677
|
this.agentSpeakingSpan.end();
|
|
657
678
|
this.agentSpeakingSpan = undefined;
|
|
658
679
|
}
|
|
659
680
|
|
|
660
|
-
const oldState = this._agentState;
|
|
661
681
|
this._agentState = state;
|
|
662
682
|
|
|
663
683
|
// Handle user away timer based on state changes
|