@livekit/agents 1.1.0-dev.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.cjs +2 -0
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +2 -0
- package/dist/cli.js.map +1 -1
- package/dist/constants.cjs +3 -0
- package/dist/constants.cjs.map +1 -1
- package/dist/constants.d.cts +1 -0
- package/dist/constants.d.ts +1 -0
- package/dist/constants.d.ts.map +1 -1
- package/dist/constants.js +2 -0
- package/dist/constants.js.map +1 -1
- package/dist/cpu.cjs +189 -0
- package/dist/cpu.cjs.map +1 -0
- package/dist/cpu.d.cts +24 -0
- package/dist/cpu.d.ts +24 -0
- package/dist/cpu.d.ts.map +1 -0
- package/dist/cpu.js +152 -0
- package/dist/cpu.js.map +1 -0
- package/dist/cpu.test.cjs +227 -0
- package/dist/cpu.test.cjs.map +1 -0
- package/dist/cpu.test.js +204 -0
- package/dist/cpu.test.js.map +1 -0
- package/dist/index.cjs +12 -10
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +13 -13
- package/dist/index.d.ts +13 -13
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +11 -10
- package/dist/index.js.map +1 -1
- package/dist/inference/interruption/defaults.cjs +1 -1
- package/dist/inference/interruption/defaults.cjs.map +1 -1
- package/dist/inference/interruption/defaults.d.cts +1 -1
- package/dist/inference/interruption/defaults.d.ts +1 -1
- package/dist/inference/interruption/defaults.d.ts.map +1 -1
- package/dist/inference/interruption/defaults.js +1 -1
- package/dist/inference/interruption/defaults.js.map +1 -1
- package/dist/inference/interruption/http_transport.cjs +44 -28
- package/dist/inference/interruption/http_transport.cjs.map +1 -1
- package/dist/inference/interruption/http_transport.d.ts.map +1 -1
- package/dist/inference/interruption/http_transport.js +45 -29
- package/dist/inference/interruption/http_transport.js.map +1 -1
- package/dist/inference/interruption/interruption_detector.cjs +22 -5
- package/dist/inference/interruption/interruption_detector.cjs.map +1 -1
- package/dist/inference/interruption/interruption_detector.d.cts +2 -2
- package/dist/inference/interruption/interruption_detector.d.ts +2 -2
- package/dist/inference/interruption/interruption_detector.d.ts.map +1 -1
- package/dist/inference/interruption/interruption_detector.js +22 -5
- package/dist/inference/interruption/interruption_detector.js.map +1 -1
- package/dist/inference/interruption/interruption_stream.cjs +4 -4
- package/dist/inference/interruption/interruption_stream.cjs.map +1 -1
- package/dist/inference/interruption/interruption_stream.js +4 -4
- package/dist/inference/interruption/interruption_stream.js.map +1 -1
- package/dist/inference/interruption/types.cjs.map +1 -1
- package/dist/inference/interruption/types.d.cts +2 -2
- package/dist/inference/interruption/types.d.ts +2 -2
- package/dist/inference/interruption/types.d.ts.map +1 -1
- package/dist/inference/interruption/ws_transport.cjs +60 -47
- package/dist/inference/interruption/ws_transport.cjs.map +1 -1
- package/dist/inference/interruption/ws_transport.d.ts.map +1 -1
- package/dist/inference/interruption/ws_transport.js +60 -47
- package/dist/inference/interruption/ws_transport.js.map +1 -1
- package/dist/inference/llm.cjs.map +1 -1
- package/dist/inference/llm.d.cts +1 -1
- package/dist/inference/llm.d.ts +1 -1
- package/dist/inference/llm.d.ts.map +1 -1
- package/dist/inference/llm.js.map +1 -1
- package/dist/inference/stt.cjs +20 -12
- package/dist/inference/stt.cjs.map +1 -1
- package/dist/inference/stt.d.cts +3 -2
- package/dist/inference/stt.d.ts +3 -2
- package/dist/inference/stt.d.ts.map +1 -1
- package/dist/inference/stt.js +20 -12
- package/dist/inference/stt.js.map +1 -1
- package/dist/inference/stt.test.cjs +14 -0
- package/dist/inference/stt.test.cjs.map +1 -1
- package/dist/inference/stt.test.js +14 -0
- package/dist/inference/stt.test.js.map +1 -1
- package/dist/inference/tts.cjs +13 -4
- package/dist/inference/tts.cjs.map +1 -1
- package/dist/inference/tts.d.cts +8 -1
- package/dist/inference/tts.d.ts +8 -1
- package/dist/inference/tts.d.ts.map +1 -1
- package/dist/inference/tts.js +13 -4
- package/dist/inference/tts.js.map +1 -1
- package/dist/inference/tts.test.cjs +10 -0
- package/dist/inference/tts.test.cjs.map +1 -1
- package/dist/inference/tts.test.js +10 -0
- package/dist/inference/tts.test.js.map +1 -1
- package/dist/ipc/job_proc_lazy_main.cjs +41 -23
- package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
- package/dist/ipc/job_proc_lazy_main.js +41 -23
- package/dist/ipc/job_proc_lazy_main.js.map +1 -1
- package/dist/job.cjs +1 -1
- package/dist/job.cjs.map +1 -1
- package/dist/job.js +1 -1
- package/dist/job.js.map +1 -1
- package/dist/language.cjs +394 -0
- package/dist/language.cjs.map +1 -0
- package/dist/language.d.cts +15 -0
- package/dist/language.d.ts +15 -0
- package/dist/language.d.ts.map +1 -0
- package/dist/language.js +363 -0
- package/dist/language.js.map +1 -0
- package/dist/language.test.cjs +43 -0
- package/dist/language.test.cjs.map +1 -0
- package/dist/language.test.js +49 -0
- package/dist/language.test.js.map +1 -0
- package/dist/llm/index.cjs +2 -0
- package/dist/llm/index.cjs.map +1 -1
- package/dist/llm/index.d.cts +1 -1
- package/dist/llm/index.d.ts +1 -1
- package/dist/llm/index.d.ts.map +1 -1
- package/dist/llm/index.js +2 -0
- package/dist/llm/index.js.map +1 -1
- package/dist/stream/deferred_stream.cjs +6 -2
- package/dist/stream/deferred_stream.cjs.map +1 -1
- package/dist/stream/deferred_stream.d.ts.map +1 -1
- package/dist/stream/deferred_stream.js +6 -2
- package/dist/stream/deferred_stream.js.map +1 -1
- package/dist/stt/stt.cjs.map +1 -1
- package/dist/stt/stt.d.cts +2 -1
- package/dist/stt/stt.d.ts +2 -1
- package/dist/stt/stt.d.ts.map +1 -1
- package/dist/stt/stt.js.map +1 -1
- package/dist/utils.cjs +15 -0
- package/dist/utils.cjs.map +1 -1
- package/dist/utils.d.cts +8 -0
- package/dist/utils.d.ts +8 -0
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js +13 -0
- package/dist/utils.js.map +1 -1
- package/dist/version.cjs +1 -1
- package/dist/version.js +1 -1
- package/dist/voice/agent.cjs +14 -17
- package/dist/voice/agent.cjs.map +1 -1
- package/dist/voice/agent.d.cts +10 -11
- package/dist/voice/agent.d.ts +10 -11
- package/dist/voice/agent.d.ts.map +1 -1
- package/dist/voice/agent.js +15 -18
- package/dist/voice/agent.js.map +1 -1
- package/dist/voice/agent.test.cjs +194 -0
- package/dist/voice/agent.test.cjs.map +1 -1
- package/dist/voice/agent.test.js +195 -1
- package/dist/voice/agent.test.js.map +1 -1
- package/dist/voice/agent_activity.cjs +116 -39
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.d.cts +2 -0
- package/dist/voice/agent_activity.d.ts +2 -0
- package/dist/voice/agent_activity.d.ts.map +1 -1
- package/dist/voice/agent_activity.js +117 -40
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/agent_activity.test.cjs +135 -0
- package/dist/voice/agent_activity.test.cjs.map +1 -0
- package/dist/voice/agent_activity.test.js +134 -0
- package/dist/voice/agent_activity.test.js.map +1 -0
- package/dist/voice/agent_session.cjs +38 -38
- package/dist/voice/agent_session.cjs.map +1 -1
- package/dist/voice/agent_session.d.cts +65 -56
- package/dist/voice/agent_session.d.ts +65 -56
- package/dist/voice/agent_session.d.ts.map +1 -1
- package/dist/voice/agent_session.js +37 -37
- package/dist/voice/agent_session.js.map +1 -1
- package/dist/voice/audio_recognition.cjs +106 -52
- package/dist/voice/audio_recognition.cjs.map +1 -1
- package/dist/voice/audio_recognition.d.cts +4 -2
- package/dist/voice/audio_recognition.d.ts +4 -2
- package/dist/voice/audio_recognition.d.ts.map +1 -1
- package/dist/voice/audio_recognition.js +106 -52
- package/dist/voice/audio_recognition.js.map +1 -1
- package/dist/voice/audio_recognition_span.test.cjs +84 -22
- package/dist/voice/audio_recognition_span.test.cjs.map +1 -1
- package/dist/voice/audio_recognition_span.test.js +90 -23
- package/dist/voice/audio_recognition_span.test.js.map +1 -1
- package/dist/voice/events.cjs +1 -1
- package/dist/voice/events.cjs.map +1 -1
- package/dist/voice/events.d.cts +4 -3
- package/dist/voice/events.d.ts +4 -3
- package/dist/voice/events.d.ts.map +1 -1
- package/dist/voice/events.js +1 -1
- package/dist/voice/events.js.map +1 -1
- package/dist/voice/index.cjs +9 -1
- package/dist/voice/index.cjs.map +1 -1
- package/dist/voice/index.d.cts +1 -1
- package/dist/voice/index.d.ts +1 -1
- package/dist/voice/index.d.ts.map +1 -1
- package/dist/voice/index.js +10 -1
- package/dist/voice/index.js.map +1 -1
- package/dist/voice/remote_session.cjs +922 -0
- package/dist/voice/remote_session.cjs.map +1 -0
- package/dist/voice/remote_session.d.cts +108 -0
- package/dist/voice/remote_session.d.ts +108 -0
- package/dist/voice/remote_session.d.ts.map +1 -0
- package/dist/voice/remote_session.js +887 -0
- package/dist/voice/remote_session.js.map +1 -0
- package/dist/voice/report.cjs +11 -10
- package/dist/voice/report.cjs.map +1 -1
- package/dist/voice/report.d.cts +5 -3
- package/dist/voice/report.d.ts +5 -3
- package/dist/voice/report.d.ts.map +1 -1
- package/dist/voice/report.js +11 -10
- package/dist/voice/report.js.map +1 -1
- package/dist/voice/report.test.cjs +15 -0
- package/dist/voice/report.test.cjs.map +1 -1
- package/dist/voice/report.test.js +15 -0
- package/dist/voice/report.test.js.map +1 -1
- package/dist/voice/room_io/room_io.cjs +39 -0
- package/dist/voice/room_io/room_io.cjs.map +1 -1
- package/dist/voice/room_io/room_io.d.cts +3 -1
- package/dist/voice/room_io/room_io.d.ts +3 -1
- package/dist/voice/room_io/room_io.d.ts.map +1 -1
- package/dist/voice/room_io/room_io.js +40 -1
- package/dist/voice/room_io/room_io.js.map +1 -1
- package/dist/voice/turn_config/interruption.cjs.map +1 -1
- package/dist/voice/turn_config/interruption.d.cts +1 -1
- package/dist/voice/turn_config/interruption.d.ts +1 -1
- package/dist/voice/turn_config/interruption.d.ts.map +1 -1
- package/dist/voice/turn_config/interruption.js.map +1 -1
- package/dist/voice/turn_config/utils.cjs +95 -35
- package/dist/voice/turn_config/utils.cjs.map +1 -1
- package/dist/voice/turn_config/utils.d.cts +17 -5
- package/dist/voice/turn_config/utils.d.ts +17 -5
- package/dist/voice/turn_config/utils.d.ts.map +1 -1
- package/dist/voice/turn_config/utils.js +93 -35
- package/dist/voice/turn_config/utils.js.map +1 -1
- package/dist/voice/turn_config/utils.test.cjs +83 -41
- package/dist/voice/turn_config/utils.test.cjs.map +1 -1
- package/dist/voice/turn_config/utils.test.js +84 -42
- package/dist/voice/turn_config/utils.test.js.map +1 -1
- package/dist/worker.cjs +6 -29
- package/dist/worker.cjs.map +1 -1
- package/dist/worker.d.ts.map +1 -1
- package/dist/worker.js +6 -19
- package/dist/worker.js.map +1 -1
- package/package.json +3 -2
- package/src/cli.ts +2 -0
- package/src/constants.ts +1 -0
- package/src/cpu.test.ts +239 -0
- package/src/cpu.ts +173 -0
- package/src/index.ts +13 -15
- package/src/inference/interruption/defaults.ts +1 -1
- package/src/inference/interruption/http_transport.ts +49 -30
- package/src/inference/interruption/interruption_detector.ts +22 -6
- package/src/inference/interruption/interruption_stream.ts +4 -4
- package/src/inference/interruption/types.ts +2 -2
- package/src/inference/interruption/ws_transport.ts +63 -59
- package/src/inference/llm.ts +3 -1
- package/src/inference/stt.test.ts +17 -0
- package/src/inference/stt.ts +22 -14
- package/src/inference/tts.test.ts +12 -0
- package/src/inference/tts.ts +22 -6
- package/src/ipc/job_proc_lazy_main.ts +44 -24
- package/src/job.ts +1 -1
- package/src/language.test.ts +62 -0
- package/src/language.ts +380 -0
- package/src/llm/index.ts +2 -0
- package/src/stream/deferred_stream.ts +5 -1
- package/src/stt/stt.ts +2 -1
- package/src/utils.ts +20 -0
- package/src/voice/agent.test.ts +208 -1
- package/src/voice/agent.ts +21 -22
- package/src/voice/agent_activity.test.ts +194 -0
- package/src/voice/agent_activity.ts +161 -43
- package/src/voice/agent_session.ts +103 -92
- package/src/voice/audio_recognition.ts +124 -61
- package/src/voice/audio_recognition_span.test.ts +115 -35
- package/src/voice/events.ts +4 -3
- package/src/voice/index.ts +10 -1
- package/src/voice/remote_session.ts +1083 -0
- package/src/voice/report.test.ts +22 -3
- package/src/voice/report.ts +31 -14
- package/src/voice/room_io/room_io.ts +52 -2
- package/src/voice/turn_config/interruption.ts +1 -1
- package/src/voice/turn_config/utils.test.ts +91 -43
- package/src/voice/turn_config/utils.ts +120 -56
- package/src/worker.ts +34 -50
- package/dist/voice/client_events.cjs +0 -554
- package/dist/voice/client_events.cjs.map +0 -1
- package/dist/voice/client_events.d.cts +0 -195
- package/dist/voice/client_events.d.ts +0 -195
- package/dist/voice/client_events.d.ts.map +0 -1
- package/dist/voice/client_events.js +0 -548
- package/dist/voice/client_events.js.map +0 -1
- package/dist/voice/wire_format.cjs +0 -798
- package/dist/voice/wire_format.cjs.map +0 -1
- package/dist/voice/wire_format.d.cts +0 -5503
- package/dist/voice/wire_format.d.ts +0 -5503
- package/dist/voice/wire_format.d.ts.map +0 -1
- package/dist/voice/wire_format.js +0 -728
- package/dist/voice/wire_format.js.map +0 -1
- package/src/voice/client_events.ts +0 -838
- package/src/voice/wire_format.ts +0 -827
package/src/voice/agent.test.ts
CHANGED
|
@@ -7,7 +7,11 @@ import { tool } from '../llm/index.js';
|
|
|
7
7
|
import { initializeLogger } from '../log.js';
|
|
8
8
|
import { Task } from '../utils.js';
|
|
9
9
|
import { Agent, AgentTask, _setActivityTaskInfo } from './agent.js';
|
|
10
|
-
import { agentActivityStorage } from './agent_activity.js';
|
|
10
|
+
import { AgentActivity, agentActivityStorage } from './agent_activity.js';
|
|
11
|
+
import { defaultEndpointingOptions } from './turn_config/endpointing.js';
|
|
12
|
+
import { defaultInterruptionOptions } from './turn_config/interruption.js';
|
|
13
|
+
|
|
14
|
+
vi.mock('ofetch', () => ({ ofetch: vi.fn() }));
|
|
11
15
|
|
|
12
16
|
initializeLogger({ pretty: false, level: 'error' });
|
|
13
17
|
|
|
@@ -215,4 +219,207 @@ describe('Agent', () => {
|
|
|
215
219
|
await expect(wrapper.result).resolves.toBe('ok');
|
|
216
220
|
expect(closeOldActivity).toHaveBeenCalledTimes(1);
|
|
217
221
|
});
|
|
222
|
+
|
|
223
|
+
describe('Agent constructor option migration', () => {
|
|
224
|
+
it('should set allowInterruptions to false via deprecated constructor field', () => {
|
|
225
|
+
const agent = new Agent({ instructions: 'test', allowInterruptions: false });
|
|
226
|
+
expect(agent.turnHandling?.interruption?.enabled).toBe(false);
|
|
227
|
+
});
|
|
228
|
+
|
|
229
|
+
it('should not set derived properties when no compatibility fields are provided', () => {
|
|
230
|
+
const agent = new Agent({ instructions: 'test' });
|
|
231
|
+
expect(agent.turnHandling).toBeUndefined();
|
|
232
|
+
});
|
|
233
|
+
|
|
234
|
+
it('should expose minConsecutiveSpeechDelay', () => {
|
|
235
|
+
const agent = new Agent({ instructions: 'test', minConsecutiveSpeechDelay: 1.5 });
|
|
236
|
+
expect(agent.minConsecutiveSpeechDelay).toBe(1.5);
|
|
237
|
+
});
|
|
238
|
+
|
|
239
|
+
it('should ignore deprecated constructor fields when turnHandling is provided', () => {
|
|
240
|
+
const agent = new Agent({
|
|
241
|
+
instructions: 'test',
|
|
242
|
+
turnHandling: {
|
|
243
|
+
endpointing: { minDelay: 999 },
|
|
244
|
+
interruption: {},
|
|
245
|
+
turnDetection: 'vad',
|
|
246
|
+
},
|
|
247
|
+
allowInterruptions: false,
|
|
248
|
+
});
|
|
249
|
+
expect(agent.turnHandling?.endpointing?.minDelay).toBe(999);
|
|
250
|
+
expect(agent.turnHandling?.endpointing?.maxDelay).toBeUndefined();
|
|
251
|
+
expect(agent.turnHandling?.interruption?.enabled).toBeUndefined();
|
|
252
|
+
expect(agent.turnHandling?.turnDetection).toBe('vad');
|
|
253
|
+
});
|
|
254
|
+
|
|
255
|
+
it('should let turnHandling override deprecated constructor fields on conflicts', () => {
|
|
256
|
+
const agent = new Agent({
|
|
257
|
+
instructions: 'test',
|
|
258
|
+
turnHandling: {
|
|
259
|
+
endpointing: { minDelay: 999, maxDelay: 4000 },
|
|
260
|
+
interruption: { enabled: true },
|
|
261
|
+
turnDetection: 'vad',
|
|
262
|
+
},
|
|
263
|
+
allowInterruptions: false,
|
|
264
|
+
turnDetection: 'stt',
|
|
265
|
+
});
|
|
266
|
+
expect(agent.turnHandling?.endpointing?.minDelay).toBe(999);
|
|
267
|
+
expect(agent.turnHandling?.endpointing?.maxDelay).toBe(4000);
|
|
268
|
+
expect(agent.turnHandling?.interruption?.enabled).toBe(true);
|
|
269
|
+
expect(agent.turnHandling?.turnDetection).toBe('vad');
|
|
270
|
+
});
|
|
271
|
+
|
|
272
|
+
it('should set interruptionDetection from turnHandling.interruption.mode', () => {
|
|
273
|
+
const agent = new Agent({
|
|
274
|
+
instructions: 'test',
|
|
275
|
+
turnHandling: {
|
|
276
|
+
interruption: { mode: 'adaptive' },
|
|
277
|
+
endpointing: {},
|
|
278
|
+
turnDetection: undefined,
|
|
279
|
+
},
|
|
280
|
+
});
|
|
281
|
+
expect(agent.turnHandling?.interruption?.mode).toBe('adaptive');
|
|
282
|
+
});
|
|
283
|
+
|
|
284
|
+
it('should let AgentActivity prefer agent-level overrides over session defaults', () => {
|
|
285
|
+
const agent = new Agent({
|
|
286
|
+
instructions: 'test',
|
|
287
|
+
turnHandling: {
|
|
288
|
+
endpointing: { minDelay: 111, maxDelay: 222 },
|
|
289
|
+
interruption: { enabled: false },
|
|
290
|
+
turnDetection: 'manual',
|
|
291
|
+
},
|
|
292
|
+
});
|
|
293
|
+
const session = {
|
|
294
|
+
options: {
|
|
295
|
+
turnHandling: {
|
|
296
|
+
endpointing: defaultEndpointingOptions,
|
|
297
|
+
interruption: defaultInterruptionOptions,
|
|
298
|
+
},
|
|
299
|
+
},
|
|
300
|
+
turnDetection: 'stt',
|
|
301
|
+
useTtsAlignedTranscript: true,
|
|
302
|
+
vad: undefined,
|
|
303
|
+
stt: undefined,
|
|
304
|
+
llm: undefined,
|
|
305
|
+
tts: undefined,
|
|
306
|
+
interruptionDetection: undefined,
|
|
307
|
+
} as any;
|
|
308
|
+
|
|
309
|
+
const activity = new AgentActivity(agent as any, session);
|
|
310
|
+
|
|
311
|
+
expect(activity.allowInterruptions).toBe(false);
|
|
312
|
+
expect(activity.turnDetection).toBe('manual');
|
|
313
|
+
expect(activity.turnHandling.endpointing?.minDelay).toBe(111);
|
|
314
|
+
expect(activity.turnHandling.endpointing?.maxDelay).toBe(222);
|
|
315
|
+
});
|
|
316
|
+
|
|
317
|
+
it('should disable adaptive interruption detection in default mode when prerequisites are missing', () => {
|
|
318
|
+
const previousRemoteEotUrl = process.env.LIVEKIT_REMOTE_EOT_URL;
|
|
319
|
+
process.env.LIVEKIT_REMOTE_EOT_URL = 'http://localhost:9999';
|
|
320
|
+
|
|
321
|
+
try {
|
|
322
|
+
const agent = new Agent({ instructions: 'test' });
|
|
323
|
+
const session = {
|
|
324
|
+
options: {
|
|
325
|
+
turnHandling: {
|
|
326
|
+
endpointing: defaultEndpointingOptions,
|
|
327
|
+
interruption: defaultInterruptionOptions,
|
|
328
|
+
},
|
|
329
|
+
},
|
|
330
|
+
sessionOptions: {
|
|
331
|
+
turnHandling: {
|
|
332
|
+
endpointing: defaultEndpointingOptions,
|
|
333
|
+
interruption: defaultInterruptionOptions,
|
|
334
|
+
},
|
|
335
|
+
},
|
|
336
|
+
turnDetection: 'manual',
|
|
337
|
+
useTtsAlignedTranscript: true,
|
|
338
|
+
vad: {},
|
|
339
|
+
stt: {
|
|
340
|
+
capabilities: {
|
|
341
|
+
alignedTranscript: true,
|
|
342
|
+
streaming: true,
|
|
343
|
+
},
|
|
344
|
+
},
|
|
345
|
+
llm: undefined,
|
|
346
|
+
tts: undefined,
|
|
347
|
+
interruptionDetection: undefined,
|
|
348
|
+
} as any;
|
|
349
|
+
|
|
350
|
+
const activity = new AgentActivity(agent as any, session);
|
|
351
|
+
expect((activity as any).interruptionDetector).toBeUndefined();
|
|
352
|
+
} finally {
|
|
353
|
+
if (previousRemoteEotUrl === undefined) {
|
|
354
|
+
delete process.env.LIVEKIT_REMOTE_EOT_URL;
|
|
355
|
+
} else {
|
|
356
|
+
process.env.LIVEKIT_REMOTE_EOT_URL = previousRemoteEotUrl;
|
|
357
|
+
}
|
|
358
|
+
}
|
|
359
|
+
});
|
|
360
|
+
|
|
361
|
+
it('should warn when session explicitly requests adaptive detection even if agent overrides it', () => {
|
|
362
|
+
const activity = Object.create(AgentActivity.prototype) as any;
|
|
363
|
+
activity.agent = {
|
|
364
|
+
turnHandling: { interruption: { mode: 'vad' } },
|
|
365
|
+
turnDetection: undefined,
|
|
366
|
+
};
|
|
367
|
+
activity.agentSession = {
|
|
368
|
+
interruptionDetection: 'adaptive',
|
|
369
|
+
turnDetection: 'manual',
|
|
370
|
+
};
|
|
371
|
+
activity.logger = { warn: vi.fn() };
|
|
372
|
+
|
|
373
|
+
expect(activity.resolveInterruptionDetector()).toBeUndefined();
|
|
374
|
+
expect(activity.logger.warn).toHaveBeenCalledWith(
|
|
375
|
+
"interruptionDetection is provided, but it's not compatible with the current configuration and will be disabled",
|
|
376
|
+
);
|
|
377
|
+
});
|
|
378
|
+
|
|
379
|
+
it('should disable adaptive interruption detection when interruptions are disabled', () => {
|
|
380
|
+
const previousRemoteEotUrl = process.env.LIVEKIT_REMOTE_EOT_URL;
|
|
381
|
+
process.env.LIVEKIT_REMOTE_EOT_URL = 'http://localhost:9999';
|
|
382
|
+
|
|
383
|
+
try {
|
|
384
|
+
const activity = Object.create(AgentActivity.prototype) as any;
|
|
385
|
+
activity.agent = {
|
|
386
|
+
turnHandling: {
|
|
387
|
+
interruption: { enabled: false },
|
|
388
|
+
},
|
|
389
|
+
turnDetection: undefined,
|
|
390
|
+
stt: undefined,
|
|
391
|
+
vad: undefined,
|
|
392
|
+
llm: undefined,
|
|
393
|
+
};
|
|
394
|
+
activity.agentSession = {
|
|
395
|
+
interruptionDetection: undefined,
|
|
396
|
+
turnDetection: 'stt',
|
|
397
|
+
sessionOptions: {
|
|
398
|
+
turnHandling: {
|
|
399
|
+
interruption: defaultInterruptionOptions,
|
|
400
|
+
endpointing: defaultEndpointingOptions,
|
|
401
|
+
},
|
|
402
|
+
},
|
|
403
|
+
stt: {
|
|
404
|
+
capabilities: {
|
|
405
|
+
alignedTranscript: true,
|
|
406
|
+
streaming: true,
|
|
407
|
+
},
|
|
408
|
+
},
|
|
409
|
+
vad: {},
|
|
410
|
+
llm: undefined,
|
|
411
|
+
};
|
|
412
|
+
activity.logger = { warn: vi.fn() };
|
|
413
|
+
|
|
414
|
+
expect(activity.resolveInterruptionDetector()).toBeUndefined();
|
|
415
|
+
expect(activity.logger.warn).not.toHaveBeenCalled();
|
|
416
|
+
} finally {
|
|
417
|
+
if (previousRemoteEotUrl === undefined) {
|
|
418
|
+
delete process.env.LIVEKIT_REMOTE_EOT_URL;
|
|
419
|
+
} else {
|
|
420
|
+
process.env.LIVEKIT_REMOTE_EOT_URL = previousRemoteEotUrl;
|
|
421
|
+
}
|
|
422
|
+
}
|
|
423
|
+
});
|
|
424
|
+
});
|
|
218
425
|
});
|
package/src/voice/agent.ts
CHANGED
|
@@ -35,9 +35,8 @@ import { type AgentActivity, agentActivityStorage } from './agent_activity.js';
|
|
|
35
35
|
import type { AgentSession, TurnDetectionMode } from './agent_session.js';
|
|
36
36
|
import type { TimedString } from './io.js';
|
|
37
37
|
import type { SpeechHandle } from './speech_handle.js';
|
|
38
|
-
import type { InterruptionOptions } from './turn_config/interruption.js';
|
|
39
38
|
import type { TurnHandlingOptions } from './turn_config/turn_handling.js';
|
|
40
|
-
import {
|
|
39
|
+
import { migrateTurnHandling } from './turn_config/utils.js';
|
|
41
40
|
|
|
42
41
|
export const functionCallStorage = new AsyncLocalStorage<{ functionCall?: FunctionCall }>();
|
|
43
42
|
export const speechHandleStorage = new AsyncLocalStorage<SpeechHandle>();
|
|
@@ -113,16 +112,17 @@ export interface AgentOptions<UserData> {
|
|
|
113
112
|
instructions: string;
|
|
114
113
|
chatCtx?: ChatContext;
|
|
115
114
|
tools?: ToolContext<UserData>;
|
|
116
|
-
/** @deprecated use turnHandling instead */
|
|
117
|
-
turnDetection?: TurnDetectionMode;
|
|
118
115
|
stt?: STT | STTModelString;
|
|
119
116
|
vad?: VAD;
|
|
120
117
|
llm?: LLM | RealtimeModel | LLMModels;
|
|
121
118
|
tts?: TTS | TTSModelString;
|
|
122
|
-
allowInterruptions?: boolean;
|
|
123
|
-
minConsecutiveSpeechDelay?: number;
|
|
124
119
|
turnHandling?: TurnHandlingOptions;
|
|
120
|
+
minConsecutiveSpeechDelay?: number;
|
|
125
121
|
useTtsAlignedTranscript?: boolean;
|
|
122
|
+
/** @deprecated use turnHandling.turnDetection instead */
|
|
123
|
+
turnDetection?: TurnDetectionMode;
|
|
124
|
+
/** @deprecated use turnHandling.interruption.enabled instead */
|
|
125
|
+
allowInterruptions?: boolean;
|
|
126
126
|
}
|
|
127
127
|
|
|
128
128
|
export class Agent<UserData = any> {
|
|
@@ -131,9 +131,9 @@ export class Agent<UserData = any> {
|
|
|
131
131
|
private _vad?: VAD;
|
|
132
132
|
private _llm?: LLM | RealtimeModel;
|
|
133
133
|
private _tts?: TTS;
|
|
134
|
-
private
|
|
135
|
-
|
|
136
|
-
private
|
|
134
|
+
private _turnHandling?: Partial<TurnHandlingOptions>;
|
|
135
|
+
|
|
136
|
+
private _minConsecutiveSpeechDelay?: number;
|
|
137
137
|
private _useTtsAlignedTranscript?: boolean;
|
|
138
138
|
|
|
139
139
|
/** @internal */
|
|
@@ -158,14 +158,14 @@ export class Agent<UserData = any> {
|
|
|
158
158
|
vad,
|
|
159
159
|
llm,
|
|
160
160
|
tts,
|
|
161
|
+
allowInterruptions,
|
|
161
162
|
turnHandling,
|
|
163
|
+
minConsecutiveSpeechDelay,
|
|
162
164
|
useTtsAlignedTranscript,
|
|
163
|
-
allowInterruptions,
|
|
164
165
|
}: AgentOptions<UserData>) {
|
|
165
166
|
if (id) {
|
|
166
167
|
this._id = id;
|
|
167
168
|
} else {
|
|
168
|
-
// Convert class name to snake_case
|
|
169
169
|
const className = this.constructor.name;
|
|
170
170
|
if (className === 'Agent') {
|
|
171
171
|
this._id = 'default_agent';
|
|
@@ -185,11 +185,13 @@ export class Agent<UserData = any> {
|
|
|
185
185
|
})
|
|
186
186
|
: ChatContext.empty();
|
|
187
187
|
|
|
188
|
-
const
|
|
188
|
+
const resolvedTurnHandling = migrateTurnHandling({
|
|
189
189
|
turnDetection,
|
|
190
|
-
|
|
190
|
+
allowInterruptions,
|
|
191
|
+
turnHandling,
|
|
191
192
|
});
|
|
192
|
-
this.
|
|
193
|
+
this._turnHandling =
|
|
194
|
+
Object.keys(resolvedTurnHandling).length > 0 ? resolvedTurnHandling : undefined;
|
|
193
195
|
|
|
194
196
|
this._vad = vad;
|
|
195
197
|
|
|
@@ -211,10 +213,7 @@ export class Agent<UserData = any> {
|
|
|
211
213
|
this._tts = tts;
|
|
212
214
|
}
|
|
213
215
|
|
|
214
|
-
this.
|
|
215
|
-
if (this.turnHandling?.interruption.mode !== undefined) {
|
|
216
|
-
this._allowInterruptions = !!this.turnHandling.interruption.mode;
|
|
217
|
-
}
|
|
216
|
+
this._minConsecutiveSpeechDelay = minConsecutiveSpeechDelay;
|
|
218
217
|
this._useTtsAlignedTranscript = useTtsAlignedTranscript;
|
|
219
218
|
|
|
220
219
|
this._agentActivity = undefined;
|
|
@@ -260,12 +259,12 @@ export class Agent<UserData = any> {
|
|
|
260
259
|
return this.getActivityOrThrow().agentSession as AgentSession<UserData>;
|
|
261
260
|
}
|
|
262
261
|
|
|
263
|
-
get
|
|
264
|
-
return this.
|
|
262
|
+
get turnHandling(): Partial<TurnHandlingOptions> | undefined {
|
|
263
|
+
return this._turnHandling;
|
|
265
264
|
}
|
|
266
265
|
|
|
267
|
-
get
|
|
268
|
-
return this.
|
|
266
|
+
get minConsecutiveSpeechDelay(): number | undefined {
|
|
267
|
+
return this._minConsecutiveSpeechDelay;
|
|
269
268
|
}
|
|
270
269
|
|
|
271
270
|
async onEnter(): Promise<void> {}
|
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
// SPDX-FileCopyrightText: 2025 LiveKit, Inc.
|
|
2
|
+
//
|
|
3
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* Regression tests for mainTask speech handle processing.
|
|
7
|
+
*
|
|
8
|
+
* When a speech handle is interrupted after _authorizeGeneration() but before the
|
|
9
|
+
* reply task calls _markGenerationDone(), mainTask hangs on _waitForGeneration()
|
|
10
|
+
* indefinitely. All subsequent speech handles queue behind it and the agent becomes
|
|
11
|
+
* unresponsive.
|
|
12
|
+
*
|
|
13
|
+
* Fix: race _waitForGeneration() against the interrupt future via waitIfNotInterrupted().
|
|
14
|
+
*
|
|
15
|
+
* Related: #1124, #1089, #836
|
|
16
|
+
*/
|
|
17
|
+
import { Heap } from 'heap-js';
|
|
18
|
+
import { describe, expect, it, vi } from 'vitest';
|
|
19
|
+
import { Future } from '../utils.js';
|
|
20
|
+
import { AgentActivity } from './agent_activity.js';
|
|
21
|
+
import { SpeechHandle } from './speech_handle.js';
|
|
22
|
+
|
|
23
|
+
// Break circular dependency: agent_activity.ts → agent.js → beta/workflows/task_group.ts
|
|
24
|
+
vi.mock('./agent.js', () => {
|
|
25
|
+
class Agent {}
|
|
26
|
+
class AgentTask extends Agent {}
|
|
27
|
+
class StopResponse {}
|
|
28
|
+
return {
|
|
29
|
+
Agent,
|
|
30
|
+
AgentTask,
|
|
31
|
+
StopResponse,
|
|
32
|
+
_getActivityTaskInfo: () => null,
|
|
33
|
+
_setActivityTaskInfo: () => {},
|
|
34
|
+
functionCallStorage: {
|
|
35
|
+
getStore: () => undefined,
|
|
36
|
+
enterWith: () => {},
|
|
37
|
+
run: (_: unknown, fn: () => unknown) => fn(),
|
|
38
|
+
},
|
|
39
|
+
speechHandleStorage: {
|
|
40
|
+
getStore: () => undefined,
|
|
41
|
+
enterWith: () => {},
|
|
42
|
+
},
|
|
43
|
+
};
|
|
44
|
+
});
|
|
45
|
+
|
|
46
|
+
vi.mock('../version.js', () => ({ version: '0.0.0-test' }));
|
|
47
|
+
|
|
48
|
+
async function raceTimeout(promise: Promise<unknown>, ms: number): Promise<'resolved' | 'timeout'> {
|
|
49
|
+
let timer: ReturnType<typeof setTimeout>;
|
|
50
|
+
const timeout = new Promise<'timeout'>((resolve) => {
|
|
51
|
+
timer = setTimeout(() => resolve('timeout'), ms);
|
|
52
|
+
});
|
|
53
|
+
return Promise.race([promise.then(() => 'resolved' as const), timeout]).finally(() =>
|
|
54
|
+
clearTimeout(timer),
|
|
55
|
+
);
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
/**
|
|
59
|
+
* Build a minimal stand-in with just enough state for mainTask to run.
|
|
60
|
+
*
|
|
61
|
+
* mainTask accesses: q_updated, speechQueue, _currentSpeech, _schedulingPaused,
|
|
62
|
+
* getDrainPendingSpeechTasks(), and logger. We provide stubs for all of these,
|
|
63
|
+
* then bind the real AgentActivity.prototype.mainTask to this object.
|
|
64
|
+
*/
|
|
65
|
+
function buildMainTaskRunner() {
|
|
66
|
+
const q_updated = new Future<void>();
|
|
67
|
+
type HeapItem = [number, number, SpeechHandle];
|
|
68
|
+
const speechQueue = new Heap<HeapItem>((a: HeapItem, b: HeapItem) => b[0] - a[0] || a[1] - b[1]);
|
|
69
|
+
|
|
70
|
+
const fakeActivity = {
|
|
71
|
+
q_updated,
|
|
72
|
+
speechQueue,
|
|
73
|
+
_currentSpeech: undefined as SpeechHandle | undefined,
|
|
74
|
+
_schedulingPaused: false,
|
|
75
|
+
getDrainPendingSpeechTasks: () => [],
|
|
76
|
+
logger: {
|
|
77
|
+
info: () => {},
|
|
78
|
+
debug: () => {},
|
|
79
|
+
warn: () => {},
|
|
80
|
+
error: () => {},
|
|
81
|
+
},
|
|
82
|
+
};
|
|
83
|
+
|
|
84
|
+
const mainTask = (AgentActivity.prototype as Record<string, unknown>).mainTask as (
|
|
85
|
+
signal: AbortSignal,
|
|
86
|
+
) => Promise<void>;
|
|
87
|
+
|
|
88
|
+
return {
|
|
89
|
+
fakeActivity,
|
|
90
|
+
mainTask: mainTask.bind(fakeActivity),
|
|
91
|
+
speechQueue,
|
|
92
|
+
q_updated,
|
|
93
|
+
};
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
describe('AgentActivity - mainTask', () => {
|
|
97
|
+
it('should recover when speech handle is interrupted after authorization', async () => {
|
|
98
|
+
const { fakeActivity, mainTask, speechQueue, q_updated } = buildMainTaskRunner();
|
|
99
|
+
|
|
100
|
+
const handle = SpeechHandle.create({ allowInterruptions: true });
|
|
101
|
+
|
|
102
|
+
speechQueue.push([SpeechHandle.SPEECH_PRIORITY_NORMAL, 1, handle]);
|
|
103
|
+
handle._markScheduled();
|
|
104
|
+
q_updated.resolve();
|
|
105
|
+
|
|
106
|
+
const ac = new AbortController();
|
|
107
|
+
const mainTaskPromise = mainTask(ac.signal);
|
|
108
|
+
|
|
109
|
+
// Give mainTask time to pop the handle and call _authorizeGeneration
|
|
110
|
+
await new Promise((r) => setTimeout(r, 50));
|
|
111
|
+
|
|
112
|
+
// Interrupt while waiting for generation
|
|
113
|
+
handle.interrupt();
|
|
114
|
+
|
|
115
|
+
// Let mainTask react to the interrupt, then signal exit
|
|
116
|
+
await new Promise((r) => setTimeout(r, 50));
|
|
117
|
+
fakeActivity._schedulingPaused = true;
|
|
118
|
+
fakeActivity.q_updated = new Future();
|
|
119
|
+
fakeActivity.q_updated.resolve();
|
|
120
|
+
ac.abort();
|
|
121
|
+
|
|
122
|
+
const result = await raceTimeout(mainTaskPromise, 2000);
|
|
123
|
+
expect(result).toBe('resolved');
|
|
124
|
+
});
|
|
125
|
+
|
|
126
|
+
it('should process next queued handle after an interrupted one', async () => {
|
|
127
|
+
const { fakeActivity, mainTask, speechQueue, q_updated } = buildMainTaskRunner();
|
|
128
|
+
|
|
129
|
+
const handleA = SpeechHandle.create({ allowInterruptions: true });
|
|
130
|
+
const handleB = SpeechHandle.create({ allowInterruptions: true });
|
|
131
|
+
|
|
132
|
+
speechQueue.push([SpeechHandle.SPEECH_PRIORITY_NORMAL, 1, handleA]);
|
|
133
|
+
handleA._markScheduled();
|
|
134
|
+
speechQueue.push([SpeechHandle.SPEECH_PRIORITY_NORMAL, 2, handleB]);
|
|
135
|
+
handleB._markScheduled();
|
|
136
|
+
q_updated.resolve();
|
|
137
|
+
|
|
138
|
+
const ac = new AbortController();
|
|
139
|
+
const mainTaskPromise = mainTask(ac.signal);
|
|
140
|
+
|
|
141
|
+
// Wait for mainTask to pick up handle A
|
|
142
|
+
await new Promise((r) => setTimeout(r, 50));
|
|
143
|
+
|
|
144
|
+
// Interrupt handle A
|
|
145
|
+
handleA.interrupt();
|
|
146
|
+
|
|
147
|
+
// Wait for mainTask to move to handle B and authorize it
|
|
148
|
+
await new Promise((r) => setTimeout(r, 50));
|
|
149
|
+
|
|
150
|
+
// Resolve handle B's generation (simulating normal reply task completion).
|
|
151
|
+
// If mainTask is stuck on handle A (bug), handle B was never authorized and this
|
|
152
|
+
// throws — we catch it and let the timeout assert the real failure.
|
|
153
|
+
try {
|
|
154
|
+
handleB._markGenerationDone();
|
|
155
|
+
} catch {
|
|
156
|
+
// Expected when fix is absent: handle B has no active generation
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
// Let mainTask finish
|
|
160
|
+
await new Promise((r) => setTimeout(r, 50));
|
|
161
|
+
fakeActivity._schedulingPaused = true;
|
|
162
|
+
fakeActivity.q_updated = new Future();
|
|
163
|
+
fakeActivity.q_updated.resolve();
|
|
164
|
+
ac.abort();
|
|
165
|
+
|
|
166
|
+
const result = await raceTimeout(mainTaskPromise, 2000);
|
|
167
|
+
expect(result).toBe('resolved');
|
|
168
|
+
});
|
|
169
|
+
|
|
170
|
+
it('should skip handles that were interrupted before being popped', async () => {
|
|
171
|
+
const { fakeActivity, mainTask, speechQueue, q_updated } = buildMainTaskRunner();
|
|
172
|
+
|
|
173
|
+
const handle = SpeechHandle.create({ allowInterruptions: true });
|
|
174
|
+
|
|
175
|
+
// Interrupt before mainTask ever sees it
|
|
176
|
+
handle.interrupt();
|
|
177
|
+
|
|
178
|
+
speechQueue.push([SpeechHandle.SPEECH_PRIORITY_NORMAL, 1, handle]);
|
|
179
|
+
handle._markScheduled();
|
|
180
|
+
q_updated.resolve();
|
|
181
|
+
|
|
182
|
+
const ac = new AbortController();
|
|
183
|
+
const mainTaskPromise = mainTask(ac.signal);
|
|
184
|
+
|
|
185
|
+
await new Promise((r) => setTimeout(r, 50));
|
|
186
|
+
fakeActivity._schedulingPaused = true;
|
|
187
|
+
fakeActivity.q_updated = new Future();
|
|
188
|
+
fakeActivity.q_updated.resolve();
|
|
189
|
+
ac.abort();
|
|
190
|
+
|
|
191
|
+
const result = await raceTimeout(mainTaskPromise, 2000);
|
|
192
|
+
expect(result).toBe('resolved');
|
|
193
|
+
});
|
|
194
|
+
});
|