@livekit/agents 1.0.17 → 1.0.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/inference/llm.cjs +35 -13
- package/dist/inference/llm.cjs.map +1 -1
- package/dist/inference/llm.d.cts +10 -5
- package/dist/inference/llm.d.ts +10 -5
- package/dist/inference/llm.d.ts.map +1 -1
- package/dist/inference/llm.js +35 -13
- package/dist/inference/llm.js.map +1 -1
- package/dist/llm/chat_context.d.cts +1 -1
- package/dist/llm/chat_context.d.ts +1 -1
- package/dist/llm/llm.cjs.map +1 -1
- package/dist/llm/llm.d.cts +1 -1
- package/dist/llm/llm.d.ts +1 -1
- package/dist/llm/llm.d.ts.map +1 -1
- package/dist/llm/llm.js.map +1 -1
- package/dist/llm/provider_format/google.cjs.map +1 -1
- package/dist/llm/provider_format/google.d.cts +1 -1
- package/dist/llm/provider_format/google.d.ts +1 -1
- package/dist/llm/provider_format/google.d.ts.map +1 -1
- package/dist/llm/provider_format/google.js.map +1 -1
- package/dist/llm/provider_format/index.d.cts +1 -1
- package/dist/llm/provider_format/index.d.ts +1 -1
- package/dist/llm/provider_format/index.d.ts.map +1 -1
- package/dist/llm/realtime.cjs.map +1 -1
- package/dist/llm/realtime.d.cts +4 -0
- package/dist/llm/realtime.d.ts +4 -0
- package/dist/llm/realtime.d.ts.map +1 -1
- package/dist/llm/realtime.js.map +1 -1
- package/dist/llm/utils.cjs +2 -2
- package/dist/llm/utils.cjs.map +1 -1
- package/dist/llm/utils.d.cts +1 -1
- package/dist/llm/utils.d.ts +1 -1
- package/dist/llm/utils.d.ts.map +1 -1
- package/dist/llm/utils.js +2 -2
- package/dist/llm/utils.js.map +1 -1
- package/dist/llm/zod-utils.cjs +6 -3
- package/dist/llm/zod-utils.cjs.map +1 -1
- package/dist/llm/zod-utils.d.cts +1 -1
- package/dist/llm/zod-utils.d.ts +1 -1
- package/dist/llm/zod-utils.d.ts.map +1 -1
- package/dist/llm/zod-utils.js +6 -3
- package/dist/llm/zod-utils.js.map +1 -1
- package/dist/llm/zod-utils.test.cjs +83 -0
- package/dist/llm/zod-utils.test.cjs.map +1 -1
- package/dist/llm/zod-utils.test.js +83 -0
- package/dist/llm/zod-utils.test.js.map +1 -1
- package/dist/utils.cjs.map +1 -1
- package/dist/utils.d.cts +7 -0
- package/dist/utils.d.ts +7 -0
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js.map +1 -1
- package/dist/voice/agent_activity.cjs +69 -20
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.d.ts.map +1 -1
- package/dist/voice/agent_activity.js +69 -20
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/agent_session.cjs +40 -1
- package/dist/voice/agent_session.cjs.map +1 -1
- package/dist/voice/agent_session.d.cts +5 -0
- package/dist/voice/agent_session.d.ts +5 -0
- package/dist/voice/agent_session.d.ts.map +1 -1
- package/dist/voice/agent_session.js +40 -1
- package/dist/voice/agent_session.js.map +1 -1
- package/dist/voice/interruption_detection.test.cjs +114 -0
- package/dist/voice/interruption_detection.test.cjs.map +1 -0
- package/dist/voice/interruption_detection.test.js +113 -0
- package/dist/voice/interruption_detection.test.js.map +1 -0
- package/dist/voice/room_io/room_io.cjs +3 -0
- package/dist/voice/room_io/room_io.cjs.map +1 -1
- package/dist/voice/room_io/room_io.d.cts +1 -0
- package/dist/voice/room_io/room_io.d.ts +1 -0
- package/dist/voice/room_io/room_io.d.ts.map +1 -1
- package/dist/voice/room_io/room_io.js +3 -0
- package/dist/voice/room_io/room_io.js.map +1 -1
- package/package.json +3 -3
- package/src/inference/llm.ts +53 -21
- package/src/llm/__snapshots__/zod-utils.test.ts.snap +218 -0
- package/src/llm/llm.ts +1 -1
- package/src/llm/provider_format/google.ts +4 -4
- package/src/llm/realtime.ts +8 -1
- package/src/llm/utils.ts +7 -2
- package/src/llm/zod-utils.test.ts +101 -0
- package/src/llm/zod-utils.ts +12 -3
- package/src/utils.ts +17 -0
- package/src/voice/agent_activity.ts +96 -24
- package/src/voice/agent_session.ts +54 -0
- package/src/voice/interruption_detection.test.ts +151 -0
- package/src/voice/room_io/room_io.ts +4 -0
|
@@ -58,6 +58,7 @@ export interface VoiceOptions {
|
|
|
58
58
|
maxEndpointingDelay: number;
|
|
59
59
|
maxToolSteps: number;
|
|
60
60
|
preemptiveGeneration: boolean;
|
|
61
|
+
userAwayTimeout?: number | null;
|
|
61
62
|
}
|
|
62
63
|
|
|
63
64
|
const defaultVoiceOptions: VoiceOptions = {
|
|
@@ -69,6 +70,7 @@ const defaultVoiceOptions: VoiceOptions = {
|
|
|
69
70
|
maxEndpointingDelay: 6000,
|
|
70
71
|
maxToolSteps: 3,
|
|
71
72
|
preemptiveGeneration: false,
|
|
73
|
+
userAwayTimeout: 15.0,
|
|
72
74
|
} as const;
|
|
73
75
|
|
|
74
76
|
export type TurnDetectionMode = 'stt' | 'vad' | 'realtime_llm' | 'manual' | _TurnDetector;
|
|
@@ -123,6 +125,7 @@ export class AgentSession<
|
|
|
123
125
|
private _output: AgentOutput;
|
|
124
126
|
|
|
125
127
|
private closingTask: Promise<void> | null = null;
|
|
128
|
+
private userAwayTimer: NodeJS.Timeout | null = null;
|
|
126
129
|
|
|
127
130
|
constructor(opts: AgentSessionOptions<UserData>) {
|
|
128
131
|
super();
|
|
@@ -167,6 +170,8 @@ export class AgentSession<
|
|
|
167
170
|
// This is the "global" chat context, it holds the entire conversation history
|
|
168
171
|
this._chatCtx = ChatContext.empty();
|
|
169
172
|
this.options = { ...defaultVoiceOptions, ...voiceOptions };
|
|
173
|
+
|
|
174
|
+
this.on(AgentSessionEventTypes.UserInputTranscribed, this._onUserInputTranscribed.bind(this));
|
|
170
175
|
}
|
|
171
176
|
|
|
172
177
|
get input(): AgentInput {
|
|
@@ -416,6 +421,14 @@ export class AgentSession<
|
|
|
416
421
|
|
|
417
422
|
const oldState = this._agentState;
|
|
418
423
|
this._agentState = state;
|
|
424
|
+
|
|
425
|
+
// Handle user away timer based on state changes
|
|
426
|
+
if (state === 'listening' && this.userState === 'listening') {
|
|
427
|
+
this._setUserAwayTimer();
|
|
428
|
+
} else {
|
|
429
|
+
this._cancelUserAwayTimer();
|
|
430
|
+
}
|
|
431
|
+
|
|
419
432
|
this.emit(
|
|
420
433
|
AgentSessionEventTypes.AgentStateChanged,
|
|
421
434
|
createAgentStateChangedEvent(oldState, state),
|
|
@@ -430,6 +443,14 @@ export class AgentSession<
|
|
|
430
443
|
|
|
431
444
|
const oldState = this.userState;
|
|
432
445
|
this.userState = state;
|
|
446
|
+
|
|
447
|
+
// Handle user away timer based on state changes
|
|
448
|
+
if (state === 'listening' && this._agentState === 'listening') {
|
|
449
|
+
this._setUserAwayTimer();
|
|
450
|
+
} else {
|
|
451
|
+
this._cancelUserAwayTimer();
|
|
452
|
+
}
|
|
453
|
+
|
|
433
454
|
this.emit(
|
|
434
455
|
AgentSessionEventTypes.UserStateChanged,
|
|
435
456
|
createUserStateChangedEvent(oldState, state),
|
|
@@ -451,6 +472,37 @@ export class AgentSession<
|
|
|
451
472
|
|
|
452
473
|
private onTextOutputChanged(): void {}
|
|
453
474
|
|
|
475
|
+
private _setUserAwayTimer(): void {
|
|
476
|
+
this._cancelUserAwayTimer();
|
|
477
|
+
|
|
478
|
+
if (this.options.userAwayTimeout === null || this.options.userAwayTimeout === undefined) {
|
|
479
|
+
return;
|
|
480
|
+
}
|
|
481
|
+
|
|
482
|
+
if (this.roomIO && !this.roomIO.isParticipantAvailable) {
|
|
483
|
+
return;
|
|
484
|
+
}
|
|
485
|
+
|
|
486
|
+
this.userAwayTimer = setTimeout(() => {
|
|
487
|
+
this.logger.debug('User away timeout triggered');
|
|
488
|
+
this._updateUserState('away');
|
|
489
|
+
}, this.options.userAwayTimeout * 1000);
|
|
490
|
+
}
|
|
491
|
+
|
|
492
|
+
private _cancelUserAwayTimer(): void {
|
|
493
|
+
if (this.userAwayTimer !== null) {
|
|
494
|
+
clearTimeout(this.userAwayTimer);
|
|
495
|
+
this.userAwayTimer = null;
|
|
496
|
+
}
|
|
497
|
+
}
|
|
498
|
+
|
|
499
|
+
private _onUserInputTranscribed(ev: UserInputTranscribedEvent): void {
|
|
500
|
+
if (this.userState === 'away' && ev.isFinal) {
|
|
501
|
+
this.logger.debug('User returned from away state due to speech input');
|
|
502
|
+
this._updateUserState('listening');
|
|
503
|
+
}
|
|
504
|
+
}
|
|
505
|
+
|
|
454
506
|
private async closeImpl(
|
|
455
507
|
reason: CloseReason,
|
|
456
508
|
error: RealtimeModelError | LLMError | TTSError | STTError | null = null,
|
|
@@ -460,6 +512,8 @@ export class AgentSession<
|
|
|
460
512
|
return;
|
|
461
513
|
}
|
|
462
514
|
|
|
515
|
+
this._cancelUserAwayTimer();
|
|
516
|
+
|
|
463
517
|
if (this.activity) {
|
|
464
518
|
if (!drain) {
|
|
465
519
|
try {
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
// SPDX-FileCopyrightText: 2025 LiveKit, Inc.
|
|
2
|
+
//
|
|
3
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* Unit tests for interruption detection logic in AgentActivity.
|
|
7
|
+
*
|
|
8
|
+
* Tests the refactored minInterruptionWords check which ensures:
|
|
9
|
+
* - Consistent word count filtering across all speech scenarios
|
|
10
|
+
* - Proper handling of empty strings, undefined, and short speech
|
|
11
|
+
* - Interruptions allowed only when word count meets or exceeds minInterruptionWords threshold
|
|
12
|
+
*/
|
|
13
|
+
import { describe, expect, it } from 'vitest';
|
|
14
|
+
import { splitWords } from '../tokenize/basic/word.js';
|
|
15
|
+
|
|
16
|
+
describe('Interruption Detection - Word Counting', () => {
|
|
17
|
+
describe('Word Splitting Behavior', () => {
|
|
18
|
+
it('should count empty string as 0 words', () => {
|
|
19
|
+
const text = '';
|
|
20
|
+
const wordCount = splitWords(text, true).length;
|
|
21
|
+
expect(wordCount).toBe(0);
|
|
22
|
+
});
|
|
23
|
+
|
|
24
|
+
it('should count single word correctly', () => {
|
|
25
|
+
const text = 'hello';
|
|
26
|
+
const wordCount = splitWords(text, true).length;
|
|
27
|
+
expect(wordCount).toBe(1);
|
|
28
|
+
});
|
|
29
|
+
|
|
30
|
+
it('should count two words correctly', () => {
|
|
31
|
+
const text = 'hello world';
|
|
32
|
+
const wordCount = splitWords(text, true).length;
|
|
33
|
+
expect(wordCount).toBe(2);
|
|
34
|
+
});
|
|
35
|
+
|
|
36
|
+
it('should count multiple words correctly', () => {
|
|
37
|
+
const text = 'hello this is a full sentence';
|
|
38
|
+
const wordCount = splitWords(text, true).length;
|
|
39
|
+
expect(wordCount).toBe(6);
|
|
40
|
+
});
|
|
41
|
+
|
|
42
|
+
it('should handle punctuation correctly', () => {
|
|
43
|
+
const text = 'hello, world!';
|
|
44
|
+
const wordCount = splitWords(text, true).length;
|
|
45
|
+
expect(wordCount).toBe(2);
|
|
46
|
+
});
|
|
47
|
+
|
|
48
|
+
it('should handle multiple spaces between words', () => {
|
|
49
|
+
const text = 'hello  world';
|
|
50
|
+
const wordCount = splitWords(text, true).length;
|
|
51
|
+
expect(wordCount).toBe(2);
|
|
52
|
+
});
|
|
53
|
+
|
|
54
|
+
it('should count whitespace-only string as 0 words', () => {
|
|
55
|
+
const text = ' ';
|
|
56
|
+
const wordCount = splitWords(text, true).length;
|
|
57
|
+
expect(wordCount).toBe(0);
|
|
58
|
+
});
|
|
59
|
+
|
|
60
|
+
it('should handle leading and trailing whitespace', () => {
|
|
61
|
+
const text = ' hello world ';
|
|
62
|
+
const wordCount = splitWords(text, true).length;
|
|
63
|
+
expect(wordCount).toBe(2);
|
|
64
|
+
});
|
|
65
|
+
});
|
|
66
|
+
|
|
67
|
+
describe('Integration: Full Interruption Check Logic', () => {
|
|
68
|
+
it('should block interruption for empty transcript with threshold 2', () => {
|
|
69
|
+
const text = '';
|
|
70
|
+
const minInterruptionWords = 2;
|
|
71
|
+
|
|
72
|
+
const normalizedText = text ?? '';
|
|
73
|
+
const wordCount = splitWords(normalizedText, true).length;
|
|
74
|
+
const shouldBlock = wordCount < minInterruptionWords;
|
|
75
|
+
|
|
76
|
+
expect(normalizedText).toBe('');
|
|
77
|
+
expect(wordCount).toBe(0);
|
|
78
|
+
expect(shouldBlock).toBe(true);
|
|
79
|
+
});
|
|
80
|
+
|
|
81
|
+
it('should block interruption for undefined transcript with threshold 2', () => {
|
|
82
|
+
const text: string | undefined = undefined;
|
|
83
|
+
const minInterruptionWords = 2;
|
|
84
|
+
|
|
85
|
+
const normalizedText = text ?? '';
|
|
86
|
+
const wordCount = splitWords(normalizedText, true).length;
|
|
87
|
+
const shouldBlock = wordCount < minInterruptionWords;
|
|
88
|
+
|
|
89
|
+
expect(normalizedText).toBe('');
|
|
90
|
+
expect(wordCount).toBe(0);
|
|
91
|
+
expect(shouldBlock).toBe(true);
|
|
92
|
+
});
|
|
93
|
+
|
|
94
|
+
it('should block interruption for single word with threshold 2', () => {
|
|
95
|
+
const text = 'hello';
|
|
96
|
+
const minInterruptionWords = 2;
|
|
97
|
+
|
|
98
|
+
const normalizedText = text ?? '';
|
|
99
|
+
const wordCount = splitWords(normalizedText, true).length;
|
|
100
|
+
const shouldBlock = wordCount < minInterruptionWords;
|
|
101
|
+
|
|
102
|
+
expect(normalizedText).toBe('hello');
|
|
103
|
+
expect(wordCount).toBe(1);
|
|
104
|
+
expect(shouldBlock).toBe(true);
|
|
105
|
+
});
|
|
106
|
+
|
|
107
|
+
it('should allow interruption when word count exactly meets threshold', () => {
|
|
108
|
+
const text = 'hello world';
|
|
109
|
+
const minInterruptionWords = 2;
|
|
110
|
+
|
|
111
|
+
const normalizedText = text ?? '';
|
|
112
|
+
const wordCount = splitWords(normalizedText, true).length;
|
|
113
|
+
const shouldBlock = wordCount < minInterruptionWords;
|
|
114
|
+
|
|
115
|
+
expect(normalizedText).toBe('hello world');
|
|
116
|
+
expect(wordCount).toBe(2);
|
|
117
|
+
expect(shouldBlock).toBe(false);
|
|
118
|
+
});
|
|
119
|
+
|
|
120
|
+
it('should allow interruption when word count exceeds threshold', () => {
|
|
121
|
+
const text = 'hello this is a full sentence';
|
|
122
|
+
const minInterruptionWords = 2;
|
|
123
|
+
|
|
124
|
+
const normalizedText = text ?? '';
|
|
125
|
+
const wordCount = splitWords(normalizedText, true).length;
|
|
126
|
+
const shouldBlock = wordCount < minInterruptionWords;
|
|
127
|
+
|
|
128
|
+
expect(normalizedText).toBe('hello this is a full sentence');
|
|
129
|
+
expect(wordCount).toBe(6);
|
|
130
|
+
expect(shouldBlock).toBe(false);
|
|
131
|
+
});
|
|
132
|
+
|
|
133
|
+
it('should apply consistent word counting logic in both methods', () => {
|
|
134
|
+
const transcripts = ['', 'hello', 'hello world', 'this is a longer sentence'];
|
|
135
|
+
const threshold = 2;
|
|
136
|
+
|
|
137
|
+
transcripts.forEach((transcript) => {
|
|
138
|
+
const text1 = transcript;
|
|
139
|
+
const normalizedText1 = text1 ?? '';
|
|
140
|
+
const wordCount1 = splitWords(normalizedText1, true).length;
|
|
141
|
+
const shouldBlock1 = wordCount1 < threshold;
|
|
142
|
+
|
|
143
|
+
const wordCount2 = splitWords(transcript, true).length;
|
|
144
|
+
const shouldBlock2 = wordCount2 < threshold;
|
|
145
|
+
|
|
146
|
+
expect(wordCount1).toBe(wordCount2);
|
|
147
|
+
expect(shouldBlock1).toBe(shouldBlock2);
|
|
148
|
+
});
|
|
149
|
+
});
|
|
150
|
+
});
|
|
151
|
+
});
|
|
@@ -369,6 +369,10 @@ export class RoomIO {
|
|
|
369
369
|
return this.transcriptionSynchronizer.textOutput;
|
|
370
370
|
}
|
|
371
371
|
|
|
372
|
+
get isParticipantAvailable(): boolean {
|
|
373
|
+
return this.participantAvailableFuture.done;
|
|
374
|
+
}
|
|
375
|
+
|
|
372
376
|
/** Switch to a different participant */
|
|
373
377
|
setParticipant(participantIdentity: string | null) {
|
|
374
378
|
this.logger.debug({ participantIdentity }, 'setting participant');
|