@livekit/agents 1.0.17 → 1.0.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. package/dist/inference/llm.cjs +35 -13
  2. package/dist/inference/llm.cjs.map +1 -1
  3. package/dist/inference/llm.d.cts +10 -5
  4. package/dist/inference/llm.d.ts +10 -5
  5. package/dist/inference/llm.d.ts.map +1 -1
  6. package/dist/inference/llm.js +35 -13
  7. package/dist/inference/llm.js.map +1 -1
  8. package/dist/llm/chat_context.d.cts +1 -1
  9. package/dist/llm/chat_context.d.ts +1 -1
  10. package/dist/llm/llm.cjs.map +1 -1
  11. package/dist/llm/llm.d.cts +1 -1
  12. package/dist/llm/llm.d.ts +1 -1
  13. package/dist/llm/llm.d.ts.map +1 -1
  14. package/dist/llm/llm.js.map +1 -1
  15. package/dist/llm/provider_format/google.cjs.map +1 -1
  16. package/dist/llm/provider_format/google.d.cts +1 -1
  17. package/dist/llm/provider_format/google.d.ts +1 -1
  18. package/dist/llm/provider_format/google.d.ts.map +1 -1
  19. package/dist/llm/provider_format/google.js.map +1 -1
  20. package/dist/llm/provider_format/index.d.cts +1 -1
  21. package/dist/llm/provider_format/index.d.ts +1 -1
  22. package/dist/llm/provider_format/index.d.ts.map +1 -1
  23. package/dist/llm/realtime.cjs.map +1 -1
  24. package/dist/llm/realtime.d.cts +4 -0
  25. package/dist/llm/realtime.d.ts +4 -0
  26. package/dist/llm/realtime.d.ts.map +1 -1
  27. package/dist/llm/realtime.js.map +1 -1
  28. package/dist/llm/utils.cjs +2 -2
  29. package/dist/llm/utils.cjs.map +1 -1
  30. package/dist/llm/utils.d.cts +1 -1
  31. package/dist/llm/utils.d.ts +1 -1
  32. package/dist/llm/utils.d.ts.map +1 -1
  33. package/dist/llm/utils.js +2 -2
  34. package/dist/llm/utils.js.map +1 -1
  35. package/dist/llm/zod-utils.cjs +6 -3
  36. package/dist/llm/zod-utils.cjs.map +1 -1
  37. package/dist/llm/zod-utils.d.cts +1 -1
  38. package/dist/llm/zod-utils.d.ts +1 -1
  39. package/dist/llm/zod-utils.d.ts.map +1 -1
  40. package/dist/llm/zod-utils.js +6 -3
  41. package/dist/llm/zod-utils.js.map +1 -1
  42. package/dist/llm/zod-utils.test.cjs +83 -0
  43. package/dist/llm/zod-utils.test.cjs.map +1 -1
  44. package/dist/llm/zod-utils.test.js +83 -0
  45. package/dist/llm/zod-utils.test.js.map +1 -1
  46. package/dist/utils.cjs.map +1 -1
  47. package/dist/utils.d.cts +7 -0
  48. package/dist/utils.d.ts +7 -0
  49. package/dist/utils.d.ts.map +1 -1
  50. package/dist/utils.js.map +1 -1
  51. package/dist/voice/agent_activity.cjs +69 -20
  52. package/dist/voice/agent_activity.cjs.map +1 -1
  53. package/dist/voice/agent_activity.d.ts.map +1 -1
  54. package/dist/voice/agent_activity.js +69 -20
  55. package/dist/voice/agent_activity.js.map +1 -1
  56. package/dist/voice/agent_session.cjs +40 -1
  57. package/dist/voice/agent_session.cjs.map +1 -1
  58. package/dist/voice/agent_session.d.cts +5 -0
  59. package/dist/voice/agent_session.d.ts +5 -0
  60. package/dist/voice/agent_session.d.ts.map +1 -1
  61. package/dist/voice/agent_session.js +40 -1
  62. package/dist/voice/agent_session.js.map +1 -1
  63. package/dist/voice/interruption_detection.test.cjs +114 -0
  64. package/dist/voice/interruption_detection.test.cjs.map +1 -0
  65. package/dist/voice/interruption_detection.test.js +113 -0
  66. package/dist/voice/interruption_detection.test.js.map +1 -0
  67. package/dist/voice/room_io/room_io.cjs +3 -0
  68. package/dist/voice/room_io/room_io.cjs.map +1 -1
  69. package/dist/voice/room_io/room_io.d.cts +1 -0
  70. package/dist/voice/room_io/room_io.d.ts +1 -0
  71. package/dist/voice/room_io/room_io.d.ts.map +1 -1
  72. package/dist/voice/room_io/room_io.js +3 -0
  73. package/dist/voice/room_io/room_io.js.map +1 -1
  74. package/package.json +3 -3
  75. package/src/inference/llm.ts +53 -21
  76. package/src/llm/__snapshots__/zod-utils.test.ts.snap +218 -0
  77. package/src/llm/llm.ts +1 -1
  78. package/src/llm/provider_format/google.ts +4 -4
  79. package/src/llm/realtime.ts +8 -1
  80. package/src/llm/utils.ts +7 -2
  81. package/src/llm/zod-utils.test.ts +101 -0
  82. package/src/llm/zod-utils.ts +12 -3
  83. package/src/utils.ts +17 -0
  84. package/src/voice/agent_activity.ts +96 -24
  85. package/src/voice/agent_session.ts +54 -0
  86. package/src/voice/interruption_detection.test.ts +151 -0
  87. package/src/voice/room_io/room_io.ts +4 -0
@@ -58,6 +58,7 @@ export interface VoiceOptions {
58
58
  maxEndpointingDelay: number;
59
59
  maxToolSteps: number;
60
60
  preemptiveGeneration: boolean;
61
+ userAwayTimeout?: number | null;
61
62
  }
62
63
 
63
64
  const defaultVoiceOptions: VoiceOptions = {
@@ -69,6 +70,7 @@ const defaultVoiceOptions: VoiceOptions = {
69
70
  maxEndpointingDelay: 6000,
70
71
  maxToolSteps: 3,
71
72
  preemptiveGeneration: false,
73
+ userAwayTimeout: 15.0,
72
74
  } as const;
73
75
 
74
76
  export type TurnDetectionMode = 'stt' | 'vad' | 'realtime_llm' | 'manual' | _TurnDetector;
@@ -123,6 +125,7 @@ export class AgentSession<
123
125
  private _output: AgentOutput;
124
126
 
125
127
  private closingTask: Promise<void> | null = null;
128
+ private userAwayTimer: NodeJS.Timeout | null = null;
126
129
 
127
130
  constructor(opts: AgentSessionOptions<UserData>) {
128
131
  super();
@@ -167,6 +170,8 @@ export class AgentSession<
167
170
  // This is the "global" chat context, it holds the entire conversation history
168
171
  this._chatCtx = ChatContext.empty();
169
172
  this.options = { ...defaultVoiceOptions, ...voiceOptions };
173
+
174
+ this.on(AgentSessionEventTypes.UserInputTranscribed, this._onUserInputTranscribed.bind(this));
170
175
  }
171
176
 
172
177
  get input(): AgentInput {
@@ -416,6 +421,14 @@ export class AgentSession<
416
421
 
417
422
  const oldState = this._agentState;
418
423
  this._agentState = state;
424
+
425
+ // Handle user away timer based on state changes
426
+ if (state === 'listening' && this.userState === 'listening') {
427
+ this._setUserAwayTimer();
428
+ } else {
429
+ this._cancelUserAwayTimer();
430
+ }
431
+
419
432
  this.emit(
420
433
  AgentSessionEventTypes.AgentStateChanged,
421
434
  createAgentStateChangedEvent(oldState, state),
@@ -430,6 +443,14 @@ export class AgentSession<
430
443
 
431
444
  const oldState = this.userState;
432
445
  this.userState = state;
446
+
447
+ // Handle user away timer based on state changes
448
+ if (state === 'listening' && this._agentState === 'listening') {
449
+ this._setUserAwayTimer();
450
+ } else {
451
+ this._cancelUserAwayTimer();
452
+ }
453
+
433
454
  this.emit(
434
455
  AgentSessionEventTypes.UserStateChanged,
435
456
  createUserStateChangedEvent(oldState, state),
@@ -451,6 +472,37 @@ export class AgentSession<
451
472
 
452
473
  private onTextOutputChanged(): void {}
453
474
 
/**
 * (Re)arms the timer that moves the user into the 'away' state.
 *
 * Any previously scheduled timer is cancelled first. Nothing is scheduled when
 * `options.userAwayTimeout` is `null`/`undefined`, or when a RoomIO exists but
 * reports that no participant is available.
 *
 * `options.userAwayTimeout` is expressed in seconds; it is multiplied by 1000
 * to obtain the millisecond delay handed to `setTimeout`.
 */
private _setUserAwayTimer(): void {
  this._cancelUserAwayTimer();

  const awayAfterSeconds = this.options.userAwayTimeout;
  // `== null` matches both null and undefined: the feature is switched off.
  if (awayAfterSeconds == null) {
    return;
  }

  const io = this.roomIO;
  if (io && !io.isParticipantAvailable) {
    return;
  }

  const onTimeout = (): void => {
    this.logger.debug('User away timeout triggered');
    this._updateUserState('away');
  };
  this.userAwayTimer = setTimeout(onTimeout, awayAfterSeconds * 1000);
}
491
+
/**
 * Clears the pending user-away timer, if one is scheduled, and resets the
 * handle to `null`. Does nothing when no timer is pending.
 */
private _cancelUserAwayTimer(): void {
  const pending = this.userAwayTimer;
  if (pending === null) {
    return;
  }

  clearTimeout(pending);
  this.userAwayTimer = null;
}
498
+
/**
 * Handler for user-input-transcribed events: a final transcript received while
 * the user is marked 'away' moves the user back to 'listening'.
 *
 * Interim (non-final) transcripts and transcripts received in any other user
 * state are ignored.
 */
private _onUserInputTranscribed(ev: UserInputTranscribedEvent): void {
  if (this.userState !== 'away' || !ev.isFinal) {
    return;
  }

  this.logger.debug('User returned from away state due to speech input');
  this._updateUserState('listening');
}
505
+
454
506
  private async closeImpl(
455
507
  reason: CloseReason,
456
508
  error: RealtimeModelError | LLMError | TTSError | STTError | null = null,
@@ -460,6 +512,8 @@ export class AgentSession<
460
512
  return;
461
513
  }
462
514
 
515
+ this._cancelUserAwayTimer();
516
+
463
517
  if (this.activity) {
464
518
  if (!drain) {
465
519
  try {
@@ -0,0 +1,151 @@
1
+ // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+
5
+ /**
6
+ * Unit tests for interruption detection logic in AgentActivity.
7
+ *
8
+ * Tests the refactored minInterruptionWords check which ensures:
9
+ * - Consistent word count filtering across all speech scenarios
10
+ * - Proper handling of empty strings, undefined, and short speech
11
+ * - Interruptions allowed only when word count meets or exceeds minInterruptionWords threshold
12
+ */
13
+ import { describe, expect, it } from 'vitest';
14
+ import { splitWords } from '../tokenize/basic/word.js';
15
+
/**
 * Counts the words in a transcript the way these tests model the interruption
 * check: a missing transcript is treated as the empty string.
 *
 * NOTE(review): the `true` argument to `splitWords` presumably makes it ignore
 * punctuation — confirm against the tokenizer's signature.
 */
const countWords = (text: string | undefined): number => splitWords(text ?? '', true).length;

/**
 * Replica of the minInterruptionWords gate under test: an interruption is
 * blocked while the transcript has fewer words than the threshold.
 */
const shouldBlockInterruption = (
  text: string | undefined,
  minInterruptionWords: number,
): boolean => countWords(text) < minInterruptionWords;

describe('Interruption Detection - Word Counting', () => {
  describe('Word Splitting Behavior', () => {
    it.each([
      { label: 'count empty string as 0 words', text: '', expected: 0 },
      { label: 'count single word correctly', text: 'hello', expected: 1 },
      { label: 'count two words correctly', text: 'hello world', expected: 2 },
      {
        label: 'count multiple words correctly',
        text: 'hello this is a full sentence',
        expected: 6,
      },
      { label: 'handle punctuation correctly', text: 'hello, world!', expected: 2 },
      // The inputs below deliberately contain runs of spaces; a single space
      // would not exercise the behaviour named in the label.
      { label: 'handle multiple spaces between words', text: 'hello    world', expected: 2 },
      { label: 'count whitespace-only string as 0 words', text: '    ', expected: 0 },
      { label: 'handle leading and trailing whitespace', text: '   hello world   ', expected: 2 },
    ])('should $label', ({ text, expected }) => {
      expect(countWords(text)).toBe(expected);
    });
  });

  describe('Integration: Full Interruption Check Logic', () => {
    const minInterruptionWords = 2;

    it.each([
      { label: 'empty transcript', text: '', words: 0 },
      { label: 'undefined transcript', text: undefined, words: 0 },
      { label: 'single word', text: 'hello', words: 1 },
    ])('should block interruption for $label with threshold 2', ({ text, words }) => {
      expect(countWords(text)).toBe(words);
      expect(shouldBlockInterruption(text, minInterruptionWords)).toBe(true);
    });

    it.each([
      { label: 'exactly meets', text: 'hello world', words: 2 },
      { label: 'exceeds', text: 'hello this is a full sentence', words: 6 },
    ])('should allow interruption when word count $label threshold', ({ text, words }) => {
      expect(countWords(text)).toBe(words);
      expect(shouldBlockInterruption(text, minInterruptionWords)).toBe(false);
    });

    it('should apply consistent word counting logic in both methods', () => {
      // Each transcript is paired with the decision both code paths must reach,
      // so the check cannot pass merely by comparing a value with itself.
      const cases: ReadonlyArray<readonly [string, boolean]> = [
        ['', true],
        ['hello', true],
        ['hello world', false],
        ['this is a longer sentence', false],
      ];

      for (const [transcript, expectedBlock] of cases) {
        // Path 1: normalise a possibly-missing transcript, then count.
        const viaNormalisedPath = shouldBlockInterruption(transcript, minInterruptionWords);
        // Path 2: count the transcript directly.
        const viaDirectPath = splitWords(transcript, true).length < minInterruptionWords;

        expect(viaNormalisedPath).toBe(expectedBlock);
        expect(viaDirectPath).toBe(expectedBlock);
      }
    });
  });
});
@@ -369,6 +369,10 @@ export class RoomIO {
369
369
  return this.transcriptionSynchronizer.textOutput;
370
370
  }
371
371
 
/**
 * Exposes the `done` flag of `participantAvailableFuture`.
 *
 * NOTE(review): presumably true once a participant has been linked to this
 * RoomIO — confirm against where the future is resolved.
 */
get isParticipantAvailable(): boolean {
  const { done } = this.participantAvailableFuture;
  return done;
}
375
+
372
376
  /** Switch to a different participant */
373
377
  setParticipant(participantIdentity: string | null) {
374
378
  this.logger.debug({ participantIdentity }, 'setting participant');