@livekit/agents 1.0.16 → 1.0.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99)
  1. package/dist/inference/llm.cjs +35 -13
  2. package/dist/inference/llm.cjs.map +1 -1
  3. package/dist/inference/llm.d.cts +10 -5
  4. package/dist/inference/llm.d.ts +10 -5
  5. package/dist/inference/llm.d.ts.map +1 -1
  6. package/dist/inference/llm.js +35 -13
  7. package/dist/inference/llm.js.map +1 -1
  8. package/dist/llm/chat_context.d.cts +1 -1
  9. package/dist/llm/chat_context.d.ts +1 -1
  10. package/dist/llm/llm.cjs.map +1 -1
  11. package/dist/llm/llm.d.cts +1 -1
  12. package/dist/llm/llm.d.ts +1 -1
  13. package/dist/llm/llm.d.ts.map +1 -1
  14. package/dist/llm/llm.js.map +1 -1
  15. package/dist/llm/provider_format/google.cjs.map +1 -1
  16. package/dist/llm/provider_format/google.d.cts +1 -1
  17. package/dist/llm/provider_format/google.d.ts +1 -1
  18. package/dist/llm/provider_format/google.d.ts.map +1 -1
  19. package/dist/llm/provider_format/google.js.map +1 -1
  20. package/dist/llm/provider_format/index.d.cts +1 -1
  21. package/dist/llm/provider_format/index.d.ts +1 -1
  22. package/dist/llm/provider_format/index.d.ts.map +1 -1
  23. package/dist/llm/realtime.cjs.map +1 -1
  24. package/dist/llm/realtime.d.cts +4 -0
  25. package/dist/llm/realtime.d.ts +4 -0
  26. package/dist/llm/realtime.d.ts.map +1 -1
  27. package/dist/llm/realtime.js.map +1 -1
  28. package/dist/llm/utils.cjs +2 -2
  29. package/dist/llm/utils.cjs.map +1 -1
  30. package/dist/llm/utils.d.cts +1 -1
  31. package/dist/llm/utils.d.ts +1 -1
  32. package/dist/llm/utils.d.ts.map +1 -1
  33. package/dist/llm/utils.js +2 -2
  34. package/dist/llm/utils.js.map +1 -1
  35. package/dist/llm/zod-utils.cjs +6 -3
  36. package/dist/llm/zod-utils.cjs.map +1 -1
  37. package/dist/llm/zod-utils.d.cts +1 -1
  38. package/dist/llm/zod-utils.d.ts +1 -1
  39. package/dist/llm/zod-utils.d.ts.map +1 -1
  40. package/dist/llm/zod-utils.js +6 -3
  41. package/dist/llm/zod-utils.js.map +1 -1
  42. package/dist/llm/zod-utils.test.cjs +83 -0
  43. package/dist/llm/zod-utils.test.cjs.map +1 -1
  44. package/dist/llm/zod-utils.test.js +83 -0
  45. package/dist/llm/zod-utils.test.js.map +1 -1
  46. package/dist/stt/stt.cjs +0 -1
  47. package/dist/stt/stt.cjs.map +1 -1
  48. package/dist/stt/stt.d.ts.map +1 -1
  49. package/dist/stt/stt.js +0 -1
  50. package/dist/stt/stt.js.map +1 -1
  51. package/dist/tts/tts.cjs +2 -4
  52. package/dist/tts/tts.cjs.map +1 -1
  53. package/dist/tts/tts.d.ts.map +1 -1
  54. package/dist/tts/tts.js +3 -5
  55. package/dist/tts/tts.js.map +1 -1
  56. package/dist/utils.cjs.map +1 -1
  57. package/dist/utils.d.cts +7 -0
  58. package/dist/utils.d.ts +7 -0
  59. package/dist/utils.d.ts.map +1 -1
  60. package/dist/utils.js.map +1 -1
  61. package/dist/voice/agent_activity.cjs +69 -20
  62. package/dist/voice/agent_activity.cjs.map +1 -1
  63. package/dist/voice/agent_activity.d.ts.map +1 -1
  64. package/dist/voice/agent_activity.js +69 -20
  65. package/dist/voice/agent_activity.js.map +1 -1
  66. package/dist/voice/agent_session.cjs +40 -1
  67. package/dist/voice/agent_session.cjs.map +1 -1
  68. package/dist/voice/agent_session.d.cts +5 -0
  69. package/dist/voice/agent_session.d.ts +5 -0
  70. package/dist/voice/agent_session.d.ts.map +1 -1
  71. package/dist/voice/agent_session.js +40 -1
  72. package/dist/voice/agent_session.js.map +1 -1
  73. package/dist/voice/interruption_detection.test.cjs +114 -0
  74. package/dist/voice/interruption_detection.test.cjs.map +1 -0
  75. package/dist/voice/interruption_detection.test.js +113 -0
  76. package/dist/voice/interruption_detection.test.js.map +1 -0
  77. package/dist/voice/room_io/room_io.cjs +3 -0
  78. package/dist/voice/room_io/room_io.cjs.map +1 -1
  79. package/dist/voice/room_io/room_io.d.cts +1 -0
  80. package/dist/voice/room_io/room_io.d.ts +1 -0
  81. package/dist/voice/room_io/room_io.d.ts.map +1 -1
  82. package/dist/voice/room_io/room_io.js +3 -0
  83. package/dist/voice/room_io/room_io.js.map +1 -1
  84. package/package.json +3 -3
  85. package/src/inference/llm.ts +53 -21
  86. package/src/llm/__snapshots__/zod-utils.test.ts.snap +218 -0
  87. package/src/llm/llm.ts +1 -1
  88. package/src/llm/provider_format/google.ts +4 -4
  89. package/src/llm/realtime.ts +8 -1
  90. package/src/llm/utils.ts +7 -2
  91. package/src/llm/zod-utils.test.ts +101 -0
  92. package/src/llm/zod-utils.ts +12 -3
  93. package/src/stt/stt.ts +2 -1
  94. package/src/tts/tts.ts +7 -5
  95. package/src/utils.ts +17 -0
  96. package/src/voice/agent_activity.ts +96 -24
  97. package/src/voice/agent_session.ts +54 -0
  98. package/src/voice/interruption_detection.test.ts +151 -0
  99. package/src/voice/room_io/room_io.ts +4 -0
@@ -235,6 +235,14 @@ export class AgentActivity implements RecognitionHooks {
235
235
  } catch (error) {
236
236
  this.logger.error(error, 'failed to update the tools');
237
237
  }
238
+
239
+ if (!this.llm.capabilities.audioOutput && !this.tts && this.agentSession.output.audio) {
240
+ this.logger.error(
241
+ 'audio output is enabled but RealtimeModel has no audio modality ' +
242
+ 'and no TTS is set. Either enable audio modality in the RealtimeModel ' +
243
+ 'or set a TTS model.',
244
+ );
245
+ }
238
246
  } else if (this.llm instanceof LLM) {
239
247
  try {
240
248
  updateInstructions({
@@ -625,11 +633,21 @@ export class AgentActivity implements RecognitionHooks {
625
633
  return;
626
634
  }
627
635
 
636
+ // Refactored interruption word count check:
637
+ // - Always apply minInterruptionWords filtering when STT is available and minInterruptionWords > 0
638
+ // - Apply check to all STT results: empty string, undefined, or any length
639
+ // - This ensures consistent behavior across all interruption scenarios
628
640
  if (this.stt && this.agentSession.options.minInterruptionWords > 0 && this.audioRecognition) {
629
641
  const text = this.audioRecognition.currentTranscript;
630
-
631
642
  // TODO(shubhra): better word splitting for multi-language
632
- if (text && splitWords(text, true).length < this.agentSession.options.minInterruptionWords) {
643
+
644
+ // Normalize text: convert undefined/null to empty string for consistent word counting
645
+ const normalizedText = text ?? '';
646
+ const wordCount = splitWords(normalizedText, true).length;
647
+
648
+ // Only allow interruption if word count meets or exceeds minInterruptionWords
649
+ // This applies to all cases: empty strings, partial speech, and full speech
650
+ if (wordCount < this.agentSession.options.minInterruptionWords) {
633
651
  return;
634
652
  }
635
653
  }
@@ -767,19 +785,30 @@ export class AgentActivity implements RecognitionHooks {
767
785
  return true;
768
786
  }
769
787
 
788
+ // Refactored interruption word count check for consistency with onVADInferenceDone:
789
+ // - Always apply minInterruptionWords filtering when STT is available and minInterruptionWords > 0
790
+ // - Use consistent word splitting logic with splitWords (matching onVADInferenceDone pattern)
770
791
  if (
771
792
  this.stt &&
772
793
  this.turnDetection !== 'manual' &&
773
794
  this._currentSpeech &&
774
795
  this._currentSpeech.allowInterruptions &&
775
796
  !this._currentSpeech.interrupted &&
776
- this.agentSession.options.minInterruptionWords > 0 &&
777
- info.newTranscript.split(' ').length < this.agentSession.options.minInterruptionWords
797
+ this.agentSession.options.minInterruptionWords > 0
778
798
  ) {
779
- // avoid interruption if the new_transcript is too short
780
- this.cancelPreemptiveGeneration();
781
- this.logger.info('skipping user input, new_transcript is too short');
782
- return false;
799
+ const wordCount = splitWords(info.newTranscript, true).length;
800
+ if (wordCount < this.agentSession.options.minInterruptionWords) {
801
+ // avoid interruption if the new_transcript contains fewer words than minInterruptionWords
802
+ this.cancelPreemptiveGeneration();
803
+ this.logger.info(
804
+ {
805
+ wordCount,
806
+ minInterruptionWords: this.agentSession.options.minInterruptionWords,
807
+ },
808
+ 'skipping user input, word count below minimum interruption threshold',
809
+ );
810
+ return false;
811
+ }
783
812
  }
784
813
 
785
814
  const oldTask = this._userTurnCompletedTask;
@@ -1612,7 +1641,7 @@ export class AgentActivity implements RecognitionHooks {
1612
1641
 
1613
1642
  const readMessages = async (
1614
1643
  abortController: AbortController,
1615
- outputs: Array<[string, _TextOut | null, _AudioOut | null]>,
1644
+ outputs: Array<[string, _TextOut | null, _AudioOut | null, ('text' | 'audio')[] | undefined]>,
1616
1645
  ) => {
1617
1646
  replyAbortController.signal.addEventListener('abort', () => abortController.abort(), {
1618
1647
  once: true,
@@ -1627,7 +1656,25 @@ export class AgentActivity implements RecognitionHooks {
1627
1656
  );
1628
1657
  break;
1629
1658
  }
1630
- const trNodeResult = await this.agent.transcriptionNode(msg.textStream, modelSettings);
1659
+
1660
+ const msgModalities = msg.modalities ? await msg.modalities : undefined;
1661
+ let ttsTextInput: ReadableStream<string> | null = null;
1662
+ let trTextInput: ReadableStream<string>;
1663
+
1664
+ if (msgModalities && !msgModalities.includes('audio') && this.tts) {
1665
+ if (this.llm instanceof RealtimeModel && this.llm.capabilities.audioOutput) {
1666
+ this.logger.warn(
1667
+ 'text response received from realtime API, falling back to use a TTS model.',
1668
+ );
1669
+ }
1670
+ const [_ttsTextInput, _trTextInput] = msg.textStream.tee();
1671
+ ttsTextInput = _ttsTextInput;
1672
+ trTextInput = _trTextInput;
1673
+ } else {
1674
+ trTextInput = msg.textStream;
1675
+ }
1676
+
1677
+ const trNodeResult = await this.agent.transcriptionNode(trTextInput, modelSettings);
1631
1678
  let textOut: _TextOut | null = null;
1632
1679
  if (trNodeResult) {
1633
1680
  const [textForwardTask, _textOut] = performTextForwarding(
@@ -1638,30 +1685,51 @@ export class AgentActivity implements RecognitionHooks {
1638
1685
  forwardTasks.push(textForwardTask);
1639
1686
  textOut = _textOut;
1640
1687
  }
1688
+
1641
1689
  let audioOut: _AudioOut | null = null;
1642
1690
  if (audioOutput) {
1643
- const realtimeAudio = await this.agent.realtimeAudioOutputNode(
1644
- msg.audioStream,
1645
- modelSettings,
1646
- );
1647
- if (realtimeAudio) {
1691
+ let realtimeAudioResult: ReadableStream<AudioFrame> | null = null;
1692
+
1693
+ if (ttsTextInput) {
1694
+ const [ttsTask, ttsStream] = performTTSInference(
1695
+ (...args) => this.agent.ttsNode(...args),
1696
+ ttsTextInput,
1697
+ modelSettings,
1698
+ abortController,
1699
+ );
1700
+ tasks.push(ttsTask);
1701
+ realtimeAudioResult = ttsStream;
1702
+ } else if (msgModalities && msgModalities.includes('audio')) {
1703
+ realtimeAudioResult = await this.agent.realtimeAudioOutputNode(
1704
+ msg.audioStream,
1705
+ modelSettings,
1706
+ );
1707
+ } else if (this.llm instanceof RealtimeModel && this.llm.capabilities.audioOutput) {
1708
+ this.logger.error(
1709
+ 'Text message received from Realtime API with audio modality. ' +
1710
+ 'This usually happens when text chat context is synced to the API. ' +
1711
+ 'Try to add a TTS model as fallback or use text modality with TTS instead.',
1712
+ );
1713
+ } else {
1714
+ this.logger.warn(
1715
+ 'audio output is enabled but neither tts nor realtime audio is available',
1716
+ );
1717
+ }
1718
+
1719
+ if (realtimeAudioResult) {
1648
1720
  const [forwardTask, _audioOut] = performAudioForwarding(
1649
- realtimeAudio,
1721
+ realtimeAudioResult,
1650
1722
  audioOutput,
1651
1723
  abortController,
1652
1724
  );
1653
1725
  forwardTasks.push(forwardTask);
1654
1726
  audioOut = _audioOut;
1655
1727
  audioOut.firstFrameFut.await.finally(onFirstFrame);
1656
- } else {
1657
- this.logger.warn(
1658
- 'audio output is enabled but neither tts nor realtime audio is available',
1659
- );
1660
1728
  }
1661
1729
  } else if (textOut) {
1662
1730
  textOut.firstTextFut.await.finally(onFirstFrame);
1663
1731
  }
1664
- outputs.push([msg.messageId, textOut, audioOut]);
1732
+ outputs.push([msg.messageId, textOut, audioOut, msgModalities]);
1665
1733
  }
1666
1734
  await waitFor(forwardTasks);
1667
1735
  } catch (error) {
@@ -1671,7 +1739,9 @@ export class AgentActivity implements RecognitionHooks {
1671
1739
  }
1672
1740
  };
1673
1741
 
1674
- const messageOutputs: Array<[string, _TextOut | null, _AudioOut | null]> = [];
1742
+ const messageOutputs: Array<
1743
+ [string, _TextOut | null, _AudioOut | null, ('text' | 'audio')[] | undefined]
1744
+ > = [];
1675
1745
  const tasks = [
1676
1746
  Task.from(
1677
1747
  (controller) => readMessages(controller, messageOutputs),
@@ -1750,7 +1820,7 @@ export class AgentActivity implements RecognitionHooks {
1750
1820
 
1751
1821
  if (messageOutputs.length > 0) {
1752
1822
  // there should be only one message
1753
- const [msgId, textOut, audioOut] = messageOutputs[0]!;
1823
+ const [msgId, textOut, audioOut, msgModalities] = messageOutputs[0]!;
1754
1824
  let forwardedText = textOut?.text || '';
1755
1825
 
1756
1826
  if (audioOutput) {
@@ -1775,6 +1845,8 @@ export class AgentActivity implements RecognitionHooks {
1775
1845
  this.realtimeSession.truncate({
1776
1846
  messageId: msgId,
1777
1847
  audioEndMs: Math.floor(playbackPosition),
1848
+ modalities: msgModalities,
1849
+ audioTranscript: forwardedText,
1778
1850
  });
1779
1851
  }
1780
1852
 
@@ -1805,7 +1877,7 @@ export class AgentActivity implements RecognitionHooks {
1805
1877
 
1806
1878
  if (messageOutputs.length > 0) {
1807
1879
  // there should be only one message
1808
- const [msgId, textOut, _] = messageOutputs[0]!;
1880
+ const [msgId, textOut, _, __] = messageOutputs[0]!;
1809
1881
  const message = ChatMessage.create({
1810
1882
  role: 'assistant',
1811
1883
  content: textOut?.text || '',
@@ -58,6 +58,7 @@ export interface VoiceOptions {
58
58
  maxEndpointingDelay: number;
59
59
  maxToolSteps: number;
60
60
  preemptiveGeneration: boolean;
61
+ userAwayTimeout?: number | null;
61
62
  }
62
63
 
63
64
  const defaultVoiceOptions: VoiceOptions = {
@@ -69,6 +70,7 @@ const defaultVoiceOptions: VoiceOptions = {
69
70
  maxEndpointingDelay: 6000,
70
71
  maxToolSteps: 3,
71
72
  preemptiveGeneration: false,
73
+ userAwayTimeout: 15.0,
72
74
  } as const;
73
75
 
74
76
  export type TurnDetectionMode = 'stt' | 'vad' | 'realtime_llm' | 'manual' | _TurnDetector;
@@ -123,6 +125,7 @@ export class AgentSession<
123
125
  private _output: AgentOutput;
124
126
 
125
127
  private closingTask: Promise<void> | null = null;
128
+ private userAwayTimer: NodeJS.Timeout | null = null;
126
129
 
127
130
  constructor(opts: AgentSessionOptions<UserData>) {
128
131
  super();
@@ -167,6 +170,8 @@ export class AgentSession<
167
170
  // This is the "global" chat context, it holds the entire conversation history
168
171
  this._chatCtx = ChatContext.empty();
169
172
  this.options = { ...defaultVoiceOptions, ...voiceOptions };
173
+
174
+ this.on(AgentSessionEventTypes.UserInputTranscribed, this._onUserInputTranscribed.bind(this));
170
175
  }
171
176
 
172
177
  get input(): AgentInput {
@@ -416,6 +421,14 @@ export class AgentSession<
416
421
 
417
422
  const oldState = this._agentState;
418
423
  this._agentState = state;
424
+
425
+ // Handle user away timer based on state changes
426
+ if (state === 'listening' && this.userState === 'listening') {
427
+ this._setUserAwayTimer();
428
+ } else {
429
+ this._cancelUserAwayTimer();
430
+ }
431
+
419
432
  this.emit(
420
433
  AgentSessionEventTypes.AgentStateChanged,
421
434
  createAgentStateChangedEvent(oldState, state),
@@ -430,6 +443,14 @@ export class AgentSession<
430
443
 
431
444
  const oldState = this.userState;
432
445
  this.userState = state;
446
+
447
+ // Handle user away timer based on state changes
448
+ if (state === 'listening' && this._agentState === 'listening') {
449
+ this._setUserAwayTimer();
450
+ } else {
451
+ this._cancelUserAwayTimer();
452
+ }
453
+
433
454
  this.emit(
434
455
  AgentSessionEventTypes.UserStateChanged,
435
456
  createUserStateChangedEvent(oldState, state),
@@ -451,6 +472,37 @@ export class AgentSession<
451
472
 
452
473
  private onTextOutputChanged(): void {}
453
474
 
475
+ private _setUserAwayTimer(): void {
476
+ this._cancelUserAwayTimer();
477
+
478
+ if (this.options.userAwayTimeout === null || this.options.userAwayTimeout === undefined) {
479
+ return;
480
+ }
481
+
482
+ if (this.roomIO && !this.roomIO.isParticipantAvailable) {
483
+ return;
484
+ }
485
+
486
+ this.userAwayTimer = setTimeout(() => {
487
+ this.logger.debug('User away timeout triggered');
488
+ this._updateUserState('away');
489
+ }, this.options.userAwayTimeout * 1000);
490
+ }
491
+
492
+ private _cancelUserAwayTimer(): void {
493
+ if (this.userAwayTimer !== null) {
494
+ clearTimeout(this.userAwayTimer);
495
+ this.userAwayTimer = null;
496
+ }
497
+ }
498
+
499
+ private _onUserInputTranscribed(ev: UserInputTranscribedEvent): void {
500
+ if (this.userState === 'away' && ev.isFinal) {
501
+ this.logger.debug('User returned from away state due to speech input');
502
+ this._updateUserState('listening');
503
+ }
504
+ }
505
+
454
506
  private async closeImpl(
455
507
  reason: CloseReason,
456
508
  error: RealtimeModelError | LLMError | TTSError | STTError | null = null,
@@ -460,6 +512,8 @@ export class AgentSession<
460
512
  return;
461
513
  }
462
514
 
515
+ this._cancelUserAwayTimer();
516
+
463
517
  if (this.activity) {
464
518
  if (!drain) {
465
519
  try {
@@ -0,0 +1,151 @@
1
+ // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+
5
+ /**
6
+ * Unit tests for interruption detection logic in AgentActivity.
7
+ *
8
+ * Tests the refactored minInterruptionWords check which ensures:
9
+ * - Consistent word count filtering across all speech scenarios
10
+ * - Proper handling of empty strings, undefined, and short speech
11
+ * - Interruptions allowed only when word count meets or exceeds minInterruptionWords threshold
12
+ */
13
+ import { describe, expect, it } from 'vitest';
14
+ import { splitWords } from '../tokenize/basic/word.js';
15
+
16
+ describe('Interruption Detection - Word Counting', () => {
17
+ describe('Word Splitting Behavior', () => {
18
+ it('should count empty string as 0 words', () => {
19
+ const text = '';
20
+ const wordCount = splitWords(text, true).length;
21
+ expect(wordCount).toBe(0);
22
+ });
23
+
24
+ it('should count single word correctly', () => {
25
+ const text = 'hello';
26
+ const wordCount = splitWords(text, true).length;
27
+ expect(wordCount).toBe(1);
28
+ });
29
+
30
+ it('should count two words correctly', () => {
31
+ const text = 'hello world';
32
+ const wordCount = splitWords(text, true).length;
33
+ expect(wordCount).toBe(2);
34
+ });
35
+
36
+ it('should count multiple words correctly', () => {
37
+ const text = 'hello this is a full sentence';
38
+ const wordCount = splitWords(text, true).length;
39
+ expect(wordCount).toBe(6);
40
+ });
41
+
42
+ it('should handle punctuation correctly', () => {
43
+ const text = 'hello, world!';
44
+ const wordCount = splitWords(text, true).length;
45
+ expect(wordCount).toBe(2);
46
+ });
47
+
48
+ it('should handle multiple spaces between words', () => {
49
+ const text = 'hello world';
50
+ const wordCount = splitWords(text, true).length;
51
+ expect(wordCount).toBe(2);
52
+ });
53
+
54
+ it('should count whitespace-only string as 0 words', () => {
55
+ const text = ' ';
56
+ const wordCount = splitWords(text, true).length;
57
+ expect(wordCount).toBe(0);
58
+ });
59
+
60
+ it('should handle leading and trailing whitespace', () => {
61
+ const text = ' hello world ';
62
+ const wordCount = splitWords(text, true).length;
63
+ expect(wordCount).toBe(2);
64
+ });
65
+ });
66
+
67
+ describe('Integration: Full Interruption Check Logic', () => {
68
+ it('should block interruption for empty transcript with threshold 2', () => {
69
+ const text = '';
70
+ const minInterruptionWords = 2;
71
+
72
+ const normalizedText = text ?? '';
73
+ const wordCount = splitWords(normalizedText, true).length;
74
+ const shouldBlock = wordCount < minInterruptionWords;
75
+
76
+ expect(normalizedText).toBe('');
77
+ expect(wordCount).toBe(0);
78
+ expect(shouldBlock).toBe(true);
79
+ });
80
+
81
+ it('should block interruption for undefined transcript with threshold 2', () => {
82
+ const text: string | undefined = undefined;
83
+ const minInterruptionWords = 2;
84
+
85
+ const normalizedText = text ?? '';
86
+ const wordCount = splitWords(normalizedText, true).length;
87
+ const shouldBlock = wordCount < minInterruptionWords;
88
+
89
+ expect(normalizedText).toBe('');
90
+ expect(wordCount).toBe(0);
91
+ expect(shouldBlock).toBe(true);
92
+ });
93
+
94
+ it('should block interruption for single word with threshold 2', () => {
95
+ const text = 'hello';
96
+ const minInterruptionWords = 2;
97
+
98
+ const normalizedText = text ?? '';
99
+ const wordCount = splitWords(normalizedText, true).length;
100
+ const shouldBlock = wordCount < minInterruptionWords;
101
+
102
+ expect(normalizedText).toBe('hello');
103
+ expect(wordCount).toBe(1);
104
+ expect(shouldBlock).toBe(true);
105
+ });
106
+
107
+ it('should allow interruption when word count exactly meets threshold', () => {
108
+ const text = 'hello world';
109
+ const minInterruptionWords = 2;
110
+
111
+ const normalizedText = text ?? '';
112
+ const wordCount = splitWords(normalizedText, true).length;
113
+ const shouldBlock = wordCount < minInterruptionWords;
114
+
115
+ expect(normalizedText).toBe('hello world');
116
+ expect(wordCount).toBe(2);
117
+ expect(shouldBlock).toBe(false);
118
+ });
119
+
120
+ it('should allow interruption when word count exceeds threshold', () => {
121
+ const text = 'hello this is a full sentence';
122
+ const minInterruptionWords = 2;
123
+
124
+ const normalizedText = text ?? '';
125
+ const wordCount = splitWords(normalizedText, true).length;
126
+ const shouldBlock = wordCount < minInterruptionWords;
127
+
128
+ expect(normalizedText).toBe('hello this is a full sentence');
129
+ expect(wordCount).toBe(6);
130
+ expect(shouldBlock).toBe(false);
131
+ });
132
+
133
+ it('should apply consistent word counting logic in both methods', () => {
134
+ const transcripts = ['', 'hello', 'hello world', 'this is a longer sentence'];
135
+ const threshold = 2;
136
+
137
+ transcripts.forEach((transcript) => {
138
+ const text1 = transcript;
139
+ const normalizedText1 = text1 ?? '';
140
+ const wordCount1 = splitWords(normalizedText1, true).length;
141
+ const shouldBlock1 = wordCount1 < threshold;
142
+
143
+ const wordCount2 = splitWords(transcript, true).length;
144
+ const shouldBlock2 = wordCount2 < threshold;
145
+
146
+ expect(wordCount1).toBe(wordCount2);
147
+ expect(shouldBlock1).toBe(shouldBlock2);
148
+ });
149
+ });
150
+ });
151
+ });
@@ -369,6 +369,10 @@ export class RoomIO {
369
369
  return this.transcriptionSynchronizer.textOutput;
370
370
  }
371
371
 
372
+ get isParticipantAvailable(): boolean {
373
+ return this.participantAvailableFuture.done;
374
+ }
375
+
372
376
  /** Switch to a different participant */
373
377
  setParticipant(participantIdentity: string | null) {
374
378
  this.logger.debug({ participantIdentity }, 'setting participant');