@livekit/agents 1.0.17 → 1.0.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (216)
  1. package/dist/index.cjs +3 -0
  2. package/dist/index.cjs.map +1 -1
  3. package/dist/index.d.cts +2 -1
  4. package/dist/index.d.ts +2 -1
  5. package/dist/index.d.ts.map +1 -1
  6. package/dist/index.js +2 -0
  7. package/dist/index.js.map +1 -1
  8. package/dist/inference/api_protos.d.cts +12 -12
  9. package/dist/inference/api_protos.d.ts +12 -12
  10. package/dist/inference/llm.cjs +35 -13
  11. package/dist/inference/llm.cjs.map +1 -1
  12. package/dist/inference/llm.d.cts +10 -5
  13. package/dist/inference/llm.d.ts +10 -5
  14. package/dist/inference/llm.d.ts.map +1 -1
  15. package/dist/inference/llm.js +35 -13
  16. package/dist/inference/llm.js.map +1 -1
  17. package/dist/inference/tts.cjs +1 -1
  18. package/dist/inference/tts.cjs.map +1 -1
  19. package/dist/inference/tts.js +1 -1
  20. package/dist/inference/tts.js.map +1 -1
  21. package/dist/ipc/job_proc_lazy_main.cjs +6 -2
  22. package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
  23. package/dist/ipc/job_proc_lazy_main.js +6 -2
  24. package/dist/ipc/job_proc_lazy_main.js.map +1 -1
  25. package/dist/job.cjs +31 -0
  26. package/dist/job.cjs.map +1 -1
  27. package/dist/job.d.cts +6 -0
  28. package/dist/job.d.ts +6 -0
  29. package/dist/job.d.ts.map +1 -1
  30. package/dist/job.js +31 -0
  31. package/dist/job.js.map +1 -1
  32. package/dist/llm/chat_context.cjs +33 -0
  33. package/dist/llm/chat_context.cjs.map +1 -1
  34. package/dist/llm/chat_context.d.cts +22 -2
  35. package/dist/llm/chat_context.d.ts +22 -2
  36. package/dist/llm/chat_context.d.ts.map +1 -1
  37. package/dist/llm/chat_context.js +32 -0
  38. package/dist/llm/chat_context.js.map +1 -1
  39. package/dist/llm/index.cjs +2 -0
  40. package/dist/llm/index.cjs.map +1 -1
  41. package/dist/llm/index.d.cts +1 -1
  42. package/dist/llm/index.d.ts +1 -1
  43. package/dist/llm/index.d.ts.map +1 -1
  44. package/dist/llm/index.js +2 -0
  45. package/dist/llm/index.js.map +1 -1
  46. package/dist/llm/llm.cjs.map +1 -1
  47. package/dist/llm/llm.d.cts +1 -1
  48. package/dist/llm/llm.d.ts +1 -1
  49. package/dist/llm/llm.d.ts.map +1 -1
  50. package/dist/llm/llm.js.map +1 -1
  51. package/dist/llm/provider_format/google.cjs.map +1 -1
  52. package/dist/llm/provider_format/google.d.cts +1 -1
  53. package/dist/llm/provider_format/google.d.ts +1 -1
  54. package/dist/llm/provider_format/google.d.ts.map +1 -1
  55. package/dist/llm/provider_format/google.js.map +1 -1
  56. package/dist/llm/provider_format/google.test.cjs +48 -0
  57. package/dist/llm/provider_format/google.test.cjs.map +1 -1
  58. package/dist/llm/provider_format/google.test.js +54 -1
  59. package/dist/llm/provider_format/google.test.js.map +1 -1
  60. package/dist/llm/provider_format/index.d.cts +1 -1
  61. package/dist/llm/provider_format/index.d.ts +1 -1
  62. package/dist/llm/provider_format/index.d.ts.map +1 -1
  63. package/dist/llm/provider_format/openai.cjs +1 -2
  64. package/dist/llm/provider_format/openai.cjs.map +1 -1
  65. package/dist/llm/provider_format/openai.js +1 -2
  66. package/dist/llm/provider_format/openai.js.map +1 -1
  67. package/dist/llm/provider_format/openai.test.cjs +32 -0
  68. package/dist/llm/provider_format/openai.test.cjs.map +1 -1
  69. package/dist/llm/provider_format/openai.test.js +38 -1
  70. package/dist/llm/provider_format/openai.test.js.map +1 -1
  71. package/dist/llm/realtime.cjs.map +1 -1
  72. package/dist/llm/realtime.d.cts +4 -0
  73. package/dist/llm/realtime.d.ts +4 -0
  74. package/dist/llm/realtime.d.ts.map +1 -1
  75. package/dist/llm/realtime.js.map +1 -1
  76. package/dist/llm/utils.cjs +2 -2
  77. package/dist/llm/utils.cjs.map +1 -1
  78. package/dist/llm/utils.d.cts +1 -1
  79. package/dist/llm/utils.d.ts +1 -1
  80. package/dist/llm/utils.d.ts.map +1 -1
  81. package/dist/llm/utils.js +2 -2
  82. package/dist/llm/utils.js.map +1 -1
  83. package/dist/llm/zod-utils.cjs +6 -3
  84. package/dist/llm/zod-utils.cjs.map +1 -1
  85. package/dist/llm/zod-utils.d.cts +1 -1
  86. package/dist/llm/zod-utils.d.ts +1 -1
  87. package/dist/llm/zod-utils.d.ts.map +1 -1
  88. package/dist/llm/zod-utils.js +6 -3
  89. package/dist/llm/zod-utils.js.map +1 -1
  90. package/dist/llm/zod-utils.test.cjs +83 -0
  91. package/dist/llm/zod-utils.test.cjs.map +1 -1
  92. package/dist/llm/zod-utils.test.js +83 -0
  93. package/dist/llm/zod-utils.test.js.map +1 -1
  94. package/dist/log.cjs.map +1 -1
  95. package/dist/log.d.ts.map +1 -1
  96. package/dist/log.js.map +1 -1
  97. package/dist/telemetry/index.cjs +51 -0
  98. package/dist/telemetry/index.cjs.map +1 -0
  99. package/dist/telemetry/index.d.cts +4 -0
  100. package/dist/telemetry/index.d.ts +4 -0
  101. package/dist/telemetry/index.d.ts.map +1 -0
  102. package/dist/telemetry/index.js +12 -0
  103. package/dist/telemetry/index.js.map +1 -0
  104. package/dist/telemetry/trace_types.cjs +191 -0
  105. package/dist/telemetry/trace_types.cjs.map +1 -0
  106. package/dist/telemetry/trace_types.d.cts +56 -0
  107. package/dist/telemetry/trace_types.d.ts +56 -0
  108. package/dist/telemetry/trace_types.d.ts.map +1 -0
  109. package/dist/telemetry/trace_types.js +113 -0
  110. package/dist/telemetry/trace_types.js.map +1 -0
  111. package/dist/telemetry/traces.cjs +196 -0
  112. package/dist/telemetry/traces.cjs.map +1 -0
  113. package/dist/telemetry/traces.d.cts +97 -0
  114. package/dist/telemetry/traces.d.ts +97 -0
  115. package/dist/telemetry/traces.d.ts.map +1 -0
  116. package/dist/telemetry/traces.js +173 -0
  117. package/dist/telemetry/traces.js.map +1 -0
  118. package/dist/telemetry/utils.cjs +86 -0
  119. package/dist/telemetry/utils.cjs.map +1 -0
  120. package/dist/telemetry/utils.d.cts +5 -0
  121. package/dist/telemetry/utils.d.ts +5 -0
  122. package/dist/telemetry/utils.d.ts.map +1 -0
  123. package/dist/telemetry/utils.js +51 -0
  124. package/dist/telemetry/utils.js.map +1 -0
  125. package/dist/tts/tts.cjs.map +1 -1
  126. package/dist/tts/tts.d.ts.map +1 -1
  127. package/dist/tts/tts.js.map +1 -1
  128. package/dist/utils.cjs.map +1 -1
  129. package/dist/utils.d.cts +7 -0
  130. package/dist/utils.d.ts +7 -0
  131. package/dist/utils.d.ts.map +1 -1
  132. package/dist/utils.js.map +1 -1
  133. package/dist/voice/agent.cjs +15 -0
  134. package/dist/voice/agent.cjs.map +1 -1
  135. package/dist/voice/agent.d.cts +4 -1
  136. package/dist/voice/agent.d.ts +4 -1
  137. package/dist/voice/agent.d.ts.map +1 -1
  138. package/dist/voice/agent.js +15 -0
  139. package/dist/voice/agent.js.map +1 -1
  140. package/dist/voice/agent_activity.cjs +71 -20
  141. package/dist/voice/agent_activity.cjs.map +1 -1
  142. package/dist/voice/agent_activity.d.ts.map +1 -1
  143. package/dist/voice/agent_activity.js +71 -20
  144. package/dist/voice/agent_activity.js.map +1 -1
  145. package/dist/voice/agent_session.cjs +69 -2
  146. package/dist/voice/agent_session.cjs.map +1 -1
  147. package/dist/voice/agent_session.d.cts +11 -2
  148. package/dist/voice/agent_session.d.ts +11 -2
  149. package/dist/voice/agent_session.d.ts.map +1 -1
  150. package/dist/voice/agent_session.js +70 -3
  151. package/dist/voice/agent_session.js.map +1 -1
  152. package/dist/voice/audio_recognition.cjs.map +1 -1
  153. package/dist/voice/audio_recognition.d.ts.map +1 -1
  154. package/dist/voice/audio_recognition.js.map +1 -1
  155. package/dist/voice/generation.cjs.map +1 -1
  156. package/dist/voice/generation.d.ts.map +1 -1
  157. package/dist/voice/generation.js.map +1 -1
  158. package/dist/voice/index.cjs +2 -0
  159. package/dist/voice/index.cjs.map +1 -1
  160. package/dist/voice/index.d.cts +1 -0
  161. package/dist/voice/index.d.ts +1 -0
  162. package/dist/voice/index.d.ts.map +1 -1
  163. package/dist/voice/index.js +1 -0
  164. package/dist/voice/index.js.map +1 -1
  165. package/dist/voice/interruption_detection.test.cjs +114 -0
  166. package/dist/voice/interruption_detection.test.cjs.map +1 -0
  167. package/dist/voice/interruption_detection.test.js +113 -0
  168. package/dist/voice/interruption_detection.test.js.map +1 -0
  169. package/dist/voice/report.cjs +69 -0
  170. package/dist/voice/report.cjs.map +1 -0
  171. package/dist/voice/report.d.cts +26 -0
  172. package/dist/voice/report.d.ts +26 -0
  173. package/dist/voice/report.d.ts.map +1 -0
  174. package/dist/voice/report.js +44 -0
  175. package/dist/voice/report.js.map +1 -0
  176. package/dist/voice/room_io/room_io.cjs +3 -0
  177. package/dist/voice/room_io/room_io.cjs.map +1 -1
  178. package/dist/voice/room_io/room_io.d.cts +1 -0
  179. package/dist/voice/room_io/room_io.d.ts +1 -0
  180. package/dist/voice/room_io/room_io.d.ts.map +1 -1
  181. package/dist/voice/room_io/room_io.js +3 -0
  182. package/dist/voice/room_io/room_io.js.map +1 -1
  183. package/package.json +12 -5
  184. package/src/index.ts +2 -1
  185. package/src/inference/llm.ts +53 -21
  186. package/src/inference/tts.ts +1 -1
  187. package/src/ipc/job_proc_lazy_main.ts +10 -2
  188. package/src/job.ts +48 -0
  189. package/src/llm/__snapshots__/zod-utils.test.ts.snap +218 -0
  190. package/src/llm/chat_context.ts +53 -1
  191. package/src/llm/index.ts +1 -0
  192. package/src/llm/llm.ts +3 -1
  193. package/src/llm/provider_format/google.test.ts +72 -1
  194. package/src/llm/provider_format/google.ts +4 -4
  195. package/src/llm/provider_format/openai.test.ts +55 -1
  196. package/src/llm/provider_format/openai.ts +3 -2
  197. package/src/llm/realtime.ts +8 -1
  198. package/src/llm/utils.ts +7 -2
  199. package/src/llm/zod-utils.test.ts +101 -0
  200. package/src/llm/zod-utils.ts +12 -3
  201. package/src/log.ts +1 -0
  202. package/src/telemetry/index.ts +10 -0
  203. package/src/telemetry/trace_types.ts +88 -0
  204. package/src/telemetry/traces.ts +266 -0
  205. package/src/telemetry/utils.ts +61 -0
  206. package/src/tts/tts.ts +4 -0
  207. package/src/utils.ts +17 -0
  208. package/src/voice/agent.ts +22 -0
  209. package/src/voice/agent_activity.ts +102 -24
  210. package/src/voice/agent_session.ts +98 -1
  211. package/src/voice/audio_recognition.ts +2 -0
  212. package/src/voice/generation.ts +3 -0
  213. package/src/voice/index.ts +1 -0
  214. package/src/voice/interruption_detection.test.ts +151 -0
  215. package/src/voice/report.ts +77 -0
  216. package/src/voice/room_io/room_io.ts +4 -0
@@ -14,7 +14,7 @@ import {
14
14
  type TTSModelString,
15
15
  } from '../inference/index.js';
16
16
  import { getJobContext } from '../job.js';
17
- import { ChatContext, ChatMessage } from '../llm/chat_context.js';
17
+ import { AgentHandoffItem, ChatContext, ChatMessage } from '../llm/chat_context.js';
18
18
  import type { LLM, RealtimeModel, RealtimeModelError, ToolChoice } from '../llm/index.js';
19
19
  import type { LLMError } from '../llm/llm.js';
20
20
  import { log } from '../log.js';
@@ -26,6 +26,7 @@ import type { Agent } from './agent.js';
26
26
  import { AgentActivity } from './agent_activity.js';
27
27
  import type { _TurnDetector } from './audio_recognition.js';
28
28
  import {
29
+ type AgentEvent,
29
30
  AgentSessionEventTypes,
30
31
  type AgentState,
31
32
  type AgentStateChangedEvent,
@@ -58,6 +59,7 @@ export interface VoiceOptions {
58
59
  maxEndpointingDelay: number;
59
60
  maxToolSteps: number;
60
61
  preemptiveGeneration: boolean;
62
+ userAwayTimeout?: number | null;
61
63
  }
62
64
 
63
65
  const defaultVoiceOptions: VoiceOptions = {
@@ -69,6 +71,7 @@ const defaultVoiceOptions: VoiceOptions = {
69
71
  maxEndpointingDelay: 6000,
70
72
  maxToolSteps: 3,
71
73
  preemptiveGeneration: false,
74
+ userAwayTimeout: 15.0,
72
75
  } as const;
73
76
 
74
77
  export type TurnDetectionMode = 'stt' | 'vad' | 'realtime_llm' | 'manual' | _TurnDetector;
@@ -123,6 +126,10 @@ export class AgentSession<
123
126
  private _output: AgentOutput;
124
127
 
125
128
  private closingTask: Promise<void> | null = null;
129
+ private userAwayTimer: NodeJS.Timeout | null = null;
130
+
131
+ /** @internal */
132
+ _recordedEvents: AgentEvent[] = [];
126
133
 
127
134
  constructor(opts: AgentSessionOptions<UserData>) {
128
135
  super();
@@ -167,6 +174,17 @@ export class AgentSession<
167
174
  // This is the "global" chat context, it holds the entire conversation history
168
175
  this._chatCtx = ChatContext.empty();
169
176
  this.options = { ...defaultVoiceOptions, ...voiceOptions };
177
+
178
+ this.on(AgentSessionEventTypes.UserInputTranscribed, this._onUserInputTranscribed.bind(this));
179
+ }
180
+
181
+ emit<K extends keyof AgentSessionCallbacks>(
182
+ event: K,
183
+ ...args: Parameters<AgentSessionCallbacks[K]>
184
+ ): boolean {
185
+ const eventData = args[0] as AgentEvent;
186
+ this._recordedEvents.push(eventData);
187
+ return super.emit(event, ...args);
170
188
  }
171
189
 
172
190
  get input(): AgentInput {
@@ -194,15 +212,20 @@ export class AgentSession<
194
212
  }
195
213
 
196
214
  async start({
215
+ // TODO(brian): PR2 - Add setupCloudTracer() call if on LiveKit Cloud with recording enabled
216
+ // TODO(brian): PR3 - Add span: this._sessionSpan = tracer.startSpan('agent_session'), store as instance property
217
+ // TODO(brian): PR4 - Add setupCloudLogger() call in setupCloudTracer() to setup OTEL logging with Pino bridge
197
218
  agent,
198
219
  room,
199
220
  inputOptions,
200
221
  outputOptions,
222
+ record = true,
201
223
  }: {
202
224
  agent: Agent;
203
225
  room: Room;
204
226
  inputOptions?: Partial<RoomInputOptions>;
205
227
  outputOptions?: Partial<RoomOutputOptions>;
228
+ record?: boolean;
206
229
  }): Promise<void> {
207
230
  if (this.started) {
208
231
  return;
@@ -242,6 +265,17 @@ export class AgentSession<
242
265
  this.logger.debug('Auto-connecting to room via job context');
243
266
  tasks.push(ctx.connect());
244
267
  }
268
+
269
+ if (record) {
270
+ if (ctx._primaryAgentSession === undefined) {
271
+ ctx._primaryAgentSession = this;
272
+ } else {
273
+ throw new Error(
274
+ 'Only one `AgentSession` can be the primary at a time. If you want to ignore primary designation, use session.start(record=False).',
275
+ );
276
+ }
277
+ }
278
+
245
279
  // TODO(AJS-265): add shutdown callback to job context
246
280
  tasks.push(this.updateActivity(this.agent));
247
281
 
@@ -336,6 +370,8 @@ export class AgentSession<
336
370
  // TODO(AJS-129): add lock to agent activity core lifecycle
337
371
  this.nextActivity = new AgentActivity(agent, this);
338
372
 
373
+ const previousActivity = this.activity;
374
+
339
375
  if (this.activity) {
340
376
  await this.activity.drain();
341
377
  await this.activity.close();
@@ -344,6 +380,14 @@ export class AgentSession<
344
380
  this.activity = this.nextActivity;
345
381
  this.nextActivity = undefined;
346
382
 
383
+ this._chatCtx.insert(
384
+ new AgentHandoffItem({
385
+ oldAgentId: previousActivity?.agent.id,
386
+ newAgentId: agent.id,
387
+ }),
388
+ );
389
+ this.logger.debug({ previousActivity, agent }, 'Agent handoff inserted into chat context');
390
+
347
391
  await this.activity.start();
348
392
 
349
393
  if (this._input.audio) {
@@ -414,8 +458,18 @@ export class AgentSession<
414
458
  return;
415
459
  }
416
460
 
461
+ // TODO(brian): PR3 - Add span: if state === 'speaking' && !this._agentSpeakingSpan, create tracer.startSpan('agent_speaking') with participant attributes
462
+ // TODO(brian): PR3 - Add span: if state !== 'speaking' && this._agentSpeakingSpan, end and clear this._agentSpeakingSpan
417
463
  const oldState = this._agentState;
418
464
  this._agentState = state;
465
+
466
+ // Handle user away timer based on state changes
467
+ if (state === 'listening' && this.userState === 'listening') {
468
+ this._setUserAwayTimer();
469
+ } else {
470
+ this._cancelUserAwayTimer();
471
+ }
472
+
419
473
  this.emit(
420
474
  AgentSessionEventTypes.AgentStateChanged,
421
475
  createAgentStateChangedEvent(oldState, state),
@@ -428,8 +482,18 @@ export class AgentSession<
428
482
  return;
429
483
  }
430
484
 
485
+ // TODO(brian): PR3 - Add span: if state === 'speaking' && !this._userSpeakingSpan, create tracer.startSpan('user_speaking') with participant attributes
486
+ // TODO(brian): PR3 - Add span: if state !== 'speaking' && this._userSpeakingSpan, end and clear this._userSpeakingSpan
431
487
  const oldState = this.userState;
432
488
  this.userState = state;
489
+
490
+ // Handle user away timer based on state changes
491
+ if (state === 'listening' && this._agentState === 'listening') {
492
+ this._setUserAwayTimer();
493
+ } else {
494
+ this._cancelUserAwayTimer();
495
+ }
496
+
433
497
  this.emit(
434
498
  AgentSessionEventTypes.UserStateChanged,
435
499
  createUserStateChangedEvent(oldState, state),
@@ -451,6 +515,37 @@ export class AgentSession<
451
515
 
452
516
  private onTextOutputChanged(): void {}
453
517
 
518
+ private _setUserAwayTimer(): void {
519
+ this._cancelUserAwayTimer();
520
+
521
+ if (this.options.userAwayTimeout === null || this.options.userAwayTimeout === undefined) {
522
+ return;
523
+ }
524
+
525
+ if (this.roomIO && !this.roomIO.isParticipantAvailable) {
526
+ return;
527
+ }
528
+
529
+ this.userAwayTimer = setTimeout(() => {
530
+ this.logger.debug('User away timeout triggered');
531
+ this._updateUserState('away');
532
+ }, this.options.userAwayTimeout * 1000);
533
+ }
534
+
535
+ private _cancelUserAwayTimer(): void {
536
+ if (this.userAwayTimer !== null) {
537
+ clearTimeout(this.userAwayTimer);
538
+ this.userAwayTimer = null;
539
+ }
540
+ }
541
+
542
+ private _onUserInputTranscribed(ev: UserInputTranscribedEvent): void {
543
+ if (this.userState === 'away' && ev.isFinal) {
544
+ this.logger.debug('User returned from away state due to speech input');
545
+ this._updateUserState('listening');
546
+ }
547
+ }
548
+
454
549
  private async closeImpl(
455
550
  reason: CloseReason,
456
551
  error: RealtimeModelError | LLMError | TTSError | STTError | null = null,
@@ -460,6 +555,8 @@ export class AgentSession<
460
555
  return;
461
556
  }
462
557
 
558
+ this._cancelUserAwayTimer();
559
+
463
560
  if (this.activity) {
464
561
  if (!drain) {
465
562
  try {
@@ -57,6 +57,8 @@ export interface AudioRecognitionOptions {
57
57
  maxEndpointingDelay: number;
58
58
  }
59
59
 
60
+ // TODO(brian): PR3 - Add span: private _userTurnSpan?: Span, create lazily in _ensureUserTurnSpan() method (tracer.startSpan('user_turn') with participant attributes)
61
+ // TODO(brian): PR3 - Add span: 'eou_detection' span when running EOU detection (in runEOUDetection method)
60
62
  export class AudioRecognition {
61
63
  private hooks: RecognitionHooks;
62
64
  private stt?: STTNode;
@@ -377,6 +377,7 @@ export function updateInstructions(options: {
377
377
  }
378
378
  }
379
379
 
380
+ // TODO(brian): PR3 - Add @tracer.startActiveSpan('llm_node') decorator/wrapper
380
381
  export function performLLMInference(
381
382
  node: LLMNode,
382
383
  chatCtx: ChatContext,
@@ -467,6 +468,7 @@ export function performLLMInference(
467
468
  ];
468
469
  }
469
470
 
471
+ // TODO(brian): PR3 - Add @tracer.startActiveSpan('tts_node') decorator/wrapper
470
472
  export function performTTSInference(
471
473
  node: TTSNode,
472
474
  text: ReadableStream<string>,
@@ -650,6 +652,7 @@ export function performAudioForwarding(
650
652
  ];
651
653
  }
652
654
 
655
+ // TODO(brian): PR3 - Add @tracer.startActiveSpan('function_tool') wrapper for each tool execution
653
656
  export function performToolExecutions({
654
657
  session,
655
658
  speechHandle,
@@ -6,5 +6,6 @@ export { AgentSession, type AgentSessionOptions } from './agent_session.js';
6
6
  export * from './avatar/index.js';
7
7
  export * from './background_audio.js';
8
8
  export * from './events.js';
9
+ export * from './report.js';
9
10
  export * from './room_io/index.js';
10
11
  export { RunContext } from './run_context.js';
@@ -0,0 +1,151 @@
1
+ // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+
5
+ /**
6
+ * Unit tests for interruption detection logic in AgentActivity.
7
+ *
8
+ * Tests the refactored minInterruptionWords check which ensures:
9
+ * - Consistent word count filtering across all speech scenarios
10
+ * - Proper handling of empty strings, undefined, and short speech
11
+ * - Interruptions allowed only when word count meets or exceeds minInterruptionWords threshold
12
+ */
13
+ import { describe, expect, it } from 'vitest';
14
+ import { splitWords } from '../tokenize/basic/word.js';
15
+
16
+ describe('Interruption Detection - Word Counting', () => {
17
+ describe('Word Splitting Behavior', () => {
18
+ it('should count empty string as 0 words', () => {
19
+ const text = '';
20
+ const wordCount = splitWords(text, true).length;
21
+ expect(wordCount).toBe(0);
22
+ });
23
+
24
+ it('should count single word correctly', () => {
25
+ const text = 'hello';
26
+ const wordCount = splitWords(text, true).length;
27
+ expect(wordCount).toBe(1);
28
+ });
29
+
30
+ it('should count two words correctly', () => {
31
+ const text = 'hello world';
32
+ const wordCount = splitWords(text, true).length;
33
+ expect(wordCount).toBe(2);
34
+ });
35
+
36
+ it('should count multiple words correctly', () => {
37
+ const text = 'hello this is a full sentence';
38
+ const wordCount = splitWords(text, true).length;
39
+ expect(wordCount).toBe(6);
40
+ });
41
+
42
+ it('should handle punctuation correctly', () => {
43
+ const text = 'hello, world!';
44
+ const wordCount = splitWords(text, true).length;
45
+ expect(wordCount).toBe(2);
46
+ });
47
+
48
+ it('should handle multiple spaces between words', () => {
49
+ const text = 'hello world';
50
+ const wordCount = splitWords(text, true).length;
51
+ expect(wordCount).toBe(2);
52
+ });
53
+
54
+ it('should count whitespace-only string as 0 words', () => {
55
+ const text = ' ';
56
+ const wordCount = splitWords(text, true).length;
57
+ expect(wordCount).toBe(0);
58
+ });
59
+
60
+ it('should handle leading and trailing whitespace', () => {
61
+ const text = ' hello world ';
62
+ const wordCount = splitWords(text, true).length;
63
+ expect(wordCount).toBe(2);
64
+ });
65
+ });
66
+
67
+ describe('Integration: Full Interruption Check Logic', () => {
68
+ it('should block interruption for empty transcript with threshold 2', () => {
69
+ const text = '';
70
+ const minInterruptionWords = 2;
71
+
72
+ const normalizedText = text ?? '';
73
+ const wordCount = splitWords(normalizedText, true).length;
74
+ const shouldBlock = wordCount < minInterruptionWords;
75
+
76
+ expect(normalizedText).toBe('');
77
+ expect(wordCount).toBe(0);
78
+ expect(shouldBlock).toBe(true);
79
+ });
80
+
81
+ it('should block interruption for undefined transcript with threshold 2', () => {
82
+ const text: string | undefined = undefined;
83
+ const minInterruptionWords = 2;
84
+
85
+ const normalizedText = text ?? '';
86
+ const wordCount = splitWords(normalizedText, true).length;
87
+ const shouldBlock = wordCount < minInterruptionWords;
88
+
89
+ expect(normalizedText).toBe('');
90
+ expect(wordCount).toBe(0);
91
+ expect(shouldBlock).toBe(true);
92
+ });
93
+
94
+ it('should block interruption for single word with threshold 2', () => {
95
+ const text = 'hello';
96
+ const minInterruptionWords = 2;
97
+
98
+ const normalizedText = text ?? '';
99
+ const wordCount = splitWords(normalizedText, true).length;
100
+ const shouldBlock = wordCount < minInterruptionWords;
101
+
102
+ expect(normalizedText).toBe('hello');
103
+ expect(wordCount).toBe(1);
104
+ expect(shouldBlock).toBe(true);
105
+ });
106
+
107
+ it('should allow interruption when word count exactly meets threshold', () => {
108
+ const text = 'hello world';
109
+ const minInterruptionWords = 2;
110
+
111
+ const normalizedText = text ?? '';
112
+ const wordCount = splitWords(normalizedText, true).length;
113
+ const shouldBlock = wordCount < minInterruptionWords;
114
+
115
+ expect(normalizedText).toBe('hello world');
116
+ expect(wordCount).toBe(2);
117
+ expect(shouldBlock).toBe(false);
118
+ });
119
+
120
+ it('should allow interruption when word count exceeds threshold', () => {
121
+ const text = 'hello this is a full sentence';
122
+ const minInterruptionWords = 2;
123
+
124
+ const normalizedText = text ?? '';
125
+ const wordCount = splitWords(normalizedText, true).length;
126
+ const shouldBlock = wordCount < minInterruptionWords;
127
+
128
+ expect(normalizedText).toBe('hello this is a full sentence');
129
+ expect(wordCount).toBe(6);
130
+ expect(shouldBlock).toBe(false);
131
+ });
132
+
133
+ it('should apply consistent word counting logic in both methods', () => {
134
+ const transcripts = ['', 'hello', 'hello world', 'this is a longer sentence'];
135
+ const threshold = 2;
136
+
137
+ transcripts.forEach((transcript) => {
138
+ const text1 = transcript;
139
+ const normalizedText1 = text1 ?? '';
140
+ const wordCount1 = splitWords(normalizedText1, true).length;
141
+ const shouldBlock1 = wordCount1 < threshold;
142
+
143
+ const wordCount2 = splitWords(transcript, true).length;
144
+ const shouldBlock2 = wordCount2 < threshold;
145
+
146
+ expect(wordCount1).toBe(wordCount2);
147
+ expect(shouldBlock1).toBe(shouldBlock2);
148
+ });
149
+ });
150
+ });
151
+ });
@@ -0,0 +1,77 @@
1
+ // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+ import type { ChatContext } from '../llm/chat_context.js';
5
+ import type { VoiceOptions } from './agent_session.js';
6
+ import type { AgentEvent } from './events.js';
7
+
8
+ export interface SessionReport {
9
+ jobId: string;
10
+ roomId: string;
11
+ room: string;
12
+ options: VoiceOptions;
13
+ events: AgentEvent[];
14
+ chatHistory: ChatContext;
15
+ enableUserDataTraining: boolean;
16
+ timestamp: number;
17
+ }
18
+
19
+ export interface SessionReportOptions {
20
+ jobId: string;
21
+ roomId: string;
22
+ room: string;
23
+ options: VoiceOptions;
24
+ events: AgentEvent[];
25
+ chatHistory: ChatContext;
26
+ enableUserDataTraining?: boolean;
27
+ timestamp?: number;
28
+ }
29
+
30
+ export function createSessionReport(opts: SessionReportOptions): SessionReport {
31
+ return {
32
+ jobId: opts.jobId,
33
+ roomId: opts.roomId,
34
+ room: opts.room,
35
+ options: opts.options,
36
+ events: opts.events,
37
+ chatHistory: opts.chatHistory,
38
+ enableUserDataTraining: opts.enableUserDataTraining ?? false,
39
+ timestamp: opts.timestamp ?? Date.now(),
40
+ };
41
+ }
42
+
43
+ // TODO(brian): PR5 - Add uploadSessionReport() function that creates multipart form with:
44
+ // - header: protobuf MetricsRecordingHeader (room_id, duration, start_time)
45
+ // - chat_history: JSON serialized chat history (use sessionReportToJSON)
46
+ // - audio: audio recording file if available (ogg format)
47
+ // - Uploads to LiveKit Cloud observability endpoint with JWT auth
48
+ export function sessionReportToJSON(report: SessionReport): Record<string, unknown> {
49
+ const events: Record<string, unknown>[] = [];
50
+
51
+ for (const event of report.events) {
52
+ if (event.type === 'metrics_collected') {
53
+ continue; // metrics are too noisy, Cloud is using the chat_history as the source of truth
54
+ }
55
+
56
+ events.push({ ...event });
57
+ }
58
+
59
+ return {
60
+ job_id: report.jobId,
61
+ room_id: report.roomId,
62
+ room: report.room,
63
+ events,
64
+ options: {
65
+ allow_interruptions: report.options.allowInterruptions,
66
+ discard_audio_if_uninterruptible: report.options.discardAudioIfUninterruptible,
67
+ min_interruption_duration: report.options.minInterruptionDuration,
68
+ min_interruption_words: report.options.minInterruptionWords,
69
+ min_endpointing_delay: report.options.minEndpointingDelay,
70
+ max_endpointing_delay: report.options.maxEndpointingDelay,
71
+ max_tool_steps: report.options.maxToolSteps,
72
+ },
73
+ chat_history: report.chatHistory.toJSON({ excludeTimestamp: false }),
74
+ enable_user_data_training: report.enableUserDataTraining,
75
+ timestamp: report.timestamp,
76
+ };
77
+ }
@@ -369,6 +369,10 @@ export class RoomIO {
369
369
  return this.transcriptionSynchronizer.textOutput;
370
370
  }
371
371
 
372
+ get isParticipantAvailable(): boolean {
373
+ return this.participantAvailableFuture.done;
374
+ }
375
+
372
376
  /** Switch to a different participant */
373
377
  setParticipant(participantIdentity: string | null) {
374
378
  this.logger.debug({ participantIdentity }, 'setting participant');