@livekit/agents 1.0.17 → 1.0.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +3 -0
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +2 -1
- package/dist/index.d.ts +2 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -0
- package/dist/index.js.map +1 -1
- package/dist/inference/api_protos.d.cts +12 -12
- package/dist/inference/api_protos.d.ts +12 -12
- package/dist/inference/llm.cjs +35 -13
- package/dist/inference/llm.cjs.map +1 -1
- package/dist/inference/llm.d.cts +10 -5
- package/dist/inference/llm.d.ts +10 -5
- package/dist/inference/llm.d.ts.map +1 -1
- package/dist/inference/llm.js +35 -13
- package/dist/inference/llm.js.map +1 -1
- package/dist/inference/tts.cjs +1 -1
- package/dist/inference/tts.cjs.map +1 -1
- package/dist/inference/tts.js +1 -1
- package/dist/inference/tts.js.map +1 -1
- package/dist/ipc/job_proc_lazy_main.cjs +6 -2
- package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
- package/dist/ipc/job_proc_lazy_main.js +6 -2
- package/dist/ipc/job_proc_lazy_main.js.map +1 -1
- package/dist/job.cjs +31 -0
- package/dist/job.cjs.map +1 -1
- package/dist/job.d.cts +6 -0
- package/dist/job.d.ts +6 -0
- package/dist/job.d.ts.map +1 -1
- package/dist/job.js +31 -0
- package/dist/job.js.map +1 -1
- package/dist/llm/chat_context.cjs +33 -0
- package/dist/llm/chat_context.cjs.map +1 -1
- package/dist/llm/chat_context.d.cts +22 -2
- package/dist/llm/chat_context.d.ts +22 -2
- package/dist/llm/chat_context.d.ts.map +1 -1
- package/dist/llm/chat_context.js +32 -0
- package/dist/llm/chat_context.js.map +1 -1
- package/dist/llm/index.cjs +2 -0
- package/dist/llm/index.cjs.map +1 -1
- package/dist/llm/index.d.cts +1 -1
- package/dist/llm/index.d.ts +1 -1
- package/dist/llm/index.d.ts.map +1 -1
- package/dist/llm/index.js +2 -0
- package/dist/llm/index.js.map +1 -1
- package/dist/llm/llm.cjs.map +1 -1
- package/dist/llm/llm.d.cts +1 -1
- package/dist/llm/llm.d.ts +1 -1
- package/dist/llm/llm.d.ts.map +1 -1
- package/dist/llm/llm.js.map +1 -1
- package/dist/llm/provider_format/google.cjs.map +1 -1
- package/dist/llm/provider_format/google.d.cts +1 -1
- package/dist/llm/provider_format/google.d.ts +1 -1
- package/dist/llm/provider_format/google.d.ts.map +1 -1
- package/dist/llm/provider_format/google.js.map +1 -1
- package/dist/llm/provider_format/google.test.cjs +48 -0
- package/dist/llm/provider_format/google.test.cjs.map +1 -1
- package/dist/llm/provider_format/google.test.js +54 -1
- package/dist/llm/provider_format/google.test.js.map +1 -1
- package/dist/llm/provider_format/index.d.cts +1 -1
- package/dist/llm/provider_format/index.d.ts +1 -1
- package/dist/llm/provider_format/index.d.ts.map +1 -1
- package/dist/llm/provider_format/openai.cjs +1 -2
- package/dist/llm/provider_format/openai.cjs.map +1 -1
- package/dist/llm/provider_format/openai.js +1 -2
- package/dist/llm/provider_format/openai.js.map +1 -1
- package/dist/llm/provider_format/openai.test.cjs +32 -0
- package/dist/llm/provider_format/openai.test.cjs.map +1 -1
- package/dist/llm/provider_format/openai.test.js +38 -1
- package/dist/llm/provider_format/openai.test.js.map +1 -1
- package/dist/llm/realtime.cjs.map +1 -1
- package/dist/llm/realtime.d.cts +4 -0
- package/dist/llm/realtime.d.ts +4 -0
- package/dist/llm/realtime.d.ts.map +1 -1
- package/dist/llm/realtime.js.map +1 -1
- package/dist/llm/utils.cjs +2 -2
- package/dist/llm/utils.cjs.map +1 -1
- package/dist/llm/utils.d.cts +1 -1
- package/dist/llm/utils.d.ts +1 -1
- package/dist/llm/utils.d.ts.map +1 -1
- package/dist/llm/utils.js +2 -2
- package/dist/llm/utils.js.map +1 -1
- package/dist/llm/zod-utils.cjs +6 -3
- package/dist/llm/zod-utils.cjs.map +1 -1
- package/dist/llm/zod-utils.d.cts +1 -1
- package/dist/llm/zod-utils.d.ts +1 -1
- package/dist/llm/zod-utils.d.ts.map +1 -1
- package/dist/llm/zod-utils.js +6 -3
- package/dist/llm/zod-utils.js.map +1 -1
- package/dist/llm/zod-utils.test.cjs +83 -0
- package/dist/llm/zod-utils.test.cjs.map +1 -1
- package/dist/llm/zod-utils.test.js +83 -0
- package/dist/llm/zod-utils.test.js.map +1 -1
- package/dist/log.cjs.map +1 -1
- package/dist/log.d.ts.map +1 -1
- package/dist/log.js.map +1 -1
- package/dist/telemetry/index.cjs +51 -0
- package/dist/telemetry/index.cjs.map +1 -0
- package/dist/telemetry/index.d.cts +4 -0
- package/dist/telemetry/index.d.ts +4 -0
- package/dist/telemetry/index.d.ts.map +1 -0
- package/dist/telemetry/index.js +12 -0
- package/dist/telemetry/index.js.map +1 -0
- package/dist/telemetry/trace_types.cjs +191 -0
- package/dist/telemetry/trace_types.cjs.map +1 -0
- package/dist/telemetry/trace_types.d.cts +56 -0
- package/dist/telemetry/trace_types.d.ts +56 -0
- package/dist/telemetry/trace_types.d.ts.map +1 -0
- package/dist/telemetry/trace_types.js +113 -0
- package/dist/telemetry/trace_types.js.map +1 -0
- package/dist/telemetry/traces.cjs +196 -0
- package/dist/telemetry/traces.cjs.map +1 -0
- package/dist/telemetry/traces.d.cts +97 -0
- package/dist/telemetry/traces.d.ts +97 -0
- package/dist/telemetry/traces.d.ts.map +1 -0
- package/dist/telemetry/traces.js +173 -0
- package/dist/telemetry/traces.js.map +1 -0
- package/dist/telemetry/utils.cjs +86 -0
- package/dist/telemetry/utils.cjs.map +1 -0
- package/dist/telemetry/utils.d.cts +5 -0
- package/dist/telemetry/utils.d.ts +5 -0
- package/dist/telemetry/utils.d.ts.map +1 -0
- package/dist/telemetry/utils.js +51 -0
- package/dist/telemetry/utils.js.map +1 -0
- package/dist/tts/tts.cjs.map +1 -1
- package/dist/tts/tts.d.ts.map +1 -1
- package/dist/tts/tts.js.map +1 -1
- package/dist/utils.cjs.map +1 -1
- package/dist/utils.d.cts +7 -0
- package/dist/utils.d.ts +7 -0
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js.map +1 -1
- package/dist/voice/agent.cjs +15 -0
- package/dist/voice/agent.cjs.map +1 -1
- package/dist/voice/agent.d.cts +4 -1
- package/dist/voice/agent.d.ts +4 -1
- package/dist/voice/agent.d.ts.map +1 -1
- package/dist/voice/agent.js +15 -0
- package/dist/voice/agent.js.map +1 -1
- package/dist/voice/agent_activity.cjs +71 -20
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.d.ts.map +1 -1
- package/dist/voice/agent_activity.js +71 -20
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/agent_session.cjs +69 -2
- package/dist/voice/agent_session.cjs.map +1 -1
- package/dist/voice/agent_session.d.cts +11 -2
- package/dist/voice/agent_session.d.ts +11 -2
- package/dist/voice/agent_session.d.ts.map +1 -1
- package/dist/voice/agent_session.js +70 -3
- package/dist/voice/agent_session.js.map +1 -1
- package/dist/voice/audio_recognition.cjs.map +1 -1
- package/dist/voice/audio_recognition.d.ts.map +1 -1
- package/dist/voice/audio_recognition.js.map +1 -1
- package/dist/voice/generation.cjs.map +1 -1
- package/dist/voice/generation.d.ts.map +1 -1
- package/dist/voice/generation.js.map +1 -1
- package/dist/voice/index.cjs +2 -0
- package/dist/voice/index.cjs.map +1 -1
- package/dist/voice/index.d.cts +1 -0
- package/dist/voice/index.d.ts +1 -0
- package/dist/voice/index.d.ts.map +1 -1
- package/dist/voice/index.js +1 -0
- package/dist/voice/index.js.map +1 -1
- package/dist/voice/interruption_detection.test.cjs +114 -0
- package/dist/voice/interruption_detection.test.cjs.map +1 -0
- package/dist/voice/interruption_detection.test.js +113 -0
- package/dist/voice/interruption_detection.test.js.map +1 -0
- package/dist/voice/report.cjs +69 -0
- package/dist/voice/report.cjs.map +1 -0
- package/dist/voice/report.d.cts +26 -0
- package/dist/voice/report.d.ts +26 -0
- package/dist/voice/report.d.ts.map +1 -0
- package/dist/voice/report.js +44 -0
- package/dist/voice/report.js.map +1 -0
- package/dist/voice/room_io/room_io.cjs +3 -0
- package/dist/voice/room_io/room_io.cjs.map +1 -1
- package/dist/voice/room_io/room_io.d.cts +1 -0
- package/dist/voice/room_io/room_io.d.ts +1 -0
- package/dist/voice/room_io/room_io.d.ts.map +1 -1
- package/dist/voice/room_io/room_io.js +3 -0
- package/dist/voice/room_io/room_io.js.map +1 -1
- package/package.json +12 -5
- package/src/index.ts +2 -1
- package/src/inference/llm.ts +53 -21
- package/src/inference/tts.ts +1 -1
- package/src/ipc/job_proc_lazy_main.ts +10 -2
- package/src/job.ts +48 -0
- package/src/llm/__snapshots__/zod-utils.test.ts.snap +218 -0
- package/src/llm/chat_context.ts +53 -1
- package/src/llm/index.ts +1 -0
- package/src/llm/llm.ts +3 -1
- package/src/llm/provider_format/google.test.ts +72 -1
- package/src/llm/provider_format/google.ts +4 -4
- package/src/llm/provider_format/openai.test.ts +55 -1
- package/src/llm/provider_format/openai.ts +3 -2
- package/src/llm/realtime.ts +8 -1
- package/src/llm/utils.ts +7 -2
- package/src/llm/zod-utils.test.ts +101 -0
- package/src/llm/zod-utils.ts +12 -3
- package/src/log.ts +1 -0
- package/src/telemetry/index.ts +10 -0
- package/src/telemetry/trace_types.ts +88 -0
- package/src/telemetry/traces.ts +266 -0
- package/src/telemetry/utils.ts +61 -0
- package/src/tts/tts.ts +4 -0
- package/src/utils.ts +17 -0
- package/src/voice/agent.ts +22 -0
- package/src/voice/agent_activity.ts +102 -24
- package/src/voice/agent_session.ts +98 -1
- package/src/voice/audio_recognition.ts +2 -0
- package/src/voice/generation.ts +3 -0
- package/src/voice/index.ts +1 -0
- package/src/voice/interruption_detection.test.ts +151 -0
- package/src/voice/report.ts +77 -0
- package/src/voice/room_io/room_io.ts +4 -0
|
@@ -14,7 +14,7 @@ import {
|
|
|
14
14
|
type TTSModelString,
|
|
15
15
|
} from '../inference/index.js';
|
|
16
16
|
import { getJobContext } from '../job.js';
|
|
17
|
-
import { ChatContext, ChatMessage } from '../llm/chat_context.js';
|
|
17
|
+
import { AgentHandoffItem, ChatContext, ChatMessage } from '../llm/chat_context.js';
|
|
18
18
|
import type { LLM, RealtimeModel, RealtimeModelError, ToolChoice } from '../llm/index.js';
|
|
19
19
|
import type { LLMError } from '../llm/llm.js';
|
|
20
20
|
import { log } from '../log.js';
|
|
@@ -26,6 +26,7 @@ import type { Agent } from './agent.js';
|
|
|
26
26
|
import { AgentActivity } from './agent_activity.js';
|
|
27
27
|
import type { _TurnDetector } from './audio_recognition.js';
|
|
28
28
|
import {
|
|
29
|
+
type AgentEvent,
|
|
29
30
|
AgentSessionEventTypes,
|
|
30
31
|
type AgentState,
|
|
31
32
|
type AgentStateChangedEvent,
|
|
@@ -58,6 +59,7 @@ export interface VoiceOptions {
|
|
|
58
59
|
maxEndpointingDelay: number;
|
|
59
60
|
maxToolSteps: number;
|
|
60
61
|
preemptiveGeneration: boolean;
|
|
62
|
+
userAwayTimeout?: number | null;
|
|
61
63
|
}
|
|
62
64
|
|
|
63
65
|
const defaultVoiceOptions: VoiceOptions = {
|
|
@@ -69,6 +71,7 @@ const defaultVoiceOptions: VoiceOptions = {
|
|
|
69
71
|
maxEndpointingDelay: 6000,
|
|
70
72
|
maxToolSteps: 3,
|
|
71
73
|
preemptiveGeneration: false,
|
|
74
|
+
userAwayTimeout: 15.0,
|
|
72
75
|
} as const;
|
|
73
76
|
|
|
74
77
|
export type TurnDetectionMode = 'stt' | 'vad' | 'realtime_llm' | 'manual' | _TurnDetector;
|
|
@@ -123,6 +126,10 @@ export class AgentSession<
|
|
|
123
126
|
private _output: AgentOutput;
|
|
124
127
|
|
|
125
128
|
private closingTask: Promise<void> | null = null;
|
|
129
|
+
private userAwayTimer: NodeJS.Timeout | null = null;
|
|
130
|
+
|
|
131
|
+
/** @internal */
|
|
132
|
+
_recordedEvents: AgentEvent[] = [];
|
|
126
133
|
|
|
127
134
|
constructor(opts: AgentSessionOptions<UserData>) {
|
|
128
135
|
super();
|
|
@@ -167,6 +174,17 @@ export class AgentSession<
|
|
|
167
174
|
// This is the "global" chat context, it holds the entire conversation history
|
|
168
175
|
this._chatCtx = ChatContext.empty();
|
|
169
176
|
this.options = { ...defaultVoiceOptions, ...voiceOptions };
|
|
177
|
+
|
|
178
|
+
this.on(AgentSessionEventTypes.UserInputTranscribed, this._onUserInputTranscribed.bind(this));
|
|
179
|
+
}
|
|
180
|
+
|
|
181
|
+
/**
 * Emits a session event and records it.
 *
 * The first argument passed with the event is appended to `_recordedEvents`
 * before the call is forwarded to the parent class's `emit`. The push is
 * unconditional in this method: it happens for every event name and is not
 * gated on any flag visible here.
 *
 * @param event - Event name; must be a key of `AgentSessionCallbacks`.
 * @param args - Arguments for the event's callback; only `args[0]` is recorded.
 * @returns Whatever the parent `emit` returns.
 */
emit<K extends keyof AgentSessionCallbacks>(
|
|
182
|
+
event: K,
|
|
183
|
+
...args: Parameters<AgentSessionCallbacks[K]>
|
|
184
|
+
): boolean {
|
|
185
|
+
// The cast is a compile-time assertion only; nothing here checks the runtime shape of args[0].
const eventData = args[0] as AgentEvent;
|
|
186
|
+
this._recordedEvents.push(eventData);
|
|
187
|
+
return super.emit(event, ...args);
|
|
170
188
|
}
|
|
171
189
|
|
|
172
190
|
get input(): AgentInput {
|
|
@@ -194,15 +212,20 @@ export class AgentSession<
|
|
|
194
212
|
}
|
|
195
213
|
|
|
196
214
|
async start({
|
|
215
|
+
// TODO(brian): PR2 - Add setupCloudTracer() call if on LiveKit Cloud with recording enabled
|
|
216
|
+
// TODO(brian): PR3 - Add span: this._sessionSpan = tracer.startSpan('agent_session'), store as instance property
|
|
217
|
+
// TODO(brian): PR4 - Add setupCloudLogger() call in setupCloudTracer() to setup OTEL logging with Pino bridge
|
|
197
218
|
agent,
|
|
198
219
|
room,
|
|
199
220
|
inputOptions,
|
|
200
221
|
outputOptions,
|
|
222
|
+
record = true,
|
|
201
223
|
}: {
|
|
202
224
|
agent: Agent;
|
|
203
225
|
room: Room;
|
|
204
226
|
inputOptions?: Partial<RoomInputOptions>;
|
|
205
227
|
outputOptions?: Partial<RoomOutputOptions>;
|
|
228
|
+
record?: boolean;
|
|
206
229
|
}): Promise<void> {
|
|
207
230
|
if (this.started) {
|
|
208
231
|
return;
|
|
@@ -242,6 +265,17 @@ export class AgentSession<
|
|
|
242
265
|
this.logger.debug('Auto-connecting to room via job context');
|
|
243
266
|
tasks.push(ctx.connect());
|
|
244
267
|
}
|
|
268
|
+
|
|
269
|
+
if (record) {
|
|
270
|
+
if (ctx._primaryAgentSession === undefined) {
|
|
271
|
+
ctx._primaryAgentSession = this;
|
|
272
|
+
} else {
|
|
273
|
+
throw new Error(
|
|
274
|
+
'Only one `AgentSession` can be the primary at a time. If you want to ignore primary designation, use session.start(record=False).',
|
|
275
|
+
);
|
|
276
|
+
}
|
|
277
|
+
}
|
|
278
|
+
|
|
245
279
|
// TODO(AJS-265): add shutdown callback to job context
|
|
246
280
|
tasks.push(this.updateActivity(this.agent));
|
|
247
281
|
|
|
@@ -336,6 +370,8 @@ export class AgentSession<
|
|
|
336
370
|
// TODO(AJS-129): add lock to agent activity core lifecycle
|
|
337
371
|
this.nextActivity = new AgentActivity(agent, this);
|
|
338
372
|
|
|
373
|
+
const previousActivity = this.activity;
|
|
374
|
+
|
|
339
375
|
if (this.activity) {
|
|
340
376
|
await this.activity.drain();
|
|
341
377
|
await this.activity.close();
|
|
@@ -344,6 +380,14 @@ export class AgentSession<
|
|
|
344
380
|
this.activity = this.nextActivity;
|
|
345
381
|
this.nextActivity = undefined;
|
|
346
382
|
|
|
383
|
+
this._chatCtx.insert(
|
|
384
|
+
new AgentHandoffItem({
|
|
385
|
+
oldAgentId: previousActivity?.agent.id,
|
|
386
|
+
newAgentId: agent.id,
|
|
387
|
+
}),
|
|
388
|
+
);
|
|
389
|
+
this.logger.debug({ previousActivity, agent }, 'Agent handoff inserted into chat context');
|
|
390
|
+
|
|
347
391
|
await this.activity.start();
|
|
348
392
|
|
|
349
393
|
if (this._input.audio) {
|
|
@@ -414,8 +458,18 @@ export class AgentSession<
|
|
|
414
458
|
return;
|
|
415
459
|
}
|
|
416
460
|
|
|
461
|
+
// TODO(brian): PR3 - Add span: if state === 'speaking' && !this._agentSpeakingSpan, create tracer.startSpan('agent_speaking') with participant attributes
|
|
462
|
+
// TODO(brian): PR3 - Add span: if state !== 'speaking' && this._agentSpeakingSpan, end and clear this._agentSpeakingSpan
|
|
417
463
|
const oldState = this._agentState;
|
|
418
464
|
this._agentState = state;
|
|
465
|
+
|
|
466
|
+
// Handle user away timer based on state changes
|
|
467
|
+
if (state === 'listening' && this.userState === 'listening') {
|
|
468
|
+
this._setUserAwayTimer();
|
|
469
|
+
} else {
|
|
470
|
+
this._cancelUserAwayTimer();
|
|
471
|
+
}
|
|
472
|
+
|
|
419
473
|
this.emit(
|
|
420
474
|
AgentSessionEventTypes.AgentStateChanged,
|
|
421
475
|
createAgentStateChangedEvent(oldState, state),
|
|
@@ -428,8 +482,18 @@ export class AgentSession<
|
|
|
428
482
|
return;
|
|
429
483
|
}
|
|
430
484
|
|
|
485
|
+
// TODO(brian): PR3 - Add span: if state === 'speaking' && !this._userSpeakingSpan, create tracer.startSpan('user_speaking') with participant attributes
|
|
486
|
+
// TODO(brian): PR3 - Add span: if state !== 'speaking' && this._userSpeakingSpan, end and clear this._userSpeakingSpan
|
|
431
487
|
const oldState = this.userState;
|
|
432
488
|
this.userState = state;
|
|
489
|
+
|
|
490
|
+
// Handle user away timer based on state changes
|
|
491
|
+
if (state === 'listening' && this._agentState === 'listening') {
|
|
492
|
+
this._setUserAwayTimer();
|
|
493
|
+
} else {
|
|
494
|
+
this._cancelUserAwayTimer();
|
|
495
|
+
}
|
|
496
|
+
|
|
433
497
|
this.emit(
|
|
434
498
|
AgentSessionEventTypes.UserStateChanged,
|
|
435
499
|
createUserStateChangedEvent(oldState, state),
|
|
@@ -451,6 +515,37 @@ export class AgentSession<
|
|
|
451
515
|
|
|
452
516
|
private onTextOutputChanged(): void {}
|
|
453
517
|
|
|
518
|
+
/**
 * (Re)arms the user-away timer.
 *
 * Any pending timer is cancelled first. No new timer is scheduled when
 * `options.userAwayTimeout` is `null`/`undefined`, or when a `roomIO` exists
 * and reports `isParticipantAvailable === false`. Otherwise a timeout is
 * scheduled that calls `_updateUserState('away')`.
 */
private _setUserAwayTimer(): void {
|
|
519
|
+
// Always start from a clean slate so at most one timer is pending.
this._cancelUserAwayTimer();
|
|
520
|
+
|
|
521
|
+
// A null/undefined timeout disables the away timer.
if (this.options.userAwayTimeout === null || this.options.userAwayTimeout === undefined) {
|
|
522
|
+
return;
|
|
523
|
+
}
|
|
524
|
+
|
|
525
|
+
// Skip only when roomIO is set AND has no participant available; with no roomIO the timer is still armed.
if (this.roomIO && !this.roomIO.isParticipantAvailable) {
|
|
526
|
+
return;
|
|
527
|
+
}
|
|
528
|
+
|
|
529
|
+
this.userAwayTimer = setTimeout(() => {
|
|
530
|
+
this.logger.debug('User away timeout triggered');
|
|
531
|
+
this._updateUserState('away');
|
|
532
|
+
// The option is multiplied by 1000 before being handed to setTimeout (milliseconds), i.e. it is treated as seconds.
}, this.options.userAwayTimeout * 1000);
|
|
533
|
+
}
|
|
534
|
+
|
|
535
|
+
/**
 * Cancels the pending user-away timer, if any.
 *
 * Clears the timeout held in `userAwayTimer` and resets the field to `null`.
 * Does nothing when the field is already `null`.
 */
private _cancelUserAwayTimer(): void {
|
|
536
|
+
if (this.userAwayTimer !== null) {
|
|
537
|
+
clearTimeout(this.userAwayTimer);
|
|
538
|
+
this.userAwayTimer = null;
|
|
539
|
+
}
|
|
540
|
+
}
|
|
541
|
+
|
|
542
|
+
/**
 * Handler for user-input-transcribed events.
 *
 * When the user is currently in the 'away' state and the event is marked
 * final (`ev.isFinal`), the user state is switched back to 'listening'.
 * Events with a falsy `isFinal`, and events received in any other user state,
 * are ignored.
 *
 * @param ev - The transcription event; only `isFinal` is read here.
 */
private _onUserInputTranscribed(ev: UserInputTranscribedEvent): void {
|
|
543
|
+
if (this.userState === 'away' && ev.isFinal) {
|
|
544
|
+
this.logger.debug('User returned from away state due to speech input');
|
|
545
|
+
this._updateUserState('listening');
|
|
546
|
+
}
|
|
547
|
+
}
|
|
548
|
+
|
|
454
549
|
private async closeImpl(
|
|
455
550
|
reason: CloseReason,
|
|
456
551
|
error: RealtimeModelError | LLMError | TTSError | STTError | null = null,
|
|
@@ -460,6 +555,8 @@ export class AgentSession<
|
|
|
460
555
|
return;
|
|
461
556
|
}
|
|
462
557
|
|
|
558
|
+
this._cancelUserAwayTimer();
|
|
559
|
+
|
|
463
560
|
if (this.activity) {
|
|
464
561
|
if (!drain) {
|
|
465
562
|
try {
|
|
@@ -57,6 +57,8 @@ export interface AudioRecognitionOptions {
|
|
|
57
57
|
maxEndpointingDelay: number;
|
|
58
58
|
}
|
|
59
59
|
|
|
60
|
+
// TODO(brian): PR3 - Add span: private _userTurnSpan?: Span, create lazily in _ensureUserTurnSpan() method (tracer.startSpan('user_turn') with participant attributes)
|
|
61
|
+
// TODO(brian): PR3 - Add span: 'eou_detection' span when running EOU detection (in runEOUDetection method)
|
|
60
62
|
export class AudioRecognition {
|
|
61
63
|
private hooks: RecognitionHooks;
|
|
62
64
|
private stt?: STTNode;
|
package/src/voice/generation.ts
CHANGED
|
@@ -377,6 +377,7 @@ export function updateInstructions(options: {
|
|
|
377
377
|
}
|
|
378
378
|
}
|
|
379
379
|
|
|
380
|
+
// TODO(brian): PR3 - Add @tracer.startActiveSpan('llm_node') decorator/wrapper
|
|
380
381
|
export function performLLMInference(
|
|
381
382
|
node: LLMNode,
|
|
382
383
|
chatCtx: ChatContext,
|
|
@@ -467,6 +468,7 @@ export function performLLMInference(
|
|
|
467
468
|
];
|
|
468
469
|
}
|
|
469
470
|
|
|
471
|
+
// TODO(brian): PR3 - Add @tracer.startActiveSpan('tts_node') decorator/wrapper
|
|
470
472
|
export function performTTSInference(
|
|
471
473
|
node: TTSNode,
|
|
472
474
|
text: ReadableStream<string>,
|
|
@@ -650,6 +652,7 @@ export function performAudioForwarding(
|
|
|
650
652
|
];
|
|
651
653
|
}
|
|
652
654
|
|
|
655
|
+
// TODO(brian): PR3 - Add @tracer.startActiveSpan('function_tool') wrapper for each tool execution
|
|
653
656
|
export function performToolExecutions({
|
|
654
657
|
session,
|
|
655
658
|
speechHandle,
|
package/src/voice/index.ts
CHANGED
|
@@ -6,5 +6,6 @@ export { AgentSession, type AgentSessionOptions } from './agent_session.js';
|
|
|
6
6
|
export * from './avatar/index.js';
|
|
7
7
|
export * from './background_audio.js';
|
|
8
8
|
export * from './events.js';
|
|
9
|
+
export * from './report.js';
|
|
9
10
|
export * from './room_io/index.js';
|
|
10
11
|
export { RunContext } from './run_context.js';
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
// SPDX-FileCopyrightText: 2025 LiveKit, Inc.
|
|
2
|
+
//
|
|
3
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* Unit tests for interruption detection logic in AgentActivity.
|
|
7
|
+
*
|
|
8
|
+
* Tests the refactored minInterruptionWords check which ensures:
|
|
9
|
+
* - Consistent word count filtering across all speech scenarios
|
|
10
|
+
* - Proper handling of empty strings, undefined, and short speech
|
|
11
|
+
* - Interruptions allowed only when word count meets or exceeds minInterruptionWords threshold
|
|
12
|
+
*/
|
|
13
|
+
import { describe, expect, it } from 'vitest';
|
|
14
|
+
import { splitWords } from '../tokenize/basic/word.js';
|
|
15
|
+
|
|
16
|
+
// Test suite for the word-count rule behind interruption filtering.
// Everything here is computed with `splitWords(text, true).length`; the only imports visible
// for this file are vitest and `splitWords`, so these tests exercise the word splitter and an
// inline copy of the threshold comparison rather than calling AgentActivity itself.
// NOTE(review): the meaning of the second `splitWords` argument (`true`) is not shown here —
// presumably a punctuation-handling flag; confirm against tokenize/basic/word.
describe('Interruption Detection - Word Counting', () => {
|
|
17
|
+
// Pins the raw word counts returned by splitWords for a set of representative inputs.
describe('Word Splitting Behavior', () => {
|
|
18
|
+
it('should count empty string as 0 words', () => {
|
|
19
|
+
const text = '';
|
|
20
|
+
const wordCount = splitWords(text, true).length;
|
|
21
|
+
expect(wordCount).toBe(0);
|
|
22
|
+
});
|
|
23
|
+
|
|
24
|
+
it('should count single word correctly', () => {
|
|
25
|
+
const text = 'hello';
|
|
26
|
+
const wordCount = splitWords(text, true).length;
|
|
27
|
+
expect(wordCount).toBe(1);
|
|
28
|
+
});
|
|
29
|
+
|
|
30
|
+
it('should count two words correctly', () => {
|
|
31
|
+
const text = 'hello world';
|
|
32
|
+
const wordCount = splitWords(text, true).length;
|
|
33
|
+
expect(wordCount).toBe(2);
|
|
34
|
+
});
|
|
35
|
+
|
|
36
|
+
it('should count multiple words correctly', () => {
|
|
37
|
+
const text = 'hello this is a full sentence';
|
|
38
|
+
const wordCount = splitWords(text, true).length;
|
|
39
|
+
expect(wordCount).toBe(6);
|
|
40
|
+
});
|
|
41
|
+
|
|
42
|
+
it('should handle punctuation correctly', () => {
|
|
43
|
+
const text = 'hello, world!';
|
|
44
|
+
const wordCount = splitWords(text, true).length;
|
|
45
|
+
expect(wordCount).toBe(2);
|
|
46
|
+
});
|
|
47
|
+
|
|
48
|
+
// NOTE(review): the literal below shows a single space in this rendering although the test
// name says "multiple spaces" — whitespace looks collapsed by the diff view; confirm against the published file.
it('should handle multiple spaces between words', () => {
|
|
49
|
+
const text = 'hello world';
|
|
50
|
+
const wordCount = splitWords(text, true).length;
|
|
51
|
+
expect(wordCount).toBe(2);
|
|
52
|
+
});
|
|
53
|
+
|
|
54
|
+
it('should count whitespace-only string as 0 words', () => {
|
|
55
|
+
const text = ' ';
|
|
56
|
+
const wordCount = splitWords(text, true).length;
|
|
57
|
+
expect(wordCount).toBe(0);
|
|
58
|
+
});
|
|
59
|
+
|
|
60
|
+
it('should handle leading and trailing whitespace', () => {
|
|
61
|
+
const text = ' hello world ';
|
|
62
|
+
const wordCount = splitWords(text, true).length;
|
|
63
|
+
expect(wordCount).toBe(2);
|
|
64
|
+
});
|
|
65
|
+
});
|
|
66
|
+
|
|
67
|
+
// Each case below rebuilds the check inline: coalesce the transcript to '' with `??`,
// count words with splitWords, and "block" when the count is strictly below the threshold.
describe('Integration: Full Interruption Check Logic', () => {
|
|
68
|
+
it('should block interruption for empty transcript with threshold 2', () => {
|
|
69
|
+
const text = '';
|
|
70
|
+
const minInterruptionWords = 2;
|
|
71
|
+
|
|
72
|
+
const normalizedText = text ?? '';
|
|
73
|
+
const wordCount = splitWords(normalizedText, true).length;
|
|
74
|
+
const shouldBlock = wordCount < minInterruptionWords;
|
|
75
|
+
|
|
76
|
+
expect(normalizedText).toBe('');
|
|
77
|
+
expect(wordCount).toBe(0);
|
|
78
|
+
expect(shouldBlock).toBe(true);
|
|
79
|
+
});
|
|
80
|
+
|
|
81
|
+
it('should block interruption for undefined transcript with threshold 2', () => {
|
|
82
|
+
const text: string | undefined = undefined;
|
|
83
|
+
const minInterruptionWords = 2;
|
|
84
|
+
|
|
85
|
+
const normalizedText = text ?? '';
|
|
86
|
+
const wordCount = splitWords(normalizedText, true).length;
|
|
87
|
+
const shouldBlock = wordCount < minInterruptionWords;
|
|
88
|
+
|
|
89
|
+
expect(normalizedText).toBe('');
|
|
90
|
+
expect(wordCount).toBe(0);
|
|
91
|
+
expect(shouldBlock).toBe(true);
|
|
92
|
+
});
|
|
93
|
+
|
|
94
|
+
it('should block interruption for single word with threshold 2', () => {
|
|
95
|
+
const text = 'hello';
|
|
96
|
+
const minInterruptionWords = 2;
|
|
97
|
+
|
|
98
|
+
const normalizedText = text ?? '';
|
|
99
|
+
const wordCount = splitWords(normalizedText, true).length;
|
|
100
|
+
const shouldBlock = wordCount < minInterruptionWords;
|
|
101
|
+
|
|
102
|
+
expect(normalizedText).toBe('hello');
|
|
103
|
+
expect(wordCount).toBe(1);
|
|
104
|
+
expect(shouldBlock).toBe(true);
|
|
105
|
+
});
|
|
106
|
+
|
|
107
|
+
// Boundary case: a count equal to the threshold is NOT blocked because the comparison is strict `<`.
it('should allow interruption when word count exactly meets threshold', () => {
|
|
108
|
+
const text = 'hello world';
|
|
109
|
+
const minInterruptionWords = 2;
|
|
110
|
+
|
|
111
|
+
const normalizedText = text ?? '';
|
|
112
|
+
const wordCount = splitWords(normalizedText, true).length;
|
|
113
|
+
const shouldBlock = wordCount < minInterruptionWords;
|
|
114
|
+
|
|
115
|
+
expect(normalizedText).toBe('hello world');
|
|
116
|
+
expect(wordCount).toBe(2);
|
|
117
|
+
expect(shouldBlock).toBe(false);
|
|
118
|
+
});
|
|
119
|
+
|
|
120
|
+
it('should allow interruption when word count exceeds threshold', () => {
|
|
121
|
+
const text = 'hello this is a full sentence';
|
|
122
|
+
const minInterruptionWords = 2;
|
|
123
|
+
|
|
124
|
+
const normalizedText = text ?? '';
|
|
125
|
+
const wordCount = splitWords(normalizedText, true).length;
|
|
126
|
+
const shouldBlock = wordCount < minInterruptionWords;
|
|
127
|
+
|
|
128
|
+
expect(normalizedText).toBe('hello this is a full sentence');
|
|
129
|
+
expect(wordCount).toBe(6);
|
|
130
|
+
expect(shouldBlock).toBe(false);
|
|
131
|
+
});
|
|
132
|
+
|
|
133
|
+
// NOTE(review): every transcript here is a string, so `?? ''` never changes the value and
// both branches call splitWords with the same input — the two results are equal by construction.
it('should apply consistent word counting logic in both methods', () => {
|
|
134
|
+
const transcripts = ['', 'hello', 'hello world', 'this is a longer sentence'];
|
|
135
|
+
const threshold = 2;
|
|
136
|
+
|
|
137
|
+
transcripts.forEach((transcript) => {
|
|
138
|
+
const text1 = transcript;
|
|
139
|
+
const normalizedText1 = text1 ?? '';
|
|
140
|
+
const wordCount1 = splitWords(normalizedText1, true).length;
|
|
141
|
+
const shouldBlock1 = wordCount1 < threshold;
|
|
142
|
+
|
|
143
|
+
const wordCount2 = splitWords(transcript, true).length;
|
|
144
|
+
const shouldBlock2 = wordCount2 < threshold;
|
|
145
|
+
|
|
146
|
+
expect(wordCount1).toBe(wordCount2);
|
|
147
|
+
expect(shouldBlock1).toBe(shouldBlock2);
|
|
148
|
+
});
|
|
149
|
+
|
|
150
|
+
});
|
|
151
|
+
});
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
// SPDX-FileCopyrightText: 2025 LiveKit, Inc.
|
|
2
|
+
//
|
|
3
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
import type { ChatContext } from '../llm/chat_context.js';
|
|
5
|
+
import type { VoiceOptions } from './agent_session.js';
|
|
6
|
+
import type { AgentEvent } from './events.js';
|
|
7
|
+
|
|
8
|
+
/**
 * Fully-populated report describing one agent session.
 *
 * Every field is required; this is the shape returned by `createSessionReport`
 * and consumed by `sessionReportToJSON`.
 */
export interface SessionReport {
|
|
9
|
+
jobId: string;
|
|
10
|
+
roomId: string;
|
|
11
|
+
room: string;
|
|
12
|
+
options: VoiceOptions;
|
|
13
|
+
events: AgentEvent[];
|
|
14
|
+
chatHistory: ChatContext;
|
|
15
|
+
enableUserDataTraining: boolean;
|
|
16
|
+
// Filled with Date.now() by createSessionReport when the caller supplies none.
timestamp: number;
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
/**
 * Input accepted by `createSessionReport`.
 *
 * Same fields as `SessionReport`, except that `enableUserDataTraining` and
 * `timestamp` may be omitted.
 */
export interface SessionReportOptions {
|
|
20
|
+
jobId: string;
|
|
21
|
+
roomId: string;
|
|
22
|
+
room: string;
|
|
23
|
+
options: VoiceOptions;
|
|
24
|
+
events: AgentEvent[];
|
|
25
|
+
chatHistory: ChatContext;
|
|
26
|
+
// Optional; createSessionReport substitutes `false` when null/undefined.
enableUserDataTraining?: boolean;
|
|
27
|
+
// Optional; createSessionReport substitutes `Date.now()` when null/undefined.
timestamp?: number;
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
/**
 * Builds a `SessionReport` from the given options.
 *
 * All fields are copied by reference (no cloning of `options`, `events` or
 * `chatHistory`). Missing optional fields are defaulted:
 * `enableUserDataTraining` to `false` and `timestamp` to `Date.now()`.
 *
 * @param opts - Report fields; see `SessionReportOptions`.
 * @returns A report object with every field populated.
 */
export function createSessionReport(opts: SessionReportOptions): SessionReport {
|
|
31
|
+
return {
|
|
32
|
+
jobId: opts.jobId,
|
|
33
|
+
roomId: opts.roomId,
|
|
34
|
+
room: opts.room,
|
|
35
|
+
options: opts.options,
|
|
36
|
+
events: opts.events,
|
|
37
|
+
chatHistory: opts.chatHistory,
|
|
38
|
+
// `??` (not `||`) so an explicit `false` / `0` supplied by the caller is preserved.
enableUserDataTraining: opts.enableUserDataTraining ?? false,
|
|
39
|
+
timestamp: opts.timestamp ?? Date.now(),
|
|
40
|
+
};
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
// TODO(brian): PR5 - Add uploadSessionReport() function that creates multipart form with:
|
|
44
|
+
// - header: protobuf MetricsRecordingHeader (room_id, duration, start_time)
|
|
45
|
+
// - chat_history: JSON serialized chat history (use sessionReportToJSON)
|
|
46
|
+
// - audio: audio recording file if available (ogg format)
|
|
47
|
+
// - Uploads to LiveKit Cloud observability endpoint with JWT auth
|
|
48
|
+
/**
 * Converts a `SessionReport` into a plain object with snake_case keys.
 *
 * - Events whose `type` is `'metrics_collected'` are dropped; every other
 *   event is shallow-copied (object spread) into the output array.
 * - `options` serializes exactly the seven fields listed below; any other
 *   `VoiceOptions` field is not included in the output.
 * - `chat_history` is produced by `report.chatHistory.toJSON({ excludeTimestamp: false })`.
 *
 * @param report - The report to serialize.
 * @returns A new object; `report` itself is not modified by this function.
 */
export function sessionReportToJSON(report: SessionReport): Record<string, unknown> {
|
|
49
|
+
const events: Record<string, unknown>[] = [];
|
|
50
|
+
|
|
51
|
+
for (const event of report.events) {
|
|
52
|
+
if (event.type === 'metrics_collected') {
|
|
53
|
+
continue; // metrics are too noisy, Cloud is using the chat_history as the source of truth
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
// Shallow copy: nested objects inside the event are still shared with the original.
events.push({ ...event });
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
return {
|
|
60
|
+
job_id: report.jobId,
|
|
61
|
+
room_id: report.roomId,
|
|
62
|
+
room: report.room,
|
|
63
|
+
events,
|
|
64
|
+
options: {
|
|
65
|
+
allow_interruptions: report.options.allowInterruptions,
|
|
66
|
+
discard_audio_if_uninterruptible: report.options.discardAudioIfUninterruptible,
|
|
67
|
+
min_interruption_duration: report.options.minInterruptionDuration,
|
|
68
|
+
min_interruption_words: report.options.minInterruptionWords,
|
|
69
|
+
min_endpointing_delay: report.options.minEndpointingDelay,
|
|
70
|
+
max_endpointing_delay: report.options.maxEndpointingDelay,
|
|
71
|
+
max_tool_steps: report.options.maxToolSteps,
|
|
72
|
+
},
|
|
73
|
+
chat_history: report.chatHistory.toJSON({ excludeTimestamp: false }),
|
|
74
|
+
enable_user_data_training: report.enableUserDataTraining,
|
|
75
|
+
timestamp: report.timestamp,
|
|
76
|
+
};
|
|
77
|
+
}
|
|
@@ -369,6 +369,10 @@ export class RoomIO {
|
|
|
369
369
|
return this.transcriptionSynchronizer.textOutput;
|
|
370
370
|
}
|
|
371
371
|
|
|
372
|
+
/** Reports whether `participantAvailableFuture` has completed (returns its `done` flag). */
get isParticipantAvailable(): boolean {
|
|
373
|
+
return this.participantAvailableFuture.done;
|
|
374
|
+
}
|
|
375
|
+
|
|
372
376
|
/** Switch to a different participant */
|
|
373
377
|
setParticipant(participantIdentity: string | null) {
|
|
374
378
|
this.logger.debug({ participantIdentity }, 'setting participant');
|