@livekit/agents 1.0.18 → 1.0.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +3 -0
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +2 -1
- package/dist/index.d.ts +2 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -0
- package/dist/index.js.map +1 -1
- package/dist/inference/api_protos.d.cts +12 -12
- package/dist/inference/api_protos.d.ts +12 -12
- package/dist/inference/tts.cjs +1 -1
- package/dist/inference/tts.cjs.map +1 -1
- package/dist/inference/tts.js +1 -1
- package/dist/inference/tts.js.map +1 -1
- package/dist/ipc/job_proc_lazy_main.cjs +6 -2
- package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
- package/dist/ipc/job_proc_lazy_main.js +6 -2
- package/dist/ipc/job_proc_lazy_main.js.map +1 -1
- package/dist/job.cjs +31 -0
- package/dist/job.cjs.map +1 -1
- package/dist/job.d.cts +6 -0
- package/dist/job.d.ts +6 -0
- package/dist/job.d.ts.map +1 -1
- package/dist/job.js +31 -0
- package/dist/job.js.map +1 -1
- package/dist/llm/chat_context.cjs +33 -0
- package/dist/llm/chat_context.cjs.map +1 -1
- package/dist/llm/chat_context.d.cts +22 -2
- package/dist/llm/chat_context.d.ts +22 -2
- package/dist/llm/chat_context.d.ts.map +1 -1
- package/dist/llm/chat_context.js +32 -0
- package/dist/llm/chat_context.js.map +1 -1
- package/dist/llm/index.cjs +2 -0
- package/dist/llm/index.cjs.map +1 -1
- package/dist/llm/index.d.cts +1 -1
- package/dist/llm/index.d.ts +1 -1
- package/dist/llm/index.d.ts.map +1 -1
- package/dist/llm/index.js +2 -0
- package/dist/llm/index.js.map +1 -1
- package/dist/llm/llm.cjs.map +1 -1
- package/dist/llm/llm.d.ts.map +1 -1
- package/dist/llm/llm.js.map +1 -1
- package/dist/llm/provider_format/google.test.cjs +48 -0
- package/dist/llm/provider_format/google.test.cjs.map +1 -1
- package/dist/llm/provider_format/google.test.js +54 -1
- package/dist/llm/provider_format/google.test.js.map +1 -1
- package/dist/llm/provider_format/index.d.cts +1 -1
- package/dist/llm/provider_format/index.d.ts +1 -1
- package/dist/llm/provider_format/openai.cjs +1 -2
- package/dist/llm/provider_format/openai.cjs.map +1 -1
- package/dist/llm/provider_format/openai.js +1 -2
- package/dist/llm/provider_format/openai.js.map +1 -1
- package/dist/llm/provider_format/openai.test.cjs +32 -0
- package/dist/llm/provider_format/openai.test.cjs.map +1 -1
- package/dist/llm/provider_format/openai.test.js +38 -1
- package/dist/llm/provider_format/openai.test.js.map +1 -1
- package/dist/log.cjs.map +1 -1
- package/dist/log.d.ts.map +1 -1
- package/dist/log.js.map +1 -1
- package/dist/stt/stt.cjs +3 -0
- package/dist/stt/stt.cjs.map +1 -1
- package/dist/stt/stt.d.cts +1 -0
- package/dist/stt/stt.d.ts +1 -0
- package/dist/stt/stt.d.ts.map +1 -1
- package/dist/stt/stt.js +3 -0
- package/dist/stt/stt.js.map +1 -1
- package/dist/telemetry/index.cjs +51 -0
- package/dist/telemetry/index.cjs.map +1 -0
- package/dist/telemetry/index.d.cts +4 -0
- package/dist/telemetry/index.d.ts +4 -0
- package/dist/telemetry/index.d.ts.map +1 -0
- package/dist/telemetry/index.js +12 -0
- package/dist/telemetry/index.js.map +1 -0
- package/dist/telemetry/trace_types.cjs +191 -0
- package/dist/telemetry/trace_types.cjs.map +1 -0
- package/dist/telemetry/trace_types.d.cts +56 -0
- package/dist/telemetry/trace_types.d.ts +56 -0
- package/dist/telemetry/trace_types.d.ts.map +1 -0
- package/dist/telemetry/trace_types.js +113 -0
- package/dist/telemetry/trace_types.js.map +1 -0
- package/dist/telemetry/traces.cjs +196 -0
- package/dist/telemetry/traces.cjs.map +1 -0
- package/dist/telemetry/traces.d.cts +97 -0
- package/dist/telemetry/traces.d.ts +97 -0
- package/dist/telemetry/traces.d.ts.map +1 -0
- package/dist/telemetry/traces.js +173 -0
- package/dist/telemetry/traces.js.map +1 -0
- package/dist/telemetry/utils.cjs +86 -0
- package/dist/telemetry/utils.cjs.map +1 -0
- package/dist/telemetry/utils.d.cts +5 -0
- package/dist/telemetry/utils.d.ts +5 -0
- package/dist/telemetry/utils.d.ts.map +1 -0
- package/dist/telemetry/utils.js +51 -0
- package/dist/telemetry/utils.js.map +1 -0
- package/dist/tts/tts.cjs +3 -0
- package/dist/tts/tts.cjs.map +1 -1
- package/dist/tts/tts.d.cts +1 -0
- package/dist/tts/tts.d.ts +1 -0
- package/dist/tts/tts.d.ts.map +1 -1
- package/dist/tts/tts.js +3 -0
- package/dist/tts/tts.js.map +1 -1
- package/dist/vad.cjs +3 -0
- package/dist/vad.cjs.map +1 -1
- package/dist/vad.d.cts +1 -0
- package/dist/vad.d.ts +1 -0
- package/dist/vad.d.ts.map +1 -1
- package/dist/vad.js +3 -0
- package/dist/vad.js.map +1 -1
- package/dist/voice/agent.cjs +15 -0
- package/dist/voice/agent.cjs.map +1 -1
- package/dist/voice/agent.d.cts +4 -1
- package/dist/voice/agent.d.ts +4 -1
- package/dist/voice/agent.d.ts.map +1 -1
- package/dist/voice/agent.js +15 -0
- package/dist/voice/agent.js.map +1 -1
- package/dist/voice/agent_activity.cjs +5 -0
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.d.ts.map +1 -1
- package/dist/voice/agent_activity.js +5 -0
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/agent_session.cjs +29 -1
- package/dist/voice/agent_session.cjs.map +1 -1
- package/dist/voice/agent_session.d.cts +6 -2
- package/dist/voice/agent_session.d.ts +6 -2
- package/dist/voice/agent_session.d.ts.map +1 -1
- package/dist/voice/agent_session.js +30 -2
- package/dist/voice/agent_session.js.map +1 -1
- package/dist/voice/audio_recognition.cjs +1 -1
- package/dist/voice/audio_recognition.cjs.map +1 -1
- package/dist/voice/audio_recognition.d.ts.map +1 -1
- package/dist/voice/audio_recognition.js +1 -1
- package/dist/voice/audio_recognition.js.map +1 -1
- package/dist/voice/generation.cjs.map +1 -1
- package/dist/voice/generation.d.ts.map +1 -1
- package/dist/voice/generation.js.map +1 -1
- package/dist/voice/index.cjs +2 -0
- package/dist/voice/index.cjs.map +1 -1
- package/dist/voice/index.d.cts +1 -0
- package/dist/voice/index.d.ts +1 -0
- package/dist/voice/index.d.ts.map +1 -1
- package/dist/voice/index.js +1 -0
- package/dist/voice/index.js.map +1 -1
- package/dist/voice/report.cjs +69 -0
- package/dist/voice/report.cjs.map +1 -0
- package/dist/voice/report.d.cts +26 -0
- package/dist/voice/report.d.ts +26 -0
- package/dist/voice/report.d.ts.map +1 -0
- package/dist/voice/report.js +44 -0
- package/dist/voice/report.js.map +1 -0
- package/package.json +10 -3
- package/src/index.ts +2 -1
- package/src/inference/tts.ts +1 -1
- package/src/ipc/job_proc_lazy_main.ts +10 -2
- package/src/job.ts +48 -0
- package/src/llm/chat_context.ts +53 -1
- package/src/llm/index.ts +1 -0
- package/src/llm/llm.ts +2 -0
- package/src/llm/provider_format/google.test.ts +72 -1
- package/src/llm/provider_format/openai.test.ts +55 -1
- package/src/llm/provider_format/openai.ts +3 -2
- package/src/log.ts +1 -0
- package/src/stt/stt.ts +4 -0
- package/src/telemetry/index.ts +10 -0
- package/src/telemetry/trace_types.ts +88 -0
- package/src/telemetry/traces.ts +266 -0
- package/src/telemetry/utils.ts +61 -0
- package/src/tts/tts.ts +8 -0
- package/src/vad.ts +4 -0
- package/src/voice/agent.ts +22 -0
- package/src/voice/agent_activity.ts +9 -0
- package/src/voice/agent_session.ts +44 -1
- package/src/voice/audio_recognition.ts +3 -1
- package/src/voice/generation.ts +3 -0
- package/src/voice/index.ts +1 -0
- package/src/voice/report.ts +77 -0
|
@@ -14,7 +14,7 @@ import {
|
|
|
14
14
|
type TTSModelString,
|
|
15
15
|
} from '../inference/index.js';
|
|
16
16
|
import { getJobContext } from '../job.js';
|
|
17
|
-
import { ChatContext, ChatMessage } from '../llm/chat_context.js';
|
|
17
|
+
import { AgentHandoffItem, ChatContext, ChatMessage } from '../llm/chat_context.js';
|
|
18
18
|
import type { LLM, RealtimeModel, RealtimeModelError, ToolChoice } from '../llm/index.js';
|
|
19
19
|
import type { LLMError } from '../llm/llm.js';
|
|
20
20
|
import { log } from '../log.js';
|
|
@@ -26,6 +26,7 @@ import type { Agent } from './agent.js';
|
|
|
26
26
|
import { AgentActivity } from './agent_activity.js';
|
|
27
27
|
import type { _TurnDetector } from './audio_recognition.js';
|
|
28
28
|
import {
|
|
29
|
+
type AgentEvent,
|
|
29
30
|
AgentSessionEventTypes,
|
|
30
31
|
type AgentState,
|
|
31
32
|
type AgentStateChangedEvent,
|
|
@@ -127,6 +128,9 @@ export class AgentSession<
|
|
|
127
128
|
private closingTask: Promise<void> | null = null;
|
|
128
129
|
private userAwayTimer: NodeJS.Timeout | null = null;
|
|
129
130
|
|
|
131
|
+
/** @internal */
|
|
132
|
+
_recordedEvents: AgentEvent[] = [];
|
|
133
|
+
|
|
130
134
|
constructor(opts: AgentSessionOptions<UserData>) {
|
|
131
135
|
super();
|
|
132
136
|
|
|
@@ -174,6 +178,15 @@ export class AgentSession<
|
|
|
174
178
|
this.on(AgentSessionEventTypes.UserInputTranscribed, this._onUserInputTranscribed.bind(this));
|
|
175
179
|
}
|
|
176
180
|
|
|
181
|
+
/**
 * Emits a session event, recording its payload first.
 *
 * The first listener argument is treated as the event payload and appended to
 * `_recordedEvents` before the call is forwarded, unchanged, to the parent
 * class's `emit`.
 *
 * @param event - Name of the session event being emitted.
 * @param args - Arguments forwarded to the registered listeners.
 * @returns The value returned by the parent `emit`.
 */
emit<K extends keyof AgentSessionCallbacks>(
  event: K,
  ...args: Parameters<AgentSessionCallbacks[K]>
): boolean {
  // Record before dispatching so the payload is stored even if a listener throws.
  this._recordedEvents.push(args[0] as AgentEvent);
  return super.emit(event, ...args);
}
|
|
189
|
+
|
|
177
190
|
get input(): AgentInput {
|
|
178
191
|
return this._input;
|
|
179
192
|
}
|
|
@@ -199,15 +212,20 @@ export class AgentSession<
|
|
|
199
212
|
}
|
|
200
213
|
|
|
201
214
|
async start({
|
|
215
|
+
// TODO(brian): PR2 - Add setupCloudTracer() call if on LiveKit Cloud with recording enabled
|
|
216
|
+
// TODO(brian): PR3 - Add span: this._sessionSpan = tracer.startSpan('agent_session'), store as instance property
|
|
217
|
+
// TODO(brian): PR4 - Add setupCloudLogger() call in setupCloudTracer() to setup OTEL logging with Pino bridge
|
|
202
218
|
agent,
|
|
203
219
|
room,
|
|
204
220
|
inputOptions,
|
|
205
221
|
outputOptions,
|
|
222
|
+
record = true,
|
|
206
223
|
}: {
|
|
207
224
|
agent: Agent;
|
|
208
225
|
room: Room;
|
|
209
226
|
inputOptions?: Partial<RoomInputOptions>;
|
|
210
227
|
outputOptions?: Partial<RoomOutputOptions>;
|
|
228
|
+
record?: boolean;
|
|
211
229
|
}): Promise<void> {
|
|
212
230
|
if (this.started) {
|
|
213
231
|
return;
|
|
@@ -247,6 +265,17 @@ export class AgentSession<
|
|
|
247
265
|
this.logger.debug('Auto-connecting to room via job context');
|
|
248
266
|
tasks.push(ctx.connect());
|
|
249
267
|
}
|
|
268
|
+
|
|
269
|
+
if (record) {
|
|
270
|
+
if (ctx._primaryAgentSession === undefined) {
|
|
271
|
+
ctx._primaryAgentSession = this;
|
|
272
|
+
} else {
|
|
273
|
+
throw new Error(
|
|
274
|
+
'Only one `AgentSession` can be the primary at a time. If you want to ignore primary designation, use session.start(record=False).',
|
|
275
|
+
);
|
|
276
|
+
}
|
|
277
|
+
}
|
|
278
|
+
|
|
250
279
|
// TODO(AJS-265): add shutdown callback to job context
|
|
251
280
|
tasks.push(this.updateActivity(this.agent));
|
|
252
281
|
|
|
@@ -341,6 +370,8 @@ export class AgentSession<
|
|
|
341
370
|
// TODO(AJS-129): add lock to agent activity core lifecycle
|
|
342
371
|
this.nextActivity = new AgentActivity(agent, this);
|
|
343
372
|
|
|
373
|
+
const previousActivity = this.activity;
|
|
374
|
+
|
|
344
375
|
if (this.activity) {
|
|
345
376
|
await this.activity.drain();
|
|
346
377
|
await this.activity.close();
|
|
@@ -349,6 +380,14 @@ export class AgentSession<
|
|
|
349
380
|
this.activity = this.nextActivity;
|
|
350
381
|
this.nextActivity = undefined;
|
|
351
382
|
|
|
383
|
+
this._chatCtx.insert(
|
|
384
|
+
new AgentHandoffItem({
|
|
385
|
+
oldAgentId: previousActivity?.agent.id,
|
|
386
|
+
newAgentId: agent.id,
|
|
387
|
+
}),
|
|
388
|
+
);
|
|
389
|
+
this.logger.debug({ previousActivity, agent }, 'Agent handoff inserted into chat context');
|
|
390
|
+
|
|
352
391
|
await this.activity.start();
|
|
353
392
|
|
|
354
393
|
if (this._input.audio) {
|
|
@@ -419,6 +458,8 @@ export class AgentSession<
|
|
|
419
458
|
return;
|
|
420
459
|
}
|
|
421
460
|
|
|
461
|
+
// TODO(brian): PR3 - Add span: if state === 'speaking' && !this._agentSpeakingSpan, create tracer.startSpan('agent_speaking') with participant attributes
|
|
462
|
+
// TODO(brian): PR3 - Add span: if state !== 'speaking' && this._agentSpeakingSpan, end and clear this._agentSpeakingSpan
|
|
422
463
|
const oldState = this._agentState;
|
|
423
464
|
this._agentState = state;
|
|
424
465
|
|
|
@@ -441,6 +482,8 @@ export class AgentSession<
|
|
|
441
482
|
return;
|
|
442
483
|
}
|
|
443
484
|
|
|
485
|
+
// TODO(brian): PR3 - Add span: if state === 'speaking' && !this._userSpeakingSpan, create tracer.startSpan('user_speaking') with participant attributes
|
|
486
|
+
// TODO(brian): PR3 - Add span: if state !== 'speaking' && this._userSpeakingSpan, end and clear this._userSpeakingSpan
|
|
444
487
|
const oldState = this.userState;
|
|
445
488
|
this.userState = state;
|
|
446
489
|
|
|
@@ -57,6 +57,8 @@ export interface AudioRecognitionOptions {
|
|
|
57
57
|
maxEndpointingDelay: number;
|
|
58
58
|
}
|
|
59
59
|
|
|
60
|
+
// TODO(brian): PR3 - Add span: private _userTurnSpan?: Span, create lazily in _ensureUserTurnSpan() method (tracer.startSpan('user_turn') with participant attributes)
|
|
61
|
+
// TODO(brian): PR3 - Add span: 'eou_detection' span when running EOU detection (in runEOUDetection method)
|
|
60
62
|
export class AudioRecognition {
|
|
61
63
|
private hooks: RecognitionHooks;
|
|
62
64
|
private stt?: STTNode;
|
|
@@ -356,7 +358,7 @@ export class AudioRecognition {
|
|
|
356
358
|
|
|
357
359
|
if (turnDetector) {
|
|
358
360
|
this.logger.debug('Running turn detector model');
|
|
359
|
-
if (!turnDetector.supportsLanguage(this.lastLanguage)) {
|
|
361
|
+
if (!(await turnDetector.supportsLanguage(this.lastLanguage))) {
|
|
360
362
|
this.logger.debug(`Turn detector does not support language ${this.lastLanguage}`);
|
|
361
363
|
} else {
|
|
362
364
|
const endOfTurnProbability = await turnDetector.predictEndOfTurn(chatCtx);
|
package/src/voice/generation.ts
CHANGED
|
@@ -377,6 +377,7 @@ export function updateInstructions(options: {
|
|
|
377
377
|
}
|
|
378
378
|
}
|
|
379
379
|
|
|
380
|
+
// TODO(brian): PR3 - Add @tracer.startActiveSpan('llm_node') decorator/wrapper
|
|
380
381
|
export function performLLMInference(
|
|
381
382
|
node: LLMNode,
|
|
382
383
|
chatCtx: ChatContext,
|
|
@@ -467,6 +468,7 @@ export function performLLMInference(
|
|
|
467
468
|
];
|
|
468
469
|
}
|
|
469
470
|
|
|
471
|
+
// TODO(brian): PR3 - Add @tracer.startActiveSpan('tts_node') decorator/wrapper
|
|
470
472
|
export function performTTSInference(
|
|
471
473
|
node: TTSNode,
|
|
472
474
|
text: ReadableStream<string>,
|
|
@@ -650,6 +652,7 @@ export function performAudioForwarding(
|
|
|
650
652
|
];
|
|
651
653
|
}
|
|
652
654
|
|
|
655
|
+
// TODO(brian): PR3 - Add @tracer.startActiveSpan('function_tool') wrapper for each tool execution
|
|
653
656
|
export function performToolExecutions({
|
|
654
657
|
session,
|
|
655
658
|
speechHandle,
|
package/src/voice/index.ts
CHANGED
|
@@ -6,5 +6,6 @@ export { AgentSession, type AgentSessionOptions } from './agent_session.js';
|
|
|
6
6
|
export * from './avatar/index.js';
|
|
7
7
|
export * from './background_audio.js';
|
|
8
8
|
export * from './events.js';
|
|
9
|
+
export * from './report.js';
|
|
9
10
|
export * from './room_io/index.js';
|
|
10
11
|
export { RunContext } from './run_context.js';
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
// SPDX-FileCopyrightText: 2025 LiveKit, Inc.
|
|
2
|
+
//
|
|
3
|
+
// SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
import type { ChatContext } from '../llm/chat_context.js';
|
|
5
|
+
import type { VoiceOptions } from './agent_session.js';
|
|
6
|
+
import type { AgentEvent } from './events.js';
|
|
7
|
+
|
|
8
|
+
/**
 * Fully populated report describing one agent session.
 *
 * Built by `createSessionReport` and serialized by `sessionReportToJSON`.
 */
export interface SessionReport {
  /** Job identifier; serialized as `job_id`. */
  jobId: string;
  /** Room identifier; serialized as `room_id`. NOTE(review): presumably the room SID - confirm. */
  roomId: string;
  /** Room value serialized as `room`. NOTE(review): presumably the room name - confirm. */
  room: string;
  /** Voice options the session ran with. */
  options: VoiceOptions;
  /** Events attached to the report. */
  events: AgentEvent[];
  /** Chat context attached to the report; serialized as `chat_history`. */
  chatHistory: ChatContext;
  /** Serialized as `enable_user_data_training`; `createSessionReport` defaults it to `false`. */
  enableUserDataTraining: boolean;
  /** Report timestamp; `createSessionReport` defaults it to `Date.now()`. */
  timestamp: number;
}
|
|
18
|
+
|
|
19
|
+
/**
 * Input accepted by `createSessionReport`.
 *
 * Mirrors `SessionReport`, except that `enableUserDataTraining` and
 * `timestamp` may be omitted and are then filled in with defaults.
 */
export interface SessionReportOptions {
  /** Job identifier copied into the report. */
  jobId: string;
  /** Room identifier copied into the report. */
  roomId: string;
  /** Room value copied into the report. */
  room: string;
  /** Voice options copied into the report. */
  options: VoiceOptions;
  /** Events copied (by reference) into the report. */
  events: AgentEvent[];
  /** Chat context copied (by reference) into the report. */
  chatHistory: ChatContext;
  /** Optional; treated as `false` when omitted. */
  enableUserDataTraining?: boolean;
  /** Optional; replaced by `Date.now()` when omitted. */
  timestamp?: number;
}
|
|
29
|
+
|
|
30
|
+
/**
 * Builds a `SessionReport` from the supplied options.
 *
 * Required fields are carried over as-is (arrays and objects are passed by
 * reference, not cloned). The two optional fields are defaulted when they are
 * `null` or `undefined`.
 *
 * @param opts - Report fields; `enableUserDataTraining` and `timestamp` are optional.
 * @returns A report with `enableUserDataTraining` defaulting to `false` and
 *   `timestamp` defaulting to `Date.now()`.
 */
export function createSessionReport(opts: SessionReportOptions): SessionReport {
  const { jobId, roomId, room, options, events, chatHistory } = opts;

  // `??` (not `||`) so an explicit `0` timestamp or `false` flag is preserved.
  const enableUserDataTraining = opts.enableUserDataTraining ?? false;
  const timestamp = opts.timestamp ?? Date.now();

  return {
    jobId,
    roomId,
    room,
    options,
    events,
    chatHistory,
    enableUserDataTraining,
    timestamp,
  };
}
|
|
42
|
+
|
|
43
|
+
// TODO(brian): PR5 - Add uploadSessionReport() function that creates multipart form with:
|
|
44
|
+
// - header: protobuf MetricsRecordingHeader (room_id, duration, start_time)
|
|
45
|
+
// - chat_history: JSON serialized chat history (use sessionReportToJSON)
|
|
46
|
+
// - audio: audio recording file if available (ogg format)
|
|
47
|
+
// - Uploads to LiveKit Cloud observability endpoint with JWT auth
|
|
48
|
+
/**
 * Converts a `SessionReport` into a plain object with snake_case keys.
 *
 * Events of type `metrics_collected` are left out; every other event is
 * shallow-copied. The chat history is serialized through its own `toJSON`
 * with timestamps included.
 *
 * @param report - The report to serialize.
 * @returns An object with the keys `job_id`, `room_id`, `room`, `events`,
 *   `options`, `chat_history`, `enable_user_data_training` and `timestamp`.
 */
export function sessionReportToJSON(report: SessionReport): Record<string, unknown> {
  const serializedEvents: Record<string, unknown>[] = [];
  for (const recorded of report.events) {
    // metrics are too noisy, Cloud is using the chat_history as the source of truth
    if (recorded.type !== 'metrics_collected') {
      serializedEvents.push({ ...recorded });
    }
  }

  const voiceOptions = report.options;
  const serializedOptions = {
    allow_interruptions: voiceOptions.allowInterruptions,
    discard_audio_if_uninterruptible: voiceOptions.discardAudioIfUninterruptible,
    min_interruption_duration: voiceOptions.minInterruptionDuration,
    min_interruption_words: voiceOptions.minInterruptionWords,
    min_endpointing_delay: voiceOptions.minEndpointingDelay,
    max_endpointing_delay: voiceOptions.maxEndpointingDelay,
    max_tool_steps: voiceOptions.maxToolSteps,
  };

  return {
    job_id: report.jobId,
    room_id: report.roomId,
    room: report.room,
    events: serializedEvents,
    options: serializedOptions,
    chat_history: report.chatHistory.toJSON({ excludeTimestamp: false }),
    enable_user_data_training: report.enableUserDataTraining,
    timestamp: report.timestamp,
  };
}
|