@livekit/agents 1.0.23 → 1.0.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/inference/llm.cjs +1 -2
- package/dist/inference/llm.cjs.map +1 -1
- package/dist/inference/llm.d.ts.map +1 -1
- package/dist/inference/llm.js +1 -2
- package/dist/inference/llm.js.map +1 -1
- package/dist/inference/stt.cjs +1 -1
- package/dist/inference/stt.cjs.map +1 -1
- package/dist/inference/stt.d.ts.map +1 -1
- package/dist/inference/stt.js +1 -1
- package/dist/inference/stt.js.map +1 -1
- package/dist/inference/tts.cjs +4 -2
- package/dist/inference/tts.cjs.map +1 -1
- package/dist/inference/tts.d.ts.map +1 -1
- package/dist/inference/tts.js +4 -2
- package/dist/inference/tts.js.map +1 -1
- package/dist/ipc/job_proc_lazy_main.cjs +1 -1
- package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
- package/dist/ipc/job_proc_lazy_main.js +1 -1
- package/dist/ipc/job_proc_lazy_main.js.map +1 -1
- package/dist/job.cjs +29 -2
- package/dist/job.cjs.map +1 -1
- package/dist/job.d.cts +6 -0
- package/dist/job.d.ts +6 -0
- package/dist/job.d.ts.map +1 -1
- package/dist/job.js +19 -2
- package/dist/job.js.map +1 -1
- package/dist/llm/llm.cjs +2 -1
- package/dist/llm/llm.cjs.map +1 -1
- package/dist/llm/llm.d.cts +1 -1
- package/dist/llm/llm.d.ts +1 -1
- package/dist/llm/llm.d.ts.map +1 -1
- package/dist/llm/llm.js +2 -1
- package/dist/llm/llm.js.map +1 -1
- package/dist/stream/deferred_stream.cjs +12 -4
- package/dist/stream/deferred_stream.cjs.map +1 -1
- package/dist/stream/deferred_stream.d.cts +6 -1
- package/dist/stream/deferred_stream.d.ts +6 -1
- package/dist/stream/deferred_stream.d.ts.map +1 -1
- package/dist/stream/deferred_stream.js +12 -4
- package/dist/stream/deferred_stream.js.map +1 -1
- package/dist/stream/deferred_stream.test.cjs +2 -2
- package/dist/stream/deferred_stream.test.cjs.map +1 -1
- package/dist/stream/deferred_stream.test.js +2 -2
- package/dist/stream/deferred_stream.test.js.map +1 -1
- package/dist/stt/stream_adapter.cjs +15 -8
- package/dist/stt/stream_adapter.cjs.map +1 -1
- package/dist/stt/stream_adapter.d.cts +7 -3
- package/dist/stt/stream_adapter.d.ts +7 -3
- package/dist/stt/stream_adapter.d.ts.map +1 -1
- package/dist/stt/stream_adapter.js +15 -8
- package/dist/stt/stream_adapter.js.map +1 -1
- package/dist/stt/stt.cjs +8 -3
- package/dist/stt/stt.cjs.map +1 -1
- package/dist/stt/stt.d.cts +9 -3
- package/dist/stt/stt.d.ts +9 -3
- package/dist/stt/stt.d.ts.map +1 -1
- package/dist/stt/stt.js +9 -4
- package/dist/stt/stt.js.map +1 -1
- package/dist/telemetry/traces.cjs +23 -2
- package/dist/telemetry/traces.cjs.map +1 -1
- package/dist/telemetry/traces.d.ts.map +1 -1
- package/dist/telemetry/traces.js +23 -2
- package/dist/telemetry/traces.js.map +1 -1
- package/dist/tts/stream_adapter.cjs +4 -4
- package/dist/tts/stream_adapter.cjs.map +1 -1
- package/dist/tts/stream_adapter.d.cts +5 -2
- package/dist/tts/stream_adapter.d.ts +5 -2
- package/dist/tts/stream_adapter.d.ts.map +1 -1
- package/dist/tts/stream_adapter.js +4 -4
- package/dist/tts/stream_adapter.js.map +1 -1
- package/dist/tts/tts.cjs +2 -2
- package/dist/tts/tts.cjs.map +1 -1
- package/dist/tts/tts.d.cts +5 -1
- package/dist/tts/tts.d.ts +5 -1
- package/dist/tts/tts.d.ts.map +1 -1
- package/dist/tts/tts.js +3 -3
- package/dist/tts/tts.js.map +1 -1
- package/dist/types.cjs +21 -32
- package/dist/types.cjs.map +1 -1
- package/dist/types.d.cts +41 -10
- package/dist/types.d.ts +41 -10
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js +18 -30
- package/dist/types.js.map +1 -1
- package/dist/voice/agent.cjs +54 -19
- package/dist/voice/agent.cjs.map +1 -1
- package/dist/voice/agent.d.ts.map +1 -1
- package/dist/voice/agent.js +54 -19
- package/dist/voice/agent.js.map +1 -1
- package/dist/voice/agent_activity.cjs +0 -3
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.d.ts.map +1 -1
- package/dist/voice/agent_activity.js +0 -3
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/agent_session.cjs +106 -28
- package/dist/voice/agent_session.cjs.map +1 -1
- package/dist/voice/agent_session.d.cts +16 -2
- package/dist/voice/agent_session.d.ts +16 -2
- package/dist/voice/agent_session.d.ts.map +1 -1
- package/dist/voice/agent_session.js +109 -28
- package/dist/voice/agent_session.js.map +1 -1
- package/dist/voice/events.cjs.map +1 -1
- package/dist/voice/events.d.cts +4 -4
- package/dist/voice/events.d.ts +4 -4
- package/dist/voice/events.d.ts.map +1 -1
- package/dist/voice/events.js.map +1 -1
- package/dist/voice/generation.cjs +6 -7
- package/dist/voice/generation.cjs.map +1 -1
- package/dist/voice/generation.d.ts.map +1 -1
- package/dist/voice/generation.js +7 -8
- package/dist/voice/generation.js.map +1 -1
- package/dist/voice/io.cjs +16 -0
- package/dist/voice/io.cjs.map +1 -1
- package/dist/voice/io.d.cts +8 -0
- package/dist/voice/io.d.ts +8 -0
- package/dist/voice/io.d.ts.map +1 -1
- package/dist/voice/io.js +16 -0
- package/dist/voice/io.js.map +1 -1
- package/dist/voice/recorder_io/index.cjs +23 -0
- package/dist/voice/recorder_io/index.cjs.map +1 -0
- package/dist/voice/recorder_io/index.d.cts +2 -0
- package/dist/voice/recorder_io/index.d.ts +2 -0
- package/dist/voice/recorder_io/index.d.ts.map +1 -0
- package/dist/voice/recorder_io/index.js +2 -0
- package/dist/voice/recorder_io/index.js.map +1 -0
- package/dist/voice/recorder_io/recorder_io.cjs +542 -0
- package/dist/voice/recorder_io/recorder_io.cjs.map +1 -0
- package/dist/voice/recorder_io/recorder_io.d.cts +100 -0
- package/dist/voice/recorder_io/recorder_io.d.ts +100 -0
- package/dist/voice/recorder_io/recorder_io.d.ts.map +1 -0
- package/dist/voice/recorder_io/recorder_io.js +508 -0
- package/dist/voice/recorder_io/recorder_io.js.map +1 -0
- package/dist/voice/report.cjs +7 -2
- package/dist/voice/report.cjs.map +1 -1
- package/dist/voice/report.d.cts +11 -1
- package/dist/voice/report.d.ts +11 -1
- package/dist/voice/report.d.ts.map +1 -1
- package/dist/voice/report.js +7 -2
- package/dist/voice/report.js.map +1 -1
- package/dist/voice/room_io/_input.cjs +2 -1
- package/dist/voice/room_io/_input.cjs.map +1 -1
- package/dist/voice/room_io/_input.d.ts.map +1 -1
- package/dist/voice/room_io/_input.js +2 -1
- package/dist/voice/room_io/_input.js.map +1 -1
- package/dist/voice/room_io/_output.cjs +8 -7
- package/dist/voice/room_io/_output.cjs.map +1 -1
- package/dist/voice/room_io/_output.d.cts +2 -1
- package/dist/voice/room_io/_output.d.ts +2 -1
- package/dist/voice/room_io/_output.d.ts.map +1 -1
- package/dist/voice/room_io/_output.js +8 -7
- package/dist/voice/room_io/_output.js.map +1 -1
- package/dist/worker.cjs +4 -3
- package/dist/worker.cjs.map +1 -1
- package/dist/worker.js +4 -3
- package/dist/worker.js.map +1 -1
- package/package.json +1 -1
- package/src/inference/llm.ts +0 -1
- package/src/inference/stt.ts +1 -2
- package/src/inference/tts.ts +5 -2
- package/src/ipc/job_proc_lazy_main.ts +1 -1
- package/src/job.ts +21 -2
- package/src/llm/llm.ts +2 -2
- package/src/stream/deferred_stream.test.ts +3 -3
- package/src/stream/deferred_stream.ts +22 -5
- package/src/stt/stream_adapter.ts +18 -8
- package/src/stt/stt.ts +19 -6
- package/src/telemetry/traces.ts +25 -3
- package/src/tts/stream_adapter.ts +5 -4
- package/src/tts/tts.ts +6 -4
- package/src/types.ts +57 -33
- package/src/voice/agent.ts +59 -19
- package/src/voice/agent_activity.ts +0 -3
- package/src/voice/agent_session.ts +141 -36
- package/src/voice/events.ts +6 -3
- package/src/voice/generation.ts +10 -8
- package/src/voice/io.ts +19 -0
- package/src/voice/recorder_io/index.ts +4 -0
- package/src/voice/recorder_io/recorder_io.ts +690 -0
- package/src/voice/report.ts +20 -3
- package/src/voice/room_io/_input.ts +2 -1
- package/src/voice/room_io/_output.ts +10 -7
- package/src/worker.ts +1 -1
|
@@ -15,7 +15,7 @@ import {
|
|
|
15
15
|
type STTModelString,
|
|
16
16
|
type TTSModelString,
|
|
17
17
|
} from '../inference/index.js';
|
|
18
|
-
import { getJobContext } from '../job.js';
|
|
18
|
+
import { type JobContext, getJobContext } from '../job.js';
|
|
19
19
|
import type { FunctionCall, FunctionCallOutput } from '../llm/chat_context.js';
|
|
20
20
|
import { AgentHandoffItem, ChatContext, ChatMessage } from '../llm/chat_context.js';
|
|
21
21
|
import type { LLM, RealtimeModel, RealtimeModelError, ToolChoice } from '../llm/index.js';
|
|
@@ -25,6 +25,12 @@ import type { STT } from '../stt/index.js';
|
|
|
25
25
|
import type { STTError } from '../stt/stt.js';
|
|
26
26
|
import { traceTypes, tracer } from '../telemetry/index.js';
|
|
27
27
|
import type { TTS, TTSError } from '../tts/tts.js';
|
|
28
|
+
import {
|
|
29
|
+
DEFAULT_API_CONNECT_OPTIONS,
|
|
30
|
+
DEFAULT_SESSION_CONNECT_OPTIONS,
|
|
31
|
+
type ResolvedSessionConnectOptions,
|
|
32
|
+
type SessionConnectOptions,
|
|
33
|
+
} from '../types.js';
|
|
28
34
|
import type { VAD } from '../vad.js';
|
|
29
35
|
import type { Agent } from './agent.js';
|
|
30
36
|
import { AgentActivity } from './agent_activity.js';
|
|
@@ -40,6 +46,7 @@ import {
|
|
|
40
46
|
type ErrorEvent,
|
|
41
47
|
type FunctionToolsExecutedEvent,
|
|
42
48
|
type MetricsCollectedEvent,
|
|
49
|
+
type ShutdownReason,
|
|
43
50
|
type SpeechCreatedEvent,
|
|
44
51
|
type UserInputTranscribedEvent,
|
|
45
52
|
type UserState,
|
|
@@ -50,6 +57,7 @@ import {
|
|
|
50
57
|
createUserStateChangedEvent,
|
|
51
58
|
} from './events.js';
|
|
52
59
|
import { AgentInput, AgentOutput } from './io.js';
|
|
60
|
+
import { RecorderIO } from './recorder_io/index.js';
|
|
53
61
|
import { RoomIO, type RoomInputOptions, type RoomOutputOptions } from './room_io/index.js';
|
|
54
62
|
import type { UnknownUserData } from './run_context.js';
|
|
55
63
|
import type { SpeechHandle } from './speech_handle.js';
|
|
@@ -100,6 +108,7 @@ export type AgentSessionOptions<UserData = UnknownUserData> = {
|
|
|
100
108
|
tts?: TTS | TTSModelString;
|
|
101
109
|
userData?: UserData;
|
|
102
110
|
voiceOptions?: Partial<VoiceOptions>;
|
|
111
|
+
connOptions?: SessionConnectOptions;
|
|
103
112
|
};
|
|
104
113
|
|
|
105
114
|
export class AgentSession<
|
|
@@ -132,10 +141,20 @@ export class AgentSession<
|
|
|
132
141
|
private closingTask: Promise<void> | null = null;
|
|
133
142
|
private userAwayTimer: NodeJS.Timeout | null = null;
|
|
134
143
|
|
|
144
|
+
// Connection options for STT, LLM, and TTS
|
|
145
|
+
private _connOptions: ResolvedSessionConnectOptions;
|
|
146
|
+
|
|
147
|
+
// Unrecoverable error counts, reset after agent speaking
|
|
148
|
+
private llmErrorCounts = 0;
|
|
149
|
+
private ttsErrorCounts = 0;
|
|
150
|
+
|
|
135
151
|
private sessionSpan?: Span;
|
|
136
152
|
private userSpeakingSpan?: Span;
|
|
137
153
|
private agentSpeakingSpan?: Span;
|
|
138
154
|
|
|
155
|
+
/** @internal */
|
|
156
|
+
_recorderIO?: RecorderIO;
|
|
157
|
+
|
|
139
158
|
/** @internal */
|
|
140
159
|
rootSpanContext?: Context;
|
|
141
160
|
|
|
@@ -159,8 +178,19 @@ export class AgentSession<
|
|
|
159
178
|
turnDetection,
|
|
160
179
|
userData,
|
|
161
180
|
voiceOptions = defaultVoiceOptions,
|
|
181
|
+
connOptions,
|
|
162
182
|
} = opts;
|
|
163
183
|
|
|
184
|
+
// Merge user-provided connOptions with defaults
|
|
185
|
+
this._connOptions = {
|
|
186
|
+
sttConnOptions: { ...DEFAULT_API_CONNECT_OPTIONS, ...connOptions?.sttConnOptions },
|
|
187
|
+
llmConnOptions: { ...DEFAULT_API_CONNECT_OPTIONS, ...connOptions?.llmConnOptions },
|
|
188
|
+
ttsConnOptions: { ...DEFAULT_API_CONNECT_OPTIONS, ...connOptions?.ttsConnOptions },
|
|
189
|
+
maxUnrecoverableErrors:
|
|
190
|
+
connOptions?.maxUnrecoverableErrors ??
|
|
191
|
+
DEFAULT_SESSION_CONNECT_OPTIONS.maxUnrecoverableErrors,
|
|
192
|
+
};
|
|
193
|
+
|
|
164
194
|
this.vad = vad;
|
|
165
195
|
|
|
166
196
|
if (typeof stt === 'string') {
|
|
@@ -225,6 +255,11 @@ export class AgentSession<
|
|
|
225
255
|
return this._chatCtx;
|
|
226
256
|
}
|
|
227
257
|
|
|
258
|
+
/** Connection options for STT, LLM, and TTS. */
|
|
259
|
+
get connOptions(): ResolvedSessionConnectOptions {
|
|
260
|
+
return this._connOptions;
|
|
261
|
+
}
|
|
262
|
+
|
|
228
263
|
set userData(value: UserData) {
|
|
229
264
|
this._userData = value;
|
|
230
265
|
}
|
|
@@ -234,14 +269,12 @@ export class AgentSession<
|
|
|
234
269
|
room,
|
|
235
270
|
inputOptions,
|
|
236
271
|
outputOptions,
|
|
237
|
-
record,
|
|
238
272
|
span,
|
|
239
273
|
}: {
|
|
240
274
|
agent: Agent;
|
|
241
275
|
room: Room;
|
|
242
276
|
inputOptions?: Partial<RoomInputOptions>;
|
|
243
277
|
outputOptions?: Partial<RoomOutputOptions>;
|
|
244
|
-
record: boolean;
|
|
245
278
|
span: Span;
|
|
246
279
|
}): Promise<void> {
|
|
247
280
|
span.setAttribute(traceTypes.ATTR_AGENT_LABEL, agent.id);
|
|
@@ -275,20 +308,38 @@ export class AgentSession<
|
|
|
275
308
|
});
|
|
276
309
|
this.roomIO.start();
|
|
277
310
|
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
311
|
+
let ctx: JobContext | undefined = undefined;
|
|
312
|
+
try {
|
|
313
|
+
ctx = getJobContext();
|
|
314
|
+
} catch (error) {
|
|
315
|
+
// JobContext is not available in evals
|
|
282
316
|
}
|
|
283
317
|
|
|
284
|
-
if (
|
|
318
|
+
if (ctx) {
|
|
319
|
+
if (ctx.room === room && !room.isConnected) {
|
|
320
|
+
this.logger.debug('Auto-connecting to room via job context');
|
|
321
|
+
tasks.push(ctx.connect());
|
|
322
|
+
}
|
|
323
|
+
|
|
285
324
|
if (ctx._primaryAgentSession === undefined) {
|
|
286
325
|
ctx._primaryAgentSession = this;
|
|
287
|
-
} else {
|
|
326
|
+
} else if (this._enableRecording) {
|
|
288
327
|
throw new Error(
|
|
289
|
-
'Only one `AgentSession` can be the primary at a time. If you want to ignore primary designation, use session.start(record
|
|
328
|
+
'Only one `AgentSession` can be the primary at a time. If you want to ignore primary designation, use `session.start({ record: false })`.',
|
|
290
329
|
);
|
|
291
330
|
}
|
|
331
|
+
|
|
332
|
+
if (this.input.audio && this.output.audio && this._enableRecording) {
|
|
333
|
+
this._recorderIO = new RecorderIO({ agentSession: this });
|
|
334
|
+
this.input.audio = this._recorderIO.recordInput(this.input.audio);
|
|
335
|
+
this.output.audio = this._recorderIO.recordOutput(this.output.audio);
|
|
336
|
+
|
|
337
|
+
// Start recording to session directory
|
|
338
|
+
const sessionDir = ctx.sessionDirectory;
|
|
339
|
+
if (sessionDir) {
|
|
340
|
+
tasks.push(this._recorderIO.start(`${sessionDir}/audio.ogg`));
|
|
341
|
+
}
|
|
342
|
+
}
|
|
292
343
|
}
|
|
293
344
|
|
|
294
345
|
// TODO(AJS-265): add shutdown callback to job context
|
|
@@ -315,7 +366,7 @@ export class AgentSession<
|
|
|
315
366
|
room,
|
|
316
367
|
inputOptions,
|
|
317
368
|
outputOptions,
|
|
318
|
-
record
|
|
369
|
+
record,
|
|
319
370
|
}: {
|
|
320
371
|
agent: Agent;
|
|
321
372
|
room: Room;
|
|
@@ -327,29 +378,28 @@ export class AgentSession<
|
|
|
327
378
|
return;
|
|
328
379
|
}
|
|
329
380
|
|
|
330
|
-
|
|
381
|
+
let ctx: JobContext | undefined = undefined;
|
|
382
|
+
try {
|
|
383
|
+
ctx = getJobContext();
|
|
331
384
|
|
|
332
|
-
|
|
333
|
-
|
|
385
|
+
if (record === undefined) {
|
|
386
|
+
record = ctx.job.enableRecording;
|
|
387
|
+
}
|
|
334
388
|
|
|
335
|
-
|
|
336
|
-
{ record, enableRecording: ctx.info.job.enableRecording },
|
|
337
|
-
'Configuring session recording',
|
|
338
|
-
);
|
|
389
|
+
this._enableRecording = record;
|
|
339
390
|
|
|
340
|
-
|
|
341
|
-
|
|
391
|
+
if (this._enableRecording) {
|
|
392
|
+
ctx.initRecording();
|
|
393
|
+
}
|
|
394
|
+
} catch (error) {
|
|
395
|
+
// JobContext is not available in evals
|
|
342
396
|
}
|
|
343
397
|
|
|
344
|
-
// Create agent_session as a ROOT span (new trace) to match Python behavior
|
|
345
|
-
// This creates a separate trace for better cloud dashboard organization
|
|
346
398
|
this.sessionSpan = tracer.startSpan({
|
|
347
399
|
name: 'agent_session',
|
|
348
400
|
context: ROOT_CONTEXT,
|
|
349
401
|
});
|
|
350
402
|
|
|
351
|
-
// Set the session span as the active span in the context
|
|
352
|
-
// This ensures all child spans (agent_turn, user_turn, etc.) are parented to it
|
|
353
403
|
this.rootSpanContext = trace.setSpan(ROOT_CONTEXT, this.sessionSpan);
|
|
354
404
|
|
|
355
405
|
await this._startImpl({
|
|
@@ -357,7 +407,6 @@ export class AgentSession<
|
|
|
357
407
|
room,
|
|
358
408
|
inputOptions,
|
|
359
409
|
outputOptions,
|
|
360
|
-
record,
|
|
361
410
|
span: this.sessionSpan,
|
|
362
411
|
});
|
|
363
412
|
}
|
|
@@ -397,7 +446,17 @@ export class AgentSession<
|
|
|
397
446
|
throw new Error('AgentSession is not running');
|
|
398
447
|
}
|
|
399
448
|
|
|
400
|
-
|
|
449
|
+
const doSay = (activity: AgentActivity) => {
|
|
450
|
+
return activity.say(text, options);
|
|
451
|
+
};
|
|
452
|
+
|
|
453
|
+
// attach to the session span if called outside of the AgentSession
|
|
454
|
+
const activeSpan = trace.getActiveSpan();
|
|
455
|
+
if (!activeSpan && this.rootSpanContext) {
|
|
456
|
+
return otelContext.with(this.rootSpanContext, () => doSay(this.activity!));
|
|
457
|
+
}
|
|
458
|
+
|
|
459
|
+
return doSay(this.activity);
|
|
401
460
|
}
|
|
402
461
|
|
|
403
462
|
interrupt() {
|
|
@@ -424,14 +483,25 @@ export class AgentSession<
|
|
|
424
483
|
})
|
|
425
484
|
: undefined;
|
|
426
485
|
|
|
427
|
-
|
|
428
|
-
if (
|
|
429
|
-
|
|
486
|
+
const doGenerateReply = (activity: AgentActivity, nextActivity?: AgentActivity) => {
|
|
487
|
+
if (activity.draining) {
|
|
488
|
+
if (!nextActivity) {
|
|
489
|
+
throw new Error('AgentSession is closing, cannot use generateReply()');
|
|
490
|
+
}
|
|
491
|
+
return nextActivity.generateReply({ userMessage, ...options });
|
|
430
492
|
}
|
|
431
|
-
return
|
|
493
|
+
return activity.generateReply({ userMessage, ...options });
|
|
494
|
+
};
|
|
495
|
+
|
|
496
|
+
// attach to the session span if called outside of the AgentSession
|
|
497
|
+
const activeSpan = trace.getActiveSpan();
|
|
498
|
+
if (!activeSpan && this.rootSpanContext) {
|
|
499
|
+
return otelContext.with(this.rootSpanContext, () =>
|
|
500
|
+
doGenerateReply(this.activity!, this.nextActivity),
|
|
501
|
+
);
|
|
432
502
|
}
|
|
433
503
|
|
|
434
|
-
return this.activity.
|
|
504
|
+
return doGenerateReply(this.activity!, this.nextActivity);
|
|
435
505
|
}
|
|
436
506
|
|
|
437
507
|
private async updateActivity(agent: Agent): Promise<void> {
|
|
@@ -492,13 +562,22 @@ export class AgentSession<
|
|
|
492
562
|
await this.closeImpl(CloseReason.USER_INITIATED);
|
|
493
563
|
}
|
|
494
564
|
|
|
565
|
+
shutdown(options?: { drain?: boolean; reason?: ShutdownReason }): void {
|
|
566
|
+
const { drain = true, reason = CloseReason.USER_INITIATED } = options ?? {};
|
|
567
|
+
|
|
568
|
+
this._closeSoon({
|
|
569
|
+
reason,
|
|
570
|
+
drain,
|
|
571
|
+
});
|
|
572
|
+
}
|
|
573
|
+
|
|
495
574
|
/** @internal */
|
|
496
575
|
_closeSoon({
|
|
497
576
|
reason,
|
|
498
577
|
drain = false,
|
|
499
578
|
error = null,
|
|
500
579
|
}: {
|
|
501
|
-
reason:
|
|
580
|
+
reason: ShutdownReason;
|
|
502
581
|
drain?: boolean;
|
|
503
582
|
error?: RealtimeModelError | STTError | TTSError | LLMError | null;
|
|
504
583
|
}): void {
|
|
@@ -514,6 +593,19 @@ export class AgentSession<
|
|
|
514
593
|
return;
|
|
515
594
|
}
|
|
516
595
|
|
|
596
|
+
// Track error counts per type to implement max_unrecoverable_errors logic
|
|
597
|
+
if (error.type === 'llm_error') {
|
|
598
|
+
this.llmErrorCounts += 1;
|
|
599
|
+
if (this.llmErrorCounts <= this._connOptions.maxUnrecoverableErrors) {
|
|
600
|
+
return;
|
|
601
|
+
}
|
|
602
|
+
} else if (error.type === 'tts_error') {
|
|
603
|
+
this.ttsErrorCounts += 1;
|
|
604
|
+
if (this.ttsErrorCounts <= this._connOptions.maxUnrecoverableErrors) {
|
|
605
|
+
return;
|
|
606
|
+
}
|
|
607
|
+
}
|
|
608
|
+
|
|
517
609
|
this.logger.error(error, 'AgentSession is closing due to unrecoverable error');
|
|
518
610
|
|
|
519
611
|
this.closingTask = (async () => {
|
|
@@ -541,7 +633,9 @@ export class AgentSession<
|
|
|
541
633
|
}
|
|
542
634
|
|
|
543
635
|
if (state === 'speaking') {
|
|
544
|
-
//
|
|
636
|
+
// Reset error counts when agent starts speaking
|
|
637
|
+
this.llmErrorCounts = 0;
|
|
638
|
+
this.ttsErrorCounts = 0;
|
|
545
639
|
|
|
546
640
|
if (this.agentSpeakingSpan === undefined) {
|
|
547
641
|
this.agentSpeakingSpan = tracer.startSpan({
|
|
@@ -657,7 +751,7 @@ export class AgentSession<
|
|
|
657
751
|
}
|
|
658
752
|
|
|
659
753
|
private async closeImpl(
|
|
660
|
-
reason:
|
|
754
|
+
reason: ShutdownReason,
|
|
661
755
|
error: RealtimeModelError | LLMError | TTSError | STTError | null = null,
|
|
662
756
|
drain: boolean = false,
|
|
663
757
|
): Promise<void> {
|
|
@@ -671,7 +765,7 @@ export class AgentSession<
|
|
|
671
765
|
}
|
|
672
766
|
|
|
673
767
|
private async closeImplInner(
|
|
674
|
-
reason:
|
|
768
|
+
reason: ShutdownReason,
|
|
675
769
|
error: RealtimeModelError | LLMError | TTSError | STTError | null = null,
|
|
676
770
|
drain: boolean = false,
|
|
677
771
|
): Promise<void> {
|
|
@@ -694,7 +788,16 @@ export class AgentSession<
|
|
|
694
788
|
await this.activity.drain();
|
|
695
789
|
// wait any uninterruptible speech to finish
|
|
696
790
|
await this.activity.currentSpeech?.waitForPlayout();
|
|
697
|
-
|
|
791
|
+
try {
|
|
792
|
+
this.activity.detachAudioInput();
|
|
793
|
+
} catch (error) {
|
|
794
|
+
// Ignore detach errors during cleanup - source may not have been set
|
|
795
|
+
}
|
|
796
|
+
}
|
|
797
|
+
|
|
798
|
+
// Close recorder before detaching inputs/outputs (keep reference for session report)
|
|
799
|
+
if (this._recorderIO) {
|
|
800
|
+
await this._recorderIO.close();
|
|
698
801
|
}
|
|
699
802
|
|
|
700
803
|
// detach the inputs and outputs
|
|
@@ -730,6 +833,8 @@ export class AgentSession<
|
|
|
730
833
|
this.userState = 'listening';
|
|
731
834
|
this._agentState = 'initializing';
|
|
732
835
|
this.rootSpanContext = undefined;
|
|
836
|
+
this.llmErrorCounts = 0;
|
|
837
|
+
this.ttsErrorCounts = 0;
|
|
733
838
|
|
|
734
839
|
this.logger.info({ reason, error }, 'AgentSession closed');
|
|
735
840
|
}
|
package/src/voice/events.ts
CHANGED
|
@@ -5,9 +5,10 @@ import type {
|
|
|
5
5
|
ChatMessage,
|
|
6
6
|
FunctionCall,
|
|
7
7
|
FunctionCallOutput,
|
|
8
|
+
LLM,
|
|
9
|
+
RealtimeModel,
|
|
8
10
|
RealtimeModelError,
|
|
9
11
|
} from '../llm/index.js';
|
|
10
|
-
import type { LLM, RealtimeModel } from '../llm/index.js';
|
|
11
12
|
import type { LLMError } from '../llm/llm.js';
|
|
12
13
|
import type { AgentMetrics } from '../metrics/base.js';
|
|
13
14
|
import type { STT } from '../stt/index.js';
|
|
@@ -38,6 +39,8 @@ export enum CloseReason {
|
|
|
38
39
|
USER_INITIATED = 'user_initiated',
|
|
39
40
|
}
|
|
40
41
|
|
|
42
|
+
export type ShutdownReason = CloseReason | string;
|
|
43
|
+
|
|
41
44
|
export type SpeechSource = 'say' | 'generate_reply' | 'tool_response';
|
|
42
45
|
|
|
43
46
|
export type UserStateChangedEvent = {
|
|
@@ -231,12 +234,12 @@ export const createErrorEvent = (
|
|
|
231
234
|
export type CloseEvent = {
|
|
232
235
|
type: 'close';
|
|
233
236
|
error: RealtimeModelError | STTError | TTSError | LLMError | null;
|
|
234
|
-
reason:
|
|
237
|
+
reason: ShutdownReason;
|
|
235
238
|
createdAt: number;
|
|
236
239
|
};
|
|
237
240
|
|
|
238
241
|
export const createCloseEvent = (
|
|
239
|
-
reason:
|
|
242
|
+
reason: ShutdownReason,
|
|
240
243
|
error: RealtimeModelError | STTError | TTSError | LLMError | null = null,
|
|
241
244
|
createdAt: number = Date.now(),
|
|
242
245
|
): CloseEvent => ({
|
package/src/voice/generation.ts
CHANGED
|
@@ -24,7 +24,7 @@ import { isZodSchema, parseZodSchema } from '../llm/zod-utils.js';
|
|
|
24
24
|
import { log } from '../log.js';
|
|
25
25
|
import { IdentityTransform } from '../stream/identity_transform.js';
|
|
26
26
|
import { traceTypes, tracer } from '../telemetry/index.js';
|
|
27
|
-
import { Future, Task, shortuuid, toError } from '../utils.js';
|
|
27
|
+
import { Future, Task, shortuuid, toError, waitForAbort } from '../utils.js';
|
|
28
28
|
import { type Agent, type ModelSettings, asyncLocalStorage, isStopResponse } from './agent.js';
|
|
29
29
|
import type { AgentSession } from './agent_session.js';
|
|
30
30
|
import type { AudioOutput, LLMNode, TTSNode, TextOutput } from './io.js';
|
|
@@ -411,17 +411,19 @@ export function performLLMInference(
|
|
|
411
411
|
return;
|
|
412
412
|
}
|
|
413
413
|
|
|
414
|
+
const abortPromise = waitForAbort(signal);
|
|
415
|
+
|
|
414
416
|
// TODO(brian): add support for dynamic tools
|
|
415
417
|
|
|
416
418
|
llmStreamReader = llmStream.getReader();
|
|
417
419
|
while (true) {
|
|
418
|
-
if (signal.aborted)
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
420
|
+
if (signal.aborted) break;
|
|
421
|
+
|
|
422
|
+
const result = await Promise.race([llmStreamReader.read(), abortPromise]);
|
|
423
|
+
if (result === undefined) break;
|
|
424
|
+
|
|
425
|
+
const { done, value: chunk } = result;
|
|
426
|
+
if (done) break;
|
|
425
427
|
|
|
426
428
|
if (typeof chunk === 'string') {
|
|
427
429
|
data.generatedText += chunk;
|
package/src/voice/io.ts
CHANGED
|
@@ -28,6 +28,7 @@ export type TTSNode = (
|
|
|
28
28
|
text: ReadableStream<string>,
|
|
29
29
|
modelSettings: ModelSettings,
|
|
30
30
|
) => Promise<ReadableStream<AudioFrame> | null>;
|
|
31
|
+
|
|
31
32
|
export abstract class AudioInput {
|
|
32
33
|
protected deferredStream: DeferredReadableStream<AudioFrame> =
|
|
33
34
|
new DeferredReadableStream<AudioFrame>();
|
|
@@ -128,6 +129,24 @@ export abstract class AudioOutput extends EventEmitter {
|
|
|
128
129
|
this.nextInChain.onDetached();
|
|
129
130
|
}
|
|
130
131
|
}
|
|
132
|
+
|
|
133
|
+
/**
|
|
134
|
+
* Pause the audio playback
|
|
135
|
+
*/
|
|
136
|
+
pause(): void {
|
|
137
|
+
if (this.nextInChain) {
|
|
138
|
+
this.nextInChain.pause();
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
/**
|
|
143
|
+
* Resume the audio playback
|
|
144
|
+
*/
|
|
145
|
+
resume(): void {
|
|
146
|
+
if (this.nextInChain) {
|
|
147
|
+
this.nextInChain.resume();
|
|
148
|
+
}
|
|
149
|
+
}
|
|
131
150
|
}
|
|
132
151
|
|
|
133
152
|
export interface PlaybackFinishedEvent {
|