@livekit/agents 1.0.22 → 1.0.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/inference/api_protos.cjs +2 -2
- package/dist/inference/api_protos.cjs.map +1 -1
- package/dist/inference/api_protos.d.cts +16 -16
- package/dist/inference/api_protos.d.ts +16 -16
- package/dist/inference/api_protos.js +2 -2
- package/dist/inference/api_protos.js.map +1 -1
- package/dist/ipc/job_proc_lazy_main.cjs +35 -1
- package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
- package/dist/ipc/job_proc_lazy_main.js +13 -1
- package/dist/ipc/job_proc_lazy_main.js.map +1 -1
- package/dist/job.cjs +52 -6
- package/dist/job.cjs.map +1 -1
- package/dist/job.d.cts +2 -0
- package/dist/job.d.ts +2 -0
- package/dist/job.d.ts.map +1 -1
- package/dist/job.js +52 -6
- package/dist/job.js.map +1 -1
- package/dist/llm/llm.cjs +38 -3
- package/dist/llm/llm.cjs.map +1 -1
- package/dist/llm/llm.d.cts +1 -0
- package/dist/llm/llm.d.ts +1 -0
- package/dist/llm/llm.d.ts.map +1 -1
- package/dist/llm/llm.js +38 -3
- package/dist/llm/llm.js.map +1 -1
- package/dist/log.cjs +34 -10
- package/dist/log.cjs.map +1 -1
- package/dist/log.d.cts +7 -0
- package/dist/log.d.ts +7 -0
- package/dist/log.d.ts.map +1 -1
- package/dist/log.js +34 -11
- package/dist/log.js.map +1 -1
- package/dist/telemetry/index.cjs +23 -2
- package/dist/telemetry/index.cjs.map +1 -1
- package/dist/telemetry/index.d.cts +4 -1
- package/dist/telemetry/index.d.ts +4 -1
- package/dist/telemetry/index.d.ts.map +1 -1
- package/dist/telemetry/index.js +27 -2
- package/dist/telemetry/index.js.map +1 -1
- package/dist/telemetry/logging.cjs +65 -0
- package/dist/telemetry/logging.cjs.map +1 -0
- package/dist/telemetry/logging.d.cts +21 -0
- package/dist/telemetry/logging.d.ts +21 -0
- package/dist/telemetry/logging.d.ts.map +1 -0
- package/dist/telemetry/logging.js +40 -0
- package/dist/telemetry/logging.js.map +1 -0
- package/dist/telemetry/otel_http_exporter.cjs +144 -0
- package/dist/telemetry/otel_http_exporter.cjs.map +1 -0
- package/dist/telemetry/otel_http_exporter.d.cts +62 -0
- package/dist/telemetry/otel_http_exporter.d.ts +62 -0
- package/dist/telemetry/otel_http_exporter.d.ts.map +1 -0
- package/dist/telemetry/otel_http_exporter.js +120 -0
- package/dist/telemetry/otel_http_exporter.js.map +1 -0
- package/dist/telemetry/pino_otel_transport.cjs +217 -0
- package/dist/telemetry/pino_otel_transport.cjs.map +1 -0
- package/dist/telemetry/pino_otel_transport.d.cts +58 -0
- package/dist/telemetry/pino_otel_transport.d.ts +58 -0
- package/dist/telemetry/pino_otel_transport.d.ts.map +1 -0
- package/dist/telemetry/pino_otel_transport.js +189 -0
- package/dist/telemetry/pino_otel_transport.js.map +1 -0
- package/dist/telemetry/traces.cjs +225 -16
- package/dist/telemetry/traces.cjs.map +1 -1
- package/dist/telemetry/traces.d.cts +17 -0
- package/dist/telemetry/traces.d.ts +17 -0
- package/dist/telemetry/traces.d.ts.map +1 -1
- package/dist/telemetry/traces.js +211 -14
- package/dist/telemetry/traces.js.map +1 -1
- package/dist/tts/tts.cjs +62 -5
- package/dist/tts/tts.cjs.map +1 -1
- package/dist/tts/tts.d.cts +2 -0
- package/dist/tts/tts.d.ts +2 -0
- package/dist/tts/tts.d.ts.map +1 -1
- package/dist/tts/tts.js +62 -5
- package/dist/tts/tts.js.map +1 -1
- package/dist/utils.cjs +6 -0
- package/dist/utils.cjs.map +1 -1
- package/dist/utils.d.cts +1 -0
- package/dist/utils.d.ts +1 -0
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js +5 -0
- package/dist/utils.js.map +1 -1
- package/dist/voice/agent_activity.cjs +93 -7
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.d.cts +3 -0
- package/dist/voice/agent_activity.d.ts +3 -0
- package/dist/voice/agent_activity.d.ts.map +1 -1
- package/dist/voice/agent_activity.js +93 -7
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/agent_session.cjs +122 -27
- package/dist/voice/agent_session.cjs.map +1 -1
- package/dist/voice/agent_session.d.cts +15 -0
- package/dist/voice/agent_session.d.ts +15 -0
- package/dist/voice/agent_session.d.ts.map +1 -1
- package/dist/voice/agent_session.js +122 -27
- package/dist/voice/agent_session.js.map +1 -1
- package/dist/voice/audio_recognition.cjs +69 -22
- package/dist/voice/audio_recognition.cjs.map +1 -1
- package/dist/voice/audio_recognition.d.cts +5 -0
- package/dist/voice/audio_recognition.d.ts +5 -0
- package/dist/voice/audio_recognition.d.ts.map +1 -1
- package/dist/voice/audio_recognition.js +69 -22
- package/dist/voice/audio_recognition.js.map +1 -1
- package/dist/voice/generation.cjs +43 -3
- package/dist/voice/generation.cjs.map +1 -1
- package/dist/voice/generation.d.ts.map +1 -1
- package/dist/voice/generation.js +43 -3
- package/dist/voice/generation.js.map +1 -1
- package/dist/voice/report.cjs +3 -2
- package/dist/voice/report.cjs.map +1 -1
- package/dist/voice/report.d.cts +7 -1
- package/dist/voice/report.d.ts +7 -1
- package/dist/voice/report.d.ts.map +1 -1
- package/dist/voice/report.js +3 -2
- package/dist/voice/report.js.map +1 -1
- package/package.json +8 -2
- package/src/inference/api_protos.ts +2 -2
- package/src/ipc/job_proc_lazy_main.ts +12 -1
- package/src/job.ts +59 -10
- package/src/llm/llm.ts +48 -5
- package/src/log.ts +52 -15
- package/src/telemetry/index.ts +22 -4
- package/src/telemetry/logging.ts +55 -0
- package/src/telemetry/otel_http_exporter.ts +191 -0
- package/src/telemetry/pino_otel_transport.ts +265 -0
- package/src/telemetry/traces.ts +320 -20
- package/src/tts/tts.ts +71 -9
- package/src/utils.ts +5 -0
- package/src/voice/agent_activity.ts +140 -22
- package/src/voice/agent_session.ts +174 -34
- package/src/voice/audio_recognition.ts +85 -26
- package/src/voice/generation.ts +59 -7
- package/src/voice/report.ts +10 -4
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { ROOT_CONTEXT, context as otelContext, trace } from "@opentelemetry/api";
|
|
1
2
|
import { EventEmitter } from "node:events";
|
|
2
3
|
import {
|
|
3
4
|
LLM as InferenceLLM,
|
|
@@ -7,6 +8,7 @@ import {
|
|
|
7
8
|
import { getJobContext } from "../job.js";
|
|
8
9
|
import { AgentHandoffItem, ChatContext, ChatMessage } from "../llm/chat_context.js";
|
|
9
10
|
import { log } from "../log.js";
|
|
11
|
+
import { traceTypes, tracer } from "../telemetry/index.js";
|
|
10
12
|
import { AgentActivity } from "./agent_activity.js";
|
|
11
13
|
import {
|
|
12
14
|
AgentSessionEventTypes,
|
|
@@ -50,8 +52,17 @@ class AgentSession extends EventEmitter {
|
|
|
50
52
|
_output;
|
|
51
53
|
closingTask = null;
|
|
52
54
|
userAwayTimer = null;
|
|
55
|
+
sessionSpan;
|
|
56
|
+
userSpeakingSpan;
|
|
57
|
+
agentSpeakingSpan;
|
|
58
|
+
/** @internal */
|
|
59
|
+
rootSpanContext;
|
|
53
60
|
/** @internal */
|
|
54
61
|
_recordedEvents = [];
|
|
62
|
+
/** @internal */
|
|
63
|
+
_enableRecording = false;
|
|
64
|
+
/** @internal - Timestamp when the session started (milliseconds) */
|
|
65
|
+
_startedAt;
|
|
55
66
|
constructor(opts) {
|
|
56
67
|
super();
|
|
57
68
|
const {
|
|
@@ -85,7 +96,8 @@ class AgentSession extends EventEmitter {
|
|
|
85
96
|
this._output = new AgentOutput(this.onAudioOutputChanged, this.onTextOutputChanged);
|
|
86
97
|
this._chatCtx = ChatContext.empty();
|
|
87
98
|
this.options = { ...defaultVoiceOptions, ...voiceOptions };
|
|
88
|
-
this.
|
|
99
|
+
this._onUserInputTranscribed = this._onUserInputTranscribed.bind(this);
|
|
100
|
+
this.on(AgentSessionEventTypes.UserInputTranscribed, this._onUserInputTranscribed);
|
|
89
101
|
}
|
|
90
102
|
emit(event, ...args) {
|
|
91
103
|
const eventData = args[0];
|
|
@@ -110,19 +122,15 @@ class AgentSession extends EventEmitter {
|
|
|
110
122
|
set userData(value) {
|
|
111
123
|
this._userData = value;
|
|
112
124
|
}
|
|
113
|
-
async
|
|
114
|
-
// TODO(brian): PR2 - Add setupCloudTracer() call if on LiveKit Cloud with recording enabled
|
|
115
|
-
// TODO(brian): PR3 - Add span: this._sessionSpan = tracer.startSpan('agent_session'), store as instance property
|
|
116
|
-
// TODO(brian): PR4 - Add setupCloudLogger() call in setupCloudTracer() to setup OTEL logging with Pino bridge
|
|
125
|
+
async _startImpl({
|
|
117
126
|
agent,
|
|
118
127
|
room,
|
|
119
128
|
inputOptions,
|
|
120
129
|
outputOptions,
|
|
121
|
-
record
|
|
130
|
+
record,
|
|
131
|
+
span
|
|
122
132
|
}) {
|
|
123
|
-
|
|
124
|
-
return;
|
|
125
|
-
}
|
|
133
|
+
span.setAttribute(traceTypes.ATTR_AGENT_LABEL, agent.id);
|
|
126
134
|
this.agent = agent;
|
|
127
135
|
this._updateAgentState("initializing");
|
|
128
136
|
const tasks = [];
|
|
@@ -169,8 +177,43 @@ class AgentSession extends EventEmitter {
|
|
|
169
177
|
`using transcript io: \`AgentSession\` -> ${this.output.transcription ? "`" + this.output.transcription.constructor.name + "`" : "(none)"}`
|
|
170
178
|
);
|
|
171
179
|
this.started = true;
|
|
180
|
+
this._startedAt = Date.now();
|
|
172
181
|
this._updateAgentState("listening");
|
|
173
182
|
}
|
|
183
|
+
async start({
|
|
184
|
+
agent,
|
|
185
|
+
room,
|
|
186
|
+
inputOptions,
|
|
187
|
+
outputOptions,
|
|
188
|
+
record = true
|
|
189
|
+
}) {
|
|
190
|
+
if (this.started) {
|
|
191
|
+
return;
|
|
192
|
+
}
|
|
193
|
+
const ctx = getJobContext();
|
|
194
|
+
record = record ?? ctx.info.job.enableRecording;
|
|
195
|
+
this._enableRecording = record;
|
|
196
|
+
this.logger.info(
|
|
197
|
+
{ record, enableRecording: ctx.info.job.enableRecording },
|
|
198
|
+
"Configuring session recording"
|
|
199
|
+
);
|
|
200
|
+
if (this._enableRecording) {
|
|
201
|
+
await ctx.initRecording();
|
|
202
|
+
}
|
|
203
|
+
this.sessionSpan = tracer.startSpan({
|
|
204
|
+
name: "agent_session",
|
|
205
|
+
context: ROOT_CONTEXT
|
|
206
|
+
});
|
|
207
|
+
this.rootSpanContext = trace.setSpan(ROOT_CONTEXT, this.sessionSpan);
|
|
208
|
+
await this._startImpl({
|
|
209
|
+
agent,
|
|
210
|
+
room,
|
|
211
|
+
inputOptions,
|
|
212
|
+
outputOptions,
|
|
213
|
+
record,
|
|
214
|
+
span: this.sessionSpan
|
|
215
|
+
});
|
|
216
|
+
}
|
|
174
217
|
updateAgent(agent) {
|
|
175
218
|
this.agent = agent;
|
|
176
219
|
if (this.started) {
|
|
@@ -218,25 +261,31 @@ class AgentSession extends EventEmitter {
|
|
|
218
261
|
return this.activity.generateReply({ userMessage, ...options });
|
|
219
262
|
}
|
|
220
263
|
async updateActivity(agent) {
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
264
|
+
const runWithContext = async () => {
|
|
265
|
+
this.nextActivity = new AgentActivity(agent, this);
|
|
266
|
+
const previousActivity = this.activity;
|
|
267
|
+
if (this.activity) {
|
|
268
|
+
await this.activity.drain();
|
|
269
|
+
await this.activity.close();
|
|
270
|
+
}
|
|
271
|
+
this.activity = this.nextActivity;
|
|
272
|
+
this.nextActivity = void 0;
|
|
273
|
+
this._chatCtx.insert(
|
|
274
|
+
new AgentHandoffItem({
|
|
275
|
+
oldAgentId: previousActivity == null ? void 0 : previousActivity.agent.id,
|
|
276
|
+
newAgentId: agent.id
|
|
277
|
+
})
|
|
278
|
+
);
|
|
279
|
+
this.logger.debug({ previousActivity, agent }, "Agent handoff inserted into chat context");
|
|
280
|
+
await this.activity.start();
|
|
281
|
+
if (this._input.audio) {
|
|
282
|
+
this.activity.attachAudioInput(this._input.audio.stream);
|
|
283
|
+
}
|
|
284
|
+
};
|
|
285
|
+
if (this.rootSpanContext) {
|
|
286
|
+
return otelContext.with(this.rootSpanContext, runWithContext);
|
|
239
287
|
}
|
|
288
|
+
return runWithContext();
|
|
240
289
|
}
|
|
241
290
|
get chatCtx() {
|
|
242
291
|
return this._chatCtx.copy();
|
|
@@ -282,10 +331,25 @@ class AgentSession extends EventEmitter {
|
|
|
282
331
|
this.emit(AgentSessionEventTypes.ConversationItemAdded, createConversationItemAddedEvent(item));
|
|
283
332
|
}
|
|
284
333
|
/** @internal */
|
|
334
|
+
_toolItemsAdded(items) {
|
|
335
|
+
this._chatCtx.insert(items);
|
|
336
|
+
}
|
|
337
|
+
/** @internal */
|
|
285
338
|
_updateAgentState(state) {
|
|
286
339
|
if (this._agentState === state) {
|
|
287
340
|
return;
|
|
288
341
|
}
|
|
342
|
+
if (state === "speaking") {
|
|
343
|
+
if (this.agentSpeakingSpan === void 0) {
|
|
344
|
+
this.agentSpeakingSpan = tracer.startSpan({
|
|
345
|
+
name: "agent_speaking",
|
|
346
|
+
context: this.rootSpanContext
|
|
347
|
+
});
|
|
348
|
+
}
|
|
349
|
+
} else if (this.agentSpeakingSpan !== void 0) {
|
|
350
|
+
this.agentSpeakingSpan.end();
|
|
351
|
+
this.agentSpeakingSpan = void 0;
|
|
352
|
+
}
|
|
289
353
|
const oldState = this._agentState;
|
|
290
354
|
this._agentState = state;
|
|
291
355
|
if (state === "listening" && this.userState === "listening") {
|
|
@@ -303,6 +367,15 @@ class AgentSession extends EventEmitter {
|
|
|
303
367
|
if (this.userState === state) {
|
|
304
368
|
return;
|
|
305
369
|
}
|
|
370
|
+
if (state === "speaking" && this.userSpeakingSpan === void 0) {
|
|
371
|
+
this.userSpeakingSpan = tracer.startSpan({
|
|
372
|
+
name: "user_speaking",
|
|
373
|
+
context: this.rootSpanContext
|
|
374
|
+
});
|
|
375
|
+
} else if (this.userSpeakingSpan !== void 0) {
|
|
376
|
+
this.userSpeakingSpan.end();
|
|
377
|
+
this.userSpeakingSpan = void 0;
|
|
378
|
+
}
|
|
306
379
|
const oldState = this.userState;
|
|
307
380
|
this.userState = state;
|
|
308
381
|
if (state === "listening" && this._agentState === "listening") {
|
|
@@ -354,11 +427,20 @@ class AgentSession extends EventEmitter {
|
|
|
354
427
|
}
|
|
355
428
|
}
|
|
356
429
|
async closeImpl(reason, error = null, drain = false) {
|
|
430
|
+
if (this.rootSpanContext) {
|
|
431
|
+
return otelContext.with(this.rootSpanContext, async () => {
|
|
432
|
+
await this.closeImplInner(reason, error, drain);
|
|
433
|
+
});
|
|
434
|
+
}
|
|
435
|
+
return this.closeImplInner(reason, error, drain);
|
|
436
|
+
}
|
|
437
|
+
async closeImplInner(reason, error = null, drain = false) {
|
|
357
438
|
var _a, _b, _c;
|
|
358
439
|
if (!this.started) {
|
|
359
440
|
return;
|
|
360
441
|
}
|
|
361
442
|
this._cancelUserAwayTimer();
|
|
443
|
+
this.off(AgentSessionEventTypes.UserInputTranscribed, this._onUserInputTranscribed);
|
|
362
444
|
if (this.activity) {
|
|
363
445
|
if (!drain) {
|
|
364
446
|
try {
|
|
@@ -377,10 +459,23 @@ class AgentSession extends EventEmitter {
|
|
|
377
459
|
this.roomIO = void 0;
|
|
378
460
|
await ((_c = this.activity) == null ? void 0 : _c.close());
|
|
379
461
|
this.activity = void 0;
|
|
462
|
+
if (this.sessionSpan) {
|
|
463
|
+
this.sessionSpan.end();
|
|
464
|
+
this.sessionSpan = void 0;
|
|
465
|
+
}
|
|
466
|
+
if (this.userSpeakingSpan) {
|
|
467
|
+
this.userSpeakingSpan.end();
|
|
468
|
+
this.userSpeakingSpan = void 0;
|
|
469
|
+
}
|
|
470
|
+
if (this.agentSpeakingSpan) {
|
|
471
|
+
this.agentSpeakingSpan.end();
|
|
472
|
+
this.agentSpeakingSpan = void 0;
|
|
473
|
+
}
|
|
380
474
|
this.started = false;
|
|
381
475
|
this.emit(AgentSessionEventTypes.Close, createCloseEvent(reason, error));
|
|
382
476
|
this.userState = "listening";
|
|
383
477
|
this._agentState = "initializing";
|
|
478
|
+
this.rootSpanContext = void 0;
|
|
384
479
|
this.logger.info({ reason, error }, "AgentSession closed");
|
|
385
480
|
}
|
|
386
481
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/voice/agent_session.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame, Room } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport { EventEmitter } from 'node:events';\nimport type { ReadableStream } from 'node:stream/web';\nimport {\n LLM as InferenceLLM,\n STT as InferenceSTT,\n TTS as InferenceTTS,\n type LLMModels,\n type STTModelString,\n type TTSModelString,\n} from '../inference/index.js';\nimport { getJobContext } from '../job.js';\nimport { AgentHandoffItem, ChatContext, ChatMessage } from '../llm/chat_context.js';\nimport type { LLM, RealtimeModel, RealtimeModelError, ToolChoice } from '../llm/index.js';\nimport type { LLMError } from '../llm/llm.js';\nimport { log } from '../log.js';\nimport type { STT } from '../stt/index.js';\nimport type { STTError } from '../stt/stt.js';\nimport type { TTS, TTSError } from '../tts/tts.js';\nimport type { VAD } from '../vad.js';\nimport type { Agent } from './agent.js';\nimport { AgentActivity } from './agent_activity.js';\nimport type { _TurnDetector } from './audio_recognition.js';\nimport {\n type AgentEvent,\n AgentSessionEventTypes,\n type AgentState,\n type AgentStateChangedEvent,\n type CloseEvent,\n CloseReason,\n type ConversationItemAddedEvent,\n type ErrorEvent,\n type FunctionToolsExecutedEvent,\n type MetricsCollectedEvent,\n type SpeechCreatedEvent,\n type UserInputTranscribedEvent,\n type UserState,\n type UserStateChangedEvent,\n createAgentStateChangedEvent,\n createCloseEvent,\n createConversationItemAddedEvent,\n createUserStateChangedEvent,\n} from './events.js';\nimport { AgentInput, AgentOutput } from './io.js';\nimport { RoomIO, type RoomInputOptions, type RoomOutputOptions } from './room_io/index.js';\nimport type { UnknownUserData } from './run_context.js';\nimport type { SpeechHandle } from './speech_handle.js';\n\nexport interface VoiceOptions {\n allowInterruptions: boolean;\n discardAudioIfUninterruptible: boolean;\n minInterruptionDuration: number;\n minInterruptionWords: number;\n minEndpointingDelay: number;\n maxEndpointingDelay: number;\n maxToolSteps: number;\n preemptiveGeneration: boolean;\n userAwayTimeout?: number | null;\n}\n\nconst defaultVoiceOptions: VoiceOptions = {\n allowInterruptions: true,\n discardAudioIfUninterruptible: true,\n minInterruptionDuration: 500,\n minInterruptionWords: 0,\n minEndpointingDelay: 500,\n maxEndpointingDelay: 6000,\n maxToolSteps: 3,\n preemptiveGeneration: false,\n userAwayTimeout: 15.0,\n} as const;\n\nexport type TurnDetectionMode = 'stt' | 'vad' | 'realtime_llm' | 'manual' | _TurnDetector;\n\nexport type AgentSessionCallbacks = {\n [AgentSessionEventTypes.UserInputTranscribed]: (ev: UserInputTranscribedEvent) => void;\n [AgentSessionEventTypes.AgentStateChanged]: (ev: AgentStateChangedEvent) => void;\n [AgentSessionEventTypes.UserStateChanged]: (ev: UserStateChangedEvent) => void;\n [AgentSessionEventTypes.ConversationItemAdded]: (ev: ConversationItemAddedEvent) => void;\n [AgentSessionEventTypes.FunctionToolsExecuted]: (ev: FunctionToolsExecutedEvent) => void;\n [AgentSessionEventTypes.MetricsCollected]: (ev: MetricsCollectedEvent) => void;\n [AgentSessionEventTypes.SpeechCreated]: (ev: SpeechCreatedEvent) => void;\n [AgentSessionEventTypes.Error]: (ev: ErrorEvent) => void;\n [AgentSessionEventTypes.Close]: (ev: CloseEvent) => void;\n};\n\nexport type AgentSessionOptions<UserData = UnknownUserData> = {\n turnDetection?: TurnDetectionMode;\n stt?: STT | STTModelString;\n vad?: VAD;\n llm?: LLM | RealtimeModel | LLMModels;\n tts?: TTS | TTSModelString;\n userData?: UserData;\n voiceOptions?: Partial<VoiceOptions>;\n};\n\nexport class AgentSession<\n UserData = UnknownUserData,\n> extends (EventEmitter as new () => TypedEmitter<AgentSessionCallbacks>) {\n vad?: VAD;\n stt?: STT;\n llm?: LLM | RealtimeModel;\n tts?: TTS;\n turnDetection?: TurnDetectionMode;\n\n readonly options: VoiceOptions;\n\n private agent?: Agent;\n private activity?: AgentActivity;\n private nextActivity?: AgentActivity;\n private started = false;\n private userState: UserState = 'listening';\n\n private roomIO?: RoomIO;\n private logger = log();\n\n private _chatCtx: ChatContext;\n private _userData: UserData | undefined;\n private _agentState: AgentState = 'initializing';\n\n private _input: AgentInput;\n private _output: AgentOutput;\n\n private closingTask: Promise<void> | null = null;\n private userAwayTimer: NodeJS.Timeout | null = null;\n\n /** @internal */\n _recordedEvents: AgentEvent[] = [];\n\n constructor(opts: AgentSessionOptions<UserData>) {\n super();\n\n const {\n vad,\n stt,\n llm,\n tts,\n turnDetection,\n userData,\n voiceOptions = defaultVoiceOptions,\n } = opts;\n\n this.vad = vad;\n\n if (typeof stt === 'string') {\n this.stt = InferenceSTT.fromModelString(stt);\n } else {\n this.stt = stt;\n }\n\n if (typeof llm === 'string') {\n this.llm = InferenceLLM.fromModelString(llm);\n } else {\n this.llm = llm;\n }\n\n if (typeof tts === 'string') {\n this.tts = InferenceTTS.fromModelString(tts);\n } else {\n this.tts = tts;\n }\n\n this.turnDetection = turnDetection;\n this._userData = userData;\n\n // configurable IO\n this._input = new AgentInput(this.onAudioInputChanged);\n this._output = new AgentOutput(this.onAudioOutputChanged, this.onTextOutputChanged);\n\n // This is the \"global\" chat context, it holds the entire conversation history\n this._chatCtx = ChatContext.empty();\n this.options = { ...defaultVoiceOptions, ...voiceOptions };\n\n this.on(AgentSessionEventTypes.UserInputTranscribed, this._onUserInputTranscribed.bind(this));\n }\n\n emit<K extends keyof AgentSessionCallbacks>(\n event: K,\n ...args: Parameters<AgentSessionCallbacks[K]>\n ): boolean {\n const eventData = args[0] as AgentEvent;\n this._recordedEvents.push(eventData);\n return super.emit(event, ...args);\n }\n\n get input(): AgentInput {\n return this._input;\n }\n\n get output(): AgentOutput {\n return this._output;\n }\n\n get userData(): UserData {\n if (this._userData === undefined) {\n throw new Error('Voice agent userData is not set');\n }\n\n return this._userData;\n }\n\n get history(): ChatContext {\n return this._chatCtx;\n }\n\n set userData(value: UserData) {\n this._userData = value;\n }\n\n async start({\n // TODO(brian): PR2 - Add setupCloudTracer() call if on LiveKit Cloud with recording enabled\n // TODO(brian): PR3 - Add span: this._sessionSpan = tracer.startSpan('agent_session'), store as instance property\n // TODO(brian): PR4 - Add setupCloudLogger() call in setupCloudTracer() to setup OTEL logging with Pino bridge\n agent,\n room,\n inputOptions,\n outputOptions,\n record = true,\n }: {\n agent: Agent;\n room: Room;\n inputOptions?: Partial<RoomInputOptions>;\n outputOptions?: Partial<RoomOutputOptions>;\n record?: boolean;\n }): Promise<void> {\n if (this.started) {\n return;\n }\n\n this.agent = agent;\n this._updateAgentState('initializing');\n\n const tasks: Promise<void>[] = [];\n // Check for existing input/output configuration and warn if needed\n if (this.input.audio && inputOptions?.audioEnabled !== false) {\n this.logger.warn('RoomIO audio input is enabled but input.audio is already set, ignoring..');\n }\n\n if (this.output.audio && outputOptions?.audioEnabled !== false) {\n this.logger.warn(\n 'RoomIO audio output is enabled but output.audio is already set, ignoring..',\n );\n }\n\n if (this.output.transcription && outputOptions?.transcriptionEnabled !== false) {\n this.logger.warn(\n 'RoomIO transcription output is enabled but output.transcription is already set, ignoring..',\n );\n }\n\n this.roomIO = new RoomIO({\n agentSession: this,\n room,\n inputOptions,\n outputOptions,\n });\n this.roomIO.start();\n\n const ctx = getJobContext();\n if (ctx && ctx.room === room && !room.isConnected) {\n this.logger.debug('Auto-connecting to room via job context');\n tasks.push(ctx.connect());\n }\n\n if (record) {\n if (ctx._primaryAgentSession === undefined) {\n ctx._primaryAgentSession = this;\n } else {\n throw new Error(\n 'Only one `AgentSession` can be the primary at a time. If you want to ignore primary designation, use session.start(record=False).',\n );\n }\n }\n\n // TODO(AJS-265): add shutdown callback to job context\n tasks.push(this.updateActivity(this.agent));\n\n await Promise.allSettled(tasks);\n\n // Log used IO configuration\n this.logger.debug(\n `using audio io: ${this.input.audio ? '`' + this.input.audio.constructor.name + '`' : '(none)'} -> \\`AgentSession\\` -> ${this.output.audio ? '`' + this.output.audio.constructor.name + '`' : '(none)'}`,\n );\n\n this.logger.debug(\n `using transcript io: \\`AgentSession\\` -> ${this.output.transcription ? '`' + this.output.transcription.constructor.name + '`' : '(none)'}`,\n );\n\n this.started = true;\n this._updateAgentState('listening');\n }\n\n updateAgent(agent: Agent): void {\n this.agent = agent;\n\n if (this.started) {\n this.updateActivity(agent);\n }\n }\n\n commitUserTurn() {\n if (!this.activity) {\n throw new Error('AgentSession is not running');\n }\n\n this.activity.commitUserTurn();\n }\n\n clearUserTurn() {\n if (!this.activity) {\n throw new Error('AgentSession is not running');\n }\n this.activity.clearUserTurn();\n }\n\n say(\n text: string | ReadableStream<string>,\n options?: {\n audio?: ReadableStream<AudioFrame>;\n allowInterruptions?: boolean;\n addToChatCtx?: boolean;\n },\n ): SpeechHandle {\n if (!this.activity) {\n throw new Error('AgentSession is not running');\n }\n\n return this.activity.say(text, options);\n }\n\n interrupt() {\n if (!this.activity) {\n throw new Error('AgentSession is not running');\n }\n return this.activity.interrupt();\n }\n\n generateReply(options?: {\n userInput?: string;\n instructions?: string;\n toolChoice?: ToolChoice;\n allowInterruptions?: boolean;\n }): SpeechHandle {\n if (!this.activity) {\n throw new Error('AgentSession is not running');\n }\n\n const userMessage = options?.userInput\n ? new ChatMessage({\n role: 'user',\n content: options.userInput,\n })\n : undefined;\n\n if (this.activity.draining) {\n if (!this.nextActivity) {\n throw new Error('AgentSession is closing, cannot use generateReply()');\n }\n return this.nextActivity.generateReply({ userMessage, ...options });\n }\n\n return this.activity.generateReply({ userMessage, ...options });\n }\n\n private async updateActivity(agent: Agent): Promise<void> {\n // TODO(AJS-129): add lock to agent activity core lifecycle\n this.nextActivity = new AgentActivity(agent, this);\n\n const previousActivity = this.activity;\n\n if (this.activity) {\n await this.activity.drain();\n await this.activity.close();\n }\n\n this.activity = this.nextActivity;\n this.nextActivity = undefined;\n\n this._chatCtx.insert(\n new AgentHandoffItem({\n oldAgentId: previousActivity?.agent.id,\n newAgentId: agent.id,\n }),\n );\n this.logger.debug({ previousActivity, agent }, 'Agent handoff inserted into chat context');\n\n await this.activity.start();\n\n if (this._input.audio) {\n this.activity.attachAudioInput(this._input.audio.stream);\n }\n }\n\n get chatCtx(): ChatContext {\n return this._chatCtx.copy();\n }\n\n get agentState(): AgentState {\n return this._agentState;\n }\n\n get currentAgent(): Agent {\n if (!this.agent) {\n throw new Error('AgentSession is not running');\n }\n\n return this.agent;\n }\n\n async close(): Promise<void> {\n await this.closeImpl(CloseReason.USER_INITIATED);\n }\n\n /** @internal */\n _closeSoon({\n reason,\n drain = false,\n error = null,\n }: {\n reason: CloseReason;\n drain?: boolean;\n error?: RealtimeModelError | STTError | TTSError | LLMError | null;\n }): void {\n if (this.closingTask) {\n return;\n }\n this.closeImpl(reason, error, drain);\n }\n\n /** @internal */\n _onError(error: RealtimeModelError | STTError | TTSError | LLMError): void {\n if (this.closingTask || error.recoverable) {\n return;\n }\n\n this.logger.error(error, 'AgentSession is closing due to unrecoverable error');\n\n this.closingTask = (async () => {\n await this.closeImpl(CloseReason.ERROR, error);\n })().then(() => {\n this.closingTask = null;\n });\n }\n\n /** @internal */\n _conversationItemAdded(item: ChatMessage): void {\n this._chatCtx.insert(item);\n this.emit(AgentSessionEventTypes.ConversationItemAdded, createConversationItemAddedEvent(item));\n }\n\n /** @internal */\n _updateAgentState(state: AgentState) {\n if (this._agentState === state) {\n return;\n }\n\n // TODO(brian): PR3 - Add span: if state === 'speaking' && !this._agentSpeakingSpan, create tracer.startSpan('agent_speaking') with participant attributes\n // TODO(brian): PR3 - Add span: if state !== 'speaking' && this._agentSpeakingSpan, end and clear this._agentSpeakingSpan\n const oldState = this._agentState;\n this._agentState = state;\n\n // Handle user away timer based on state changes\n if (state === 'listening' && this.userState === 'listening') {\n this._setUserAwayTimer();\n } else {\n this._cancelUserAwayTimer();\n }\n\n this.emit(\n AgentSessionEventTypes.AgentStateChanged,\n createAgentStateChangedEvent(oldState, state),\n );\n }\n\n /** @internal */\n _updateUserState(state: UserState, _lastSpeakingTime?: number) {\n if (this.userState === state) {\n return;\n }\n\n // TODO(brian): PR3 - Add span: if state === 'speaking' && !this._userSpeakingSpan, create tracer.startSpan('user_speaking') with participant attributes\n // TODO(brian): PR3 - Add span: if state !== 'speaking' && this._userSpeakingSpan, end and clear this._userSpeakingSpan\n const oldState = this.userState;\n this.userState = state;\n\n // Handle user away timer based on state changes\n if (state === 'listening' && this._agentState === 'listening') {\n this._setUserAwayTimer();\n } else {\n this._cancelUserAwayTimer();\n }\n\n this.emit(\n AgentSessionEventTypes.UserStateChanged,\n createUserStateChangedEvent(oldState, state),\n );\n }\n\n // -- User changed input/output streams/sinks --\n private onAudioInputChanged(): void {\n if (!this.started) {\n return;\n }\n\n if (this.activity && this._input.audio) {\n this.activity.attachAudioInput(this._input.audio.stream);\n }\n }\n\n private onAudioOutputChanged(): void {}\n\n private onTextOutputChanged(): void {}\n\n private _setUserAwayTimer(): void {\n this._cancelUserAwayTimer();\n\n if (this.options.userAwayTimeout === null || this.options.userAwayTimeout === undefined) {\n return;\n }\n\n if (this.roomIO && !this.roomIO.isParticipantAvailable) {\n return;\n }\n\n this.userAwayTimer = setTimeout(() => {\n this.logger.debug('User away timeout triggered');\n this._updateUserState('away');\n }, this.options.userAwayTimeout * 1000);\n }\n\n private _cancelUserAwayTimer(): void {\n if (this.userAwayTimer !== null) {\n clearTimeout(this.userAwayTimer);\n this.userAwayTimer = null;\n }\n }\n\n private _onUserInputTranscribed(ev: UserInputTranscribedEvent): void {\n if (this.userState === 'away' && ev.isFinal) {\n this.logger.debug('User returned from away state due to speech input');\n this._updateUserState('listening');\n }\n }\n\n private async closeImpl(\n reason: CloseReason,\n error: RealtimeModelError | LLMError | TTSError | STTError | null = null,\n drain: boolean = false,\n ): Promise<void> {\n if (!this.started) {\n return;\n }\n\n this._cancelUserAwayTimer();\n\n if (this.activity) {\n if (!drain) {\n try {\n this.activity.interrupt();\n } catch (error) {\n // uninterruptible speech [copied from python]\n // TODO(shubhra): force interrupt or wait for it to finish?\n // it might be an audio played from the error callback\n }\n }\n await this.activity.drain();\n // wait any uninterruptible speech to finish\n await this.activity.currentSpeech?.waitForPlayout();\n this.activity.detachAudioInput();\n }\n\n // detach the inputs and outputs\n this.input.audio = null;\n this.output.audio = null;\n this.output.transcription = null;\n\n await this.roomIO?.close();\n this.roomIO = undefined;\n\n await this.activity?.close();\n this.activity = undefined;\n\n this.started = false;\n\n this.emit(AgentSessionEventTypes.Close, createCloseEvent(reason, error));\n\n this.userState = 'listening';\n this._agentState = 'initializing';\n\n this.logger.info({ reason, error }, 'AgentSession closed');\n }\n}\n"],"mappings":"AAKA,SAAS,oBAAoB;AAE7B;AAAA,EACE,OAAO;AAAA,EACP,OAAO;AAAA,EACP,OAAO;AAAA,OAIF;AACP,SAAS,qBAAqB;AAC9B,SAAS,kBAAkB,aAAa,mBAAmB;AAG3D,SAAS,WAAW;AAMpB,SAAS,qBAAqB;AAE9B;AAAA,EAEE;AAAA,EAIA;AAAA,EASA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP,SAAS,YAAY,mBAAmB;AACxC,SAAS,cAA6D;AAgBtE,MAAM,sBAAoC;AAAA,EACxC,oBAAoB;AAAA,EACpB,+BAA+B;AAAA,EAC/B,yBAAyB;AAAA,EACzB,sBAAsB;AAAA,EACtB,qBAAqB;AAAA,EACrB,qBAAqB;AAAA,EACrB,cAAc;AAAA,EACd,sBAAsB;AAAA,EACtB,iBAAiB;AACnB;AA0BO,MAAM,qBAEF,aAA+D;AAAA,EACxE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAES;AAAA,EAED;AAAA,EACA;AAAA,EACA;AAAA,EACA,UAAU;AAAA,EACV,YAAuB;AAAA,EAEvB;AAAA,EACA,SAAS,IAAI;AAAA,EAEb;AAAA,EACA;AAAA,EACA,cAA0B;AAAA,EAE1B;AAAA,EACA;AAAA,EAEA,cAAoC;AAAA,EACpC,gBAAuC;AAAA;AAAA,EAG/C,kBAAgC,CAAC;AAAA,EAEjC,YAAY,MAAqC;AAC/C,UAAM;AAEN,UAAM;AAAA,MACJ;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA,eAAe;AAAA,IACjB,IAAI;AAEJ,SAAK,MAAM;AAEX,QAAI,OAAO,QAAQ,UAAU;AAC3B,WAAK,MAAM,aAAa,gBAAgB,GAAG;AAAA,IAC7C,OAAO;AACL,WAAK,MAAM;AAAA,IACb;AAEA,QAAI,OAAO,QAAQ,UAAU;AAC3B,WAAK,MAAM,aAAa,gBAAgB,GAAG;AAAA,IAC7C,OAAO;AACL,WAAK,MAAM;AAAA,IACb;AAEA,QAAI,OAAO,QAAQ,UAAU;AAC3B,WAAK,MAAM,aAAa,gBAAgB,GAAG;AAAA,IAC7C,OAAO;AACL,WAAK,MAAM;AAAA,IACb;AAEA,SAAK,gBAAgB;AACrB,SAAK,YAAY;AAGjB,SAAK,SAAS,IAAI,WAAW,KAAK,mBAAmB;AACrD,SAAK,UAAU,IAAI,YAAY,KAAK,sBAAsB,KAAK,mBAAmB;AAGlF,SAAK,WAAW,YAAY,MAAM;AAClC,SAAK,UAAU,EAAE,GAAG,qBAAqB,GAAG,aAAa;AAEzD,SAAK,GAAG,uBAAuB,sBAAsB,KAAK,wBAAwB,KAAK,IAAI,CAAC;AAAA,EAC9F;AAAA,EAEA,KACE,UACG,MACM;AACT,UAAM,YAAY,KAAK,CAAC;AACxB,SAAK,gBAAgB,KAAK,SAAS;AACnC,WAAO,MAAM,KAAK,OAAO,GAAG,IAAI;AAAA,EAClC;AAAA,EAEA,IAAI,QAAoB;AACtB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,SAAsB;AACxB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,WAAqB;AACvB,QAAI,KAAK,cAAc,QAAW;AAChC,YAAM,IAAI,MAAM,iCAAiC;AAAA,IACnD;AAEA,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,UAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,SAAS,OAAiB;AAC5B,SAAK,YAAY;AAAA,EACnB;AAAA,EAEA,MAAM,MAAM;AAAA;AAAA;AAAA;AAAA,IAIV;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,SAAS;AAAA,EACX,GAMkB;AAChB,QAAI,KAAK,SAAS;AAChB;AAAA,IACF;AAEA,SAAK,QAAQ;AACb,SAAK,kBAAkB,cAAc;AAErC,UAAM,QAAyB,CAAC;AAEhC,QAAI,KAAK,MAAM,UAAS,6CAAc,kBAAiB,OAAO;AAC5D,WAAK,OAAO,KAAK,0EAA0E;AAAA,IAC7F;AAEA,QAAI,KAAK,OAAO,UAAS,+CAAe,kBAAiB,OAAO;AAC9D,WAAK,OAAO;AAAA,QACV;AAAA,MACF;AAAA,IACF;AAEA,QAAI,KAAK,OAAO,kBAAiB,+CAAe,0BAAyB,OAAO;AAC9E,WAAK,OAAO;AAAA,QACV;AAAA,MACF;AAAA,IACF;AAEA,SAAK,SAAS,IAAI,OAAO;AAAA,MACvB,cAAc;AAAA,MACd;AAAA,MACA;AAAA,MACA;AAAA,IACF,CAAC;AACD,SAAK,OAAO,MAAM;AAElB,UAAM,MAAM,cAAc;AAC1B,QAAI,OAAO,IAAI,SAAS,QAAQ,CAAC,KAAK,aAAa;AACjD,WAAK,OAAO,MAAM,yCAAyC;AAC3D,YAAM,KAAK,IAAI,QAAQ,CAAC;AAAA,IAC1B;AAEA,QAAI,QAAQ;AACV,UAAI,IAAI,yBAAyB,QAAW;AAC1C,YAAI,uBAAuB;AAAA,MAC7B,OAAO;AACL,cAAM,IAAI;AAAA,UACR;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAGA,UAAM,KAAK,KAAK,eAAe,KAAK,KAAK,CAAC;AAE1C,UAAM,QAAQ,WAAW,KAAK;AAG9B,SAAK,OAAO;AAAA,MACV,mBAAmB,KAAK,MAAM,QAAQ,MAAM,KAAK,MAAM,MAAM,YAAY,OAAO,MAAM,QAAQ,2BAA2B,KAAK,OAAO,QAAQ,MAAM,KAAK,OAAO,MAAM,YAAY,OAAO,MAAM,QAAQ;AAAA,IACxM;AAEA,SAAK,OAAO;AAAA,MACV,4CAA4C,KAAK,OAAO,gBAAgB,MAAM,KAAK,OAAO,cAAc,YAAY,OAAO,MAAM,QAAQ;AAAA,IAC3I;AAEA,SAAK,UAAU;AACf,SAAK,kBAAkB,WAAW;AAAA,EACpC;AAAA,EAEA,YAAY,OAAoB;AAC9B,SAAK,QAAQ;AAEb,QAAI,KAAK,SAAS;AAChB,WAAK,eAAe,KAAK;AAAA,IAC3B;AAAA,EACF;AAAA,EAEA,iBAAiB;AACf,QAAI,CAAC,KAAK,UAAU;AAClB,YAAM,IAAI,MAAM,6BAA6B;AAAA,IAC/C;AAEA,SAAK,SAAS,eAAe;AAAA,EAC/B;AAAA,EAEA,gBAAgB;AACd,QAAI,CAAC,KAAK,UAAU;AAClB,YAAM,IAAI,MAAM,6BAA6B;AAAA,IAC/C;AACA,SAAK,SAAS,cAAc;AAAA,EAC9B;AAAA,EAEA,IACE,MACA,SAKc;AACd,QAAI,CAAC,KAAK,UAAU;AAClB,YAAM,IAAI,MAAM,6BAA6B;AAAA,IAC/C;AAEA,WAAO,KAAK,SAAS,IAAI,MAAM,OAAO;AAAA,EACxC;AAAA,EAEA,YAAY;AACV,QAAI,CAAC,KAAK,UAAU;AAClB,YAAM,IAAI,MAAM,6BAA6B;AAAA,IAC/C;AACA,WAAO,KAAK,SAAS,UAAU;AAAA,EACjC;AAAA,EAEA,cAAc,SAKG;AACf,QAAI,CAAC,KAAK,UAAU;AAClB,YAAM,IAAI,MAAM,6BAA6B;AAAA,IAC/C;AAEA,UAAM,eAAc,mCAAS,aACzB,IAAI,YAAY;AAAA,MACd,MAAM;AAAA,MACN,SAAS,QAAQ;AAAA,IACnB,CAAC,IACD;AAEJ,QAAI,KAAK,SAAS,UAAU;AAC1B,UAAI,CAAC,KAAK,cAAc;AACtB,cAAM,IAAI,MAAM,qDAAqD;AAAA,MACvE;AACA,aAAO,KAAK,aAAa,cAAc,EAAE,aAAa,GAAG,QAAQ,CAAC;AAAA,IACpE;AAEA,WAAO,KAAK,SAAS,cAAc,EAAE,aAAa,GAAG,QAAQ,CAAC;AAAA,EAChE;AAAA,EAEA,MAAc,eAAe,OAA6B;AAExD,SAAK,eAAe,IAAI,cAAc,OAAO,IAAI;AAEjD,UAAM,mBAAmB,KAAK;AAE9B,QAAI,KAAK,UAAU;AACjB,YAAM,KAAK,SAAS,MAAM;AAC1B,YAAM,KAAK,SAAS,MAAM;AAAA,IAC5B;AAEA,SAAK,WAAW,KAAK;AACrB,SAAK,eAAe;AAEpB,SAAK,SAAS;AAAA,MACZ,IAAI,iBAAiB;AAAA,QACnB,YAAY,qDAAkB,MAAM;AAAA,QACpC,YAAY,MAAM;AAAA,MACpB,CAAC;AAAA,IACH;AACA,SAAK,OAAO,MAAM,EAAE,kBAAkB,MAAM,GAAG,0CAA0C;AAEzF,UAAM,KAAK,SAAS,MAAM;AAE1B,QAAI,KAAK,OAAO,OAAO;AACrB,WAAK,SAAS,iBAAiB,KAAK,OAAO,MAAM,MAAM;AAAA,IACzD;AAAA,EACF;AAAA,EAEA,IAAI,UAAuB;AACzB,WAAO,KAAK,SAAS,KAAK;AAAA,EAC5B;AAAA,EAEA,IAAI,aAAyB;AAC3B,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,eAAsB;AACxB,QAAI,CAAC,KAAK,OAAO;AACf,YAAM,IAAI,MAAM,6BAA6B;AAAA,IAC/C;AAEA,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,QAAuB;AAC3B,UAAM,KAAK,UAAU,YAAY,cAAc;AAAA,EACjD;AAAA;AAAA,EAGA,WAAW;AAAA,IACT;AAAA,IACA,QAAQ;AAAA,IACR,QAAQ;AAAA,EACV,GAIS;AACP,QAAI,KAAK,aAAa;AACpB;AAAA,IACF;AACA,SAAK,UAAU,QAAQ,OAAO,KAAK;AAAA,EACrC;AAAA;AAAA,EAGA,SAAS,OAAkE;AACzE,QAAI,KAAK,eAAe,MAAM,aAAa;AACzC;AAAA,IACF;AAEA,SAAK,OAAO,MAAM,OAAO,oDAAoD;AAE7E,SAAK,eAAe,YAAY;AAC9B,YAAM,KAAK,UAAU,YAAY,OAAO,KAAK;AAAA,IAC/C,GAAG,EAAE,KAAK,MAAM;AACd,WAAK,cAAc;AAAA,IACrB,CAAC;AAAA,EACH;AAAA;AAAA,EAGA,uBAAuB,MAAyB;AAC9C,SAAK,SAAS,OAAO,IAAI;AACzB,SAAK,KAAK,uBAAuB,uBAAuB,iCAAiC,IAAI,CAAC;AAAA,EAChG;AAAA;AAAA,EAGA,kBAAkB,OAAmB;AACnC,QAAI,KAAK,gBAAgB,OAAO;AAC9B;AAAA,IACF;AAIA,UAAM,WAAW,KAAK;AACtB,SAAK,cAAc;AAGnB,QAAI,UAAU,eAAe,KAAK,cAAc,aAAa;AAC3D,WAAK,kBAAkB;AAAA,IACzB,OAAO;AACL,WAAK,qBAAqB;AAAA,IAC5B;AAEA,SAAK;AAAA,MACH,uBAAuB;AAAA,MACvB,6BAA6B,UAAU,KAAK;AAAA,IAC9C;AAAA,EACF;AAAA;AAAA,EAGA,iBAAiB,OAAkB,mBAA4B;AAC7D,QAAI,KAAK,cAAc,OAAO;AAC5B;AAAA,IACF;AAIA,UAAM,WAAW,KAAK;AACtB,SAAK,YAAY;AAGjB,QAAI,UAAU,eAAe,KAAK,gBAAgB,aAAa;AAC7D,WAAK,kBAAkB;AAAA,IACzB,OAAO;AACL,WAAK,qBAAqB;AAAA,IAC5B;AAEA,SAAK;AAAA,MACH,uBAAuB;AAAA,MACvB,4BAA4B,UAAU,KAAK;AAAA,IAC7C;AAAA,EACF;AAAA;AAAA,EAGQ,sBAA4B;AAClC,QAAI,CAAC,KAAK,SAAS;AACjB;AAAA,IACF;AAEA,QAAI,KAAK,YAAY,KAAK,OAAO,OAAO;AACtC,WAAK,SAAS,iBAAiB,KAAK,OAAO,MAAM,MAAM;AAAA,IACzD;AAAA,EACF;AAAA,EAEQ,uBAA6B;AAAA,EAAC;AAAA,EAE9B,sBAA4B;AAAA,EAAC;AAAA,EAE7B,oBAA0B;AAChC,SAAK,qBAAqB;AAE1B,QAAI,KAAK,QAAQ,oBAAoB,QAAQ,KAAK,QAAQ,oBAAoB,QAAW;AACvF;AAAA,IACF;AAEA,QAAI,KAAK,UAAU,CAAC,KAAK,OAAO,wBAAwB;AACtD;AAAA,IACF;AAEA,SAAK,gBAAgB,WAAW,MAAM;AACpC,WAAK,OAAO,MAAM,6BAA6B;AAC/C,WAAK,iBAAiB,MAAM;AAAA,IAC9B,GAAG,KAAK,QAAQ,kBAAkB,GAAI;AAAA,EACxC;AAAA,EAEQ,uBAA6B;AACnC,QAAI,KAAK,kBAAkB,MAAM;AAC/B,mBAAa,KAAK,aAAa;AAC/B,WAAK,gBAAgB;AAAA,IACvB;AAAA,EACF;AAAA,EAEQ,wBAAwB,IAAqC;AACnE,QAAI,KAAK,cAAc,UAAU,GAAG,SAAS;AAC3C,WAAK,OAAO,MAAM,mDAAmD;AACrE,WAAK,iBAAiB,WAAW;AAAA,IACnC;AAAA,EACF;AAAA,EAEA,MAAc,UACZ,QACA,QAAoE,MACpE,QAAiB,OACF;AAxiBnB;AAyiBI,QAAI,CAAC,KAAK,SAAS;AACjB;AAAA,IACF;AAEA,SAAK,qBAAqB;AAE1B,QAAI,KAAK,UAAU;AACjB,UAAI,CAAC,OAAO;AACV,YAAI;AACF,eAAK,SAAS,UAAU;AAAA,QAC1B,SAASA,QAAO;AAAA,QAIhB;AAAA,MACF;AACA,YAAM,KAAK,SAAS,MAAM;AAE1B,cAAM,UAAK,SAAS,kBAAd,mBAA6B;AACnC,WAAK,SAAS,iBAAiB;AAAA,IACjC;AAGA,SAAK,MAAM,QAAQ;AACnB,SAAK,OAAO,QAAQ;AACpB,SAAK,OAAO,gBAAgB;AAE5B,YAAM,UAAK,WAAL,mBAAa;AACnB,SAAK,SAAS;AAEd,YAAM,UAAK,aAAL,mBAAe;AACrB,SAAK,WAAW;AAEhB,SAAK,UAAU;AAEf,SAAK,KAAK,uBAAuB,OAAO,iBAAiB,QAAQ,KAAK,CAAC;AAEvE,SAAK,YAAY;AACjB,SAAK,cAAc;AAEnB,SAAK,OAAO,KAAK,EAAE,QAAQ,MAAM,GAAG,qBAAqB;AAAA,EAC3D;AACF;","names":["error"]}
|
|
1
|
+
{"version":3,"sources":["../../src/voice/agent_session.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2024 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type { AudioFrame, Room } from '@livekit/rtc-node';\nimport type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';\nimport type { Context, Span } from '@opentelemetry/api';\nimport { ROOT_CONTEXT, context as otelContext, trace } from '@opentelemetry/api';\nimport { EventEmitter } from 'node:events';\nimport type { ReadableStream } from 'node:stream/web';\nimport {\n LLM as InferenceLLM,\n STT as InferenceSTT,\n TTS as InferenceTTS,\n type LLMModels,\n type STTModelString,\n type TTSModelString,\n} from '../inference/index.js';\nimport { getJobContext } from '../job.js';\nimport type { FunctionCall, FunctionCallOutput } from '../llm/chat_context.js';\nimport { AgentHandoffItem, ChatContext, ChatMessage } from '../llm/chat_context.js';\nimport type { LLM, RealtimeModel, RealtimeModelError, ToolChoice } from '../llm/index.js';\nimport type { LLMError } from '../llm/llm.js';\nimport { log } from '../log.js';\nimport type { STT } from '../stt/index.js';\nimport type { STTError } from '../stt/stt.js';\nimport { traceTypes, tracer } from '../telemetry/index.js';\nimport type { TTS, TTSError } from '../tts/tts.js';\nimport type { VAD } from '../vad.js';\nimport type { Agent } from './agent.js';\nimport { AgentActivity } from './agent_activity.js';\nimport type { _TurnDetector } from './audio_recognition.js';\nimport {\n type AgentEvent,\n AgentSessionEventTypes,\n type AgentState,\n type AgentStateChangedEvent,\n type CloseEvent,\n CloseReason,\n type ConversationItemAddedEvent,\n type ErrorEvent,\n type FunctionToolsExecutedEvent,\n type MetricsCollectedEvent,\n type SpeechCreatedEvent,\n type UserInputTranscribedEvent,\n type UserState,\n type UserStateChangedEvent,\n createAgentStateChangedEvent,\n createCloseEvent,\n createConversationItemAddedEvent,\n createUserStateChangedEvent,\n} from './events.js';\nimport { AgentInput, AgentOutput } from './io.js';\nimport { RoomIO, type RoomInputOptions, type RoomOutputOptions } from './room_io/index.js';\nimport type { UnknownUserData } from './run_context.js';\nimport type { SpeechHandle } from './speech_handle.js';\n\nexport interface VoiceOptions {\n allowInterruptions: boolean;\n discardAudioIfUninterruptible: boolean;\n minInterruptionDuration: number;\n minInterruptionWords: number;\n minEndpointingDelay: number;\n maxEndpointingDelay: number;\n maxToolSteps: number;\n preemptiveGeneration: boolean;\n userAwayTimeout?: number | null;\n}\n\nconst defaultVoiceOptions: VoiceOptions = {\n allowInterruptions: true,\n discardAudioIfUninterruptible: true,\n minInterruptionDuration: 500,\n minInterruptionWords: 0,\n minEndpointingDelay: 500,\n maxEndpointingDelay: 6000,\n maxToolSteps: 3,\n preemptiveGeneration: false,\n userAwayTimeout: 15.0,\n} as const;\n\nexport type TurnDetectionMode = 'stt' | 'vad' | 'realtime_llm' | 'manual' | _TurnDetector;\n\nexport type AgentSessionCallbacks = {\n [AgentSessionEventTypes.UserInputTranscribed]: (ev: UserInputTranscribedEvent) => void;\n [AgentSessionEventTypes.AgentStateChanged]: (ev: AgentStateChangedEvent) => void;\n [AgentSessionEventTypes.UserStateChanged]: (ev: UserStateChangedEvent) => void;\n [AgentSessionEventTypes.ConversationItemAdded]: (ev: ConversationItemAddedEvent) => void;\n [AgentSessionEventTypes.FunctionToolsExecuted]: (ev: FunctionToolsExecutedEvent) => void;\n [AgentSessionEventTypes.MetricsCollected]: (ev: MetricsCollectedEvent) => void;\n [AgentSessionEventTypes.SpeechCreated]: (ev: SpeechCreatedEvent) => void;\n [AgentSessionEventTypes.Error]: (ev: ErrorEvent) => void;\n [AgentSessionEventTypes.Close]: (ev: CloseEvent) => void;\n};\n\nexport type AgentSessionOptions<UserData = UnknownUserData> = {\n turnDetection?: TurnDetectionMode;\n stt?: STT | STTModelString;\n vad?: VAD;\n llm?: LLM | RealtimeModel | LLMModels;\n tts?: TTS | TTSModelString;\n userData?: UserData;\n voiceOptions?: Partial<VoiceOptions>;\n};\n\nexport class AgentSession<\n UserData = UnknownUserData,\n> extends (EventEmitter as new () => TypedEmitter<AgentSessionCallbacks>) {\n vad?: VAD;\n stt?: STT;\n llm?: LLM | RealtimeModel;\n tts?: TTS;\n turnDetection?: TurnDetectionMode;\n\n readonly options: VoiceOptions;\n\n private agent?: Agent;\n private activity?: AgentActivity;\n private nextActivity?: AgentActivity;\n private started = false;\n private userState: UserState = 'listening';\n\n private roomIO?: RoomIO;\n private logger = log();\n\n private _chatCtx: ChatContext;\n private _userData: UserData | undefined;\n private _agentState: AgentState = 'initializing';\n\n private _input: AgentInput;\n private _output: AgentOutput;\n\n private closingTask: Promise<void> | null = null;\n private userAwayTimer: NodeJS.Timeout | null = null;\n\n private sessionSpan?: Span;\n private userSpeakingSpan?: Span;\n private agentSpeakingSpan?: Span;\n\n /** @internal */\n rootSpanContext?: Context;\n\n /** @internal */\n _recordedEvents: AgentEvent[] = [];\n\n /** @internal */\n _enableRecording = false;\n\n /** @internal - Timestamp when the session started (milliseconds) */\n _startedAt?: number;\n\n constructor(opts: AgentSessionOptions<UserData>) {\n super();\n\n const {\n vad,\n stt,\n llm,\n tts,\n turnDetection,\n userData,\n voiceOptions = defaultVoiceOptions,\n } = opts;\n\n this.vad = vad;\n\n if (typeof stt === 'string') {\n this.stt = InferenceSTT.fromModelString(stt);\n } else {\n this.stt = stt;\n }\n\n if (typeof llm === 'string') {\n this.llm = InferenceLLM.fromModelString(llm);\n } else {\n this.llm = llm;\n }\n\n if (typeof tts === 'string') {\n this.tts = InferenceTTS.fromModelString(tts);\n } else {\n this.tts = tts;\n }\n\n this.turnDetection = turnDetection;\n this._userData = userData;\n\n // configurable IO\n this._input = new AgentInput(this.onAudioInputChanged);\n this._output = new AgentOutput(this.onAudioOutputChanged, this.onTextOutputChanged);\n\n // This is the \"global\" chat context, it holds the entire conversation history\n this._chatCtx = ChatContext.empty();\n this.options = { ...defaultVoiceOptions, ...voiceOptions };\n\n this._onUserInputTranscribed = this._onUserInputTranscribed.bind(this);\n this.on(AgentSessionEventTypes.UserInputTranscribed, this._onUserInputTranscribed);\n }\n\n emit<K extends keyof AgentSessionCallbacks>(\n event: K,\n ...args: Parameters<AgentSessionCallbacks[K]>\n ): boolean {\n const eventData = args[0] as AgentEvent;\n this._recordedEvents.push(eventData);\n return super.emit(event, ...args);\n }\n\n get input(): AgentInput {\n return this._input;\n }\n\n get output(): AgentOutput {\n return this._output;\n }\n\n get userData(): UserData {\n if (this._userData === undefined) {\n throw new Error('Voice agent userData is not set');\n }\n\n return this._userData;\n }\n\n get history(): ChatContext {\n return this._chatCtx;\n }\n\n set userData(value: UserData) {\n this._userData = value;\n }\n\n private async _startImpl({\n agent,\n room,\n inputOptions,\n outputOptions,\n record,\n span,\n }: {\n agent: Agent;\n room: Room;\n inputOptions?: Partial<RoomInputOptions>;\n outputOptions?: Partial<RoomOutputOptions>;\n record: boolean;\n span: Span;\n }): Promise<void> {\n span.setAttribute(traceTypes.ATTR_AGENT_LABEL, agent.id);\n\n this.agent = agent;\n this._updateAgentState('initializing');\n\n const tasks: Promise<void>[] = [];\n // Check for existing input/output configuration and warn if needed\n if (this.input.audio && inputOptions?.audioEnabled !== false) {\n this.logger.warn('RoomIO audio input is enabled but input.audio is already set, ignoring..');\n }\n\n if (this.output.audio && outputOptions?.audioEnabled !== false) {\n this.logger.warn(\n 'RoomIO audio output is enabled but output.audio is already set, ignoring..',\n );\n }\n\n if (this.output.transcription && outputOptions?.transcriptionEnabled !== false) {\n this.logger.warn(\n 'RoomIO transcription output is enabled but output.transcription is already set, ignoring..',\n );\n }\n\n this.roomIO = new RoomIO({\n agentSession: this,\n room,\n inputOptions,\n outputOptions,\n });\n this.roomIO.start();\n\n const ctx = getJobContext();\n if (ctx && ctx.room === room && !room.isConnected) {\n this.logger.debug('Auto-connecting to room via job context');\n tasks.push(ctx.connect());\n }\n\n if (record) {\n if (ctx._primaryAgentSession === undefined) {\n ctx._primaryAgentSession = this;\n } else {\n throw new Error(\n 'Only one `AgentSession` can be the primary at a time. If you want to ignore primary designation, use session.start(record=False).',\n );\n }\n }\n\n // TODO(AJS-265): add shutdown callback to job context\n tasks.push(this.updateActivity(this.agent));\n\n await Promise.allSettled(tasks);\n\n // Log used IO configuration\n this.logger.debug(\n `using audio io: ${this.input.audio ? '`' + this.input.audio.constructor.name + '`' : '(none)'} -> \\`AgentSession\\` -> ${this.output.audio ? '`' + this.output.audio.constructor.name + '`' : '(none)'}`,\n );\n\n this.logger.debug(\n `using transcript io: \\`AgentSession\\` -> ${this.output.transcription ? '`' + this.output.transcription.constructor.name + '`' : '(none)'}`,\n );\n\n this.started = true;\n this._startedAt = Date.now();\n this._updateAgentState('listening');\n }\n\n async start({\n agent,\n room,\n inputOptions,\n outputOptions,\n record = true,\n }: {\n agent: Agent;\n room: Room;\n inputOptions?: Partial<RoomInputOptions>;\n outputOptions?: Partial<RoomOutputOptions>;\n record?: boolean;\n }): Promise<void> {\n if (this.started) {\n return;\n }\n\n const ctx = getJobContext();\n\n record = record ?? ctx.info.job.enableRecording;\n this._enableRecording = record;\n\n this.logger.info(\n { record, enableRecording: ctx.info.job.enableRecording },\n 'Configuring session recording',\n );\n\n if (this._enableRecording) {\n await ctx.initRecording();\n }\n\n // Create agent_session as a ROOT span (new trace) to match Python behavior\n // This creates a separate trace for better cloud dashboard organization\n this.sessionSpan = tracer.startSpan({\n name: 'agent_session',\n context: ROOT_CONTEXT,\n });\n\n // Set the session span as the active span in the context\n // This ensures all child spans (agent_turn, user_turn, etc.) are parented to it\n this.rootSpanContext = trace.setSpan(ROOT_CONTEXT, this.sessionSpan);\n\n await this._startImpl({\n agent,\n room,\n inputOptions,\n outputOptions,\n record,\n span: this.sessionSpan,\n });\n }\n\n updateAgent(agent: Agent): void {\n this.agent = agent;\n\n if (this.started) {\n this.updateActivity(agent);\n }\n }\n\n commitUserTurn() {\n if (!this.activity) {\n throw new Error('AgentSession is not running');\n }\n\n this.activity.commitUserTurn();\n }\n\n clearUserTurn() {\n if (!this.activity) {\n throw new Error('AgentSession is not running');\n }\n this.activity.clearUserTurn();\n }\n\n say(\n text: string | ReadableStream<string>,\n options?: {\n audio?: ReadableStream<AudioFrame>;\n allowInterruptions?: boolean;\n addToChatCtx?: boolean;\n },\n ): SpeechHandle {\n if (!this.activity) {\n throw new Error('AgentSession is not running');\n }\n\n return this.activity.say(text, options);\n }\n\n interrupt() {\n if (!this.activity) {\n throw new Error('AgentSession is not running');\n }\n return this.activity.interrupt();\n }\n\n generateReply(options?: {\n userInput?: string;\n instructions?: string;\n toolChoice?: ToolChoice;\n allowInterruptions?: boolean;\n }): SpeechHandle {\n if (!this.activity) {\n throw new Error('AgentSession is not running');\n }\n\n const userMessage = options?.userInput\n ? new ChatMessage({\n role: 'user',\n content: options.userInput,\n })\n : undefined;\n\n if (this.activity.draining) {\n if (!this.nextActivity) {\n throw new Error('AgentSession is closing, cannot use generateReply()');\n }\n return this.nextActivity.generateReply({ userMessage, ...options });\n }\n\n return this.activity.generateReply({ userMessage, ...options });\n }\n\n private async updateActivity(agent: Agent): Promise<void> {\n const runWithContext = async () => {\n // TODO(AJS-129): add lock to agent activity core lifecycle\n this.nextActivity = new AgentActivity(agent, this);\n\n const previousActivity = this.activity;\n\n if (this.activity) {\n await this.activity.drain();\n await this.activity.close();\n }\n\n this.activity = this.nextActivity;\n this.nextActivity = undefined;\n\n this._chatCtx.insert(\n new AgentHandoffItem({\n oldAgentId: previousActivity?.agent.id,\n newAgentId: agent.id,\n }),\n );\n this.logger.debug({ previousActivity, agent }, 'Agent handoff inserted into chat context');\n\n await this.activity.start();\n\n if (this._input.audio) {\n this.activity.attachAudioInput(this._input.audio.stream);\n }\n };\n\n // Run within session span context if available\n if (this.rootSpanContext) {\n return otelContext.with(this.rootSpanContext, runWithContext);\n }\n\n return runWithContext();\n }\n\n get chatCtx(): ChatContext {\n return this._chatCtx.copy();\n }\n\n get agentState(): AgentState {\n return this._agentState;\n }\n\n get currentAgent(): Agent {\n if (!this.agent) {\n throw new Error('AgentSession is not running');\n }\n\n return this.agent;\n }\n\n async close(): Promise<void> {\n await this.closeImpl(CloseReason.USER_INITIATED);\n }\n\n /** @internal */\n _closeSoon({\n reason,\n drain = false,\n error = null,\n }: {\n reason: CloseReason;\n drain?: boolean;\n error?: RealtimeModelError | STTError | TTSError | LLMError | null;\n }): void {\n if (this.closingTask) {\n return;\n }\n this.closeImpl(reason, error, drain);\n }\n\n /** @internal */\n _onError(error: RealtimeModelError | STTError | TTSError | LLMError): void {\n if (this.closingTask || error.recoverable) {\n return;\n }\n\n this.logger.error(error, 'AgentSession is closing due to unrecoverable error');\n\n this.closingTask = (async () => {\n await this.closeImpl(CloseReason.ERROR, error);\n })().then(() => {\n this.closingTask = null;\n });\n }\n\n /** @internal */\n _conversationItemAdded(item: ChatMessage): void {\n this._chatCtx.insert(item);\n this.emit(AgentSessionEventTypes.ConversationItemAdded, createConversationItemAddedEvent(item));\n }\n\n /** @internal */\n _toolItemsAdded(items: (FunctionCall | FunctionCallOutput)[]): void {\n this._chatCtx.insert(items);\n }\n\n /** @internal */\n _updateAgentState(state: AgentState) {\n if (this._agentState === state) {\n return;\n }\n\n if (state === 'speaking') {\n // TODO(brian): PR4 - Track error counts\n\n if (this.agentSpeakingSpan === undefined) {\n this.agentSpeakingSpan = tracer.startSpan({\n name: 'agent_speaking',\n context: this.rootSpanContext,\n });\n\n // TODO(brian): PR4 - Set participant attributes if roomIO.room.localParticipant is available\n // (Ref: Python agent_session.py line 1161-1164)\n }\n } else if (this.agentSpeakingSpan !== undefined) {\n // TODO(brian): PR4 - Set ATTR_END_TIME attribute if available\n this.agentSpeakingSpan.end();\n this.agentSpeakingSpan = undefined;\n }\n\n const oldState = this._agentState;\n this._agentState = state;\n\n // Handle user away timer based on state changes\n if (state === 'listening' && this.userState === 'listening') {\n this._setUserAwayTimer();\n } else {\n this._cancelUserAwayTimer();\n }\n\n this.emit(\n AgentSessionEventTypes.AgentStateChanged,\n createAgentStateChangedEvent(oldState, state),\n );\n }\n\n /** @internal */\n _updateUserState(state: UserState, _lastSpeakingTime?: number) {\n if (this.userState === state) {\n return;\n }\n\n if (state === 'speaking' && this.userSpeakingSpan === undefined) {\n this.userSpeakingSpan = tracer.startSpan({\n name: 'user_speaking',\n context: this.rootSpanContext,\n });\n\n // TODO(brian): PR4 - Set participant attributes if roomIO.linkedParticipant is available\n // (Ref: Python agent_session.py line 1192-1195)\n } else if (this.userSpeakingSpan !== undefined) {\n // TODO(brian): PR4 - Set ATTR_END_TIME attribute with lastSpeakingTime if available\n this.userSpeakingSpan.end();\n this.userSpeakingSpan = undefined;\n }\n\n const oldState = this.userState;\n this.userState = state;\n\n // Handle user away timer based on state changes\n if (state === 'listening' && this._agentState === 'listening') {\n this._setUserAwayTimer();\n } else {\n this._cancelUserAwayTimer();\n }\n\n this.emit(\n AgentSessionEventTypes.UserStateChanged,\n createUserStateChangedEvent(oldState, state),\n );\n }\n\n // -- User changed input/output streams/sinks --\n private onAudioInputChanged(): void {\n if (!this.started) {\n return;\n }\n\n if (this.activity && this._input.audio) {\n this.activity.attachAudioInput(this._input.audio.stream);\n }\n }\n\n private onAudioOutputChanged(): void {}\n\n private onTextOutputChanged(): void {}\n\n private _setUserAwayTimer(): void {\n this._cancelUserAwayTimer();\n\n if (this.options.userAwayTimeout === null || this.options.userAwayTimeout === undefined) {\n return;\n }\n\n if (this.roomIO && !this.roomIO.isParticipantAvailable) {\n return;\n }\n\n this.userAwayTimer = setTimeout(() => {\n this.logger.debug('User away timeout triggered');\n this._updateUserState('away');\n }, this.options.userAwayTimeout * 1000);\n }\n\n private _cancelUserAwayTimer(): void {\n if (this.userAwayTimer !== null) {\n clearTimeout(this.userAwayTimer);\n this.userAwayTimer = null;\n }\n }\n\n private _onUserInputTranscribed(ev: UserInputTranscribedEvent): void {\n if (this.userState === 'away' && ev.isFinal) {\n this.logger.debug('User returned from away state due to speech input');\n this._updateUserState('listening');\n }\n }\n\n private async closeImpl(\n reason: CloseReason,\n error: RealtimeModelError | LLMError | TTSError | STTError | null = null,\n drain: boolean = false,\n ): Promise<void> {\n if (this.rootSpanContext) {\n return otelContext.with(this.rootSpanContext, async () => {\n await this.closeImplInner(reason, error, drain);\n });\n }\n\n return this.closeImplInner(reason, error, drain);\n }\n\n private async closeImplInner(\n reason: CloseReason,\n error: RealtimeModelError | LLMError | TTSError | STTError | null = null,\n drain: boolean = false,\n ): Promise<void> {\n if (!this.started) {\n return;\n }\n\n this._cancelUserAwayTimer();\n this.off(AgentSessionEventTypes.UserInputTranscribed, this._onUserInputTranscribed);\n\n if (this.activity) {\n if (!drain) {\n try {\n this.activity.interrupt();\n } catch (error) {\n // TODO(shubhra): force interrupt or wait for it to finish?\n // it might be an audio played from the error callback\n }\n }\n await this.activity.drain();\n // wait any uninterruptible speech to finish\n await this.activity.currentSpeech?.waitForPlayout();\n this.activity.detachAudioInput();\n }\n\n // detach the inputs and outputs\n this.input.audio = null;\n this.output.audio = null;\n this.output.transcription = null;\n\n await this.roomIO?.close();\n this.roomIO = undefined;\n\n await this.activity?.close();\n this.activity = undefined;\n\n if (this.sessionSpan) {\n this.sessionSpan.end();\n this.sessionSpan = undefined;\n }\n\n if (this.userSpeakingSpan) {\n this.userSpeakingSpan.end();\n this.userSpeakingSpan = undefined;\n }\n\n if (this.agentSpeakingSpan) {\n this.agentSpeakingSpan.end();\n this.agentSpeakingSpan = undefined;\n }\n\n this.started = false;\n\n this.emit(AgentSessionEventTypes.Close, createCloseEvent(reason, error));\n\n this.userState = 'listening';\n this._agentState = 'initializing';\n this.rootSpanContext = undefined;\n\n this.logger.info({ reason, error }, 'AgentSession closed');\n }\n}\n"],"mappings":"AAMA,SAAS,cAAc,WAAW,aAAa,aAAa;AAC5D,SAAS,oBAAoB;AAE7B;AAAA,EACE,OAAO;AAAA,EACP,OAAO;AAAA,EACP,OAAO;AAAA,OAIF;AACP,SAAS,qBAAqB;AAE9B,SAAS,kBAAkB,aAAa,mBAAmB;AAG3D,SAAS,WAAW;AAGpB,SAAS,YAAY,cAAc;AAInC,SAAS,qBAAqB;AAE9B;AAAA,EAEE;AAAA,EAIA;AAAA,EASA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP,SAAS,YAAY,mBAAmB;AACxC,SAAS,cAA6D;AAgBtE,MAAM,sBAAoC;AAAA,EACxC,oBAAoB;AAAA,EACpB,+BAA+B;AAAA,EAC/B,yBAAyB;AAAA,EACzB,sBAAsB;AAAA,EACtB,qBAAqB;AAAA,EACrB,qBAAqB;AAAA,EACrB,cAAc;AAAA,EACd,sBAAsB;AAAA,EACtB,iBAAiB;AACnB;AA0BO,MAAM,qBAEF,aAA+D;AAAA,EACxE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAES;AAAA,EAED;AAAA,EACA;AAAA,EACA;AAAA,EACA,UAAU;AAAA,EACV,YAAuB;AAAA,EAEvB;AAAA,EACA,SAAS,IAAI;AAAA,EAEb;AAAA,EACA;AAAA,EACA,cAA0B;AAAA,EAE1B;AAAA,EACA;AAAA,EAEA,cAAoC;AAAA,EACpC,gBAAuC;AAAA,EAEvC;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EAGR;AAAA;AAAA,EAGA,kBAAgC,CAAC;AAAA;AAAA,EAGjC,mBAAmB;AAAA;AAAA,EAGnB;AAAA,EAEA,YAAY,MAAqC;AAC/C,UAAM;AAEN,UAAM;AAAA,MACJ;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA,eAAe;AAAA,IACjB,IAAI;AAEJ,SAAK,MAAM;AAEX,QAAI,OAAO,QAAQ,UAAU;AAC3B,WAAK,MAAM,aAAa,gBAAgB,GAAG;AAAA,IAC7C,OAAO;AACL,WAAK,MAAM;AAAA,IACb;AAEA,QAAI,OAAO,QAAQ,UAAU;AAC3B,WAAK,MAAM,aAAa,gBAAgB,GAAG;AAAA,IAC7C,OAAO;AACL,WAAK,MAAM;AAAA,IACb;AAEA,QAAI,OAAO,QAAQ,UAAU;AAC3B,WAAK,MAAM,aAAa,gBAAgB,GAAG;AAAA,IAC7C,OAAO;AACL,WAAK,MAAM;AAAA,IACb;AAEA,SAAK,gBAAgB;AACrB,SAAK,YAAY;AAGjB,SAAK,SAAS,IAAI,WAAW,KAAK,mBAAmB;AACrD,SAAK,UAAU,IAAI,YAAY,KAAK,sBAAsB,KAAK,mBAAmB;AAGlF,SAAK,WAAW,YAAY,MAAM;AAClC,SAAK,UAAU,EAAE,GAAG,qBAAqB,GAAG,aAAa;AAEzD,SAAK,0BAA0B,KAAK,wBAAwB,KAAK,IAAI;AACrE,SAAK,GAAG,uBAAuB,sBAAsB,KAAK,uBAAuB;AAAA,EACnF;AAAA,EAEA,KACE,UACG,MACM;AACT,UAAM,YAAY,KAAK,CAAC;AACxB,SAAK,gBAAgB,KAAK,SAAS;AACnC,WAAO,MAAM,KAAK,OAAO,GAAG,IAAI;AAAA,EAClC;AAAA,EAEA,IAAI,QAAoB;AACtB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,SAAsB;AACxB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,WAAqB;AACvB,QAAI,KAAK,cAAc,QAAW;AAChC,YAAM,IAAI,MAAM,iCAAiC;AAAA,IACnD;AAEA,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,UAAuB;AACzB,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,SAAS,OAAiB;AAC5B,SAAK,YAAY;AAAA,EACnB;AAAA,EAEA,MAAc,WAAW;AAAA,IACvB;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,GAOkB;AAChB,SAAK,aAAa,WAAW,kBAAkB,MAAM,EAAE;AAEvD,SAAK,QAAQ;AACb,SAAK,kBAAkB,cAAc;AAErC,UAAM,QAAyB,CAAC;AAEhC,QAAI,KAAK,MAAM,UAAS,6CAAc,kBAAiB,OAAO;AAC5D,WAAK,OAAO,KAAK,0EAA0E;AAAA,IAC7F;AAEA,QAAI,KAAK,OAAO,UAAS,+CAAe,kBAAiB,OAAO;AAC9D,WAAK,OAAO;AAAA,QACV;AAAA,MACF;AAAA,IACF;AAEA,QAAI,KAAK,OAAO,kBAAiB,+CAAe,0BAAyB,OAAO;AAC9E,WAAK,OAAO;AAAA,QACV;AAAA,MACF;AAAA,IACF;AAEA,SAAK,SAAS,IAAI,OAAO;AAAA,MACvB,cAAc;AAAA,MACd;AAAA,MACA;AAAA,MACA;AAAA,IACF,CAAC;AACD,SAAK,OAAO,MAAM;AAElB,UAAM,MAAM,cAAc;AAC1B,QAAI,OAAO,IAAI,SAAS,QAAQ,CAAC,KAAK,aAAa;AACjD,WAAK,OAAO,MAAM,yCAAyC;AAC3D,YAAM,KAAK,IAAI,QAAQ,CAAC;AAAA,IAC1B;AAEA,QAAI,QAAQ;AACV,UAAI,IAAI,yBAAyB,QAAW;AAC1C,YAAI,uBAAuB;AAAA,MAC7B,OAAO;AACL,cAAM,IAAI;AAAA,UACR;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAGA,UAAM,KAAK,KAAK,eAAe,KAAK,KAAK,CAAC;AAE1C,UAAM,QAAQ,WAAW,KAAK;AAG9B,SAAK,OAAO;AAAA,MACV,mBAAmB,KAAK,MAAM,QAAQ,MAAM,KAAK,MAAM,MAAM,YAAY,OAAO,MAAM,QAAQ,2BAA2B,KAAK,OAAO,QAAQ,MAAM,KAAK,OAAO,MAAM,YAAY,OAAO,MAAM,QAAQ;AAAA,IACxM;AAEA,SAAK,OAAO;AAAA,MACV,4CAA4C,KAAK,OAAO,gBAAgB,MAAM,KAAK,OAAO,cAAc,YAAY,OAAO,MAAM,QAAQ;AAAA,IAC3I;AAEA,SAAK,UAAU;AACf,SAAK,aAAa,KAAK,IAAI;AAC3B,SAAK,kBAAkB,WAAW;AAAA,EACpC;AAAA,EAEA,MAAM,MAAM;AAAA,IACV;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,SAAS;AAAA,EACX,GAMkB;AAChB,QAAI,KAAK,SAAS;AAChB;AAAA,IACF;AAEA,UAAM,MAAM,cAAc;AAE1B,aAAS,UAAU,IAAI,KAAK,IAAI;AAChC,SAAK,mBAAmB;AAExB,SAAK,OAAO;AAAA,MACV,EAAE,QAAQ,iBAAiB,IAAI,KAAK,IAAI,gBAAgB;AAAA,MACxD;AAAA,IACF;AAEA,QAAI,KAAK,kBAAkB;AACzB,YAAM,IAAI,cAAc;AAAA,IAC1B;AAIA,SAAK,cAAc,OAAO,UAAU;AAAA,MAClC,MAAM;AAAA,MACN,SAAS;AAAA,IACX,CAAC;AAID,SAAK,kBAAkB,MAAM,QAAQ,cAAc,KAAK,WAAW;AAEnE,UAAM,KAAK,WAAW;AAAA,MACpB;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA,MAAM,KAAK;AAAA,IACb,CAAC;AAAA,EACH;AAAA,EAEA,YAAY,OAAoB;AAC9B,SAAK,QAAQ;AAEb,QAAI,KAAK,SAAS;AAChB,WAAK,eAAe,KAAK;AAAA,IAC3B;AAAA,EACF;AAAA,EAEA,iBAAiB;AACf,QAAI,CAAC,KAAK,UAAU;AAClB,YAAM,IAAI,MAAM,6BAA6B;AAAA,IAC/C;AAEA,SAAK,SAAS,eAAe;AAAA,EAC/B;AAAA,EAEA,gBAAgB;AACd,QAAI,CAAC,KAAK,UAAU;AAClB,YAAM,IAAI,MAAM,6BAA6B;AAAA,IAC/C;AACA,SAAK,SAAS,cAAc;AAAA,EAC9B;AAAA,EAEA,IACE,MACA,SAKc;AACd,QAAI,CAAC,KAAK,UAAU;AAClB,YAAM,IAAI,MAAM,6BAA6B;AAAA,IAC/C;AAEA,WAAO,KAAK,SAAS,IAAI,MAAM,OAAO;AAAA,EACxC;AAAA,EAEA,YAAY;AACV,QAAI,CAAC,KAAK,UAAU;AAClB,YAAM,IAAI,MAAM,6BAA6B;AAAA,IAC/C;AACA,WAAO,KAAK,SAAS,UAAU;AAAA,EACjC;AAAA,EAEA,cAAc,SAKG;AACf,QAAI,CAAC,KAAK,UAAU;AAClB,YAAM,IAAI,MAAM,6BAA6B;AAAA,IAC/C;AAEA,UAAM,eAAc,mCAAS,aACzB,IAAI,YAAY;AAAA,MACd,MAAM;AAAA,MACN,SAAS,QAAQ;AAAA,IACnB,CAAC,IACD;AAEJ,QAAI,KAAK,SAAS,UAAU;AAC1B,UAAI,CAAC,KAAK,cAAc;AACtB,cAAM,IAAI,MAAM,qDAAqD;AAAA,MACvE;AACA,aAAO,KAAK,aAAa,cAAc,EAAE,aAAa,GAAG,QAAQ,CAAC;AAAA,IACpE;AAEA,WAAO,KAAK,SAAS,cAAc,EAAE,aAAa,GAAG,QAAQ,CAAC;AAAA,EAChE;AAAA,EAEA,MAAc,eAAe,OAA6B;AACxD,UAAM,iBAAiB,YAAY;AAEjC,WAAK,eAAe,IAAI,cAAc,OAAO,IAAI;AAEjD,YAAM,mBAAmB,KAAK;AAE9B,UAAI,KAAK,UAAU;AACjB,cAAM,KAAK,SAAS,MAAM;AAC1B,cAAM,KAAK,SAAS,MAAM;AAAA,MAC5B;AAEA,WAAK,WAAW,KAAK;AACrB,WAAK,eAAe;AAEpB,WAAK,SAAS;AAAA,QACZ,IAAI,iBAAiB;AAAA,UACnB,YAAY,qDAAkB,MAAM;AAAA,UACpC,YAAY,MAAM;AAAA,QACpB,CAAC;AAAA,MACH;AACA,WAAK,OAAO,MAAM,EAAE,kBAAkB,MAAM,GAAG,0CAA0C;AAEzF,YAAM,KAAK,SAAS,MAAM;AAE1B,UAAI,KAAK,OAAO,OAAO;AACrB,aAAK,SAAS,iBAAiB,KAAK,OAAO,MAAM,MAAM;AAAA,MACzD;AAAA,IACF;AAGA,QAAI,KAAK,iBAAiB;AACxB,aAAO,YAAY,KAAK,KAAK,iBAAiB,cAAc;AAAA,IAC9D;AAEA,WAAO,eAAe;AAAA,EACxB;AAAA,EAEA,IAAI,UAAuB;AACzB,WAAO,KAAK,SAAS,KAAK;AAAA,EAC5B;AAAA,EAEA,IAAI,aAAyB;AAC3B,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,IAAI,eAAsB;AACxB,QAAI,CAAC,KAAK,OAAO;AACf,YAAM,IAAI,MAAM,6BAA6B;AAAA,IAC/C;AAEA,WAAO,KAAK;AAAA,EACd;AAAA,EAEA,MAAM,QAAuB;AAC3B,UAAM,KAAK,UAAU,YAAY,cAAc;AAAA,EACjD;AAAA;AAAA,EAGA,WAAW;AAAA,IACT;AAAA,IACA,QAAQ;AAAA,IACR,QAAQ;AAAA,EACV,GAIS;AACP,QAAI,KAAK,aAAa;AACpB;AAAA,IACF;AACA,SAAK,UAAU,QAAQ,OAAO,KAAK;AAAA,EACrC;AAAA;AAAA,EAGA,SAAS,OAAkE;AACzE,QAAI,KAAK,eAAe,MAAM,aAAa;AACzC;AAAA,IACF;AAEA,SAAK,OAAO,MAAM,OAAO,oDAAoD;AAE7E,SAAK,eAAe,YAAY;AAC9B,YAAM,KAAK,UAAU,YAAY,OAAO,KAAK;AAAA,IAC/C,GAAG,EAAE,KAAK,MAAM;AACd,WAAK,cAAc;AAAA,IACrB,CAAC;AAAA,EACH;AAAA;AAAA,EAGA,uBAAuB,MAAyB;AAC9C,SAAK,SAAS,OAAO,IAAI;AACzB,SAAK,KAAK,uBAAuB,uBAAuB,iCAAiC,IAAI,CAAC;AAAA,EAChG;AAAA;AAAA,EAGA,gBAAgB,OAAoD;AAClE,SAAK,SAAS,OAAO,KAAK;AAAA,EAC5B;AAAA;AAAA,EAGA,kBAAkB,OAAmB;AACnC,QAAI,KAAK,gBAAgB,OAAO;AAC9B;AAAA,IACF;AAEA,QAAI,UAAU,YAAY;AAGxB,UAAI,KAAK,sBAAsB,QAAW;AACxC,aAAK,oBAAoB,OAAO,UAAU;AAAA,UACxC,MAAM;AAAA,UACN,SAAS,KAAK;AAAA,QAChB,CAAC;AAAA,MAIH;AAAA,IACF,WAAW,KAAK,sBAAsB,QAAW;AAE/C,WAAK,kBAAkB,IAAI;AAC3B,WAAK,oBAAoB;AAAA,IAC3B;AAEA,UAAM,WAAW,KAAK;AACtB,SAAK,cAAc;AAGnB,QAAI,UAAU,eAAe,KAAK,cAAc,aAAa;AAC3D,WAAK,kBAAkB;AAAA,IACzB,OAAO;AACL,WAAK,qBAAqB;AAAA,IAC5B;AAEA,SAAK;AAAA,MACH,uBAAuB;AAAA,MACvB,6BAA6B,UAAU,KAAK;AAAA,IAC9C;AAAA,EACF;AAAA;AAAA,EAGA,iBAAiB,OAAkB,mBAA4B;AAC7D,QAAI,KAAK,cAAc,OAAO;AAC5B;AAAA,IACF;AAEA,QAAI,UAAU,cAAc,KAAK,qBAAqB,QAAW;AAC/D,WAAK,mBAAmB,OAAO,UAAU;AAAA,QACvC,MAAM;AAAA,QACN,SAAS,KAAK;AAAA,MAChB,CAAC;AAAA,IAIH,WAAW,KAAK,qBAAqB,QAAW;AAE9C,WAAK,iBAAiB,IAAI;AAC1B,WAAK,mBAAmB;AAAA,IAC1B;AAEA,UAAM,WAAW,KAAK;AACtB,SAAK,YAAY;AAGjB,QAAI,UAAU,eAAe,KAAK,gBAAgB,aAAa;AAC7D,WAAK,kBAAkB;AAAA,IACzB,OAAO;AACL,WAAK,qBAAqB;AAAA,IAC5B;AAEA,SAAK;AAAA,MACH,uBAAuB;AAAA,MACvB,4BAA4B,UAAU,KAAK;AAAA,IAC7C;AAAA,EACF;AAAA;AAAA,EAGQ,sBAA4B;AAClC,QAAI,CAAC,KAAK,SAAS;AACjB;AAAA,IACF;AAEA,QAAI,KAAK,YAAY,KAAK,OAAO,OAAO;AACtC,WAAK,SAAS,iBAAiB,KAAK,OAAO,MAAM,MAAM;AAAA,IACzD;AAAA,EACF;AAAA,EAEQ,uBAA6B;AAAA,EAAC;AAAA,EAE9B,sBAA4B;AAAA,EAAC;AAAA,EAE7B,oBAA0B;AAChC,SAAK,qBAAqB;AAE1B,QAAI,KAAK,QAAQ,oBAAoB,QAAQ,KAAK,QAAQ,oBAAoB,QAAW;AACvF;AAAA,IACF;AAEA,QAAI,KAAK,UAAU,CAAC,KAAK,OAAO,wBAAwB;AACtD;AAAA,IACF;AAEA,SAAK,gBAAgB,WAAW,MAAM;AACpC,WAAK,OAAO,MAAM,6BAA6B;AAC/C,WAAK,iBAAiB,MAAM;AAAA,IAC9B,GAAG,KAAK,QAAQ,kBAAkB,GAAI;AAAA,EACxC;AAAA,EAEQ,uBAA6B;AACnC,QAAI,KAAK,kBAAkB,MAAM;AAC/B,mBAAa,KAAK,aAAa;AAC/B,WAAK,gBAAgB;AAAA,IACvB;AAAA,EACF;AAAA,EAEQ,wBAAwB,IAAqC;AACnE,QAAI,KAAK,cAAc,UAAU,GAAG,SAAS;AAC3C,WAAK,OAAO,MAAM,mDAAmD;AACrE,WAAK,iBAAiB,WAAW;AAAA,IACnC;AAAA,EACF;AAAA,EAEA,MAAc,UACZ,QACA,QAAoE,MACpE,QAAiB,OACF;AACf,QAAI,KAAK,iBAAiB;AACxB,aAAO,YAAY,KAAK,KAAK,iBAAiB,YAAY;AACxD,cAAM,KAAK,eAAe,QAAQ,OAAO,KAAK;AAAA,MAChD,CAAC;AAAA,IACH;AAEA,WAAO,KAAK,eAAe,QAAQ,OAAO,KAAK;AAAA,EACjD;AAAA,EAEA,MAAc,eACZ,QACA,QAAoE,MACpE,QAAiB,OACF;AApqBnB;AAqqBI,QAAI,CAAC,KAAK,SAAS;AACjB;AAAA,IACF;AAEA,SAAK,qBAAqB;AAC1B,SAAK,IAAI,uBAAuB,sBAAsB,KAAK,uBAAuB;AAElF,QAAI,KAAK,UAAU;AACjB,UAAI,CAAC,OAAO;AACV,YAAI;AACF,eAAK,SAAS,UAAU;AAAA,QAC1B,SAASA,QAAO;AAAA,QAGhB;AAAA,MACF;AACA,YAAM,KAAK,SAAS,MAAM;AAE1B,cAAM,UAAK,SAAS,kBAAd,mBAA6B;AACnC,WAAK,SAAS,iBAAiB;AAAA,IACjC;AAGA,SAAK,MAAM,QAAQ;AACnB,SAAK,OAAO,QAAQ;AACpB,SAAK,OAAO,gBAAgB;AAE5B,YAAM,UAAK,WAAL,mBAAa;AACnB,SAAK,SAAS;AAEd,YAAM,UAAK,aAAL,mBAAe;AACrB,SAAK,WAAW;AAEhB,QAAI,KAAK,aAAa;AACpB,WAAK,YAAY,IAAI;AACrB,WAAK,cAAc;AAAA,IACrB;AAEA,QAAI,KAAK,kBAAkB;AACzB,WAAK,iBAAiB,IAAI;AAC1B,WAAK,mBAAmB;AAAA,IAC1B;AAEA,QAAI,KAAK,mBAAmB;AAC1B,WAAK,kBAAkB,IAAI;AAC3B,WAAK,oBAAoB;AAAA,IAC3B;AAEA,SAAK,UAAU;AAEf,SAAK,KAAK,uBAAuB,OAAO,iBAAiB,QAAQ,KAAK,CAAC;AAEvE,SAAK,YAAY;AACjB,SAAK,cAAc;AACnB,SAAK,kBAAkB;AAEvB,SAAK,OAAO,KAAK,EAAE,QAAQ,MAAM,GAAG,qBAAqB;AAAA,EAC3D;AACF;","names":["error"]}
|
|
@@ -29,6 +29,7 @@ var import_deferred_stream = require("../stream/deferred_stream.cjs");
|
|
|
29
29
|
var import_identity_transform = require("../stream/identity_transform.cjs");
|
|
30
30
|
var import_merge_readable_streams = require("../stream/merge_readable_streams.cjs");
|
|
31
31
|
var import_stt = require("../stt/stt.cjs");
|
|
32
|
+
var import_telemetry = require("../telemetry/index.cjs");
|
|
32
33
|
var import_utils = require("../utils.cjs");
|
|
33
34
|
var import_vad = require("../vad.cjs");
|
|
34
35
|
class AudioRecognition {
|
|
@@ -40,6 +41,7 @@ class AudioRecognition {
|
|
|
40
41
|
minEndpointingDelay;
|
|
41
42
|
maxEndpointingDelay;
|
|
42
43
|
lastLanguage;
|
|
44
|
+
rootSpanContext;
|
|
43
45
|
deferredInputStream;
|
|
44
46
|
logger = (0, import_log.log)();
|
|
45
47
|
lastFinalTranscriptTime = 0;
|
|
@@ -52,6 +54,7 @@ class AudioRecognition {
|
|
|
52
54
|
userTurnCommitted = false;
|
|
53
55
|
speaking = false;
|
|
54
56
|
sampleRate;
|
|
57
|
+
userTurnSpan;
|
|
55
58
|
vadInputStream;
|
|
56
59
|
sttInputStream;
|
|
57
60
|
silenceAudioTransform = new import_identity_transform.IdentityTransform();
|
|
@@ -70,6 +73,7 @@ class AudioRecognition {
|
|
|
70
73
|
this.minEndpointingDelay = opts.minEndpointingDelay;
|
|
71
74
|
this.maxEndpointingDelay = opts.maxEndpointingDelay;
|
|
72
75
|
this.lastLanguage = void 0;
|
|
76
|
+
this.rootSpanContext = opts.rootSpanContext;
|
|
73
77
|
this.deferredInputStream = new import_deferred_stream.DeferredReadableStream();
|
|
74
78
|
const [vadInputStream, sttInputStream] = this.deferredInputStream.stream.tee();
|
|
75
79
|
this.vadInputStream = vadInputStream;
|
|
@@ -264,29 +268,42 @@ class AudioRecognition {
|
|
|
264
268
|
const bounceEOUTask = (lastSpeakingTime, lastFinalTranscriptTime, speechStartTime) => async (controller) => {
|
|
265
269
|
let endpointingDelay = this.minEndpointingDelay;
|
|
266
270
|
if (turnDetector) {
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
271
|
+
await import_telemetry.tracer.startActiveSpan(
|
|
272
|
+
async (span) => {
|
|
273
|
+
this.logger.debug("Running turn detector model");
|
|
274
|
+
let endOfTurnProbability = 0;
|
|
275
|
+
let unlikelyThreshold;
|
|
276
|
+
if (!await turnDetector.supportsLanguage(this.lastLanguage)) {
|
|
277
|
+
this.logger.debug(`Turn detector does not support language ${this.lastLanguage}`);
|
|
278
|
+
} else {
|
|
279
|
+
try {
|
|
280
|
+
endOfTurnProbability = await turnDetector.predictEndOfTurn(chatCtx);
|
|
281
|
+
unlikelyThreshold = await turnDetector.unlikelyThreshold(this.lastLanguage);
|
|
282
|
+
this.logger.debug(
|
|
283
|
+
{ endOfTurnProbability, unlikelyThreshold, language: this.lastLanguage },
|
|
284
|
+
"end of turn probability"
|
|
285
|
+
);
|
|
286
|
+
if (unlikelyThreshold && endOfTurnProbability < unlikelyThreshold) {
|
|
287
|
+
endpointingDelay = this.maxEndpointingDelay;
|
|
288
|
+
}
|
|
289
|
+
} catch (error) {
|
|
290
|
+
this.logger.error(error, "Error predicting end of turn");
|
|
291
|
+
}
|
|
292
|
+
}
|
|
293
|
+
span.setAttribute(
|
|
294
|
+
import_telemetry.traceTypes.ATTR_CHAT_CTX,
|
|
295
|
+
JSON.stringify(chatCtx.toJSON({ excludeTimestamp: false }))
|
|
296
|
+
);
|
|
297
|
+
span.setAttribute(import_telemetry.traceTypes.ATTR_EOU_PROBABILITY, endOfTurnProbability);
|
|
298
|
+
span.setAttribute(import_telemetry.traceTypes.ATTR_EOU_UNLIKELY_THRESHOLD, unlikelyThreshold ?? 0);
|
|
299
|
+
span.setAttribute(import_telemetry.traceTypes.ATTR_EOU_DELAY, endpointingDelay);
|
|
300
|
+
span.setAttribute(import_telemetry.traceTypes.ATTR_EOU_LANGUAGE, this.lastLanguage ?? "");
|
|
301
|
+
},
|
|
302
|
+
{
|
|
303
|
+
name: "eou_detection",
|
|
304
|
+
context: this.rootSpanContext
|
|
288
305
|
}
|
|
289
|
-
|
|
306
|
+
);
|
|
290
307
|
}
|
|
291
308
|
let extraSleep = endpointingDelay;
|
|
292
309
|
if (lastSpeakingTime !== void 0) {
|
|
@@ -316,6 +333,12 @@ class AudioRecognition {
|
|
|
316
333
|
stoppedSpeakingAt
|
|
317
334
|
});
|
|
318
335
|
if (committed) {
|
|
336
|
+
this._endUserTurnSpan({
|
|
337
|
+
transcript: this.audioTranscript,
|
|
338
|
+
confidence: confidenceAvg,
|
|
339
|
+
transcriptionDelay: transcriptionDelay ?? 0,
|
|
340
|
+
endOfUtteranceDelay: endOfUtteranceDelay ?? 0
|
|
341
|
+
});
|
|
319
342
|
this.audioTranscript = "";
|
|
320
343
|
this.finalTranscriptConfidence = [];
|
|
321
344
|
this.lastSpeakingTime = void 0;
|
|
@@ -400,6 +423,12 @@ class AudioRecognition {
|
|
|
400
423
|
this.logger.debug("VAD task: START_OF_SPEECH");
|
|
401
424
|
this.hooks.onStartOfSpeech(ev);
|
|
402
425
|
this.speaking = true;
|
|
426
|
+
if (!this.userTurnSpan) {
|
|
427
|
+
this.userTurnSpan = import_telemetry.tracer.startSpan({
|
|
428
|
+
name: "user_turn",
|
|
429
|
+
context: this.rootSpanContext
|
|
430
|
+
});
|
|
431
|
+
}
|
|
403
432
|
if (ev.frames.length > 0 && ev.frames[0]) {
|
|
404
433
|
this.sampleRate = ev.frames[0].sampleRate;
|
|
405
434
|
}
|
|
@@ -483,11 +512,29 @@ class AudioRecognition {
|
|
|
483
512
|
async close() {
|
|
484
513
|
var _a, _b, _c, _d;
|
|
485
514
|
this.detachInputAudioStream();
|
|
515
|
+
this.silenceAudioWriter.releaseLock();
|
|
486
516
|
await ((_a = this.commitUserTurnTask) == null ? void 0 : _a.cancelAndWait());
|
|
487
517
|
await ((_b = this.sttTask) == null ? void 0 : _b.cancelAndWait());
|
|
488
518
|
await ((_c = this.vadTask) == null ? void 0 : _c.cancelAndWait());
|
|
489
519
|
await ((_d = this.bounceEOUTask) == null ? void 0 : _d.cancelAndWait());
|
|
490
520
|
}
|
|
521
|
+
_endUserTurnSpan({
|
|
522
|
+
transcript,
|
|
523
|
+
confidence,
|
|
524
|
+
transcriptionDelay,
|
|
525
|
+
endOfUtteranceDelay
|
|
526
|
+
}) {
|
|
527
|
+
if (this.userTurnSpan) {
|
|
528
|
+
this.userTurnSpan.setAttributes({
|
|
529
|
+
[import_telemetry.traceTypes.ATTR_USER_TRANSCRIPT]: transcript,
|
|
530
|
+
[import_telemetry.traceTypes.ATTR_TRANSCRIPT_CONFIDENCE]: confidence,
|
|
531
|
+
[import_telemetry.traceTypes.ATTR_TRANSCRIPTION_DELAY]: transcriptionDelay,
|
|
532
|
+
[import_telemetry.traceTypes.ATTR_END_OF_TURN_DELAY]: endOfUtteranceDelay
|
|
533
|
+
});
|
|
534
|
+
this.userTurnSpan.end();
|
|
535
|
+
this.userTurnSpan = void 0;
|
|
536
|
+
}
|
|
537
|
+
}
|
|
491
538
|
get vadBaseTurnDetection() {
|
|
492
539
|
return ["vad", void 0].includes(this.turnDetectionMode);
|
|
493
540
|
}
|