@livekit/agents 1.0.22 → 1.0.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/inference/api_protos.cjs +2 -2
- package/dist/inference/api_protos.cjs.map +1 -1
- package/dist/inference/api_protos.d.cts +16 -16
- package/dist/inference/api_protos.d.ts +16 -16
- package/dist/inference/api_protos.js +2 -2
- package/dist/inference/api_protos.js.map +1 -1
- package/dist/ipc/job_proc_lazy_main.cjs +35 -1
- package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
- package/dist/ipc/job_proc_lazy_main.js +13 -1
- package/dist/ipc/job_proc_lazy_main.js.map +1 -1
- package/dist/job.cjs +52 -6
- package/dist/job.cjs.map +1 -1
- package/dist/job.d.cts +2 -0
- package/dist/job.d.ts +2 -0
- package/dist/job.d.ts.map +1 -1
- package/dist/job.js +52 -6
- package/dist/job.js.map +1 -1
- package/dist/llm/llm.cjs +38 -3
- package/dist/llm/llm.cjs.map +1 -1
- package/dist/llm/llm.d.cts +1 -0
- package/dist/llm/llm.d.ts +1 -0
- package/dist/llm/llm.d.ts.map +1 -1
- package/dist/llm/llm.js +38 -3
- package/dist/llm/llm.js.map +1 -1
- package/dist/log.cjs +34 -10
- package/dist/log.cjs.map +1 -1
- package/dist/log.d.cts +7 -0
- package/dist/log.d.ts +7 -0
- package/dist/log.d.ts.map +1 -1
- package/dist/log.js +34 -11
- package/dist/log.js.map +1 -1
- package/dist/telemetry/index.cjs +23 -2
- package/dist/telemetry/index.cjs.map +1 -1
- package/dist/telemetry/index.d.cts +4 -1
- package/dist/telemetry/index.d.ts +4 -1
- package/dist/telemetry/index.d.ts.map +1 -1
- package/dist/telemetry/index.js +27 -2
- package/dist/telemetry/index.js.map +1 -1
- package/dist/telemetry/logging.cjs +65 -0
- package/dist/telemetry/logging.cjs.map +1 -0
- package/dist/telemetry/logging.d.cts +21 -0
- package/dist/telemetry/logging.d.ts +21 -0
- package/dist/telemetry/logging.d.ts.map +1 -0
- package/dist/telemetry/logging.js +40 -0
- package/dist/telemetry/logging.js.map +1 -0
- package/dist/telemetry/otel_http_exporter.cjs +144 -0
- package/dist/telemetry/otel_http_exporter.cjs.map +1 -0
- package/dist/telemetry/otel_http_exporter.d.cts +62 -0
- package/dist/telemetry/otel_http_exporter.d.ts +62 -0
- package/dist/telemetry/otel_http_exporter.d.ts.map +1 -0
- package/dist/telemetry/otel_http_exporter.js +120 -0
- package/dist/telemetry/otel_http_exporter.js.map +1 -0
- package/dist/telemetry/pino_otel_transport.cjs +217 -0
- package/dist/telemetry/pino_otel_transport.cjs.map +1 -0
- package/dist/telemetry/pino_otel_transport.d.cts +58 -0
- package/dist/telemetry/pino_otel_transport.d.ts +58 -0
- package/dist/telemetry/pino_otel_transport.d.ts.map +1 -0
- package/dist/telemetry/pino_otel_transport.js +189 -0
- package/dist/telemetry/pino_otel_transport.js.map +1 -0
- package/dist/telemetry/traces.cjs +225 -16
- package/dist/telemetry/traces.cjs.map +1 -1
- package/dist/telemetry/traces.d.cts +17 -0
- package/dist/telemetry/traces.d.ts +17 -0
- package/dist/telemetry/traces.d.ts.map +1 -1
- package/dist/telemetry/traces.js +211 -14
- package/dist/telemetry/traces.js.map +1 -1
- package/dist/tts/tts.cjs +62 -5
- package/dist/tts/tts.cjs.map +1 -1
- package/dist/tts/tts.d.cts +2 -0
- package/dist/tts/tts.d.ts +2 -0
- package/dist/tts/tts.d.ts.map +1 -1
- package/dist/tts/tts.js +62 -5
- package/dist/tts/tts.js.map +1 -1
- package/dist/utils.cjs +6 -0
- package/dist/utils.cjs.map +1 -1
- package/dist/utils.d.cts +1 -0
- package/dist/utils.d.ts +1 -0
- package/dist/utils.d.ts.map +1 -1
- package/dist/utils.js +5 -0
- package/dist/utils.js.map +1 -1
- package/dist/voice/agent_activity.cjs +93 -7
- package/dist/voice/agent_activity.cjs.map +1 -1
- package/dist/voice/agent_activity.d.cts +3 -0
- package/dist/voice/agent_activity.d.ts +3 -0
- package/dist/voice/agent_activity.d.ts.map +1 -1
- package/dist/voice/agent_activity.js +93 -7
- package/dist/voice/agent_activity.js.map +1 -1
- package/dist/voice/agent_session.cjs +122 -27
- package/dist/voice/agent_session.cjs.map +1 -1
- package/dist/voice/agent_session.d.cts +15 -0
- package/dist/voice/agent_session.d.ts +15 -0
- package/dist/voice/agent_session.d.ts.map +1 -1
- package/dist/voice/agent_session.js +122 -27
- package/dist/voice/agent_session.js.map +1 -1
- package/dist/voice/audio_recognition.cjs +69 -22
- package/dist/voice/audio_recognition.cjs.map +1 -1
- package/dist/voice/audio_recognition.d.cts +5 -0
- package/dist/voice/audio_recognition.d.ts +5 -0
- package/dist/voice/audio_recognition.d.ts.map +1 -1
- package/dist/voice/audio_recognition.js +69 -22
- package/dist/voice/audio_recognition.js.map +1 -1
- package/dist/voice/generation.cjs +43 -3
- package/dist/voice/generation.cjs.map +1 -1
- package/dist/voice/generation.d.ts.map +1 -1
- package/dist/voice/generation.js +43 -3
- package/dist/voice/generation.js.map +1 -1
- package/dist/voice/report.cjs +3 -2
- package/dist/voice/report.cjs.map +1 -1
- package/dist/voice/report.d.cts +7 -1
- package/dist/voice/report.d.ts +7 -1
- package/dist/voice/report.d.ts.map +1 -1
- package/dist/voice/report.js +3 -2
- package/dist/voice/report.js.map +1 -1
- package/package.json +8 -2
- package/src/inference/api_protos.ts +2 -2
- package/src/ipc/job_proc_lazy_main.ts +12 -1
- package/src/job.ts +59 -10
- package/src/llm/llm.ts +48 -5
- package/src/log.ts +52 -15
- package/src/telemetry/index.ts +22 -4
- package/src/telemetry/logging.ts +55 -0
- package/src/telemetry/otel_http_exporter.ts +191 -0
- package/src/telemetry/pino_otel_transport.ts +265 -0
- package/src/telemetry/traces.ts +320 -20
- package/src/tts/tts.ts +71 -9
- package/src/utils.ts +5 -0
- package/src/voice/agent_activity.ts +140 -22
- package/src/voice/agent_session.ts +174 -34
- package/src/voice/audio_recognition.ts +85 -26
- package/src/voice/generation.ts +59 -7
- package/src/voice/report.ts +10 -4
|
@@ -3,6 +3,8 @@
|
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
import { Mutex } from '@livekit/mutex';
|
|
5
5
|
import type { AudioFrame } from '@livekit/rtc-node';
|
|
6
|
+
import type { Span } from '@opentelemetry/api';
|
|
7
|
+
import { ROOT_CONTEXT, trace } from '@opentelemetry/api';
|
|
6
8
|
import { Heap } from 'heap-js';
|
|
7
9
|
import { AsyncLocalStorage } from 'node:async_hooks';
|
|
8
10
|
import { ReadableStream } from 'node:stream/web';
|
|
@@ -10,6 +12,7 @@ import { type ChatContext, ChatMessage } from '../llm/chat_context.js';
|
|
|
10
12
|
import {
|
|
11
13
|
type ChatItem,
|
|
12
14
|
type FunctionCall,
|
|
15
|
+
type FunctionCallOutput,
|
|
13
16
|
type GenerationCreatedEvent,
|
|
14
17
|
type InputSpeechStartedEvent,
|
|
15
18
|
type InputSpeechStoppedEvent,
|
|
@@ -34,6 +37,7 @@ import type {
|
|
|
34
37
|
} from '../metrics/base.js';
|
|
35
38
|
import { DeferredReadableStream } from '../stream/deferred_stream.js';
|
|
36
39
|
import { STT, type STTError, type SpeechEvent } from '../stt/stt.js';
|
|
40
|
+
import { traceTypes, tracer } from '../telemetry/index.js';
|
|
37
41
|
import { splitWords } from '../tokenize/basic/word.js';
|
|
38
42
|
import { TTS, type TTSError } from '../tts/tts.js';
|
|
39
43
|
import { Future, Task, cancelAndWait, waitFor } from '../utils.js';
|
|
@@ -70,7 +74,6 @@ import {
|
|
|
70
74
|
} from './generation.js';
|
|
71
75
|
import { SpeechHandle } from './speech_handle.js';
|
|
72
76
|
|
|
73
|
-
// equivalent to Python's contextvars
|
|
74
77
|
const speechHandleStorage = new AsyncLocalStorage<SpeechHandle>();
|
|
75
78
|
|
|
76
79
|
interface PreemptiveGeneration {
|
|
@@ -202,10 +205,15 @@ export class AgentActivity implements RecognitionHooks {
|
|
|
202
205
|
}
|
|
203
206
|
|
|
204
207
|
async start(): Promise<void> {
|
|
205
|
-
// TODO(brian): PR3 - Add span: startSpan = tracer.startSpan('start_agent_activity', { attributes: { 'lk.agent_label': this.agent.label } })
|
|
206
|
-
// TODO(brian): PR3 - Wrap prewarm calls with trace.useSpan(startSpan, endOnExit: false)
|
|
207
208
|
const unlock = await this.lock.lock();
|
|
208
209
|
try {
|
|
210
|
+
// Create start_agent_activity as a ROOT span (new trace) to match Python behavior
|
|
211
|
+
const startSpan = tracer.startSpan({
|
|
212
|
+
name: 'start_agent_activity',
|
|
213
|
+
attributes: { [traceTypes.ATTR_AGENT_LABEL]: this.agent.id },
|
|
214
|
+
context: ROOT_CONTEXT,
|
|
215
|
+
});
|
|
216
|
+
|
|
209
217
|
this.agent._agentActivity = this;
|
|
210
218
|
|
|
211
219
|
if (this.llm instanceof RealtimeModel) {
|
|
@@ -286,16 +294,26 @@ export class AgentActivity implements RecognitionHooks {
|
|
|
286
294
|
turnDetectionMode: this.turnDetectionMode,
|
|
287
295
|
minEndpointingDelay: this.agentSession.options.minEndpointingDelay,
|
|
288
296
|
maxEndpointingDelay: this.agentSession.options.maxEndpointingDelay,
|
|
297
|
+
rootSpanContext: this.agentSession.rootSpanContext,
|
|
289
298
|
});
|
|
290
299
|
this.audioRecognition.start();
|
|
291
300
|
this.started = true;
|
|
292
301
|
|
|
293
302
|
this._mainTask = Task.from(({ signal }) => this.mainTask(signal));
|
|
294
|
-
|
|
303
|
+
|
|
304
|
+
// Create on_enter as a child of start_agent_activity in the new trace
|
|
305
|
+
const onEnterTask = tracer.startActiveSpan(async () => this.agent.onEnter(), {
|
|
306
|
+
name: 'on_enter',
|
|
307
|
+
context: trace.setSpan(ROOT_CONTEXT, startSpan),
|
|
308
|
+
attributes: { [traceTypes.ATTR_AGENT_LABEL]: this.agent.id },
|
|
309
|
+
});
|
|
310
|
+
|
|
295
311
|
this.createSpeechTask({
|
|
296
|
-
task: Task.from(() =>
|
|
312
|
+
task: Task.from(() => onEnterTask),
|
|
297
313
|
name: 'AgentActivity_onEnter',
|
|
298
314
|
});
|
|
315
|
+
|
|
316
|
+
startSpan.end();
|
|
299
317
|
} finally {
|
|
300
318
|
unlock();
|
|
301
319
|
}
|
|
@@ -577,7 +595,6 @@ export class AgentActivity implements RecognitionHooks {
|
|
|
577
595
|
}
|
|
578
596
|
|
|
579
597
|
if (this.draining) {
|
|
580
|
-
// copied from python:
|
|
581
598
|
// TODO(shubhra): should we "forward" this new turn to the next agent?
|
|
582
599
|
this.logger.warn('skipping new realtime generation, the agent is draining');
|
|
583
600
|
return;
|
|
@@ -783,7 +800,6 @@ export class AgentActivity implements RecognitionHooks {
|
|
|
783
800
|
if (this.draining) {
|
|
784
801
|
this.cancelPreemptiveGeneration();
|
|
785
802
|
this.logger.warn({ user_input: info.newTranscript }, 'skipping user input, task is draining');
|
|
786
|
-
// copied from python:
|
|
787
803
|
// TODO(shubhra): should we "forward" this new turn to the next agent/activity?
|
|
788
804
|
return true;
|
|
789
805
|
}
|
|
@@ -1254,17 +1270,35 @@ export class AgentActivity implements RecognitionHooks {
|
|
|
1254
1270
|
}
|
|
1255
1271
|
}
|
|
1256
1272
|
|
|
1257
|
-
|
|
1258
|
-
|
|
1259
|
-
|
|
1260
|
-
|
|
1261
|
-
|
|
1262
|
-
|
|
1263
|
-
|
|
1264
|
-
|
|
1265
|
-
|
|
1266
|
-
|
|
1267
|
-
|
|
1273
|
+
private _pipelineReplyTaskImpl = async ({
|
|
1274
|
+
speechHandle,
|
|
1275
|
+
chatCtx,
|
|
1276
|
+
toolCtx,
|
|
1277
|
+
modelSettings,
|
|
1278
|
+
replyAbortController,
|
|
1279
|
+
instructions,
|
|
1280
|
+
newMessage,
|
|
1281
|
+
toolsMessages,
|
|
1282
|
+
span,
|
|
1283
|
+
}: {
|
|
1284
|
+
speechHandle: SpeechHandle;
|
|
1285
|
+
chatCtx: ChatContext;
|
|
1286
|
+
toolCtx: ToolContext;
|
|
1287
|
+
modelSettings: ModelSettings;
|
|
1288
|
+
replyAbortController: AbortController;
|
|
1289
|
+
instructions?: string;
|
|
1290
|
+
newMessage?: ChatMessage;
|
|
1291
|
+
toolsMessages?: ChatItem[];
|
|
1292
|
+
span: Span;
|
|
1293
|
+
}): Promise<void> => {
|
|
1294
|
+
span.setAttribute(traceTypes.ATTR_SPEECH_ID, speechHandle.id);
|
|
1295
|
+
if (instructions) {
|
|
1296
|
+
span.setAttribute(traceTypes.ATTR_INSTRUCTIONS, instructions);
|
|
1297
|
+
}
|
|
1298
|
+
if (newMessage) {
|
|
1299
|
+
span.setAttribute(traceTypes.ATTR_USER_INPUT, newMessage.textContent || '');
|
|
1300
|
+
}
|
|
1301
|
+
|
|
1268
1302
|
speechHandleStorage.enterWith(speechHandle);
|
|
1269
1303
|
|
|
1270
1304
|
const audioOutput = this.agentSession.output.audioEnabled
|
|
@@ -1406,6 +1440,8 @@ export class AgentActivity implements RecognitionHooks {
|
|
|
1406
1440
|
msg.createdAt = replyStartedAt;
|
|
1407
1441
|
}
|
|
1408
1442
|
this.agent._chatCtx.insert(toolsMessages);
|
|
1443
|
+
// Also add to session history (matches Python agent_session.py _tool_items_added)
|
|
1444
|
+
this.agentSession._toolItemsAdded(toolsMessages as (FunctionCall | FunctionCallOutput)[]);
|
|
1409
1445
|
}
|
|
1410
1446
|
|
|
1411
1447
|
if (speechHandle.interrupted) {
|
|
@@ -1601,8 +1637,38 @@ export class AgentActivity implements RecognitionHooks {
|
|
|
1601
1637
|
msg.createdAt = replyStartedAt;
|
|
1602
1638
|
}
|
|
1603
1639
|
this.agent._chatCtx.insert(toolMessages);
|
|
1640
|
+
this.agentSession._toolItemsAdded(toolMessages as (FunctionCall | FunctionCallOutput)[]);
|
|
1604
1641
|
}
|
|
1605
|
-
}
|
|
1642
|
+
};
|
|
1643
|
+
|
|
1644
|
+
private pipelineReplyTask = async (
|
|
1645
|
+
speechHandle: SpeechHandle,
|
|
1646
|
+
chatCtx: ChatContext,
|
|
1647
|
+
toolCtx: ToolContext,
|
|
1648
|
+
modelSettings: ModelSettings,
|
|
1649
|
+
replyAbortController: AbortController,
|
|
1650
|
+
instructions?: string,
|
|
1651
|
+
newMessage?: ChatMessage,
|
|
1652
|
+
toolsMessages?: ChatItem[],
|
|
1653
|
+
): Promise<void> =>
|
|
1654
|
+
tracer.startActiveSpan(
|
|
1655
|
+
async (span) =>
|
|
1656
|
+
this._pipelineReplyTaskImpl({
|
|
1657
|
+
speechHandle,
|
|
1658
|
+
chatCtx,
|
|
1659
|
+
toolCtx,
|
|
1660
|
+
modelSettings,
|
|
1661
|
+
replyAbortController,
|
|
1662
|
+
instructions,
|
|
1663
|
+
newMessage,
|
|
1664
|
+
toolsMessages,
|
|
1665
|
+
span,
|
|
1666
|
+
}),
|
|
1667
|
+
{
|
|
1668
|
+
name: 'agent_turn',
|
|
1669
|
+
context: this.agentSession.rootSpanContext,
|
|
1670
|
+
},
|
|
1671
|
+
);
|
|
1606
1672
|
|
|
1607
1673
|
private async realtimeGenerationTask(
|
|
1608
1674
|
speechHandle: SpeechHandle,
|
|
@@ -1610,6 +1676,37 @@ export class AgentActivity implements RecognitionHooks {
|
|
|
1610
1676
|
modelSettings: ModelSettings,
|
|
1611
1677
|
replyAbortController: AbortController,
|
|
1612
1678
|
): Promise<void> {
|
|
1679
|
+
return tracer.startActiveSpan(
|
|
1680
|
+
async (span) =>
|
|
1681
|
+
this._realtimeGenerationTaskImpl({
|
|
1682
|
+
speechHandle,
|
|
1683
|
+
ev,
|
|
1684
|
+
modelSettings,
|
|
1685
|
+
replyAbortController,
|
|
1686
|
+
span,
|
|
1687
|
+
}),
|
|
1688
|
+
{
|
|
1689
|
+
name: 'agent_turn',
|
|
1690
|
+
context: this.agentSession.rootSpanContext,
|
|
1691
|
+
},
|
|
1692
|
+
);
|
|
1693
|
+
}
|
|
1694
|
+
|
|
1695
|
+
private async _realtimeGenerationTaskImpl({
|
|
1696
|
+
speechHandle,
|
|
1697
|
+
ev,
|
|
1698
|
+
modelSettings,
|
|
1699
|
+
replyAbortController,
|
|
1700
|
+
span,
|
|
1701
|
+
}: {
|
|
1702
|
+
speechHandle: SpeechHandle;
|
|
1703
|
+
ev: GenerationCreatedEvent;
|
|
1704
|
+
modelSettings: ModelSettings;
|
|
1705
|
+
replyAbortController: AbortController;
|
|
1706
|
+
span: Span;
|
|
1707
|
+
}): Promise<void> {
|
|
1708
|
+
span.setAttribute(traceTypes.ATTR_SPEECH_ID, speechHandle.id);
|
|
1709
|
+
|
|
1613
1710
|
speechHandleStorage.enterWith(speechHandle);
|
|
1614
1711
|
|
|
1615
1712
|
if (!this.realtimeSession) {
|
|
@@ -1786,6 +1883,8 @@ export class AgentActivity implements RecognitionHooks {
|
|
|
1786
1883
|
|
|
1787
1884
|
const onToolExecutionStarted = (f: FunctionCall) => {
|
|
1788
1885
|
speechHandle._itemAdded([f]);
|
|
1886
|
+
this.agent._chatCtx.items.push(f);
|
|
1887
|
+
this.agentSession._toolItemsAdded([f]);
|
|
1789
1888
|
};
|
|
1790
1889
|
|
|
1791
1890
|
const onToolExecutionCompleted = (out: ToolExecutionOutput) => {
|
|
@@ -1979,6 +2078,11 @@ export class AgentActivity implements RecognitionHooks {
|
|
|
1979
2078
|
}
|
|
1980
2079
|
const chatCtx = this.realtimeSession.chatCtx.copy();
|
|
1981
2080
|
chatCtx.items.push(...functionToolsExecutedEvent.functionCallOutputs);
|
|
2081
|
+
|
|
2082
|
+
this.agentSession._toolItemsAdded(
|
|
2083
|
+
functionToolsExecutedEvent.functionCallOutputs as FunctionCallOutput[],
|
|
2084
|
+
);
|
|
2085
|
+
|
|
1982
2086
|
try {
|
|
1983
2087
|
await this.realtimeSession.updateChatCtx(chatCtx);
|
|
1984
2088
|
} catch (error) {
|
|
@@ -2096,16 +2200,30 @@ export class AgentActivity implements RecognitionHooks {
|
|
|
2096
2200
|
this.wakeupMainTask();
|
|
2097
2201
|
}
|
|
2098
2202
|
|
|
2099
|
-
// TODO(brian): PR3 - Wrap entire drain() method with tracer.startActiveSpan('drain_agent_activity', { attributes: { 'lk.agent_label': this.agent.label } })
|
|
2100
2203
|
async drain(): Promise<void> {
|
|
2204
|
+
// Create drain_agent_activity as a ROOT span (new trace) to match Python behavior
|
|
2205
|
+
return tracer.startActiveSpan(async (span) => this._drainImpl(span), {
|
|
2206
|
+
name: 'drain_agent_activity',
|
|
2207
|
+
context: ROOT_CONTEXT,
|
|
2208
|
+
});
|
|
2209
|
+
}
|
|
2210
|
+
|
|
2211
|
+
private async _drainImpl(span: Span): Promise<void> {
|
|
2212
|
+
span.setAttribute(traceTypes.ATTR_AGENT_LABEL, this.agent.id);
|
|
2213
|
+
|
|
2101
2214
|
const unlock = await this.lock.lock();
|
|
2102
2215
|
try {
|
|
2103
2216
|
if (this._draining) return;
|
|
2104
2217
|
|
|
2105
2218
|
this.cancelPreemptiveGeneration();
|
|
2106
|
-
|
|
2219
|
+
|
|
2220
|
+
const onExitTask = tracer.startActiveSpan(async () => this.agent.onExit(), {
|
|
2221
|
+
name: 'on_exit',
|
|
2222
|
+
attributes: { [traceTypes.ATTR_AGENT_LABEL]: this.agent.id },
|
|
2223
|
+
});
|
|
2224
|
+
|
|
2107
2225
|
this.createSpeechTask({
|
|
2108
|
-
task: Task.from(() =>
|
|
2226
|
+
task: Task.from(() => onExitTask),
|
|
2109
2227
|
name: 'AgentActivity_onExit',
|
|
2110
2228
|
});
|
|
2111
2229
|
|
|
@@ -3,6 +3,8 @@
|
|
|
3
3
|
// SPDX-License-Identifier: Apache-2.0
|
|
4
4
|
import type { AudioFrame, Room } from '@livekit/rtc-node';
|
|
5
5
|
import type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';
|
|
6
|
+
import type { Context, Span } from '@opentelemetry/api';
|
|
7
|
+
import { ROOT_CONTEXT, context as otelContext, trace } from '@opentelemetry/api';
|
|
6
8
|
import { EventEmitter } from 'node:events';
|
|
7
9
|
import type { ReadableStream } from 'node:stream/web';
|
|
8
10
|
import {
|
|
@@ -14,12 +16,14 @@ import {
|
|
|
14
16
|
type TTSModelString,
|
|
15
17
|
} from '../inference/index.js';
|
|
16
18
|
import { getJobContext } from '../job.js';
|
|
19
|
+
import type { FunctionCall, FunctionCallOutput } from '../llm/chat_context.js';
|
|
17
20
|
import { AgentHandoffItem, ChatContext, ChatMessage } from '../llm/chat_context.js';
|
|
18
21
|
import type { LLM, RealtimeModel, RealtimeModelError, ToolChoice } from '../llm/index.js';
|
|
19
22
|
import type { LLMError } from '../llm/llm.js';
|
|
20
23
|
import { log } from '../log.js';
|
|
21
24
|
import type { STT } from '../stt/index.js';
|
|
22
25
|
import type { STTError } from '../stt/stt.js';
|
|
26
|
+
import { traceTypes, tracer } from '../telemetry/index.js';
|
|
23
27
|
import type { TTS, TTSError } from '../tts/tts.js';
|
|
24
28
|
import type { VAD } from '../vad.js';
|
|
25
29
|
import type { Agent } from './agent.js';
|
|
@@ -128,9 +132,22 @@ export class AgentSession<
|
|
|
128
132
|
private closingTask: Promise<void> | null = null;
|
|
129
133
|
private userAwayTimer: NodeJS.Timeout | null = null;
|
|
130
134
|
|
|
135
|
+
private sessionSpan?: Span;
|
|
136
|
+
private userSpeakingSpan?: Span;
|
|
137
|
+
private agentSpeakingSpan?: Span;
|
|
138
|
+
|
|
139
|
+
/** @internal */
|
|
140
|
+
rootSpanContext?: Context;
|
|
141
|
+
|
|
131
142
|
/** @internal */
|
|
132
143
|
_recordedEvents: AgentEvent[] = [];
|
|
133
144
|
|
|
145
|
+
/** @internal */
|
|
146
|
+
_enableRecording = false;
|
|
147
|
+
|
|
148
|
+
/** @internal - Timestamp when the session started (milliseconds) */
|
|
149
|
+
_startedAt?: number;
|
|
150
|
+
|
|
134
151
|
constructor(opts: AgentSessionOptions<UserData>) {
|
|
135
152
|
super();
|
|
136
153
|
|
|
@@ -175,7 +192,8 @@ export class AgentSession<
|
|
|
175
192
|
this._chatCtx = ChatContext.empty();
|
|
176
193
|
this.options = { ...defaultVoiceOptions, ...voiceOptions };
|
|
177
194
|
|
|
178
|
-
this.
|
|
195
|
+
this._onUserInputTranscribed = this._onUserInputTranscribed.bind(this);
|
|
196
|
+
this.on(AgentSessionEventTypes.UserInputTranscribed, this._onUserInputTranscribed);
|
|
179
197
|
}
|
|
180
198
|
|
|
181
199
|
emit<K extends keyof AgentSessionCallbacks>(
|
|
@@ -211,25 +229,22 @@ export class AgentSession<
|
|
|
211
229
|
this._userData = value;
|
|
212
230
|
}
|
|
213
231
|
|
|
214
|
-
async
|
|
215
|
-
// TODO(brian): PR2 - Add setupCloudTracer() call if on LiveKit Cloud with recording enabled
|
|
216
|
-
// TODO(brian): PR3 - Add span: this._sessionSpan = tracer.startSpan('agent_session'), store as instance property
|
|
217
|
-
// TODO(brian): PR4 - Add setupCloudLogger() call in setupCloudTracer() to setup OTEL logging with Pino bridge
|
|
232
|
+
private async _startImpl({
|
|
218
233
|
agent,
|
|
219
234
|
room,
|
|
220
235
|
inputOptions,
|
|
221
236
|
outputOptions,
|
|
222
|
-
record
|
|
237
|
+
record,
|
|
238
|
+
span,
|
|
223
239
|
}: {
|
|
224
240
|
agent: Agent;
|
|
225
241
|
room: Room;
|
|
226
242
|
inputOptions?: Partial<RoomInputOptions>;
|
|
227
243
|
outputOptions?: Partial<RoomOutputOptions>;
|
|
228
|
-
record
|
|
244
|
+
record: boolean;
|
|
245
|
+
span: Span;
|
|
229
246
|
}): Promise<void> {
|
|
230
|
-
|
|
231
|
-
return;
|
|
232
|
-
}
|
|
247
|
+
span.setAttribute(traceTypes.ATTR_AGENT_LABEL, agent.id);
|
|
233
248
|
|
|
234
249
|
this.agent = agent;
|
|
235
250
|
this._updateAgentState('initializing');
|
|
@@ -291,9 +306,62 @@ export class AgentSession<
|
|
|
291
306
|
);
|
|
292
307
|
|
|
293
308
|
this.started = true;
|
|
309
|
+
this._startedAt = Date.now();
|
|
294
310
|
this._updateAgentState('listening');
|
|
295
311
|
}
|
|
296
312
|
|
|
313
|
+
async start({
|
|
314
|
+
agent,
|
|
315
|
+
room,
|
|
316
|
+
inputOptions,
|
|
317
|
+
outputOptions,
|
|
318
|
+
record = true,
|
|
319
|
+
}: {
|
|
320
|
+
agent: Agent;
|
|
321
|
+
room: Room;
|
|
322
|
+
inputOptions?: Partial<RoomInputOptions>;
|
|
323
|
+
outputOptions?: Partial<RoomOutputOptions>;
|
|
324
|
+
record?: boolean;
|
|
325
|
+
}): Promise<void> {
|
|
326
|
+
if (this.started) {
|
|
327
|
+
return;
|
|
328
|
+
}
|
|
329
|
+
|
|
330
|
+
const ctx = getJobContext();
|
|
331
|
+
|
|
332
|
+
record = record ?? ctx.info.job.enableRecording;
|
|
333
|
+
this._enableRecording = record;
|
|
334
|
+
|
|
335
|
+
this.logger.info(
|
|
336
|
+
{ record, enableRecording: ctx.info.job.enableRecording },
|
|
337
|
+
'Configuring session recording',
|
|
338
|
+
);
|
|
339
|
+
|
|
340
|
+
if (this._enableRecording) {
|
|
341
|
+
await ctx.initRecording();
|
|
342
|
+
}
|
|
343
|
+
|
|
344
|
+
// Create agent_session as a ROOT span (new trace) to match Python behavior
|
|
345
|
+
// This creates a separate trace for better cloud dashboard organization
|
|
346
|
+
this.sessionSpan = tracer.startSpan({
|
|
347
|
+
name: 'agent_session',
|
|
348
|
+
context: ROOT_CONTEXT,
|
|
349
|
+
});
|
|
350
|
+
|
|
351
|
+
// Set the session span as the active span in the context
|
|
352
|
+
// This ensures all child spans (agent_turn, user_turn, etc.) are parented to it
|
|
353
|
+
this.rootSpanContext = trace.setSpan(ROOT_CONTEXT, this.sessionSpan);
|
|
354
|
+
|
|
355
|
+
await this._startImpl({
|
|
356
|
+
agent,
|
|
357
|
+
room,
|
|
358
|
+
inputOptions,
|
|
359
|
+
outputOptions,
|
|
360
|
+
record,
|
|
361
|
+
span: this.sessionSpan,
|
|
362
|
+
});
|
|
363
|
+
}
|
|
364
|
+
|
|
297
365
|
updateAgent(agent: Agent): void {
|
|
298
366
|
this.agent = agent;
|
|
299
367
|
|
|
@@ -367,32 +435,41 @@ export class AgentSession<
|
|
|
367
435
|
}
|
|
368
436
|
|
|
369
437
|
private async updateActivity(agent: Agent): Promise<void> {
|
|
370
|
-
|
|
371
|
-
|
|
438
|
+
const runWithContext = async () => {
|
|
439
|
+
// TODO(AJS-129): add lock to agent activity core lifecycle
|
|
440
|
+
this.nextActivity = new AgentActivity(agent, this);
|
|
372
441
|
|
|
373
|
-
|
|
442
|
+
const previousActivity = this.activity;
|
|
374
443
|
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
444
|
+
if (this.activity) {
|
|
445
|
+
await this.activity.drain();
|
|
446
|
+
await this.activity.close();
|
|
447
|
+
}
|
|
379
448
|
|
|
380
|
-
|
|
381
|
-
|
|
449
|
+
this.activity = this.nextActivity;
|
|
450
|
+
this.nextActivity = undefined;
|
|
382
451
|
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
452
|
+
this._chatCtx.insert(
|
|
453
|
+
new AgentHandoffItem({
|
|
454
|
+
oldAgentId: previousActivity?.agent.id,
|
|
455
|
+
newAgentId: agent.id,
|
|
456
|
+
}),
|
|
457
|
+
);
|
|
458
|
+
this.logger.debug({ previousActivity, agent }, 'Agent handoff inserted into chat context');
|
|
390
459
|
|
|
391
|
-
|
|
460
|
+
await this.activity.start();
|
|
392
461
|
|
|
393
|
-
|
|
394
|
-
|
|
462
|
+
if (this._input.audio) {
|
|
463
|
+
this.activity.attachAudioInput(this._input.audio.stream);
|
|
464
|
+
}
|
|
465
|
+
};
|
|
466
|
+
|
|
467
|
+
// Run within session span context if available
|
|
468
|
+
if (this.rootSpanContext) {
|
|
469
|
+
return otelContext.with(this.rootSpanContext, runWithContext);
|
|
395
470
|
}
|
|
471
|
+
|
|
472
|
+
return runWithContext();
|
|
396
473
|
}
|
|
397
474
|
|
|
398
475
|
get chatCtx(): ChatContext {
|
|
@@ -452,14 +529,35 @@ export class AgentSession<
|
|
|
452
529
|
this.emit(AgentSessionEventTypes.ConversationItemAdded, createConversationItemAddedEvent(item));
|
|
453
530
|
}
|
|
454
531
|
|
|
532
|
+
/** @internal */
|
|
533
|
+
_toolItemsAdded(items: (FunctionCall | FunctionCallOutput)[]): void {
|
|
534
|
+
this._chatCtx.insert(items);
|
|
535
|
+
}
|
|
536
|
+
|
|
455
537
|
/** @internal */
|
|
456
538
|
_updateAgentState(state: AgentState) {
|
|
457
539
|
if (this._agentState === state) {
|
|
458
540
|
return;
|
|
459
541
|
}
|
|
460
542
|
|
|
461
|
-
|
|
462
|
-
|
|
543
|
+
if (state === 'speaking') {
|
|
544
|
+
// TODO(brian): PR4 - Track error counts
|
|
545
|
+
|
|
546
|
+
if (this.agentSpeakingSpan === undefined) {
|
|
547
|
+
this.agentSpeakingSpan = tracer.startSpan({
|
|
548
|
+
name: 'agent_speaking',
|
|
549
|
+
context: this.rootSpanContext,
|
|
550
|
+
});
|
|
551
|
+
|
|
552
|
+
// TODO(brian): PR4 - Set participant attributes if roomIO.room.localParticipant is available
|
|
553
|
+
// (Ref: Python agent_session.py line 1161-1164)
|
|
554
|
+
}
|
|
555
|
+
} else if (this.agentSpeakingSpan !== undefined) {
|
|
556
|
+
// TODO(brian): PR4 - Set ATTR_END_TIME attribute if available
|
|
557
|
+
this.agentSpeakingSpan.end();
|
|
558
|
+
this.agentSpeakingSpan = undefined;
|
|
559
|
+
}
|
|
560
|
+
|
|
463
561
|
const oldState = this._agentState;
|
|
464
562
|
this._agentState = state;
|
|
465
563
|
|
|
@@ -482,8 +580,20 @@ export class AgentSession<
|
|
|
482
580
|
return;
|
|
483
581
|
}
|
|
484
582
|
|
|
485
|
-
|
|
486
|
-
|
|
583
|
+
if (state === 'speaking' && this.userSpeakingSpan === undefined) {
|
|
584
|
+
this.userSpeakingSpan = tracer.startSpan({
|
|
585
|
+
name: 'user_speaking',
|
|
586
|
+
context: this.rootSpanContext,
|
|
587
|
+
});
|
|
588
|
+
|
|
589
|
+
// TODO(brian): PR4 - Set participant attributes if roomIO.linkedParticipant is available
|
|
590
|
+
// (Ref: Python agent_session.py line 1192-1195)
|
|
591
|
+
} else if (this.userSpeakingSpan !== undefined) {
|
|
592
|
+
// TODO(brian): PR4 - Set ATTR_END_TIME attribute with lastSpeakingTime if available
|
|
593
|
+
this.userSpeakingSpan.end();
|
|
594
|
+
this.userSpeakingSpan = undefined;
|
|
595
|
+
}
|
|
596
|
+
|
|
487
597
|
const oldState = this.userState;
|
|
488
598
|
this.userState = state;
|
|
489
599
|
|
|
@@ -550,19 +660,33 @@ export class AgentSession<
|
|
|
550
660
|
reason: CloseReason,
|
|
551
661
|
error: RealtimeModelError | LLMError | TTSError | STTError | null = null,
|
|
552
662
|
drain: boolean = false,
|
|
663
|
+
): Promise<void> {
|
|
664
|
+
if (this.rootSpanContext) {
|
|
665
|
+
return otelContext.with(this.rootSpanContext, async () => {
|
|
666
|
+
await this.closeImplInner(reason, error, drain);
|
|
667
|
+
});
|
|
668
|
+
}
|
|
669
|
+
|
|
670
|
+
return this.closeImplInner(reason, error, drain);
|
|
671
|
+
}
|
|
672
|
+
|
|
673
|
+
private async closeImplInner(
|
|
674
|
+
reason: CloseReason,
|
|
675
|
+
error: RealtimeModelError | LLMError | TTSError | STTError | null = null,
|
|
676
|
+
drain: boolean = false,
|
|
553
677
|
): Promise<void> {
|
|
554
678
|
if (!this.started) {
|
|
555
679
|
return;
|
|
556
680
|
}
|
|
557
681
|
|
|
558
682
|
this._cancelUserAwayTimer();
|
|
683
|
+
this.off(AgentSessionEventTypes.UserInputTranscribed, this._onUserInputTranscribed);
|
|
559
684
|
|
|
560
685
|
if (this.activity) {
|
|
561
686
|
if (!drain) {
|
|
562
687
|
try {
|
|
563
688
|
this.activity.interrupt();
|
|
564
689
|
} catch (error) {
|
|
565
|
-
// uninterruptible speech [copied from python]
|
|
566
690
|
// TODO(shubhra): force interrupt or wait for it to finish?
|
|
567
691
|
// it might be an audio played from the error callback
|
|
568
692
|
}
|
|
@@ -584,12 +708,28 @@ export class AgentSession<
|
|
|
584
708
|
await this.activity?.close();
|
|
585
709
|
this.activity = undefined;
|
|
586
710
|
|
|
711
|
+
if (this.sessionSpan) {
|
|
712
|
+
this.sessionSpan.end();
|
|
713
|
+
this.sessionSpan = undefined;
|
|
714
|
+
}
|
|
715
|
+
|
|
716
|
+
if (this.userSpeakingSpan) {
|
|
717
|
+
this.userSpeakingSpan.end();
|
|
718
|
+
this.userSpeakingSpan = undefined;
|
|
719
|
+
}
|
|
720
|
+
|
|
721
|
+
if (this.agentSpeakingSpan) {
|
|
722
|
+
this.agentSpeakingSpan.end();
|
|
723
|
+
this.agentSpeakingSpan = undefined;
|
|
724
|
+
}
|
|
725
|
+
|
|
587
726
|
this.started = false;
|
|
588
727
|
|
|
589
728
|
this.emit(AgentSessionEventTypes.Close, createCloseEvent(reason, error));
|
|
590
729
|
|
|
591
730
|
this.userState = 'listening';
|
|
592
731
|
this._agentState = 'initializing';
|
|
732
|
+
this.rootSpanContext = undefined;
|
|
593
733
|
|
|
594
734
|
this.logger.info({ reason, error }, 'AgentSession closed');
|
|
595
735
|
}
|