@livekit/agents 1.0.22 → 1.0.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131)
  1. package/dist/inference/api_protos.cjs +2 -2
  2. package/dist/inference/api_protos.cjs.map +1 -1
  3. package/dist/inference/api_protos.d.cts +16 -16
  4. package/dist/inference/api_protos.d.ts +16 -16
  5. package/dist/inference/api_protos.js +2 -2
  6. package/dist/inference/api_protos.js.map +1 -1
  7. package/dist/ipc/job_proc_lazy_main.cjs +35 -1
  8. package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
  9. package/dist/ipc/job_proc_lazy_main.js +13 -1
  10. package/dist/ipc/job_proc_lazy_main.js.map +1 -1
  11. package/dist/job.cjs +52 -6
  12. package/dist/job.cjs.map +1 -1
  13. package/dist/job.d.cts +2 -0
  14. package/dist/job.d.ts +2 -0
  15. package/dist/job.d.ts.map +1 -1
  16. package/dist/job.js +52 -6
  17. package/dist/job.js.map +1 -1
  18. package/dist/llm/llm.cjs +38 -3
  19. package/dist/llm/llm.cjs.map +1 -1
  20. package/dist/llm/llm.d.cts +1 -0
  21. package/dist/llm/llm.d.ts +1 -0
  22. package/dist/llm/llm.d.ts.map +1 -1
  23. package/dist/llm/llm.js +38 -3
  24. package/dist/llm/llm.js.map +1 -1
  25. package/dist/log.cjs +34 -10
  26. package/dist/log.cjs.map +1 -1
  27. package/dist/log.d.cts +7 -0
  28. package/dist/log.d.ts +7 -0
  29. package/dist/log.d.ts.map +1 -1
  30. package/dist/log.js +34 -11
  31. package/dist/log.js.map +1 -1
  32. package/dist/telemetry/index.cjs +23 -2
  33. package/dist/telemetry/index.cjs.map +1 -1
  34. package/dist/telemetry/index.d.cts +4 -1
  35. package/dist/telemetry/index.d.ts +4 -1
  36. package/dist/telemetry/index.d.ts.map +1 -1
  37. package/dist/telemetry/index.js +27 -2
  38. package/dist/telemetry/index.js.map +1 -1
  39. package/dist/telemetry/logging.cjs +65 -0
  40. package/dist/telemetry/logging.cjs.map +1 -0
  41. package/dist/telemetry/logging.d.cts +21 -0
  42. package/dist/telemetry/logging.d.ts +21 -0
  43. package/dist/telemetry/logging.d.ts.map +1 -0
  44. package/dist/telemetry/logging.js +40 -0
  45. package/dist/telemetry/logging.js.map +1 -0
  46. package/dist/telemetry/otel_http_exporter.cjs +144 -0
  47. package/dist/telemetry/otel_http_exporter.cjs.map +1 -0
  48. package/dist/telemetry/otel_http_exporter.d.cts +62 -0
  49. package/dist/telemetry/otel_http_exporter.d.ts +62 -0
  50. package/dist/telemetry/otel_http_exporter.d.ts.map +1 -0
  51. package/dist/telemetry/otel_http_exporter.js +120 -0
  52. package/dist/telemetry/otel_http_exporter.js.map +1 -0
  53. package/dist/telemetry/pino_otel_transport.cjs +217 -0
  54. package/dist/telemetry/pino_otel_transport.cjs.map +1 -0
  55. package/dist/telemetry/pino_otel_transport.d.cts +58 -0
  56. package/dist/telemetry/pino_otel_transport.d.ts +58 -0
  57. package/dist/telemetry/pino_otel_transport.d.ts.map +1 -0
  58. package/dist/telemetry/pino_otel_transport.js +189 -0
  59. package/dist/telemetry/pino_otel_transport.js.map +1 -0
  60. package/dist/telemetry/traces.cjs +225 -16
  61. package/dist/telemetry/traces.cjs.map +1 -1
  62. package/dist/telemetry/traces.d.cts +17 -0
  63. package/dist/telemetry/traces.d.ts +17 -0
  64. package/dist/telemetry/traces.d.ts.map +1 -1
  65. package/dist/telemetry/traces.js +211 -14
  66. package/dist/telemetry/traces.js.map +1 -1
  67. package/dist/tts/tts.cjs +62 -5
  68. package/dist/tts/tts.cjs.map +1 -1
  69. package/dist/tts/tts.d.cts +2 -0
  70. package/dist/tts/tts.d.ts +2 -0
  71. package/dist/tts/tts.d.ts.map +1 -1
  72. package/dist/tts/tts.js +62 -5
  73. package/dist/tts/tts.js.map +1 -1
  74. package/dist/utils.cjs +6 -0
  75. package/dist/utils.cjs.map +1 -1
  76. package/dist/utils.d.cts +1 -0
  77. package/dist/utils.d.ts +1 -0
  78. package/dist/utils.d.ts.map +1 -1
  79. package/dist/utils.js +5 -0
  80. package/dist/utils.js.map +1 -1
  81. package/dist/voice/agent_activity.cjs +93 -7
  82. package/dist/voice/agent_activity.cjs.map +1 -1
  83. package/dist/voice/agent_activity.d.cts +3 -0
  84. package/dist/voice/agent_activity.d.ts +3 -0
  85. package/dist/voice/agent_activity.d.ts.map +1 -1
  86. package/dist/voice/agent_activity.js +93 -7
  87. package/dist/voice/agent_activity.js.map +1 -1
  88. package/dist/voice/agent_session.cjs +122 -27
  89. package/dist/voice/agent_session.cjs.map +1 -1
  90. package/dist/voice/agent_session.d.cts +15 -0
  91. package/dist/voice/agent_session.d.ts +15 -0
  92. package/dist/voice/agent_session.d.ts.map +1 -1
  93. package/dist/voice/agent_session.js +122 -27
  94. package/dist/voice/agent_session.js.map +1 -1
  95. package/dist/voice/audio_recognition.cjs +69 -22
  96. package/dist/voice/audio_recognition.cjs.map +1 -1
  97. package/dist/voice/audio_recognition.d.cts +5 -0
  98. package/dist/voice/audio_recognition.d.ts +5 -0
  99. package/dist/voice/audio_recognition.d.ts.map +1 -1
  100. package/dist/voice/audio_recognition.js +69 -22
  101. package/dist/voice/audio_recognition.js.map +1 -1
  102. package/dist/voice/generation.cjs +43 -3
  103. package/dist/voice/generation.cjs.map +1 -1
  104. package/dist/voice/generation.d.ts.map +1 -1
  105. package/dist/voice/generation.js +43 -3
  106. package/dist/voice/generation.js.map +1 -1
  107. package/dist/voice/report.cjs +3 -2
  108. package/dist/voice/report.cjs.map +1 -1
  109. package/dist/voice/report.d.cts +7 -1
  110. package/dist/voice/report.d.ts +7 -1
  111. package/dist/voice/report.d.ts.map +1 -1
  112. package/dist/voice/report.js +3 -2
  113. package/dist/voice/report.js.map +1 -1
  114. package/package.json +8 -2
  115. package/src/inference/api_protos.ts +2 -2
  116. package/src/ipc/job_proc_lazy_main.ts +12 -1
  117. package/src/job.ts +59 -10
  118. package/src/llm/llm.ts +48 -5
  119. package/src/log.ts +52 -15
  120. package/src/telemetry/index.ts +22 -4
  121. package/src/telemetry/logging.ts +55 -0
  122. package/src/telemetry/otel_http_exporter.ts +191 -0
  123. package/src/telemetry/pino_otel_transport.ts +265 -0
  124. package/src/telemetry/traces.ts +320 -20
  125. package/src/tts/tts.ts +71 -9
  126. package/src/utils.ts +5 -0
  127. package/src/voice/agent_activity.ts +140 -22
  128. package/src/voice/agent_session.ts +174 -34
  129. package/src/voice/audio_recognition.ts +85 -26
  130. package/src/voice/generation.ts +59 -7
  131. package/src/voice/report.ts +10 -4
@@ -3,6 +3,8 @@
3
3
  // SPDX-License-Identifier: Apache-2.0
4
4
  import { Mutex } from '@livekit/mutex';
5
5
  import type { AudioFrame } from '@livekit/rtc-node';
6
+ import type { Span } from '@opentelemetry/api';
7
+ import { ROOT_CONTEXT, trace } from '@opentelemetry/api';
6
8
  import { Heap } from 'heap-js';
7
9
  import { AsyncLocalStorage } from 'node:async_hooks';
8
10
  import { ReadableStream } from 'node:stream/web';
@@ -10,6 +12,7 @@ import { type ChatContext, ChatMessage } from '../llm/chat_context.js';
10
12
  import {
11
13
  type ChatItem,
12
14
  type FunctionCall,
15
+ type FunctionCallOutput,
13
16
  type GenerationCreatedEvent,
14
17
  type InputSpeechStartedEvent,
15
18
  type InputSpeechStoppedEvent,
@@ -34,6 +37,7 @@ import type {
34
37
  } from '../metrics/base.js';
35
38
  import { DeferredReadableStream } from '../stream/deferred_stream.js';
36
39
  import { STT, type STTError, type SpeechEvent } from '../stt/stt.js';
40
+ import { traceTypes, tracer } from '../telemetry/index.js';
37
41
  import { splitWords } from '../tokenize/basic/word.js';
38
42
  import { TTS, type TTSError } from '../tts/tts.js';
39
43
  import { Future, Task, cancelAndWait, waitFor } from '../utils.js';
@@ -70,7 +74,6 @@ import {
70
74
  } from './generation.js';
71
75
  import { SpeechHandle } from './speech_handle.js';
72
76
 
73
- // equivalent to Python's contextvars
74
77
  const speechHandleStorage = new AsyncLocalStorage<SpeechHandle>();
75
78
 
76
79
  interface PreemptiveGeneration {
@@ -202,10 +205,15 @@ export class AgentActivity implements RecognitionHooks {
202
205
  }
203
206
 
204
207
  async start(): Promise<void> {
205
- // TODO(brian): PR3 - Add span: startSpan = tracer.startSpan('start_agent_activity', { attributes: { 'lk.agent_label': this.agent.label } })
206
- // TODO(brian): PR3 - Wrap prewarm calls with trace.useSpan(startSpan, endOnExit: false)
207
208
  const unlock = await this.lock.lock();
208
209
  try {
210
+ // Create start_agent_activity as a ROOT span (new trace) to match Python behavior
211
+ const startSpan = tracer.startSpan({
212
+ name: 'start_agent_activity',
213
+ attributes: { [traceTypes.ATTR_AGENT_LABEL]: this.agent.id },
214
+ context: ROOT_CONTEXT,
215
+ });
216
+
209
217
  this.agent._agentActivity = this;
210
218
 
211
219
  if (this.llm instanceof RealtimeModel) {
@@ -286,16 +294,26 @@ export class AgentActivity implements RecognitionHooks {
286
294
  turnDetectionMode: this.turnDetectionMode,
287
295
  minEndpointingDelay: this.agentSession.options.minEndpointingDelay,
288
296
  maxEndpointingDelay: this.agentSession.options.maxEndpointingDelay,
297
+ rootSpanContext: this.agentSession.rootSpanContext,
289
298
  });
290
299
  this.audioRecognition.start();
291
300
  this.started = true;
292
301
 
293
302
  this._mainTask = Task.from(({ signal }) => this.mainTask(signal));
294
- // TODO(brian): PR3 - Wrap onEnter with tracer.startActiveSpan('on_enter', { attributes: { 'lk.agent_label': this.agent.label }, context: startSpan context })
303
+
304
+ // Create on_enter as a child of start_agent_activity in the new trace
305
+ const onEnterTask = tracer.startActiveSpan(async () => this.agent.onEnter(), {
306
+ name: 'on_enter',
307
+ context: trace.setSpan(ROOT_CONTEXT, startSpan),
308
+ attributes: { [traceTypes.ATTR_AGENT_LABEL]: this.agent.id },
309
+ });
310
+
295
311
  this.createSpeechTask({
296
- task: Task.from(() => this.agent.onEnter()),
312
+ task: Task.from(() => onEnterTask),
297
313
  name: 'AgentActivity_onEnter',
298
314
  });
315
+
316
+ startSpan.end();
299
317
  } finally {
300
318
  unlock();
301
319
  }
@@ -577,7 +595,6 @@ export class AgentActivity implements RecognitionHooks {
577
595
  }
578
596
 
579
597
  if (this.draining) {
580
- // copied from python:
581
598
  // TODO(shubhra): should we "forward" this new turn to the next agent?
582
599
  this.logger.warn('skipping new realtime generation, the agent is draining');
583
600
  return;
@@ -783,7 +800,6 @@ export class AgentActivity implements RecognitionHooks {
783
800
  if (this.draining) {
784
801
  this.cancelPreemptiveGeneration();
785
802
  this.logger.warn({ user_input: info.newTranscript }, 'skipping user input, task is draining');
786
- // copied from python:
787
803
  // TODO(shubhra): should we "forward" this new turn to the next agent/activity?
788
804
  return true;
789
805
  }
@@ -1254,17 +1270,35 @@ export class AgentActivity implements RecognitionHooks {
1254
1270
  }
1255
1271
  }
1256
1272
 
1257
- // TODO(brian): PR3 - Wrap entire pipelineReplyTask() method with tracer.startActiveSpan('agent_turn')
1258
- private async pipelineReplyTask(
1259
- speechHandle: SpeechHandle,
1260
- chatCtx: ChatContext,
1261
- toolCtx: ToolContext,
1262
- modelSettings: ModelSettings,
1263
- replyAbortController: AbortController,
1264
- instructions?: string,
1265
- newMessage?: ChatMessage,
1266
- toolsMessages?: ChatItem[],
1267
- ): Promise<void> {
1273
+ private _pipelineReplyTaskImpl = async ({
1274
+ speechHandle,
1275
+ chatCtx,
1276
+ toolCtx,
1277
+ modelSettings,
1278
+ replyAbortController,
1279
+ instructions,
1280
+ newMessage,
1281
+ toolsMessages,
1282
+ span,
1283
+ }: {
1284
+ speechHandle: SpeechHandle;
1285
+ chatCtx: ChatContext;
1286
+ toolCtx: ToolContext;
1287
+ modelSettings: ModelSettings;
1288
+ replyAbortController: AbortController;
1289
+ instructions?: string;
1290
+ newMessage?: ChatMessage;
1291
+ toolsMessages?: ChatItem[];
1292
+ span: Span;
1293
+ }): Promise<void> => {
1294
+ span.setAttribute(traceTypes.ATTR_SPEECH_ID, speechHandle.id);
1295
+ if (instructions) {
1296
+ span.setAttribute(traceTypes.ATTR_INSTRUCTIONS, instructions);
1297
+ }
1298
+ if (newMessage) {
1299
+ span.setAttribute(traceTypes.ATTR_USER_INPUT, newMessage.textContent || '');
1300
+ }
1301
+
1268
1302
  speechHandleStorage.enterWith(speechHandle);
1269
1303
 
1270
1304
  const audioOutput = this.agentSession.output.audioEnabled
@@ -1406,6 +1440,8 @@ export class AgentActivity implements RecognitionHooks {
1406
1440
  msg.createdAt = replyStartedAt;
1407
1441
  }
1408
1442
  this.agent._chatCtx.insert(toolsMessages);
1443
+ // Also add to session history (matches Python agent_session.py _tool_items_added)
1444
+ this.agentSession._toolItemsAdded(toolsMessages as (FunctionCall | FunctionCallOutput)[]);
1409
1445
  }
1410
1446
 
1411
1447
  if (speechHandle.interrupted) {
@@ -1601,8 +1637,38 @@ export class AgentActivity implements RecognitionHooks {
1601
1637
  msg.createdAt = replyStartedAt;
1602
1638
  }
1603
1639
  this.agent._chatCtx.insert(toolMessages);
1640
+ this.agentSession._toolItemsAdded(toolMessages as (FunctionCall | FunctionCallOutput)[]);
1604
1641
  }
1605
- }
1642
+ };
1643
+
1644
+ private pipelineReplyTask = async (
1645
+ speechHandle: SpeechHandle,
1646
+ chatCtx: ChatContext,
1647
+ toolCtx: ToolContext,
1648
+ modelSettings: ModelSettings,
1649
+ replyAbortController: AbortController,
1650
+ instructions?: string,
1651
+ newMessage?: ChatMessage,
1652
+ toolsMessages?: ChatItem[],
1653
+ ): Promise<void> =>
1654
+ tracer.startActiveSpan(
1655
+ async (span) =>
1656
+ this._pipelineReplyTaskImpl({
1657
+ speechHandle,
1658
+ chatCtx,
1659
+ toolCtx,
1660
+ modelSettings,
1661
+ replyAbortController,
1662
+ instructions,
1663
+ newMessage,
1664
+ toolsMessages,
1665
+ span,
1666
+ }),
1667
+ {
1668
+ name: 'agent_turn',
1669
+ context: this.agentSession.rootSpanContext,
1670
+ },
1671
+ );
1606
1672
 
1607
1673
  private async realtimeGenerationTask(
1608
1674
  speechHandle: SpeechHandle,
@@ -1610,6 +1676,37 @@ export class AgentActivity implements RecognitionHooks {
1610
1676
  modelSettings: ModelSettings,
1611
1677
  replyAbortController: AbortController,
1612
1678
  ): Promise<void> {
1679
+ return tracer.startActiveSpan(
1680
+ async (span) =>
1681
+ this._realtimeGenerationTaskImpl({
1682
+ speechHandle,
1683
+ ev,
1684
+ modelSettings,
1685
+ replyAbortController,
1686
+ span,
1687
+ }),
1688
+ {
1689
+ name: 'agent_turn',
1690
+ context: this.agentSession.rootSpanContext,
1691
+ },
1692
+ );
1693
+ }
1694
+
1695
+ private async _realtimeGenerationTaskImpl({
1696
+ speechHandle,
1697
+ ev,
1698
+ modelSettings,
1699
+ replyAbortController,
1700
+ span,
1701
+ }: {
1702
+ speechHandle: SpeechHandle;
1703
+ ev: GenerationCreatedEvent;
1704
+ modelSettings: ModelSettings;
1705
+ replyAbortController: AbortController;
1706
+ span: Span;
1707
+ }): Promise<void> {
1708
+ span.setAttribute(traceTypes.ATTR_SPEECH_ID, speechHandle.id);
1709
+
1613
1710
  speechHandleStorage.enterWith(speechHandle);
1614
1711
 
1615
1712
  if (!this.realtimeSession) {
@@ -1786,6 +1883,8 @@ export class AgentActivity implements RecognitionHooks {
1786
1883
 
1787
1884
  const onToolExecutionStarted = (f: FunctionCall) => {
1788
1885
  speechHandle._itemAdded([f]);
1886
+ this.agent._chatCtx.items.push(f);
1887
+ this.agentSession._toolItemsAdded([f]);
1789
1888
  };
1790
1889
 
1791
1890
  const onToolExecutionCompleted = (out: ToolExecutionOutput) => {
@@ -1979,6 +2078,11 @@ export class AgentActivity implements RecognitionHooks {
1979
2078
  }
1980
2079
  const chatCtx = this.realtimeSession.chatCtx.copy();
1981
2080
  chatCtx.items.push(...functionToolsExecutedEvent.functionCallOutputs);
2081
+
2082
+ this.agentSession._toolItemsAdded(
2083
+ functionToolsExecutedEvent.functionCallOutputs as FunctionCallOutput[],
2084
+ );
2085
+
1982
2086
  try {
1983
2087
  await this.realtimeSession.updateChatCtx(chatCtx);
1984
2088
  } catch (error) {
@@ -2096,16 +2200,30 @@ export class AgentActivity implements RecognitionHooks {
2096
2200
  this.wakeupMainTask();
2097
2201
  }
2098
2202
 
2099
- // TODO(brian): PR3 - Wrap entire drain() method with tracer.startActiveSpan('drain_agent_activity', { attributes: { 'lk.agent_label': this.agent.label } })
2100
2203
  async drain(): Promise<void> {
2204
+ // Create drain_agent_activity as a ROOT span (new trace) to match Python behavior
2205
+ return tracer.startActiveSpan(async (span) => this._drainImpl(span), {
2206
+ name: 'drain_agent_activity',
2207
+ context: ROOT_CONTEXT,
2208
+ });
2209
+ }
2210
+
2211
+ private async _drainImpl(span: Span): Promise<void> {
2212
+ span.setAttribute(traceTypes.ATTR_AGENT_LABEL, this.agent.id);
2213
+
2101
2214
  const unlock = await this.lock.lock();
2102
2215
  try {
2103
2216
  if (this._draining) return;
2104
2217
 
2105
2218
  this.cancelPreemptiveGeneration();
2106
- // TODO(brian): PR3 - Wrap onExit with tracer.startActiveSpan('on_exit', { attributes: { 'lk.agent_label': this.agent.label } })
2219
+
2220
+ const onExitTask = tracer.startActiveSpan(async () => this.agent.onExit(), {
2221
+ name: 'on_exit',
2222
+ attributes: { [traceTypes.ATTR_AGENT_LABEL]: this.agent.id },
2223
+ });
2224
+
2107
2225
  this.createSpeechTask({
2108
- task: Task.from(() => this.agent.onExit()),
2226
+ task: Task.from(() => onExitTask),
2109
2227
  name: 'AgentActivity_onExit',
2110
2228
  });
2111
2229
 
@@ -3,6 +3,8 @@
3
3
  // SPDX-License-Identifier: Apache-2.0
4
4
  import type { AudioFrame, Room } from '@livekit/rtc-node';
5
5
  import type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter';
6
+ import type { Context, Span } from '@opentelemetry/api';
7
+ import { ROOT_CONTEXT, context as otelContext, trace } from '@opentelemetry/api';
6
8
  import { EventEmitter } from 'node:events';
7
9
  import type { ReadableStream } from 'node:stream/web';
8
10
  import {
@@ -14,12 +16,14 @@ import {
14
16
  type TTSModelString,
15
17
  } from '../inference/index.js';
16
18
  import { getJobContext } from '../job.js';
19
+ import type { FunctionCall, FunctionCallOutput } from '../llm/chat_context.js';
17
20
  import { AgentHandoffItem, ChatContext, ChatMessage } from '../llm/chat_context.js';
18
21
  import type { LLM, RealtimeModel, RealtimeModelError, ToolChoice } from '../llm/index.js';
19
22
  import type { LLMError } from '../llm/llm.js';
20
23
  import { log } from '../log.js';
21
24
  import type { STT } from '../stt/index.js';
22
25
  import type { STTError } from '../stt/stt.js';
26
+ import { traceTypes, tracer } from '../telemetry/index.js';
23
27
  import type { TTS, TTSError } from '../tts/tts.js';
24
28
  import type { VAD } from '../vad.js';
25
29
  import type { Agent } from './agent.js';
@@ -128,9 +132,22 @@ export class AgentSession<
128
132
  private closingTask: Promise<void> | null = null;
129
133
  private userAwayTimer: NodeJS.Timeout | null = null;
130
134
 
135
+ private sessionSpan?: Span;
136
+ private userSpeakingSpan?: Span;
137
+ private agentSpeakingSpan?: Span;
138
+
139
+ /** @internal */
140
+ rootSpanContext?: Context;
141
+
131
142
  /** @internal */
132
143
  _recordedEvents: AgentEvent[] = [];
133
144
 
145
+ /** @internal */
146
+ _enableRecording = false;
147
+
148
+ /** @internal - Timestamp when the session started (milliseconds) */
149
+ _startedAt?: number;
150
+
134
151
  constructor(opts: AgentSessionOptions<UserData>) {
135
152
  super();
136
153
 
@@ -175,7 +192,8 @@ export class AgentSession<
175
192
  this._chatCtx = ChatContext.empty();
176
193
  this.options = { ...defaultVoiceOptions, ...voiceOptions };
177
194
 
178
- this.on(AgentSessionEventTypes.UserInputTranscribed, this._onUserInputTranscribed.bind(this));
195
+ this._onUserInputTranscribed = this._onUserInputTranscribed.bind(this);
196
+ this.on(AgentSessionEventTypes.UserInputTranscribed, this._onUserInputTranscribed);
179
197
  }
180
198
 
181
199
  emit<K extends keyof AgentSessionCallbacks>(
@@ -211,25 +229,22 @@ export class AgentSession<
211
229
  this._userData = value;
212
230
  }
213
231
 
214
- async start({
215
- // TODO(brian): PR2 - Add setupCloudTracer() call if on LiveKit Cloud with recording enabled
216
- // TODO(brian): PR3 - Add span: this._sessionSpan = tracer.startSpan('agent_session'), store as instance property
217
- // TODO(brian): PR4 - Add setupCloudLogger() call in setupCloudTracer() to setup OTEL logging with Pino bridge
232
+ private async _startImpl({
218
233
  agent,
219
234
  room,
220
235
  inputOptions,
221
236
  outputOptions,
222
- record = true,
237
+ record,
238
+ span,
223
239
  }: {
224
240
  agent: Agent;
225
241
  room: Room;
226
242
  inputOptions?: Partial<RoomInputOptions>;
227
243
  outputOptions?: Partial<RoomOutputOptions>;
228
- record?: boolean;
244
+ record: boolean;
245
+ span: Span;
229
246
  }): Promise<void> {
230
- if (this.started) {
231
- return;
232
- }
247
+ span.setAttribute(traceTypes.ATTR_AGENT_LABEL, agent.id);
233
248
 
234
249
  this.agent = agent;
235
250
  this._updateAgentState('initializing');
@@ -291,9 +306,62 @@ export class AgentSession<
291
306
  );
292
307
 
293
308
  this.started = true;
309
+ this._startedAt = Date.now();
294
310
  this._updateAgentState('listening');
295
311
  }
296
312
 
313
+ async start({
314
+ agent,
315
+ room,
316
+ inputOptions,
317
+ outputOptions,
318
+ record,
319
+ }: {
320
+ agent: Agent;
321
+ room: Room;
322
+ inputOptions?: Partial<RoomInputOptions>;
323
+ outputOptions?: Partial<RoomOutputOptions>;
324
+ record?: boolean;
325
+ }): Promise<void> {
326
+ if (this.started) {
327
+ return;
328
+ }
329
+
330
+ const ctx = getJobContext();
331
+
332
+ this.logger.info(
333
+ { record, enableRecording: ctx.info.job.enableRecording },
334
+ 'Configuring session recording',
335
+ );
336
+
337
+ record = record ?? ctx.info.job.enableRecording;
338
+ this._enableRecording = record;
339
+
340
+ if (this._enableRecording) {
341
+ await ctx.initRecording();
342
+ }
343
+
344
+ // Create agent_session as a ROOT span (new trace) to match Python behavior
345
+ // This creates a separate trace for better cloud dashboard organization
346
+ this.sessionSpan = tracer.startSpan({
347
+ name: 'agent_session',
348
+ context: ROOT_CONTEXT,
349
+ });
350
+
351
+ // Set the session span as the active span in the context
352
+ // This ensures all child spans (agent_turn, user_turn, etc.) are parented to it
353
+ this.rootSpanContext = trace.setSpan(ROOT_CONTEXT, this.sessionSpan);
354
+
355
+ await this._startImpl({
356
+ agent,
357
+ room,
358
+ inputOptions,
359
+ outputOptions,
360
+ record,
361
+ span: this.sessionSpan,
362
+ });
363
+ }
364
+
297
365
  updateAgent(agent: Agent): void {
298
366
  this.agent = agent;
299
367
 
@@ -367,32 +435,41 @@ export class AgentSession<
367
435
  }
368
436
 
369
437
  private async updateActivity(agent: Agent): Promise<void> {
370
- // TODO(AJS-129): add lock to agent activity core lifecycle
371
- this.nextActivity = new AgentActivity(agent, this);
438
+ const runWithContext = async () => {
439
+ // TODO(AJS-129): add lock to agent activity core lifecycle
440
+ this.nextActivity = new AgentActivity(agent, this);
372
441
 
373
- const previousActivity = this.activity;
442
+ const previousActivity = this.activity;
374
443
 
375
- if (this.activity) {
376
- await this.activity.drain();
377
- await this.activity.close();
378
- }
444
+ if (this.activity) {
445
+ await this.activity.drain();
446
+ await this.activity.close();
447
+ }
379
448
 
380
- this.activity = this.nextActivity;
381
- this.nextActivity = undefined;
449
+ this.activity = this.nextActivity;
450
+ this.nextActivity = undefined;
382
451
 
383
- this._chatCtx.insert(
384
- new AgentHandoffItem({
385
- oldAgentId: previousActivity?.agent.id,
386
- newAgentId: agent.id,
387
- }),
388
- );
389
- this.logger.debug({ previousActivity, agent }, 'Agent handoff inserted into chat context');
452
+ this._chatCtx.insert(
453
+ new AgentHandoffItem({
454
+ oldAgentId: previousActivity?.agent.id,
455
+ newAgentId: agent.id,
456
+ }),
457
+ );
458
+ this.logger.debug({ previousActivity, agent }, 'Agent handoff inserted into chat context');
390
459
 
391
- await this.activity.start();
460
+ await this.activity.start();
392
461
 
393
- if (this._input.audio) {
394
- this.activity.attachAudioInput(this._input.audio.stream);
462
+ if (this._input.audio) {
463
+ this.activity.attachAudioInput(this._input.audio.stream);
464
+ }
465
+ };
466
+
467
+ // Run within session span context if available
468
+ if (this.rootSpanContext) {
469
+ return otelContext.with(this.rootSpanContext, runWithContext);
395
470
  }
471
+
472
+ return runWithContext();
396
473
  }
397
474
 
398
475
  get chatCtx(): ChatContext {
@@ -452,14 +529,35 @@ export class AgentSession<
452
529
  this.emit(AgentSessionEventTypes.ConversationItemAdded, createConversationItemAddedEvent(item));
453
530
  }
454
531
 
532
+ /** @internal */
533
+ _toolItemsAdded(items: (FunctionCall | FunctionCallOutput)[]): void {
534
+ this._chatCtx.insert(items);
535
+ }
536
+
455
537
  /** @internal */
456
538
  _updateAgentState(state: AgentState) {
457
539
  if (this._agentState === state) {
458
540
  return;
459
541
  }
460
542
 
461
- // TODO(brian): PR3 - Add span: if state === 'speaking' && !this._agentSpeakingSpan, create tracer.startSpan('agent_speaking') with participant attributes
462
- // TODO(brian): PR3 - Add span: if state !== 'speaking' && this._agentSpeakingSpan, end and clear this._agentSpeakingSpan
543
+ if (state === 'speaking') {
544
+ // TODO(brian): PR4 - Track error counts
545
+
546
+ if (this.agentSpeakingSpan === undefined) {
547
+ this.agentSpeakingSpan = tracer.startSpan({
548
+ name: 'agent_speaking',
549
+ context: this.rootSpanContext,
550
+ });
551
+
552
+ // TODO(brian): PR4 - Set participant attributes if roomIO.room.localParticipant is available
553
+ // (Ref: Python agent_session.py line 1161-1164)
554
+ }
555
+ } else if (this.agentSpeakingSpan !== undefined) {
556
+ // TODO(brian): PR4 - Set ATTR_END_TIME attribute if available
557
+ this.agentSpeakingSpan.end();
558
+ this.agentSpeakingSpan = undefined;
559
+ }
560
+
463
561
  const oldState = this._agentState;
464
562
  this._agentState = state;
465
563
 
@@ -482,8 +580,20 @@ export class AgentSession<
482
580
  return;
483
581
  }
484
582
 
485
- // TODO(brian): PR3 - Add span: if state === 'speaking' && !this._userSpeakingSpan, create tracer.startSpan('user_speaking') with participant attributes
486
- // TODO(brian): PR3 - Add span: if state !== 'speaking' && this._userSpeakingSpan, end and clear this._userSpeakingSpan
583
+ if (state === 'speaking' && this.userSpeakingSpan === undefined) {
584
+ this.userSpeakingSpan = tracer.startSpan({
585
+ name: 'user_speaking',
586
+ context: this.rootSpanContext,
587
+ });
588
+
589
+ // TODO(brian): PR4 - Set participant attributes if roomIO.linkedParticipant is available
590
+ // (Ref: Python agent_session.py line 1192-1195)
591
+ } else if (this.userSpeakingSpan !== undefined) {
592
+ // TODO(brian): PR4 - Set ATTR_END_TIME attribute with lastSpeakingTime if available
593
+ this.userSpeakingSpan.end();
594
+ this.userSpeakingSpan = undefined;
595
+ }
596
+
487
597
  const oldState = this.userState;
488
598
  this.userState = state;
489
599
 
@@ -550,19 +660,33 @@ export class AgentSession<
550
660
  reason: CloseReason,
551
661
  error: RealtimeModelError | LLMError | TTSError | STTError | null = null,
552
662
  drain: boolean = false,
663
+ ): Promise<void> {
664
+ if (this.rootSpanContext) {
665
+ return otelContext.with(this.rootSpanContext, async () => {
666
+ await this.closeImplInner(reason, error, drain);
667
+ });
668
+ }
669
+
670
+ return this.closeImplInner(reason, error, drain);
671
+ }
672
+
673
+ private async closeImplInner(
674
+ reason: CloseReason,
675
+ error: RealtimeModelError | LLMError | TTSError | STTError | null = null,
676
+ drain: boolean = false,
553
677
  ): Promise<void> {
554
678
  if (!this.started) {
555
679
  return;
556
680
  }
557
681
 
558
682
  this._cancelUserAwayTimer();
683
+ this.off(AgentSessionEventTypes.UserInputTranscribed, this._onUserInputTranscribed);
559
684
 
560
685
  if (this.activity) {
561
686
  if (!drain) {
562
687
  try {
563
688
  this.activity.interrupt();
564
689
  } catch (error) {
565
- // uninterruptible speech [copied from python]
566
690
  // TODO(shubhra): force interrupt or wait for it to finish?
567
691
  // it might be an audio played from the error callback
568
692
  }
@@ -584,12 +708,28 @@ export class AgentSession<
584
708
  await this.activity?.close();
585
709
  this.activity = undefined;
586
710
 
711
+ if (this.sessionSpan) {
712
+ this.sessionSpan.end();
713
+ this.sessionSpan = undefined;
714
+ }
715
+
716
+ if (this.userSpeakingSpan) {
717
+ this.userSpeakingSpan.end();
718
+ this.userSpeakingSpan = undefined;
719
+ }
720
+
721
+ if (this.agentSpeakingSpan) {
722
+ this.agentSpeakingSpan.end();
723
+ this.agentSpeakingSpan = undefined;
724
+ }
725
+
587
726
  this.started = false;
588
727
 
589
728
  this.emit(AgentSessionEventTypes.Close, createCloseEvent(reason, error));
590
729
 
591
730
  this.userState = 'listening';
592
731
  this._agentState = 'initializing';
732
+ this.rootSpanContext = undefined;
593
733
 
594
734
  this.logger.info({ reason, error }, 'AgentSession closed');
595
735
  }