@livekit/agents 1.0.17 → 1.0.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (216)
  1. package/dist/index.cjs +3 -0
  2. package/dist/index.cjs.map +1 -1
  3. package/dist/index.d.cts +2 -1
  4. package/dist/index.d.ts +2 -1
  5. package/dist/index.d.ts.map +1 -1
  6. package/dist/index.js +2 -0
  7. package/dist/index.js.map +1 -1
  8. package/dist/inference/api_protos.d.cts +12 -12
  9. package/dist/inference/api_protos.d.ts +12 -12
  10. package/dist/inference/llm.cjs +35 -13
  11. package/dist/inference/llm.cjs.map +1 -1
  12. package/dist/inference/llm.d.cts +10 -5
  13. package/dist/inference/llm.d.ts +10 -5
  14. package/dist/inference/llm.d.ts.map +1 -1
  15. package/dist/inference/llm.js +35 -13
  16. package/dist/inference/llm.js.map +1 -1
  17. package/dist/inference/tts.cjs +1 -1
  18. package/dist/inference/tts.cjs.map +1 -1
  19. package/dist/inference/tts.js +1 -1
  20. package/dist/inference/tts.js.map +1 -1
  21. package/dist/ipc/job_proc_lazy_main.cjs +6 -2
  22. package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
  23. package/dist/ipc/job_proc_lazy_main.js +6 -2
  24. package/dist/ipc/job_proc_lazy_main.js.map +1 -1
  25. package/dist/job.cjs +31 -0
  26. package/dist/job.cjs.map +1 -1
  27. package/dist/job.d.cts +6 -0
  28. package/dist/job.d.ts +6 -0
  29. package/dist/job.d.ts.map +1 -1
  30. package/dist/job.js +31 -0
  31. package/dist/job.js.map +1 -1
  32. package/dist/llm/chat_context.cjs +33 -0
  33. package/dist/llm/chat_context.cjs.map +1 -1
  34. package/dist/llm/chat_context.d.cts +22 -2
  35. package/dist/llm/chat_context.d.ts +22 -2
  36. package/dist/llm/chat_context.d.ts.map +1 -1
  37. package/dist/llm/chat_context.js +32 -0
  38. package/dist/llm/chat_context.js.map +1 -1
  39. package/dist/llm/index.cjs +2 -0
  40. package/dist/llm/index.cjs.map +1 -1
  41. package/dist/llm/index.d.cts +1 -1
  42. package/dist/llm/index.d.ts +1 -1
  43. package/dist/llm/index.d.ts.map +1 -1
  44. package/dist/llm/index.js +2 -0
  45. package/dist/llm/index.js.map +1 -1
  46. package/dist/llm/llm.cjs.map +1 -1
  47. package/dist/llm/llm.d.cts +1 -1
  48. package/dist/llm/llm.d.ts +1 -1
  49. package/dist/llm/llm.d.ts.map +1 -1
  50. package/dist/llm/llm.js.map +1 -1
  51. package/dist/llm/provider_format/google.cjs.map +1 -1
  52. package/dist/llm/provider_format/google.d.cts +1 -1
  53. package/dist/llm/provider_format/google.d.ts +1 -1
  54. package/dist/llm/provider_format/google.d.ts.map +1 -1
  55. package/dist/llm/provider_format/google.js.map +1 -1
  56. package/dist/llm/provider_format/google.test.cjs +48 -0
  57. package/dist/llm/provider_format/google.test.cjs.map +1 -1
  58. package/dist/llm/provider_format/google.test.js +54 -1
  59. package/dist/llm/provider_format/google.test.js.map +1 -1
  60. package/dist/llm/provider_format/index.d.cts +1 -1
  61. package/dist/llm/provider_format/index.d.ts +1 -1
  62. package/dist/llm/provider_format/index.d.ts.map +1 -1
  63. package/dist/llm/provider_format/openai.cjs +1 -2
  64. package/dist/llm/provider_format/openai.cjs.map +1 -1
  65. package/dist/llm/provider_format/openai.js +1 -2
  66. package/dist/llm/provider_format/openai.js.map +1 -1
  67. package/dist/llm/provider_format/openai.test.cjs +32 -0
  68. package/dist/llm/provider_format/openai.test.cjs.map +1 -1
  69. package/dist/llm/provider_format/openai.test.js +38 -1
  70. package/dist/llm/provider_format/openai.test.js.map +1 -1
  71. package/dist/llm/realtime.cjs.map +1 -1
  72. package/dist/llm/realtime.d.cts +4 -0
  73. package/dist/llm/realtime.d.ts +4 -0
  74. package/dist/llm/realtime.d.ts.map +1 -1
  75. package/dist/llm/realtime.js.map +1 -1
  76. package/dist/llm/utils.cjs +2 -2
  77. package/dist/llm/utils.cjs.map +1 -1
  78. package/dist/llm/utils.d.cts +1 -1
  79. package/dist/llm/utils.d.ts +1 -1
  80. package/dist/llm/utils.d.ts.map +1 -1
  81. package/dist/llm/utils.js +2 -2
  82. package/dist/llm/utils.js.map +1 -1
  83. package/dist/llm/zod-utils.cjs +6 -3
  84. package/dist/llm/zod-utils.cjs.map +1 -1
  85. package/dist/llm/zod-utils.d.cts +1 -1
  86. package/dist/llm/zod-utils.d.ts +1 -1
  87. package/dist/llm/zod-utils.d.ts.map +1 -1
  88. package/dist/llm/zod-utils.js +6 -3
  89. package/dist/llm/zod-utils.js.map +1 -1
  90. package/dist/llm/zod-utils.test.cjs +83 -0
  91. package/dist/llm/zod-utils.test.cjs.map +1 -1
  92. package/dist/llm/zod-utils.test.js +83 -0
  93. package/dist/llm/zod-utils.test.js.map +1 -1
  94. package/dist/log.cjs.map +1 -1
  95. package/dist/log.d.ts.map +1 -1
  96. package/dist/log.js.map +1 -1
  97. package/dist/telemetry/index.cjs +51 -0
  98. package/dist/telemetry/index.cjs.map +1 -0
  99. package/dist/telemetry/index.d.cts +4 -0
  100. package/dist/telemetry/index.d.ts +4 -0
  101. package/dist/telemetry/index.d.ts.map +1 -0
  102. package/dist/telemetry/index.js +12 -0
  103. package/dist/telemetry/index.js.map +1 -0
  104. package/dist/telemetry/trace_types.cjs +191 -0
  105. package/dist/telemetry/trace_types.cjs.map +1 -0
  106. package/dist/telemetry/trace_types.d.cts +56 -0
  107. package/dist/telemetry/trace_types.d.ts +56 -0
  108. package/dist/telemetry/trace_types.d.ts.map +1 -0
  109. package/dist/telemetry/trace_types.js +113 -0
  110. package/dist/telemetry/trace_types.js.map +1 -0
  111. package/dist/telemetry/traces.cjs +196 -0
  112. package/dist/telemetry/traces.cjs.map +1 -0
  113. package/dist/telemetry/traces.d.cts +97 -0
  114. package/dist/telemetry/traces.d.ts +97 -0
  115. package/dist/telemetry/traces.d.ts.map +1 -0
  116. package/dist/telemetry/traces.js +173 -0
  117. package/dist/telemetry/traces.js.map +1 -0
  118. package/dist/telemetry/utils.cjs +86 -0
  119. package/dist/telemetry/utils.cjs.map +1 -0
  120. package/dist/telemetry/utils.d.cts +5 -0
  121. package/dist/telemetry/utils.d.ts +5 -0
  122. package/dist/telemetry/utils.d.ts.map +1 -0
  123. package/dist/telemetry/utils.js +51 -0
  124. package/dist/telemetry/utils.js.map +1 -0
  125. package/dist/tts/tts.cjs.map +1 -1
  126. package/dist/tts/tts.d.ts.map +1 -1
  127. package/dist/tts/tts.js.map +1 -1
  128. package/dist/utils.cjs.map +1 -1
  129. package/dist/utils.d.cts +7 -0
  130. package/dist/utils.d.ts +7 -0
  131. package/dist/utils.d.ts.map +1 -1
  132. package/dist/utils.js.map +1 -1
  133. package/dist/voice/agent.cjs +15 -0
  134. package/dist/voice/agent.cjs.map +1 -1
  135. package/dist/voice/agent.d.cts +4 -1
  136. package/dist/voice/agent.d.ts +4 -1
  137. package/dist/voice/agent.d.ts.map +1 -1
  138. package/dist/voice/agent.js +15 -0
  139. package/dist/voice/agent.js.map +1 -1
  140. package/dist/voice/agent_activity.cjs +71 -20
  141. package/dist/voice/agent_activity.cjs.map +1 -1
  142. package/dist/voice/agent_activity.d.ts.map +1 -1
  143. package/dist/voice/agent_activity.js +71 -20
  144. package/dist/voice/agent_activity.js.map +1 -1
  145. package/dist/voice/agent_session.cjs +69 -2
  146. package/dist/voice/agent_session.cjs.map +1 -1
  147. package/dist/voice/agent_session.d.cts +11 -2
  148. package/dist/voice/agent_session.d.ts +11 -2
  149. package/dist/voice/agent_session.d.ts.map +1 -1
  150. package/dist/voice/agent_session.js +70 -3
  151. package/dist/voice/agent_session.js.map +1 -1
  152. package/dist/voice/audio_recognition.cjs.map +1 -1
  153. package/dist/voice/audio_recognition.d.ts.map +1 -1
  154. package/dist/voice/audio_recognition.js.map +1 -1
  155. package/dist/voice/generation.cjs.map +1 -1
  156. package/dist/voice/generation.d.ts.map +1 -1
  157. package/dist/voice/generation.js.map +1 -1
  158. package/dist/voice/index.cjs +2 -0
  159. package/dist/voice/index.cjs.map +1 -1
  160. package/dist/voice/index.d.cts +1 -0
  161. package/dist/voice/index.d.ts +1 -0
  162. package/dist/voice/index.d.ts.map +1 -1
  163. package/dist/voice/index.js +1 -0
  164. package/dist/voice/index.js.map +1 -1
  165. package/dist/voice/interruption_detection.test.cjs +114 -0
  166. package/dist/voice/interruption_detection.test.cjs.map +1 -0
  167. package/dist/voice/interruption_detection.test.js +113 -0
  168. package/dist/voice/interruption_detection.test.js.map +1 -0
  169. package/dist/voice/report.cjs +69 -0
  170. package/dist/voice/report.cjs.map +1 -0
  171. package/dist/voice/report.d.cts +26 -0
  172. package/dist/voice/report.d.ts +26 -0
  173. package/dist/voice/report.d.ts.map +1 -0
  174. package/dist/voice/report.js +44 -0
  175. package/dist/voice/report.js.map +1 -0
  176. package/dist/voice/room_io/room_io.cjs +3 -0
  177. package/dist/voice/room_io/room_io.cjs.map +1 -1
  178. package/dist/voice/room_io/room_io.d.cts +1 -0
  179. package/dist/voice/room_io/room_io.d.ts +1 -0
  180. package/dist/voice/room_io/room_io.d.ts.map +1 -1
  181. package/dist/voice/room_io/room_io.js +3 -0
  182. package/dist/voice/room_io/room_io.js.map +1 -1
  183. package/package.json +12 -5
  184. package/src/index.ts +2 -1
  185. package/src/inference/llm.ts +53 -21
  186. package/src/inference/tts.ts +1 -1
  187. package/src/ipc/job_proc_lazy_main.ts +10 -2
  188. package/src/job.ts +48 -0
  189. package/src/llm/__snapshots__/zod-utils.test.ts.snap +218 -0
  190. package/src/llm/chat_context.ts +53 -1
  191. package/src/llm/index.ts +1 -0
  192. package/src/llm/llm.ts +3 -1
  193. package/src/llm/provider_format/google.test.ts +72 -1
  194. package/src/llm/provider_format/google.ts +4 -4
  195. package/src/llm/provider_format/openai.test.ts +55 -1
  196. package/src/llm/provider_format/openai.ts +3 -2
  197. package/src/llm/realtime.ts +8 -1
  198. package/src/llm/utils.ts +7 -2
  199. package/src/llm/zod-utils.test.ts +101 -0
  200. package/src/llm/zod-utils.ts +12 -3
  201. package/src/log.ts +1 -0
  202. package/src/telemetry/index.ts +10 -0
  203. package/src/telemetry/trace_types.ts +88 -0
  204. package/src/telemetry/traces.ts +266 -0
  205. package/src/telemetry/utils.ts +61 -0
  206. package/src/tts/tts.ts +4 -0
  207. package/src/utils.ts +17 -0
  208. package/src/voice/agent.ts +22 -0
  209. package/src/voice/agent_activity.ts +102 -24
  210. package/src/voice/agent_session.ts +98 -1
  211. package/src/voice/audio_recognition.ts +2 -0
  212. package/src/voice/generation.ts +3 -0
  213. package/src/voice/index.ts +1 -0
  214. package/src/voice/interruption_detection.test.ts +151 -0
  215. package/src/voice/report.ts +77 -0
  216. package/src/voice/room_io/room_io.ts +4 -0
@@ -0,0 +1,266 @@
1
+ // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+ import {
5
+ type Attributes,
6
+ type Context,
7
+ type Span,
8
+ type SpanOptions,
9
+ type Tracer,
10
+ type TracerProvider,
11
+ context as otelContext,
12
+ trace,
13
+ } from '@opentelemetry/api';
14
+ import { OTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-http';
15
+ import { CompressionAlgorithm } from '@opentelemetry/otlp-exporter-base';
16
+ import { Resource } from '@opentelemetry/resources';
17
+ import type { ReadableSpan, SpanProcessor } from '@opentelemetry/sdk-trace-base';
18
+ import { BatchSpanProcessor, NodeTracerProvider } from '@opentelemetry/sdk-trace-node';
19
+ import { ATTR_SERVICE_NAME } from '@opentelemetry/semantic-conventions';
20
+ import { AccessToken } from 'livekit-server-sdk';
21
+
22
+ export interface StartSpanOptions {
23
+ /** Name of the span */
24
+ name: string;
25
+ /** Parent context for the span; when omitted, the currently active context is used */
26
+ context?: Context;
27
+ /** Attributes to set on the span when it starts */
28
+ attributes?: Attributes;
29
+ /** Whether the active-span helpers end the span once the callback finishes (default: true); not read by startSpan */
30
+ endOnExit?: boolean;
31
+ }
32
+
33
+ /**
34
+ * A tracer wrapper whose tracer provider (and the tracer derived from it) can be replaced at runtime.
35
+ */
36
+ class DynamicTracer {
37
+ private tracerProvider: TracerProvider;
38
+ private tracer: Tracer;
39
+ private readonly instrumentingModuleName: string;
40
+
41
+ constructor(instrumentingModuleName: string) {
42
+ this.instrumentingModuleName = instrumentingModuleName;
43
+ this.tracerProvider = trace.getTracerProvider();
44
+ this.tracer = trace.getTracer(instrumentingModuleName);
45
+ }
46
+
47
+ /**
48
+ * Set a new tracer provider. The tracer is re-created from it under the same module name.
49
+ * @param provider - The new tracer provider to use
50
+ */
51
+ setProvider(provider: TracerProvider): void {
52
+ this.tracerProvider = provider;
53
+ this.tracer = this.tracerProvider.getTracer(this.instrumentingModuleName);
54
+ }
55
+
56
+ /**
57
+ * Get the underlying OpenTelemetry tracer.
58
+ * Gives access to the full Tracer API; call again after setProvider() to pick up the new tracer.
59
+ */
60
+ getTracer(): Tracer {
61
+ return this.tracer;
62
+ }
63
+
64
+ /**
65
+ * Start a span manually (without making it active).
66
+ * You must call span.end() when done; options.endOnExit is not consulted here.
67
+ *
68
+ * @param options - Span configuration including name
69
+ * @returns The created span
70
+ */
71
+ startSpan(options: StartSpanOptions): Span {
72
+ const ctx = options.context ?? otelContext.active();
73
+ const span = this.tracer.startSpan(
74
+ options.name,
75
+ {
76
+ attributes: options.attributes,
77
+ },
78
+ ctx,
79
+ );
80
+
81
+ return span;
82
+ }
83
+
84
+ /**
85
+ * Start a new span and make it active in the current context.
86
+ * The span is ended once the provided function settles, whether it resolves or rejects (unless endOnExit=false).
87
+ *
88
+ * @param fn - The function to execute within the span context
89
+ * @param options - Span configuration including name
90
+ * @returns The result of the provided function
91
+ */
92
+ async startActiveSpan<T>(fn: (span: Span) => Promise<T>, options: StartSpanOptions): Promise<T> {
93
+ const ctx = options.context ?? otelContext.active();
94
+ const endOnExit = options.endOnExit ?? true; // default true
95
+ const opts: SpanOptions = { attributes: options.attributes };
96
+
97
+ // tracer.startActiveSpan returns whatever its callback returns, so the async
98
+ // callback's promise is handed straight back to the caller instead of being
99
+ // wrapped in a second Promise. A failure while ending the span therefore
100
+ // rejects the returned promise rather than escaping as an unhandled rejection
101
+ // from a callback promise nobody awaits.
102
+ return this.tracer.startActiveSpan(options.name, opts, ctx, async (span) => {
103
+ try {
104
+ return await fn(span);
105
+ } finally {
106
+ if (endOnExit) {
107
+ span.end();
108
+ }
109
+ }
110
+ });
111
+ }
112
+
113
+ /**
114
+ * Synchronous version of startActiveSpan for non-async operations.
115
+ * The span is ended when fn returns or throws (unless endOnExit=false).
116
+ * @param fn - The function to execute within the span context
117
+ * @param options - Span configuration including name
118
+ * @returns The result of the provided function
119
+ */
120
+ startActiveSpanSync<T>(fn: (span: Span) => T, options: StartSpanOptions): T {
121
+ const ctx = options.context ?? otelContext.active();
122
+ const endOnExit = options.endOnExit ?? true; // default true
123
+ const opts: SpanOptions = { attributes: options.attributes };
124
+
125
+ return this.tracer.startActiveSpan(options.name, opts, ctx, (span) => {
126
+ try {
127
+ return fn(span);
128
+ } finally {
129
+ if (endOnExit) {
130
+ span.end();
131
+ }
132
+ }
133
+ });
134
+ }
135
+ }
136
+
137
+ /**
138
+ * The global tracer instance used throughout the agents framework (module name 'livekit-agents').
139
+ * Its provider can be replaced at runtime via setTracerProvider().
140
+ */
141
+ export const tracer = new DynamicTracer('livekit-agents');
142
+
143
+ class MetadataSpanProcessor implements SpanProcessor {
144
+ private metadata: Attributes;
145
+
146
+ constructor(metadata: Attributes) {
147
+ this.metadata = metadata;
148
+ }
149
+ // Copies the configured attributes onto each span as it starts.
150
+ onStart(startedSpan: Span, _parentContext: Context): void {
151
+ startedSpan.setAttributes(this.metadata);
152
+ }
153
+ // Nothing to do when a span ends.
154
+ onEnd(_span: ReadableSpan): void {}
155
+
156
+ // This processor keeps no buffered spans, so shutting down has nothing to
157
+ // release and the returned promise is already resolved.
158
+ async shutdown(): Promise<void> {}
159
+
160
+ // Likewise there is nothing to flush; the returned promise is already
161
+ // resolved.
162
+ async forceFlush(): Promise<void> {}
163
+ }
164
+
165
+ // TODO(brian): PR4 - Add MetadataLogProcessor for structured logging
166
+
167
+ // TODO(brian): PR4 - Add ExtraDetailsProcessor for structured logging
168
+
169
+ /**
170
+ * Install `provider` as the tracer provider backing the framework's shared tracer.
171
+ * Call this before the agent session starts when supplying a custom tracer provider.
172
+ *
173
+ * @param provider - The NodeTracerProvider that should produce the framework's spans
174
+ * @param options - Optional settings; `metadata` attributes are set on every span when it starts
175
+ *
176
+ * @example
177
+ * ```typescript
178
+ * import { NodeTracerProvider } from '@opentelemetry/sdk-trace-node';
179
+ * import { setTracerProvider } from '@livekit/agents/telemetry';
180
+ *
181
+ * const provider = new NodeTracerProvider();
182
+ * setTracerProvider(provider, {
183
+ * metadata: { room_id: 'room123', job_id: 'job456' }
184
+ * });
185
+ * ```
186
+ */
187
+ export function setTracerProvider(
188
+ provider: NodeTracerProvider,
189
+ options?: { metadata?: Attributes },
190
+ ): void {
191
+ const spanMetadata = options?.metadata;
192
+ if (spanMetadata) {
193
+ provider.addSpanProcessor(new MetadataSpanProcessor(spanMetadata));
194
+ }
195
+ tracer.setProvider(provider);
196
+ }
197
+
198
+ /**
199
+ * Set up the OpenTelemetry tracer for LiveKit Cloud observability.
200
+ * Builds a gzip-compressed OTLP/HTTP trace exporter authorized by a JWT minted from LIVEKIT_API_KEY / LIVEKIT_API_SECRET and installs the resulting provider.
201
+ * Throws when either environment variable is unset; any setup failure is logged and rethrown.
202
+ * @param options - roomId and jobId (set on the resource and on every span) and cloudHostname (host of the OTLP endpoint)
203
+ *
204
+ * @internal
205
+ */
206
+ export async function setupCloudTracer(options: {
207
+ roomId: string;
208
+ jobId: string;
209
+ cloudHostname: string;
210
+ }): Promise<void> {
211
+ const { roomId, jobId, cloudHostname } = options;
212
+
213
+ const apiKey = process.env.LIVEKIT_API_KEY;
214
+ const apiSecret = process.env.LIVEKIT_API_SECRET;
215
+
216
+ if (!apiKey || !apiSecret) {
217
+ throw new Error('LIVEKIT_API_KEY and LIVEKIT_API_SECRET must be set for cloud tracing');
218
+ }
219
+
220
+ const token = new AccessToken(apiKey, apiSecret, {
221
+ identity: 'livekit-agents-telemetry',
222
+ ttl: '6h', // NOTE(review): the exporter headers below are built once from this token - confirm jobs that outlive 6h are handled
223
+ });
224
+ token.addObservabilityGrant({ write: true });
225
+
226
+ try {
227
+ const jwt = await token.toJwt();
228
+
229
+ const headers = {
230
+ Authorization: `Bearer ${jwt}`,
231
+ };
232
+
233
+ const metadata: Attributes = {
234
+ room_id: roomId,
235
+ job_id: jobId,
236
+ };
237
+
238
+ const resource = new Resource({
239
+ [ATTR_SERVICE_NAME]: 'livekit-agents',
240
+ room_id: roomId,
241
+ job_id: jobId,
242
+ });
243
+
244
+ // Configure OTLP exporter to send traces to LiveKit Cloud
245
+ const spanExporter = new OTLPTraceExporter({
246
+ url: `https://${cloudHostname}/observability/traces/otlp/v0`,
247
+ headers,
248
+ compression: CompressionAlgorithm.GZIP,
249
+ });
250
+
251
+ const tracerProvider = new NodeTracerProvider({
252
+ resource,
253
+ spanProcessors: [new MetadataSpanProcessor(metadata), new BatchSpanProcessor(spanExporter)],
254
+ });
255
+ tracerProvider.register();
256
+
257
+ // Metadata processor is already configured in the constructor above, so no metadata option is passed here
258
+ setTracerProvider(tracerProvider);
259
+
260
+ // TODO(brian): PR4 - Add logger provider setup here for structured logging
261
+ // Similar to Python's setup: LoggerProvider, OTLPLogExporter, BatchLogRecordProcessor
262
+ } catch (error) {
263
+ console.error('Failed to setup cloud tracer:', error);
264
+ throw error;
265
+ }
266
+ }
@@ -0,0 +1,61 @@
1
+ // SPDX-FileCopyrightText: 2025 LiveKit, Inc.
2
+ //
3
+ // SPDX-License-Identifier: Apache-2.0
4
+ import { type Span, SpanStatusCode, context as otelContext, trace } from '@opentelemetry/api';
5
+ import type { RealtimeModelMetrics } from '../metrics/base.js';
6
+ import * as traceTypes from './trace_types.js';
7
+ import { tracer } from './traces.js';
8
+
9
+ export function recordException(span: Span, error: Error): void {
10
+ const { message, stack } = error;
11
+
12
+ span.recordException(error);
13
+ span.setStatus({ code: SpanStatusCode.ERROR, message });
14
+
15
+ // Mirror the exception details as plain span attributes so they stay
16
+ // visible even when the backend does not render the exception event
17
+ // recorded above.
18
+ span.setAttributes({
19
+ [traceTypes.ATTR_EXCEPTION_TYPE]: error.constructor.name,
20
+ [traceTypes.ATTR_EXCEPTION_MESSAGE]: message,
21
+ [traceTypes.ATTR_EXCEPTION_TRACE]: stack || '',
22
+ });
23
+ }
24
+
25
+ export function recordRealtimeMetrics(span: Span, metrics: RealtimeModelMetrics): void {
26
+ const { inputTokenDetails, outputTokenDetails } = metrics;
27
+ const spanAttributes: Record<string, string | number> = {
28
+ [traceTypes.ATTR_GEN_AI_REQUEST_MODEL]: metrics.label || 'unknown',
29
+ [traceTypes.ATTR_REALTIME_MODEL_METRICS]: JSON.stringify(metrics),
30
+ [traceTypes.ATTR_GEN_AI_USAGE_INPUT_TOKENS]: metrics.inputTokens,
31
+ [traceTypes.ATTR_GEN_AI_USAGE_OUTPUT_TOKENS]: metrics.outputTokens,
32
+ [traceTypes.ATTR_GEN_AI_USAGE_INPUT_TEXT_TOKENS]: inputTokenDetails.textTokens,
33
+ [traceTypes.ATTR_GEN_AI_USAGE_INPUT_AUDIO_TOKENS]: inputTokenDetails.audioTokens,
34
+ [traceTypes.ATTR_GEN_AI_USAGE_INPUT_CACHED_TOKENS]: inputTokenDetails.cachedTokens,
35
+ [traceTypes.ATTR_GEN_AI_USAGE_OUTPUT_TEXT_TOKENS]: outputTokenDetails.textTokens,
36
+ [traceTypes.ATTR_GEN_AI_USAGE_OUTPUT_AUDIO_TOKENS]: outputTokenDetails.audioTokens,
37
+ };
38
+ // LangFuse-specific completion start time, added only when a TTFT value is present (-1 is skipped)
39
+ const { ttftMs } = metrics;
40
+ if (ttftMs !== undefined && ttftMs !== -1) {
41
+ // Emitted as a UTC ISO string for LangFuse compatibility
42
+ const completionStart = new Date(metrics.timestamp + ttftMs);
43
+ spanAttributes[traceTypes.ATTR_LANGFUSE_COMPLETION_START_TIME] = completionStart.toISOString();
44
+ }
45
+
46
+ if (span.isRecording()) {
47
+ span.setAttributes(spanAttributes);
48
+ return;
49
+ }
50
+
51
+ // The span is not recording, so attach the metrics to a dedicated child span
52
+ // parented to it instead of dropping them.
53
+ const parentContext = trace.setSpan(otelContext.active(), span);
54
+ tracer.getTracer().startActiveSpan('realtime_metrics', {}, parentContext, (child) => {
55
+ try {
56
+ child.setAttributes(spanAttributes);
57
+ } finally {
58
+ child.end();
59
+ }
60
+ });
61
+ }
package/src/tts/tts.ts CHANGED
@@ -157,8 +157,10 @@ export abstract class SynthesizeStream
157
157
  }
158
158
 
159
159
  private async mainTask() {
160
+ // TODO(brian): PR3 - Add span wrapping: tracer.startActiveSpan('tts_request', ..., { endOnExit: false })
160
161
  for (let i = 0; i < this._connOptions.maxRetry + 1; i++) {
161
162
  try {
163
+ // TODO(brian): PR3 - Add span for retry attempts: tracer.startActiveSpan('tts_request_run', ...)
162
164
  return await this.run();
163
165
  } catch (error) {
164
166
  if (error instanceof APIError) {
@@ -385,8 +387,10 @@ export abstract class ChunkedStream implements AsyncIterableIterator<Synthesized
385
387
  }
386
388
 
387
389
  private async mainTask() {
390
+ // TODO(brian): PR3 - Add span wrapping: tracer.startActiveSpan('tts_request', ..., { endOnExit: false })
388
391
  for (let i = 0; i < this._connOptions.maxRetry + 1; i++) {
389
392
  try {
393
+ // TODO(brian): PR3 - Add span for retry attempts: tracer.startActiveSpan('tts_request_run', ...)
390
394
  return await this.run();
391
395
  } catch (error) {
392
396
  if (error instanceof APIError) {
package/src/utils.ts CHANGED
@@ -15,6 +15,23 @@ import { TransformStream, type TransformStreamDefaultController } from 'node:str
15
15
  import { v4 as uuidv4 } from 'uuid';
16
16
  import { log } from './log.js';
17
17
 
18
+ /**
19
+ * Recursively expands the nested properties of a type so the resolved shape (not its aliases)
20
+ * can be inspected in an IDE; functions are kept as-is, Array/Map/Set expand their element types.
21
+ */
22
+ // eslint-disable-next-line @typescript-eslint/ban-types
23
+ export type Expand<T> = T extends Function
24
+ ? T
25
+ : T extends object
26
+ ? T extends Array<infer U>
27
+ ? Array<Expand<U>>
28
+ : T extends Map<infer K, infer V>
29
+ ? Map<Expand<K>, Expand<V>>
30
+ : T extends Set<infer M>
31
+ ? Set<Expand<M>>
32
+ : { [K in keyof T]: Expand<T[K]> }
33
+ : T;
34
+
18
35
  /** Union of a single and a list of {@link AudioFrame}s */
19
36
  export type AudioBuffer = AudioFrame[] | AudioFrame;
20
37
 
@@ -59,6 +59,7 @@ export interface ModelSettings {
59
59
  }
60
60
 
61
61
  export interface AgentOptions<UserData> {
62
+ id?: string;
62
63
  instructions: string;
63
64
  chatCtx?: ChatContext;
64
65
  tools?: ToolContext<UserData>;
@@ -72,6 +73,7 @@ export interface AgentOptions<UserData> {
72
73
  }
73
74
 
74
75
  export class Agent<UserData = any> {
76
+ private _id: string;
75
77
  private turnDetection?: TurnDetectionMode;
76
78
  private _stt?: STT;
77
79
  private _vad?: VAD;
@@ -91,6 +93,7 @@ export class Agent<UserData = any> {
91
93
  _tools?: ToolContext<UserData>;
92
94
 
93
95
  constructor({
96
+ id,
94
97
  instructions,
95
98
  chatCtx,
96
99
  tools,
@@ -100,6 +103,21 @@ export class Agent<UserData = any> {
100
103
  llm,
101
104
  tts,
102
105
  }: AgentOptions<UserData>) {
106
+ if (id) {
107
+ this._id = id;
108
+ } else {
109
+ // Convert class name to snake_case
110
+ const className = this.constructor.name;
111
+ if (className === 'Agent') {
112
+ this._id = 'default_agent';
113
+ } else {
114
+ this._id = className
115
+ .replace(/([A-Z])/g, '_$1')
116
+ .toLowerCase()
117
+ .replace(/^_/, '');
118
+ }
119
+ }
120
+
103
121
  this._instructions = instructions;
104
122
  this._tools = { ...tools };
105
123
  this._chatCtx = chatCtx
@@ -152,6 +170,10 @@ export class Agent<UserData = any> {
152
170
  return new ReadonlyChatContext(this._chatCtx.items);
153
171
  }
154
172
 
173
+ get id(): string {
174
+ return this._id;
175
+ }
176
+
155
177
  get instructions(): string {
156
178
  return this._instructions;
157
179
  }
@@ -202,6 +202,8 @@ export class AgentActivity implements RecognitionHooks {
202
202
  }
203
203
 
204
204
  async start(): Promise<void> {
205
+ // TODO(brian): PR3 - Add span: startSpan = tracer.startSpan('start_agent_activity', { attributes: { 'lk.agent_label': this.agent.label } })
206
+ // TODO(brian): PR3 - Wrap prewarm calls with trace.useSpan(startSpan, endOnExit: false)
205
207
  const unlock = await this.lock.lock();
206
208
  try {
207
209
  this.agent._agentActivity = this;
@@ -235,6 +237,14 @@ export class AgentActivity implements RecognitionHooks {
235
237
  } catch (error) {
236
238
  this.logger.error(error, 'failed to update the tools');
237
239
  }
240
+
241
+ if (!this.llm.capabilities.audioOutput && !this.tts && this.agentSession.output.audio) {
242
+ this.logger.error(
243
+ 'audio output is enabled but RealtimeModel has no audio modality ' +
244
+ 'and no TTS is set. Either enable audio modality in the RealtimeModel ' +
245
+ 'or set a TTS model.',
246
+ );
247
+ }
238
248
  } else if (this.llm instanceof LLM) {
239
249
  try {
240
250
  updateInstructions({
@@ -281,6 +291,7 @@ export class AgentActivity implements RecognitionHooks {
281
291
  this.started = true;
282
292
 
283
293
  this._mainTask = Task.from(({ signal }) => this.mainTask(signal));
294
+ // TODO(brian): PR3 - Wrap onEnter with tracer.startActiveSpan('on_enter', { attributes: { 'lk.agent_label': this.agent.label }, context: startSpan context })
284
295
  this.createSpeechTask({
285
296
  task: Task.from(() => this.agent.onEnter()),
286
297
  name: 'AgentActivity_onEnter',
@@ -625,11 +636,21 @@ export class AgentActivity implements RecognitionHooks {
625
636
  return;
626
637
  }
627
638
 
639
+ // Refactored interruption word count check:
640
+ // - Always apply minInterruptionWords filtering when STT is available and minInterruptionWords > 0
641
+ // - Apply check to all STT results: empty string, undefined, or any length
642
+ // - This ensures consistent behavior across all interruption scenarios
628
643
  if (this.stt && this.agentSession.options.minInterruptionWords > 0 && this.audioRecognition) {
629
644
  const text = this.audioRecognition.currentTranscript;
630
-
631
645
  // TODO(shubhra): better word splitting for multi-language
632
- if (text && splitWords(text, true).length < this.agentSession.options.minInterruptionWords) {
646
+
647
+ // Normalize text: convert undefined/null to empty string for consistent word counting
648
+ const normalizedText = text ?? '';
649
+ const wordCount = splitWords(normalizedText, true).length;
650
+
651
+ // Only allow interruption if word count meets or exceeds minInterruptionWords
652
+ // This applies to all cases: empty strings, partial speech, and full speech
653
+ if (wordCount < this.agentSession.options.minInterruptionWords) {
633
654
  return;
634
655
  }
635
656
  }
@@ -767,19 +788,30 @@ export class AgentActivity implements RecognitionHooks {
767
788
  return true;
768
789
  }
769
790
 
791
+ // Refactored interruption word count check for consistency with onVADInferenceDone:
792
+ // - Always apply minInterruptionWords filtering when STT is available and minInterruptionWords > 0
793
+ // - Use consistent word splitting logic with splitWords (matching onVADInferenceDone pattern)
770
794
  if (
771
795
  this.stt &&
772
796
  this.turnDetection !== 'manual' &&
773
797
  this._currentSpeech &&
774
798
  this._currentSpeech.allowInterruptions &&
775
799
  !this._currentSpeech.interrupted &&
776
- this.agentSession.options.minInterruptionWords > 0 &&
777
- info.newTranscript.split(' ').length < this.agentSession.options.minInterruptionWords
800
+ this.agentSession.options.minInterruptionWords > 0
778
801
  ) {
779
- // avoid interruption if the new_transcript is too short
780
- this.cancelPreemptiveGeneration();
781
- this.logger.info('skipping user input, new_transcript is too short');
782
- return false;
802
+ const wordCount = splitWords(info.newTranscript, true).length;
803
+ if (wordCount < this.agentSession.options.minInterruptionWords) {
804
+ // avoid interruption if the new_transcript contains fewer words than minInterruptionWords
805
+ this.cancelPreemptiveGeneration();
806
+ this.logger.info(
807
+ {
808
+ wordCount,
809
+ minInterruptionWords: this.agentSession.options.minInterruptionWords,
810
+ },
811
+ 'skipping user input, word count below minimum interruption threshold',
812
+ );
813
+ return false;
814
+ }
783
815
  }
784
816
 
785
817
  const oldTask = this._userTurnCompletedTask;
@@ -1222,6 +1254,7 @@ export class AgentActivity implements RecognitionHooks {
1222
1254
  }
1223
1255
  }
1224
1256
 
1257
+ // TODO(brian): PR3 - Wrap entire pipelineReplyTask() method with tracer.startActiveSpan('agent_turn')
1225
1258
  private async pipelineReplyTask(
1226
1259
  speechHandle: SpeechHandle,
1227
1260
  chatCtx: ChatContext,
@@ -1612,7 +1645,7 @@ export class AgentActivity implements RecognitionHooks {
1612
1645
 
1613
1646
  const readMessages = async (
1614
1647
  abortController: AbortController,
1615
- outputs: Array<[string, _TextOut | null, _AudioOut | null]>,
1648
+ outputs: Array<[string, _TextOut | null, _AudioOut | null, ('text' | 'audio')[] | undefined]>,
1616
1649
  ) => {
1617
1650
  replyAbortController.signal.addEventListener('abort', () => abortController.abort(), {
1618
1651
  once: true,
@@ -1627,7 +1660,25 @@ export class AgentActivity implements RecognitionHooks {
1627
1660
  );
1628
1661
  break;
1629
1662
  }
1630
- const trNodeResult = await this.agent.transcriptionNode(msg.textStream, modelSettings);
1663
+
1664
+ const msgModalities = msg.modalities ? await msg.modalities : undefined;
1665
+ let ttsTextInput: ReadableStream<string> | null = null;
1666
+ let trTextInput: ReadableStream<string>;
1667
+
1668
+ if (msgModalities && !msgModalities.includes('audio') && this.tts) {
1669
+ if (this.llm instanceof RealtimeModel && this.llm.capabilities.audioOutput) {
1670
+ this.logger.warn(
1671
+ 'text response received from realtime API, falling back to use a TTS model.',
1672
+ );
1673
+ }
1674
+ const [_ttsTextInput, _trTextInput] = msg.textStream.tee();
1675
+ ttsTextInput = _ttsTextInput;
1676
+ trTextInput = _trTextInput;
1677
+ } else {
1678
+ trTextInput = msg.textStream;
1679
+ }
1680
+
1681
+ const trNodeResult = await this.agent.transcriptionNode(trTextInput, modelSettings);
1631
1682
  let textOut: _TextOut | null = null;
1632
1683
  if (trNodeResult) {
1633
1684
  const [textForwardTask, _textOut] = performTextForwarding(
@@ -1638,30 +1689,51 @@ export class AgentActivity implements RecognitionHooks {
1638
1689
  forwardTasks.push(textForwardTask);
1639
1690
  textOut = _textOut;
1640
1691
  }
1692
+
1641
1693
  let audioOut: _AudioOut | null = null;
1642
1694
  if (audioOutput) {
1643
- const realtimeAudio = await this.agent.realtimeAudioOutputNode(
1644
- msg.audioStream,
1645
- modelSettings,
1646
- );
1647
- if (realtimeAudio) {
1695
+ let realtimeAudioResult: ReadableStream<AudioFrame> | null = null;
1696
+
1697
+ if (ttsTextInput) {
1698
+ const [ttsTask, ttsStream] = performTTSInference(
1699
+ (...args) => this.agent.ttsNode(...args),
1700
+ ttsTextInput,
1701
+ modelSettings,
1702
+ abortController,
1703
+ );
1704
+ tasks.push(ttsTask);
1705
+ realtimeAudioResult = ttsStream;
1706
+ } else if (msgModalities && msgModalities.includes('audio')) {
1707
+ realtimeAudioResult = await this.agent.realtimeAudioOutputNode(
1708
+ msg.audioStream,
1709
+ modelSettings,
1710
+ );
1711
+ } else if (this.llm instanceof RealtimeModel && this.llm.capabilities.audioOutput) {
1712
+ this.logger.error(
1713
+ 'Text message received from Realtime API with audio modality. ' +
1714
+ 'This usually happens when text chat context is synced to the API. ' +
1715
+ 'Try to add a TTS model as fallback or use text modality with TTS instead.',
1716
+ );
1717
+ } else {
1718
+ this.logger.warn(
1719
+ 'audio output is enabled but neither tts nor realtime audio is available',
1720
+ );
1721
+ }
1722
+
1723
+ if (realtimeAudioResult) {
1648
1724
  const [forwardTask, _audioOut] = performAudioForwarding(
1649
- realtimeAudio,
1725
+ realtimeAudioResult,
1650
1726
  audioOutput,
1651
1727
  abortController,
1652
1728
  );
1653
1729
  forwardTasks.push(forwardTask);
1654
1730
  audioOut = _audioOut;
1655
1731
  audioOut.firstFrameFut.await.finally(onFirstFrame);
1656
- } else {
1657
- this.logger.warn(
1658
- 'audio output is enabled but neither tts nor realtime audio is available',
1659
- );
1660
1732
  }
1661
1733
  } else if (textOut) {
1662
1734
  textOut.firstTextFut.await.finally(onFirstFrame);
1663
1735
  }
1664
- outputs.push([msg.messageId, textOut, audioOut]);
1736
+ outputs.push([msg.messageId, textOut, audioOut, msgModalities]);
1665
1737
  }
1666
1738
  await waitFor(forwardTasks);
1667
1739
  } catch (error) {
@@ -1671,7 +1743,9 @@ export class AgentActivity implements RecognitionHooks {
1671
1743
  }
1672
1744
  };
1673
1745
 
1674
- const messageOutputs: Array<[string, _TextOut | null, _AudioOut | null]> = [];
1746
+ const messageOutputs: Array<
1747
+ [string, _TextOut | null, _AudioOut | null, ('text' | 'audio')[] | undefined]
1748
+ > = [];
1675
1749
  const tasks = [
1676
1750
  Task.from(
1677
1751
  (controller) => readMessages(controller, messageOutputs),
@@ -1750,7 +1824,7 @@ export class AgentActivity implements RecognitionHooks {
1750
1824
 
1751
1825
  if (messageOutputs.length > 0) {
1752
1826
  // there should be only one message
1753
- const [msgId, textOut, audioOut] = messageOutputs[0]!;
1827
+ const [msgId, textOut, audioOut, msgModalities] = messageOutputs[0]!;
1754
1828
  let forwardedText = textOut?.text || '';
1755
1829
 
1756
1830
  if (audioOutput) {
@@ -1775,6 +1849,8 @@ export class AgentActivity implements RecognitionHooks {
1775
1849
  this.realtimeSession.truncate({
1776
1850
  messageId: msgId,
1777
1851
  audioEndMs: Math.floor(playbackPosition),
1852
+ modalities: msgModalities,
1853
+ audioTranscript: forwardedText,
1778
1854
  });
1779
1855
  }
1780
1856
 
@@ -1805,7 +1881,7 @@ export class AgentActivity implements RecognitionHooks {
1805
1881
 
1806
1882
  if (messageOutputs.length > 0) {
1807
1883
  // there should be only one message
1808
- const [msgId, textOut, _] = messageOutputs[0]!;
1884
+ const [msgId, textOut, _, __] = messageOutputs[0]!;
1809
1885
  const message = ChatMessage.create({
1810
1886
  role: 'assistant',
1811
1887
  content: textOut?.text || '',
@@ -2020,12 +2096,14 @@ export class AgentActivity implements RecognitionHooks {
2020
2096
  this.wakeupMainTask();
2021
2097
  }
2022
2098
 
2099
+ // TODO(brian): PR3 - Wrap entire drain() method with tracer.startActiveSpan('drain_agent_activity', { attributes: { 'lk.agent_label': this.agent.label } })
2023
2100
  async drain(): Promise<void> {
2024
2101
  const unlock = await this.lock.lock();
2025
2102
  try {
2026
2103
  if (this._draining) return;
2027
2104
 
2028
2105
  this.cancelPreemptiveGeneration();
2106
+ // TODO(brian): PR3 - Wrap onExit with tracer.startActiveSpan('on_exit', { attributes: { 'lk.agent_label': this.agent.label } })
2029
2107
  this.createSpeechTask({
2030
2108
  task: Task.from(() => this.agent.onExit()),
2031
2109
  name: 'AgentActivity_onExit',