@livekit/agents 1.0.45 → 1.0.47

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (225) hide show
  1. package/dist/cli.cjs +14 -20
  2. package/dist/cli.cjs.map +1 -1
  3. package/dist/cli.d.ts.map +1 -1
  4. package/dist/cli.js +14 -20
  5. package/dist/cli.js.map +1 -1
  6. package/dist/ipc/job_proc_lazy_main.cjs +14 -5
  7. package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
  8. package/dist/ipc/job_proc_lazy_main.js +14 -5
  9. package/dist/ipc/job_proc_lazy_main.js.map +1 -1
  10. package/dist/llm/chat_context.cjs +19 -0
  11. package/dist/llm/chat_context.cjs.map +1 -1
  12. package/dist/llm/chat_context.d.cts +4 -0
  13. package/dist/llm/chat_context.d.ts +4 -0
  14. package/dist/llm/chat_context.d.ts.map +1 -1
  15. package/dist/llm/chat_context.js +19 -0
  16. package/dist/llm/chat_context.js.map +1 -1
  17. package/dist/llm/provider_format/index.cjs +2 -0
  18. package/dist/llm/provider_format/index.cjs.map +1 -1
  19. package/dist/llm/provider_format/index.d.cts +1 -1
  20. package/dist/llm/provider_format/index.d.ts +1 -1
  21. package/dist/llm/provider_format/index.d.ts.map +1 -1
  22. package/dist/llm/provider_format/index.js +6 -1
  23. package/dist/llm/provider_format/index.js.map +1 -1
  24. package/dist/llm/provider_format/openai.cjs +82 -2
  25. package/dist/llm/provider_format/openai.cjs.map +1 -1
  26. package/dist/llm/provider_format/openai.d.cts +1 -0
  27. package/dist/llm/provider_format/openai.d.ts +1 -0
  28. package/dist/llm/provider_format/openai.d.ts.map +1 -1
  29. package/dist/llm/provider_format/openai.js +80 -1
  30. package/dist/llm/provider_format/openai.js.map +1 -1
  31. package/dist/llm/provider_format/openai.test.cjs +326 -0
  32. package/dist/llm/provider_format/openai.test.cjs.map +1 -1
  33. package/dist/llm/provider_format/openai.test.js +327 -1
  34. package/dist/llm/provider_format/openai.test.js.map +1 -1
  35. package/dist/llm/provider_format/utils.cjs +4 -3
  36. package/dist/llm/provider_format/utils.cjs.map +1 -1
  37. package/dist/llm/provider_format/utils.d.ts.map +1 -1
  38. package/dist/llm/provider_format/utils.js +4 -3
  39. package/dist/llm/provider_format/utils.js.map +1 -1
  40. package/dist/llm/realtime.cjs.map +1 -1
  41. package/dist/llm/realtime.d.cts +1 -0
  42. package/dist/llm/realtime.d.ts +1 -0
  43. package/dist/llm/realtime.d.ts.map +1 -1
  44. package/dist/llm/realtime.js.map +1 -1
  45. package/dist/log.cjs +5 -2
  46. package/dist/log.cjs.map +1 -1
  47. package/dist/log.d.ts.map +1 -1
  48. package/dist/log.js +5 -2
  49. package/dist/log.js.map +1 -1
  50. package/dist/stream/deferred_stream.cjs +15 -6
  51. package/dist/stream/deferred_stream.cjs.map +1 -1
  52. package/dist/stream/deferred_stream.d.ts.map +1 -1
  53. package/dist/stream/deferred_stream.js +15 -6
  54. package/dist/stream/deferred_stream.js.map +1 -1
  55. package/dist/stream/index.cjs +3 -0
  56. package/dist/stream/index.cjs.map +1 -1
  57. package/dist/stream/index.d.cts +1 -0
  58. package/dist/stream/index.d.ts +1 -0
  59. package/dist/stream/index.d.ts.map +1 -1
  60. package/dist/stream/index.js +2 -0
  61. package/dist/stream/index.js.map +1 -1
  62. package/dist/stream/multi_input_stream.cjs +139 -0
  63. package/dist/stream/multi_input_stream.cjs.map +1 -0
  64. package/dist/stream/multi_input_stream.d.cts +55 -0
  65. package/dist/stream/multi_input_stream.d.ts +55 -0
  66. package/dist/stream/multi_input_stream.d.ts.map +1 -0
  67. package/dist/stream/multi_input_stream.js +115 -0
  68. package/dist/stream/multi_input_stream.js.map +1 -0
  69. package/dist/stream/multi_input_stream.test.cjs +340 -0
  70. package/dist/stream/multi_input_stream.test.cjs.map +1 -0
  71. package/dist/stream/multi_input_stream.test.js +339 -0
  72. package/dist/stream/multi_input_stream.test.js.map +1 -0
  73. package/dist/telemetry/trace_types.cjs +42 -0
  74. package/dist/telemetry/trace_types.cjs.map +1 -1
  75. package/dist/telemetry/trace_types.d.cts +14 -0
  76. package/dist/telemetry/trace_types.d.ts +14 -0
  77. package/dist/telemetry/trace_types.d.ts.map +1 -1
  78. package/dist/telemetry/trace_types.js +28 -0
  79. package/dist/telemetry/trace_types.js.map +1 -1
  80. package/dist/utils.cjs +44 -2
  81. package/dist/utils.cjs.map +1 -1
  82. package/dist/utils.d.cts +8 -0
  83. package/dist/utils.d.ts +8 -0
  84. package/dist/utils.d.ts.map +1 -1
  85. package/dist/utils.js +44 -2
  86. package/dist/utils.js.map +1 -1
  87. package/dist/utils.test.cjs +71 -0
  88. package/dist/utils.test.cjs.map +1 -1
  89. package/dist/utils.test.js +71 -0
  90. package/dist/utils.test.js.map +1 -1
  91. package/dist/version.cjs +1 -1
  92. package/dist/version.cjs.map +1 -1
  93. package/dist/version.d.cts +1 -1
  94. package/dist/version.d.ts +1 -1
  95. package/dist/version.d.ts.map +1 -1
  96. package/dist/version.js +1 -1
  97. package/dist/version.js.map +1 -1
  98. package/dist/voice/agent.cjs +144 -12
  99. package/dist/voice/agent.cjs.map +1 -1
  100. package/dist/voice/agent.d.cts +29 -4
  101. package/dist/voice/agent.d.ts +29 -4
  102. package/dist/voice/agent.d.ts.map +1 -1
  103. package/dist/voice/agent.js +140 -11
  104. package/dist/voice/agent.js.map +1 -1
  105. package/dist/voice/agent.test.cjs +120 -0
  106. package/dist/voice/agent.test.cjs.map +1 -1
  107. package/dist/voice/agent.test.js +122 -2
  108. package/dist/voice/agent.test.js.map +1 -1
  109. package/dist/voice/agent_activity.cjs +402 -292
  110. package/dist/voice/agent_activity.cjs.map +1 -1
  111. package/dist/voice/agent_activity.d.cts +35 -7
  112. package/dist/voice/agent_activity.d.ts +35 -7
  113. package/dist/voice/agent_activity.d.ts.map +1 -1
  114. package/dist/voice/agent_activity.js +402 -287
  115. package/dist/voice/agent_activity.js.map +1 -1
  116. package/dist/voice/agent_session.cjs +156 -44
  117. package/dist/voice/agent_session.cjs.map +1 -1
  118. package/dist/voice/agent_session.d.cts +22 -9
  119. package/dist/voice/agent_session.d.ts +22 -9
  120. package/dist/voice/agent_session.d.ts.map +1 -1
  121. package/dist/voice/agent_session.js +156 -44
  122. package/dist/voice/agent_session.js.map +1 -1
  123. package/dist/voice/audio_recognition.cjs +89 -36
  124. package/dist/voice/audio_recognition.cjs.map +1 -1
  125. package/dist/voice/audio_recognition.d.cts +22 -1
  126. package/dist/voice/audio_recognition.d.ts +22 -1
  127. package/dist/voice/audio_recognition.d.ts.map +1 -1
  128. package/dist/voice/audio_recognition.js +93 -36
  129. package/dist/voice/audio_recognition.js.map +1 -1
  130. package/dist/voice/audio_recognition_span.test.cjs +233 -0
  131. package/dist/voice/audio_recognition_span.test.cjs.map +1 -0
  132. package/dist/voice/audio_recognition_span.test.js +232 -0
  133. package/dist/voice/audio_recognition_span.test.js.map +1 -0
  134. package/dist/voice/generation.cjs +39 -19
  135. package/dist/voice/generation.cjs.map +1 -1
  136. package/dist/voice/generation.d.ts.map +1 -1
  137. package/dist/voice/generation.js +44 -20
  138. package/dist/voice/generation.js.map +1 -1
  139. package/dist/voice/index.cjs +2 -0
  140. package/dist/voice/index.cjs.map +1 -1
  141. package/dist/voice/index.d.cts +1 -1
  142. package/dist/voice/index.d.ts +1 -1
  143. package/dist/voice/index.d.ts.map +1 -1
  144. package/dist/voice/index.js +2 -1
  145. package/dist/voice/index.js.map +1 -1
  146. package/dist/voice/io.cjs +6 -3
  147. package/dist/voice/io.cjs.map +1 -1
  148. package/dist/voice/io.d.cts +3 -2
  149. package/dist/voice/io.d.ts +3 -2
  150. package/dist/voice/io.d.ts.map +1 -1
  151. package/dist/voice/io.js +6 -3
  152. package/dist/voice/io.js.map +1 -1
  153. package/dist/voice/recorder_io/recorder_io.cjs +3 -1
  154. package/dist/voice/recorder_io/recorder_io.cjs.map +1 -1
  155. package/dist/voice/recorder_io/recorder_io.d.ts.map +1 -1
  156. package/dist/voice/recorder_io/recorder_io.js +3 -1
  157. package/dist/voice/recorder_io/recorder_io.js.map +1 -1
  158. package/dist/voice/room_io/_input.cjs +17 -17
  159. package/dist/voice/room_io/_input.cjs.map +1 -1
  160. package/dist/voice/room_io/_input.d.cts +2 -2
  161. package/dist/voice/room_io/_input.d.ts +2 -2
  162. package/dist/voice/room_io/_input.d.ts.map +1 -1
  163. package/dist/voice/room_io/_input.js +7 -6
  164. package/dist/voice/room_io/_input.js.map +1 -1
  165. package/dist/voice/room_io/room_io.cjs +9 -0
  166. package/dist/voice/room_io/room_io.cjs.map +1 -1
  167. package/dist/voice/room_io/room_io.d.cts +3 -1
  168. package/dist/voice/room_io/room_io.d.ts +3 -1
  169. package/dist/voice/room_io/room_io.d.ts.map +1 -1
  170. package/dist/voice/room_io/room_io.js +9 -0
  171. package/dist/voice/room_io/room_io.js.map +1 -1
  172. package/dist/voice/speech_handle.cjs +7 -1
  173. package/dist/voice/speech_handle.cjs.map +1 -1
  174. package/dist/voice/speech_handle.d.cts +2 -0
  175. package/dist/voice/speech_handle.d.ts +2 -0
  176. package/dist/voice/speech_handle.d.ts.map +1 -1
  177. package/dist/voice/speech_handle.js +8 -2
  178. package/dist/voice/speech_handle.js.map +1 -1
  179. package/dist/voice/testing/run_result.cjs +66 -15
  180. package/dist/voice/testing/run_result.cjs.map +1 -1
  181. package/dist/voice/testing/run_result.d.cts +14 -3
  182. package/dist/voice/testing/run_result.d.ts +14 -3
  183. package/dist/voice/testing/run_result.d.ts.map +1 -1
  184. package/dist/voice/testing/run_result.js +66 -15
  185. package/dist/voice/testing/run_result.js.map +1 -1
  186. package/dist/voice/utils.cjs +47 -0
  187. package/dist/voice/utils.cjs.map +1 -0
  188. package/dist/voice/utils.d.cts +4 -0
  189. package/dist/voice/utils.d.ts +4 -0
  190. package/dist/voice/utils.d.ts.map +1 -0
  191. package/dist/voice/utils.js +23 -0
  192. package/dist/voice/utils.js.map +1 -0
  193. package/package.json +1 -1
  194. package/src/cli.ts +20 -33
  195. package/src/ipc/job_proc_lazy_main.ts +16 -5
  196. package/src/llm/chat_context.ts +35 -0
  197. package/src/llm/provider_format/index.ts +7 -2
  198. package/src/llm/provider_format/openai.test.ts +385 -1
  199. package/src/llm/provider_format/openai.ts +103 -0
  200. package/src/llm/provider_format/utils.ts +6 -4
  201. package/src/llm/realtime.ts +1 -0
  202. package/src/log.ts +5 -2
  203. package/src/stream/deferred_stream.ts +17 -6
  204. package/src/stream/index.ts +1 -0
  205. package/src/stream/multi_input_stream.test.ts +540 -0
  206. package/src/stream/multi_input_stream.ts +172 -0
  207. package/src/telemetry/trace_types.ts +18 -0
  208. package/src/utils.test.ts +87 -0
  209. package/src/utils.ts +52 -2
  210. package/src/version.ts +1 -1
  211. package/src/voice/agent.test.ts +140 -2
  212. package/src/voice/agent.ts +189 -10
  213. package/src/voice/agent_activity.ts +449 -286
  214. package/src/voice/agent_session.ts +195 -51
  215. package/src/voice/audio_recognition.ts +118 -38
  216. package/src/voice/audio_recognition_span.test.ts +261 -0
  217. package/src/voice/generation.ts +52 -23
  218. package/src/voice/index.ts +1 -1
  219. package/src/voice/io.ts +7 -4
  220. package/src/voice/recorder_io/recorder_io.ts +2 -1
  221. package/src/voice/room_io/_input.ts +11 -7
  222. package/src/voice/room_io/room_io.ts +12 -0
  223. package/src/voice/speech_handle.ts +9 -2
  224. package/src/voice/testing/run_result.ts +81 -23
  225. package/src/voice/utils.ts +29 -0
@@ -0,0 +1,233 @@
1
+ "use strict";
2
+ var import_rtc_node = require("@livekit/rtc-node");
3
+ var import_sdk_trace_base = require("@opentelemetry/sdk-trace-base");
4
+ var import_sdk_trace_node = require("@opentelemetry/sdk-trace-node");
5
+ var import_vitest = require("vitest");
6
+ var import_log = require("../log.cjs");
7
+ var import_stt = require("../stt/stt.cjs");
8
+ var import_telemetry = require("../telemetry/index.cjs");
9
+ var import_vad = require("../vad.cjs");
10
+ var import_audio_recognition = require("./audio_recognition.cjs");
/**
 * Creates a new NodeTracerProvider wired to an in-memory span exporter,
 * registers it, and hands it to the project's `setTracerProvider`.
 *
 * @returns {{ exporter: object }} The exporter, so a test can read back the
 *   spans that finished while it ran via `exporter.getFinishedSpans()`.
 */
function setupInMemoryTracing() {
  const { InMemorySpanExporter, SimpleSpanProcessor } = import_sdk_trace_base;
  const exporter = new InMemorySpanExporter();
  const provider = new import_sdk_trace_node.NodeTracerProvider();
  const processor = new SimpleSpanProcessor(exporter);
  provider.addSpanProcessor(processor);
  provider.register();
  (0, import_telemetry.setTracerProvider)(provider);
  return { exporter };
}
/**
 * Looks up a span by its `name` property.
 *
 * @param {Array<{ name: string }>} spans - Spans to search, in order.
 * @param {string} name - Exact span name to match.
 * @returns {object | undefined} The first matching span, or `undefined` when none matches.
 */
function spanByName(spans, name) {
  const hasRequestedName = (span) => span.name === name;
  return spans.find(hasRequestedName);
}
/**
 * Test double that replays a fixed list of VAD events through the async
 * iterator protocol.
 *
 * We intentionally avoid extending the real VADStream (it is not exported as a
 * value in JS output in some bundling contexts). Instead we emulate the async
 * iterator shape used by AudioRecognition. The `extends Object` / `super()`
 * pair that the TypeScript cast left behind was a no-op and has been dropped.
 */
class FakeVADStream {
  /** Events handed out by `next()`, in order. */
  events;
  /** Index of the next event to hand out. */
  idx = 0;

  /**
   * @param {Array<object>} [events=[]] - Events to replay; an omitted argument
   *   yields a stream that is immediately done.
   */
  constructor(events = []) {
    this.events = events;
  }

  // The stream-management methods are deliberate no-ops: this fake has no input to manage.
  updateInputStream() {}

  detachInputStream() {}

  close() {}

  /** The stream is its own async iterator. */
  [Symbol.asyncIterator]() {
    return this;
  }

  /**
   * @returns {Promise<{ done: boolean, value: object | undefined }>} The next
   *   queued event, or `{ done: true }` once every event has been replayed.
   */
  async next() {
    if (this.idx >= this.events.length) {
      return { done: true, value: undefined };
    }
    const value = this.events[this.idx];
    this.idx += 1;
    return { done: false, value };
  }
}
/**
 * VAD test double: every call to `stream()` returns a new FakeVADStream that
 * replays the events given to the constructor.
 */
class FakeVAD extends import_vad.VAD {
  label = "fake-vad";
  events;

  /**
   * @param {Array<object>} scriptedEvents - VAD events each created stream will replay.
   */
  constructor(scriptedEvents) {
    const capabilities = { updateInterval: 1 };
    super(capabilities);
    this.events = scriptedEvents;
  }

  /** @returns {FakeVADStream} A new stream over the stored events. */
  stream() {
    const replay = new FakeVADStream(this.events);
    return replay;
  }
}
/**
 * Turn-detector stub: reports every language as supported, supplies no
 * "unlikely" threshold, and always predicts end of turn with probability 1.
 */
const alwaysTrueTurnDetector = {
  async supportsLanguage() {
    return true;
  },
  async unlikelyThreshold() {
    return undefined;
  },
  async predictEndOfTurn() {
    return 1;
  },
};
/**
 * Span-parity suite: runs AudioRecognition against scripted STT (and VAD)
 * events and checks that a `user_turn` span is produced and that the
 * `eou_detection` span is parented under it.
 *
 * The fixtures both tests share (mocked hooks, scripted STT stream, the
 * start/drain/close sequence) are built by local helpers so each test only
 * states what differs.
 */
(0, import_vitest.describe)("AudioRecognition user_turn span parity", () => {
  const { expect, it, vi } = import_vitest;
  const { SpeechEventType } = import_stt;
  const { VADEventType } = import_vad;

  // Time granted to AudioRecognition's background work before close() is called.
  const DRAIN_DELAY_MS = 20;

  (0, import_log.initializeLogger)({ pretty: false, level: "silent" });

  /** Builds a fresh set of mocked recognition hooks with a stub chat context. */
  const createHooks = () => ({
    onStartOfSpeech: vi.fn(),
    onVADInferenceDone: vi.fn(),
    onEndOfSpeech: vi.fn(),
    onInterimTranscript: vi.fn(),
    onFinalTranscript: vi.fn(),
    onPreemptiveGeneration: vi.fn(),
    retrieveChatCtx: () => ({
      copy() {
        return this;
      },
      addMessage() {},
      toJSON() {
        return { items: [] };
      },
    }),
    onEndOfTurn: vi.fn(async () => true),
  });

  /** Builds a FINAL_TRANSCRIPT speech event with a single English alternative. */
  const finalTranscript = (text, confidence) => ({
    type: SpeechEventType.FINAL_TRANSCRIPT,
    alternatives: [
      {
        language: "en",
        text,
        startTime: 0,
        endTime: 0,
        confidence,
      },
    ],
  });

  /** Builds a VAD event; `fields` supplies the values that differ between events. */
  const vadEvent = (fields) => ({
    samplesIndex: 0,
    speechDuration: 100,
    frames: [],
    probability: 0,
    inferenceDuration: 0,
    rawAccumulatedSilence: 0,
    rawAccumulatedSpeech: 0,
    ...fields,
  });

  /** Returns an STT node whose stream emits `events` and then closes. */
  const createSttNode = (events) => async () =>
    new ReadableStream({
      start(controller) {
        for (const ev of events) {
          controller.enqueue(ev);
        }
        controller.close();
      },
    });

  /** Starts `ar`, lets it drain, closes it, and returns the two spans under test. */
  const runAndCollectSpans = async (ar, exporter) => {
    await ar.start();
    // allow background task to drain
    await new Promise((resolve) => setTimeout(resolve, DRAIN_DELAY_MS));
    await ar.close();
    const spans = exporter.getFinishedSpans();
    return {
      userTurn: spanByName(spans, "user_turn"),
      eou: spanByName(spans, "eou_detection"),
    };
  };

  it("creates user_turn and parents eou_detection under it (stt mode)", async () => {
    const { exporter } = setupInMemoryTracing();
    const hooks = createHooks();
    const sttEvents = [
      { type: SpeechEventType.START_OF_SPEECH },
      finalTranscript("hello", 0.9),
      { type: SpeechEventType.END_OF_SPEECH },
    ];

    const ar = new import_audio_recognition.AudioRecognition({
      recognitionHooks: hooks,
      stt: createSttNode(sttEvents),
      vad: undefined,
      turnDetector: alwaysTrueTurnDetector,
      turnDetectionMode: "stt",
      minEndpointingDelay: 0,
      maxEndpointingDelay: 0,
      sttModel: "deepgram-nova2",
      sttProvider: "deepgram",
      getLinkedParticipant: () => ({
        sid: "p1",
        identity: "bob",
        kind: import_rtc_node.ParticipantKind.AGENT,
      }),
    });

    const { userTurn, eou } = await runAndCollectSpans(ar, exporter);
    expect(userTurn, "user_turn span missing").toBeTruthy();
    expect(eou, "eou_detection span missing").toBeTruthy();
    expect(eou.parentSpanId).toBe(userTurn.spanContext().spanId);

    // creation-time attributes
    expect(userTurn.attributes["lk.participant_id"]).toBe("p1");
    expect(userTurn.attributes["lk.participant_identity"]).toBe("bob");
    expect(userTurn.attributes["lk.participant_kind"]).toBe("AGENT");
    expect(userTurn.attributes["gen_ai.request.model"]).toBe("deepgram-nova2");
    expect(userTurn.attributes["gen_ai.provider.name"]).toBe("deepgram");

    // end-of-turn attributes
    expect(userTurn.attributes["lk.user_transcript"]).toContain("hello");
    expect(userTurn.attributes["lk.transcript_confidence"]).toBeGreaterThan(0);
  });

  it("creates user_turn from VAD startTime (vad mode) and keeps same parenting", async () => {
    const { exporter } = setupInMemoryTracing();
    const hooks = createHooks();

    const now = Date.now();
    const vadEvents = [
      vadEvent({
        type: VADEventType.START_OF_SPEECH,
        timestamp: now,
        silenceDuration: 0,
        speaking: true,
      }),
      vadEvent({
        type: VADEventType.END_OF_SPEECH,
        timestamp: now + 200,
        silenceDuration: 100,
        speaking: false,
      }),
    ];
    const sttEvents = [finalTranscript("test", 0.8)];

    const ar = new import_audio_recognition.AudioRecognition({
      recognitionHooks: hooks,
      stt: createSttNode(sttEvents),
      vad: new FakeVAD(vadEvents),
      turnDetector: alwaysTrueTurnDetector,
      turnDetectionMode: "vad",
      minEndpointingDelay: 0,
      maxEndpointingDelay: 0,
      sttModel: "stt-model",
      sttProvider: "stt-provider",
      getLinkedParticipant: () => ({
        sid: "p2",
        identity: "alice",
        kind: import_rtc_node.ParticipantKind.AGENT,
      }),
    });

    const { userTurn, eou } = await runAndCollectSpans(ar, exporter);
    expect(userTurn, "user_turn span missing").toBeTruthy();
    expect(eou, "eou_detection span missing").toBeTruthy();
    expect(eou.parentSpanId).toBe(userTurn.spanContext().spanId);

    expect(hooks.onStartOfSpeech).toHaveBeenCalled();
    expect(hooks.onEndOfSpeech).toHaveBeenCalled();
  });
});
233
+ //# sourceMappingURL=audio_recognition_span.test.cjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../src/voice/audio_recognition_span.test.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2026 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { ParticipantKind } from '@livekit/rtc-node';\nimport { InMemorySpanExporter, SimpleSpanProcessor } from '@opentelemetry/sdk-trace-base';\nimport { NodeTracerProvider } from '@opentelemetry/sdk-trace-node';\nimport { describe, expect, it, vi } from 'vitest';\nimport { initializeLogger } from '../log.js';\nimport { type SpeechEvent, SpeechEventType } from '../stt/stt.js';\nimport { setTracerProvider } from '../telemetry/index.js';\nimport { VAD, type VADEvent, VADEventType, type VADStream } from '../vad.js';\nimport { AudioRecognition, type _TurnDetector } from './audio_recognition.js';\n\nfunction setupInMemoryTracing() {\n const exporter = new InMemorySpanExporter();\n const provider = new NodeTracerProvider();\n provider.addSpanProcessor(new SimpleSpanProcessor(exporter));\n provider.register();\n setTracerProvider(provider);\n return { exporter };\n}\n\nfunction spanByName(spans: any[], name: string) {\n return spans.find((s) => s.name === name);\n}\n\nclass FakeVADStream extends (Object as unknown as { new (): VADStream }) {\n // We intentionally avoid extending the real VADStream (it is not exported as a value in JS output\n // in some bundling contexts). 
Instead we emulate the async iterator shape used by AudioRecognition.\n private events: VADEvent[];\n private idx = 0;\n constructor(events: VADEvent[]) {\n super();\n this.events = events;\n }\n updateInputStream() {}\n detachInputStream() {}\n close() {}\n [Symbol.asyncIterator]() {\n return this;\n }\n async next(): Promise<IteratorResult<VADEvent>> {\n if (this.idx >= this.events.length) {\n return { done: true, value: undefined };\n }\n const value = this.events[this.idx++]!;\n return { done: false, value };\n }\n}\n\nclass FakeVAD extends VAD {\n label = 'fake-vad';\n private events: VADEvent[];\n constructor(events: VADEvent[]) {\n super({ updateInterval: 1 });\n this.events = events;\n }\n stream(): any {\n return new FakeVADStream(this.events);\n }\n}\n\nconst alwaysTrueTurnDetector: _TurnDetector = {\n supportsLanguage: async () => true,\n unlikelyThreshold: async () => undefined,\n predictEndOfTurn: async () => 1.0,\n};\n\ndescribe('AudioRecognition user_turn span parity', () => {\n initializeLogger({ pretty: false, level: 'silent' });\n\n it('creates user_turn and parents eou_detection under it (stt mode)', async () => {\n const { exporter } = setupInMemoryTracing();\n\n const hooks = {\n onStartOfSpeech: vi.fn(),\n onVADInferenceDone: vi.fn(),\n onEndOfSpeech: vi.fn(),\n onInterimTranscript: vi.fn(),\n onFinalTranscript: vi.fn(),\n onPreemptiveGeneration: vi.fn(),\n retrieveChatCtx: () =>\n ({\n copy() {\n return this;\n },\n addMessage() {},\n toJSON() {\n return { items: [] };\n },\n }) as any,\n onEndOfTurn: vi.fn(async () => true),\n };\n\n const sttEvents: SpeechEvent[] = [\n { type: SpeechEventType.START_OF_SPEECH },\n {\n type: SpeechEventType.FINAL_TRANSCRIPT,\n alternatives: [\n {\n language: 'en',\n text: 'hello',\n startTime: 0,\n endTime: 0,\n confidence: 0.9,\n },\n ],\n },\n { type: SpeechEventType.END_OF_SPEECH },\n ];\n\n const sttNode = async () =>\n new ReadableStream<SpeechEvent>({\n start(controller) {\n for (const ev of sttEvents) 
controller.enqueue(ev);\n controller.close();\n },\n });\n\n const ar = new AudioRecognition({\n recognitionHooks: hooks as any,\n stt: sttNode as any,\n vad: undefined,\n turnDetector: alwaysTrueTurnDetector,\n turnDetectionMode: 'stt',\n minEndpointingDelay: 0,\n maxEndpointingDelay: 0,\n sttModel: 'deepgram-nova2',\n sttProvider: 'deepgram',\n getLinkedParticipant: () => ({ sid: 'p1', identity: 'bob', kind: ParticipantKind.AGENT }),\n });\n\n await ar.start();\n // allow background task to drain\n await new Promise((r) => setTimeout(r, 20));\n await ar.close();\n\n const spans = exporter.getFinishedSpans();\n const userTurn = spanByName(spans, 'user_turn');\n const eou = spanByName(spans, 'eou_detection');\n expect(userTurn, 'user_turn span missing').toBeTruthy();\n expect(eou, 'eou_detection span missing').toBeTruthy();\n\n expect(eou.parentSpanId).toBe(userTurn.spanContext().spanId);\n\n // creation-time attributes\n expect(userTurn.attributes['lk.participant_id']).toBe('p1');\n expect(userTurn.attributes['lk.participant_identity']).toBe('bob');\n expect(userTurn.attributes['lk.participant_kind']).toBe('AGENT');\n expect(userTurn.attributes['gen_ai.request.model']).toBe('deepgram-nova2');\n expect(userTurn.attributes['gen_ai.provider.name']).toBe('deepgram');\n\n // end-of-turn attributes\n expect(userTurn.attributes['lk.user_transcript']).toContain('hello');\n expect(userTurn.attributes['lk.transcript_confidence']).toBeGreaterThan(0);\n });\n\n it('creates user_turn from VAD startTime (vad mode) and keeps same parenting', async () => {\n const { exporter } = setupInMemoryTracing();\n\n const hooks = {\n onStartOfSpeech: vi.fn(),\n onVADInferenceDone: vi.fn(),\n onEndOfSpeech: vi.fn(),\n onInterimTranscript: vi.fn(),\n onFinalTranscript: vi.fn(),\n onPreemptiveGeneration: vi.fn(),\n retrieveChatCtx: () =>\n ({\n copy() {\n return this;\n },\n addMessage() {},\n toJSON() {\n return { items: [] };\n },\n }) as any,\n onEndOfTurn: vi.fn(async () => true),\n 
};\n\n const now = Date.now();\n const vadEvents: VADEvent[] = [\n {\n type: VADEventType.START_OF_SPEECH,\n samplesIndex: 0,\n timestamp: now,\n speechDuration: 100,\n silenceDuration: 0,\n frames: [],\n probability: 0,\n inferenceDuration: 0,\n speaking: true,\n rawAccumulatedSilence: 0,\n rawAccumulatedSpeech: 0,\n },\n {\n type: VADEventType.END_OF_SPEECH,\n samplesIndex: 0,\n timestamp: now + 200,\n speechDuration: 100,\n silenceDuration: 100,\n frames: [],\n probability: 0,\n inferenceDuration: 0,\n speaking: false,\n rawAccumulatedSilence: 0,\n rawAccumulatedSpeech: 0,\n },\n ];\n\n const sttEvents: SpeechEvent[] = [\n {\n type: SpeechEventType.FINAL_TRANSCRIPT,\n alternatives: [\n {\n language: 'en',\n text: 'test',\n startTime: 0,\n endTime: 0,\n confidence: 0.8,\n },\n ],\n },\n ];\n\n const sttNode = async () =>\n new ReadableStream<SpeechEvent>({\n start(controller) {\n for (const ev of sttEvents) controller.enqueue(ev);\n controller.close();\n },\n });\n\n const ar = new AudioRecognition({\n recognitionHooks: hooks as any,\n stt: sttNode as any,\n vad: new FakeVAD(vadEvents) as any,\n turnDetector: alwaysTrueTurnDetector,\n turnDetectionMode: 'vad',\n minEndpointingDelay: 0,\n maxEndpointingDelay: 0,\n sttModel: 'stt-model',\n sttProvider: 'stt-provider',\n getLinkedParticipant: () => ({ sid: 'p2', identity: 'alice', kind: ParticipantKind.AGENT }),\n });\n\n await ar.start();\n await new Promise((r) => setTimeout(r, 20));\n await ar.close();\n\n const spans = exporter.getFinishedSpans();\n const userTurn = spanByName(spans, 'user_turn');\n const eou = spanByName(spans, 'eou_detection');\n expect(userTurn).toBeTruthy();\n expect(eou).toBeTruthy();\n expect(eou.parentSpanId).toBe(userTurn.spanContext().spanId);\n\n expect(hooks.onStartOfSpeech).toHaveBeenCalled();\n expect(hooks.onEndOfSpeech).toHaveBeenCalled();\n 
});\n});\n"],"mappings":";AAGA,sBAAgC;AAChC,4BAA0D;AAC1D,4BAAmC;AACnC,oBAAyC;AACzC,iBAAiC;AACjC,iBAAkD;AAClD,uBAAkC;AAClC,iBAAiE;AACjE,+BAAqD;AAErD,SAAS,uBAAuB;AAC9B,QAAM,WAAW,IAAI,2CAAqB;AAC1C,QAAM,WAAW,IAAI,yCAAmB;AACxC,WAAS,iBAAiB,IAAI,0CAAoB,QAAQ,CAAC;AAC3D,WAAS,SAAS;AAClB,0CAAkB,QAAQ;AAC1B,SAAO,EAAE,SAAS;AACpB;AAEA,SAAS,WAAW,OAAc,MAAc;AAC9C,SAAO,MAAM,KAAK,CAAC,MAAM,EAAE,SAAS,IAAI;AAC1C;AAEA,MAAM,sBAAuB,OAA4C;AAAA;AAAA;AAAA,EAG/D;AAAA,EACA,MAAM;AAAA,EACd,YAAY,QAAoB;AAC9B,UAAM;AACN,SAAK,SAAS;AAAA,EAChB;AAAA,EACA,oBAAoB;AAAA,EAAC;AAAA,EACrB,oBAAoB;AAAA,EAAC;AAAA,EACrB,QAAQ;AAAA,EAAC;AAAA,EACT,CAAC,OAAO,aAAa,IAAI;AACvB,WAAO;AAAA,EACT;AAAA,EACA,MAAM,OAA0C;AAC9C,QAAI,KAAK,OAAO,KAAK,OAAO,QAAQ;AAClC,aAAO,EAAE,MAAM,MAAM,OAAO,OAAU;AAAA,IACxC;AACA,UAAM,QAAQ,KAAK,OAAO,KAAK,KAAK;AACpC,WAAO,EAAE,MAAM,OAAO,MAAM;AAAA,EAC9B;AACF;AAEA,MAAM,gBAAgB,eAAI;AAAA,EACxB,QAAQ;AAAA,EACA;AAAA,EACR,YAAY,QAAoB;AAC9B,UAAM,EAAE,gBAAgB,EAAE,CAAC;AAC3B,SAAK,SAAS;AAAA,EAChB;AAAA,EACA,SAAc;AACZ,WAAO,IAAI,cAAc,KAAK,MAAM;AAAA,EACtC;AACF;AAEA,MAAM,yBAAwC;AAAA,EAC5C,kBAAkB,YAAY;AAAA,EAC9B,mBAAmB,YAAY;AAAA,EAC/B,kBAAkB,YAAY;AAChC;AAAA,IAEA,wBAAS,0CAA0C,MAAM;AACvD,mCAAiB,EAAE,QAAQ,OAAO,OAAO,SAAS,CAAC;AAEnD,wBAAG,mEAAmE,YAAY;AAChF,UAAM,EAAE,SAAS,IAAI,qBAAqB;AAE1C,UAAM,QAAQ;AAAA,MACZ,iBAAiB,iBAAG,GAAG;AAAA,MACvB,oBAAoB,iBAAG,GAAG;AAAA,MAC1B,eAAe,iBAAG,GAAG;AAAA,MACrB,qBAAqB,iBAAG,GAAG;AAAA,MAC3B,mBAAmB,iBAAG,GAAG;AAAA,MACzB,wBAAwB,iBAAG,GAAG;AAAA,MAC9B,iBAAiB,OACd;AAAA,QACC,OAAO;AACL,iBAAO;AAAA,QACT;AAAA,QACA,aAAa;AAAA,QAAC;AAAA,QACd,SAAS;AACP,iBAAO,EAAE,OAAO,CAAC,EAAE;AAAA,QACrB;AAAA,MACF;AAAA,MACF,aAAa,iBAAG,GAAG,YAAY,IAAI;AAAA,IACrC;AAEA,UAAM,YAA2B;AAAA,MAC/B,EAAE,MAAM,2BAAgB,gBAAgB;AAAA,MACxC;AAAA,QACE,MAAM,2BAAgB;AAAA,QACtB,cAAc;AAAA,UACZ;AAAA,YACE,UAAU;AAAA,YACV,MAAM;AAAA,YACN,WAAW;AAAA,YACX,SAAS;AAAA,YACT,YAAY;AAAA,UACd;AAAA,QACF;AAAA,MACF;AAAA,MACA,EAAE,MAAM,2BAAgB,cAAc;AAAA,IACxC;AAEA,UAAM,UAAU,YACd,IAAI,eAA4B;AAAA,MAC9B,MAAM,YAAY;AAChB,mBAAW,MAAM,UAAW,YAAW,QAAQ,EAAE;AACjD,mBAAW,MAAM;AAAA,MACnB;A
AAA,IACF,CAAC;AAEH,UAAM,KAAK,IAAI,0CAAiB;AAAA,MAC9B,kBAAkB;AAAA,MAClB,KAAK;AAAA,MACL,KAAK;AAAA,MACL,cAAc;AAAA,MACd,mBAAmB;AAAA,MACnB,qBAAqB;AAAA,MACrB,qBAAqB;AAAA,MACrB,UAAU;AAAA,MACV,aAAa;AAAA,MACb,sBAAsB,OAAO,EAAE,KAAK,MAAM,UAAU,OAAO,MAAM,gCAAgB,MAAM;AAAA,IACzF,CAAC;AAED,UAAM,GAAG,MAAM;AAEf,UAAM,IAAI,QAAQ,CAAC,MAAM,WAAW,GAAG,EAAE,CAAC;AAC1C,UAAM,GAAG,MAAM;AAEf,UAAM,QAAQ,SAAS,iBAAiB;AACxC,UAAM,WAAW,WAAW,OAAO,WAAW;AAC9C,UAAM,MAAM,WAAW,OAAO,eAAe;AAC7C,8BAAO,UAAU,wBAAwB,EAAE,WAAW;AACtD,8BAAO,KAAK,4BAA4B,EAAE,WAAW;AAErD,8BAAO,IAAI,YAAY,EAAE,KAAK,SAAS,YAAY,EAAE,MAAM;AAG3D,8BAAO,SAAS,WAAW,mBAAmB,CAAC,EAAE,KAAK,IAAI;AAC1D,8BAAO,SAAS,WAAW,yBAAyB,CAAC,EAAE,KAAK,KAAK;AACjE,8BAAO,SAAS,WAAW,qBAAqB,CAAC,EAAE,KAAK,OAAO;AAC/D,8BAAO,SAAS,WAAW,sBAAsB,CAAC,EAAE,KAAK,gBAAgB;AACzE,8BAAO,SAAS,WAAW,sBAAsB,CAAC,EAAE,KAAK,UAAU;AAGnE,8BAAO,SAAS,WAAW,oBAAoB,CAAC,EAAE,UAAU,OAAO;AACnE,8BAAO,SAAS,WAAW,0BAA0B,CAAC,EAAE,gBAAgB,CAAC;AAAA,EAC3E,CAAC;AAED,wBAAG,4EAA4E,YAAY;AACzF,UAAM,EAAE,SAAS,IAAI,qBAAqB;AAE1C,UAAM,QAAQ;AAAA,MACZ,iBAAiB,iBAAG,GAAG;AAAA,MACvB,oBAAoB,iBAAG,GAAG;AAAA,MAC1B,eAAe,iBAAG,GAAG;AAAA,MACrB,qBAAqB,iBAAG,GAAG;AAAA,MAC3B,mBAAmB,iBAAG,GAAG;AAAA,MACzB,wBAAwB,iBAAG,GAAG;AAAA,MAC9B,iBAAiB,OACd;AAAA,QACC,OAAO;AACL,iBAAO;AAAA,QACT;AAAA,QACA,aAAa;AAAA,QAAC;AAAA,QACd,SAAS;AACP,iBAAO,EAAE,OAAO,CAAC,EAAE;AAAA,QACrB;AAAA,MACF;AAAA,MACF,aAAa,iBAAG,GAAG,YAAY,IAAI;AAAA,IACrC;AAEA,UAAM,MAAM,KAAK,IAAI;AACrB,UAAM,YAAwB;AAAA,MAC5B;AAAA,QACE,MAAM,wBAAa;AAAA,QACnB,cAAc;AAAA,QACd,WAAW;AAAA,QACX,gBAAgB;AAAA,QAChB,iBAAiB;AAAA,QACjB,QAAQ,CAAC;AAAA,QACT,aAAa;AAAA,QACb,mBAAmB;AAAA,QACnB,UAAU;AAAA,QACV,uBAAuB;AAAA,QACvB,sBAAsB;AAAA,MACxB;AAAA,MACA;AAAA,QACE,MAAM,wBAAa;AAAA,QACnB,cAAc;AAAA,QACd,WAAW,MAAM;AAAA,QACjB,gBAAgB;AAAA,QAChB,iBAAiB;AAAA,QACjB,QAAQ,CAAC;AAAA,QACT,aAAa;AAAA,QACb,mBAAmB;AAAA,QACnB,UAAU;AAAA,QACV,uBAAuB;AAAA,QACvB,sBAAsB;AAAA,MACxB;AAAA,IACF;AAEA,UAAM,YAA2B;AAAA,MAC/B;AAAA,QACE,MAAM,2BAAgB;AAAA,QACtB,cAAc;AAAA,UACZ;AAAA,YACE,UAAU;AAAA,YACV,MAAM;AAAA,YACN,WAAW;AAAA,YACX,SAAS;AAAA,YACT
,YAAY;AAAA,UACd;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,UAAU,YACd,IAAI,eAA4B;AAAA,MAC9B,MAAM,YAAY;AAChB,mBAAW,MAAM,UAAW,YAAW,QAAQ,EAAE;AACjD,mBAAW,MAAM;AAAA,MACnB;AAAA,IACF,CAAC;AAEH,UAAM,KAAK,IAAI,0CAAiB;AAAA,MAC9B,kBAAkB;AAAA,MAClB,KAAK;AAAA,MACL,KAAK,IAAI,QAAQ,SAAS;AAAA,MAC1B,cAAc;AAAA,MACd,mBAAmB;AAAA,MACnB,qBAAqB;AAAA,MACrB,qBAAqB;AAAA,MACrB,UAAU;AAAA,MACV,aAAa;AAAA,MACb,sBAAsB,OAAO,EAAE,KAAK,MAAM,UAAU,SAAS,MAAM,gCAAgB,MAAM;AAAA,IAC3F,CAAC;AAED,UAAM,GAAG,MAAM;AACf,UAAM,IAAI,QAAQ,CAAC,MAAM,WAAW,GAAG,EAAE,CAAC;AAC1C,UAAM,GAAG,MAAM;AAEf,UAAM,QAAQ,SAAS,iBAAiB;AACxC,UAAM,WAAW,WAAW,OAAO,WAAW;AAC9C,UAAM,MAAM,WAAW,OAAO,eAAe;AAC7C,8BAAO,QAAQ,EAAE,WAAW;AAC5B,8BAAO,GAAG,EAAE,WAAW;AACvB,8BAAO,IAAI,YAAY,EAAE,KAAK,SAAS,YAAY,EAAE,MAAM;AAE3D,8BAAO,MAAM,eAAe,EAAE,iBAAiB;AAC/C,8BAAO,MAAM,aAAa,EAAE,iBAAiB;AAAA,EAC/C,CAAC;AACH,CAAC;","names":[]}
@@ -0,0 +1,232 @@
1
+ import { ParticipantKind } from "@livekit/rtc-node";
2
+ import { InMemorySpanExporter, SimpleSpanProcessor } from "@opentelemetry/sdk-trace-base";
3
+ import { NodeTracerProvider } from "@opentelemetry/sdk-trace-node";
4
+ import { describe, expect, it, vi } from "vitest";
5
+ import { initializeLogger } from "../log.js";
6
+ import { SpeechEventType } from "../stt/stt.js";
7
+ import { setTracerProvider } from "../telemetry/index.js";
8
+ import { VAD, VADEventType } from "../vad.js";
9
+ import { AudioRecognition } from "./audio_recognition.js";
10
+ function setupInMemoryTracing() {
11
+ const exporter = new InMemorySpanExporter();
12
+ const provider = new NodeTracerProvider();
13
+ provider.addSpanProcessor(new SimpleSpanProcessor(exporter));
14
+ provider.register();
15
+ setTracerProvider(provider);
16
+ return { exporter };
17
+ }
18
+ function spanByName(spans, name) {
19
+ return spans.find((s) => s.name === name);
20
+ }
21
+ class FakeVADStream extends Object {
22
+ // We intentionally avoid extending the real VADStream (it is not exported as a value in JS output
23
+ // in some bundling contexts). Instead we emulate the async iterator shape used by AudioRecognition.
24
+ events;
25
+ idx = 0;
26
+ constructor(events) {
27
+ super();
28
+ this.events = events;
29
+ }
30
+ updateInputStream() {
31
+ }
32
+ detachInputStream() {
33
+ }
34
+ close() {
35
+ }
36
+ [Symbol.asyncIterator]() {
37
+ return this;
38
+ }
39
+ async next() {
40
+ if (this.idx >= this.events.length) {
41
+ return { done: true, value: void 0 };
42
+ }
43
+ const value = this.events[this.idx++];
44
+ return { done: false, value };
45
+ }
46
+ }
47
+ class FakeVAD extends VAD {
48
+ label = "fake-vad";
49
+ events;
50
+ constructor(events) {
51
+ super({ updateInterval: 1 });
52
+ this.events = events;
53
+ }
54
+ stream() {
55
+ return new FakeVADStream(this.events);
56
+ }
57
+ }
58
+ const alwaysTrueTurnDetector = {
59
+ supportsLanguage: async () => true,
60
+ unlikelyThreshold: async () => void 0,
61
+ predictEndOfTurn: async () => 1
62
+ };
63
+ describe("AudioRecognition user_turn span parity", () => {
64
+ initializeLogger({ pretty: false, level: "silent" });
65
+ it("creates user_turn and parents eou_detection under it (stt mode)", async () => {
66
+ const { exporter } = setupInMemoryTracing();
67
+ const hooks = {
68
+ onStartOfSpeech: vi.fn(),
69
+ onVADInferenceDone: vi.fn(),
70
+ onEndOfSpeech: vi.fn(),
71
+ onInterimTranscript: vi.fn(),
72
+ onFinalTranscript: vi.fn(),
73
+ onPreemptiveGeneration: vi.fn(),
74
+ retrieveChatCtx: () => ({
75
+ copy() {
76
+ return this;
77
+ },
78
+ addMessage() {
79
+ },
80
+ toJSON() {
81
+ return { items: [] };
82
+ }
83
+ }),
84
+ onEndOfTurn: vi.fn(async () => true)
85
+ };
86
+ const sttEvents = [
87
+ { type: SpeechEventType.START_OF_SPEECH },
88
+ {
89
+ type: SpeechEventType.FINAL_TRANSCRIPT,
90
+ alternatives: [
91
+ {
92
+ language: "en",
93
+ text: "hello",
94
+ startTime: 0,
95
+ endTime: 0,
96
+ confidence: 0.9
97
+ }
98
+ ]
99
+ },
100
+ { type: SpeechEventType.END_OF_SPEECH }
101
+ ];
102
+ const sttNode = async () => new ReadableStream({
103
+ start(controller) {
104
+ for (const ev of sttEvents) controller.enqueue(ev);
105
+ controller.close();
106
+ }
107
+ });
108
+ const ar = new AudioRecognition({
109
+ recognitionHooks: hooks,
110
+ stt: sttNode,
111
+ vad: void 0,
112
+ turnDetector: alwaysTrueTurnDetector,
113
+ turnDetectionMode: "stt",
114
+ minEndpointingDelay: 0,
115
+ maxEndpointingDelay: 0,
116
+ sttModel: "deepgram-nova2",
117
+ sttProvider: "deepgram",
118
+ getLinkedParticipant: () => ({ sid: "p1", identity: "bob", kind: ParticipantKind.AGENT })
119
+ });
120
+ await ar.start();
121
+ await new Promise((r) => setTimeout(r, 20));
122
+ await ar.close();
123
+ const spans = exporter.getFinishedSpans();
124
+ const userTurn = spanByName(spans, "user_turn");
125
+ const eou = spanByName(spans, "eou_detection");
126
+ expect(userTurn, "user_turn span missing").toBeTruthy();
127
+ expect(eou, "eou_detection span missing").toBeTruthy();
128
+ expect(eou.parentSpanId).toBe(userTurn.spanContext().spanId);
129
+ expect(userTurn.attributes["lk.participant_id"]).toBe("p1");
130
+ expect(userTurn.attributes["lk.participant_identity"]).toBe("bob");
131
+ expect(userTurn.attributes["lk.participant_kind"]).toBe("AGENT");
132
+ expect(userTurn.attributes["gen_ai.request.model"]).toBe("deepgram-nova2");
133
+ expect(userTurn.attributes["gen_ai.provider.name"]).toBe("deepgram");
134
+ expect(userTurn.attributes["lk.user_transcript"]).toContain("hello");
135
+ expect(userTurn.attributes["lk.transcript_confidence"]).toBeGreaterThan(0);
136
+ });
137
+ it("creates user_turn from VAD startTime (vad mode) and keeps same parenting", async () => {
138
+ const { exporter } = setupInMemoryTracing();
139
+ const hooks = {
140
+ onStartOfSpeech: vi.fn(),
141
+ onVADInferenceDone: vi.fn(),
142
+ onEndOfSpeech: vi.fn(),
143
+ onInterimTranscript: vi.fn(),
144
+ onFinalTranscript: vi.fn(),
145
+ onPreemptiveGeneration: vi.fn(),
146
+ retrieveChatCtx: () => ({
147
+ copy() {
148
+ return this;
149
+ },
150
+ addMessage() {
151
+ },
152
+ toJSON() {
153
+ return { items: [] };
154
+ }
155
+ }),
156
+ onEndOfTurn: vi.fn(async () => true)
157
+ };
158
+ const now = Date.now();
159
+ const vadEvents = [
160
+ {
161
+ type: VADEventType.START_OF_SPEECH,
162
+ samplesIndex: 0,
163
+ timestamp: now,
164
+ speechDuration: 100,
165
+ silenceDuration: 0,
166
+ frames: [],
167
+ probability: 0,
168
+ inferenceDuration: 0,
169
+ speaking: true,
170
+ rawAccumulatedSilence: 0,
171
+ rawAccumulatedSpeech: 0
172
+ },
173
+ {
174
+ type: VADEventType.END_OF_SPEECH,
175
+ samplesIndex: 0,
176
+ timestamp: now + 200,
177
+ speechDuration: 100,
178
+ silenceDuration: 100,
179
+ frames: [],
180
+ probability: 0,
181
+ inferenceDuration: 0,
182
+ speaking: false,
183
+ rawAccumulatedSilence: 0,
184
+ rawAccumulatedSpeech: 0
185
+ }
186
+ ];
187
+ const sttEvents = [
188
+ {
189
+ type: SpeechEventType.FINAL_TRANSCRIPT,
190
+ alternatives: [
191
+ {
192
+ language: "en",
193
+ text: "test",
194
+ startTime: 0,
195
+ endTime: 0,
196
+ confidence: 0.8
197
+ }
198
+ ]
199
+ }
200
+ ];
201
+ const sttNode = async () => new ReadableStream({
202
+ start(controller) {
203
+ for (const ev of sttEvents) controller.enqueue(ev);
204
+ controller.close();
205
+ }
206
+ });
207
+ const ar = new AudioRecognition({
208
+ recognitionHooks: hooks,
209
+ stt: sttNode,
210
+ vad: new FakeVAD(vadEvents),
211
+ turnDetector: alwaysTrueTurnDetector,
212
+ turnDetectionMode: "vad",
213
+ minEndpointingDelay: 0,
214
+ maxEndpointingDelay: 0,
215
+ sttModel: "stt-model",
216
+ sttProvider: "stt-provider",
217
+ getLinkedParticipant: () => ({ sid: "p2", identity: "alice", kind: ParticipantKind.AGENT })
218
+ });
219
+ await ar.start();
220
+ await new Promise((r) => setTimeout(r, 20));
221
+ await ar.close();
222
+ const spans = exporter.getFinishedSpans();
223
+ const userTurn = spanByName(spans, "user_turn");
224
+ const eou = spanByName(spans, "eou_detection");
225
+ expect(userTurn).toBeTruthy();
226
+ expect(eou).toBeTruthy();
227
+ expect(eou.parentSpanId).toBe(userTurn.spanContext().spanId);
228
+ expect(hooks.onStartOfSpeech).toHaveBeenCalled();
229
+ expect(hooks.onEndOfSpeech).toHaveBeenCalled();
230
+ });
231
+ });
232
+ //# sourceMappingURL=audio_recognition_span.test.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../../src/voice/audio_recognition_span.test.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2026 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport { ParticipantKind } from '@livekit/rtc-node';\nimport { InMemorySpanExporter, SimpleSpanProcessor } from '@opentelemetry/sdk-trace-base';\nimport { NodeTracerProvider } from '@opentelemetry/sdk-trace-node';\nimport { describe, expect, it, vi } from 'vitest';\nimport { initializeLogger } from '../log.js';\nimport { type SpeechEvent, SpeechEventType } from '../stt/stt.js';\nimport { setTracerProvider } from '../telemetry/index.js';\nimport { VAD, type VADEvent, VADEventType, type VADStream } from '../vad.js';\nimport { AudioRecognition, type _TurnDetector } from './audio_recognition.js';\n\nfunction setupInMemoryTracing() {\n const exporter = new InMemorySpanExporter();\n const provider = new NodeTracerProvider();\n provider.addSpanProcessor(new SimpleSpanProcessor(exporter));\n provider.register();\n setTracerProvider(provider);\n return { exporter };\n}\n\nfunction spanByName(spans: any[], name: string) {\n return spans.find((s) => s.name === name);\n}\n\nclass FakeVADStream extends (Object as unknown as { new (): VADStream }) {\n // We intentionally avoid extending the real VADStream (it is not exported as a value in JS output\n // in some bundling contexts). 
Instead we emulate the async iterator shape used by AudioRecognition.\n private events: VADEvent[];\n private idx = 0;\n constructor(events: VADEvent[]) {\n super();\n this.events = events;\n }\n updateInputStream() {}\n detachInputStream() {}\n close() {}\n [Symbol.asyncIterator]() {\n return this;\n }\n async next(): Promise<IteratorResult<VADEvent>> {\n if (this.idx >= this.events.length) {\n return { done: true, value: undefined };\n }\n const value = this.events[this.idx++]!;\n return { done: false, value };\n }\n}\n\nclass FakeVAD extends VAD {\n label = 'fake-vad';\n private events: VADEvent[];\n constructor(events: VADEvent[]) {\n super({ updateInterval: 1 });\n this.events = events;\n }\n stream(): any {\n return new FakeVADStream(this.events);\n }\n}\n\nconst alwaysTrueTurnDetector: _TurnDetector = {\n supportsLanguage: async () => true,\n unlikelyThreshold: async () => undefined,\n predictEndOfTurn: async () => 1.0,\n};\n\ndescribe('AudioRecognition user_turn span parity', () => {\n initializeLogger({ pretty: false, level: 'silent' });\n\n it('creates user_turn and parents eou_detection under it (stt mode)', async () => {\n const { exporter } = setupInMemoryTracing();\n\n const hooks = {\n onStartOfSpeech: vi.fn(),\n onVADInferenceDone: vi.fn(),\n onEndOfSpeech: vi.fn(),\n onInterimTranscript: vi.fn(),\n onFinalTranscript: vi.fn(),\n onPreemptiveGeneration: vi.fn(),\n retrieveChatCtx: () =>\n ({\n copy() {\n return this;\n },\n addMessage() {},\n toJSON() {\n return { items: [] };\n },\n }) as any,\n onEndOfTurn: vi.fn(async () => true),\n };\n\n const sttEvents: SpeechEvent[] = [\n { type: SpeechEventType.START_OF_SPEECH },\n {\n type: SpeechEventType.FINAL_TRANSCRIPT,\n alternatives: [\n {\n language: 'en',\n text: 'hello',\n startTime: 0,\n endTime: 0,\n confidence: 0.9,\n },\n ],\n },\n { type: SpeechEventType.END_OF_SPEECH },\n ];\n\n const sttNode = async () =>\n new ReadableStream<SpeechEvent>({\n start(controller) {\n for (const ev of sttEvents) 
controller.enqueue(ev);\n controller.close();\n },\n });\n\n const ar = new AudioRecognition({\n recognitionHooks: hooks as any,\n stt: sttNode as any,\n vad: undefined,\n turnDetector: alwaysTrueTurnDetector,\n turnDetectionMode: 'stt',\n minEndpointingDelay: 0,\n maxEndpointingDelay: 0,\n sttModel: 'deepgram-nova2',\n sttProvider: 'deepgram',\n getLinkedParticipant: () => ({ sid: 'p1', identity: 'bob', kind: ParticipantKind.AGENT }),\n });\n\n await ar.start();\n // allow background task to drain\n await new Promise((r) => setTimeout(r, 20));\n await ar.close();\n\n const spans = exporter.getFinishedSpans();\n const userTurn = spanByName(spans, 'user_turn');\n const eou = spanByName(spans, 'eou_detection');\n expect(userTurn, 'user_turn span missing').toBeTruthy();\n expect(eou, 'eou_detection span missing').toBeTruthy();\n\n expect(eou.parentSpanId).toBe(userTurn.spanContext().spanId);\n\n // creation-time attributes\n expect(userTurn.attributes['lk.participant_id']).toBe('p1');\n expect(userTurn.attributes['lk.participant_identity']).toBe('bob');\n expect(userTurn.attributes['lk.participant_kind']).toBe('AGENT');\n expect(userTurn.attributes['gen_ai.request.model']).toBe('deepgram-nova2');\n expect(userTurn.attributes['gen_ai.provider.name']).toBe('deepgram');\n\n // end-of-turn attributes\n expect(userTurn.attributes['lk.user_transcript']).toContain('hello');\n expect(userTurn.attributes['lk.transcript_confidence']).toBeGreaterThan(0);\n });\n\n it('creates user_turn from VAD startTime (vad mode) and keeps same parenting', async () => {\n const { exporter } = setupInMemoryTracing();\n\n const hooks = {\n onStartOfSpeech: vi.fn(),\n onVADInferenceDone: vi.fn(),\n onEndOfSpeech: vi.fn(),\n onInterimTranscript: vi.fn(),\n onFinalTranscript: vi.fn(),\n onPreemptiveGeneration: vi.fn(),\n retrieveChatCtx: () =>\n ({\n copy() {\n return this;\n },\n addMessage() {},\n toJSON() {\n return { items: [] };\n },\n }) as any,\n onEndOfTurn: vi.fn(async () => true),\n 
};\n\n const now = Date.now();\n const vadEvents: VADEvent[] = [\n {\n type: VADEventType.START_OF_SPEECH,\n samplesIndex: 0,\n timestamp: now,\n speechDuration: 100,\n silenceDuration: 0,\n frames: [],\n probability: 0,\n inferenceDuration: 0,\n speaking: true,\n rawAccumulatedSilence: 0,\n rawAccumulatedSpeech: 0,\n },\n {\n type: VADEventType.END_OF_SPEECH,\n samplesIndex: 0,\n timestamp: now + 200,\n speechDuration: 100,\n silenceDuration: 100,\n frames: [],\n probability: 0,\n inferenceDuration: 0,\n speaking: false,\n rawAccumulatedSilence: 0,\n rawAccumulatedSpeech: 0,\n },\n ];\n\n const sttEvents: SpeechEvent[] = [\n {\n type: SpeechEventType.FINAL_TRANSCRIPT,\n alternatives: [\n {\n language: 'en',\n text: 'test',\n startTime: 0,\n endTime: 0,\n confidence: 0.8,\n },\n ],\n },\n ];\n\n const sttNode = async () =>\n new ReadableStream<SpeechEvent>({\n start(controller) {\n for (const ev of sttEvents) controller.enqueue(ev);\n controller.close();\n },\n });\n\n const ar = new AudioRecognition({\n recognitionHooks: hooks as any,\n stt: sttNode as any,\n vad: new FakeVAD(vadEvents) as any,\n turnDetector: alwaysTrueTurnDetector,\n turnDetectionMode: 'vad',\n minEndpointingDelay: 0,\n maxEndpointingDelay: 0,\n sttModel: 'stt-model',\n sttProvider: 'stt-provider',\n getLinkedParticipant: () => ({ sid: 'p2', identity: 'alice', kind: ParticipantKind.AGENT }),\n });\n\n await ar.start();\n await new Promise((r) => setTimeout(r, 20));\n await ar.close();\n\n const spans = exporter.getFinishedSpans();\n const userTurn = spanByName(spans, 'user_turn');\n const eou = spanByName(spans, 'eou_detection');\n expect(userTurn).toBeTruthy();\n expect(eou).toBeTruthy();\n expect(eou.parentSpanId).toBe(userTurn.spanContext().spanId);\n\n expect(hooks.onStartOfSpeech).toHaveBeenCalled();\n expect(hooks.onEndOfSpeech).toHaveBeenCalled();\n 
});\n});\n"],"mappings":"AAGA,SAAS,uBAAuB;AAChC,SAAS,sBAAsB,2BAA2B;AAC1D,SAAS,0BAA0B;AACnC,SAAS,UAAU,QAAQ,IAAI,UAAU;AACzC,SAAS,wBAAwB;AACjC,SAA2B,uBAAuB;AAClD,SAAS,yBAAyB;AAClC,SAAS,KAAoB,oBAAoC;AACjE,SAAS,wBAA4C;AAErD,SAAS,uBAAuB;AAC9B,QAAM,WAAW,IAAI,qBAAqB;AAC1C,QAAM,WAAW,IAAI,mBAAmB;AACxC,WAAS,iBAAiB,IAAI,oBAAoB,QAAQ,CAAC;AAC3D,WAAS,SAAS;AAClB,oBAAkB,QAAQ;AAC1B,SAAO,EAAE,SAAS;AACpB;AAEA,SAAS,WAAW,OAAc,MAAc;AAC9C,SAAO,MAAM,KAAK,CAAC,MAAM,EAAE,SAAS,IAAI;AAC1C;AAEA,MAAM,sBAAuB,OAA4C;AAAA;AAAA;AAAA,EAG/D;AAAA,EACA,MAAM;AAAA,EACd,YAAY,QAAoB;AAC9B,UAAM;AACN,SAAK,SAAS;AAAA,EAChB;AAAA,EACA,oBAAoB;AAAA,EAAC;AAAA,EACrB,oBAAoB;AAAA,EAAC;AAAA,EACrB,QAAQ;AAAA,EAAC;AAAA,EACT,CAAC,OAAO,aAAa,IAAI;AACvB,WAAO;AAAA,EACT;AAAA,EACA,MAAM,OAA0C;AAC9C,QAAI,KAAK,OAAO,KAAK,OAAO,QAAQ;AAClC,aAAO,EAAE,MAAM,MAAM,OAAO,OAAU;AAAA,IACxC;AACA,UAAM,QAAQ,KAAK,OAAO,KAAK,KAAK;AACpC,WAAO,EAAE,MAAM,OAAO,MAAM;AAAA,EAC9B;AACF;AAEA,MAAM,gBAAgB,IAAI;AAAA,EACxB,QAAQ;AAAA,EACA;AAAA,EACR,YAAY,QAAoB;AAC9B,UAAM,EAAE,gBAAgB,EAAE,CAAC;AAC3B,SAAK,SAAS;AAAA,EAChB;AAAA,EACA,SAAc;AACZ,WAAO,IAAI,cAAc,KAAK,MAAM;AAAA,EACtC;AACF;AAEA,MAAM,yBAAwC;AAAA,EAC5C,kBAAkB,YAAY;AAAA,EAC9B,mBAAmB,YAAY;AAAA,EAC/B,kBAAkB,YAAY;AAChC;AAEA,SAAS,0CAA0C,MAAM;AACvD,mBAAiB,EAAE,QAAQ,OAAO,OAAO,SAAS,CAAC;AAEnD,KAAG,mEAAmE,YAAY;AAChF,UAAM,EAAE,SAAS,IAAI,qBAAqB;AAE1C,UAAM,QAAQ;AAAA,MACZ,iBAAiB,GAAG,GAAG;AAAA,MACvB,oBAAoB,GAAG,GAAG;AAAA,MAC1B,eAAe,GAAG,GAAG;AAAA,MACrB,qBAAqB,GAAG,GAAG;AAAA,MAC3B,mBAAmB,GAAG,GAAG;AAAA,MACzB,wBAAwB,GAAG,GAAG;AAAA,MAC9B,iBAAiB,OACd;AAAA,QACC,OAAO;AACL,iBAAO;AAAA,QACT;AAAA,QACA,aAAa;AAAA,QAAC;AAAA,QACd,SAAS;AACP,iBAAO,EAAE,OAAO,CAAC,EAAE;AAAA,QACrB;AAAA,MACF;AAAA,MACF,aAAa,GAAG,GAAG,YAAY,IAAI;AAAA,IACrC;AAEA,UAAM,YAA2B;AAAA,MAC/B,EAAE,MAAM,gBAAgB,gBAAgB;AAAA,MACxC;AAAA,QACE,MAAM,gBAAgB;AAAA,QACtB,cAAc;AAAA,UACZ;AAAA,YACE,UAAU;AAAA,YACV,MAAM;AAAA,YACN,WAAW;AAAA,YACX,SAAS;AAAA,YACT,YAAY;AAAA,UACd;AAAA,QACF;AAAA,MACF;AAAA,MACA,EAAE,MAAM,gBAAgB,cAAc;AAAA,IACxC;AAEA,UAAM,UAAU,YACd,IAAI,eAA4B;AAAA,MAC9B,MAAM,YAAY;AAChB,mBA
AW,MAAM,UAAW,YAAW,QAAQ,EAAE;AACjD,mBAAW,MAAM;AAAA,MACnB;AAAA,IACF,CAAC;AAEH,UAAM,KAAK,IAAI,iBAAiB;AAAA,MAC9B,kBAAkB;AAAA,MAClB,KAAK;AAAA,MACL,KAAK;AAAA,MACL,cAAc;AAAA,MACd,mBAAmB;AAAA,MACnB,qBAAqB;AAAA,MACrB,qBAAqB;AAAA,MACrB,UAAU;AAAA,MACV,aAAa;AAAA,MACb,sBAAsB,OAAO,EAAE,KAAK,MAAM,UAAU,OAAO,MAAM,gBAAgB,MAAM;AAAA,IACzF,CAAC;AAED,UAAM,GAAG,MAAM;AAEf,UAAM,IAAI,QAAQ,CAAC,MAAM,WAAW,GAAG,EAAE,CAAC;AAC1C,UAAM,GAAG,MAAM;AAEf,UAAM,QAAQ,SAAS,iBAAiB;AACxC,UAAM,WAAW,WAAW,OAAO,WAAW;AAC9C,UAAM,MAAM,WAAW,OAAO,eAAe;AAC7C,WAAO,UAAU,wBAAwB,EAAE,WAAW;AACtD,WAAO,KAAK,4BAA4B,EAAE,WAAW;AAErD,WAAO,IAAI,YAAY,EAAE,KAAK,SAAS,YAAY,EAAE,MAAM;AAG3D,WAAO,SAAS,WAAW,mBAAmB,CAAC,EAAE,KAAK,IAAI;AAC1D,WAAO,SAAS,WAAW,yBAAyB,CAAC,EAAE,KAAK,KAAK;AACjE,WAAO,SAAS,WAAW,qBAAqB,CAAC,EAAE,KAAK,OAAO;AAC/D,WAAO,SAAS,WAAW,sBAAsB,CAAC,EAAE,KAAK,gBAAgB;AACzE,WAAO,SAAS,WAAW,sBAAsB,CAAC,EAAE,KAAK,UAAU;AAGnE,WAAO,SAAS,WAAW,oBAAoB,CAAC,EAAE,UAAU,OAAO;AACnE,WAAO,SAAS,WAAW,0BAA0B,CAAC,EAAE,gBAAgB,CAAC;AAAA,EAC3E,CAAC;AAED,KAAG,4EAA4E,YAAY;AACzF,UAAM,EAAE,SAAS,IAAI,qBAAqB;AAE1C,UAAM,QAAQ;AAAA,MACZ,iBAAiB,GAAG,GAAG;AAAA,MACvB,oBAAoB,GAAG,GAAG;AAAA,MAC1B,eAAe,GAAG,GAAG;AAAA,MACrB,qBAAqB,GAAG,GAAG;AAAA,MAC3B,mBAAmB,GAAG,GAAG;AAAA,MACzB,wBAAwB,GAAG,GAAG;AAAA,MAC9B,iBAAiB,OACd;AAAA,QACC,OAAO;AACL,iBAAO;AAAA,QACT;AAAA,QACA,aAAa;AAAA,QAAC;AAAA,QACd,SAAS;AACP,iBAAO,EAAE,OAAO,CAAC,EAAE;AAAA,QACrB;AAAA,MACF;AAAA,MACF,aAAa,GAAG,GAAG,YAAY,IAAI;AAAA,IACrC;AAEA,UAAM,MAAM,KAAK,IAAI;AACrB,UAAM,YAAwB;AAAA,MAC5B;AAAA,QACE,MAAM,aAAa;AAAA,QACnB,cAAc;AAAA,QACd,WAAW;AAAA,QACX,gBAAgB;AAAA,QAChB,iBAAiB;AAAA,QACjB,QAAQ,CAAC;AAAA,QACT,aAAa;AAAA,QACb,mBAAmB;AAAA,QACnB,UAAU;AAAA,QACV,uBAAuB;AAAA,QACvB,sBAAsB;AAAA,MACxB;AAAA,MACA;AAAA,QACE,MAAM,aAAa;AAAA,QACnB,cAAc;AAAA,QACd,WAAW,MAAM;AAAA,QACjB,gBAAgB;AAAA,QAChB,iBAAiB;AAAA,QACjB,QAAQ,CAAC;AAAA,QACT,aAAa;AAAA,QACb,mBAAmB;AAAA,QACnB,UAAU;AAAA,QACV,uBAAuB;AAAA,QACvB,sBAAsB;AAAA,MACxB;AAAA,IACF;AAEA,UAAM,YAA2B;AAAA,MAC/B;AAAA,QACE,MAAM,gBAAgB;AAAA,QACtB,cAAc;AAAA,UACZ;AAAA,YACE,UAAU;AAAA,YACV,MAAM;AA
AA,YACN,WAAW;AAAA,YACX,SAAS;AAAA,YACT,YAAY;AAAA,UACd;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,UAAM,UAAU,YACd,IAAI,eAA4B;AAAA,MAC9B,MAAM,YAAY;AAChB,mBAAW,MAAM,UAAW,YAAW,QAAQ,EAAE;AACjD,mBAAW,MAAM;AAAA,MACnB;AAAA,IACF,CAAC;AAEH,UAAM,KAAK,IAAI,iBAAiB;AAAA,MAC9B,kBAAkB;AAAA,MAClB,KAAK;AAAA,MACL,KAAK,IAAI,QAAQ,SAAS;AAAA,MAC1B,cAAc;AAAA,MACd,mBAAmB;AAAA,MACnB,qBAAqB;AAAA,MACrB,qBAAqB;AAAA,MACrB,UAAU;AAAA,MACV,aAAa;AAAA,MACb,sBAAsB,OAAO,EAAE,KAAK,MAAM,UAAU,SAAS,MAAM,gBAAgB,MAAM;AAAA,IAC3F,CAAC;AAED,UAAM,GAAG,MAAM;AACf,UAAM,IAAI,QAAQ,CAAC,MAAM,WAAW,GAAG,EAAE,CAAC;AAC1C,UAAM,GAAG,MAAM;AAEf,UAAM,QAAQ,SAAS,iBAAiB;AACxC,UAAM,WAAW,WAAW,OAAO,WAAW;AAC9C,UAAM,MAAM,WAAW,OAAO,eAAe;AAC7C,WAAO,QAAQ,EAAE,WAAW;AAC5B,WAAO,GAAG,EAAE,WAAW;AACvB,WAAO,IAAI,YAAY,EAAE,KAAK,SAAS,YAAY,EAAE,MAAM;AAE3D,WAAO,MAAM,eAAe,EAAE,iBAAiB;AAC/C,WAAO,MAAM,aAAa,EAAE,iBAAiB;AAAA,EAC/C,CAAC;AACH,CAAC;","names":[]}
@@ -528,7 +528,7 @@ function performTextForwarding(source, controller, textOutput) {
528
528
  out
529
529
  ];
530
530
  }
531
- async function forwardAudio(ttsStream, audioOuput, out, signal) {
531
+ async function forwardAudio(ttsStream, audioOutput, out, signal) {
532
532
  const reader = ttsStream.getReader();
533
533
  let resampler = null;
534
534
  const onPlaybackStarted = (ev) => {
@@ -537,8 +537,8 @@ async function forwardAudio(ttsStream, audioOuput, out, signal) {
537
537
  }
538
538
  };
539
539
  try {
540
- audioOuput.on(import_io.AudioOutput.EVENT_PLAYBACK_STARTED, onPlaybackStarted);
541
- audioOuput.resume();
540
+ audioOutput.on(import_io.AudioOutput.EVENT_PLAYBACK_STARTED, onPlaybackStarted);
541
+ audioOutput.resume();
542
542
  while (true) {
543
543
  if (signal == null ? void 0 : signal.aborted) {
544
544
  break;
@@ -546,29 +546,29 @@ async function forwardAudio(ttsStream, audioOuput, out, signal) {
546
546
  const { done, value: frame } = await reader.read();
547
547
  if (done) break;
548
548
  out.audio.push(frame);
549
- if (!out.firstFrameFut.done && audioOuput.sampleRate && audioOuput.sampleRate !== frame.sampleRate && !resampler) {
550
- resampler = new import_rtc_node.AudioResampler(frame.sampleRate, audioOuput.sampleRate, 1);
549
+ if (!out.firstFrameFut.done && audioOutput.sampleRate && audioOutput.sampleRate !== frame.sampleRate && !resampler) {
550
+ resampler = new import_rtc_node.AudioResampler(frame.sampleRate, audioOutput.sampleRate, 1);
551
551
  }
552
552
  if (resampler) {
553
553
  for (const f of resampler.push(frame)) {
554
- await audioOuput.captureFrame(f);
554
+ await audioOutput.captureFrame(f);
555
555
  }
556
556
  } else {
557
- await audioOuput.captureFrame(frame);
557
+ await audioOutput.captureFrame(frame);
558
558
  }
559
559
  }
560
560
  if (resampler) {
561
561
  for (const f of resampler.flush()) {
562
- await audioOuput.captureFrame(f);
562
+ await audioOutput.captureFrame(f);
563
563
  }
564
564
  }
565
565
  } finally {
566
- audioOuput.off(import_io.AudioOutput.EVENT_PLAYBACK_STARTED, onPlaybackStarted);
566
+ audioOutput.off(import_io.AudioOutput.EVENT_PLAYBACK_STARTED, onPlaybackStarted);
567
567
  if (!out.firstFrameFut.done) {
568
568
  out.firstFrameFut.reject(new Error("audio forwarding cancelled before playback started"));
569
569
  }
570
570
  reader == null ? void 0 : reader.releaseLock();
571
- audioOuput.flush();
571
+ audioOutput.flush();
572
572
  }
573
573
  }
574
574
  function performAudioForwarding(ttsStream, audioOutput, controller) {
@@ -689,13 +689,6 @@ function performToolExecutions({
689
689
  },
690
690
  "Executing LLM tool call"
691
691
  );
692
- const toolExecution = import_agent.asyncLocalStorage.run({ functionCall: toolCall }, async () => {
693
- return await tool.execute(parsedArgs, {
694
- ctx: new import_run_context.RunContext(session, speechHandle, toolCall),
695
- toolCallId: toolCall.callId,
696
- abortSignal: signal
697
- });
698
- });
699
692
  const _tracableToolExecutionImpl = async (toolExecTask, span) => {
700
693
  span.setAttribute(import_telemetry.traceTypes.ATTR_FUNCTION_TOOL_NAME, toolCall.name);
701
694
  span.setAttribute(import_telemetry.traceTypes.ATTR_FUNCTION_TOOL_ARGS, toolCall.args);
@@ -745,9 +738,36 @@ function performToolExecutions({
745
738
  const tracableToolExecution = (toolExecTask) => import_telemetry.tracer.startActiveSpan(async (span) => _tracableToolExecutionImpl(toolExecTask, span), {
746
739
  name: "function_tool"
747
740
  });
748
- tasks.push(tracableToolExecution(toolExecution));
741
+ const toolTask = import_utils.Task.from(
742
+ async () => {
743
+ const currentTask = import_utils.Task.current();
744
+ if (currentTask) {
745
+ (0, import_agent._setActivityTaskInfo)(currentTask, {
746
+ speechHandle,
747
+ functionCall: toolCall,
748
+ inlineTask: true
749
+ });
750
+ }
751
+ const toolExecution = import_agent.functionCallStorage.run({ functionCall: toolCall }, async () => {
752
+ return await tool.execute(parsedArgs, {
753
+ ctx: new import_run_context.RunContext(session, speechHandle, toolCall),
754
+ toolCallId: toolCall.callId,
755
+ abortSignal: signal
756
+ });
757
+ });
758
+ await tracableToolExecution(toolExecution);
759
+ },
760
+ controller2,
761
+ `performToolExecution:${toolCall.name}`
762
+ );
763
+ (0, import_agent._setActivityTaskInfo)(toolTask, {
764
+ speechHandle,
765
+ functionCall: toolCall,
766
+ inlineTask: true
767
+ });
768
+ tasks.push(toolTask);
749
769
  }
750
- await Promise.allSettled(tasks);
770
+ await Promise.allSettled(tasks.map((task) => task.result));
751
771
  if (toolOutput.output.length > 0) {
752
772
  logger.debug(
753
773
  {