@livekit/agents 1.1.0-dev.0 → 1.2.0

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (292)
  1. package/dist/cli.cjs +2 -0
  2. package/dist/cli.cjs.map +1 -1
  3. package/dist/cli.d.ts.map +1 -1
  4. package/dist/cli.js +2 -0
  5. package/dist/cli.js.map +1 -1
  6. package/dist/constants.cjs +3 -0
  7. package/dist/constants.cjs.map +1 -1
  8. package/dist/constants.d.cts +1 -0
  9. package/dist/constants.d.ts +1 -0
  10. package/dist/constants.d.ts.map +1 -1
  11. package/dist/constants.js +2 -0
  12. package/dist/constants.js.map +1 -1
  13. package/dist/cpu.cjs +189 -0
  14. package/dist/cpu.cjs.map +1 -0
  15. package/dist/cpu.d.cts +24 -0
  16. package/dist/cpu.d.ts +24 -0
  17. package/dist/cpu.d.ts.map +1 -0
  18. package/dist/cpu.js +152 -0
  19. package/dist/cpu.js.map +1 -0
  20. package/dist/cpu.test.cjs +227 -0
  21. package/dist/cpu.test.cjs.map +1 -0
  22. package/dist/cpu.test.js +204 -0
  23. package/dist/cpu.test.js.map +1 -0
  24. package/dist/index.cjs +12 -10
  25. package/dist/index.cjs.map +1 -1
  26. package/dist/index.d.cts +13 -13
  27. package/dist/index.d.ts +13 -13
  28. package/dist/index.d.ts.map +1 -1
  29. package/dist/index.js +11 -10
  30. package/dist/index.js.map +1 -1
  31. package/dist/inference/interruption/defaults.cjs +1 -1
  32. package/dist/inference/interruption/defaults.cjs.map +1 -1
  33. package/dist/inference/interruption/defaults.d.cts +1 -1
  34. package/dist/inference/interruption/defaults.d.ts +1 -1
  35. package/dist/inference/interruption/defaults.d.ts.map +1 -1
  36. package/dist/inference/interruption/defaults.js +1 -1
  37. package/dist/inference/interruption/defaults.js.map +1 -1
  38. package/dist/inference/interruption/http_transport.cjs +44 -28
  39. package/dist/inference/interruption/http_transport.cjs.map +1 -1
  40. package/dist/inference/interruption/http_transport.d.ts.map +1 -1
  41. package/dist/inference/interruption/http_transport.js +45 -29
  42. package/dist/inference/interruption/http_transport.js.map +1 -1
  43. package/dist/inference/interruption/interruption_detector.cjs +22 -5
  44. package/dist/inference/interruption/interruption_detector.cjs.map +1 -1
  45. package/dist/inference/interruption/interruption_detector.d.cts +2 -2
  46. package/dist/inference/interruption/interruption_detector.d.ts +2 -2
  47. package/dist/inference/interruption/interruption_detector.d.ts.map +1 -1
  48. package/dist/inference/interruption/interruption_detector.js +22 -5
  49. package/dist/inference/interruption/interruption_detector.js.map +1 -1
  50. package/dist/inference/interruption/interruption_stream.cjs +4 -4
  51. package/dist/inference/interruption/interruption_stream.cjs.map +1 -1
  52. package/dist/inference/interruption/interruption_stream.js +4 -4
  53. package/dist/inference/interruption/interruption_stream.js.map +1 -1
  54. package/dist/inference/interruption/types.cjs.map +1 -1
  55. package/dist/inference/interruption/types.d.cts +2 -2
  56. package/dist/inference/interruption/types.d.ts +2 -2
  57. package/dist/inference/interruption/types.d.ts.map +1 -1
  58. package/dist/inference/interruption/ws_transport.cjs +60 -47
  59. package/dist/inference/interruption/ws_transport.cjs.map +1 -1
  60. package/dist/inference/interruption/ws_transport.d.ts.map +1 -1
  61. package/dist/inference/interruption/ws_transport.js +60 -47
  62. package/dist/inference/interruption/ws_transport.js.map +1 -1
  63. package/dist/inference/llm.cjs.map +1 -1
  64. package/dist/inference/llm.d.cts +1 -1
  65. package/dist/inference/llm.d.ts +1 -1
  66. package/dist/inference/llm.d.ts.map +1 -1
  67. package/dist/inference/llm.js.map +1 -1
  68. package/dist/inference/stt.cjs +20 -12
  69. package/dist/inference/stt.cjs.map +1 -1
  70. package/dist/inference/stt.d.cts +3 -2
  71. package/dist/inference/stt.d.ts +3 -2
  72. package/dist/inference/stt.d.ts.map +1 -1
  73. package/dist/inference/stt.js +20 -12
  74. package/dist/inference/stt.js.map +1 -1
  75. package/dist/inference/stt.test.cjs +14 -0
  76. package/dist/inference/stt.test.cjs.map +1 -1
  77. package/dist/inference/stt.test.js +14 -0
  78. package/dist/inference/stt.test.js.map +1 -1
  79. package/dist/inference/tts.cjs +13 -4
  80. package/dist/inference/tts.cjs.map +1 -1
  81. package/dist/inference/tts.d.cts +8 -1
  82. package/dist/inference/tts.d.ts +8 -1
  83. package/dist/inference/tts.d.ts.map +1 -1
  84. package/dist/inference/tts.js +13 -4
  85. package/dist/inference/tts.js.map +1 -1
  86. package/dist/inference/tts.test.cjs +10 -0
  87. package/dist/inference/tts.test.cjs.map +1 -1
  88. package/dist/inference/tts.test.js +10 -0
  89. package/dist/inference/tts.test.js.map +1 -1
  90. package/dist/ipc/job_proc_lazy_main.cjs +41 -23
  91. package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
  92. package/dist/ipc/job_proc_lazy_main.js +41 -23
  93. package/dist/ipc/job_proc_lazy_main.js.map +1 -1
  94. package/dist/job.cjs +1 -1
  95. package/dist/job.cjs.map +1 -1
  96. package/dist/job.js +1 -1
  97. package/dist/job.js.map +1 -1
  98. package/dist/language.cjs +394 -0
  99. package/dist/language.cjs.map +1 -0
  100. package/dist/language.d.cts +15 -0
  101. package/dist/language.d.ts +15 -0
  102. package/dist/language.d.ts.map +1 -0
  103. package/dist/language.js +363 -0
  104. package/dist/language.js.map +1 -0
  105. package/dist/language.test.cjs +43 -0
  106. package/dist/language.test.cjs.map +1 -0
  107. package/dist/language.test.js +49 -0
  108. package/dist/language.test.js.map +1 -0
  109. package/dist/llm/index.cjs +2 -0
  110. package/dist/llm/index.cjs.map +1 -1
  111. package/dist/llm/index.d.cts +1 -1
  112. package/dist/llm/index.d.ts +1 -1
  113. package/dist/llm/index.d.ts.map +1 -1
  114. package/dist/llm/index.js +2 -0
  115. package/dist/llm/index.js.map +1 -1
  116. package/dist/stream/deferred_stream.cjs +6 -2
  117. package/dist/stream/deferred_stream.cjs.map +1 -1
  118. package/dist/stream/deferred_stream.d.ts.map +1 -1
  119. package/dist/stream/deferred_stream.js +6 -2
  120. package/dist/stream/deferred_stream.js.map +1 -1
  121. package/dist/stt/stt.cjs.map +1 -1
  122. package/dist/stt/stt.d.cts +2 -1
  123. package/dist/stt/stt.d.ts +2 -1
  124. package/dist/stt/stt.d.ts.map +1 -1
  125. package/dist/stt/stt.js.map +1 -1
  126. package/dist/utils.cjs +15 -0
  127. package/dist/utils.cjs.map +1 -1
  128. package/dist/utils.d.cts +8 -0
  129. package/dist/utils.d.ts +8 -0
  130. package/dist/utils.d.ts.map +1 -1
  131. package/dist/utils.js +13 -0
  132. package/dist/utils.js.map +1 -1
  133. package/dist/version.cjs +1 -1
  134. package/dist/version.js +1 -1
  135. package/dist/voice/agent.cjs +14 -17
  136. package/dist/voice/agent.cjs.map +1 -1
  137. package/dist/voice/agent.d.cts +10 -11
  138. package/dist/voice/agent.d.ts +10 -11
  139. package/dist/voice/agent.d.ts.map +1 -1
  140. package/dist/voice/agent.js +15 -18
  141. package/dist/voice/agent.js.map +1 -1
  142. package/dist/voice/agent.test.cjs +194 -0
  143. package/dist/voice/agent.test.cjs.map +1 -1
  144. package/dist/voice/agent.test.js +195 -1
  145. package/dist/voice/agent.test.js.map +1 -1
  146. package/dist/voice/agent_activity.cjs +116 -39
  147. package/dist/voice/agent_activity.cjs.map +1 -1
  148. package/dist/voice/agent_activity.d.cts +2 -0
  149. package/dist/voice/agent_activity.d.ts +2 -0
  150. package/dist/voice/agent_activity.d.ts.map +1 -1
  151. package/dist/voice/agent_activity.js +117 -40
  152. package/dist/voice/agent_activity.js.map +1 -1
  153. package/dist/voice/agent_activity.test.cjs +135 -0
  154. package/dist/voice/agent_activity.test.cjs.map +1 -0
  155. package/dist/voice/agent_activity.test.js +134 -0
  156. package/dist/voice/agent_activity.test.js.map +1 -0
  157. package/dist/voice/agent_session.cjs +38 -38
  158. package/dist/voice/agent_session.cjs.map +1 -1
  159. package/dist/voice/agent_session.d.cts +65 -56
  160. package/dist/voice/agent_session.d.ts +65 -56
  161. package/dist/voice/agent_session.d.ts.map +1 -1
  162. package/dist/voice/agent_session.js +37 -37
  163. package/dist/voice/agent_session.js.map +1 -1
  164. package/dist/voice/audio_recognition.cjs +106 -52
  165. package/dist/voice/audio_recognition.cjs.map +1 -1
  166. package/dist/voice/audio_recognition.d.cts +4 -2
  167. package/dist/voice/audio_recognition.d.ts +4 -2
  168. package/dist/voice/audio_recognition.d.ts.map +1 -1
  169. package/dist/voice/audio_recognition.js +106 -52
  170. package/dist/voice/audio_recognition.js.map +1 -1
  171. package/dist/voice/audio_recognition_span.test.cjs +84 -22
  172. package/dist/voice/audio_recognition_span.test.cjs.map +1 -1
  173. package/dist/voice/audio_recognition_span.test.js +90 -23
  174. package/dist/voice/audio_recognition_span.test.js.map +1 -1
  175. package/dist/voice/events.cjs +1 -1
  176. package/dist/voice/events.cjs.map +1 -1
  177. package/dist/voice/events.d.cts +4 -3
  178. package/dist/voice/events.d.ts +4 -3
  179. package/dist/voice/events.d.ts.map +1 -1
  180. package/dist/voice/events.js +1 -1
  181. package/dist/voice/events.js.map +1 -1
  182. package/dist/voice/index.cjs +9 -1
  183. package/dist/voice/index.cjs.map +1 -1
  184. package/dist/voice/index.d.cts +1 -1
  185. package/dist/voice/index.d.ts +1 -1
  186. package/dist/voice/index.d.ts.map +1 -1
  187. package/dist/voice/index.js +10 -1
  188. package/dist/voice/index.js.map +1 -1
  189. package/dist/voice/remote_session.cjs +922 -0
  190. package/dist/voice/remote_session.cjs.map +1 -0
  191. package/dist/voice/remote_session.d.cts +108 -0
  192. package/dist/voice/remote_session.d.ts +108 -0
  193. package/dist/voice/remote_session.d.ts.map +1 -0
  194. package/dist/voice/remote_session.js +887 -0
  195. package/dist/voice/remote_session.js.map +1 -0
  196. package/dist/voice/report.cjs +11 -10
  197. package/dist/voice/report.cjs.map +1 -1
  198. package/dist/voice/report.d.cts +5 -3
  199. package/dist/voice/report.d.ts +5 -3
  200. package/dist/voice/report.d.ts.map +1 -1
  201. package/dist/voice/report.js +11 -10
  202. package/dist/voice/report.js.map +1 -1
  203. package/dist/voice/report.test.cjs +15 -0
  204. package/dist/voice/report.test.cjs.map +1 -1
  205. package/dist/voice/report.test.js +15 -0
  206. package/dist/voice/report.test.js.map +1 -1
  207. package/dist/voice/room_io/room_io.cjs +39 -0
  208. package/dist/voice/room_io/room_io.cjs.map +1 -1
  209. package/dist/voice/room_io/room_io.d.cts +3 -1
  210. package/dist/voice/room_io/room_io.d.ts +3 -1
  211. package/dist/voice/room_io/room_io.d.ts.map +1 -1
  212. package/dist/voice/room_io/room_io.js +40 -1
  213. package/dist/voice/room_io/room_io.js.map +1 -1
  214. package/dist/voice/turn_config/interruption.cjs.map +1 -1
  215. package/dist/voice/turn_config/interruption.d.cts +1 -1
  216. package/dist/voice/turn_config/interruption.d.ts +1 -1
  217. package/dist/voice/turn_config/interruption.d.ts.map +1 -1
  218. package/dist/voice/turn_config/interruption.js.map +1 -1
  219. package/dist/voice/turn_config/utils.cjs +95 -35
  220. package/dist/voice/turn_config/utils.cjs.map +1 -1
  221. package/dist/voice/turn_config/utils.d.cts +17 -5
  222. package/dist/voice/turn_config/utils.d.ts +17 -5
  223. package/dist/voice/turn_config/utils.d.ts.map +1 -1
  224. package/dist/voice/turn_config/utils.js +93 -35
  225. package/dist/voice/turn_config/utils.js.map +1 -1
  226. package/dist/voice/turn_config/utils.test.cjs +83 -41
  227. package/dist/voice/turn_config/utils.test.cjs.map +1 -1
  228. package/dist/voice/turn_config/utils.test.js +84 -42
  229. package/dist/voice/turn_config/utils.test.js.map +1 -1
  230. package/dist/worker.cjs +6 -29
  231. package/dist/worker.cjs.map +1 -1
  232. package/dist/worker.d.ts.map +1 -1
  233. package/dist/worker.js +6 -19
  234. package/dist/worker.js.map +1 -1
  235. package/package.json +3 -2
  236. package/src/cli.ts +2 -0
  237. package/src/constants.ts +1 -0
  238. package/src/cpu.test.ts +239 -0
  239. package/src/cpu.ts +173 -0
  240. package/src/index.ts +13 -15
  241. package/src/inference/interruption/defaults.ts +1 -1
  242. package/src/inference/interruption/http_transport.ts +49 -30
  243. package/src/inference/interruption/interruption_detector.ts +22 -6
  244. package/src/inference/interruption/interruption_stream.ts +4 -4
  245. package/src/inference/interruption/types.ts +2 -2
  246. package/src/inference/interruption/ws_transport.ts +63 -59
  247. package/src/inference/llm.ts +3 -1
  248. package/src/inference/stt.test.ts +17 -0
  249. package/src/inference/stt.ts +22 -14
  250. package/src/inference/tts.test.ts +12 -0
  251. package/src/inference/tts.ts +22 -6
  252. package/src/ipc/job_proc_lazy_main.ts +44 -24
  253. package/src/job.ts +1 -1
  254. package/src/language.test.ts +62 -0
  255. package/src/language.ts +380 -0
  256. package/src/llm/index.ts +2 -0
  257. package/src/stream/deferred_stream.ts +5 -1
  258. package/src/stt/stt.ts +2 -1
  259. package/src/utils.ts +20 -0
  260. package/src/voice/agent.test.ts +208 -1
  261. package/src/voice/agent.ts +21 -22
  262. package/src/voice/agent_activity.test.ts +194 -0
  263. package/src/voice/agent_activity.ts +161 -43
  264. package/src/voice/agent_session.ts +103 -92
  265. package/src/voice/audio_recognition.ts +124 -61
  266. package/src/voice/audio_recognition_span.test.ts +115 -35
  267. package/src/voice/events.ts +4 -3
  268. package/src/voice/index.ts +10 -1
  269. package/src/voice/remote_session.ts +1083 -0
  270. package/src/voice/report.test.ts +22 -3
  271. package/src/voice/report.ts +31 -14
  272. package/src/voice/room_io/room_io.ts +52 -2
  273. package/src/voice/turn_config/interruption.ts +1 -1
  274. package/src/voice/turn_config/utils.test.ts +91 -43
  275. package/src/voice/turn_config/utils.ts +120 -56
  276. package/src/worker.ts +34 -50
  277. package/dist/voice/client_events.cjs +0 -554
  278. package/dist/voice/client_events.cjs.map +0 -1
  279. package/dist/voice/client_events.d.cts +0 -195
  280. package/dist/voice/client_events.d.ts +0 -195
  281. package/dist/voice/client_events.d.ts.map +0 -1
  282. package/dist/voice/client_events.js +0 -548
  283. package/dist/voice/client_events.js.map +0 -1
  284. package/dist/voice/wire_format.cjs +0 -798
  285. package/dist/voice/wire_format.cjs.map +0 -1
  286. package/dist/voice/wire_format.d.cts +0 -5503
  287. package/dist/voice/wire_format.d.ts +0 -5503
  288. package/dist/voice/wire_format.d.ts.map +0 -1
  289. package/dist/voice/wire_format.js +0 -728
  290. package/dist/voice/wire_format.js.map +0 -1
  291. package/src/voice/client_events.ts +0 -838
  292. package/src/voice/wire_format.ts +0 -827
@@ -2,14 +2,27 @@
2
2
  //
3
3
  // SPDX-License-Identifier: Apache-2.0
4
4
  import { ParticipantKind } from '@livekit/rtc-node';
5
- import { InMemorySpanExporter, SimpleSpanProcessor } from '@opentelemetry/sdk-trace-base';
5
+ import { ROOT_CONTEXT, context as otelContext, trace } from '@opentelemetry/api';
6
+ import {
7
+ InMemorySpanExporter,
8
+ type ReadableSpan,
9
+ SimpleSpanProcessor,
10
+ } from '@opentelemetry/sdk-trace-base';
6
11
  import { NodeTracerProvider } from '@opentelemetry/sdk-trace-node';
12
+ import { ReadableStream } from 'node:stream/web';
7
13
  import { describe, expect, it, vi } from 'vitest';
14
+ import { ChatContext } from '../llm/chat_context.js';
8
15
  import { initializeLogger } from '../log.js';
9
16
  import { type SpeechEvent, SpeechEventType } from '../stt/stt.js';
10
- import { setTracerProvider } from '../telemetry/index.js';
17
+ import { setTracerProvider, tracer } from '../telemetry/index.js';
11
18
  import { VAD, type VADEvent, VADEventType, type VADStream } from '../vad.js';
12
- import { AudioRecognition, type _TurnDetector } from './audio_recognition.js';
19
+ import { AgentSession } from './agent_session.js';
20
+ import {
21
+ AudioRecognition,
22
+ type RecognitionHooks,
23
+ type _TurnDetector,
24
+ } from './audio_recognition.js';
25
+ import type { STTNode } from './io.js';
13
26
 
14
27
  function setupInMemoryTracing() {
15
28
  const exporter = new InMemorySpanExporter();
@@ -20,10 +33,25 @@ function setupInMemoryTracing() {
20
33
  return { exporter };
21
34
  }
22
35
 
23
- function spanByName(spans: any[], name: string) {
36
+ function spanByName(spans: ReadableSpan[], name: string) {
24
37
  return spans.find((s) => s.name === name);
25
38
  }
26
39
 
40
+ function createFakeSession(rootSpanContext = ROOT_CONTEXT): AgentSession {
41
+ return {
42
+ _agentState: 'listening',
43
+ _roomIO: {
44
+ linkedParticipant: { sid: 'p3', identity: 'charlie', kind: ParticipantKind.AGENT },
45
+ },
46
+ _setUserAwayTimer: vi.fn(),
47
+ _cancelUserAwayTimer: vi.fn(),
48
+ _userSpeakingSpan: undefined,
49
+ _userState: 'listening',
50
+ emit: vi.fn(),
51
+ rootSpanContext,
52
+ } as unknown as AgentSession;
53
+ }
54
+
27
55
  class FakeVADStream extends (Object as unknown as { new (): VADStream }) {
28
56
  // We intentionally avoid extending the real VADStream (it is not exported as a value in JS output
29
57
  // in some bundling contexts). Instead we emulate the async iterator shape used by AudioRecognition.
@@ -61,6 +89,8 @@ class FakeVAD extends VAD {
61
89
  }
62
90
 
63
91
  const alwaysTrueTurnDetector: _TurnDetector = {
92
+ model: 'test-turn-detector',
93
+ provider: 'test-provider',
64
94
  supportsLanguage: async () => true,
65
95
  unlikelyThreshold: async () => undefined,
66
96
  predictEndOfTurn: async () => 1.0,
@@ -72,23 +102,15 @@ describe('AudioRecognition user_turn span parity', () => {
72
102
  it('creates user_turn and parents eou_detection under it (stt mode)', async () => {
73
103
  const { exporter } = setupInMemoryTracing();
74
104
 
75
- const hooks = {
105
+ const hooks: RecognitionHooks = {
106
+ onInterruption: vi.fn(),
76
107
  onStartOfSpeech: vi.fn(),
77
108
  onVADInferenceDone: vi.fn(),
78
109
  onEndOfSpeech: vi.fn(),
79
110
  onInterimTranscript: vi.fn(),
80
111
  onFinalTranscript: vi.fn(),
81
112
  onPreemptiveGeneration: vi.fn(),
82
- retrieveChatCtx: () =>
83
- ({
84
- copy() {
85
- return this;
86
- },
87
- addMessage() {},
88
- toJSON() {
89
- return { items: [] };
90
- },
91
- }) as any,
113
+ retrieveChatCtx: () => ChatContext.empty(),
92
114
  onEndOfTurn: vi.fn(async () => true),
93
115
  };
94
116
 
@@ -109,8 +131,8 @@ describe('AudioRecognition user_turn span parity', () => {
109
131
  { type: SpeechEventType.END_OF_SPEECH },
110
132
  ];
111
133
 
112
- const sttNode = async () =>
113
- new ReadableStream<SpeechEvent>({
134
+ const sttNode: STTNode = async () =>
135
+ new ReadableStream<SpeechEvent | string>({
114
136
  start(controller) {
115
137
  for (const ev of sttEvents) controller.enqueue(ev);
116
138
  controller.close();
@@ -118,8 +140,8 @@ describe('AudioRecognition user_turn span parity', () => {
118
140
  });
119
141
 
120
142
  const ar = new AudioRecognition({
121
- recognitionHooks: hooks as any,
122
- stt: sttNode as any,
143
+ recognitionHooks: hooks,
144
+ stt: sttNode,
123
145
  vad: undefined,
124
146
  turnDetector: alwaysTrueTurnDetector,
125
147
  turnDetectionMode: 'stt',
@@ -140,6 +162,9 @@ describe('AudioRecognition user_turn span parity', () => {
140
162
  const eou = spanByName(spans, 'eou_detection');
141
163
  expect(userTurn, 'user_turn span missing').toBeTruthy();
142
164
  expect(eou, 'eou_detection span missing').toBeTruthy();
165
+ if (!userTurn || !eou) {
166
+ throw new Error('expected user_turn and eou_detection spans');
167
+ }
143
168
 
144
169
  expect(eou.parentSpanId).toBe(userTurn.spanContext().spanId);
145
170
 
@@ -158,23 +183,15 @@ describe('AudioRecognition user_turn span parity', () => {
158
183
  it('creates user_turn from VAD startTime (vad mode) and keeps same parenting', async () => {
159
184
  const { exporter } = setupInMemoryTracing();
160
185
 
161
- const hooks = {
186
+ const hooks: RecognitionHooks = {
187
+ onInterruption: vi.fn(),
162
188
  onStartOfSpeech: vi.fn(),
163
189
  onVADInferenceDone: vi.fn(),
164
190
  onEndOfSpeech: vi.fn(),
165
191
  onInterimTranscript: vi.fn(),
166
192
  onFinalTranscript: vi.fn(),
167
193
  onPreemptiveGeneration: vi.fn(),
168
- retrieveChatCtx: () =>
169
- ({
170
- copy() {
171
- return this;
172
- },
173
- addMessage() {},
174
- toJSON() {
175
- return { items: [] };
176
- },
177
- }) as any,
194
+ retrieveChatCtx: () => ChatContext.empty(),
178
195
  onEndOfTurn: vi.fn(async () => true),
179
196
  };
180
197
 
@@ -223,8 +240,8 @@ describe('AudioRecognition user_turn span parity', () => {
223
240
  },
224
241
  ];
225
242
 
226
- const sttNode = async () =>
227
- new ReadableStream<SpeechEvent>({
243
+ const sttNode: STTNode = async () =>
244
+ new ReadableStream<SpeechEvent | string>({
228
245
  start(controller) {
229
246
  for (const ev of sttEvents) controller.enqueue(ev);
230
247
  controller.close();
@@ -232,9 +249,9 @@ describe('AudioRecognition user_turn span parity', () => {
232
249
  });
233
250
 
234
251
  const ar = new AudioRecognition({
235
- recognitionHooks: hooks as any,
236
- stt: sttNode as any,
237
- vad: new FakeVAD(vadEvents) as any,
252
+ recognitionHooks: hooks,
253
+ stt: sttNode,
254
+ vad: new FakeVAD(vadEvents),
238
255
  turnDetector: alwaysTrueTurnDetector,
239
256
  turnDetectionMode: 'vad',
240
257
  minEndpointingDelay: 0,
@@ -253,9 +270,72 @@ describe('AudioRecognition user_turn span parity', () => {
253
270
  const eou = spanByName(spans, 'eou_detection');
254
271
  expect(userTurn).toBeTruthy();
255
272
  expect(eou).toBeTruthy();
273
+ if (!userTurn || !eou) {
274
+ throw new Error('expected user_turn and eou_detection spans');
275
+ }
256
276
  expect(eou.parentSpanId).toBe(userTurn.spanContext().spanId);
257
277
 
258
278
  expect(hooks.onStartOfSpeech).toHaveBeenCalled();
259
279
  expect(hooks.onEndOfSpeech).toHaveBeenCalled();
260
280
  });
281
+
282
+ it('parents user_speaking under user_turn when an explicit speech context is provided', () => {
283
+ const { exporter } = setupInMemoryTracing();
284
+ const sessionSpan = tracer.startSpan({ name: 'agent_session', context: ROOT_CONTEXT });
285
+ const sessionContext = trace.setSpan(ROOT_CONTEXT, sessionSpan);
286
+ const fakeSession = createFakeSession(sessionContext);
287
+ const userTurn = tracer.startSpan({ name: 'user_turn', context: sessionContext });
288
+ const userTurnContext = trace.setSpan(sessionContext, userTurn);
289
+ const speakingStartedAt = Date.now() - 100;
290
+ const speakingEndedAt = Date.now();
291
+
292
+ otelContext.with(userTurnContext, () => {
293
+ AgentSession.prototype._updateUserState.call(fakeSession, 'speaking', {
294
+ lastSpeakingTime: speakingStartedAt,
295
+ otelContext: otelContext.active(),
296
+ });
297
+ AgentSession.prototype._updateUserState.call(fakeSession, 'listening', {
298
+ lastSpeakingTime: speakingEndedAt,
299
+ otelContext: otelContext.active(),
300
+ });
301
+ });
302
+
303
+ userTurn.end();
304
+ sessionSpan.end();
305
+
306
+ const spans = exporter.getFinishedSpans();
307
+ const userSpeaking = spanByName(spans, 'user_speaking');
308
+ const exportedUserTurn = spanByName(spans, 'user_turn');
309
+ expect(userSpeaking).toBeTruthy();
310
+ expect(exportedUserTurn).toBeTruthy();
311
+ if (!userSpeaking || !exportedUserTurn) {
312
+ throw new Error('expected user_speaking and user_turn spans');
313
+ }
314
+ expect(userSpeaking.parentSpanId).toBe(exportedUserTurn.spanContext().spanId);
315
+ expect(userSpeaking.attributes['lk.participant_id']).toBe('p3');
316
+ });
317
+
318
+ it('keeps user_speaking attached to the session root without an explicit speech context', () => {
319
+ const { exporter } = setupInMemoryTracing();
320
+ const sessionSpan = tracer.startSpan({ name: 'agent_session', context: ROOT_CONTEXT });
321
+ const sessionContext = trace.setSpan(ROOT_CONTEXT, sessionSpan);
322
+ const fakeSession = createFakeSession(sessionContext);
323
+
324
+ AgentSession.prototype._updateUserState.call(fakeSession, 'speaking', {
325
+ lastSpeakingTime: Date.now() - 100,
326
+ });
327
+ AgentSession.prototype._updateUserState.call(fakeSession, 'listening', {
328
+ lastSpeakingTime: Date.now(),
329
+ });
330
+
331
+ sessionSpan.end();
332
+
333
+ const spans = exporter.getFinishedSpans();
334
+ const userSpeaking = spanByName(spans, 'user_speaking');
335
+ expect(userSpeaking).toBeTruthy();
336
+ if (!userSpeaking) {
337
+ throw new Error('expected user_speaking span');
338
+ }
339
+ expect(userSpeaking.parentSpanId).toBe(sessionSpan.spanContext().spanId);
340
+ });
261
341
  });
@@ -3,6 +3,7 @@
3
3
  // SPDX-License-Identifier: Apache-2.0
4
4
  import type { InterruptionDetectionError } from '../inference/interruption/errors.js';
5
5
  import type { OverlappingSpeechEvent } from '../inference/interruption/types.js';
6
+ import type { LanguageCode } from '../language.js';
6
7
  import type {
7
8
  ChatMessage,
8
9
  FunctionCall,
@@ -27,7 +28,7 @@ export enum AgentSessionEventTypes {
27
28
  FunctionToolsExecuted = 'function_tools_executed',
28
29
  MetricsCollected = 'metrics_collected',
29
30
  SpeechCreated = 'speech_created',
30
- UserOverlappingSpeech = 'user_overlapping_speech',
31
+ OverlappingSpeech = 'overlapping_speech',
31
32
  Error = 'error',
32
33
  Close = 'close',
33
34
  }
@@ -90,7 +91,7 @@ export type UserInputTranscribedEvent = {
90
91
  /** Not supported yet. Always null by default. */
91
92
  speakerId: string | null;
92
93
  createdAt: number;
93
- language: string | null;
94
+ language: LanguageCode | null;
94
95
  };
95
96
 
96
97
  export const createUserInputTranscribedEvent = ({
@@ -103,7 +104,7 @@ export const createUserInputTranscribedEvent = ({
103
104
  transcript: string;
104
105
  isFinal: boolean;
105
106
  speakerId?: string | null;
106
- language?: string | null;
107
+ language?: LanguageCode | null;
107
108
  createdAt?: number;
108
109
  }): UserInputTranscribedEvent => ({
109
110
  type: 'user_input_transcribed',
@@ -5,7 +5,16 @@ export { Agent, AgentTask, StopResponse, type AgentOptions, type ModelSettings }
5
5
  export { AgentSession, type AgentSessionOptions, type VoiceOptions } from './agent_session.js';
6
6
  export * from './avatar/index.js';
7
7
  export * from './background_audio.js';
8
- export { type TextInputCallback, type TextInputEvent } from './client_events.js';
8
+ export {
9
+ type TextInputCallback,
10
+ type TextInputEvent,
11
+ RemoteSession,
12
+ type RemoteSessionCallbacks,
13
+ type RemoteSessionEventTypes,
14
+ SessionHost,
15
+ SessionTransport,
16
+ RoomSessionTransport,
17
+ } from './remote_session.js';
9
18
  export * from './events.js';
10
19
  export { type TimedString } from './io.js';
11
20
  export * from './report.js';