@livekit/agents 1.1.0-dev.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (292) hide show
  1. package/dist/cli.cjs +2 -0
  2. package/dist/cli.cjs.map +1 -1
  3. package/dist/cli.d.ts.map +1 -1
  4. package/dist/cli.js +2 -0
  5. package/dist/cli.js.map +1 -1
  6. package/dist/constants.cjs +3 -0
  7. package/dist/constants.cjs.map +1 -1
  8. package/dist/constants.d.cts +1 -0
  9. package/dist/constants.d.ts +1 -0
  10. package/dist/constants.d.ts.map +1 -1
  11. package/dist/constants.js +2 -0
  12. package/dist/constants.js.map +1 -1
  13. package/dist/cpu.cjs +189 -0
  14. package/dist/cpu.cjs.map +1 -0
  15. package/dist/cpu.d.cts +24 -0
  16. package/dist/cpu.d.ts +24 -0
  17. package/dist/cpu.d.ts.map +1 -0
  18. package/dist/cpu.js +152 -0
  19. package/dist/cpu.js.map +1 -0
  20. package/dist/cpu.test.cjs +227 -0
  21. package/dist/cpu.test.cjs.map +1 -0
  22. package/dist/cpu.test.js +204 -0
  23. package/dist/cpu.test.js.map +1 -0
  24. package/dist/index.cjs +12 -10
  25. package/dist/index.cjs.map +1 -1
  26. package/dist/index.d.cts +13 -13
  27. package/dist/index.d.ts +13 -13
  28. package/dist/index.d.ts.map +1 -1
  29. package/dist/index.js +11 -10
  30. package/dist/index.js.map +1 -1
  31. package/dist/inference/interruption/defaults.cjs +1 -1
  32. package/dist/inference/interruption/defaults.cjs.map +1 -1
  33. package/dist/inference/interruption/defaults.d.cts +1 -1
  34. package/dist/inference/interruption/defaults.d.ts +1 -1
  35. package/dist/inference/interruption/defaults.d.ts.map +1 -1
  36. package/dist/inference/interruption/defaults.js +1 -1
  37. package/dist/inference/interruption/defaults.js.map +1 -1
  38. package/dist/inference/interruption/http_transport.cjs +44 -28
  39. package/dist/inference/interruption/http_transport.cjs.map +1 -1
  40. package/dist/inference/interruption/http_transport.d.ts.map +1 -1
  41. package/dist/inference/interruption/http_transport.js +45 -29
  42. package/dist/inference/interruption/http_transport.js.map +1 -1
  43. package/dist/inference/interruption/interruption_detector.cjs +22 -5
  44. package/dist/inference/interruption/interruption_detector.cjs.map +1 -1
  45. package/dist/inference/interruption/interruption_detector.d.cts +2 -2
  46. package/dist/inference/interruption/interruption_detector.d.ts +2 -2
  47. package/dist/inference/interruption/interruption_detector.d.ts.map +1 -1
  48. package/dist/inference/interruption/interruption_detector.js +22 -5
  49. package/dist/inference/interruption/interruption_detector.js.map +1 -1
  50. package/dist/inference/interruption/interruption_stream.cjs +4 -4
  51. package/dist/inference/interruption/interruption_stream.cjs.map +1 -1
  52. package/dist/inference/interruption/interruption_stream.js +4 -4
  53. package/dist/inference/interruption/interruption_stream.js.map +1 -1
  54. package/dist/inference/interruption/types.cjs.map +1 -1
  55. package/dist/inference/interruption/types.d.cts +2 -2
  56. package/dist/inference/interruption/types.d.ts +2 -2
  57. package/dist/inference/interruption/types.d.ts.map +1 -1
  58. package/dist/inference/interruption/ws_transport.cjs +60 -47
  59. package/dist/inference/interruption/ws_transport.cjs.map +1 -1
  60. package/dist/inference/interruption/ws_transport.d.ts.map +1 -1
  61. package/dist/inference/interruption/ws_transport.js +60 -47
  62. package/dist/inference/interruption/ws_transport.js.map +1 -1
  63. package/dist/inference/llm.cjs.map +1 -1
  64. package/dist/inference/llm.d.cts +1 -1
  65. package/dist/inference/llm.d.ts +1 -1
  66. package/dist/inference/llm.d.ts.map +1 -1
  67. package/dist/inference/llm.js.map +1 -1
  68. package/dist/inference/stt.cjs +20 -12
  69. package/dist/inference/stt.cjs.map +1 -1
  70. package/dist/inference/stt.d.cts +3 -2
  71. package/dist/inference/stt.d.ts +3 -2
  72. package/dist/inference/stt.d.ts.map +1 -1
  73. package/dist/inference/stt.js +20 -12
  74. package/dist/inference/stt.js.map +1 -1
  75. package/dist/inference/stt.test.cjs +14 -0
  76. package/dist/inference/stt.test.cjs.map +1 -1
  77. package/dist/inference/stt.test.js +14 -0
  78. package/dist/inference/stt.test.js.map +1 -1
  79. package/dist/inference/tts.cjs +13 -4
  80. package/dist/inference/tts.cjs.map +1 -1
  81. package/dist/inference/tts.d.cts +8 -1
  82. package/dist/inference/tts.d.ts +8 -1
  83. package/dist/inference/tts.d.ts.map +1 -1
  84. package/dist/inference/tts.js +13 -4
  85. package/dist/inference/tts.js.map +1 -1
  86. package/dist/inference/tts.test.cjs +10 -0
  87. package/dist/inference/tts.test.cjs.map +1 -1
  88. package/dist/inference/tts.test.js +10 -0
  89. package/dist/inference/tts.test.js.map +1 -1
  90. package/dist/ipc/job_proc_lazy_main.cjs +41 -23
  91. package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
  92. package/dist/ipc/job_proc_lazy_main.js +41 -23
  93. package/dist/ipc/job_proc_lazy_main.js.map +1 -1
  94. package/dist/job.cjs +1 -1
  95. package/dist/job.cjs.map +1 -1
  96. package/dist/job.js +1 -1
  97. package/dist/job.js.map +1 -1
  98. package/dist/language.cjs +394 -0
  99. package/dist/language.cjs.map +1 -0
  100. package/dist/language.d.cts +15 -0
  101. package/dist/language.d.ts +15 -0
  102. package/dist/language.d.ts.map +1 -0
  103. package/dist/language.js +363 -0
  104. package/dist/language.js.map +1 -0
  105. package/dist/language.test.cjs +43 -0
  106. package/dist/language.test.cjs.map +1 -0
  107. package/dist/language.test.js +49 -0
  108. package/dist/language.test.js.map +1 -0
  109. package/dist/llm/index.cjs +2 -0
  110. package/dist/llm/index.cjs.map +1 -1
  111. package/dist/llm/index.d.cts +1 -1
  112. package/dist/llm/index.d.ts +1 -1
  113. package/dist/llm/index.d.ts.map +1 -1
  114. package/dist/llm/index.js +2 -0
  115. package/dist/llm/index.js.map +1 -1
  116. package/dist/stream/deferred_stream.cjs +6 -2
  117. package/dist/stream/deferred_stream.cjs.map +1 -1
  118. package/dist/stream/deferred_stream.d.ts.map +1 -1
  119. package/dist/stream/deferred_stream.js +6 -2
  120. package/dist/stream/deferred_stream.js.map +1 -1
  121. package/dist/stt/stt.cjs.map +1 -1
  122. package/dist/stt/stt.d.cts +2 -1
  123. package/dist/stt/stt.d.ts +2 -1
  124. package/dist/stt/stt.d.ts.map +1 -1
  125. package/dist/stt/stt.js.map +1 -1
  126. package/dist/utils.cjs +15 -0
  127. package/dist/utils.cjs.map +1 -1
  128. package/dist/utils.d.cts +8 -0
  129. package/dist/utils.d.ts +8 -0
  130. package/dist/utils.d.ts.map +1 -1
  131. package/dist/utils.js +13 -0
  132. package/dist/utils.js.map +1 -1
  133. package/dist/version.cjs +1 -1
  134. package/dist/version.js +1 -1
  135. package/dist/voice/agent.cjs +14 -17
  136. package/dist/voice/agent.cjs.map +1 -1
  137. package/dist/voice/agent.d.cts +10 -11
  138. package/dist/voice/agent.d.ts +10 -11
  139. package/dist/voice/agent.d.ts.map +1 -1
  140. package/dist/voice/agent.js +15 -18
  141. package/dist/voice/agent.js.map +1 -1
  142. package/dist/voice/agent.test.cjs +194 -0
  143. package/dist/voice/agent.test.cjs.map +1 -1
  144. package/dist/voice/agent.test.js +195 -1
  145. package/dist/voice/agent.test.js.map +1 -1
  146. package/dist/voice/agent_activity.cjs +116 -39
  147. package/dist/voice/agent_activity.cjs.map +1 -1
  148. package/dist/voice/agent_activity.d.cts +2 -0
  149. package/dist/voice/agent_activity.d.ts +2 -0
  150. package/dist/voice/agent_activity.d.ts.map +1 -1
  151. package/dist/voice/agent_activity.js +117 -40
  152. package/dist/voice/agent_activity.js.map +1 -1
  153. package/dist/voice/agent_activity.test.cjs +135 -0
  154. package/dist/voice/agent_activity.test.cjs.map +1 -0
  155. package/dist/voice/agent_activity.test.js +134 -0
  156. package/dist/voice/agent_activity.test.js.map +1 -0
  157. package/dist/voice/agent_session.cjs +38 -38
  158. package/dist/voice/agent_session.cjs.map +1 -1
  159. package/dist/voice/agent_session.d.cts +65 -56
  160. package/dist/voice/agent_session.d.ts +65 -56
  161. package/dist/voice/agent_session.d.ts.map +1 -1
  162. package/dist/voice/agent_session.js +37 -37
  163. package/dist/voice/agent_session.js.map +1 -1
  164. package/dist/voice/audio_recognition.cjs +106 -52
  165. package/dist/voice/audio_recognition.cjs.map +1 -1
  166. package/dist/voice/audio_recognition.d.cts +4 -2
  167. package/dist/voice/audio_recognition.d.ts +4 -2
  168. package/dist/voice/audio_recognition.d.ts.map +1 -1
  169. package/dist/voice/audio_recognition.js +106 -52
  170. package/dist/voice/audio_recognition.js.map +1 -1
  171. package/dist/voice/audio_recognition_span.test.cjs +84 -22
  172. package/dist/voice/audio_recognition_span.test.cjs.map +1 -1
  173. package/dist/voice/audio_recognition_span.test.js +90 -23
  174. package/dist/voice/audio_recognition_span.test.js.map +1 -1
  175. package/dist/voice/events.cjs +1 -1
  176. package/dist/voice/events.cjs.map +1 -1
  177. package/dist/voice/events.d.cts +4 -3
  178. package/dist/voice/events.d.ts +4 -3
  179. package/dist/voice/events.d.ts.map +1 -1
  180. package/dist/voice/events.js +1 -1
  181. package/dist/voice/events.js.map +1 -1
  182. package/dist/voice/index.cjs +9 -1
  183. package/dist/voice/index.cjs.map +1 -1
  184. package/dist/voice/index.d.cts +1 -1
  185. package/dist/voice/index.d.ts +1 -1
  186. package/dist/voice/index.d.ts.map +1 -1
  187. package/dist/voice/index.js +10 -1
  188. package/dist/voice/index.js.map +1 -1
  189. package/dist/voice/remote_session.cjs +922 -0
  190. package/dist/voice/remote_session.cjs.map +1 -0
  191. package/dist/voice/remote_session.d.cts +108 -0
  192. package/dist/voice/remote_session.d.ts +108 -0
  193. package/dist/voice/remote_session.d.ts.map +1 -0
  194. package/dist/voice/remote_session.js +887 -0
  195. package/dist/voice/remote_session.js.map +1 -0
  196. package/dist/voice/report.cjs +11 -10
  197. package/dist/voice/report.cjs.map +1 -1
  198. package/dist/voice/report.d.cts +5 -3
  199. package/dist/voice/report.d.ts +5 -3
  200. package/dist/voice/report.d.ts.map +1 -1
  201. package/dist/voice/report.js +11 -10
  202. package/dist/voice/report.js.map +1 -1
  203. package/dist/voice/report.test.cjs +15 -0
  204. package/dist/voice/report.test.cjs.map +1 -1
  205. package/dist/voice/report.test.js +15 -0
  206. package/dist/voice/report.test.js.map +1 -1
  207. package/dist/voice/room_io/room_io.cjs +39 -0
  208. package/dist/voice/room_io/room_io.cjs.map +1 -1
  209. package/dist/voice/room_io/room_io.d.cts +3 -1
  210. package/dist/voice/room_io/room_io.d.ts +3 -1
  211. package/dist/voice/room_io/room_io.d.ts.map +1 -1
  212. package/dist/voice/room_io/room_io.js +40 -1
  213. package/dist/voice/room_io/room_io.js.map +1 -1
  214. package/dist/voice/turn_config/interruption.cjs.map +1 -1
  215. package/dist/voice/turn_config/interruption.d.cts +1 -1
  216. package/dist/voice/turn_config/interruption.d.ts +1 -1
  217. package/dist/voice/turn_config/interruption.d.ts.map +1 -1
  218. package/dist/voice/turn_config/interruption.js.map +1 -1
  219. package/dist/voice/turn_config/utils.cjs +95 -35
  220. package/dist/voice/turn_config/utils.cjs.map +1 -1
  221. package/dist/voice/turn_config/utils.d.cts +17 -5
  222. package/dist/voice/turn_config/utils.d.ts +17 -5
  223. package/dist/voice/turn_config/utils.d.ts.map +1 -1
  224. package/dist/voice/turn_config/utils.js +93 -35
  225. package/dist/voice/turn_config/utils.js.map +1 -1
  226. package/dist/voice/turn_config/utils.test.cjs +83 -41
  227. package/dist/voice/turn_config/utils.test.cjs.map +1 -1
  228. package/dist/voice/turn_config/utils.test.js +84 -42
  229. package/dist/voice/turn_config/utils.test.js.map +1 -1
  230. package/dist/worker.cjs +6 -29
  231. package/dist/worker.cjs.map +1 -1
  232. package/dist/worker.d.ts.map +1 -1
  233. package/dist/worker.js +6 -19
  234. package/dist/worker.js.map +1 -1
  235. package/package.json +3 -2
  236. package/src/cli.ts +2 -0
  237. package/src/constants.ts +1 -0
  238. package/src/cpu.test.ts +239 -0
  239. package/src/cpu.ts +173 -0
  240. package/src/index.ts +13 -15
  241. package/src/inference/interruption/defaults.ts +1 -1
  242. package/src/inference/interruption/http_transport.ts +49 -30
  243. package/src/inference/interruption/interruption_detector.ts +22 -6
  244. package/src/inference/interruption/interruption_stream.ts +4 -4
  245. package/src/inference/interruption/types.ts +2 -2
  246. package/src/inference/interruption/ws_transport.ts +63 -59
  247. package/src/inference/llm.ts +3 -1
  248. package/src/inference/stt.test.ts +17 -0
  249. package/src/inference/stt.ts +22 -14
  250. package/src/inference/tts.test.ts +12 -0
  251. package/src/inference/tts.ts +22 -6
  252. package/src/ipc/job_proc_lazy_main.ts +44 -24
  253. package/src/job.ts +1 -1
  254. package/src/language.test.ts +62 -0
  255. package/src/language.ts +380 -0
  256. package/src/llm/index.ts +2 -0
  257. package/src/stream/deferred_stream.ts +5 -1
  258. package/src/stt/stt.ts +2 -1
  259. package/src/utils.ts +20 -0
  260. package/src/voice/agent.test.ts +208 -1
  261. package/src/voice/agent.ts +21 -22
  262. package/src/voice/agent_activity.test.ts +194 -0
  263. package/src/voice/agent_activity.ts +161 -43
  264. package/src/voice/agent_session.ts +103 -92
  265. package/src/voice/audio_recognition.ts +124 -61
  266. package/src/voice/audio_recognition_span.test.ts +115 -35
  267. package/src/voice/events.ts +4 -3
  268. package/src/voice/index.ts +10 -1
  269. package/src/voice/remote_session.ts +1083 -0
  270. package/src/voice/report.test.ts +22 -3
  271. package/src/voice/report.ts +31 -14
  272. package/src/voice/room_io/room_io.ts +52 -2
  273. package/src/voice/turn_config/interruption.ts +1 -1
  274. package/src/voice/turn_config/utils.test.ts +91 -43
  275. package/src/voice/turn_config/utils.ts +120 -56
  276. package/src/worker.ts +34 -50
  277. package/dist/voice/client_events.cjs +0 -554
  278. package/dist/voice/client_events.cjs.map +0 -1
  279. package/dist/voice/client_events.d.cts +0 -195
  280. package/dist/voice/client_events.d.ts +0 -195
  281. package/dist/voice/client_events.d.ts.map +0 -1
  282. package/dist/voice/client_events.js +0 -548
  283. package/dist/voice/client_events.js.map +0 -1
  284. package/dist/voice/wire_format.cjs +0 -798
  285. package/dist/voice/wire_format.cjs.map +0 -1
  286. package/dist/voice/wire_format.d.cts +0 -5503
  287. package/dist/voice/wire_format.d.ts +0 -5503
  288. package/dist/voice/wire_format.d.ts.map +0 -1
  289. package/dist/voice/wire_format.js +0 -728
  290. package/dist/voice/wire_format.js.map +0 -1
  291. package/src/voice/client_events.ts +0 -838
  292. package/src/voice/wire_format.ts +0 -827
@@ -1,728 +0,0 @@
1
- import { z } from "zod";
2
- function msToS(ms) {
3
- return ms / 1e3;
4
- }
5
- function omitUndefined(obj) {
6
- const result = {};
7
- for (const [k, v] of Object.entries(obj)) {
8
- if (v !== void 0) {
9
- result[k] = v;
10
- }
11
- }
12
- return result;
13
- }
14
- function imageContentToWire(img) {
15
- return omitUndefined({
16
- id: img.id,
17
- type: img.type,
18
- image: typeof img.image === "string" ? img.image : void 0,
19
- inference_detail: img.inferenceDetail,
20
- inference_width: img.inferenceWidth,
21
- inference_height: img.inferenceHeight,
22
- mime_type: img.mimeType
23
- });
24
- }
25
- function audioContentToWire(audio) {
26
- return omitUndefined({
27
- type: audio.type,
28
- transcript: audio.transcript
29
- });
30
- }
31
- function chatContentToWire(content) {
32
- if (typeof content === "string") return content;
33
- if (content.type === "image_content") return imageContentToWire(content);
34
- return audioContentToWire(content);
35
- }
36
- function metricsReportToWire(m) {
37
- return omitUndefined({
38
- started_speaking_at: m.startedSpeakingAt,
39
- stopped_speaking_at: m.stoppedSpeakingAt,
40
- transcription_delay: m.transcriptionDelay,
41
- end_of_turn_delay: m.endOfTurnDelay,
42
- on_user_turn_completed_delay: m.onUserTurnCompletedDelay,
43
- llm_node_ttft: m.llmNodeTtft,
44
- tts_node_ttfb: m.ttsNodeTtfb,
45
- e2e_latency: m.e2eLatency
46
- });
47
- }
48
- function chatMessageToWire(msg) {
49
- const result = {
50
- id: msg.id,
51
- type: msg.type,
52
- role: msg.role,
53
- content: msg.content.map(chatContentToWire),
54
- interrupted: msg.interrupted,
55
- created_at: msToS(msg.createdAt)
56
- };
57
- if (msg.transcriptConfidence !== void 0) {
58
- result.transcript_confidence = msg.transcriptConfidence;
59
- }
60
- if (Object.keys(msg.metrics).length > 0) {
61
- result.metrics = metricsReportToWire(msg.metrics);
62
- }
63
- if (Object.keys(msg.extra).length > 0) {
64
- result.extra = msg.extra;
65
- }
66
- return result;
67
- }
68
- function functionCallToWire(fc) {
69
- const result = {
70
- id: fc.id,
71
- type: fc.type,
72
- call_id: fc.callId,
73
- arguments: fc.args,
74
- name: fc.name,
75
- created_at: msToS(fc.createdAt)
76
- };
77
- if (Object.keys(fc.extra).length > 0) {
78
- result.extra = fc.extra;
79
- }
80
- if (fc.groupId !== void 0) {
81
- result.group_id = fc.groupId;
82
- }
83
- return result;
84
- }
85
- function functionCallOutputToWire(fco) {
86
- return {
87
- id: fco.id,
88
- type: fco.type,
89
- name: fco.name,
90
- call_id: fco.callId,
91
- output: fco.output,
92
- is_error: fco.isError,
93
- created_at: msToS(fco.createdAt)
94
- };
95
- }
96
- function agentHandoffToWire(ah) {
97
- const result = {
98
- id: ah.id,
99
- type: ah.type,
100
- new_agent_id: ah.newAgentId,
101
- created_at: msToS(ah.createdAt)
102
- };
103
- if (ah.oldAgentId !== void 0) {
104
- result.old_agent_id = ah.oldAgentId;
105
- }
106
- return result;
107
- }
108
- function chatItemToWire(item) {
109
- switch (item.type) {
110
- case "message":
111
- return chatMessageToWire(item);
112
- case "function_call":
113
- return functionCallToWire(item);
114
- case "function_call_output":
115
- return functionCallOutputToWire(item);
116
- case "agent_handoff":
117
- return agentHandoffToWire(item);
118
- }
119
- }
120
- function metadataToWire(m) {
121
- if (!m) return null;
122
- return omitUndefined({
123
- model_name: m.modelName,
124
- model_provider: m.modelProvider
125
- });
126
- }
127
- function llmMetricsToWire(m) {
128
- return omitUndefined({
129
- type: m.type,
130
- label: m.label,
131
- request_id: m.requestId,
132
- timestamp: msToS(m.timestamp),
133
- duration: msToS(m.durationMs),
134
- ttft: msToS(m.ttftMs),
135
- cancelled: m.cancelled,
136
- completion_tokens: m.completionTokens,
137
- prompt_tokens: m.promptTokens,
138
- prompt_cached_tokens: m.promptCachedTokens,
139
- total_tokens: m.totalTokens,
140
- tokens_per_second: m.tokensPerSecond,
141
- speech_id: m.speechId,
142
- metadata: metadataToWire(m.metadata)
143
- });
144
- }
145
- function sttMetricsToWire(m) {
146
- return omitUndefined({
147
- type: m.type,
148
- label: m.label,
149
- request_id: m.requestId,
150
- timestamp: msToS(m.timestamp),
151
- duration: msToS(m.durationMs),
152
- audio_duration: msToS(m.audioDurationMs),
153
- input_tokens: m.inputTokens,
154
- output_tokens: m.outputTokens,
155
- streamed: m.streamed,
156
- metadata: metadataToWire(m.metadata)
157
- });
158
- }
159
- function ttsMetricsToWire(m) {
160
- return omitUndefined({
161
- type: m.type,
162
- label: m.label,
163
- request_id: m.requestId,
164
- timestamp: msToS(m.timestamp),
165
- ttfb: msToS(m.ttfbMs),
166
- duration: msToS(m.durationMs),
167
- audio_duration: msToS(m.audioDurationMs),
168
- cancelled: m.cancelled,
169
- characters_count: m.charactersCount,
170
- input_tokens: m.inputTokens,
171
- output_tokens: m.outputTokens,
172
- streamed: m.streamed,
173
- segment_id: m.segmentId,
174
- speech_id: m.speechId,
175
- metadata: metadataToWire(m.metadata)
176
- });
177
- }
178
- function vadMetricsToWire(m) {
179
- return {
180
- type: m.type,
181
- label: m.label,
182
- timestamp: msToS(m.timestamp),
183
- idle_time: msToS(m.idleTimeMs),
184
- inference_duration_total: msToS(m.inferenceDurationTotalMs),
185
- inference_count: m.inferenceCount
186
- };
187
- }
188
- function eouMetricsToWire(m) {
189
- return omitUndefined({
190
- type: m.type,
191
- timestamp: msToS(m.timestamp),
192
- end_of_utterance_delay: msToS(m.endOfUtteranceDelayMs),
193
- transcription_delay: msToS(m.transcriptionDelayMs),
194
- on_user_turn_completed_delay: msToS(m.onUserTurnCompletedDelayMs),
195
- speech_id: m.speechId
196
- });
197
- }
198
- function cachedTokenDetailsToWire(d) {
199
- return {
200
- audio_tokens: d.audioTokens,
201
- text_tokens: d.textTokens,
202
- image_tokens: d.imageTokens
203
- };
204
- }
205
- function inputTokenDetailsToWire(d) {
206
- return omitUndefined({
207
- audio_tokens: d.audioTokens,
208
- text_tokens: d.textTokens,
209
- image_tokens: d.imageTokens,
210
- cached_tokens: d.cachedTokens,
211
- cached_tokens_details: d.cachedTokensDetails ? cachedTokenDetailsToWire(d.cachedTokensDetails) : void 0
212
- });
213
- }
214
- function outputTokenDetailsToWire(d) {
215
- return {
216
- text_tokens: d.textTokens,
217
- audio_tokens: d.audioTokens,
218
- image_tokens: d.imageTokens
219
- };
220
- }
221
- function realtimeModelMetricsToWire(m) {
222
- return omitUndefined({
223
- type: m.type,
224
- label: m.label,
225
- request_id: m.requestId,
226
- timestamp: msToS(m.timestamp),
227
- duration: msToS(m.durationMs),
228
- session_duration: m.sessionDurationMs !== void 0 ? msToS(m.sessionDurationMs) : void 0,
229
- ttft: msToS(m.ttftMs),
230
- cancelled: m.cancelled,
231
- input_tokens: m.inputTokens,
232
- output_tokens: m.outputTokens,
233
- total_tokens: m.totalTokens,
234
- tokens_per_second: m.tokensPerSecond,
235
- input_token_details: inputTokenDetailsToWire(m.inputTokenDetails),
236
- output_token_details: outputTokenDetailsToWire(m.outputTokenDetails),
237
- metadata: metadataToWire(m.metadata)
238
- });
239
- }
240
- function interruptionMetricsToWire(m) {
241
- return omitUndefined({
242
- type: m.type,
243
- timestamp: msToS(m.timestamp),
244
- total_duration: msToS(m.totalDuration),
245
- prediction_duration: msToS(m.predictionDuration),
246
- detection_delay: msToS(m.detectionDelay),
247
- num_interruptions: m.numInterruptions,
248
- num_backchannels: m.numBackchannels,
249
- num_requests: m.numRequests,
250
- metadata: metadataToWire(m.metadata)
251
- });
252
- }
253
- function agentMetricsToWire(m) {
254
- switch (m.type) {
255
- case "llm_metrics":
256
- return llmMetricsToWire(m);
257
- case "stt_metrics":
258
- return sttMetricsToWire(m);
259
- case "tts_metrics":
260
- return ttsMetricsToWire(m);
261
- case "vad_metrics":
262
- return vadMetricsToWire(m);
263
- case "eou_metrics":
264
- return eouMetricsToWire(m);
265
- case "realtime_model_metrics":
266
- return realtimeModelMetricsToWire(m);
267
- case "interruption_metrics":
268
- return interruptionMetricsToWire(m);
269
- }
270
- }
271
- function llmModelUsageToWire(u) {
272
- return {
273
- type: u.type,
274
- provider: u.provider ?? "",
275
- model: u.model ?? "",
276
- input_tokens: u.inputTokens ?? 0,
277
- input_cached_tokens: u.inputCachedTokens ?? 0,
278
- input_audio_tokens: u.inputAudioTokens ?? 0,
279
- input_cached_audio_tokens: u.inputCachedAudioTokens ?? 0,
280
- input_text_tokens: u.inputTextTokens ?? 0,
281
- input_cached_text_tokens: u.inputCachedTextTokens ?? 0,
282
- input_image_tokens: u.inputImageTokens ?? 0,
283
- input_cached_image_tokens: u.inputCachedImageTokens ?? 0,
284
- output_tokens: u.outputTokens ?? 0,
285
- output_audio_tokens: u.outputAudioTokens ?? 0,
286
- output_text_tokens: u.outputTextTokens ?? 0,
287
- session_duration: msToS(u.sessionDurationMs ?? 0)
288
- };
289
- }
290
- function ttsModelUsageToWire(u) {
291
- return {
292
- type: u.type,
293
- provider: u.provider ?? "",
294
- model: u.model ?? "",
295
- input_tokens: u.inputTokens ?? 0,
296
- output_tokens: u.outputTokens ?? 0,
297
- characters_count: u.charactersCount ?? 0,
298
- audio_duration: msToS(u.audioDurationMs ?? 0)
299
- };
300
- }
301
- function sttModelUsageToWire(u) {
302
- return {
303
- type: u.type,
304
- provider: u.provider ?? "",
305
- model: u.model ?? "",
306
- input_tokens: u.inputTokens ?? 0,
307
- output_tokens: u.outputTokens ?? 0,
308
- audio_duration: msToS(u.audioDurationMs ?? 0)
309
- };
310
- }
311
- function interruptionModelUsageToWire(u) {
312
- return {
313
- type: u.type,
314
- provider: u.provider ?? "",
315
- model: u.model ?? "",
316
- total_requests: u.totalRequests ?? 0
317
- };
318
- }
319
- function modelUsageToWire(u) {
320
- switch (u.type) {
321
- case "llm_usage":
322
- return llmModelUsageToWire(u);
323
- case "tts_usage":
324
- return ttsModelUsageToWire(u);
325
- case "stt_usage":
326
- return sttModelUsageToWire(u);
327
- case "interruption_usage":
328
- return interruptionModelUsageToWire(u);
329
- default:
330
- return u;
331
- }
332
- }
333
- function agentSessionUsageToWire(u) {
334
- return {
335
- model_usage: u.modelUsage.map(modelUsageToWire)
336
- };
337
- }
338
- const imageContentWireSchema = z.object({
339
- id: z.string(),
340
- type: z.literal("image_content"),
341
- image: z.string(),
342
- inference_detail: z.enum(["auto", "high", "low"]).optional(),
343
- inference_width: z.number().optional(),
344
- inference_height: z.number().optional(),
345
- mime_type: z.string().optional()
346
- });
347
- const audioContentWireSchema = z.object({
348
- type: z.literal("audio_content"),
349
- transcript: z.string().nullable().optional()
350
- });
351
- const chatContentWireSchema = z.union([z.string(), imageContentWireSchema, audioContentWireSchema]);
352
- const metricsReportWireSchema = z.object({
353
- started_speaking_at: z.number().optional(),
354
- stopped_speaking_at: z.number().optional(),
355
- transcription_delay: z.number().optional(),
356
- end_of_turn_delay: z.number().optional(),
357
- on_user_turn_completed_delay: z.number().optional(),
358
- llm_node_ttft: z.number().optional(),
359
- tts_node_ttfb: z.number().optional(),
360
- e2e_latency: z.number().optional()
361
- }).optional();
362
- const chatMessageWireSchema = z.object({
363
- id: z.string(),
364
- type: z.literal("message"),
365
- role: z.enum(["developer", "system", "user", "assistant"]),
366
- content: z.array(chatContentWireSchema),
367
- interrupted: z.boolean(),
368
- created_at: z.number(),
369
- transcript_confidence: z.number().optional(),
370
- metrics: metricsReportWireSchema,
371
- extra: z.record(z.string(), z.unknown()).optional()
372
- });
373
- const functionCallWireSchema = z.object({
374
- id: z.string(),
375
- type: z.literal("function_call"),
376
- call_id: z.string(),
377
- arguments: z.string(),
378
- name: z.string(),
379
- created_at: z.number(),
380
- extra: z.record(z.string(), z.unknown()).optional(),
381
- group_id: z.string().optional()
382
- });
383
- const functionCallOutputWireSchema = z.object({
384
- id: z.string(),
385
- type: z.literal("function_call_output"),
386
- name: z.string(),
387
- call_id: z.string(),
388
- output: z.string(),
389
- is_error: z.boolean(),
390
- created_at: z.number()
391
- });
392
- const agentHandoffWireSchema = z.object({
393
- id: z.string(),
394
- type: z.literal("agent_handoff"),
395
- new_agent_id: z.string(),
396
- created_at: z.number(),
397
- old_agent_id: z.string().optional()
398
- });
399
- const chatItemWireSchema = z.discriminatedUnion("type", [
400
- chatMessageWireSchema,
401
- functionCallWireSchema,
402
- functionCallOutputWireSchema,
403
- agentHandoffWireSchema
404
- ]);
405
- const metadataWireSchema = z.object({
406
- model_name: z.string().optional(),
407
- model_provider: z.string().optional()
408
- }).nullable().optional();
409
- const llmMetricsWireSchema = z.object({
410
- type: z.literal("llm_metrics"),
411
- label: z.string(),
412
- request_id: z.string(),
413
- timestamp: z.number(),
414
- duration: z.number(),
415
- ttft: z.number(),
416
- cancelled: z.boolean(),
417
- completion_tokens: z.number(),
418
- prompt_tokens: z.number(),
419
- prompt_cached_tokens: z.number(),
420
- total_tokens: z.number(),
421
- tokens_per_second: z.number(),
422
- speech_id: z.string().nullable().optional(),
423
- metadata: metadataWireSchema
424
- });
425
- const sttMetricsWireSchema = z.object({
426
- type: z.literal("stt_metrics"),
427
- label: z.string(),
428
- request_id: z.string(),
429
- timestamp: z.number(),
430
- duration: z.number(),
431
- audio_duration: z.number(),
432
- input_tokens: z.number().optional(),
433
- output_tokens: z.number().optional(),
434
- streamed: z.boolean(),
435
- metadata: metadataWireSchema
436
- });
437
- const ttsMetricsWireSchema = z.object({
438
- type: z.literal("tts_metrics"),
439
- label: z.string(),
440
- request_id: z.string(),
441
- timestamp: z.number(),
442
- ttfb: z.number(),
443
- duration: z.number(),
444
- audio_duration: z.number(),
445
- cancelled: z.boolean(),
446
- characters_count: z.number(),
447
- input_tokens: z.number().optional(),
448
- output_tokens: z.number().optional(),
449
- streamed: z.boolean(),
450
- segment_id: z.string().nullable().optional(),
451
- speech_id: z.string().nullable().optional(),
452
- metadata: metadataWireSchema
453
- });
454
- const vadMetricsWireSchema = z.object({
455
- type: z.literal("vad_metrics"),
456
- label: z.string(),
457
- timestamp: z.number(),
458
- idle_time: z.number(),
459
- inference_duration_total: z.number(),
460
- inference_count: z.number()
461
- });
462
- const eouMetricsWireSchema = z.object({
463
- type: z.literal("eou_metrics"),
464
- timestamp: z.number(),
465
- end_of_utterance_delay: z.number(),
466
- transcription_delay: z.number(),
467
- on_user_turn_completed_delay: z.number(),
468
- speech_id: z.string().nullable().optional()
469
- });
470
- const cachedTokenDetailsWireSchema = z.object({
471
- audio_tokens: z.number(),
472
- text_tokens: z.number(),
473
- image_tokens: z.number()
474
- });
475
- const inputTokenDetailsWireSchema = z.object({
476
- audio_tokens: z.number(),
477
- text_tokens: z.number(),
478
- image_tokens: z.number(),
479
- cached_tokens: z.number(),
480
- cached_tokens_details: cachedTokenDetailsWireSchema.nullable().optional()
481
- });
482
- const outputTokenDetailsWireSchema = z.object({
483
- text_tokens: z.number(),
484
- audio_tokens: z.number(),
485
- image_tokens: z.number()
486
- });
487
- const realtimeModelMetricsWireSchema = z.object({
488
- type: z.literal("realtime_model_metrics"),
489
- label: z.string(),
490
- request_id: z.string(),
491
- timestamp: z.number(),
492
- duration: z.number(),
493
- session_duration: z.number().optional(),
494
- ttft: z.number(),
495
- cancelled: z.boolean(),
496
- input_tokens: z.number(),
497
- output_tokens: z.number(),
498
- total_tokens: z.number(),
499
- tokens_per_second: z.number(),
500
- input_token_details: inputTokenDetailsWireSchema,
501
- output_token_details: outputTokenDetailsWireSchema,
502
- metadata: metadataWireSchema
503
- });
504
- const interruptionMetricsWireSchema = z.object({
505
- type: z.literal("interruption_metrics"),
506
- timestamp: z.number(),
507
- total_duration: z.number(),
508
- prediction_duration: z.number(),
509
- detection_delay: z.number(),
510
- num_interruptions: z.number(),
511
- num_backchannels: z.number(),
512
- num_requests: z.number(),
513
- metadata: metadataWireSchema
514
- });
515
- const agentMetricsWireSchema = z.discriminatedUnion("type", [
516
- llmMetricsWireSchema,
517
- sttMetricsWireSchema,
518
- ttsMetricsWireSchema,
519
- vadMetricsWireSchema,
520
- eouMetricsWireSchema,
521
- realtimeModelMetricsWireSchema,
522
- interruptionMetricsWireSchema
523
- ]);
524
- const llmModelUsageWireSchema = z.object({
525
- type: z.literal("llm_usage"),
526
- provider: z.string().optional(),
527
- model: z.string().optional(),
528
- input_tokens: z.number().optional(),
529
- input_cached_tokens: z.number().optional(),
530
- input_audio_tokens: z.number().optional(),
531
- input_cached_audio_tokens: z.number().optional(),
532
- input_text_tokens: z.number().optional(),
533
- input_cached_text_tokens: z.number().optional(),
534
- input_image_tokens: z.number().optional(),
535
- input_cached_image_tokens: z.number().optional(),
536
- output_tokens: z.number().optional(),
537
- output_audio_tokens: z.number().optional(),
538
- output_text_tokens: z.number().optional(),
539
- session_duration: z.number().optional()
540
- });
541
- const ttsModelUsageWireSchema = z.object({
542
- type: z.literal("tts_usage"),
543
- provider: z.string().optional(),
544
- model: z.string().optional(),
545
- input_tokens: z.number().optional(),
546
- output_tokens: z.number().optional(),
547
- characters_count: z.number().optional(),
548
- audio_duration: z.number().optional()
549
- });
550
- const sttModelUsageWireSchema = z.object({
551
- type: z.literal("stt_usage"),
552
- provider: z.string().optional(),
553
- model: z.string().optional(),
554
- input_tokens: z.number().optional(),
555
- output_tokens: z.number().optional(),
556
- audio_duration: z.number().optional()
557
- });
558
- const interruptionModelUsageWireSchema = z.object({
559
- type: z.literal("interruption_usage"),
560
- provider: z.string().optional(),
561
- model: z.string().optional(),
562
- total_requests: z.number().optional()
563
- });
564
- const modelUsageWireSchema = z.discriminatedUnion("type", [
565
- llmModelUsageWireSchema,
566
- ttsModelUsageWireSchema,
567
- sttModelUsageWireSchema,
568
- interruptionModelUsageWireSchema
569
- ]);
570
- const agentSessionUsageWireSchema = z.object({
571
- model_usage: z.array(modelUsageWireSchema)
572
- });
573
- const agentStateSchema = z.enum(["initializing", "idle", "listening", "thinking", "speaking"]);
574
- const userStateSchema = z.enum(["speaking", "listening", "away"]);
575
- const clientAgentStateChangedSchema = z.object({
576
- type: z.literal("agent_state_changed"),
577
- old_state: agentStateSchema,
578
- new_state: agentStateSchema,
579
- created_at: z.number()
580
- });
581
- const clientUserStateChangedSchema = z.object({
582
- type: z.literal("user_state_changed"),
583
- old_state: userStateSchema,
584
- new_state: userStateSchema,
585
- created_at: z.number()
586
- });
587
- const clientConversationItemAddedSchema = z.object({
588
- type: z.literal("conversation_item_added"),
589
- item: chatMessageWireSchema,
590
- created_at: z.number()
591
- });
592
- const clientUserInputTranscribedSchema = z.object({
593
- type: z.literal("user_input_transcribed"),
594
- transcript: z.string(),
595
- is_final: z.boolean(),
596
- language: z.string().nullable(),
597
- created_at: z.number()
598
- });
599
- const clientFunctionToolsExecutedSchema = z.object({
600
- type: z.literal("function_tools_executed"),
601
- function_calls: z.array(functionCallWireSchema),
602
- function_call_outputs: z.array(functionCallOutputWireSchema.nullable()),
603
- created_at: z.number()
604
- });
605
- const clientMetricsCollectedSchema = z.object({
606
- type: z.literal("metrics_collected"),
607
- metrics: agentMetricsWireSchema,
608
- created_at: z.number()
609
- });
610
- const clientErrorSchema = z.object({
611
- type: z.literal("error"),
612
- message: z.string(),
613
- created_at: z.number()
614
- });
615
- const clientUserOverlappingSpeechSchema = z.object({
616
- type: z.literal("user_overlapping_speech"),
617
- is_interruption: z.boolean(),
618
- created_at: z.number(),
619
- sent_at: z.number(),
620
- detection_delay: z.number(),
621
- overlap_started_at: z.number().nullable()
622
- });
623
- const clientSessionUsageSchema = z.object({
624
- type: z.literal("session_usage"),
625
- usage: agentSessionUsageWireSchema,
626
- created_at: z.number()
627
- });
628
- const clientEventSchema = z.discriminatedUnion("type", [
629
- clientAgentStateChangedSchema,
630
- clientUserStateChangedSchema,
631
- clientConversationItemAddedSchema,
632
- clientUserInputTranscribedSchema,
633
- clientFunctionToolsExecutedSchema,
634
- clientMetricsCollectedSchema,
635
- clientErrorSchema,
636
- clientUserOverlappingSpeechSchema,
637
- clientSessionUsageSchema
638
- ]);
639
- const sendMessageRequestSchema = z.object({
640
- text: z.string()
641
- });
642
- const streamRequestSchema = z.object({
643
- request_id: z.string(),
644
- method: z.string(),
645
- payload: z.string()
646
- });
647
- const streamResponseSchema = z.object({
648
- request_id: z.string(),
649
- payload: z.string(),
650
- error: z.string().nullable().optional()
651
- });
652
- const getSessionStateResponseSchema = z.object({
653
- agent_state: agentStateSchema,
654
- user_state: userStateSchema,
655
- agent_id: z.string(),
656
- options: z.record(z.string(), z.unknown()),
657
- created_at: z.number()
658
- });
659
- const getChatHistoryResponseSchema = z.object({
660
- items: z.array(chatItemWireSchema)
661
- });
662
- const getAgentInfoResponseSchema = z.object({
663
- id: z.string(),
664
- instructions: z.string().nullable(),
665
- tools: z.array(z.string()),
666
- chat_ctx: z.array(chatItemWireSchema)
667
- });
668
- const sendMessageResponseSchema = z.object({
669
- items: z.array(chatItemWireSchema)
670
- });
671
- const getRTCStatsResponseSchema = z.object({
672
- publisher_stats: z.array(z.record(z.string(), z.unknown())),
673
- subscriber_stats: z.array(z.record(z.string(), z.unknown()))
674
- });
675
- const getSessionUsageResponseSchema = z.object({
676
- usage: agentSessionUsageWireSchema,
677
- created_at: z.number()
678
- });
679
- export {
680
- agentHandoffToWire,
681
- agentHandoffWireSchema,
682
- agentMetricsToWire,
683
- agentMetricsWireSchema,
684
- agentSessionUsageToWire,
685
- agentSessionUsageWireSchema,
686
- chatItemToWire,
687
- chatItemWireSchema,
688
- chatMessageToWire,
689
- chatMessageWireSchema,
690
- clientAgentStateChangedSchema,
691
- clientConversationItemAddedSchema,
692
- clientErrorSchema,
693
- clientEventSchema,
694
- clientFunctionToolsExecutedSchema,
695
- clientMetricsCollectedSchema,
696
- clientSessionUsageSchema,
697
- clientUserInputTranscribedSchema,
698
- clientUserOverlappingSpeechSchema,
699
- clientUserStateChangedSchema,
700
- eouMetricsWireSchema,
701
- functionCallOutputToWire,
702
- functionCallOutputWireSchema,
703
- functionCallToWire,
704
- functionCallWireSchema,
705
- getAgentInfoResponseSchema,
706
- getChatHistoryResponseSchema,
707
- getRTCStatsResponseSchema,
708
- getSessionStateResponseSchema,
709
- getSessionUsageResponseSchema,
710
- interruptionMetricsWireSchema,
711
- interruptionModelUsageWireSchema,
712
- llmMetricsWireSchema,
713
- llmModelUsageWireSchema,
714
- modelUsageToWire,
715
- modelUsageWireSchema,
716
- msToS,
717
- realtimeModelMetricsWireSchema,
718
- sendMessageRequestSchema,
719
- sendMessageResponseSchema,
720
- streamRequestSchema,
721
- streamResponseSchema,
722
- sttMetricsWireSchema,
723
- sttModelUsageWireSchema,
724
- ttsMetricsWireSchema,
725
- ttsModelUsageWireSchema,
726
- vadMetricsWireSchema
727
- };
728
- //# sourceMappingURL=wire_format.js.map