visionclaw 0.1.193 → 0.1.194-dev.feat-backup-progress-reporting.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (352)
  1. package/dist/agent/backup-uploader.d.ts +44 -0
  2. package/dist/agent/backup-uploader.d.ts.map +1 -0
  3. package/dist/agent/backup-uploader.js +83 -0
  4. package/dist/agent/backup-uploader.js.map +1 -0
  5. package/dist/agent/backup.d.ts +7 -1
  6. package/dist/agent/backup.d.ts.map +1 -1
  7. package/dist/agent/backup.js +337 -52
  8. package/dist/agent/backup.js.map +1 -1
  9. package/dist/agent/command-handlers.d.ts +2 -0
  10. package/dist/agent/command-handlers.d.ts.map +1 -1
  11. package/dist/agent/command-handlers.js +29 -3
  12. package/dist/agent/command-handlers.js.map +1 -1
  13. package/dist/agent/context.d.ts +2 -0
  14. package/dist/agent/context.d.ts.map +1 -1
  15. package/dist/agent/context.js +11 -1
  16. package/dist/agent/context.js.map +1 -1
  17. package/dist/agent/data-collector.d.ts.map +1 -1
  18. package/dist/agent/data-collector.js +23 -38
  19. package/dist/agent/data-collector.js.map +1 -1
  20. package/dist/agent/loop.d.ts +3 -0
  21. package/dist/agent/loop.d.ts.map +1 -1
  22. package/dist/agent/loop.js +48 -0
  23. package/dist/agent/loop.js.map +1 -1
  24. package/dist/agent/message-format.d.ts.map +1 -1
  25. package/dist/agent/message-format.js +3 -0
  26. package/dist/agent/message-format.js.map +1 -1
  27. package/dist/agent/status.d.ts.map +1 -1
  28. package/dist/agent/status.js +2 -1
  29. package/dist/agent/status.js.map +1 -1
  30. package/dist/backup.d.ts +4 -1
  31. package/dist/backup.d.ts.map +1 -1
  32. package/dist/backup.js +86 -3
  33. package/dist/backup.js.map +1 -1
  34. package/dist/calendar/google-calendar.d.ts +2 -0
  35. package/dist/calendar/google-calendar.d.ts.map +1 -1
  36. package/dist/calendar/google-calendar.js +3 -0
  37. package/dist/calendar/google-calendar.js.map +1 -1
  38. package/dist/channels/interface.d.ts +9 -0
  39. package/dist/channels/interface.d.ts.map +1 -1
  40. package/dist/channels/manager.d.ts +7 -1
  41. package/dist/channels/manager.d.ts.map +1 -1
  42. package/dist/channels/manager.js +12 -0
  43. package/dist/channels/manager.js.map +1 -1
  44. package/dist/channels/telegram.d.ts.map +1 -1
  45. package/dist/channels/telegram.js +5 -0
  46. package/dist/channels/telegram.js.map +1 -1
  47. package/dist/cli/index.d.ts +1 -0
  48. package/dist/cli/index.d.ts.map +1 -1
  49. package/dist/cli/index.js +42 -2
  50. package/dist/cli/index.js.map +1 -1
  51. package/dist/config/types.d.ts +9 -0
  52. package/dist/config/types.d.ts.map +1 -1
  53. package/dist/config/types.js +6 -0
  54. package/dist/config/types.js.map +1 -1
  55. package/dist/drive/google-drive.d.ts +1 -0
  56. package/dist/drive/google-drive.d.ts.map +1 -1
  57. package/dist/drive/google-drive.js +5 -0
  58. package/dist/drive/google-drive.js.map +1 -1
  59. package/dist/i18n/messages.d.ts +4 -0
  60. package/dist/i18n/messages.d.ts.map +1 -1
  61. package/dist/i18n/messages.js +7 -2
  62. package/dist/i18n/messages.js.map +1 -1
  63. package/dist/index.js.map +1 -1
  64. package/dist/obs/server.d.ts +7 -0
  65. package/dist/obs/server.d.ts.map +1 -1
  66. package/dist/obs/server.js +22 -0
  67. package/dist/obs/server.js.map +1 -1
  68. package/dist/onboarding/bot-profile.d.ts.map +1 -1
  69. package/dist/onboarding/bot-profile.js +1 -0
  70. package/dist/onboarding/bot-profile.js.map +1 -1
  71. package/dist/onboarding/index.d.ts.map +1 -1
  72. package/dist/onboarding/index.js +5 -0
  73. package/dist/onboarding/index.js.map +1 -1
  74. package/dist/onboarding/prepare-mac.d.ts +1 -0
  75. package/dist/onboarding/prepare-mac.d.ts.map +1 -1
  76. package/dist/onboarding/prepare-mac.js +32 -15
  77. package/dist/onboarding/prepare-mac.js.map +1 -1
  78. package/dist/onboarding/setup-shared.d.ts.map +1 -1
  79. package/dist/onboarding/setup-shared.js +6 -0
  80. package/dist/onboarding/setup-shared.js.map +1 -1
  81. package/dist/onboarding/setup-steps.d.ts.map +1 -1
  82. package/dist/onboarding/setup-steps.js +8 -0
  83. package/dist/onboarding/setup-steps.js.map +1 -1
  84. package/dist/realtime/agent-bridge.d.ts +7 -0
  85. package/dist/realtime/agent-bridge.d.ts.map +1 -0
  86. package/dist/realtime/agent-bridge.js +31 -0
  87. package/dist/realtime/agent-bridge.js.map +1 -0
  88. package/dist/realtime/assets/index.html +1058 -0
  89. package/dist/realtime/assets/samples/alloy.mp3 +0 -0
  90. package/dist/realtime/assets/samples/ash.mp3 +0 -0
  91. package/dist/realtime/assets/samples/ballad.mp3 +0 -0
  92. package/dist/realtime/assets/samples/cedar.mp3 +0 -0
  93. package/dist/realtime/assets/samples/coral.mp3 +0 -0
  94. package/dist/realtime/assets/samples/echo.mp3 +0 -0
  95. package/dist/realtime/assets/samples/marin.mp3 +0 -0
  96. package/dist/realtime/assets/samples/sage.mp3 +0 -0
  97. package/dist/realtime/assets/samples/shimmer.mp3 +0 -0
  98. package/dist/realtime/assets/samples/verse.mp3 +0 -0
  99. package/dist/realtime/context.d.ts +14 -0
  100. package/dist/realtime/context.d.ts.map +1 -0
  101. package/dist/realtime/context.js +153 -0
  102. package/dist/realtime/context.js.map +1 -0
  103. package/dist/realtime/http-helpers.d.ts +5 -0
  104. package/dist/realtime/http-helpers.d.ts.map +1 -0
  105. package/dist/realtime/http-helpers.js +29 -0
  106. package/dist/realtime/http-helpers.js.map +1 -0
  107. package/dist/realtime/index.d.ts +62 -0
  108. package/dist/realtime/index.d.ts.map +1 -0
  109. package/dist/realtime/index.js +94 -0
  110. package/dist/realtime/index.js.map +1 -0
  111. package/dist/realtime/server.d.ts +6 -0
  112. package/dist/realtime/server.d.ts.map +1 -0
  113. package/dist/realtime/server.js +476 -0
  114. package/dist/realtime/server.js.map +1 -0
  115. package/dist/realtime/telegram-auth.d.ts +2 -0
  116. package/dist/realtime/telegram-auth.d.ts.map +1 -0
  117. package/dist/realtime/telegram-auth.js +24 -0
  118. package/dist/realtime/telegram-auth.js.map +1 -0
  119. package/dist/realtime/tools.d.ts +829 -0
  120. package/dist/realtime/tools.d.ts.map +1 -0
  121. package/dist/realtime/tools.js +630 -0
  122. package/dist/realtime/tools.js.map +1 -0
  123. package/dist/realtime/types.d.ts +62 -0
  124. package/dist/realtime/types.d.ts.map +1 -0
  125. package/dist/realtime/types.js +3 -0
  126. package/dist/realtime/types.js.map +1 -0
  127. package/dist/realtime/voice-summarizer.d.ts +4 -0
  128. package/dist/realtime/voice-summarizer.d.ts.map +1 -0
  129. package/dist/realtime/voice-summarizer.js +129 -0
  130. package/dist/realtime/voice-summarizer.js.map +1 -0
  131. package/dist/restore.d.ts +5 -0
  132. package/dist/restore.d.ts.map +1 -1
  133. package/dist/restore.js +132 -16
  134. package/dist/restore.js.map +1 -1
  135. package/dist/tools/email.d.ts +1 -1
  136. package/dist/tools/memory.d.ts +13 -0
  137. package/dist/tools/memory.d.ts.map +1 -1
  138. package/dist/tools/memory.js +43 -43
  139. package/dist/tools/memory.js.map +1 -1
  140. package/dist/tools/stock-data.d.ts +16 -0
  141. package/dist/tools/stock-data.d.ts.map +1 -1
  142. package/dist/tools/stock-data.js +36 -38
  143. package/dist/tools/stock-data.js.map +1 -1
  144. package/dist/tools/web-fetch.d.ts +4 -0
  145. package/dist/tools/web-fetch.d.ts.map +1 -1
  146. package/dist/tools/web-fetch.js +95 -23
  147. package/dist/tools/web-fetch.js.map +1 -1
  148. package/dist/tos-storage.d.ts +45 -0
  149. package/dist/tos-storage.d.ts.map +1 -0
  150. package/dist/tos-storage.js +134 -0
  151. package/dist/tos-storage.js.map +1 -0
  152. package/dist-agent/bundle.cjs +170934 -167547
  153. package/package.json +3 -2
  154. package/dist/agent/applied-credential-signature.d.ts +0 -53
  155. package/dist/agent/applied-credential-signature.d.ts.map +0 -1
  156. package/dist/agent/applied-credential-signature.js +0 -137
  157. package/dist/agent/applied-credential-signature.js.map +0 -1
  158. package/dist/agent/engines/claude/cli-resolver.d.ts +0 -16
  159. package/dist/agent/engines/claude/cli-resolver.d.ts.map +0 -1
  160. package/dist/agent/engines/claude/cli-resolver.js +0 -83
  161. package/dist/agent/engines/claude/cli-resolver.js.map +0 -1
  162. package/dist/agent/engines/claude/session-browser-policy.d.ts +0 -9
  163. package/dist/agent/engines/claude/session-browser-policy.d.ts.map +0 -1
  164. package/dist/agent/engines/claude/session-browser-policy.js +0 -49
  165. package/dist/agent/engines/claude/session-browser-policy.js.map +0 -1
  166. package/dist/agent/engines/claude/session.d.ts +0 -291
  167. package/dist/agent/engines/claude/session.d.ts.map +0 -1
  168. package/dist/agent/engines/claude/session.js +0 -1177
  169. package/dist/agent/engines/claude/session.js.map +0 -1
  170. package/dist/agent/engines/client-factory.d.ts +0 -63
  171. package/dist/agent/engines/client-factory.d.ts.map +0 -1
  172. package/dist/agent/engines/client-factory.js +0 -382
  173. package/dist/agent/engines/client-factory.js.map +0 -1
  174. package/dist/agent/engines/engine.d.ts +0 -8
  175. package/dist/agent/engines/engine.d.ts.map +0 -1
  176. package/dist/agent/engines/engine.js +0 -15
  177. package/dist/agent/engines/engine.js.map +0 -1
  178. package/dist/agent/engines/openai/file-session.d.ts +0 -49
  179. package/dist/agent/engines/openai/file-session.d.ts.map +0 -1
  180. package/dist/agent/engines/openai/file-session.js +0 -108
  181. package/dist/agent/engines/openai/file-session.js.map +0 -1
  182. package/dist/agent/engines/openai/file-tools.d.ts +0 -35
  183. package/dist/agent/engines/openai/file-tools.d.ts.map +0 -1
  184. package/dist/agent/engines/openai/file-tools.js +0 -194
  185. package/dist/agent/engines/openai/file-tools.js.map +0 -1
  186. package/dist/agent/engines/openai/session.d.ts +0 -190
  187. package/dist/agent/engines/openai/session.d.ts.map +0 -1
  188. package/dist/agent/engines/openai/session.js +0 -1066
  189. package/dist/agent/engines/openai/session.js.map +0 -1
  190. package/dist/agent/engines/openai/tools.d.ts +0 -13
  191. package/dist/agent/engines/openai/tools.d.ts.map +0 -1
  192. package/dist/agent/engines/openai/tools.js +0 -248
  193. package/dist/agent/engines/openai/tools.js.map +0 -1
  194. package/dist/agent/engines/session-types.d.ts +0 -146
  195. package/dist/agent/engines/session-types.d.ts.map +0 -1
  196. package/dist/agent/engines/session-types.js +0 -2
  197. package/dist/agent/engines/session-types.js.map +0 -1
  198. package/dist/agent/engines/system-prompt-log.d.ts +0 -9
  199. package/dist/agent/engines/system-prompt-log.d.ts.map +0 -1
  200. package/dist/agent/engines/system-prompt-log.js +0 -46
  201. package/dist/agent/engines/system-prompt-log.js.map +0 -1
  202. package/dist/agent/transcript/transcript-backfill.d.ts +0 -54
  203. package/dist/agent/transcript/transcript-backfill.d.ts.map +0 -1
  204. package/dist/agent/transcript/transcript-backfill.js +0 -604
  205. package/dist/agent/transcript/transcript-backfill.js.map +0 -1
  206. package/dist/agent/transcript/transcript-indexer.d.ts +0 -273
  207. package/dist/agent/transcript/transcript-indexer.d.ts.map +0 -1
  208. package/dist/agent/transcript/transcript-indexer.js +0 -1217
  209. package/dist/agent/transcript/transcript-indexer.js.map +0 -1
  210. package/dist/agent/transcript/transcript-memory-migrations.d.ts +0 -25
  211. package/dist/agent/transcript/transcript-memory-migrations.d.ts.map +0 -1
  212. package/dist/agent/transcript/transcript-memory-migrations.js +0 -87
  213. package/dist/agent/transcript/transcript-memory-migrations.js.map +0 -1
  214. package/dist/agent/transcript-memory-migrations.d.ts +0 -25
  215. package/dist/agent/transcript-memory-migrations.d.ts.map +0 -1
  216. package/dist/agent/transcript-memory-migrations.js +0 -87
  217. package/dist/agent/transcript-memory-migrations.js.map +0 -1
  218. package/dist/agent/tunnel-credential-handler.d.ts +0 -90
  219. package/dist/agent/tunnel-credential-handler.d.ts.map +0 -1
  220. package/dist/agent/tunnel-credential-handler.js +0 -162
  221. package/dist/agent/tunnel-credential-handler.js.map +0 -1
  222. package/dist/agent/usage/usage-backfill-handler.d.ts +0 -18
  223. package/dist/agent/usage/usage-backfill-handler.d.ts.map +0 -1
  224. package/dist/agent/usage/usage-backfill-handler.js +0 -69
  225. package/dist/agent/usage/usage-backfill-handler.js.map +0 -1
  226. package/dist/agent/usage/usage-gate.d.ts +0 -25
  227. package/dist/agent/usage/usage-gate.d.ts.map +0 -1
  228. package/dist/agent/usage/usage-gate.js +0 -83
  229. package/dist/agent/usage/usage-gate.js.map +0 -1
  230. package/dist/agent/usage/usage-handler.d.ts +0 -7
  231. package/dist/agent/usage/usage-handler.d.ts.map +0 -1
  232. package/dist/agent/usage/usage-handler.js +0 -28
  233. package/dist/agent/usage/usage-handler.js.map +0 -1
  234. package/dist/agent/usage/usage-report-builder.d.ts +0 -26
  235. package/dist/agent/usage/usage-report-builder.d.ts.map +0 -1
  236. package/dist/agent/usage/usage-report-builder.js +0 -80
  237. package/dist/agent/usage/usage-report-builder.js.map +0 -1
  238. package/dist/agent/usage/usage-report-queue.d.ts +0 -26
  239. package/dist/agent/usage/usage-report-queue.d.ts.map +0 -1
  240. package/dist/agent/usage/usage-report-queue.js +0 -199
  241. package/dist/agent/usage/usage-report-queue.js.map +0 -1
  242. package/dist/agent/usage/usage-report-types.d.ts +0 -41
  243. package/dist/agent/usage/usage-report-types.d.ts.map +0 -1
  244. package/dist/agent/usage/usage-report-types.js +0 -2
  245. package/dist/agent/usage/usage-report-types.js.map +0 -1
  246. package/dist/agent/usage/usage-reporter.d.ts +0 -31
  247. package/dist/agent/usage/usage-reporter.d.ts.map +0 -1
  248. package/dist/agent/usage/usage-reporter.js +0 -102
  249. package/dist/agent/usage/usage-reporter.js.map +0 -1
  250. package/dist/agent/usage-backfill-handler.d.ts +0 -18
  251. package/dist/agent/usage-backfill-handler.d.ts.map +0 -1
  252. package/dist/agent/usage-backfill-handler.js +0 -69
  253. package/dist/agent/usage-backfill-handler.js.map +0 -1
  254. package/dist/agent/usage-gate.d.ts +0 -25
  255. package/dist/agent/usage-gate.d.ts.map +0 -1
  256. package/dist/agent/usage-gate.js +0 -83
  257. package/dist/agent/usage-gate.js.map +0 -1
  258. package/dist/agent/usage-report-builder.d.ts +0 -26
  259. package/dist/agent/usage-report-builder.d.ts.map +0 -1
  260. package/dist/agent/usage-report-builder.js +0 -80
  261. package/dist/agent/usage-report-builder.js.map +0 -1
  262. package/dist/agent/usage-report-queue.d.ts +0 -26
  263. package/dist/agent/usage-report-queue.d.ts.map +0 -1
  264. package/dist/agent/usage-report-queue.js +0 -199
  265. package/dist/agent/usage-report-queue.js.map +0 -1
  266. package/dist/agent/usage-report-types.d.ts +0 -41
  267. package/dist/agent/usage-report-types.d.ts.map +0 -1
  268. package/dist/agent/usage-report-types.js +0 -2
  269. package/dist/agent/usage-report-types.js.map +0 -1
  270. package/dist/agent/usage-reporter.d.ts +0 -31
  271. package/dist/agent/usage-reporter.d.ts.map +0 -1
  272. package/dist/agent/usage-reporter.js +0 -102
  273. package/dist/agent/usage-reporter.js.map +0 -1
  274. package/dist/agent/wake-cycle-tool-tracker.d.ts +0 -39
  275. package/dist/agent/wake-cycle-tool-tracker.d.ts.map +0 -1
  276. package/dist/agent/wake-cycle-tool-tracker.js +0 -72
  277. package/dist/agent/wake-cycle-tool-tracker.js.map +0 -1
  278. package/dist/billing/payg-handler.d.ts +0 -29
  279. package/dist/billing/payg-handler.d.ts.map +0 -1
  280. package/dist/billing/payg-handler.js +0 -92
  281. package/dist/billing/payg-handler.js.map +0 -1
  282. package/dist/billing/payment-handler.d.ts +0 -24
  283. package/dist/billing/payment-handler.d.ts.map +0 -1
  284. package/dist/billing/payment-handler.js +0 -101
  285. package/dist/billing/payment-handler.js.map +0 -1
  286. package/dist/builtin-skills/catalog/phone-adb-automation/SKILL.md +0 -412
  287. package/dist/builtin-skills/catalog/phone-adb-automation/phone_input.sh +0 -132
  288. package/dist/builtin-skills/catalog/phone-adb-automation/phone_launch.sh +0 -166
  289. package/dist/builtin-skills/catalog/phone-adb-automation/phone_screenshot.sh +0 -87
  290. package/dist/builtin-skills/catalog/phone-adb-automation/phone_security_kbd.py +0 -174
  291. package/dist/builtin-skills/catalog/phone-adb-automation/phone_setup.sh +0 -274
  292. package/dist/builtin-skills/catalog/phone-adb-automation/phone_swipe.sh +0 -111
  293. package/dist/builtin-skills/catalog/phone-adb-automation/phone_tap.sh +0 -87
  294. package/dist/builtin-skills/catalog/phone-adb-automation/phone_ui_parse.py +0 -176
  295. package/dist/builtin-skills/catalog/phone-adb-automation/phone_wake_unlock.sh +0 -67
  296. package/dist/builtin-skills/transcribe-audio/SKILL.md +0 -122
  297. package/dist/data-processing/convert-demo-cli.d.ts +0 -7
  298. package/dist/data-processing/convert-demo-cli.d.ts.map +0 -1
  299. package/dist/data-processing/convert-demo-cli.js +0 -30
  300. package/dist/data-processing/convert-demo-cli.js.map +0 -1
  301. package/dist/data-processing/convert-demo.d.ts +0 -26
  302. package/dist/data-processing/convert-demo.d.ts.map +0 -1
  303. package/dist/data-processing/convert-demo.js +0 -233
  304. package/dist/data-processing/convert-demo.js.map +0 -1
  305. package/dist/obs/rdp/icons/icons/app_windows.svg +0 -4
  306. package/dist/obs/rdp/icons/icons/clip_get.svg +0 -4
  307. package/dist/obs/rdp/icons/icons/clip_send.svg +0 -4
  308. package/dist/obs/rdp/icons/icons/clip_shared.svg +0 -4
  309. package/dist/obs/rdp/icons/icons/clipboard.svg +0 -4
  310. package/dist/obs/rdp/icons/icons/clipboard_shared.svg +0 -4
  311. package/dist/obs/rdp/icons/icons/control.svg +0 -4
  312. package/dist/obs/rdp/icons/icons/desktop.svg +0 -4
  313. package/dist/obs/rdp/icons/icons/display.svg +0 -4
  314. package/dist/obs/rdp/icons/icons/launchpad.svg +0 -4
  315. package/dist/obs/rdp/icons/icons/mission_control.svg +0 -4
  316. package/dist/obs/rdp/icons/icons/screenshot.svg +0 -4
  317. package/dist/obs/rdp/icons/icons/zoom_actual.svg +0 -4
  318. package/dist/obs/rdp/icons/icons/zoom_fit.svg +0 -4
  319. package/dist/obs/rdp/icons/icons/zoom_in.svg +0 -4
  320. package/dist/obs/rdp/icons/icons/zoom_out.svg +0 -4
  321. package/dist/obs/tunnel-telemetry.d.ts +0 -46
  322. package/dist/obs/tunnel-telemetry.d.ts.map +0 -1
  323. package/dist/obs/tunnel-telemetry.js +0 -70
  324. package/dist/obs/tunnel-telemetry.js.map +0 -1
  325. package/dist/onboarding/cloudflared-cert.d.ts +0 -15
  326. package/dist/onboarding/cloudflared-cert.d.ts.map +0 -1
  327. package/dist/onboarding/cloudflared-cert.js +0 -57
  328. package/dist/onboarding/cloudflared-cert.js.map +0 -1
  329. package/dist/onboarding/playwriter-extension.d.ts +0 -19
  330. package/dist/onboarding/playwriter-extension.d.ts.map +0 -1
  331. package/dist/onboarding/playwriter-extension.js +0 -246
  332. package/dist/onboarding/playwriter-extension.js.map +0 -1
  333. package/dist/service/gbox-tun.d.ts +0 -14
  334. package/dist/service/gbox-tun.d.ts.map +0 -1
  335. package/dist/service/gbox-tun.js +0 -315
  336. package/dist/service/gbox-tun.js.map +0 -1
  337. package/dist/skills/installed.d.ts +0 -11
  338. package/dist/skills/installed.d.ts.map +0 -1
  339. package/dist/skills/installed.js +0 -35
  340. package/dist/skills/installed.js.map +0 -1
  341. package/dist/tools/coordinate-resolver.d.ts +0 -30
  342. package/dist/tools/coordinate-resolver.d.ts.map +0 -1
  343. package/dist/tools/coordinate-resolver.js +0 -104
  344. package/dist/tools/coordinate-resolver.js.map +0 -1
  345. package/dist/utils/playwriter-relay.d.ts +0 -9
  346. package/dist/utils/playwriter-relay.d.ts.map +0 -1
  347. package/dist/utils/playwriter-relay.js +0 -77
  348. package/dist/utils/playwriter-relay.js.map +0 -1
  349. package/dist/utils/wechat-monitor.d.ts +0 -21
  350. package/dist/utils/wechat-monitor.d.ts.map +0 -1
  351. package/dist/utils/wechat-monitor.js +0 -88
  352. package/dist/utils/wechat-monitor.js.map +0 -1
package/dist/realtime/assets/index.html
@@ -0,0 +1,1058 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no">
6
+ <title>VisionClaw Voice</title>
7
+ <script src="https://telegram.org/js/telegram-web-app.js"></script>
8
+ <style>
9
+ :root {
10
+ --bg: #09090b;
11
+ --surface: #18181b;
12
+ --surface-hover: #27272a;
13
+ --border: #27272a;
14
+ --text: #fafafa;
15
+ --text-secondary: #a1a1aa;
16
+ --text-muted: #71717a;
17
+ --accent: #8b5cf6;
18
+ --accent-light: #a78bfa;
19
+ --accent-glow: rgba(139, 92, 246, 0.25);
20
+ --accent-glow-strong: rgba(139, 92, 246, 0.4);
21
+ --success: #10b981;
22
+ --error: #f43f5e;
23
+ --radius: 16px;
24
+ }
25
+
26
+ * { margin: 0; padding: 0; box-sizing: border-box; }
27
+
28
+ body {
29
+ font-family: -apple-system, BlinkMacSystemFont, 'SF Pro Display', 'Segoe UI', system-ui, sans-serif;
30
+ background: var(--bg);
31
+ color: var(--text);
32
+ height: 100vh;
33
+ height: 100dvh;
34
+ display: flex;
35
+ flex-direction: column;
36
+ overflow: hidden;
37
+ -webkit-font-smoothing: antialiased;
38
+ }
39
+
40
+ /* ─── Start Screen ─── */
41
+ .start-screen {
42
+ flex: 1;
43
+ display: flex;
44
+ flex-direction: column;
45
+ align-items: center;
46
+ justify-content: center;
47
+ padding: 40px 24px;
48
+ gap: 0;
49
+ }
50
+
51
+ .start-screen.hidden { display: none; }
52
+
53
+ .brand-icon {
54
+ width: 64px;
55
+ height: 64px;
56
+ border-radius: 20px;
57
+ background: linear-gradient(135deg, var(--accent), #6d28d9);
58
+ display: flex;
59
+ align-items: center;
60
+ justify-content: center;
61
+ margin-bottom: 24px;
62
+ box-shadow: 0 8px 32px var(--accent-glow);
63
+ }
64
+
65
+ .brand-icon svg {
66
+ width: 32px;
67
+ height: 32px;
68
+ fill: white;
69
+ }
70
+
71
+ .start-screen h1 {
72
+ font-size: 22px;
73
+ font-weight: 600;
74
+ letter-spacing: -0.3px;
75
+ margin-bottom: 8px;
76
+ }
77
+
78
+ .start-screen .subtitle {
79
+ color: var(--text-muted);
80
+ font-size: 15px;
81
+ margin-bottom: 32px;
82
+ }
83
+
84
+ .start-btn {
85
+ width: 110px;
86
+ height: 110px;
87
+ border-radius: 50%;
88
+ border: none;
89
+ background: linear-gradient(135deg, var(--accent), #7c3aed);
90
+ color: white;
91
+ font-size: 14px;
92
+ font-weight: 600;
93
+ cursor: pointer;
94
+ transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
95
+ display: flex;
96
+ flex-direction: column;
97
+ align-items: center;
98
+ justify-content: center;
99
+ gap: 6px;
100
+ position: relative;
101
+ box-shadow: 0 4px 24px var(--accent-glow), 0 0 0 0 var(--accent-glow);
102
+ }
103
+
104
+ .start-btn svg {
105
+ width: 28px;
106
+ height: 28px;
107
+ fill: white;
108
+ }
109
+
110
+ .start-btn:hover {
111
+ transform: scale(1.05);
112
+ box-shadow: 0 8px 40px var(--accent-glow-strong), 0 0 0 8px rgba(139, 92, 246, 0.08);
113
+ }
114
+
115
+ .start-btn:active {
116
+ transform: scale(0.97);
117
+ }
118
+
119
+ /* Subtle pulse ring on start button */
120
+ .start-btn::before {
121
+ content: '';
122
+ position: absolute;
123
+ inset: -4px;
124
+ border-radius: 50%;
125
+ border: 2px solid var(--accent-light);
126
+ opacity: 0;
127
+ animation: startPulse 3s ease-in-out infinite;
128
+ }
129
+
130
+ @keyframes startPulse {
131
+ 0%, 100% { opacity: 0; transform: scale(1); }
132
+ 50% { opacity: 0.3; transform: scale(1.08); }
133
+ }
134
+
135
+ .footer-text {
136
+ margin-top: 24px;
137
+ color: var(--text-muted);
138
+ font-size: 11px;
139
+ opacity: 0.5;
140
+ }
141
+
142
+ /* ─── Voice Selector (Dropdown) ─── */
143
+ .voice-selector {
144
+ display: flex;
145
+ flex-direction: column;
146
+ align-items: center;
147
+ gap: 10px;
148
+ margin-bottom: 28px;
149
+ width: 100%;
150
+ max-width: 260px;
151
+ }
152
+
153
+ .voice-selector label {
154
+ font-size: 13px;
155
+ color: var(--text-muted);
156
+ font-weight: 500;
157
+ }
158
+
159
+ .voice-dropdown {
160
+ width: 100%;
161
+ padding: 12px 16px;
162
+ border-radius: 12px;
163
+ border: 1.5px solid var(--border);
164
+ background: var(--surface);
165
+ color: var(--text);
166
+ font-size: 15px;
167
+ font-weight: 500;
168
+ cursor: pointer;
169
+ transition: all 0.2s ease;
170
+ appearance: none;
171
+ -webkit-appearance: none;
172
+ background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='12' height='12' viewBox='0 0 12 12'%3E%3Cpath fill='%23a1a1aa' d='M6 8L1 3h10z'/%3E%3C/svg%3E");
173
+ background-repeat: no-repeat;
174
+ background-position: right 14px center;
175
+ text-transform: capitalize;
176
+ }
177
+
178
+ .voice-dropdown:hover,
179
+ .voice-dropdown:focus {
180
+ border-color: var(--accent);
181
+ outline: none;
182
+ box-shadow: 0 0 12px var(--accent-glow);
183
+ }
184
+
185
+ .voice-dropdown option {
186
+ background: var(--surface);
187
+ color: var(--text);
188
+ padding: 8px;
189
+ }
190
+
191
+ /* ─── Session View ─── */
192
+ .session-view { display: none; flex-direction: column; height: 100vh; height: 100dvh; }
193
+ .session-view.active { display: flex; }
194
+
195
+ .header {
196
+ padding: 14px 20px;
197
+ display: flex;
198
+ align-items: center;
199
+ gap: 12px;
200
+ background: var(--surface);
201
+ border-bottom: 1px solid var(--border);
202
+ position: relative;
203
+ }
204
+
205
+ .header-avatar {
206
+ width: 34px;
207
+ height: 34px;
208
+ border-radius: 10px;
209
+ background: linear-gradient(135deg, var(--accent), #6d28d9);
210
+ display: flex;
211
+ align-items: center;
212
+ justify-content: center;
213
+ }
214
+
215
+ .header-avatar svg {
216
+ width: 18px;
217
+ height: 18px;
218
+ fill: white;
219
+ }
220
+
221
+ .header .info h1 {
222
+ font-size: 15px;
223
+ font-weight: 600;
224
+ letter-spacing: -0.2px;
225
+ }
226
+
227
+ .header .status {
228
+ font-size: 12px;
229
+ color: var(--text-muted);
230
+ display: flex;
231
+ align-items: center;
232
+ gap: 5px;
233
+ }
234
+
235
+ .header .status .dot {
236
+ width: 6px;
237
+ height: 6px;
238
+ border-radius: 50%;
239
+ background: var(--text-muted);
240
+ transition: background 0.3s ease;
241
+ }
242
+
243
+ .header .status .dot.connected { background: var(--success); }
244
+ .header .status .dot.error { background: var(--error); }
245
+
246
+ .end-btn {
247
+ position: absolute;
248
+ right: 16px;
249
+ top: 50%;
250
+ transform: translateY(-50%);
251
+ background: transparent;
252
+ border: 1px solid var(--border);
253
+ color: var(--text-muted);
254
+ padding: 6px 14px;
255
+ border-radius: 8px;
256
+ font-size: 13px;
257
+ font-weight: 500;
258
+ cursor: pointer;
259
+ transition: all 0.2s ease;
260
+ }
261
+
262
+ .end-btn:hover {
263
+ border-color: var(--error);
264
+ color: var(--error);
265
+ background: rgba(244, 63, 94, 0.06);
266
+ }
267
+
268
+ /* ─── Transcript ─── */
269
+ .transcript {
270
+ flex: 1;
271
+ overflow-y: auto;
272
+ padding: 16px 16px;
273
+ display: flex;
274
+ flex-direction: column;
275
+ gap: 8px;
276
+ scroll-behavior: smooth;
277
+ }
278
+
279
+ .transcript::-webkit-scrollbar { width: 4px; }
280
+ .transcript::-webkit-scrollbar-thumb { background: var(--border); border-radius: 4px; }
281
+
282
+ .msg {
283
+ max-width: 82%;
284
+ padding: 10px 14px;
285
+ border-radius: 18px;
286
+ font-size: 14px;
287
+ line-height: 1.5;
288
+ animation: msgIn 0.25s cubic-bezier(0.4, 0, 0.2, 1);
289
+ }
290
+
291
+ @keyframes msgIn {
292
+ from { opacity: 0; transform: translateY(6px) scale(0.97); }
293
+ to { opacity: 1; transform: translateY(0) scale(1); }
294
+ }
295
+
296
+ .msg.user {
297
+ align-self: flex-end;
298
+ background: var(--accent);
299
+ color: white;
300
+ border-bottom-right-radius: 6px;
301
+ }
302
+
303
+ .msg.assistant {
304
+ align-self: flex-start;
305
+ background: var(--surface);
306
+ border: 1px solid var(--border);
307
+ border-bottom-left-radius: 6px;
308
+ }
309
+
310
+ .msg.system {
311
+ align-self: center;
312
+ background: transparent;
313
+ color: var(--text-muted);
314
+ font-size: 12px;
315
+ padding: 6px 12px;
316
+ }
317
+
318
+ .msg.tool {
319
+ align-self: center;
320
+ background: rgba(139, 92, 246, 0.08);
321
+ border: 1px solid rgba(139, 92, 246, 0.2);
322
+ color: var(--accent-light);
323
+ font-size: 12px;
324
+ font-weight: 500;
325
+ padding: 6px 14px;
326
+ border-radius: 20px;
327
+ }
328
+
329
+ .msg.tool.thinking {
330
+ display: flex;
331
+ align-items: center;
332
+ gap: 8px;
333
+ }
334
+
335
+ .msg.tool.thinking .dots {
336
+ display: flex;
337
+ gap: 3px;
338
+ }
339
+
340
+ .msg.tool.thinking .dots span {
341
+ width: 4px;
342
+ height: 4px;
343
+ border-radius: 50%;
344
+ background: var(--accent-light);
345
+ animation: dotBounce 1.4s ease-in-out infinite;
346
+ }
347
+
348
+ .msg.tool.thinking .dots span:nth-child(2) { animation-delay: 0.16s; }
349
+ .msg.tool.thinking .dots span:nth-child(3) { animation-delay: 0.32s; }
350
+
351
+ @keyframes dotBounce {
352
+ 0%, 80%, 100% { opacity: 0.3; }
353
+ 40% { opacity: 1; }
354
+ }
355
+
356
+ /* ─── Controls ─── */
357
+ .controls {
358
+ padding: 20px 20px 28px;
359
+ display: flex;
360
+ flex-direction: column;
361
+ align-items: center;
362
+ gap: 12px;
363
+ background: linear-gradient(to top, var(--bg) 60%, transparent);
364
+ }
365
+
366
+ .mic-btn {
367
+ width: 64px;
368
+ height: 64px;
369
+ border-radius: 50%;
370
+ border: 2px solid var(--border);
371
+ background: var(--surface);
372
+ cursor: pointer;
373
+ display: flex;
374
+ align-items: center;
375
+ justify-content: center;
376
+ transition: all 0.2s cubic-bezier(0.4, 0, 0.2, 1);
377
+ position: relative;
378
+ }
379
+
380
+ .mic-btn svg {
381
+ width: 24px;
382
+ height: 24px;
383
+ fill: var(--text-secondary);
384
+ transition: fill 0.2s ease;
385
+ }
386
+
387
+ .mic-btn:hover {
388
+ border-color: var(--accent);
389
+ background: var(--surface-hover);
390
+ }
391
+
392
+ .mic-btn:hover svg { fill: var(--accent-light); }
393
+
394
+ .mic-btn.active {
395
+ background: var(--accent);
396
+ border-color: var(--accent);
397
+ box-shadow: 0 0 24px var(--accent-glow-strong);
398
+ }
399
+
400
+ .mic-btn.active svg { fill: white; }
401
+
402
+ .mic-btn.active::before {
403
+ content: '';
404
+ position: absolute;
405
+ inset: -6px;
406
+ border-radius: 50%;
407
+ border: 2px solid var(--accent);
408
+ animation: micPulse 1.6s ease-out infinite;
409
+ }
410
+
411
+ @keyframes micPulse {
412
+ 0% { transform: scale(1); opacity: 0.5; }
413
+ 100% { transform: scale(1.4); opacity: 0; }
414
+ }
415
+
416
+ .mic-btn.muted {
417
+ border-color: var(--error);
418
+ }
419
+
420
+ .mic-btn.muted svg { fill: var(--error); }
421
+
422
+ .btn-label {
423
+ font-size: 12px;
424
+ color: var(--text-muted);
425
+ font-weight: 500;
426
+ }
427
+
428
+ /* ─── Overlay ─── */
429
+ .overlay {
430
+ position: fixed;
431
+ inset: 0;
432
+ background: rgba(9, 9, 11, 0.85);
433
+ backdrop-filter: blur(8px);
434
+ -webkit-backdrop-filter: blur(8px);
435
+ display: flex;
436
+ align-items: center;
437
+ justify-content: center;
438
+ z-index: 100;
439
+ }
440
+
441
+ .overlay.hidden { display: none; }
442
+
443
+ .overlay-content {
444
+ text-align: center;
445
+ display: flex;
446
+ flex-direction: column;
447
+ align-items: center;
448
+ gap: 16px;
449
+ }
450
+
451
+ .spinner {
452
+ width: 36px;
453
+ height: 36px;
454
+ border: 3px solid var(--border);
455
+ border-top-color: var(--accent);
456
+ border-radius: 50%;
457
+ animation: spin 0.7s linear infinite;
458
+ }
459
+
460
+ @keyframes spin { to { transform: rotate(360deg); } }
461
+
462
+ .overlay .text {
463
+ color: var(--text-secondary);
464
+ font-size: 14px;
465
+ font-weight: 500;
466
+ }
467
+ </style>
468
+ </head>
469
+ <body>
470
+
471
+ <!-- Start Screen -->
472
+ <div class="start-screen" id="startScreen">
473
+ <div class="brand-icon">
474
+ <svg viewBox="0 0 24 24"><path d="M12 14c1.66 0 3-1.34 3-3V5c0-1.66-1.34-3-3-3S9 3.34 9 5v6c0 1.66 1.34 3 3 3z"/><path d="M17 11c0 2.76-2.24 5-5 5s-5-2.24-5-5H5c0 3.53 2.61 6.43 6 6.92V21h2v-3.08c3.39-.49 6-3.39 6-6.92h-2z"/></svg>
475
+ </div>
476
+ <h1>VisionClaw Voice</h1>
477
+ <p class="subtitle" id="greeting">Tap to start a conversation</p>
478
+
479
+ <div class="voice-selector">
480
+ <label>Choose a voice</label>
481
+ <select class="voice-dropdown" id="voiceDropdown">
482
+ <option value="alloy">Alloy</option>
483
+ <option value="ash">Ash</option>
484
+ <option value="ballad">Ballad</option>
485
+ <option value="cedar">Cedar</option>
486
+ <option value="coral" selected>Coral</option>
487
+ <option value="echo">Echo</option>
488
+ <option value="marin">Marin</option>
489
+ <option value="sage">Sage</option>
490
+ <option value="shimmer">Shimmer</option>
491
+ <option value="verse">Verse</option>
492
+ </select>
493
+ </div>
494
+
495
+ <button class="start-btn" id="startBtn" onclick="startSession()">
496
+ <svg viewBox="0 0 24 24"><path d="M12 14c1.66 0 3-1.34 3-3V5c0-1.66-1.34-3-3-3S9 3.34 9 5v6c0 1.66 1.34 3 3 3z"/><path d="M17 11c0 2.76-2.24 5-5 5s-5-2.24-5-5H5c0 3.53 2.61 6.43 6 6.92V21h2v-3.08c3.39-.49 6-3.39 6-6.92h-2z"/></svg>
497
+ <span>Start</span>
498
+ </button>
499
+ <p class="footer-text">VisionClaw</p>
500
+ </div>
501
+
502
+ <!-- Session View -->
503
+ <div class="session-view" id="sessionView">
504
+ <div class="header">
505
+ <div class="header-avatar">
506
+ <svg viewBox="0 0 24 24"><path d="M12 14c1.66 0 3-1.34 3-3V5c0-1.66-1.34-3-3-3S9 3.34 9 5v6c0 1.66 1.34 3 3 3z"/><path d="M17 11c0 2.76-2.24 5-5 5s-5-2.24-5-5H5c0 3.53 2.61 6.43 6 6.92V21h2v-3.08c3.39-.49 6-3.39 6-6.92h-2z"/></svg>
507
+ </div>
508
+ <div class="info">
509
+ <h1>Voice Assistant</h1>
510
+ <div class="status">
511
+ <span class="dot" id="statusDot"></span>
512
+ <span id="statusText">Connecting...</span>
513
+ </div>
514
+ </div>
515
+ <button class="end-btn" onclick="endSession()">End</button>
516
+ </div>
517
+
518
+ <div class="transcript" id="transcript"></div>
519
+
520
+ <div class="controls">
521
+ <button class="mic-btn" id="micBtn" onclick="toggleMic()">
522
+ <svg viewBox="0 0 24 24">
523
+ <path d="M12 14c1.66 0 3-1.34 3-3V5c0-1.66-1.34-3-3-3S9 3.34 9 5v6c0 1.66 1.34 3 3 3z"/>
524
+ <path d="M17 11c0 2.76-2.24 5-5 5s-5-2.24-5-5H5c0 3.53 2.61 6.43 6 6.92V21h2v-3.08c3.39-.49 6-3.39 6-6.92h-2z"/>
525
+ </svg>
526
+ </button>
527
+ <span class="btn-label" id="micLabel">Tap to talk</span>
528
+ </div>
529
+ </div>
530
+
531
+ <!-- Connecting Overlay -->
532
+ <div class="overlay hidden" id="overlay">
533
+ <div class="overlay-content">
534
+ <div class="spinner"></div>
535
+ <div class="text" id="overlayText">Connecting...</div>
536
+ </div>
537
+ </div>
538
+
539
+ <script>
540
+ // ─── Telegram Mini App Integration ──────────────────────────────────────────
541
// Telegram WebApp bridge; undefined when the page is opened outside Telegram.
const tg = window.Telegram?.WebApp;
let tgUser = null;
let isTelegramMiniApp = false;

// Only treat the page as a Mini App when Telegram supplied init data.
if (tg?.initData) {
  isTelegramMiniApp = true;
  tg.ready();
  tg.expand();

  tgUser = tg.initDataUnsafe?.user;
  console.log('[Telegram] Mini App loaded, user:', tgUser?.first_name);

  // Back button: leave the running session if there is one, otherwise close the app.
  tg.BackButton.onClick(() => {
    if (!isConnected) {
      tg.close();
      return;
    }
    endSession();
  });

  if (tgUser?.first_name) {
    // Personalise the start-screen greeting once the DOM is available.
    const personalizeGreeting = () => {
      const greetingEl = document.getElementById('greeting');
      if (!greetingEl) return;
      greetingEl.textContent = `Hey ${tgUser.first_name}, tap to start`;
    };
    document.addEventListener('DOMContentLoaded', personalizeGreeting);
  }
}
568
+
569
+ // ─── Voice Selector (Dropdown with Preview) ────────────────────────────
570
// Voice currently chosen in the dropdown, and the sample clip being auditioned (if any).
let selectedVoice = 'coral';
let previewAudio = null;

/**
 * Stops the voice sample that is currently playing, rewinds it and forgets it.
 * Does nothing when no sample is active.
 */
function stopPreview() {
  if (!previewAudio) return;

  const clip = previewAudio;
  clip.pause();
  clip.currentTime = 0;
  previewAudio = null;
}
580
+
581
/**
 * Plays the sample clip for a voice, replacing any clip already playing.
 *
 * @param {string} voice - Voice identifier; the clip is loaded from
 *   `/realtime/samples/<voice>.mp3`.
 */
function playPreview(voice) {
  stopPreview();

  const clip = new Audio(`/realtime/samples/${voice}.mp3`);

  // Only clear the shared handle if it still points at THIS clip. A late
  // 'error'/'ended' event from a superseded clip must not drop the handle to
  // the clip that replaced it, otherwise stopPreview() could no longer stop it.
  const release = () => {
    if (previewAudio === clip) previewAudio = null;
  };
  clip.addEventListener('ended', release);
  clip.addEventListener('error', release);

  previewAudio = clip;
  clip.play().catch((err) => {
    // AbortError just means the clip was paused/replaced before it started.
    if (err?.name !== 'AbortError') {
      console.warn('[Voice] Preview playback failed:', err);
    }
  });
}
588
+
589
// Wire the voice dropdown: remember the choice and audition it immediately.
document.addEventListener('DOMContentLoaded', () => {
  const voiceSelect = document.getElementById('voiceDropdown');
  if (!voiceSelect) return;

  voiceSelect.addEventListener('change', ({ target }) => {
    selectedVoice = target.value;
    console.log('[Voice] Selected:', selectedVoice);
    playPreview(selectedVoice);
  });
});
599
+
600
+ // ─── State ─────────────────────────────────────────────────────────────────
601
// WebRTC objects for the active call; all null while no session is running.
let pc = null; // RTCPeerConnection
let dc = null; // data channel created on the peer connection ('oai-events')
let audioEl = null; // <audio> element that plays the remote track
let micStream = null; // MediaStream returned by getUserMedia

// UI / lifecycle flags.
let isMicMuted = false; // flipped by toggleMic()
let isConnected = false; // true between data-channel open and close/end
let sessionReady = false; // true once the first session.updated event arrived

// Credentials returned by POST /realtime/api/session, echoed on later API calls.
let realtimeSessionId = null;
let realtimeSessionSecret = null;
610
+
611
/**
 * Builds the `sessionId=…&sessionSecret=…` query-string fragment for the
 * current session.
 *
 * @returns {string} URL-encoded fragment, or '' when either credential is missing.
 */
function sessionQuery() {
  const hasCredentials = Boolean(realtimeSessionId && realtimeSessionSecret);
  if (!hasCredentials) return '';

  const pairs = [
    ['sessionId', realtimeSessionId],
    ['sessionSecret', realtimeSessionSecret],
  ];
  return pairs.map(([key, value]) => `${key}=${encodeURIComponent(value)}`).join('&');
}
615
+
616
/**
 * Returns a request body made of `extra` plus the current session credentials.
 * The credentials are spread last, so they override same-named keys in `extra`.
 *
 * @param {object} [extra={}] - Additional fields to include.
 * @returns {object} New object; `extra` is not modified.
 */
function sessionPayload(extra = {}) {
  const credentials = {
    sessionId: realtimeSessionId,
    sessionSecret: realtimeSessionSecret,
  };
  return { ...extra, ...credentials };
}
619
+
620
/**
 * Posts the Telegram init data to the backend for verification.
 *
 * @returns {Promise<void>} Resolves when the server answers with a 2xx status.
 * @throws {Error} When no Telegram init data is available, or when the server
 *   rejects the request (the server's `error` field is used as the message
 *   when present).
 */
async function verifyTelegramOwner() {
  const initData = tg?.initData;
  if (!initData) {
    throw new Error('Telegram Mini App authentication is required.');
  }

  const request = {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ initData }),
  };
  const response = await fetch('/realtime/api/verify-telegram', request);
  if (response.ok) return;

  // A non-JSON error body falls back to a generic "Unauthorized" message.
  const data = await response.json().catch(() => ({ error: 'Unauthorized' }));
  throw new Error(data.error || 'Realtime is only available to the owner.');
}
634
+
635
+ // ─── UI Helpers ────────────────────────────────────────────────────────────
636
+
637
/**
 * Appends a message bubble to the transcript and scrolls it into view.
 *
 * @param {string} text - Message text (inserted as plain text).
 * @param {string} [type='system'] - CSS modifier added after the `msg` class.
 * @returns {HTMLDivElement} The created element, so callers can update it later.
 */
function addMessage(text, type = 'system') {
  const log = document.getElementById('transcript');

  const bubble = document.createElement('div');
  bubble.className = `msg ${type}`;
  bubble.textContent = text;

  log.appendChild(bubble);
  log.scrollTop = log.scrollHeight;
  return bubble;
}
646
+
647
/**
 * Updates the header status line.
 *
 * @param {string} text - Status text to display.
 * @param {string} [state=''] - Optional extra class for the status dot.
 */
function setStatus(text, state = '') {
  const label = document.getElementById('statusText');
  label.textContent = text;

  const indicator = document.getElementById('statusDot');
  indicator.className = state ? `dot ${state}` : 'dot';
}
652
+
653
/**
 * Reveals the blocking overlay and sets its caption.
 *
 * @param {string} text - Caption shown next to the spinner.
 */
function showOverlay(text) {
  const overlay = document.getElementById('overlay');
  overlay.classList.remove('hidden');

  const caption = document.getElementById('overlayText');
  caption.textContent = text;
}
657
+
658
/** Hides the blocking overlay again. */
function hideOverlay() {
  const overlay = document.getElementById('overlay');
  overlay.classList.add('hidden');
}
661
+
662
+ // ─── Start Session ──────────────────────────────────────────────────────────
663
+
664
/**
 * Releases everything a failed start attempt may have acquired, so the
 * microphone is not left recording and no half-open connection lingers.
 */
function releaseFailedSession() {
  if (dc) {
    // Detach handlers first so closing the connection cannot overwrite the
    // error the caller is about to display.
    dc.onopen = null;
    dc.onmessage = null;
    dc.onclose = null;
    dc = null;
  }
  if (pc) {
    pc.ontrack = null;
    pc.close();
    pc = null;
  }
  if (micStream) {
    micStream.getTracks().forEach((track) => track.stop());
    micStream = null;
  }
  if (audioEl) {
    audioEl.srcObject = null;
    audioEl = null;
  }
  isConnected = false;
  sessionReady = false;
  realtimeSessionId = null;
  realtimeSessionSecret = null;
}

/**
 * Starts a realtime voice session: verifies the Telegram user, acquires the
 * microphone, opens a WebRTC peer connection with an `oai-events` data
 * channel, and exchanges SDP with `/realtime/api/session`.
 *
 * The microphone track is added disabled; it is enabled once the
 * `session.updated` event confirms the configuration, or straight away if
 * fetching/sending the configuration fails.
 *
 * Errors are reported in the UI rather than thrown, and any resources
 * acquired before the failure are released.
 *
 * @returns {Promise<void>}
 */
async function startSession() {
  stopPreview(); // Stop any playing voice preview immediately
  document.getElementById('startScreen').classList.add('hidden');
  document.getElementById('sessionView').classList.add('active');

  if (isTelegramMiniApp) {
    tg.BackButton.show();
  }

  try {
    showOverlay('Verifying owner...');
    await verifyTelegramOwner();

    showOverlay('Requesting microphone...');
    micStream = await navigator.mediaDevices.getUserMedia({ audio: true });

    showOverlay('Setting up connection...');

    pc = new RTCPeerConnection();

    // Keep a local reference: the handler must not depend on the shared
    // global still pointing at this session's element.
    const remoteAudio = document.createElement('audio');
    remoteAudio.autoplay = true;
    audioEl = remoteAudio;
    pc.ontrack = (e) => {
      remoteAudio.srcObject = e.streams[0];
    };

    // Mute mic initially — unmute only after session.updated confirms voice config
    const micTrack = micStream.getTracks()[0];
    micTrack.enabled = false;
    pc.addTrack(micTrack);

    const channel = pc.createDataChannel('oai-events');
    dc = channel;

    channel.onopen = async () => {
      console.log('[DC] Data channel open');
      isConnected = true;
      sessionStartTime = Date.now();
      transcriptLog = [];
      showOverlay('Configuring voice...');

      // session.update via the data channel configures tools, voice and
      // instructions. The mic stays muted until session.updated is received
      // so the model cannot answer with the default voice first.
      try {
        const configRes = await fetch(`/realtime/api/session-config?voice=${encodeURIComponent(selectedVoice)}&${sessionQuery()}`);
        if (!configRes.ok) {
          // Never forward an error body to the model as if it were a config.
          throw new Error(`session-config request failed: HTTP ${configRes.status}`);
        }
        const sessionUpdate = await configRes.json();
        channel.send(JSON.stringify(sessionUpdate));
        console.log('[DC] Sent session.update (tools, voice, instructions configured)');
      } catch (err) {
        console.error('[DC] CRITICAL: session.update failed:', err);
        // The session was ended or replaced while we were waiting — nothing to recover.
        if (dc !== channel) return;
        // Make the session usable anyway, but honour a mute the user already chose.
        micTrack.enabled = !isMicMuted;
        hideOverlay();
        setStatus('Connected', 'connected');
        addMessage('Warning: Voice configuration failed. Tools may not work.', 'system');
      }
    };

    channel.onmessage = (e) => {
      let event;
      try {
        event = JSON.parse(e.data);
      } catch (err) {
        console.warn('[DC] Ignoring non-JSON message:', err);
        return;
      }
      handleRealtimeEvent(event);
    };

    channel.onclose = () => {
      isConnected = false;
      setStatus('Disconnected', 'error');
      addMessage('Session ended.');
    };

    showOverlay('Connecting to voice...');

    const offer = await pc.createOffer();
    await pc.setLocalDescription(offer);

    if (!tg?.initData) {
      throw new Error('Telegram Mini App authentication is required.');
    }

    const sdpResponse = await fetch(`/realtime/api/session?voice=${encodeURIComponent(selectedVoice)}`, {
      method: 'POST',
      body: offer.sdp,
      headers: {
        'Content-Type': 'application/sdp',
        'X-Telegram-Init-Data': tg.initData,
      },
    });

    if (!sdpResponse.ok) {
      const errText = await sdpResponse.text();
      throw new Error(`Session failed: ${sdpResponse.status} - ${errText}`);
    }

    const sessionData = await sdpResponse.json();
    realtimeSessionId = sessionData.sessionId;
    realtimeSessionSecret = sessionData.sessionSecret;
    await pc.setRemoteDescription({ type: 'answer', sdp: sessionData.sdp });

    console.log('[WebRTC] Connected to OpenAI Realtime API');
  } catch (err) {
    console.error('[Session] Error:', err);
    releaseFailedSession();
    hideOverlay();
    setStatus(`Error: ${err.message}`, 'error');
    addMessage(`Error: ${err.message}`, 'system');
  }
}
770
+
771
+ // ─── Handle Realtime API Events ────────────────────────────────────────────
772
+
773
// In-progress assistant utterance: text accumulated from transcript deltas
// and the bubble that displays it.
let currentAssistantText = '';
let currentAssistantMsgEl = null;

// Same pair for the user's in-progress utterance.
let currentUserText = '';
let currentUserMsgEl = null;

// Completed utterances ({ role, text, timestamp }) uploaded when the session ends.
let transcriptLog = [];

// Date.now() captured when the data channel opened; null while idle.
let sessionStartTime = null;
779
+
780
/**
 * Dispatches one event received on the realtime data channel.
 *
 * Handles session lifecycle (enabling the mic once the configuration is
 * confirmed), streaming user/assistant transcripts into the transcript view
 * and `transcriptLog`, function-call requests, errors, and the speech
 * started/stopped indicators. Unknown event types are logged.
 *
 * @param {{type: string}} event - Parsed event object; other fields depend on `type`.
 */
function handleRealtimeEvent(event) {
  // DEBUG: log ALL event types
  console.log('[DC Event]', event.type);

  switch (event.type) {
    case 'session.created':
      console.log('[Session] Created:', event.session?.id);
      console.log('[Session] Created tools:', event.session?.tools?.length || 0);
      break;

    case 'session.updated':
      console.log('[Session] Updated:', JSON.stringify(Object.keys(event.session || {})));
      console.log('[Session] Tools registered:', event.session?.tools?.length || 0);
      console.log('[Session] Tool names:', event.session?.tools?.map((t) => t.name));
      console.log('[Session] Modalities:', event.session?.modalities);
      // Voice config is now applied — release the initial mute (only once).
      if (!sessionReady) {
        sessionReady = true;
        if (micStream) {
          // Honour a mute the user chose while the session was configuring;
          // otherwise the UI would say "Muted" while audio is being sent.
          micStream.getTracks().forEach((t) => { t.enabled = !isMicMuted; });
          if (isMicMuted) {
            console.log('[Session] Voice config confirmed — mic left muted by user');
          } else {
            console.log('[Session] Mic unmuted — voice config confirmed');
          }
        }
        hideOverlay();
        setStatus('Connected', 'connected');
        addMessage('Session started. Speak naturally!');
      }
      break;

    case 'conversation.item.input_audio_transcription.delta':
      currentUserText += event.delta || '';
      if (!currentUserMsgEl) {
        currentUserMsgEl = addMessage(currentUserText, 'user');
      } else {
        currentUserMsgEl.textContent = currentUserText;
      }
      break;

    case 'conversation.item.input_audio_transcription.completed':
      if (event.transcript?.trim()) {
        // Replace the streamed text with the final transcript.
        if (currentUserMsgEl) {
          currentUserMsgEl.textContent = event.transcript.trim();
        } else {
          addMessage(event.transcript.trim(), 'user');
        }
        transcriptLog.push({
          role: 'user',
          text: event.transcript.trim(),
          timestamp: new Date().toISOString(),
        });
      }
      currentUserText = '';
      currentUserMsgEl = null;
      break;

    case 'conversation.item.input_audio_transcription.failed':
      console.warn('[Transcription] Failed:', event.error);
      currentUserText = '';
      currentUserMsgEl = null;
      break;

    case 'response.output_audio_transcript.delta':
      currentAssistantText += event.delta || '';
      if (!currentAssistantMsgEl) {
        currentAssistantMsgEl = addMessage(currentAssistantText, 'assistant');
      } else {
        currentAssistantMsgEl.textContent = currentAssistantText;
      }
      break;

    case 'response.output_audio_transcript.done':
      if (event.transcript?.trim()) {
        if (currentAssistantMsgEl) {
          currentAssistantMsgEl.textContent = event.transcript.trim();
        } else {
          addMessage(event.transcript.trim(), 'assistant');
        }
        transcriptLog.push({
          role: 'assistant',
          text: event.transcript.trim(),
          timestamp: new Date().toISOString(),
        });
      }
      currentAssistantText = '';
      currentAssistantMsgEl = null;
      break;

    case 'response.function_call_arguments.done':
      handleFunctionCall(event);
      break;

    case 'response.created':
      break;

    case 'response.done':
      if (event.response?.usage) {
        console.log('[Usage]', event.response.usage);
      }
      break;

    case 'conversation.item.created':
    case 'conversation.item.done':
      break;

    case 'error':
      console.error('[Realtime Error]', JSON.stringify(event.error));
      addMessage(`Error: ${event.error?.message || 'Unknown error'}`, 'system');
      break;

    case 'input_audio_buffer.speech_started':
      document.getElementById('micBtn').classList.add('active');
      document.getElementById('micLabel').textContent = 'Listening...';
      break;

    case 'input_audio_buffer.speech_stopped':
      document.getElementById('micBtn').classList.remove('active');
      document.getElementById('micLabel').textContent = isMicMuted ? 'Muted' : 'Tap to mute';
      break;

    case 'input_audio_buffer.committed':
    case 'input_audio_buffer.timeout_triggered':
      break;

    default:
      if (event.type?.includes('function') || event.type?.includes('tool')) {
        console.log('[Event] FUNCTION/TOOL related:', event.type, JSON.stringify(event).substring(0, 500));
      } else if (!event.type?.startsWith('response.audio.') &&
                 !event.type?.startsWith('response.output_audio_transcript.') &&
                 event.type !== 'response.content_part.added' &&
                 event.type !== 'response.content_part.done' &&
                 event.type !== 'response.output_item.added' &&
                 event.type !== 'response.output_item.done') {
        // Everything else that is not known high-volume noise gets logged.
        console.log('[Event] Unhandled:', event.type);
      }
      break;
  }
}
916
+
917
+ // ─── Function Call Handling ─────────────────────────────────────────────────
918
+
919
/**
 * Appends a "tool is running" bubble (label plus three animated dots) to the
 * transcript.
 *
 * The label is inserted as plain text: the tool name originates from a
 * realtime event, so it must never be interpreted as HTML.
 *
 * @param {string} name - Tool name reported by the function-call event.
 * @returns {HTMLDivElement} The bubble, so the caller can mark it done later.
 */
function addToolThinking(name) {
  const transcript = document.getElementById('transcript');
  const msg = document.createElement('div');
  msg.className = 'msg tool thinking';

  // A Map avoids accidental hits on Object.prototype members such as "constructor".
  const friendlyNames = new Map([
    ['web_search', 'Searching the web'],
    ['list_calendar_events', 'Checking calendar'],
    ['create_calendar_event', 'Creating event'],
  ]);
  msg.textContent = friendlyNames.get(name) || name;

  const dots = document.createElement('div');
  dots.className = 'dots';
  for (let i = 0; i < 3; i += 1) {
    dots.appendChild(document.createElement('span'));
  }
  msg.appendChild(dots);

  transcript.appendChild(msg);
  transcript.scrollTop = transcript.scrollHeight;
  return msg;
}
933
+
934
/**
 * Executes a tool requested by the model and returns the result to it.
 *
 * The call is forwarded to `/realtime/api/execute-tool`; the outcome (or a
 * JSON-encoded error) is sent back over the data channel as a
 * `function_call_output` item followed by `response.create`.
 *
 * @param {{call_id: string, name: string, arguments: string}} event -
 *   The `response.function_call_arguments.done` event.
 * @returns {Promise<void>}
 */
async function handleFunctionCall(event) {
  const { call_id, name, arguments: argsStr } = event;
  console.log(`[FnCall] RECEIVED: ${name}(${argsStr}) call_id=${call_id}`);
  const thinkingMsg = addToolThinking(name);

  let result;
  try {
    // Use HTTP POST instead of WebSocket for reliability through tunnels
    console.log(`[FnCall] Executing via HTTP POST /api/execute-tool...`);
    const response = await fetch('/realtime/api/execute-tool', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(sessionPayload({ name, arguments: argsStr, call_id })),
    });

    if (!response.ok) {
      const errData = await response.json().catch(() => ({ error: response.statusText }));
      result = JSON.stringify({ error: errData.error || `HTTP ${response.status}` });
      console.error(`[FnCall] HTTP error:`, result);
    } else {
      const data = await response.json();
      // The output sent to the model must be a string; serialise anything else
      // instead of failing later on a non-string value.
      result = typeof data.result === 'string'
        ? data.result
        : JSON.stringify(data.result ?? null);
      console.log(`[FnCall] Result for ${name}:`, result.substring(0, 200));
    }
  } catch (err) {
    result = JSON.stringify({ error: `Network error: ${err.message}` });
    console.error(`[FnCall] Network error:`, err);
  }

  thinkingMsg.classList.remove('thinking');
  // Plain text only — the tool name comes from a realtime event.
  thinkingMsg.textContent = `${name} done`;

  // The session may have been ended while the tool was running.
  const channel = dc;
  if (!channel || channel.readyState !== 'open') {
    console.warn(`[FnCall] Data channel unavailable; dropping result for ${name}`);
    return;
  }

  // Send result back to OpenAI via data channel
  channel.send(JSON.stringify({
    type: 'conversation.item.create',
    item: {
      type: 'function_call_output',
      call_id,
      output: result,
    },
  }));
  channel.send(JSON.stringify({ type: 'response.create' }));
}
977
+
978
+ // ─── Mic Toggle ────────────────────────────────────────────────────────────
979
+
980
/**
 * Flips the microphone between muted and live and updates the mic button
 * and its label to match. Ignored when no microphone stream exists.
 */
function toggleMic() {
  if (!micStream) return;

  isMicMuted = !isMicMuted;
  for (const track of micStream.getTracks()) {
    track.enabled = !isMicMuted;
  }

  const micButton = document.getElementById('micBtn');
  const micLabel = document.getElementById('micLabel');
  micButton.classList.toggle('muted', isMicMuted);
  micLabel.textContent = isMicMuted ? 'Muted' : 'Tap to mute';
}
998
+
999
+ // ─── End Session ───────────────────────────────────────────────────────────
1000
+
1001
/**
 * Ends the current session: releases the connection and microphone, resets
 * all session state, returns to the start screen, and uploads the collected
 * transcript (if any) to `/realtime/api/end-session`.
 *
 * Media is released BEFORE the upload is awaited so the microphone stops
 * immediately when the user ends the session, even on a slow network. State
 * is reset synchronously, so a second call while the upload is in flight
 * finds nothing left to send.
 *
 * @returns {Promise<void>} Resolves after the upload attempt; upload failures are logged.
 */
async function endSession() {
  // Snapshot the upload body first — the credentials and log are cleared below.
  const messageCount = transcriptLog.length;
  let uploadBody = null;
  if (messageCount > 0) {
    const duration = sessionStartTime ? (Date.now() - sessionStartTime) / 1000 : null;
    uploadBody = JSON.stringify(sessionPayload({
      transcript: transcriptLog,
      duration,
    }));
  }

  if (pc) { pc.close(); pc = null; }
  if (micStream) { micStream.getTracks().forEach((t) => t.stop()); micStream = null; }
  if (dc) { dc = null; }
  if (audioEl) { audioEl.srcObject = null; audioEl = null; }
  isConnected = false;
  isMicMuted = false;
  currentAssistantText = '';
  currentAssistantMsgEl = null;
  currentUserText = '';
  currentUserMsgEl = null;
  transcriptLog = [];
  sessionStartTime = null;
  sessionReady = false;
  realtimeSessionId = null;
  realtimeSessionSecret = null;

  document.getElementById('sessionView').classList.remove('active');
  document.getElementById('startScreen').classList.remove('hidden');

  if (isTelegramMiniApp) {
    tg.BackButton.hide();
  }

  if (uploadBody === null) return;

  try {
    const response = await fetch('/realtime/api/end-session', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: uploadBody,
      // Let the request finish even if the Mini App is closed right after.
      keepalive: true,
    });
    if (response.ok) {
      console.log(`[Session] Transcript sent (${messageCount} messages)`);
    } else {
      console.error(`[Session] Failed to send transcript: HTTP ${response.status}`);
    }
  } catch (err) {
    console.error('[Session] Failed to send transcript:', err);
  }
}
1041
+
1042
+ // ─── Cleanup ───────────────────────────────────────────────────────────────
1043
+
1044
// Last-chance cleanup when the page goes away: ship the transcript with a
// beacon, then release the connection and microphone.
window.addEventListener('beforeunload', () => {
  const hasTranscript = transcriptLog.length > 0;
  if (hasTranscript) {
    const duration = sessionStartTime ? (Date.now() - sessionStartTime) / 1000 : null;
    const body = JSON.stringify(sessionPayload({ transcript: transcriptLog, duration }));
    const blob = new Blob([body], { type: 'application/json' });
    navigator.sendBeacon('/realtime/api/end-session', blob);
  }

  if (pc) {
    pc.close();
  }
  if (micStream) {
    for (const track of micStream.getTracks()) {
      track.stop();
    }
  }
});
1056
+ </script>
1057
+ </body>
1058
+ </html>