visionclaw 0.1.195-beta.0 → 0.1.195-dev.feat-e2e-test-system.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (345) hide show
  1. package/dist/agent/command-handlers.d.ts.map +1 -1
  2. package/dist/agent/command-handlers.js +17 -0
  3. package/dist/agent/command-handlers.js.map +1 -1
  4. package/dist/builtin-skills/catalog/equity-research/SKILL.md +256 -0
  5. package/dist/builtin-skills/catalog/financial-modeling/SKILL.md +186 -0
  6. package/dist/builtin-skills/catalog/investment-banking/SKILL.md +213 -0
  7. package/dist/builtin-skills/catalog/private-equity/SKILL.md +282 -0
  8. package/dist/builtin-skills/catalog/wealth-management/SKILL.md +252 -0
  9. package/dist/channels/interface.d.ts +9 -0
  10. package/dist/channels/interface.d.ts.map +1 -1
  11. package/dist/channels/manager.d.ts.map +1 -1
  12. package/dist/channels/manager.js +3 -0
  13. package/dist/channels/manager.js.map +1 -1
  14. package/dist/channels/telegram.d.ts.map +1 -1
  15. package/dist/channels/telegram.js +7 -1
  16. package/dist/channels/telegram.js.map +1 -1
  17. package/dist/config/types.d.ts +6 -0
  18. package/dist/config/types.d.ts.map +1 -1
  19. package/dist/config/types.js +10 -0
  20. package/dist/config/types.js.map +1 -1
  21. package/dist/e2e/artifacts.d.ts +8 -0
  22. package/dist/e2e/artifacts.d.ts.map +1 -0
  23. package/dist/e2e/artifacts.js +35 -0
  24. package/dist/e2e/artifacts.js.map +1 -0
  25. package/dist/e2e/cleanup.d.ts +8 -0
  26. package/dist/e2e/cleanup.d.ts.map +1 -0
  27. package/dist/e2e/cleanup.js +108 -0
  28. package/dist/e2e/cleanup.js.map +1 -0
  29. package/dist/e2e/cli.d.ts +4 -0
  30. package/dist/e2e/cli.d.ts.map +1 -0
  31. package/dist/e2e/cli.js +16 -0
  32. package/dist/e2e/cli.js.map +1 -0
  33. package/dist/e2e/index.d.ts +5 -0
  34. package/dist/e2e/index.d.ts.map +1 -0
  35. package/dist/e2e/index.js +4 -0
  36. package/dist/e2e/index.js.map +1 -0
  37. package/dist/e2e/local-test-server.d.ts +7 -0
  38. package/dist/e2e/local-test-server.d.ts.map +1 -0
  39. package/dist/e2e/local-test-server.js +75 -0
  40. package/dist/e2e/local-test-server.js.map +1 -0
  41. package/dist/e2e/oauth-setup-store.d.ts +28 -0
  42. package/dist/e2e/oauth-setup-store.d.ts.map +1 -0
  43. package/dist/e2e/oauth-setup-store.js +56 -0
  44. package/dist/e2e/oauth-setup-store.js.map +1 -0
  45. package/dist/e2e/parser.d.ts +4 -0
  46. package/dist/e2e/parser.d.ts.map +1 -0
  47. package/dist/e2e/parser.js +52 -0
  48. package/dist/e2e/parser.js.map +1 -0
  49. package/dist/e2e/registry.d.ts +3 -0
  50. package/dist/e2e/registry.d.ts.map +1 -0
  51. package/dist/e2e/registry.js +44 -0
  52. package/dist/e2e/registry.js.map +1 -0
  53. package/dist/e2e/reporter.d.ts +6 -0
  54. package/dist/e2e/reporter.d.ts.map +1 -0
  55. package/dist/e2e/reporter.js +56 -0
  56. package/dist/e2e/reporter.js.map +1 -0
  57. package/dist/e2e/runner.d.ts +4 -0
  58. package/dist/e2e/runner.d.ts.map +1 -0
  59. package/dist/e2e/runner.js +116 -0
  60. package/dist/e2e/runner.js.map +1 -0
  61. package/dist/e2e/setup-google-guest.d.ts +19 -0
  62. package/dist/e2e/setup-google-guest.d.ts.map +1 -0
  63. package/dist/e2e/setup-google-guest.js +205 -0
  64. package/dist/e2e/setup-google-guest.js.map +1 -0
  65. package/dist/e2e/suite-utils.d.ts +19 -0
  66. package/dist/e2e/suite-utils.d.ts.map +1 -0
  67. package/dist/e2e/suite-utils.js +60 -0
  68. package/dist/e2e/suite-utils.js.map +1 -0
  69. package/dist/e2e/suites/agent.d.ts +3 -0
  70. package/dist/e2e/suites/agent.d.ts.map +1 -0
  71. package/dist/e2e/suites/agent.js +33 -0
  72. package/dist/e2e/suites/agent.js.map +1 -0
  73. package/dist/e2e/suites/browser.d.ts +3 -0
  74. package/dist/e2e/suites/browser.d.ts.map +1 -0
  75. package/dist/e2e/suites/browser.js +58 -0
  76. package/dist/e2e/suites/browser.js.map +1 -0
  77. package/dist/e2e/suites/cua.d.ts +3 -0
  78. package/dist/e2e/suites/cua.d.ts.map +1 -0
  79. package/dist/e2e/suites/cua.js +68 -0
  80. package/dist/e2e/suites/cua.js.map +1 -0
  81. package/dist/e2e/suites/google.d.ts +3 -0
  82. package/dist/e2e/suites/google.d.ts.map +1 -0
  83. package/dist/e2e/suites/google.js +145 -0
  84. package/dist/e2e/suites/google.js.map +1 -0
  85. package/dist/e2e/suites/memory.d.ts +3 -0
  86. package/dist/e2e/suites/memory.d.ts.map +1 -0
  87. package/dist/e2e/suites/memory.js +50 -0
  88. package/dist/e2e/suites/memory.js.map +1 -0
  89. package/dist/e2e/suites/obs.d.ts +3 -0
  90. package/dist/e2e/suites/obs.d.ts.map +1 -0
  91. package/dist/e2e/suites/obs.js +29 -0
  92. package/dist/e2e/suites/obs.js.map +1 -0
  93. package/dist/e2e/suites/self.d.ts +3 -0
  94. package/dist/e2e/suites/self.d.ts.map +1 -0
  95. package/dist/e2e/suites/self.js +65 -0
  96. package/dist/e2e/suites/self.js.map +1 -0
  97. package/dist/e2e/suites/upgrade.d.ts +3 -0
  98. package/dist/e2e/suites/upgrade.d.ts.map +1 -0
  99. package/dist/e2e/suites/upgrade.js +31 -0
  100. package/dist/e2e/suites/upgrade.js.map +1 -0
  101. package/dist/e2e/types.d.ts +91 -0
  102. package/dist/e2e/types.d.ts.map +1 -0
  103. package/dist/e2e/types.js +2 -0
  104. package/dist/e2e/types.js.map +1 -0
  105. package/dist/index.js.map +1 -1
  106. package/dist/service/daemon.d.ts +1 -0
  107. package/dist/service/daemon.d.ts.map +1 -1
  108. package/dist/service/daemon.js +110 -15
  109. package/dist/service/daemon.js.map +1 -1
  110. package/dist/tools/upgrade.d.ts +8 -0
  111. package/dist/tools/upgrade.d.ts.map +1 -1
  112. package/dist/tools/upgrade.js +64 -8
  113. package/dist/tools/upgrade.js.map +1 -1
  114. package/dist-agent/bundle.cjs +32037 -30064
  115. package/package.json +1 -1
  116. package/dist/agent/applied-credential-signature.d.ts +0 -53
  117. package/dist/agent/applied-credential-signature.d.ts.map +0 -1
  118. package/dist/agent/applied-credential-signature.js +0 -137
  119. package/dist/agent/applied-credential-signature.js.map +0 -1
  120. package/dist/agent/engines/claude/cli-resolver.d.ts +0 -16
  121. package/dist/agent/engines/claude/cli-resolver.d.ts.map +0 -1
  122. package/dist/agent/engines/claude/cli-resolver.js +0 -83
  123. package/dist/agent/engines/claude/cli-resolver.js.map +0 -1
  124. package/dist/agent/engines/claude/session-browser-policy.d.ts +0 -9
  125. package/dist/agent/engines/claude/session-browser-policy.d.ts.map +0 -1
  126. package/dist/agent/engines/claude/session-browser-policy.js +0 -49
  127. package/dist/agent/engines/claude/session-browser-policy.js.map +0 -1
  128. package/dist/agent/engines/claude/session.d.ts +0 -304
  129. package/dist/agent/engines/claude/session.d.ts.map +0 -1
  130. package/dist/agent/engines/claude/session.js +0 -1233
  131. package/dist/agent/engines/claude/session.js.map +0 -1
  132. package/dist/agent/engines/client-factory.d.ts +0 -63
  133. package/dist/agent/engines/client-factory.d.ts.map +0 -1
  134. package/dist/agent/engines/client-factory.js +0 -382
  135. package/dist/agent/engines/client-factory.js.map +0 -1
  136. package/dist/agent/engines/engine-factory.d.ts +0 -5
  137. package/dist/agent/engines/engine-factory.d.ts.map +0 -1
  138. package/dist/agent/engines/engine-factory.js +0 -7
  139. package/dist/agent/engines/engine-factory.js.map +0 -1
  140. package/dist/agent/engines/engine.d.ts +0 -8
  141. package/dist/agent/engines/engine.d.ts.map +0 -1
  142. package/dist/agent/engines/engine.js +0 -15
  143. package/dist/agent/engines/engine.js.map +0 -1
  144. package/dist/agent/engines/openai/file-session.d.ts +0 -19
  145. package/dist/agent/engines/openai/file-session.d.ts.map +0 -1
  146. package/dist/agent/engines/openai/file-session.js +0 -78
  147. package/dist/agent/engines/openai/file-session.js.map +0 -1
  148. package/dist/agent/engines/openai/file-tools.d.ts +0 -35
  149. package/dist/agent/engines/openai/file-tools.d.ts.map +0 -1
  150. package/dist/agent/engines/openai/file-tools.js +0 -194
  151. package/dist/agent/engines/openai/file-tools.js.map +0 -1
  152. package/dist/agent/engines/openai/session.d.ts +0 -55
  153. package/dist/agent/engines/openai/session.d.ts.map +0 -1
  154. package/dist/agent/engines/openai/session.js +0 -447
  155. package/dist/agent/engines/openai/session.js.map +0 -1
  156. package/dist/agent/engines/openai/tools.d.ts +0 -15
  157. package/dist/agent/engines/openai/tools.d.ts.map +0 -1
  158. package/dist/agent/engines/openai/tools.js +0 -221
  159. package/dist/agent/engines/openai/tools.js.map +0 -1
  160. package/dist/agent/engines/pi/session.d.ts +0 -54
  161. package/dist/agent/engines/pi/session.d.ts.map +0 -1
  162. package/dist/agent/engines/pi/session.js +0 -397
  163. package/dist/agent/engines/pi/session.js.map +0 -1
  164. package/dist/agent/engines/pi/tools.d.ts +0 -19
  165. package/dist/agent/engines/pi/tools.d.ts.map +0 -1
  166. package/dist/agent/engines/pi/tools.js +0 -127
  167. package/dist/agent/engines/pi/tools.js.map +0 -1
  168. package/dist/agent/engines/session-types.d.ts +0 -153
  169. package/dist/agent/engines/session-types.d.ts.map +0 -1
  170. package/dist/agent/engines/session-types.js +0 -2
  171. package/dist/agent/engines/session-types.js.map +0 -1
  172. package/dist/agent/engines/system-prompt-log.d.ts +0 -9
  173. package/dist/agent/engines/system-prompt-log.d.ts.map +0 -1
  174. package/dist/agent/engines/system-prompt-log.js +0 -46
  175. package/dist/agent/engines/system-prompt-log.js.map +0 -1
  176. package/dist/agent/model-provider.d.ts +0 -103
  177. package/dist/agent/model-provider.d.ts.map +0 -1
  178. package/dist/agent/model-provider.js +0 -540
  179. package/dist/agent/model-provider.js.map +0 -1
  180. package/dist/agent/transcript/transcript-backfill.d.ts +0 -54
  181. package/dist/agent/transcript/transcript-backfill.d.ts.map +0 -1
  182. package/dist/agent/transcript/transcript-backfill.js +0 -604
  183. package/dist/agent/transcript/transcript-backfill.js.map +0 -1
  184. package/dist/agent/transcript/transcript-indexer.d.ts +0 -273
  185. package/dist/agent/transcript/transcript-indexer.d.ts.map +0 -1
  186. package/dist/agent/transcript/transcript-indexer.js +0 -1217
  187. package/dist/agent/transcript/transcript-indexer.js.map +0 -1
  188. package/dist/agent/transcript/transcript-memory-migrations.d.ts +0 -25
  189. package/dist/agent/transcript/transcript-memory-migrations.d.ts.map +0 -1
  190. package/dist/agent/transcript/transcript-memory-migrations.js +0 -87
  191. package/dist/agent/transcript/transcript-memory-migrations.js.map +0 -1
  192. package/dist/agent/transcript-memory-migrations.d.ts +0 -25
  193. package/dist/agent/transcript-memory-migrations.d.ts.map +0 -1
  194. package/dist/agent/transcript-memory-migrations.js +0 -87
  195. package/dist/agent/transcript-memory-migrations.js.map +0 -1
  196. package/dist/agent/tunnel-credential-handler.d.ts +0 -90
  197. package/dist/agent/tunnel-credential-handler.d.ts.map +0 -1
  198. package/dist/agent/tunnel-credential-handler.js +0 -162
  199. package/dist/agent/tunnel-credential-handler.js.map +0 -1
  200. package/dist/agent/usage/usage-backfill-handler.d.ts +0 -18
  201. package/dist/agent/usage/usage-backfill-handler.d.ts.map +0 -1
  202. package/dist/agent/usage/usage-backfill-handler.js +0 -69
  203. package/dist/agent/usage/usage-backfill-handler.js.map +0 -1
  204. package/dist/agent/usage/usage-gate.d.ts +0 -25
  205. package/dist/agent/usage/usage-gate.d.ts.map +0 -1
  206. package/dist/agent/usage/usage-gate.js +0 -83
  207. package/dist/agent/usage/usage-gate.js.map +0 -1
  208. package/dist/agent/usage/usage-handler.d.ts +0 -7
  209. package/dist/agent/usage/usage-handler.d.ts.map +0 -1
  210. package/dist/agent/usage/usage-handler.js +0 -28
  211. package/dist/agent/usage/usage-handler.js.map +0 -1
  212. package/dist/agent/usage/usage-report-builder.d.ts +0 -26
  213. package/dist/agent/usage/usage-report-builder.d.ts.map +0 -1
  214. package/dist/agent/usage/usage-report-builder.js +0 -80
  215. package/dist/agent/usage/usage-report-builder.js.map +0 -1
  216. package/dist/agent/usage/usage-report-queue.d.ts +0 -26
  217. package/dist/agent/usage/usage-report-queue.d.ts.map +0 -1
  218. package/dist/agent/usage/usage-report-queue.js +0 -199
  219. package/dist/agent/usage/usage-report-queue.js.map +0 -1
  220. package/dist/agent/usage/usage-report-types.d.ts +0 -41
  221. package/dist/agent/usage/usage-report-types.d.ts.map +0 -1
  222. package/dist/agent/usage/usage-report-types.js +0 -2
  223. package/dist/agent/usage/usage-report-types.js.map +0 -1
  224. package/dist/agent/usage/usage-reporter.d.ts +0 -31
  225. package/dist/agent/usage/usage-reporter.d.ts.map +0 -1
  226. package/dist/agent/usage/usage-reporter.js +0 -102
  227. package/dist/agent/usage/usage-reporter.js.map +0 -1
  228. package/dist/agent/usage-backfill-handler.d.ts +0 -18
  229. package/dist/agent/usage-backfill-handler.d.ts.map +0 -1
  230. package/dist/agent/usage-backfill-handler.js +0 -69
  231. package/dist/agent/usage-backfill-handler.js.map +0 -1
  232. package/dist/agent/usage-gate.d.ts +0 -25
  233. package/dist/agent/usage-gate.d.ts.map +0 -1
  234. package/dist/agent/usage-gate.js +0 -83
  235. package/dist/agent/usage-gate.js.map +0 -1
  236. package/dist/agent/usage-report-builder.d.ts +0 -26
  237. package/dist/agent/usage-report-builder.d.ts.map +0 -1
  238. package/dist/agent/usage-report-builder.js +0 -80
  239. package/dist/agent/usage-report-builder.js.map +0 -1
  240. package/dist/agent/usage-report-queue.d.ts +0 -26
  241. package/dist/agent/usage-report-queue.d.ts.map +0 -1
  242. package/dist/agent/usage-report-queue.js +0 -199
  243. package/dist/agent/usage-report-queue.js.map +0 -1
  244. package/dist/agent/usage-report-types.d.ts +0 -41
  245. package/dist/agent/usage-report-types.d.ts.map +0 -1
  246. package/dist/agent/usage-report-types.js +0 -2
  247. package/dist/agent/usage-report-types.js.map +0 -1
  248. package/dist/agent/usage-reporter.d.ts +0 -31
  249. package/dist/agent/usage-reporter.d.ts.map +0 -1
  250. package/dist/agent/usage-reporter.js +0 -102
  251. package/dist/agent/usage-reporter.js.map +0 -1
  252. package/dist/agent/wake-cycle-tool-tracker.d.ts +0 -39
  253. package/dist/agent/wake-cycle-tool-tracker.d.ts.map +0 -1
  254. package/dist/agent/wake-cycle-tool-tracker.js +0 -72
  255. package/dist/agent/wake-cycle-tool-tracker.js.map +0 -1
  256. package/dist/billing/payg-handler.d.ts +0 -29
  257. package/dist/billing/payg-handler.d.ts.map +0 -1
  258. package/dist/billing/payg-handler.js +0 -92
  259. package/dist/billing/payg-handler.js.map +0 -1
  260. package/dist/billing/payment-handler.d.ts +0 -24
  261. package/dist/billing/payment-handler.d.ts.map +0 -1
  262. package/dist/billing/payment-handler.js +0 -101
  263. package/dist/billing/payment-handler.js.map +0 -1
  264. package/dist/builtin-skills/catalog/phone-adb-automation/SKILL.md +0 -412
  265. package/dist/builtin-skills/catalog/phone-adb-automation/phone_input.sh +0 -132
  266. package/dist/builtin-skills/catalog/phone-adb-automation/phone_launch.sh +0 -166
  267. package/dist/builtin-skills/catalog/phone-adb-automation/phone_screenshot.sh +0 -87
  268. package/dist/builtin-skills/catalog/phone-adb-automation/phone_security_kbd.py +0 -174
  269. package/dist/builtin-skills/catalog/phone-adb-automation/phone_setup.sh +0 -274
  270. package/dist/builtin-skills/catalog/phone-adb-automation/phone_swipe.sh +0 -111
  271. package/dist/builtin-skills/catalog/phone-adb-automation/phone_tap.sh +0 -87
  272. package/dist/builtin-skills/catalog/phone-adb-automation/phone_ui_parse.py +0 -176
  273. package/dist/builtin-skills/catalog/phone-adb-automation/phone_wake_unlock.sh +0 -67
  274. package/dist/builtin-skills/transcribe-audio/SKILL.md +0 -122
  275. package/dist/data-processing/convert-demo-cli.d.ts +0 -7
  276. package/dist/data-processing/convert-demo-cli.d.ts.map +0 -1
  277. package/dist/data-processing/convert-demo-cli.js +0 -30
  278. package/dist/data-processing/convert-demo-cli.js.map +0 -1
  279. package/dist/data-processing/convert-demo.d.ts +0 -26
  280. package/dist/data-processing/convert-demo.d.ts.map +0 -1
  281. package/dist/data-processing/convert-demo.js +0 -233
  282. package/dist/data-processing/convert-demo.js.map +0 -1
  283. package/dist/obs/rdp/icons/icons/app_windows.svg +0 -4
  284. package/dist/obs/rdp/icons/icons/clip_get.svg +0 -4
  285. package/dist/obs/rdp/icons/icons/clip_send.svg +0 -4
  286. package/dist/obs/rdp/icons/icons/clip_shared.svg +0 -4
  287. package/dist/obs/rdp/icons/icons/clipboard.svg +0 -4
  288. package/dist/obs/rdp/icons/icons/clipboard_shared.svg +0 -4
  289. package/dist/obs/rdp/icons/icons/control.svg +0 -4
  290. package/dist/obs/rdp/icons/icons/desktop.svg +0 -4
  291. package/dist/obs/rdp/icons/icons/display.svg +0 -4
  292. package/dist/obs/rdp/icons/icons/launchpad.svg +0 -4
  293. package/dist/obs/rdp/icons/icons/mission_control.svg +0 -4
  294. package/dist/obs/rdp/icons/icons/screenshot.svg +0 -4
  295. package/dist/obs/rdp/icons/icons/zoom_actual.svg +0 -4
  296. package/dist/obs/rdp/icons/icons/zoom_fit.svg +0 -4
  297. package/dist/obs/rdp/icons/icons/zoom_in.svg +0 -4
  298. package/dist/obs/rdp/icons/icons/zoom_out.svg +0 -4
  299. package/dist/obs/tunnel-telemetry.d.ts +0 -46
  300. package/dist/obs/tunnel-telemetry.d.ts.map +0 -1
  301. package/dist/obs/tunnel-telemetry.js +0 -70
  302. package/dist/obs/tunnel-telemetry.js.map +0 -1
  303. package/dist/onboarding/cloudflared-cert.d.ts +0 -15
  304. package/dist/onboarding/cloudflared-cert.d.ts.map +0 -1
  305. package/dist/onboarding/cloudflared-cert.js +0 -57
  306. package/dist/onboarding/cloudflared-cert.js.map +0 -1
  307. package/dist/onboarding/playwriter-extension.d.ts +0 -19
  308. package/dist/onboarding/playwriter-extension.d.ts.map +0 -1
  309. package/dist/onboarding/playwriter-extension.js +0 -246
  310. package/dist/onboarding/playwriter-extension.js.map +0 -1
  311. package/dist/realtime/websocket.d.ts +0 -7
  312. package/dist/realtime/websocket.d.ts.map +0 -1
  313. package/dist/realtime/websocket.js +0 -65
  314. package/dist/realtime/websocket.js.map +0 -1
  315. package/dist/service/gbox-tun.d.ts +0 -14
  316. package/dist/service/gbox-tun.d.ts.map +0 -1
  317. package/dist/service/gbox-tun.js +0 -315
  318. package/dist/service/gbox-tun.js.map +0 -1
  319. package/dist/skills/installed.d.ts +0 -11
  320. package/dist/skills/installed.d.ts.map +0 -1
  321. package/dist/skills/installed.js +0 -35
  322. package/dist/skills/installed.js.map +0 -1
  323. package/dist/tools/coordinate-resolver.d.ts +0 -30
  324. package/dist/tools/coordinate-resolver.d.ts.map +0 -1
  325. package/dist/tools/coordinate-resolver.js +0 -104
  326. package/dist/tools/coordinate-resolver.js.map +0 -1
  327. package/dist/utils/playwriter-relay.d.ts +0 -9
  328. package/dist/utils/playwriter-relay.d.ts.map +0 -1
  329. package/dist/utils/playwriter-relay.js +0 -77
  330. package/dist/utils/playwriter-relay.js.map +0 -1
  331. package/dist/utils/wechat-monitor.d.ts +0 -21
  332. package/dist/utils/wechat-monitor.d.ts.map +0 -1
  333. package/dist/utils/wechat-monitor.js +0 -88
  334. package/dist/utils/wechat-monitor.js.map +0 -1
  335. package/dist-agent/realtime/assets/index.html +0 -1058
  336. package/dist-agent/realtime/assets/samples/alloy.mp3 +0 -0
  337. package/dist-agent/realtime/assets/samples/ash.mp3 +0 -0
  338. package/dist-agent/realtime/assets/samples/ballad.mp3 +0 -0
  339. package/dist-agent/realtime/assets/samples/cedar.mp3 +0 -0
  340. package/dist-agent/realtime/assets/samples/coral.mp3 +0 -0
  341. package/dist-agent/realtime/assets/samples/echo.mp3 +0 -0
  342. package/dist-agent/realtime/assets/samples/marin.mp3 +0 -0
  343. package/dist-agent/realtime/assets/samples/sage.mp3 +0 -0
  344. package/dist-agent/realtime/assets/samples/shimmer.mp3 +0 -0
  345. package/dist-agent/realtime/assets/samples/verse.mp3 +0 -0
@@ -1,1058 +0,0 @@
1
- <!DOCTYPE html>
2
- <html lang="en">
3
- <head>
4
- <meta charset="UTF-8">
5
- <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no">
6
- <title>VisionClaw Voice</title>
7
- <script src="https://telegram.org/js/telegram-web-app.js"></script>
8
- <style>
9
- :root {
10
- --bg: #09090b;
11
- --surface: #18181b;
12
- --surface-hover: #27272a;
13
- --border: #27272a;
14
- --text: #fafafa;
15
- --text-secondary: #a1a1aa;
16
- --text-muted: #71717a;
17
- --accent: #8b5cf6;
18
- --accent-light: #a78bfa;
19
- --accent-glow: rgba(139, 92, 246, 0.25);
20
- --accent-glow-strong: rgba(139, 92, 246, 0.4);
21
- --success: #10b981;
22
- --error: #f43f5e;
23
- --radius: 16px;
24
- }
25
-
26
- * { margin: 0; padding: 0; box-sizing: border-box; }
27
-
28
- body {
29
- font-family: -apple-system, BlinkMacSystemFont, 'SF Pro Display', 'Segoe UI', system-ui, sans-serif;
30
- background: var(--bg);
31
- color: var(--text);
32
- height: 100vh;
33
- height: 100dvh;
34
- display: flex;
35
- flex-direction: column;
36
- overflow: hidden;
37
- -webkit-font-smoothing: antialiased;
38
- }
39
-
40
- /* ─── Start Screen ─── */
41
- .start-screen {
42
- flex: 1;
43
- display: flex;
44
- flex-direction: column;
45
- align-items: center;
46
- justify-content: center;
47
- padding: 40px 24px;
48
- gap: 0;
49
- }
50
-
51
- .start-screen.hidden { display: none; }
52
-
53
- .brand-icon {
54
- width: 64px;
55
- height: 64px;
56
- border-radius: 20px;
57
- background: linear-gradient(135deg, var(--accent), #6d28d9);
58
- display: flex;
59
- align-items: center;
60
- justify-content: center;
61
- margin-bottom: 24px;
62
- box-shadow: 0 8px 32px var(--accent-glow);
63
- }
64
-
65
- .brand-icon svg {
66
- width: 32px;
67
- height: 32px;
68
- fill: white;
69
- }
70
-
71
- .start-screen h1 {
72
- font-size: 22px;
73
- font-weight: 600;
74
- letter-spacing: -0.3px;
75
- margin-bottom: 8px;
76
- }
77
-
78
- .start-screen .subtitle {
79
- color: var(--text-muted);
80
- font-size: 15px;
81
- margin-bottom: 32px;
82
- }
83
-
84
- .start-btn {
85
- width: 110px;
86
- height: 110px;
87
- border-radius: 50%;
88
- border: none;
89
- background: linear-gradient(135deg, var(--accent), #7c3aed);
90
- color: white;
91
- font-size: 14px;
92
- font-weight: 600;
93
- cursor: pointer;
94
- transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
95
- display: flex;
96
- flex-direction: column;
97
- align-items: center;
98
- justify-content: center;
99
- gap: 6px;
100
- position: relative;
101
- box-shadow: 0 4px 24px var(--accent-glow), 0 0 0 0 var(--accent-glow);
102
- }
103
-
104
- .start-btn svg {
105
- width: 28px;
106
- height: 28px;
107
- fill: white;
108
- }
109
-
110
- .start-btn:hover {
111
- transform: scale(1.05);
112
- box-shadow: 0 8px 40px var(--accent-glow-strong), 0 0 0 8px rgba(139, 92, 246, 0.08);
113
- }
114
-
115
- .start-btn:active {
116
- transform: scale(0.97);
117
- }
118
-
119
- /* Subtle pulse ring on start button */
120
- .start-btn::before {
121
- content: '';
122
- position: absolute;
123
- inset: -4px;
124
- border-radius: 50%;
125
- border: 2px solid var(--accent-light);
126
- opacity: 0;
127
- animation: startPulse 3s ease-in-out infinite;
128
- }
129
-
130
- @keyframes startPulse {
131
- 0%, 100% { opacity: 0; transform: scale(1); }
132
- 50% { opacity: 0.3; transform: scale(1.08); }
133
- }
134
-
135
- .footer-text {
136
- margin-top: 24px;
137
- color: var(--text-muted);
138
- font-size: 11px;
139
- opacity: 0.5;
140
- }
141
-
142
- /* ─── Voice Selector (Dropdown) ─── */
143
- .voice-selector {
144
- display: flex;
145
- flex-direction: column;
146
- align-items: center;
147
- gap: 10px;
148
- margin-bottom: 28px;
149
- width: 100%;
150
- max-width: 260px;
151
- }
152
-
153
- .voice-selector label {
154
- font-size: 13px;
155
- color: var(--text-muted);
156
- font-weight: 500;
157
- }
158
-
159
- .voice-dropdown {
160
- width: 100%;
161
- padding: 12px 16px;
162
- border-radius: 12px;
163
- border: 1.5px solid var(--border);
164
- background: var(--surface);
165
- color: var(--text);
166
- font-size: 15px;
167
- font-weight: 500;
168
- cursor: pointer;
169
- transition: all 0.2s ease;
170
- appearance: none;
171
- -webkit-appearance: none;
172
- background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='12' height='12' viewBox='0 0 12 12'%3E%3Cpath fill='%23a1a1aa' d='M6 8L1 3h10z'/%3E%3C/svg%3E");
173
- background-repeat: no-repeat;
174
- background-position: right 14px center;
175
- text-transform: capitalize;
176
- }
177
-
178
- .voice-dropdown:hover,
179
- .voice-dropdown:focus {
180
- border-color: var(--accent);
181
- outline: none;
182
- box-shadow: 0 0 12px var(--accent-glow);
183
- }
184
-
185
- .voice-dropdown option {
186
- background: var(--surface);
187
- color: var(--text);
188
- padding: 8px;
189
- }
190
-
191
- /* ─── Session View ─── */
192
- .session-view { display: none; flex-direction: column; height: 100vh; height: 100dvh; }
193
- .session-view.active { display: flex; }
194
-
195
- .header {
196
- padding: 14px 20px;
197
- display: flex;
198
- align-items: center;
199
- gap: 12px;
200
- background: var(--surface);
201
- border-bottom: 1px solid var(--border);
202
- position: relative;
203
- }
204
-
205
- .header-avatar {
206
- width: 34px;
207
- height: 34px;
208
- border-radius: 10px;
209
- background: linear-gradient(135deg, var(--accent), #6d28d9);
210
- display: flex;
211
- align-items: center;
212
- justify-content: center;
213
- }
214
-
215
- .header-avatar svg {
216
- width: 18px;
217
- height: 18px;
218
- fill: white;
219
- }
220
-
221
- .header .info h1 {
222
- font-size: 15px;
223
- font-weight: 600;
224
- letter-spacing: -0.2px;
225
- }
226
-
227
- .header .status {
228
- font-size: 12px;
229
- color: var(--text-muted);
230
- display: flex;
231
- align-items: center;
232
- gap: 5px;
233
- }
234
-
235
- .header .status .dot {
236
- width: 6px;
237
- height: 6px;
238
- border-radius: 50%;
239
- background: var(--text-muted);
240
- transition: background 0.3s ease;
241
- }
242
-
243
- .header .status .dot.connected { background: var(--success); }
244
- .header .status .dot.error { background: var(--error); }
245
-
246
- .end-btn {
247
- position: absolute;
248
- right: 16px;
249
- top: 50%;
250
- transform: translateY(-50%);
251
- background: transparent;
252
- border: 1px solid var(--border);
253
- color: var(--text-muted);
254
- padding: 6px 14px;
255
- border-radius: 8px;
256
- font-size: 13px;
257
- font-weight: 500;
258
- cursor: pointer;
259
- transition: all 0.2s ease;
260
- }
261
-
262
- .end-btn:hover {
263
- border-color: var(--error);
264
- color: var(--error);
265
- background: rgba(244, 63, 94, 0.06);
266
- }
267
-
268
- /* ─── Transcript ─── */
269
- .transcript {
270
- flex: 1;
271
- overflow-y: auto;
272
- padding: 16px 16px;
273
- display: flex;
274
- flex-direction: column;
275
- gap: 8px;
276
- scroll-behavior: smooth;
277
- }
278
-
279
- .transcript::-webkit-scrollbar { width: 4px; }
280
- .transcript::-webkit-scrollbar-thumb { background: var(--border); border-radius: 4px; }
281
-
282
- .msg {
283
- max-width: 82%;
284
- padding: 10px 14px;
285
- border-radius: 18px;
286
- font-size: 14px;
287
- line-height: 1.5;
288
- animation: msgIn 0.25s cubic-bezier(0.4, 0, 0.2, 1);
289
- }
290
-
291
- @keyframes msgIn {
292
- from { opacity: 0; transform: translateY(6px) scale(0.97); }
293
- to { opacity: 1; transform: translateY(0) scale(1); }
294
- }
295
-
296
- .msg.user {
297
- align-self: flex-end;
298
- background: var(--accent);
299
- color: white;
300
- border-bottom-right-radius: 6px;
301
- }
302
-
303
- .msg.assistant {
304
- align-self: flex-start;
305
- background: var(--surface);
306
- border: 1px solid var(--border);
307
- border-bottom-left-radius: 6px;
308
- }
309
-
310
- .msg.system {
311
- align-self: center;
312
- background: transparent;
313
- color: var(--text-muted);
314
- font-size: 12px;
315
- padding: 6px 12px;
316
- }
317
-
318
- .msg.tool {
319
- align-self: center;
320
- background: rgba(139, 92, 246, 0.08);
321
- border: 1px solid rgba(139, 92, 246, 0.2);
322
- color: var(--accent-light);
323
- font-size: 12px;
324
- font-weight: 500;
325
- padding: 6px 14px;
326
- border-radius: 20px;
327
- }
328
-
329
- .msg.tool.thinking {
330
- display: flex;
331
- align-items: center;
332
- gap: 8px;
333
- }
334
-
335
- .msg.tool.thinking .dots {
336
- display: flex;
337
- gap: 3px;
338
- }
339
-
340
- .msg.tool.thinking .dots span {
341
- width: 4px;
342
- height: 4px;
343
- border-radius: 50%;
344
- background: var(--accent-light);
345
- animation: dotBounce 1.4s ease-in-out infinite;
346
- }
347
-
348
- .msg.tool.thinking .dots span:nth-child(2) { animation-delay: 0.16s; }
349
- .msg.tool.thinking .dots span:nth-child(3) { animation-delay: 0.32s; }
350
-
351
- @keyframes dotBounce {
352
- 0%, 80%, 100% { opacity: 0.3; }
353
- 40% { opacity: 1; }
354
- }
355
-
356
- /* ─── Controls ─── */
357
- .controls {
358
- padding: 20px 20px 28px;
359
- display: flex;
360
- flex-direction: column;
361
- align-items: center;
362
- gap: 12px;
363
- background: linear-gradient(to top, var(--bg) 60%, transparent);
364
- }
365
-
366
- .mic-btn {
367
- width: 64px;
368
- height: 64px;
369
- border-radius: 50%;
370
- border: 2px solid var(--border);
371
- background: var(--surface);
372
- cursor: pointer;
373
- display: flex;
374
- align-items: center;
375
- justify-content: center;
376
- transition: all 0.2s cubic-bezier(0.4, 0, 0.2, 1);
377
- position: relative;
378
- }
379
-
380
- .mic-btn svg {
381
- width: 24px;
382
- height: 24px;
383
- fill: var(--text-secondary);
384
- transition: fill 0.2s ease;
385
- }
386
-
387
- .mic-btn:hover {
388
- border-color: var(--accent);
389
- background: var(--surface-hover);
390
- }
391
-
392
- .mic-btn:hover svg { fill: var(--accent-light); }
393
-
394
- .mic-btn.active {
395
- background: var(--accent);
396
- border-color: var(--accent);
397
- box-shadow: 0 0 24px var(--accent-glow-strong);
398
- }
399
-
400
- .mic-btn.active svg { fill: white; }
401
-
402
- .mic-btn.active::before {
403
- content: '';
404
- position: absolute;
405
- inset: -6px;
406
- border-radius: 50%;
407
- border: 2px solid var(--accent);
408
- animation: micPulse 1.6s ease-out infinite;
409
- }
410
-
411
- @keyframes micPulse {
412
- 0% { transform: scale(1); opacity: 0.5; }
413
- 100% { transform: scale(1.4); opacity: 0; }
414
- }
415
-
416
- .mic-btn.muted {
417
- border-color: var(--error);
418
- }
419
-
420
- .mic-btn.muted svg { fill: var(--error); }
421
-
422
- .btn-label {
423
- font-size: 12px;
424
- color: var(--text-muted);
425
- font-weight: 500;
426
- }
427
-
428
- /* ─── Overlay ─── */
429
- .overlay {
430
- position: fixed;
431
- inset: 0;
432
- background: rgba(9, 9, 11, 0.85);
433
- backdrop-filter: blur(8px);
434
- -webkit-backdrop-filter: blur(8px);
435
- display: flex;
436
- align-items: center;
437
- justify-content: center;
438
- z-index: 100;
439
- }
440
-
441
- .overlay.hidden { display: none; }
442
-
443
- .overlay-content {
444
- text-align: center;
445
- display: flex;
446
- flex-direction: column;
447
- align-items: center;
448
- gap: 16px;
449
- }
450
-
451
- .spinner {
452
- width: 36px;
453
- height: 36px;
454
- border: 3px solid var(--border);
455
- border-top-color: var(--accent);
456
- border-radius: 50%;
457
- animation: spin 0.7s linear infinite;
458
- }
459
-
460
- @keyframes spin { to { transform: rotate(360deg); } }
461
-
462
- .overlay .text {
463
- color: var(--text-secondary);
464
- font-size: 14px;
465
- font-weight: 500;
466
- }
467
- </style>
468
- </head>
469
- <body>
470
-
471
- <!-- Start Screen -->
472
- <div class="start-screen" id="startScreen">
473
- <div class="brand-icon">
474
- <svg viewBox="0 0 24 24"><path d="M12 14c1.66 0 3-1.34 3-3V5c0-1.66-1.34-3-3-3S9 3.34 9 5v6c0 1.66 1.34 3 3 3z"/><path d="M17 11c0 2.76-2.24 5-5 5s-5-2.24-5-5H5c0 3.53 2.61 6.43 6 6.92V21h2v-3.08c3.39-.49 6-3.39 6-6.92h-2z"/></svg>
475
- </div>
476
- <h1>VisionClaw Voice</h1>
477
- <p class="subtitle" id="greeting">Tap to start a conversation</p>
478
-
479
- <div class="voice-selector">
480
- <label>Choose a voice</label>
481
- <select class="voice-dropdown" id="voiceDropdown">
482
- <option value="alloy">Alloy</option>
483
- <option value="ash">Ash</option>
484
- <option value="ballad">Ballad</option>
485
- <option value="cedar">Cedar</option>
486
- <option value="coral" selected>Coral</option>
487
- <option value="echo">Echo</option>
488
- <option value="marin">Marin</option>
489
- <option value="sage">Sage</option>
490
- <option value="shimmer">Shimmer</option>
491
- <option value="verse">Verse</option>
492
- </select>
493
- </div>
494
-
495
- <button class="start-btn" id="startBtn" onclick="startSession()">
496
- <svg viewBox="0 0 24 24"><path d="M12 14c1.66 0 3-1.34 3-3V5c0-1.66-1.34-3-3-3S9 3.34 9 5v6c0 1.66 1.34 3 3 3z"/><path d="M17 11c0 2.76-2.24 5-5 5s-5-2.24-5-5H5c0 3.53 2.61 6.43 6 6.92V21h2v-3.08c3.39-.49 6-3.39 6-6.92h-2z"/></svg>
497
- <span>Start</span>
498
- </button>
499
- <p class="footer-text">VisionClaw</p>
500
- </div>
501
-
502
- <!-- Session View -->
503
- <div class="session-view" id="sessionView">
504
- <div class="header">
505
- <div class="header-avatar">
506
- <svg viewBox="0 0 24 24"><path d="M12 14c1.66 0 3-1.34 3-3V5c0-1.66-1.34-3-3-3S9 3.34 9 5v6c0 1.66 1.34 3 3 3z"/><path d="M17 11c0 2.76-2.24 5-5 5s-5-2.24-5-5H5c0 3.53 2.61 6.43 6 6.92V21h2v-3.08c3.39-.49 6-3.39 6-6.92h-2z"/></svg>
507
- </div>
508
- <div class="info">
509
- <h1>Voice Assistant</h1>
510
- <div class="status">
511
- <span class="dot" id="statusDot"></span>
512
- <span id="statusText">Connecting...</span>
513
- </div>
514
- </div>
515
- <button class="end-btn" onclick="endSession()">End</button>
516
- </div>
517
-
518
- <div class="transcript" id="transcript"></div>
519
-
520
- <div class="controls">
521
- <button class="mic-btn" id="micBtn" onclick="toggleMic()">
522
- <svg viewBox="0 0 24 24">
523
- <path d="M12 14c1.66 0 3-1.34 3-3V5c0-1.66-1.34-3-3-3S9 3.34 9 5v6c0 1.66 1.34 3 3 3z"/>
524
- <path d="M17 11c0 2.76-2.24 5-5 5s-5-2.24-5-5H5c0 3.53 2.61 6.43 6 6.92V21h2v-3.08c3.39-.49 6-3.39 6-6.92h-2z"/>
525
- </svg>
526
- </button>
527
- <span class="btn-label" id="micLabel">Tap to talk</span>
528
- </div>
529
- </div>
530
-
531
- <!-- Connecting Overlay -->
532
- <div class="overlay hidden" id="overlay">
533
- <div class="overlay-content">
534
- <div class="spinner"></div>
535
- <div class="text" id="overlayText">Connecting...</div>
536
- </div>
537
- </div>
538
-
539
- <script>
540
- // ─── Telegram Mini App Integration ──────────────────────────────────────────
541
// Handle to the Telegram WebApp bridge; undefined when the page is opened
// outside of Telegram.
const tg = window.Telegram?.WebApp;
let tgUser = null;
let isTelegramMiniApp = false;

// Only treat the page as a Mini App when Telegram actually supplied initData.
if (tg?.initData) {
  isTelegramMiniApp = true;
  tg.ready();
  tg.expand();

  tgUser = tg.initDataUnsafe?.user;
  console.log('[Telegram] Mini App loaded, user:', tgUser?.first_name);

  // Back button: leave the running session first, otherwise close the app.
  const handleBack = () => {
    if (!isConnected) {
      tg.close();
      return;
    }
    endSession();
  };
  tg.BackButton.onClick(handleBack);

  if (tgUser?.first_name) {
    // Personalise the start-screen greeting once the DOM is available.
    const personaliseGreeting = () => {
      const greetingEl = document.getElementById('greeting');
      if (!greetingEl) return;
      greetingEl.textContent = `Hey ${tgUser.first_name}, tap to start`;
    };
    document.addEventListener('DOMContentLoaded', personaliseGreeting);
  }
}
568
-
569
- // ─── Voice Selector (Dropdown with Preview) ────────────────────────────
570
// Voice chosen in the dropdown; sent to the server when a session starts.
let selectedVoice = 'coral';
// The Audio element of the sample that is currently playing, or null.
let previewAudio = null;

/**
 * Stop and forget the voice sample that is currently playing, if any.
 */
function stopPreview() {
  if (previewAudio) {
    previewAudio.pause();
    previewAudio.currentTime = 0;
    previewAudio = null;
  }
}

/**
 * Play the sample clip for `voice`, replacing any sample already playing.
 *
 * @param {string} voice - Voice identifier; used as the sample file name.
 */
function playPreview(voice) {
  stopPreview();

  const audio = new Audio(`/realtime/samples/${encodeURIComponent(voice)}.mp3`);

  // Only clear the shared reference if it still points at THIS clip. A late
  // 'error' from a clip that was already replaced must not drop the reference
  // to the newer clip, otherwise stopPreview() could no longer stop it.
  const release = () => {
    if (previewAudio === audio) previewAudio = null;
  };
  audio.addEventListener('ended', release);
  audio.addEventListener('error', release);

  previewAudio = audio;
  // Playback is best-effort: a rejected play() is deliberately ignored.
  audio.play().catch(() => {});
}
588
-
589
// Wire the voice dropdown: remember the choice and play its sample clip.
document.addEventListener('DOMContentLoaded', () => {
  const voicePicker = document.getElementById('voiceDropdown');
  if (!voicePicker) return;

  voicePicker.addEventListener('change', ({ target }) => {
    selectedVoice = target.value;
    console.log('[Voice] Selected:', selectedVoice);
    playPreview(selectedVoice);
  });
});
599
-
600
- // ─── State ─────────────────────────────────────────────────────────────────
601
// Connection objects for the active call; all null while no session exists.
let pc = null; // RTCPeerConnection created in startSession()
let dc = null; // 'oai-events' data channel opened on `pc`
let audioEl = null; // <audio> element that plays the remote stream
let micStream = null; // MediaStream returned by getUserMedia

// Session flags.
let isMicMuted = false; // toggled by the mic button
let isConnected = false; // true while the data channel is open
let sessionReady = false; // set once the first session.updated arrives

// Credentials returned by /realtime/api/session, echoed on later API calls.
let realtimeSessionId = null;
let realtimeSessionSecret = null;
610
-
611
/**
 * Build the query-string fragment that carries the session credentials.
 *
 * @returns {string} `sessionId=…&sessionSecret=…` (URL-encoded), or an empty
 *   string when either credential is missing.
 */
function sessionQuery() {
  if (realtimeSessionId && realtimeSessionSecret) {
    const id = encodeURIComponent(realtimeSessionId);
    const secret = encodeURIComponent(realtimeSessionSecret);
    return `sessionId=${id}&sessionSecret=${secret}`;
  }
  return '';
}
615
-
616
/**
 * Merge the session credentials into a request body.
 *
 * @param {object} [extra={}] - Additional fields for the body.
 * @returns {object} A new object: `extra` plus `sessionId` / `sessionSecret`
 *   (the credentials win if `extra` defines the same keys).
 */
function sessionPayload(extra = {}) {
  const credentials = {
    sessionId: realtimeSessionId,
    sessionSecret: realtimeSessionSecret,
  };
  return { ...extra, ...credentials };
}
619
-
620
/**
 * Ask the server to validate the Telegram initData before a session starts.
 *
 * @returns {Promise<void>} Resolves when the server answers with a 2xx status.
 * @throws {Error} When no initData is available, or when the server rejects
 *   the request (its `error` field is used as the message when present).
 */
async function verifyTelegramOwner() {
  const initData = tg?.initData;
  if (!initData) {
    throw new Error('Telegram Mini App authentication is required.');
  }

  const response = await fetch('/realtime/api/verify-telegram', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ initData }),
  });
  if (response.ok) return;

  // A non-JSON error body falls back to a generic message.
  const data = await response.json().catch(() => ({ error: 'Unauthorized' }));
  throw new Error(data.error || 'Realtime is only available to the owner.');
}
634
-
635
- // ─── UI Helpers ────────────────────────────────────────────────────────────
636
-
637
/**
 * Append a message bubble to the transcript and scroll it into view.
 *
 * @param {string} text - Message text (inserted as plain text).
 * @param {string} [type='system'] - CSS modifier added after `msg`.
 * @returns {HTMLDivElement} The element that was appended.
 */
function addMessage(text, type = 'system') {
  const bubble = document.createElement('div');
  bubble.className = `msg ${type}`;
  bubble.textContent = text;

  const log = document.getElementById('transcript');
  log.appendChild(bubble);
  log.scrollTop = log.scrollHeight;
  return bubble;
}
646
-
647
/**
 * Update the header status line and its indicator dot.
 *
 * @param {string} text - Status text to display.
 * @param {string} [state=''] - Optional CSS modifier for the dot.
 */
function setStatus(text, state = '') {
  const label = document.getElementById('statusText');
  label.textContent = text;

  const indicator = document.getElementById('statusDot');
  indicator.className = state ? `dot ${state}` : 'dot';
}
652
-
653
/**
 * Show the full-screen progress overlay with the given caption.
 *
 * @param {string} text - Caption displayed next to the spinner.
 */
function showOverlay(text) {
  const overlay = document.getElementById('overlay');
  const caption = document.getElementById('overlayText');
  overlay.classList.remove('hidden');
  caption.textContent = text;
}
657
-
658
/** Hide the full-screen progress overlay. */
function hideOverlay() {
  const { classList } = document.getElementById('overlay');
  classList.add('hidden');
}
661
-
662
- // ─── Start Session ──────────────────────────────────────────────────────────
663
-
664
/**
 * Release everything a failed startSession() attempt managed to acquire, so
 * the microphone is not left captured and no half-open connection lingers.
 */
function releaseFailedSession() {
  if (dc) {
    // Detach handlers first so closing the connection cannot emit a
    // misleading "Session ended." message for a session that never started.
    dc.onopen = null;
    dc.onmessage = null;
    dc.onclose = null;
    dc = null;
  }
  if (pc) {
    pc.ontrack = null;
    pc.close();
    pc = null;
  }
  if (micStream) {
    micStream.getTracks().forEach((track) => track.stop());
    micStream = null;
  }
  if (audioEl) {
    audioEl.srcObject = null;
    audioEl = null;
  }
  isConnected = false;
  sessionReady = false;
  realtimeSessionId = null;
  realtimeSessionSecret = null;
}

/**
 * Start a voice session: verify the Telegram owner, capture the microphone,
 * negotiate WebRTC through /realtime/api/session and configure the session
 * over the data channel once it opens.
 *
 * Errors are reported in the status line and transcript rather than thrown;
 * on failure every acquired resource (mic, peer connection) is released.
 *
 * @returns {Promise<void>}
 */
async function startSession() {
  stopPreview(); // Stop any playing voice preview immediately
  document.getElementById('startScreen').classList.add('hidden');
  document.getElementById('sessionView').classList.add('active');
  showOverlay('Verifying owner...');

  if (isTelegramMiniApp) {
    tg.BackButton.show();
  }

  try {
    await verifyTelegramOwner();

    showOverlay('Requesting microphone...');
    micStream = await navigator.mediaDevices.getUserMedia({ audio: true });

    showOverlay('Setting up connection...');

    pc = new RTCPeerConnection();

    audioEl = document.createElement('audio');
    audioEl.autoplay = true;
    pc.ontrack = (e) => {
      audioEl.srcObject = e.streams[0];
    };

    // Mute mic initially — unmute only after session.updated confirms voice config
    const micTrack = micStream.getTracks()[0];
    micTrack.enabled = false;
    pc.addTrack(micTrack);

    dc = pc.createDataChannel('oai-events');
    dc.onopen = async () => {
      console.log('[DC] Data channel open');
      isConnected = true;
      sessionStartTime = Date.now();
      transcriptLog = [];
      showOverlay('Configuring voice...');

      // GA API: session.update via data channel is the ONLY way to configure
      // tools, voice, instructions. This is critical — not optional.
      // Mic stays muted until session.updated is received to prevent
      // the model from responding with default voice before config is applied.
      try {
        const configRes = await fetch(`/realtime/api/session-config?voice=${encodeURIComponent(selectedVoice)}&${sessionQuery()}`);
        if (!configRes.ok) {
          // Never forward an HTTP error body to the model as a session.update.
          throw new Error(`session-config request failed: HTTP ${configRes.status}`);
        }
        const sessionUpdate = await configRes.json();
        dc.send(JSON.stringify(sessionUpdate));
        console.log('[DC] Sent session.update (tools, voice, instructions configured)');
      } catch (err) {
        console.error('[DC] CRITICAL: session.update failed:', err);
        // Unmute mic anyway so session is usable, even with wrong voice
        micTrack.enabled = true;
        hideOverlay();
        setStatus('Connected', 'connected');
        addMessage('Warning: Voice configuration failed. Tools may not work.', 'system');
      }
    };

    dc.onmessage = (e) => {
      let realtimeEvent;
      try {
        realtimeEvent = JSON.parse(e.data);
      } catch (err) {
        // One malformed frame must not surface as an uncaught handler error.
        console.error('[DC] Ignoring malformed event payload:', err);
        return;
      }
      handleRealtimeEvent(realtimeEvent);
    };

    dc.onclose = () => {
      isConnected = false;
      setStatus('Disconnected', 'error');
      addMessage('Session ended.');
    };

    showOverlay('Connecting to voice...');

    const offer = await pc.createOffer();
    await pc.setLocalDescription(offer);

    if (!tg?.initData) {
      throw new Error('Telegram Mini App authentication is required.');
    }

    const sdpResponse = await fetch(`/realtime/api/session?voice=${encodeURIComponent(selectedVoice)}`, {
      method: 'POST',
      body: offer.sdp,
      headers: {
        'Content-Type': 'application/sdp',
        'X-Telegram-Init-Data': tg.initData,
      },
    });

    if (!sdpResponse.ok) {
      const errText = await sdpResponse.text();
      throw new Error(`Session failed: ${sdpResponse.status} - ${errText}`);
    }

    const sessionData = await sdpResponse.json();
    realtimeSessionId = sessionData.sessionId;
    realtimeSessionSecret = sessionData.sessionSecret;
    await pc.setRemoteDescription({ type: 'answer', sdp: sessionData.sdp });

    console.log('[WebRTC] Connected to OpenAI Realtime API');

  } catch (err) {
    console.error('[Session] Error:', err);
    // Do not keep the microphone captured or the connection open after a
    // failed start.
    releaseFailedSession();
    hideOverlay();
    setStatus('Error: ' + err.message, 'error');
    addMessage('Error: ' + err.message, 'system');
  }
}
770
-
771
- // ─── Handle Realtime API Events ────────────────────────────────────────────
772
-
773
// Streaming transcript state: text accumulated from delta events and the
// bubble it is rendered into, per speaker. Reset when the turn completes.
let currentAssistantText = '';
let currentAssistantMsgEl = null;
let currentUserText = '';
let currentUserMsgEl = null;
// Completed turns ({ role, text, timestamp }) uploaded when the session ends.
let transcriptLog = [];
// Date.now() at the moment the data channel opened; null while idle.
let sessionStartTime = null;

/**
 * Dispatch one event received on the realtime data channel: session
 * lifecycle, streaming transcripts for both speakers, tool calls, errors and
 * microphone activity indicators.
 *
 * @param {object} event - Parsed event; `event.type` selects the handler.
 */
function handleRealtimeEvent(event) {
  // DEBUG: log ALL event types
  console.log('[DC Event]', event.type);

  switch (event.type) {
    case 'session.created':
      console.log('[Session] Created:', event.session?.id);
      console.log('[Session] Created tools:', event.session?.tools?.length || 0);
      break;

    case 'session.updated':
      console.log('[Session] Updated:', JSON.stringify(Object.keys(event.session || {})));
      console.log('[Session] Tools registered:', event.session?.tools?.length || 0);
      console.log('[Session] Tool names:', event.session?.tools?.map(t => t.name));
      console.log('[Session] Modalities:', event.session?.modalities);
      // Voice config is now applied — unmute mic and let user speak (only once)
      if (!sessionReady) {
        sessionReady = true;
        if (micStream) {
          // Honour a mute the user already requested instead of forcing the
          // track on while the UI shows "Muted".
          micStream.getTracks().forEach(t => { t.enabled = !isMicMuted; });
          console.log('[Session] Mic unmuted — voice config confirmed');
        }
        hideOverlay();
        setStatus('Connected', 'connected');
        addMessage('Session started. Speak naturally!');
      }
      break;

    case 'conversation.item.input_audio_transcription.delta':
      currentUserText += event.delta || '';
      if (!currentUserMsgEl) {
        currentUserMsgEl = addMessage(currentUserText, 'user');
      } else {
        currentUserMsgEl.textContent = currentUserText;
      }
      break;

    case 'conversation.item.input_audio_transcription.completed':
      if (event.transcript?.trim()) {
        // The final transcript replaces whatever the deltas produced.
        if (currentUserMsgEl) {
          currentUserMsgEl.textContent = event.transcript.trim();
        } else {
          addMessage(event.transcript.trim(), 'user');
        }
        transcriptLog.push({
          role: 'user',
          text: event.transcript.trim(),
          timestamp: new Date().toISOString(),
        });
      }
      currentUserText = '';
      currentUserMsgEl = null;
      break;

    case 'conversation.item.input_audio_transcription.failed':
      console.warn('[Transcription] Failed:', event.error);
      currentUserText = '';
      currentUserMsgEl = null;
      break;

    case 'response.output_audio_transcript.delta':
      currentAssistantText += event.delta || '';
      if (!currentAssistantMsgEl) {
        currentAssistantMsgEl = addMessage(currentAssistantText, 'assistant');
      } else {
        currentAssistantMsgEl.textContent = currentAssistantText;
      }
      break;

    case 'response.output_audio_transcript.done':
      if (event.transcript?.trim()) {
        if (currentAssistantMsgEl) {
          currentAssistantMsgEl.textContent = event.transcript.trim();
        } else {
          addMessage(event.transcript.trim(), 'assistant');
        }
        transcriptLog.push({
          role: 'assistant',
          text: event.transcript.trim(),
          timestamp: new Date().toISOString(),
        });
      }
      currentAssistantText = '';
      currentAssistantMsgEl = null;
      break;

    case 'response.function_call_arguments.done':
      handleFunctionCall(event);
      break;

    case 'response.created':
      break;

    case 'response.done':
      if (event.response?.usage) {
        console.log('[Usage]', event.response.usage);
      }
      break;

    case 'conversation.item.created':
    case 'conversation.item.done':
      break;

    case 'error':
      console.error('[Realtime Error]', JSON.stringify(event.error));
      if (!sessionReady) {
        // The "Configuring voice..." overlay is only dismissed by
        // session.updated. If an error arrives first, take the overlay down
        // so the message is readable and the End button is reachable. A later
        // session.updated still switches the status back to Connected.
        hideOverlay();
        setStatus(`Error: ${event.error?.message || 'Unknown error'}`, 'error');
      }
      addMessage(`Error: ${event.error?.message || 'Unknown error'}`, 'system');
      break;

    case 'input_audio_buffer.speech_started':
      document.getElementById('micBtn').classList.add('active');
      document.getElementById('micLabel').textContent = 'Listening...';
      break;

    case 'input_audio_buffer.speech_stopped':
      document.getElementById('micBtn').classList.remove('active');
      document.getElementById('micLabel').textContent = isMicMuted ? 'Muted' : 'Tap to mute';
      break;

    case 'input_audio_buffer.committed':
    case 'input_audio_buffer.timeout_triggered':
      break;

    default:
      // Surface anything tool-related in full; stay quiet about the
      // high-volume streaming events listed below.
      if (event.type?.includes('function') || event.type?.includes('tool')) {
        console.log('[Event] FUNCTION/TOOL related:', event.type, JSON.stringify(event).substring(0, 500));
      } else if (!event.type?.startsWith('response.audio.') &&
                 !event.type?.startsWith('response.output_audio_transcript.') &&
                 event.type !== 'response.content_part.added' &&
                 event.type !== 'response.content_part.done' &&
                 event.type !== 'response.output_item.added' &&
                 event.type !== 'response.output_item.done') {
        console.log('[Event] Unhandled:', event.type);
      }
      break;
  }
}
916
-
917
- // ─── Function Call Handling ─────────────────────────────────────────────────
918
-
919
/**
 * Append a "tool is running" bubble (label plus three animated dots) to the
 * transcript and scroll it into view.
 *
 * The tool name comes from a realtime event, so it is inserted as a text
 * node rather than parsed as HTML.
 *
 * @param {string} name - Tool name; known tools are shown with a friendly label.
 * @returns {HTMLDivElement} The bubble element, so the caller can update it.
 */
function addToolThinking(name) {
  // A Map (not an object literal) so names such as "constructor" cannot
  // resolve to inherited Object.prototype members.
  const friendlyNames = new Map([
    ['web_search', 'Searching the web'],
    ['list_calendar_events', 'Checking calendar'],
    ['create_calendar_event', 'Creating event'],
  ]);

  const transcript = document.getElementById('transcript');
  const msg = document.createElement('div');
  msg.className = 'msg tool thinking';
  msg.appendChild(document.createTextNode(String(friendlyNames.get(name) || name)));

  const dots = document.createElement('div');
  dots.className = 'dots';
  for (let i = 0; i < 3; i += 1) {
    dots.appendChild(document.createElement('span'));
  }
  msg.appendChild(dots);

  transcript.appendChild(msg);
  transcript.scrollTop = transcript.scrollHeight;
  return msg;
}
933
-
934
/**
 * Execute a tool call requested by the model and report the result back.
 *
 * The tool runs server-side via POST /realtime/api/execute-tool; its output
 * (or a JSON-encoded error) is sent over the data channel as a
 * `function_call_output` item followed by `response.create`.
 *
 * @param {object} event - Event carrying `call_id`, `name` and `arguments`.
 * @returns {Promise<void>}
 */
async function handleFunctionCall(event) {
  const { call_id, name, arguments: argsStr } = event;
  console.log(`[FnCall] RECEIVED: ${name}(${argsStr}) call_id=${call_id}`);
  const thinkingMsg = addToolThinking(name);
  // Remember which channel asked: the session may end (dc reset to null)
  // while the tool is still running.
  const channel = dc;

  let result;
  try {
    // Use HTTP POST instead of WebSocket for reliability through tunnels
    console.log(`[FnCall] Executing via HTTP POST /api/execute-tool...`);
    const response = await fetch('/realtime/api/execute-tool', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(sessionPayload({ name, arguments: argsStr, call_id })),
    });

    if (!response.ok) {
      const errData = await response.json().catch(() => ({ error: response.statusText }));
      result = JSON.stringify({ error: errData.error || `HTTP ${response.status}` });
      console.error(`[FnCall] HTTP error:`, result);
    } else {
      const data = await response.json();
      // The output sent back must be a string; serialise anything else
      // instead of failing on .substring() and reporting a bogus
      // "Network error".
      result = typeof data.result === 'string'
        ? data.result
        : JSON.stringify(data.result ?? null);
      console.log(`[FnCall] Result for ${name}:`, result.substring(0, 200));
    }
  } catch (err) {
    result = JSON.stringify({ error: `Network error: ${err.message}` });
    console.error(`[FnCall] Network error:`, err);
  }

  thinkingMsg.classList.remove('thinking');
  // Plain text: the tool name originates from a realtime event.
  thinkingMsg.textContent = `${name} done`;

  if (!channel || channel.readyState !== 'open') {
    console.warn(`[FnCall] Data channel closed before ${name} finished; result dropped`);
    return;
  }

  // Send result back to OpenAI via data channel
  channel.send(JSON.stringify({
    type: 'conversation.item.create',
    item: {
      type: 'function_call_output',
      call_id: call_id,
      output: result,
    },
  }));
  channel.send(JSON.stringify({ type: 'response.create' }));
}
977
-
978
- // ─── Mic Toggle ────────────────────────────────────────────────────────────
979
-
980
/**
 * Flip the microphone between muted and live and update the mic button.
 * Does nothing when no microphone stream exists.
 */
function toggleMic() {
  if (!micStream) return;

  isMicMuted = !isMicMuted;
  for (const track of micStream.getTracks()) {
    track.enabled = !isMicMuted;
  }

  const micButton = document.getElementById('micBtn');
  const micCaption = document.getElementById('micLabel');
  micButton.classList.toggle('muted', isMicMuted);
  micCaption.textContent = isMicMuted ? 'Muted' : 'Tap to mute';
}
998
-
999
- // ─── End Session ───────────────────────────────────────────────────────────
1000
-
1001
/**
 * End the current session: stop all media, upload the transcript (when
 * there is one), reset session state and return to the start screen.
 *
 * @returns {Promise<void>} Upload failures are logged, never thrown.
 */
async function endSession() {
  // Stop capturing and playing audio immediately; the transcript upload
  // below may be slow and must not keep the microphone live.
  if (pc) { pc.close(); pc = null; }
  if (micStream) { micStream.getTracks().forEach((t) => t.stop()); micStream = null; }
  if (audioEl) { audioEl.srcObject = null; audioEl = null; }
  if (dc) { dc = null; }
  isConnected = false;

  if (transcriptLog.length > 0) {
    try {
      const duration = sessionStartTime ? (Date.now() - sessionStartTime) / 1000 : null;
      const response = await fetch('/realtime/api/end-session', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(sessionPayload({
          transcript: transcriptLog,
          duration,
        })),
      });
      if (!response.ok) {
        // fetch() resolves on HTTP errors; do not log a rejected upload as sent.
        throw new Error(`HTTP ${response.status}`);
      }
      console.log(`[Session] Transcript sent (${transcriptLog.length} messages)`);
    } catch (err) {
      console.error('[Session] Failed to send transcript:', err);
    }
  }

  isMicMuted = false;
  currentAssistantText = '';
  currentAssistantMsgEl = null;
  currentUserText = '';
  currentUserMsgEl = null;
  transcriptLog = [];
  sessionStartTime = null;
  sessionReady = false;
  realtimeSessionId = null;
  realtimeSessionSecret = null;

  // A session ended while still connecting/configuring would otherwise leave
  // the progress overlay covering the start screen.
  hideOverlay();
  document.getElementById('sessionView').classList.remove('active');
  document.getElementById('startScreen').classList.remove('hidden');

  if (isTelegramMiniApp) {
    tg.BackButton.hide();
  }
}
1041
-
1042
- // ─── Cleanup ───────────────────────────────────────────────────────────────
1043
-
1044
// On page unload: hand any collected transcript to the browser as a beacon,
// then close the connection and stop the microphone.
window.addEventListener('beforeunload', () => {
  const hasTranscript = transcriptLog.length > 0;
  if (hasTranscript) {
    const duration = sessionStartTime ? (Date.now() - sessionStartTime) / 1000 : null;
    const body = new Blob(
      [JSON.stringify(sessionPayload({ transcript: transcriptLog, duration }))],
      { type: 'application/json' },
    );
    navigator.sendBeacon('/realtime/api/end-session', body);
  }

  pc?.close();
  micStream?.getTracks().forEach((track) => track.stop());
});
1056
- </script>
1057
- </body>
1058
- </html>