web-agent-bridge 3.4.0 → 3.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (315) hide show
  1. package/LICENSE +84 -84
  2. package/README.ar.md +1565 -1304
  3. package/README.md +171 -298
  4. package/bin/agent-runner.js +474 -474
  5. package/bin/cli.js +237 -237
  6. package/bin/wab-init.js +244 -223
  7. package/bin/wab.js +80 -80
  8. package/examples/azure-dns-wab.js +83 -83
  9. package/examples/bidi-agent.js +119 -119
  10. package/examples/cloudflare-wab-dns.js +121 -121
  11. package/examples/cpanel-wab-dns.js +114 -114
  12. package/examples/cross-site-agent.js +91 -91
  13. package/examples/dns-discovery-agent.js +166 -166
  14. package/examples/gcp-dns-wab.js +76 -76
  15. package/examples/governance-agent.js +169 -169
  16. package/examples/mcp-agent.js +94 -94
  17. package/examples/next-app-router/README.md +44 -44
  18. package/examples/plesk-wab-dns.js +103 -103
  19. package/examples/puppeteer-agent.js +108 -108
  20. package/examples/route53-wab-dns.js +144 -144
  21. package/examples/saas-dashboard/README.md +55 -55
  22. package/examples/safe-mode-agent.js +96 -96
  23. package/examples/self-discovery.js +106 -0
  24. package/examples/shopify-hydrogen/README.md +74 -74
  25. package/examples/vision-agent.js +171 -171
  26. package/examples/wab-sign.js +74 -74
  27. package/examples/wab-verify.js +60 -60
  28. package/examples/wordpress-elementor/README.md +77 -77
  29. package/package.json +93 -93
  30. package/public/.well-known/agent-tools.json +180 -180
  31. package/public/.well-known/ai-assets.json +59 -59
  32. package/public/.well-known/security.txt +8 -8
  33. package/public/.well-known/wab.json +28 -28
  34. package/public/activate.html +448 -368
  35. package/public/adopt.html +236 -0
  36. package/public/adoption-metrics.html +188 -188
  37. package/public/agent-workspace.html +359 -349
  38. package/public/ai.html +198 -198
  39. package/public/api.html +397 -413
  40. package/public/atp.html +171 -0
  41. package/public/azure-dns-integration.html +289 -289
  42. package/public/browser.html +486 -486
  43. package/public/cloudflare-integration.html +380 -380
  44. package/public/commander-dashboard.html +243 -243
  45. package/public/cookies.html +210 -210
  46. package/public/cpanel-integration.html +398 -398
  47. package/public/css/agent-workspace.css +1713 -1713
  48. package/public/css/premium.css +317 -317
  49. package/public/css/styles.css +1401 -1263
  50. package/public/dashboard-shieldlink.html +295 -0
  51. package/public/dashboard.html +711 -707
  52. package/public/dns.html +436 -436
  53. package/public/docs.html +588 -588
  54. package/public/enterprise-mesh.ar.html +80 -0
  55. package/public/enterprise-mesh.html +81 -0
  56. package/public/feed.xml +89 -89
  57. package/public/gcp-dns-integration.html +318 -318
  58. package/public/governance.ar.html +70 -0
  59. package/public/governance.html +69 -0
  60. package/public/growth.html +465 -465
  61. package/public/index.html +1372 -1266
  62. package/public/integrations.html +556 -556
  63. package/public/js/activate.js +449 -145
  64. package/public/js/agent-workspace.js +1740 -1740
  65. package/public/js/auth-nav.js +117 -65
  66. package/public/js/auth-redirect.js +12 -12
  67. package/public/js/cookie-consent.js +56 -56
  68. package/public/js/dns.js +438 -438
  69. package/public/js/wab-demo-page.js +721 -721
  70. package/public/js/ws-client.js +74 -74
  71. package/public/l-preview.html +242 -0
  72. package/public/llms-full.txt +360 -360
  73. package/public/llms.txt +125 -125
  74. package/public/login.html +85 -85
  75. package/public/mesh-dashboard.html +328 -328
  76. package/public/milestones.html +346 -0
  77. package/public/one-click.html +779 -0
  78. package/public/openapi.json +669 -669
  79. package/public/partners.ar.html +145 -0
  80. package/public/partners.html +143 -0
  81. package/public/phone-shield.html +281 -281
  82. package/public/plesk-integration.html +375 -375
  83. package/public/premium-dashboard.html +2489 -2489
  84. package/public/premium.html +793 -793
  85. package/public/privacy.html +297 -297
  86. package/public/provider-onboarding.html +172 -172
  87. package/public/provider-sandbox.html +134 -134
  88. package/public/providers.html +359 -359
  89. package/public/refusals.html +172 -0
  90. package/public/register.html +105 -105
  91. package/public/registrar-integrations.html +141 -141
  92. package/public/ring4.html +292 -0
  93. package/public/robots.txt +99 -99
  94. package/public/route53-integration.html +531 -531
  95. package/public/score.html +263 -0
  96. package/public/script/wab-consent.d.ts +36 -36
  97. package/public/script/wab-consent.js +104 -104
  98. package/public/script/wab-schema.js +131 -131
  99. package/public/script/wab.d.ts +108 -108
  100. package/public/script/wab.min.js +580 -580
  101. package/public/security.txt +8 -8
  102. package/public/shieldlink.html +244 -0
  103. package/public/shieldqr.html +231 -231
  104. package/public/sitemap.xml +13 -1
  105. package/public/terms.html +256 -256
  106. package/public/trust-graph-api.ar.html +92 -0
  107. package/public/trust-graph-api.html +91 -0
  108. package/public/wab-features.html +560 -0
  109. package/public/wab-trust.html +200 -200
  110. package/public/wab-truth.html +375 -0
  111. package/public/wab-vs-protocols.html +210 -210
  112. package/public/whitepaper.html +449 -449
  113. package/script/ai-agent-bridge.js +1754 -1754
  114. package/sdk/README.md +99 -99
  115. package/sdk/agent-mesh.js +449 -449
  116. package/sdk/atp.js +103 -0
  117. package/sdk/auto-discovery.js +301 -288
  118. package/sdk/commander.js +262 -262
  119. package/sdk/governance.js +262 -262
  120. package/sdk/index.d.ts +464 -464
  121. package/sdk/index.js +653 -649
  122. package/sdk/multi-agent.js +318 -318
  123. package/sdk/safe-mode.js +221 -221
  124. package/sdk/safety-shield.js +219 -219
  125. package/sdk/schema-discovery.js +83 -83
  126. package/server/adapters/index.js +520 -520
  127. package/server/config/plans.js +412 -367
  128. package/server/config/secrets.js +102 -102
  129. package/server/control-plane/index.js +301 -301
  130. package/server/data-plane/index.js +354 -354
  131. package/server/index.js +793 -670
  132. package/server/llm/index.js +404 -404
  133. package/server/middleware/adminAuth.js +35 -35
  134. package/server/middleware/api-tier.js +170 -0
  135. package/server/middleware/auth.js +50 -50
  136. package/server/middleware/featureGate.js +88 -88
  137. package/server/middleware/rateLimits.js +100 -100
  138. package/server/middleware/sensitiveAction.js +157 -157
  139. package/server/middleware/wab-trust.js +141 -0
  140. package/server/migrations/001_add_analytics_indexes.sql +7 -7
  141. package/server/migrations/002_premium_features.sql +418 -418
  142. package/server/migrations/003_ads_integer_cents.sql +33 -33
  143. package/server/migrations/004_agent_os.sql +158 -158
  144. package/server/migrations/005_marketplace_metering.sql +126 -126
  145. package/server/migrations/006_growth_suite.sql +138 -0
  146. package/server/migrations/007_governance.sql +106 -106
  147. package/server/migrations/008_plans.sql +144 -144
  148. package/server/migrations/009_shieldqr.sql +30 -30
  149. package/server/migrations/010_extended_trust.sql +33 -33
  150. package/server/migrations/011_outreach.sql +47 -0
  151. package/server/migrations/012_shieldlink.sql +116 -0
  152. package/server/migrations/013_ct_monitor.sql +13 -0
  153. package/server/migrations/014_wab_advanced_features.sql +128 -0
  154. package/server/migrations/015_wab_truth_layer.sql +101 -0
  155. package/server/migrations/016_ring4_external_trust.sql +84 -0
  156. package/server/migrations/017_ring4_extensions.sql +69 -0
  157. package/server/migrations/018_commercial_foundations.sql +167 -0
  158. package/server/migrations/019_unify_tier_constraints.sql +133 -0
  159. package/server/migrations/020_agent_transaction_primitive.sql +119 -0
  160. package/server/models/adapters/index.js +33 -33
  161. package/server/models/adapters/mysql.js +183 -183
  162. package/server/models/adapters/postgresql.js +172 -172
  163. package/server/models/adapters/sqlite.js +7 -7
  164. package/server/models/db.js +740 -740
  165. package/server/observability/failure-analysis.js +337 -337
  166. package/server/observability/index.js +394 -394
  167. package/server/protocol/capabilities.js +223 -223
  168. package/server/protocol/index.js +243 -243
  169. package/server/protocol/schema.js +584 -584
  170. package/server/registry/certification.js +271 -271
  171. package/server/registry/index.js +326 -326
  172. package/server/routes/activate.js +478 -0
  173. package/server/routes/admin-outreach.js +239 -0
  174. package/server/routes/admin-plans.js +76 -76
  175. package/server/routes/admin-premium.js +674 -673
  176. package/server/routes/admin-shieldlink.js +137 -0
  177. package/server/routes/admin-shieldqr.js +90 -90
  178. package/server/routes/admin-trust-monitor.js +139 -83
  179. package/server/routes/admin.js +550 -549
  180. package/server/routes/adopt.js +61 -0
  181. package/server/routes/ads.js +130 -130
  182. package/server/routes/agent-workspace.js +540 -540
  183. package/server/routes/api-keys.js +127 -0
  184. package/server/routes/api.js +150 -150
  185. package/server/routes/auth.js +71 -71
  186. package/server/routes/billing.js +57 -57
  187. package/server/routes/commander.js +316 -316
  188. package/server/routes/customer-shieldlink.js +133 -0
  189. package/server/routes/demo-showcase.js +332 -332
  190. package/server/routes/demo-store.js +154 -154
  191. package/server/routes/diagnose.js +373 -0
  192. package/server/routes/discovery.js +2348 -2348
  193. package/server/routes/enterprise-mesh.js +170 -0
  194. package/server/routes/gateway.js +173 -173
  195. package/server/routes/governance-saas.js +203 -0
  196. package/server/routes/governance.js +208 -208
  197. package/server/routes/growth.js +1048 -0
  198. package/server/routes/intent.js +328 -0
  199. package/server/routes/license.js +251 -251
  200. package/server/routes/mesh.js +469 -469
  201. package/server/routes/noscript.js +543 -543
  202. package/server/routes/partners.js +201 -0
  203. package/server/routes/plans.js +33 -33
  204. package/server/routes/premium-v2.js +686 -686
  205. package/server/routes/premium.js +724 -724
  206. package/server/routes/providers.js +650 -650
  207. package/server/routes/reputation.js +411 -0
  208. package/server/routes/ring4.js +885 -0
  209. package/server/routes/runtime.js +2148 -2148
  210. package/server/routes/shieldlink.js +70 -0
  211. package/server/routes/shieldqr.js +88 -88
  212. package/server/routes/sovereign.js +465 -465
  213. package/server/routes/transactions.js +233 -0
  214. package/server/routes/truth-layer.js +670 -0
  215. package/server/routes/universal.js +200 -200
  216. package/server/routes/unsubscribe.js +51 -0
  217. package/server/routes/wab-api.js +850 -850
  218. package/server/routes/wab-cache.js +282 -0
  219. package/server/runtime/container-worker.js +111 -111
  220. package/server/runtime/container.js +448 -448
  221. package/server/runtime/distributed-worker.js +362 -362
  222. package/server/runtime/event-bus.js +210 -210
  223. package/server/runtime/index.js +253 -253
  224. package/server/runtime/queue.js +599 -599
  225. package/server/runtime/replay.js +666 -666
  226. package/server/runtime/sandbox.js +266 -266
  227. package/server/runtime/scheduler.js +534 -534
  228. package/server/runtime/session-engine.js +293 -293
  229. package/server/runtime/state-manager.js +188 -188
  230. package/server/secrets/wab-signing-key.pem +3 -0
  231. package/server/secrets/wab-signing-pub.pem +3 -0
  232. package/server/security/cross-site-redactor.js +196 -196
  233. package/server/security/dry-run.js +180 -180
  234. package/server/security/human-gate-rate-limit.js +147 -147
  235. package/server/security/human-gate-transports.js +178 -178
  236. package/server/security/human-gate.js +281 -281
  237. package/server/security/index.js +368 -368
  238. package/server/security/intent-engine.js +245 -245
  239. package/server/security/reward-guard.js +171 -171
  240. package/server/security/rollback-store.js +239 -239
  241. package/server/security/token-scope.js +404 -404
  242. package/server/security/url-policy.js +139 -139
  243. package/server/services/adoption-agent.js +182 -0
  244. package/server/services/agent-chat.js +506 -506
  245. package/server/services/agent-learning.js +601 -601
  246. package/server/services/agent-memory.js +625 -625
  247. package/server/services/agent-mesh.js +555 -555
  248. package/server/services/agent-symphony.js +717 -717
  249. package/server/services/agent-tasks.js +1807 -1807
  250. package/server/services/api-key-engine.js +292 -292
  251. package/server/services/cluster.js +894 -894
  252. package/server/services/commander.js +738 -738
  253. package/server/services/edge-compute.js +440 -440
  254. package/server/services/email.js +233 -233
  255. package/server/services/fairness-engine.js +409 -0
  256. package/server/services/fairness.js +420 -0
  257. package/server/services/governance.js +466 -466
  258. package/server/services/hosted-runtime.js +205 -205
  259. package/server/services/lfd.js +635 -635
  260. package/server/services/local-ai.js +389 -389
  261. package/server/services/marketplace.js +270 -270
  262. package/server/services/metering.js +182 -182
  263. package/server/services/modules/affiliate-intelligence.js +93 -93
  264. package/server/services/modules/agent-firewall.js +90 -90
  265. package/server/services/modules/bounty.js +89 -89
  266. package/server/services/modules/collective-bargaining.js +92 -92
  267. package/server/services/modules/dark-pattern.js +66 -66
  268. package/server/services/modules/gov-intelligence.js +45 -45
  269. package/server/services/modules/neural.js +55 -55
  270. package/server/services/modules/notary.js +49 -49
  271. package/server/services/modules/price-time-machine.js +86 -86
  272. package/server/services/modules/protocol.js +104 -104
  273. package/server/services/negotiation.js +439 -439
  274. package/server/services/outreach-agent.js +312 -0
  275. package/server/services/plans.js +214 -214
  276. package/server/services/plugins.js +771 -771
  277. package/server/services/price-intelligence.js +566 -566
  278. package/server/services/price-shield.js +1137 -1137
  279. package/server/services/provider-clients.js +740 -740
  280. package/server/services/reputation.js +465 -465
  281. package/server/services/search-engine.js +357 -357
  282. package/server/services/security.js +513 -513
  283. package/server/services/self-healing.js +843 -843
  284. package/server/services/shieldlink.js +492 -0
  285. package/server/services/shieldqr.js +322 -322
  286. package/server/services/sovereign-shield.js +542 -542
  287. package/server/services/ssl-ct-monitor.js +224 -0
  288. package/server/services/ssl-inspector.js +42 -42
  289. package/server/services/ssl-monitor.js +167 -167
  290. package/server/services/stripe.js +206 -205
  291. package/server/services/swarm.js +788 -788
  292. package/server/services/transactions.js +525 -0
  293. package/server/services/universal-scraper.js +662 -662
  294. package/server/services/verification.js +481 -481
  295. package/server/services/vision.js +1163 -1163
  296. package/server/services/wab-crypto.js +178 -178
  297. package/server/utils/cache.js +125 -125
  298. package/server/utils/migrate.js +81 -81
  299. package/server/utils/safe-fetch.js +228 -228
  300. package/server/utils/secureFields.js +50 -50
  301. package/server/ws.js +161 -161
  302. package/templates/artisan-marketplace.yaml +104 -104
  303. package/templates/book-price-scout.yaml +98 -98
  304. package/templates/electronics-price-tracker.yaml +108 -108
  305. package/templates/flight-deal-hunter.yaml +113 -113
  306. package/templates/freelancer-direct.yaml +116 -116
  307. package/templates/grocery-price-compare.yaml +93 -93
  308. package/templates/hotel-direct-booking.yaml +113 -113
  309. package/templates/local-services.yaml +98 -98
  310. package/templates/olive-oil-tunisia.yaml +88 -88
  311. package/templates/organic-farm-fresh.yaml +101 -101
  312. package/templates/restaurant-direct.yaml +97 -97
  313. package/templates/ring4/banking-sovereign.yaml +55 -0
  314. package/templates/ring4/ecommerce-sovereign.yaml +58 -0
  315. package/templates/ring4/healthcare-sovereign.yaml +60 -0
@@ -1,1163 +1,1163 @@
1
- const { db } = require('../models/db');
2
- const { randomUUID: uuidv4 } = require('crypto');
3
- const crypto = require('crypto');
4
-
5
- // ═══════════════════════════════════════════════════════════════════════
6
- // Schema
7
- // ═══════════════════════════════════════════════════════════════════════
8
-
9
// Create the vision tables and their indexes when this module is loaded.
// Every statement uses IF NOT EXISTS, so re-running against an existing
// database leaves existing objects untouched.
db.exec(
  [
    // Per-site vision provider configuration (one row per site_id).
    `CREATE TABLE IF NOT EXISTS vision_configs (
      id TEXT PRIMARY KEY,
      site_id TEXT NOT NULL UNIQUE,
      provider TEXT DEFAULT 'local' CHECK(provider IN ('local','openai','anthropic','ollama')),
      model TEXT DEFAULT 'moondream',
      endpoint TEXT,
      api_key_encrypted TEXT,
      max_resolution TEXT DEFAULT '1280x720',
      cache_ttl INTEGER DEFAULT 300,
      enabled INTEGER DEFAULT 1,
      created_at TEXT DEFAULT (datetime('now')),
      updated_at TEXT DEFAULT (datetime('now'))
    );`,

    // Stored analysis results, looked up by site and screenshot hash.
    `CREATE TABLE IF NOT EXISTS vision_cache (
      id TEXT PRIMARY KEY,
      site_id TEXT,
      url TEXT,
      screenshot_hash TEXT,
      analysis TEXT,
      elements_found TEXT,
      provider TEXT,
      model TEXT,
      tokens_used INTEGER,
      latency_ms INTEGER,
      created_at TEXT DEFAULT (datetime('now')),
      expires_at TEXT
    );`,

    // Individual UI elements extracted from a cached analysis.
    `CREATE TABLE IF NOT EXISTS vision_elements (
      id TEXT PRIMARY KEY,
      cache_id TEXT,
      site_id TEXT,
      element_type TEXT CHECK(element_type IN ('button','input','link','text','image','form','nav','dropdown')),
      label TEXT,
      description TEXT,
      bounding_box TEXT,
      suggested_selector TEXT,
      confidence REAL,
      interactable INTEGER DEFAULT 0,
      created_at TEXT DEFAULT (datetime('now')),
      FOREIGN KEY (cache_id) REFERENCES vision_cache(id) ON DELETE CASCADE
    );`,

    // Lookup indexes.
    'CREATE INDEX IF NOT EXISTS idx_vision_configs_site ON vision_configs(site_id);',
    'CREATE INDEX IF NOT EXISTS idx_vision_cache_site ON vision_cache(site_id);',
    'CREATE INDEX IF NOT EXISTS idx_vision_cache_hash ON vision_cache(screenshot_hash);',
    'CREATE INDEX IF NOT EXISTS idx_vision_cache_url ON vision_cache(url);',
    'CREATE INDEX IF NOT EXISTS idx_vision_cache_expires ON vision_cache(expires_at);',
    'CREATE INDEX IF NOT EXISTS idx_vision_elements_cache ON vision_elements(cache_id);',
    'CREATE INDEX IF NOT EXISTS idx_vision_elements_site ON vision_elements(site_id);',
    'CREATE INDEX IF NOT EXISTS idx_vision_elements_type ON vision_elements(element_type);',
  ].join('\n'),
);
63
-
64
- // ═══════════════════════════════════════════════════════════════════════
65
- // Encryption helpers (AES-256-GCM keyed from JWT_SECRET)
66
- // ═══════════════════════════════════════════════════════════════════════
67
-
68
// Stored format: "venc:<iv hex>:<auth tag hex>:<ciphertext hex>".
const ENC_PREFIX = 'venc:';

// AES-256-GCM parameters produced by encryptApiKey and enforced on decrypt.
const VENC_IV_BYTES = 12;
const VENC_TAG_BYTES = 16;

// Ensures the "JWT_SECRET missing" warning is printed at most once.
let _vencFallbackWarned = false;

/**
 * Derive the 32-byte AES key as the SHA-256 digest of JWT_SECRET.
 *
 * When JWT_SECRET is unset or empty the built-in fallback string is used so
 * that previously stored values stay readable, but a warning is logged once
 * because that fallback is a constant in this source file.
 *
 * @returns {Buffer} 32-byte key.
 */
function _deriveKey() {
  let secret = process.env.JWT_SECRET;
  if (!secret) {
    if (!_vencFallbackWarned) {
      _vencFallbackWarned = true;
      console.warn(
        '[Vision] JWT_SECRET is not set; API keys are being encrypted with a built-in fallback key. Set JWT_SECRET.',
      );
    }
    secret = 'wab-vision-fallback-key';
  }
  return crypto.createHash('sha256').update(secret).digest();
}

/**
 * Encrypt an API key with AES-256-GCM using a fresh random 12-byte IV.
 *
 * @param {*} plaintext - Value to encrypt; coerced with String().
 * @returns {string|null} "venc:"-prefixed hex triple, or null for a falsy input.
 */
function encryptApiKey(plaintext) {
  if (!plaintext) return null;
  const key = _deriveKey();
  const iv = crypto.randomBytes(VENC_IV_BYTES);
  const cipher = crypto.createCipheriv('aes-256-gcm', key, iv);
  const enc = Buffer.concat([cipher.update(String(plaintext), 'utf8'), cipher.final()]);
  const tag = cipher.getAuthTag();
  return `${ENC_PREFIX}${iv.toString('hex')}:${tag.toString('hex')}:${enc.toString('hex')}`;
}

/**
 * Decrypt a value produced by encryptApiKey.
 *
 * @param {*} encrypted - Stored value; anything that is not a string starting
 *   with "venc:" yields null.
 * @returns {string|null} The plaintext, or null when the value is malformed,
 *   tampered with, or was encrypted under a different key. Failures are logged
 *   and never thrown.
 */
function decryptApiKey(encrypted) {
  if (!encrypted || typeof encrypted !== 'string' || !encrypted.startsWith(ENC_PREFIX)) return null;
  try {
    const parts = encrypted.slice(ENC_PREFIX.length).split(':');
    if (parts.length !== 3) throw new Error('malformed ciphertext');
    const [iv, tag, data] = parts.map((hex) => Buffer.from(hex, 'hex'));
    // GCM verifies only as many tag bytes as it is given, so a shortened tag
    // would make forgeries far easier. Accept only what encryptApiKey emits.
    if (iv.length !== VENC_IV_BYTES || tag.length !== VENC_TAG_BYTES) {
      throw new Error('unexpected IV or auth tag length');
    }
    const decipher = crypto.createDecipheriv('aes-256-gcm', _deriveKey(), iv, {
      authTagLength: VENC_TAG_BYTES,
    });
    decipher.setAuthTag(tag);
    return Buffer.concat([decipher.update(data), decipher.final()]).toString('utf8');
  } catch (e) {
    console.error('[Vision] Decrypt failed:', e.message);
    return null;
  }
}
102
-
103
- // ═══════════════════════════════════════════════════════════════════════
104
- // Prepared statements
105
- // ═══════════════════════════════════════════════════════════════════════
106
-
107
// Prepared statements used by this module, keyed by purpose. The SQL text is
// listed first and each entry is then compiled with db.prepare in the same
// order it is declared.
const stmts = Object.fromEntries(
  Object.entries({
    // Insert a site's config, or update it in place; a NULL api key in the
    // new row keeps the previously stored key.
    upsertConfig: `
      INSERT INTO vision_configs (id, site_id, provider, model, endpoint, api_key_encrypted, max_resolution, cache_ttl, enabled)
      VALUES (?, ?, ?, ?, ?, ?, ?, ?, 1)
      ON CONFLICT(site_id) DO UPDATE SET
        provider = excluded.provider,
        model = excluded.model,
        endpoint = excluded.endpoint,
        api_key_encrypted = CASE WHEN excluded.api_key_encrypted IS NOT NULL THEN excluded.api_key_encrypted ELSE vision_configs.api_key_encrypted END,
        max_resolution = excluded.max_resolution,
        cache_ttl = excluded.cache_ttl,
        updated_at = datetime('now')
    `,
    getConfig: `SELECT * FROM vision_configs WHERE site_id = ?`,
    insertCache: `
      INSERT INTO vision_cache (id, site_id, url, screenshot_hash, analysis, elements_found, provider, model, tokens_used, latency_ms, expires_at)
      VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    `,
    // Newest unexpired cache row for a screenshot.
    getCacheByHash: `
      SELECT * FROM vision_cache WHERE site_id = ? AND screenshot_hash = ? AND expires_at > datetime('now') ORDER BY created_at DESC LIMIT 1
    `,
    insertElement: `
      INSERT INTO vision_elements (id, cache_id, site_id, element_type, label, description, bounding_box, suggested_selector, confidence, interactable)
      VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    `,
    searchElements: `
      SELECT * FROM vision_elements WHERE site_id = ? ORDER BY confidence DESC
    `,
    searchElementsByType: `
      SELECT * FROM vision_elements WHERE site_id = ? AND element_type = ? ORDER BY confidence DESC
    `,
    getCacheById: `SELECT * FROM vision_cache WHERE id = ?`,
    // Newest cache row for a screenshot regardless of expiry.
    getCacheBySiteAndHash: `
      SELECT * FROM vision_cache WHERE site_id = ? AND screenshot_hash = ? ORDER BY created_at DESC LIMIT 1
    `,
    getElementsByCache: `SELECT * FROM vision_elements WHERE cache_id = ?`,
    cacheStats: `
      SELECT
        COUNT(*) as total_cached,
        SUM(CASE WHEN expires_at > datetime('now') THEN 1 ELSE 0 END) as active_cached,
        SUM(CASE WHEN expires_at <= datetime('now') THEN 1 ELSE 0 END) as expired,
        SUM(tokens_used) as total_tokens,
        AVG(latency_ms) as avg_latency,
        SUM(LENGTH(analysis)) as total_bytes
      FROM vision_cache WHERE site_id = ?
    `,
    deleteExpiredCache: `DELETE FROM vision_cache WHERE site_id = ? AND expires_at <= datetime('now')`,
    deleteOldCache: `DELETE FROM vision_cache WHERE site_id = ? AND created_at < ?`,
    deleteOrphanedElements: `DELETE FROM vision_elements WHERE cache_id NOT IN (SELECT id FROM vision_cache)`,
    visionHistory: `SELECT * FROM vision_cache WHERE site_id = ? ORDER BY created_at DESC LIMIT ?`,
    visionHistoryByUrl: `SELECT * FROM vision_cache WHERE site_id = ? AND url = ? ORDER BY created_at DESC LIMIT ?`,
  }).map(([name, sql]) => [name, db.prepare(sql)]),
);
159
-
160
- // ═══════════════════════════════════════════════════════════════════════
161
- // Provider API calls
162
- // ═══════════════════════════════════════════════════════════════════════
163
-
164
- const PROVIDER_TIMEOUT_MS = 60_000;
165
-
166
/**
 * Send a screenshot and prompt to an Ollama server's /api/generate endpoint.
 *
 * @param {string} endpoint - Base URL of the Ollama server; trailing slashes are stripped.
 * @param {string} model - Model name passed through to Ollama.
 * @param {string} base64Image - Base64-encoded image data.
 * @param {string} prompt - Prompt text.
 * @returns {Promise<{text: string, tokens: number}>} Response text and the sum
 *   of prompt_eval_count and eval_count (each treated as 0 when absent).
 * @throws {TypeError} When endpoint is not a non-empty string.
 * @throws {Error} "Ollama <status>: <body>" on a non-2xx response; the fetch
 *   rejection is propagated when the request is aborted after PROVIDER_TIMEOUT_MS.
 */
async function _callOllama(endpoint, model, base64Image, prompt) {
  // The endpoint may be missing from a stored config; fail with a clear
  // message instead of "Cannot read properties of null".
  if (typeof endpoint !== 'string' || endpoint === '') {
    throw new TypeError('Ollama endpoint is required');
  }

  const url = `${endpoint.replace(/\/+$/, '')}/api/generate`;
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), PROVIDER_TIMEOUT_MS);

  try {
    const res = await fetch(url, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        model,
        prompt,
        images: [base64Image],
        stream: false,
      }),
      signal: controller.signal,
    });
    if (!res.ok) {
      const errBody = await res.text().catch(() => '');
      throw new Error(`Ollama ${res.status}: ${errBody.slice(0, 300)}`);
    }
    const data = await res.json();
    return {
      text: data.response || '',
      tokens: (data.prompt_eval_count || 0) + (data.eval_count || 0),
    };
  } finally {
    // Always release the timeout so it cannot fire after the call settles.
    clearTimeout(timer);
  }
}
196
-
197
/**
 * Send a screenshot and prompt to the OpenAI chat completions endpoint.
 *
 * @param {string} apiKey - Sent as a Bearer token.
 * @param {string} model - Model name; falls back to 'gpt-4o' when falsy.
 * @param {string} base64Image - Base64 image data, sent as a PNG data URL.
 * @param {string} prompt - Prompt text.
 * @returns {Promise<{text: string, tokens: number}>} Content of the first
 *   choice ('' when it is absent or not a string) and usage.total_tokens
 *   (0 when absent).
 * @throws {Error} "OpenAI <status>: <body>" on a non-2xx response; the fetch
 *   rejection is propagated when the request is aborted after PROVIDER_TIMEOUT_MS.
 */
async function _callOpenAI(apiKey, model, base64Image, prompt) {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), PROVIDER_TIMEOUT_MS);

  try {
    const res = await fetch('https://api.openai.com/v1/chat/completions', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        Authorization: `Bearer ${apiKey}`,
      },
      body: JSON.stringify({
        model: model || 'gpt-4o',
        messages: [
          {
            role: 'user',
            content: [
              { type: 'text', text: prompt },
              { type: 'image_url', image_url: { url: `data:image/png;base64,${base64Image}`, detail: 'high' } },
            ],
          },
        ],
        max_tokens: 4096,
      }),
      signal: controller.signal,
    });
    if (!res.ok) {
      const errBody = await res.text().catch(() => '');
      throw new Error(`OpenAI ${res.status}: ${errBody.slice(0, 300)}`);
    }
    const data = await res.json();
    // A choice can arrive without a message or with null content; always hand
    // back a string so downstream parsing never sees null or throws.
    const content = data.choices?.[0]?.message?.content;
    return {
      text: typeof content === 'string' ? content : '',
      tokens: data.usage?.total_tokens ?? 0,
    };
  } finally {
    // Always release the timeout so it cannot fire after the call settles.
    clearTimeout(timer);
  }
}
237
-
238
/**
 * Send a screenshot and prompt to the Anthropic Messages endpoint.
 *
 * @param {string} apiKey - Sent in the x-api-key header.
 * @param {string} model - Model name; falls back to 'claude-sonnet-4-20250514' when falsy.
 * @param {string} base64Image - Base64 image data, declared as image/png.
 * @param {string} prompt - Prompt text.
 * @returns {Promise<{text: string, tokens: number}>} Text of the first "text"
 *   content block ('' when there is none) and input + output tokens, each
 *   counted as 0 when missing.
 * @throws {Error} "Anthropic <status>: <body>" on a non-2xx response; the fetch
 *   rejection is propagated when the request is aborted after PROVIDER_TIMEOUT_MS.
 */
async function _callAnthropic(apiKey, model, base64Image, prompt) {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), PROVIDER_TIMEOUT_MS);

  try {
    const res = await fetch('https://api.anthropic.com/v1/messages', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'x-api-key': apiKey,
        'anthropic-version': '2023-06-01',
      },
      body: JSON.stringify({
        model: model || 'claude-sonnet-4-20250514',
        max_tokens: 4096,
        messages: [
          {
            role: 'user',
            content: [
              { type: 'image', source: { type: 'base64', media_type: 'image/png', data: base64Image } },
              { type: 'text', text: prompt },
            ],
          },
        ],
      }),
      signal: controller.signal,
    });
    if (!res.ok) {
      const errBody = await res.text().catch(() => '');
      throw new Error(`Anthropic ${res.status}: ${errBody.slice(0, 300)}`);
    }
    const data = await res.json();
    // Tolerate a missing or non-array content field rather than throwing.
    const blocks = Array.isArray(data.content) ? data.content : [];
    const textBlock = blocks.find((b) => b && b.type === 'text');
    const text = typeof textBlock?.text === 'string' ? textBlock.text : '';
    // Default each counter separately so a partial usage object cannot
    // produce NaN.
    const inputTokens = data.usage?.input_tokens ?? 0;
    const outputTokens = data.usage?.output_tokens ?? 0;
    return {
      text,
      tokens: inputTokens + outputTokens,
    };
  } finally {
    // Always release the timeout so it cannot fire after the call settles.
    clearTimeout(timer);
  }
}
281
-
282
- // ═══════════════════════════════════════════════════════════════════════
283
- // Prompt construction
284
- // ═══════════════════════════════════════════════════════════════════════
285
-
286
/**
 * Build the prompt that asks a vision model to list the UI elements in a
 * screenshot as a JSON array inside a markdown code block.
 *
 * @param {string} [customPrompt] - Extra instructions; when truthy they are
 *   appended after the standard prompt under "Additional instructions:".
 * @returns {string} The full prompt text.
 */
function buildVisionPrompt(customPrompt) {
  const instructions = [
    'Analyze this screenshot of a web page. Identify every interactive UI element visible.',
    '',
    'For each element, return a JSON object with these fields:',
    '- "type": one of "button", "input", "link", "text", "image", "form", "nav", "dropdown"',
    '- "label": the visible text or aria-label of the element',
    '- "description": a short human-readable description of what the element does',
    '- "position": {"x": approximate x coordinate in pixels, "y": approximate y coordinate in pixels, "width": approximate width, "height": approximate height}',
    '- "selector": a suggested CSS selector that could target this element (e.g. "button.submit-btn", "#login-form input[type=email]")',
    '- "interactable": true if the element can be clicked, typed into, or otherwise interacted with',
    '- "confidence": a number from 0.0 to 1.0 indicating how confident you are in this identification',
    '',
    'Return ONLY a JSON array of these objects wrapped in a markdown code block like:',
    '```json',
    '[...]',
    '```',
    '',
    'Be thorough — include buttons, links, inputs, dropdowns, navigation items, forms, and any other interactive elements.',
  ].join('\n');

  return customPrompt
    ? `${instructions}\n\nAdditional instructions: ${customPrompt}`
    : instructions;
}
310
-
311
- // ═══════════════════════════════════════════════════════════════════════
312
- // Response parsing
313
- // ═══════════════════════════════════════════════════════════════════════
314
-
315
- const VALID_ELEMENT_TYPES = new Set(['button', 'input', 'link', 'text', 'image', 'form', 'nav', 'dropdown']);
316
-
317
/**
 * Extract UI element objects from a vision model's raw text response.
 *
 * Three strategies are tried in order, each only when the previous one found
 * nothing:
 *   1. JSON inside the first markdown code fence (array, or a single object).
 *   2. A bare JSON array of objects anywhere in the text.
 *   3. Individual flat objects that contain a "type" property.
 * Every candidate is passed through _normalizeElement and null results are
 * dropped.
 *
 * @param {string} rawResponse - Model output.
 * @param {string} provider - Forwarded to _normalizeElement.
 * @returns {Array<object>} Normalized elements; [] for empty or non-string input.
 */
function parseVisionResponse(rawResponse, provider) {
  if (!rawResponse || typeof rawResponse !== 'string') return [];

  let elements = [];

  // Strategy 1: fenced code block.
  const jsonBlockMatch = rawResponse.match(/```(?:json)?\s*\n?([\s\S]*?)```/);
  if (jsonBlockMatch) {
    try {
      const parsed = JSON.parse(jsonBlockMatch[1].trim());
      if (Array.isArray(parsed)) elements = parsed;
      else if (parsed && typeof parsed === 'object') elements = [parsed];
    } catch { /* fall through to other strategies */ }
  }

  // Strategy 2: bare array. The widest span (first "[{" to last "}]") is tried
  // first so arrays whose objects contain nested arrays of objects parse as a
  // whole; the shortest span is the fallback for text holding several
  // separate arrays or trailing brackets.
  if (elements.length === 0) {
    const arrayPatterns = [/\[\s*\{[\s\S]*\}\s*\]/, /\[\s*\{[\s\S]*?\}\s*\]/];
    for (const pattern of arrayPatterns) {
      const arrayMatch = rawResponse.match(pattern);
      if (!arrayMatch) break; // no "[{ ... }]" span at all
      try {
        const parsed = JSON.parse(arrayMatch[0]);
        if (Array.isArray(parsed) && parsed.length > 0) {
          elements = parsed;
          break;
        }
      } catch { /* try the next span, then fall through */ }
    }
  }

  // Strategy 3: loose flat objects carrying a "type" key.
  if (elements.length === 0) {
    const objectMatches = [...rawResponse.matchAll(/\{[^{}]*"type"\s*:\s*"[^"]+?"[^{}]*\}/g)];
    for (const m of objectMatches) {
      try {
        elements.push(JSON.parse(m[0]));
      } catch { /* skip malformed */ }
    }
  }

  return elements.map(el => _normalizeElement(el, provider)).filter(Boolean);
}
351
-
352
/**
 * Coerce one model-reported element into the module's element shape.
 *
 * @param {*} raw - Candidate element; anything that is not a non-null object yields null.
 * @param {string} _provider - Unused.
 * @returns {{type: string, label: string, description: string,
 *   boundingBox: {x: number, y: number, width: number, height: number},
 *   suggestedSelector: string, confidence: number, interactable: boolean}|null}
 */
function _normalizeElement(raw, _provider) {
  if (!raw || typeof raw !== 'object') return null;

  // String() guards against a numeric or otherwise non-string "type".
  let type = String(raw.type || raw.element_type || 'text').toLowerCase().trim();
  if (!VALID_ELEMENT_TYPES.has(type)) {
    // Map common synonyms onto the allowed set; the first match wins.
    if (/btn|button|submit/i.test(type)) type = 'button';
    else if (/input|field|text.?box|textarea/i.test(type)) type = 'input';
    else if (/link|anchor|href/i.test(type)) type = 'link';
    else if (/select|dropdown|combo/i.test(type)) type = 'dropdown';
    else if (/img|icon|logo/i.test(type)) type = 'image';
    else if (/form/i.test(type)) type = 'form';
    else if (/nav|menu|sidebar/i.test(type)) type = 'nav';
    else type = 'text';
  }

  const pos = raw.position || raw.bounding_box || raw.bbox || {};
  const boundingBox = {
    x: Number(pos.x) || 0,
    y: Number(pos.y) || 0,
    width: Number(pos.width || pos.w) || 0,
    height: Number(pos.height || pos.h) || 0,
  };

  // Only a number or a non-blank numeric string counts as a reported
  // confidence. An explicit 0 is kept as 0; missing or unparseable values
  // default to 0.5. The result is clamped to [0, 1].
  const reported = raw.confidence;
  let parsedConfidence = NaN;
  if (typeof reported === 'number') parsedConfidence = reported;
  else if (typeof reported === 'string' && reported.trim() !== '') parsedConfidence = Number(reported);
  const confidence = Number.isNaN(parsedConfidence)
    ? 0.5
    : Math.max(0, Math.min(1, parsedConfidence));

  // When the model did not say, infer interactability from the element type.
  const interactable = raw.interactable != null
    ? !!raw.interactable
    : ['button', 'input', 'link', 'dropdown', 'form'].includes(type);

  return {
    type,
    label: String(raw.label || raw.text || raw.name || '').slice(0, 500),
    description: String(raw.description || raw.desc || '').slice(0, 1000),
    boundingBox,
    suggestedSelector: String(raw.selector || raw.suggested_selector || raw.css_selector || '').slice(0, 500),
    confidence,
    interactable,
  };
}
391
-
392
/**
 * Pull UI elements out of a free-form analysis string.
 *
 * Structured JSON is tried first via parseVisionResponse; when that yields
 * nothing, every bullet line (`-`, `*` or `•`) is turned into a low-confidence
 * element whose type is guessed from keywords in the bullet text.
 *
 * @param {string} analysisText - Raw analysis text.
 * @returns {Array<object>} Normalized elements; empty for non-string or empty input.
 */
function extractElementsFromAnalysis(analysisText) {
  if (typeof analysisText !== 'string' || analysisText === '') return [];

  const structured = parseVisionResponse(analysisText, 'unknown');
  if (structured.length > 0) return structured;

  // Ordered: the first rule whose pattern matches decides the type.
  const keywordRules = [
    ['button', /\b(button|btn|submit|click)\b/i],
    ['input', /\b(input|field|text.?box|textarea|type|enter)\b/i],
    ['link', /\b(link|anchor|href|url|navigate)\b/i],
    ['dropdown', /\b(dropdown|select|combo|menu|option)\b/i],
    ['image', /\b(image|img|icon|logo|picture|photo)\b/i],
    ['form', /\b(form|login|signup|register|search.?bar)\b/i],
    ['nav', /\b(nav|menu|sidebar|header|footer|tab)\b/i],
  ];
  const interactiveTypes = ['button', 'input', 'link', 'dropdown', 'form'];
  const bulletLine = /^[\s]*[-*•]\s+(.+)/;

  return analysisText
    .split('\n')
    .map((line) => line.match(bulletLine))
    .filter(Boolean)
    .map((hit) => hit[1].trim())
    .filter((content) => content.length >= 3)
    .map((content) => {
      const matchedRule = keywordRules.find(([, pattern]) => pattern.test(content));
      const type = matchedRule ? matchedRule[0] : 'text';

      // Prefer a quoted phrase as the label; otherwise use the start of the bullet.
      const quoted = content.match(/["']([^"']+)["']/);

      return {
        type,
        label: quoted ? quoted[1] : content.slice(0, 80),
        description: content.slice(0, 1000),
        boundingBox: { x: 0, y: 0, width: 0, height: 0 },
        suggestedSelector: '',
        confidence: 0.3,
        interactable: interactiveTypes.includes(type),
      };
    });
}
440
-
441
- // ═══════════════════════════════════════════════════════════════════════
442
- // Core functions
443
- // ═══════════════════════════════════════════════════════════════════════
444
-
445
/**
 * Create or update the vision configuration of a site and return it with the
 * API key masked.
 *
 * Defaults applied here: provider 'local', model 'moondream', no endpoint,
 * max resolution '1280x720', cache TTL 300. An explicit cacheTtl of 0 is kept.
 *
 * @param {string} siteId - Site the configuration belongs to.
 * @param {object} [options]
 * @param {string} [options.provider]
 * @param {string} [options.model]
 * @param {string} [options.endpoint]
 * @param {string} [options.apiKey] - Plain key; stored encrypted, null when omitted.
 * @param {string} [options.maxResolution]
 * @param {number} [options.cacheTtl]
 * @returns {?object} The stored row as returned by _maskConfig.
 */
function configureVision(siteId, { provider, model, endpoint, apiKey, maxResolution, cacheTtl } = {}) {
  const upsertArgs = [
    uuidv4(),
    siteId,
    provider || 'local',
    model || 'moondream',
    endpoint || null,
    apiKey ? encryptApiKey(apiKey) : null,
    maxResolution || '1280x720',
    cacheTtl ?? 300,
  ];
  stmts.upsertConfig.run(...upsertArgs);

  // Read the row back so the caller sees what was actually stored.
  return _maskConfig(stmts.getConfig.get(siteId));
}
463
-
464
/**
 * Fetch the stored vision configuration of a site with its API key masked.
 *
 * @param {string} siteId - Site to look up.
 * @returns {?object} Masked configuration, or null when the site has none.
 */
function getVisionConfig(siteId) {
  const stored = stmts.getConfig.get(siteId);
  return stored ? _maskConfig(stored) : null;
}
469
-
470
/**
 * Return a copy of a configuration row that is safe to hand to callers: the
 * encrypted key is removed and replaced by an `api_key_masked` preview.
 *
 * The preview shows the first and last four characters only when the key is
 * long enough that at least four characters stay hidden. Shorter keys are
 * fully masked, since a 4+4 preview of a key of eight characters or fewer
 * would disclose the whole key.
 *
 * @param {?object} row - Raw configuration row; not mutated.
 * @returns {?object} Copy without `api_key_encrypted` and with `api_key_masked`
 *   (string, or null when no key is stored); null when `row` is falsy.
 */
function _maskConfig(row) {
  if (!row) return null;

  const REVEALED_EDGE = 4;
  const MIN_LENGTH_FOR_PREVIEW = REVEALED_EDGE * 3;

  const { api_key_encrypted: encryptedKey, ...out } = row;

  if (!encryptedKey) {
    out.api_key_masked = null;
    return out;
  }

  const decrypted = decryptApiKey(encryptedKey);
  out.api_key_masked = decrypted && decrypted.length >= MIN_LENGTH_FOR_PREVIEW
    ? decrypted.slice(0, REVEALED_EDGE) + '****' + decrypted.slice(-REVEALED_EDGE)
    : '********';
  return out;
}
484
-
485
/**
 * Analyze a screenshot with the vision provider configured for a site.
 *
 * The screenshot is identified by the SHA-256 of its base64 text; a cache hit
 * for that hash is returned without contacting the provider. On a miss the
 * configured provider is called ('openai' and 'anthropic' need a stored API
 * key; every other provider value is sent to the Ollama endpoint), the
 * response is parsed into elements, and both the cache row and the element
 * rows are stored.
 *
 * @param {string} siteId - Site whose vision configuration is used.
 * @param {object} [options]
 * @param {string} options.screenshotBase64 - Base64 screenshot; required.
 * @param {string} [options.url] - Page URL stored alongside the cache row.
 * @param {string} [options.prompt] - Extra prompt passed to buildVisionPrompt.
 * @returns {Promise<{analysis: string, elements: Array, cached: boolean,
 *   latency_ms: number, tokens_used: number, cache_id: string}>}
 * @throws {Error} When the screenshot is missing, vision is not configured or
 *   disabled, a required API key is absent, or the provider call times out.
 */
async function analyzeScreenshot(siteId, { screenshotBase64, url, prompt } = {}) {
  if (!screenshotBase64) throw new Error('screenshotBase64 is required');

  const config = stmts.getConfig.get(siteId);
  if (!config || !config.enabled) throw new Error('Vision not configured or disabled for this site');

  const screenshotHash = crypto.createHash('sha256').update(screenshotBase64).digest('hex');

  const cached = stmts.getCacheByHash.get(siteId, screenshotHash);
  if (cached) {
    let elements = [];
    try { elements = JSON.parse(cached.elements_found || '[]'); } catch { /* ignore */ }
    return {
      analysis: cached.analysis,
      elements,
      cached: true,
      latency_ms: cached.latency_ms,
      tokens_used: cached.tokens_used,
      cache_id: cached.id,
    };
  }

  const fullPrompt = buildVisionPrompt(prompt);
  const apiKey = config.api_key_encrypted ? decryptApiKey(config.api_key_encrypted) : null;
  const providerName = config.provider;
  const modelName = config.model;

  const startTime = Date.now();
  let result;

  try {
    switch (providerName) {
      case 'openai':
        if (!apiKey) throw new Error('OpenAI API key not configured');
        result = await _callOpenAI(apiKey, modelName, screenshotBase64, fullPrompt);
        break;
      case 'anthropic':
        if (!apiKey) throw new Error('Anthropic API key not configured');
        result = await _callAnthropic(apiKey, modelName, screenshotBase64, fullPrompt);
        break;
      case 'ollama':
      case 'local':
      default: {
        const ep = config.endpoint || 'http://localhost:11434';
        result = await _callOllama(ep, modelName, screenshotBase64, fullPrompt);
        break;
      }
    }
  } catch (err) {
    // Translate an aborted request into a readable timeout error.
    if (err.name === 'AbortError') throw new Error(`Vision provider timed out after ${PROVIDER_TIMEOUT_MS}ms`);
    throw err;
  }

  const latencyMs = Date.now() - startTime;
  const analysisText = result.text;
  const tokensUsed = result.tokens || 0;

  const elements = parseVisionResponse(analysisText, providerName);
  const cacheId = uuidv4();
  // `??` rather than `||`: configureVision stores an explicit TTL of 0, and
  // that value must not be silently replaced by the 300-second default.
  const cacheTtlSeconds = config.cache_ttl ?? 300;
  const expiresAt = new Date(Date.now() + cacheTtlSeconds * 1000).toISOString();

  // One transaction for the cache row and its elements, so a failure while
  // inserting elements cannot leave a cache entry without its element rows.
  const persistAnalysis = db.transaction((elems) => {
    stmts.insertCache.run(
      cacheId, siteId, url || null, screenshotHash,
      analysisText, JSON.stringify(elems),
      providerName, modelName, tokensUsed, latencyMs, expiresAt
    );
    for (const el of elems) {
      stmts.insertElement.run(
        uuidv4(), cacheId, siteId,
        el.type, el.label, el.description,
        JSON.stringify(el.boundingBox),
        el.suggestedSelector,
        el.confidence,
        el.interactable ? 1 : 0
      );
    }
  });
  persistAnalysis(elements);

  return {
    analysis: analysisText,
    elements,
    cached: false,
    latency_ms: latencyMs,
    tokens_used: tokensUsed,
    cache_id: cacheId,
  };
}
575
-
576
- // ═══════════════════════════════════════════════════════════════════════
577
- // Element search
578
- // ═══════════════════════════════════════════════════════════════════════
579
-
580
/**
 * Search the stored elements of a site.
 *
 * Filters are applied in this order: element type, URL, label substring,
 * then description scoring. The URL filter only takes effect when at least
 * one cache entry exists for that URL; otherwise candidates from all URLs are
 * kept. With a description, each candidate is scored as
 * 0.4 * confidence + 0.6 * (fraction of description terms found in its label
 * or description); results are sorted by score and those at or below 0.1 are
 * dropped. At most 20 results are returned.
 *
 * @param {string} siteId - Site to search.
 * @param {?string} url - Optional page URL to narrow the search.
 * @param {object} [criteria]
 * @param {string} [criteria.description] - Free-text description to score against.
 * @param {string} [criteria.type] - Exact element type.
 * @param {string} [criteria.label] - Case-insensitive label substring.
 * @returns {Array<object>} Matching elements with parsed `bounding_box` and a `_score`.
 */
function findElement(siteId, url, { description, type, label } = {}) {
  let pool = type
    ? stmts.searchElementsByType.all(siteId, type)
    : stmts.searchElements.all(siteId);

  if (url) {
    const cacheRows = db.prepare(
      `SELECT id FROM vision_cache WHERE site_id = ? AND url = ?`
    ).all(siteId, url);

    if (cacheRows.length > 0) {
      const cacheIds = new Set(cacheRows.map((row) => row.id));
      pool = pool.filter((candidate) => cacheIds.has(candidate.cache_id));
    }
  }

  if (label) {
    const needle = label.toLowerCase();
    pool = pool.filter((candidate) => candidate.label && candidate.label.toLowerCase().includes(needle));
  }

  if (description) {
    const terms = description.toLowerCase().split(/\s+/).filter((term) => term.length > 1);

    const scoreOf = (candidate) => {
      const haystack = `${candidate.label || ''} ${candidate.description || ''}`.toLowerCase();
      const hits = terms.reduce((count, term) => (haystack.includes(term) ? count + 1 : count), 0);
      const termScore = terms.length > 0 ? hits / terms.length : 0;
      return (candidate.confidence * 0.4) + (termScore * 0.6);
    };

    pool = pool.map((candidate) => ({ ...candidate, _score: scoreOf(candidate) }));
    pool.sort((left, right) => right._score - left._score);
    pool = pool.filter((candidate) => candidate._score > 0.1);
  }

  const parseBox = (serialized) => {
    try {
      return JSON.parse(serialized || '{}');
    } catch {
      return {};
    }
  };

  return pool.slice(0, 20).map((candidate) => ({
    id: candidate.id,
    cache_id: candidate.cache_id,
    element_type: candidate.element_type,
    label: candidate.label,
    description: candidate.description,
    bounding_box: parseBox(candidate.bounding_box),
    suggested_selector: candidate.suggested_selector,
    confidence: candidate.confidence,
    interactable: !!candidate.interactable,
    _score: candidate._score || candidate.confidence,
  }));
}
640
-
641
- // ═══════════════════════════════════════════════════════════════════════
642
- // Screenshot comparison
643
- // ═══════════════════════════════════════════════════════════════════════
644
-
645
/**
 * Diff the cached element lists of two screenshots of the same site.
 *
 * Elements are matched on `type::lowercased label`; when several elements of
 * one screenshot share a key, only the last one takes part in the comparison.
 * A matched pair counts as changed when any of x, y, width or height differs
 * by more than 10, or when the descriptions differ.
 *
 * @param {string} siteId - Site both screenshots belong to.
 * @param {string} url - Accepted for interface compatibility; not used.
 * @param {string} screenshotAHash - Hash of the "before" screenshot.
 * @param {string} screenshotBHash - Hash of the "after" screenshot.
 * @returns {object} `{added, removed, changed, unchanged, summary}`, or an
 *   object with `error` and empty lists when either screenshot is not cached.
 */
function compareScreenshots(siteId, url, screenshotAHash, screenshotBHash) {
  const beforeRow = stmts.getCacheBySiteAndHash.get(siteId, screenshotAHash);
  const afterRow = stmts.getCacheBySiteAndHash.get(siteId, screenshotBHash);

  if (!beforeRow || !afterRow) {
    return { error: 'One or both screenshots not found in cache', added: [], removed: [], changed: [], unchanged: [] };
  }

  const readElements = (row) => {
    try {
      return JSON.parse(row.elements_found || '[]');
    } catch {
      return [];
    }
  };
  const identityOf = (el) => `${el.type || el.element_type}::${(el.label || '').toLowerCase()}`;
  const indexByIdentity = (list) => {
    const index = new Map();
    for (const el of list) index.set(identityOf(el), el);
    return index;
  };
  const boxOf = (el) => el.position || el.boundingBox || {};
  const hasMoved = (boxBefore, boxAfter) =>
    ['x', 'y', 'width', 'height'].some(
      (side) => Math.abs((boxBefore[side] || 0) - (boxAfter[side] || 0)) > 10
    );

  const beforeIndex = indexByIdentity(readElements(beforeRow));
  const afterIndex = indexByIdentity(readElements(afterRow));

  const added = [];
  const changed = [];
  const unchanged = [];

  afterIndex.forEach((afterEl, key) => {
    if (!beforeIndex.has(key)) {
      added.push(afterEl);
      return;
    }
    const beforeEl = beforeIndex.get(key);
    const differs = hasMoved(boxOf(beforeEl), boxOf(afterEl))
      || (beforeEl.description || '') !== (afterEl.description || '');
    if (differs) changed.push({ before: beforeEl, after: afterEl });
    else unchanged.push(afterEl);
  });

  const removed = [...beforeIndex]
    .filter(([key]) => !afterIndex.has(key))
    .map(([, beforeEl]) => beforeEl);

  return {
    added,
    removed,
    changed,
    unchanged,
    summary: {
      added_count: added.length,
      removed_count: removed.length,
      changed_count: changed.length,
      unchanged_count: unchanged.length,
    },
  };
}
710
-
711
- // ═══════════════════════════════════════════════════════════════════════
712
- // Cache management
713
- // ═══════════════════════════════════════════════════════════════════════
714
-
715
/**
 * Summarize the vision cache of a site.
 *
 * `hit_rate_pct` is the share of cache rows that are still active
 * (active / total, as a percentage rounded to one decimal place).
 *
 * @param {string} siteId - Site to report on.
 * @returns {{total_cached: number, active_cached: number, expired: number,
 *   hit_rate_pct: number, total_tokens_used: number, avg_latency_ms: number,
 *   storage_estimate_bytes: number}}
 */
function getCacheStats(siteId) {
  const row = stmts.cacheStats.get(siteId);
  const total = row.total_cached || 0;
  const active = row.active_cached || 0;

  let hitRatePct = 0;
  if (total > 0) {
    hitRatePct = parseFloat(((active / total) * 100).toFixed(1));
  }

  return {
    total_cached: total,
    active_cached: active,
    expired: row.expired || 0,
    hit_rate_pct: hitRatePct,
    total_tokens_used: row.total_tokens || 0,
    avg_latency_ms: Math.round(row.avg_latency || 0),
    storage_estimate_bytes: row.total_bytes || 0,
  };
}
732
-
733
/**
 * Remove cache rows of a site and then sweep element rows left without a
 * cache entry.
 *
 * With a truthy `olderThan` (seconds), rows are deleted using a cutoff of
 * "now minus olderThan"; otherwise only expired rows are deleted.
 *
 * @param {string} siteId - Site whose cache is cleaned.
 * @param {object} [options]
 * @param {number} [options.olderThan] - Age threshold in seconds.
 * @returns {{deleted: number, orphaned_elements_cleaned: number}}
 */
function clearCache(siteId, { olderThan } = {}) {
  const removal = olderThan
    ? stmts.deleteOldCache.run(siteId, new Date(Date.now() - olderThan * 1000).toISOString())
    : stmts.deleteExpiredCache.run(siteId);

  const orphanSweep = stmts.deleteOrphanedElements.run();

  return {
    deleted: removal.changes,
    orphaned_elements_cleaned: orphanSweep.changes,
  };
}
748
-
749
- // ═══════════════════════════════════════════════════════════════════════
750
- // Supported models
751
- // ═══════════════════════════════════════════════════════════════════════
752
-
753
/**
 * List the vision models known to this module, grouped by provider.
 *
 * A fresh structure is built on every call, so callers may modify the result.
 *
 * @returns {Array<{provider: string, models: Array<{id: string, name: string,
 *   capabilities: string[], max_resolution: string, cost: string}>}>}
 */
function getSupportedModels() {
  // Capabilities shared by every model; the rest are appended per model.
  const baseline = ['element_detection', 'text_recognition', 'layout_analysis'];
  const HD = '1280x720';
  const FULL_HD = '1920x1080';
  const MAX = '4096x4096';

  const entry = (id, name, extraCapabilities, max_resolution, cost) => ({
    id,
    name,
    capabilities: [...baseline, ...extraCapabilities],
    max_resolution,
    cost,
  });

  return [
    {
      provider: 'local',
      models: [
        entry('moondream', 'Moondream', [], HD, 'free'),
        entry('llava', 'LLaVA', ['reasoning'], FULL_HD, 'free'),
        entry('llava:13b', 'LLaVA 13B', ['reasoning', 'complex_ui'], FULL_HD, 'free'),
      ],
    },
    {
      provider: 'ollama',
      models: [
        entry('moondream', 'Moondream (Ollama)', [], HD, 'free'),
        entry('llava', 'LLaVA (Ollama)', ['reasoning'], FULL_HD, 'free'),
        entry('bakllava', 'BakLLaVA (Ollama)', [], FULL_HD, 'free'),
      ],
    },
    {
      provider: 'openai',
      models: [
        entry('gpt-4o', 'GPT-4o', ['reasoning', 'complex_ui', 'accessibility'], MAX, 'paid'),
        entry('gpt-4o-mini', 'GPT-4o Mini', [], MAX, 'paid'),
        entry('gpt-4-turbo', 'GPT-4 Turbo', ['reasoning', 'complex_ui'], MAX, 'paid'),
      ],
    },
    {
      provider: 'anthropic',
      models: [
        entry('claude-sonnet-4-20250514', 'Claude Sonnet 4', ['reasoning', 'complex_ui', 'accessibility'], MAX, 'paid'),
        entry('claude-3-5-sonnet-20241022', 'Claude 3.5 Sonnet', ['reasoning', 'complex_ui'], MAX, 'paid'),
        entry('claude-3-haiku-20240307', 'Claude 3 Haiku', [], MAX, 'paid'),
      ],
    },
  ];
}
789
-
790
- // ═══════════════════════════════════════════════════════════════════════
791
- // Token estimation
792
- // ═══════════════════════════════════════════════════════════════════════
793
-
794
/**
 * Estimate how many tokens a base64-encoded image will cost a vision model.
 *
 * The estimate is the larger of two heuristics: a tile-based figure
 * (85 + 170 per 512x512 tile) and a size-based figure (one token per 750
 * decoded bytes). Dimensions are read from the PNG header when the payload is
 * a PNG; any other format uses 1280x720.
 *
 * A leading `data:<mime>;base64,` prefix is stripped first. Without that, a
 * data URL would never be recognised as PNG and the prefix would be counted
 * as image bytes.
 *
 * @param {string} imageBase64 - Base64 image data, optionally as a data URL.
 * @returns {number} Estimated token count; 0 for empty input.
 */
function estimateTokens(imageBase64) {
  if (!imageBase64) return 0;

  const payload = typeof imageBase64 === 'string'
    ? imageBase64.replace(/^data:[^;,]+;base64,/i, '')
    : imageBase64;
  if (!payload) return 0;

  const byteLength = Math.ceil(payload.length * 0.75);

  let width = 1280;
  let height = 720;
  try {
    // 'iVBOR' is the base64 form of the PNG signature. JPEG ('/9j/') and other
    // formats keep the default dimensions and rely on the byte-size heuristic.
    if (payload.startsWith('iVBOR')) {
      // 32 base64 characters decode to the 24 bytes holding the signature and
      // the IHDR width (offset 16) and height (offset 20).
      const header = Buffer.from(payload.slice(0, 32), 'base64');
      if (header.length >= 24) {
        width = header.readUInt32BE(16);
        height = header.readUInt32BE(20);
      }
    }
  } catch { /* use defaults */ }

  const tiles = Math.ceil(width / 512) * Math.ceil(height / 512);
  const highDetailTokens = 85 + (tiles * 170);
  const sizeBasedEstimate = Math.ceil(byteLength / 750);

  return Math.max(highDetailTokens, sizeBasedEstimate);
}
819
-
820
- // ═══════════════════════════════════════════════════════════════════════
821
- // History
822
- // ═══════════════════════════════════════════════════════════════════════
823
-
824
/**
 * List past vision analyses of a site, optionally restricted to one URL.
 *
 * @param {string} siteId - Site to list history for.
 * @param {object} [options]
 * @param {number|string} [options.limit] - Maximum number of rows. Anything
 *   that is not a positive number falls back to 50.
 * @param {string} [options.url] - Only return analyses stored for this URL.
 * @returns {Array<object>} One summary per cache row; the element list itself
 *   is reduced to `elements_count`.
 */
function getVisionHistory(siteId, { limit, url } = {}) {
  // Sanitize before binding: SQLite treats a negative LIMIT as "no limit",
  // so a limit of -1 would otherwise return the entire history.
  const requested = Math.floor(Number(limit));
  const max = Number.isFinite(requested) && requested > 0 ? requested : 50;

  const rows = url
    ? stmts.visionHistoryByUrl.all(siteId, url, max)
    : stmts.visionHistory.all(siteId, max);

  return rows.map((row) => {
    let elements = [];
    try { elements = JSON.parse(row.elements_found || '[]'); } catch { /* ignore */ }
    return {
      id: row.id,
      site_id: row.site_id,
      url: row.url,
      screenshot_hash: row.screenshot_hash,
      provider: row.provider,
      model: row.model,
      tokens_used: row.tokens_used,
      latency_ms: row.latency_ms,
      // Stored JSON that is valid but not an array has no meaningful length.
      elements_count: Array.isArray(elements) ? elements.length : 0,
      created_at: row.created_at,
      expires_at: row.expires_at,
    };
  });
}
850
-
851
- // ═══════════════════════════════════════════════════════════════════════
852
- // LOCAL VISION ENGINE — Self-contained, no external API needed
853
- // DOM-based element detection, dark pattern analysis, ad detection,
854
- // layout analysis, accessibility audit — all computed locally.
855
- // ═══════════════════════════════════════════════════════════════════════
856
-
857
// Text signatures of manipulative UI copy, keyed by dark-pattern name.
// A node is reported for a pattern when any regex in that pattern's list
// matches the node's text.
const DARK_PATTERN_SIGNATURES = {
  // Opt-out wording that shames the user for declining.
  confirmshaming: [
    /no,? i (don'?t|do not) (want|like|need|care)/i,
    /no thanks,? i (prefer|like|want) (to )?(pay|miss|stay|lose)/i,
    /i('?d rather|'?ll pass)/i,
    /keep (paying|losing|missing)/i,
  ],
  // Scarcity, countdown and social-proof pressure.
  urgency: [
    /only \d+ left/i,
    /limited (time|offer|stock|availability)/i,
    /hurry|rush|act now|don'?t miss|last chance|expires? (soon|in|today)/i,
    /\d+ (people|others|users) (are )?(viewing|watching|buying)/i,
    /selling fast|almost gone/i,
  ],
  // Fees mentioned in the page text.
  hiddenCosts: [
    /service fee|handling fee|processing fee|convenience fee/i,
    /additional charge|extra charge|booking fee/i,
  ],
  // Trials that turn into paid subscriptions.
  forcedContinuity: [
    /free trial.*(auto|automatic).*(renew|bill|charge)/i,
    /will be charged after/i,
    /cancel anytime.*(before|or)/i,
  ],
  // Items or add-ons placed in the order.
  sneakIntoBasket: [
    /added to (your )?cart|included (in|with) (your )?(order|purchase)/i,
    /protection plan|warranty|insurance/i,
  ],
  // Requests to share personal data.
  privacyZuckering: [
    /share (your )?(data|info|details|location|contacts)/i,
    /personalize/i,
  ],
};

// Patterns tested against a node's class and id to recognise ad containers.
const AD_CLASS_PATTERNS = [
  /\bad[s]?\b/i,
  /\badvert/i,
  /\bsponsor/i,
  /\bpromo(tion|ted)?\b/i,
  /\bbanner[\-_]?ad/i,
  /\bgoogle[\-_]?ad/i,
  /\bdfp[\-_]/i,
  /\badsense/i,
  /\btaboola/i,
  /\boutbrain/i,
];

// [width, height] pairs of ad slots; a node whose rect is within 10px of one
// of these in both dimensions is treated as an ad.
const AD_SIZES = [
  [728, 90],
  [300, 250],
  [336, 280],
  [160, 600],
  [320, 50],
  [970, 250],
  [300, 600],
];
883
-
884
/**
 * Analyze a DOM snapshot locally — no external API calls.
 *
 * Every node is inspected once for: UI element type, dark-pattern wording,
 * pre-checked marketing checkboxes, ad markers, accessibility problems and
 * page layout regions.
 *
 * @param {Array} domNodes - Flattened DOM nodes from the extraction script
 *   (`{tag, text, selector, attributes, rect}`). Anything that is not an
 *   array is treated as an empty snapshot.
 * @param {Object} viewport - { width, height }. Accepted for interface
 *   compatibility; the analysis does not currently use it.
 * @returns {Object} `{text, tokens, elements, darkPatterns, ads, accessibility, layout}`
 *   where `text` is the JSON-serialized summary stored in the cache and
 *   `tokens` is always 0.
 */
function analyzeLocally(domNodes, viewport = { width: 1280, height: 720 }) {
  const nodes = Array.isArray(domNodes) ? domNodes : [];

  const elements = [];
  const darkPatterns = [];
  const adElements = [];
  const accessibilityIssues = [];
  const layoutRegions = [];

  const LAYOUT_SELECTORS = {
    header: ['header', '[role="banner"]'],
    navigation: ['nav', '[role="navigation"]'],
    main: ['main', '[role="main"]', 'article'],
    sidebar: ['aside', '[role="complementary"]'],
    footer: ['footer', '[role="contentinfo"]'],
  };
  const INTERACTABLE_TYPES = ['button', 'link', 'input', 'dropdown', 'form'];
  const PRECHECKED_OPT_IN = /newsletter|marketing|promo|share|partner|third.party|sms|offer/i;

  for (const node of nodes) {
    if (!node || typeof node !== 'object') continue;

    const attrs = node.attributes || {};
    const tag = String(node.tag || '').toLowerCase();
    const cls = String(attrs.class || '').toLowerCase();
    const id = String(attrs.id || '').toLowerCase();
    const role = String(attrs.role || '').toLowerCase();
    const text = String(node.text || '').trim();
    const rect = node.rect || {};

    // ── Element detection ──
    // Tag and role decide first; class-name hints are the low-confidence fallback.
    let elType = null;
    let confidence = 0;

    if (tag === 'button' || role === 'button' || (tag === 'input' && ['submit', 'button'].includes(attrs.type))) {
      elType = 'button'; confidence = 0.95;
    } else if (tag === 'a' && attrs.href) {
      elType = 'link'; confidence = 0.9;
    } else if (['input', 'textarea'].includes(tag) || role === 'textbox') {
      elType = 'input'; confidence = 0.95;
    } else if (tag === 'select' || role === 'listbox' || role === 'combobox') {
      elType = 'dropdown'; confidence = 0.9;
    } else if (tag === 'form' || role === 'form') {
      elType = 'form'; confidence = 0.85;
    } else if (['img', 'picture', 'svg', 'video', 'canvas'].includes(tag) || role === 'img') {
      elType = 'image'; confidence = 0.8;
    } else if (['nav', 'header', 'footer'].includes(tag) || ['navigation', 'banner', 'contentinfo'].includes(role) || cls.includes('nav') || cls.includes('menu')) {
      elType = 'nav'; confidence = 0.75;
    } else if (cls.includes('btn') || cls.includes('button') || cls.includes('cta')) {
      elType = 'button'; confidence = 0.7;
    } else if (cls.includes('dropdown') || cls.includes('select')) {
      elType = 'dropdown'; confidence = 0.65;
    }

    if (elType) {
      elements.push({
        type: elType,
        label: (text || attrs.placeholder || attrs['aria-label'] || attrs.alt || '').slice(0, 200),
        description: `${tag} element${cls ? ' class=' + cls.slice(0, 80) : ''}`,
        boundingBox: { x: rect.x || 0, y: rect.y || 0, width: rect.width || 0, height: rect.height || 0 },
        suggestedSelector: node.selector || _buildFallbackSelector(node),
        confidence,
        interactable: INTERACTABLE_TYPES.includes(elType),
      });
    }

    // ── Dark pattern detection ──
    // At most one finding per pattern name per node.
    if (text.length > 5) {
      for (const [patternName, regexes] of Object.entries(DARK_PATTERN_SIGNATURES)) {
        if (regexes.some((rx) => rx.test(text))) {
          darkPatterns.push({ type: patternName, text: text.slice(0, 200), selector: node.selector || '', severity: patternName === 'urgency' ? 'medium' : 'high', confidence: 0.85 });
        }
      }
    }

    // Pre-checked upsell checkbox detection.
    // An <input> has no text content, so when the node text is empty the
    // checkbox is identified through its descriptive attributes instead;
    // otherwise this check could never fire for extracted inputs.
    if (tag === 'input' && attrs.type === 'checkbox' && attrs.checked != null) {
      const checkboxLabel = text
        || [attrs['aria-label'], attrs.name, attrs.value, attrs.id].filter(Boolean).join(' ');
      if (PRECHECKED_OPT_IN.test(checkboxLabel)) {
        darkPatterns.push({ type: 'misdirection', text: `Pre-checked: "${checkboxLabel.slice(0, 100)}"`, selector: node.selector || '', severity: 'medium', confidence: 0.9 });
      }
    }

    // ── Ad detection ──
    // Three signals: class/id naming, a rect matching a known ad size, or an
    // iframe served from an ad network.
    let isAd = AD_CLASS_PATTERNS.some((rx) => rx.test(cls) || rx.test(id));
    if (!isAd && rect.width && rect.height) {
      isAd = AD_SIZES.some(([w, h]) => Math.abs(rect.width - w) < 10 && Math.abs(rect.height - h) < 10);
    }
    if (!isAd && tag === 'iframe' && attrs.src) {
      isAd = /doubleclick|googlesyndication|adnxs|criteo|taboola|outbrain/i.test(attrs.src);
    }
    if (isAd) adElements.push({ tag, selector: node.selector || '', rect, reason: 'class/id/size match' });

    // ── Accessibility ──
    if (tag === 'img' && !attrs.alt) {
      accessibilityIssues.push({ type: 'missing-alt', severity: 'high', selector: node.selector || '' });
    }
    if (['button', 'a', 'input'].includes(tag) && rect.width > 0 && (rect.width < 44 || rect.height < 44)) {
      accessibilityIssues.push({ type: 'small-tap-target', severity: 'medium', selector: node.selector || '', size: `${rect.width}x${rect.height}` });
    }
    if (['input', 'select', 'textarea'].includes(tag) && !attrs['aria-label'] && !attrs['aria-labelledby'] && !attrs.id) {
      accessibilityIssues.push({ type: 'missing-label', severity: 'high', selector: node.selector || '' });
    }

    // ── Layout regions ──
    // A node is assigned to the first region whose tag or role selector matches.
    for (const [regionName, selectors] of Object.entries(LAYOUT_SELECTORS)) {
      const matchesRegion = selectors.some((s) => {
        if (s.startsWith('[role="')) return role === s.match(/\[role="(.+?)"\]/)?.[1];
        return tag === s;
      });
      if (matchesRegion) {
        layoutRegions.push({ type: regionName, tag, rect, selector: node.selector || '' });
        break;
      }
    }
  }

  const countOfType = (type) => elements.filter((e) => e.type === type).length;

  const accessibility = {
    issues: accessibilityIssues,
    score: Math.max(0, 100 - accessibilityIssues.length * 5),
  };
  // Built once so the returned layout carries the same `columns` value as the
  // serialized analysis text.
  const layout = {
    regions: layoutRegions,
    columns: layoutRegions.some((r) => r.type === 'sidebar') ? 2 : 1,
  };

  // Build analysis text (JSON summary stored in the cache)
  const analysisText = JSON.stringify({
    summary: {
      totalElements: elements.length,
      buttons: countOfType('button'),
      links: countOfType('link'),
      inputs: countOfType('input'),
      forms: countOfType('form'),
      darkPatterns: darkPatterns.length,
      ads: adElements.length,
      accessibilityIssues: accessibilityIssues.length,
    },
    elements,
    darkPatterns,
    ads: adElements,
    accessibility,
    layout,
  });

  return {
    text: analysisText,
    tokens: 0, // Local analysis — no tokens used
    elements,
    darkPatterns,
    ads: adElements,
    accessibility,
    layout,
  };
}
1040
-
1041
/**
 * Derive a CSS-like selector for a node that arrived without one.
 *
 * An id wins outright (`#id`); otherwise the tag name (default `div`) is
 * followed by up to the first two class names. Values are used as-is, without
 * CSS escaping.
 *
 * @param {{tag?: string, attributes?: {id?: string, class?: string}}} node
 * @returns {string} The derived selector.
 */
function _buildFallbackSelector(node) {
  const attrs = node.attributes;
  if (attrs?.id) return '#' + attrs.id;

  const base = node.tag || 'div';
  const classPart = attrs?.class
    ? attrs.class.trim().split(/\s+/).slice(0, 2).join('.')
    : '';

  return classPart ? base + '.' + classPart : base;
}
1051
-
1052
/**
 * Source of the browser-side DOM extraction script.
 *
 * The returned text is a self-invoking function meant to be evaluated inside
 * a page. When run there it walks the DOM from `document.body` (depth capped
 * at 8), skips script/style/meta-like tags and elements hidden through
 * `display:none` or `visibility:hidden`, and descends only into layout and
 * interactive tags. It evaluates to a JSON string
 * `{url, title, viewport, dom, meta}` where `dom` is the flattened node list
 * (`tag`, `text`, `selector`, `attributes`, `rect`, `visible`).
 *
 * @returns {string} JavaScript source text to inject into the page.
 */
function getDomExtractionScript() {
  const extractionScript = `(function(){
var MAX_D=8,INT=new Set(['a','button','input','select','textarea','details','summary','label']),
LAY=new Set(['header','nav','main','aside','footer','article','section','div','form']),
SKIP=new Set(['script','style','noscript','meta','link','br','hr']);
function ext(el,d){
if(d>MAX_D)return null;var t=el.tagName;if(!t)return null;t=t.toLowerCase();
if(SKIP.has(t))return null;var r=el.getBoundingClientRect();
if(r.width===0&&r.height===0&&!LAY.has(t)&&!INT.has(t))return null;
var cs=window.getComputedStyle(el);if(cs.display==='none'||cs.visibility==='hidden')return null;
var n={tag:t,text:(el.textContent||'').trim().substring(0,200),selector:sel(el),attributes:{},
rect:{x:Math.round(r.x),y:Math.round(r.y),width:Math.round(r.width),height:Math.round(r.height)},
visible:r.width>0&&r.height>0&&cs.opacity!=='0'};
['id','class','href','src','alt','type','name','value','placeholder','role','aria-label',
'aria-labelledby','aria-checked','data-action','checked','disabled'].forEach(function(a){
if(el.hasAttribute(a))n.attributes[a]=el.getAttribute(a);
});if(el.checked)n.attributes.checked='checked';
if(LAY.has(t)||INT.has(t)){n.children=[];var ch=Array.from(el.children);for(var j=0;j<ch.length;j++){var cn=ext(ch[j],d+1);if(cn)n.children.push(cn);}}
return n;
}
function sel(el){if(!el||!el.tagName)return'unknown';if(el.id)return'#'+CSS.escape(el.id);var p=[];var c=el;
for(var i=0;i<4&&c&&c!==document.body;i++){var s=c.tagName.toLowerCase();
if(c.id){p.unshift('#'+CSS.escape(c.id));break;}
if(c.className&&typeof c.className==='string'){var cl=c.className.trim().split(/\\s+/).slice(0,2).map(function(x){return'.'+CSS.escape(x);}).join('');if(cl)s+=cl;}
p.unshift(s);c=c.parentElement;}return p.join(' > ');}
function flat(n,r){if(!n)return;var ch=n.children;delete n.children;r.push(n);if(ch)ch.forEach(function(c){flat(c,r);});}
var root=ext(document.body,0);var f=[];flat(root,f);
return JSON.stringify({url:location.href,title:document.title,viewport:{width:innerWidth,height:innerHeight},dom:f,
meta:{lang:document.documentElement.lang||'',charset:document.characterSet}});
})();`;

  return extractionScript;
}
1087
-
1088
- // ═══════════════════════════════════════════════════════════════════════
1089
- // Enhanced analyzeScreenshot — use local engine when provider is 'local'
1090
- // and DOM data is provided (no external API call needed)
1091
- // ═══════════════════════════════════════════════════════════════════════
1092
-
1093
/**
 * Analyze a DOM snapshot with the local engine and cache the result.
 *
 * The cache key is the SHA-256 of the complete serialized DOM. Hashing only a
 * prefix would make pages that share their leading markup collide and receive
 * each other's cached analysis.
 *
 * A cache hit returns the same fields as a fresh analysis: dark patterns,
 * ads, accessibility and layout are recovered from the stored analysis JSON
 * when it can be parsed.
 *
 * @param {string} siteId - Site the snapshot belongs to.
 * @param {object} [options]
 * @param {{dom: Array, viewport?: object, url?: string}} options.domSnapshot -
 *   Parsed output of the DOM extraction script.
 * @param {string} [options.url] - Page URL; falls back to `domSnapshot.url`.
 * @returns {Promise<object>} `{analysis, elements, darkPatterns, ads,
 *   accessibility, layout, cached, latency_ms, tokens_used, cache_id}` plus
 *   `engine: 'local-dom'` for a fresh analysis.
 * @throws {Error} When `domSnapshot.dom` is missing or not an array.
 */
async function analyzePageDOM(siteId, { domSnapshot, url } = {}) {
  if (!domSnapshot || !Array.isArray(domSnapshot.dom)) {
    throw new Error('domSnapshot with dom array is required');
  }

  const screenshotHash = crypto
    .createHash('sha256')
    .update(JSON.stringify(domSnapshot.dom))
    .digest('hex');

  // Check cache
  const cached = stmts.getCacheByHash.get(siteId, screenshotHash);
  if (cached) {
    let elements = [];
    try { elements = JSON.parse(cached.elements_found || '[]'); } catch { /* keep empty list */ }

    // The stored analysis text is the JSON summary written on a miss.
    let stored = null;
    try { stored = JSON.parse(cached.analysis); } catch { /* not JSON; leave extras empty */ }
    const details = stored && typeof stored === 'object' ? stored : {};

    return {
      analysis: cached.analysis,
      elements,
      darkPatterns: details.darkPatterns || [],
      ads: details.ads || [],
      accessibility: details.accessibility || null,
      layout: details.layout || null,
      cached: true,
      latency_ms: cached.latency_ms,
      tokens_used: 0,
      cache_id: cached.id,
    };
  }

  const startTime = Date.now();
  const result = analyzeLocally(domSnapshot.dom, domSnapshot.viewport);
  const latencyMs = Date.now() - startTime;

  const cacheId = uuidv4();
  const config = stmts.getConfig.get(siteId);
  // `??` keeps a configured TTL of 0 instead of replacing it with the default.
  const cacheTtl = config?.cache_ttl ?? 300;
  const expiresAt = new Date(Date.now() + cacheTtl * 1000).toISOString();

  // Store the cache row and its elements atomically.
  const persistAnalysis = db.transaction((elems) => {
    stmts.insertCache.run(cacheId, siteId, url || domSnapshot.url || null, screenshotHash, result.text, JSON.stringify(elems), 'local', 'dom-engine', 0, latencyMs, expiresAt);
    for (const el of elems) {
      stmts.insertElement.run(uuidv4(), cacheId, siteId, el.type, el.label, el.description, JSON.stringify(el.boundingBox), el.suggestedSelector, el.confidence, el.interactable ? 1 : 0);
    }
  });
  persistAnalysis(result.elements);

  return {
    analysis: result.text,
    elements: result.elements,
    darkPatterns: result.darkPatterns,
    ads: result.ads,
    accessibility: result.accessibility,
    layout: result.layout,
    cached: false,
    latency_ms: latencyMs,
    tokens_used: 0,
    cache_id: cacheId,
    engine: 'local-dom',
  };
}
1139
-
1140
- // ═══════════════════════════════════════════════════════════════════════
1141
- // Exports
1142
- // ═══════════════════════════════════════════════════════════════════════
1143
-
1144
- module.exports = {
1145
- configureVision,
1146
- getVisionConfig,
1147
- analyzeScreenshot,
1148
- analyzePageDOM,
1149
- analyzeLocally,
1150
- getDomExtractionScript,
1151
- buildVisionPrompt,
1152
- parseVisionResponse,
1153
- extractElementsFromAnalysis,
1154
- findElement,
1155
- compareScreenshots,
1156
- getCacheStats,
1157
- clearCache,
1158
- encryptApiKey,
1159
- decryptApiKey,
1160
- getSupportedModels,
1161
- estimateTokens,
1162
- getVisionHistory,
1163
- };
1
+ const { db } = require('../models/db');
2
+ const { randomUUID: uuidv4 } = require('crypto');
3
+ const crypto = require('crypto');
4
+
5
+ // ═══════════════════════════════════════════════════════════════════════
6
+ // Schema
7
+ // ═══════════════════════════════════════════════════════════════════════
8
+
9
// Create the vision tables and their indexes when this module is loaded. Every
// statement uses IF NOT EXISTS, so running it against an existing database is safe.
//
// No explicit index is created on vision_configs(site_id): the UNIQUE constraint
// on that column already gives SQLite an index for it, so a second one would only
// add write overhead.
//
// NOTE(review): the ON DELETE CASCADE on vision_elements only fires when
// foreign-key enforcement is enabled for this connection — that setting is not
// visible in this file; confirm in ../models/db.
db.exec(`
  CREATE TABLE IF NOT EXISTS vision_configs (
    id TEXT PRIMARY KEY,
    site_id TEXT NOT NULL UNIQUE,
    provider TEXT DEFAULT 'local' CHECK(provider IN ('local','openai','anthropic','ollama')),
    model TEXT DEFAULT 'moondream',
    endpoint TEXT,
    api_key_encrypted TEXT,
    max_resolution TEXT DEFAULT '1280x720',
    cache_ttl INTEGER DEFAULT 300,
    enabled INTEGER DEFAULT 1,
    created_at TEXT DEFAULT (datetime('now')),
    updated_at TEXT DEFAULT (datetime('now'))
  );

  CREATE TABLE IF NOT EXISTS vision_cache (
    id TEXT PRIMARY KEY,
    site_id TEXT,
    url TEXT,
    screenshot_hash TEXT,
    analysis TEXT,
    elements_found TEXT,
    provider TEXT,
    model TEXT,
    tokens_used INTEGER,
    latency_ms INTEGER,
    created_at TEXT DEFAULT (datetime('now')),
    expires_at TEXT
  );

  CREATE TABLE IF NOT EXISTS vision_elements (
    id TEXT PRIMARY KEY,
    cache_id TEXT,
    site_id TEXT,
    element_type TEXT CHECK(element_type IN ('button','input','link','text','image','form','nav','dropdown')),
    label TEXT,
    description TEXT,
    bounding_box TEXT,
    suggested_selector TEXT,
    confidence REAL,
    interactable INTEGER DEFAULT 0,
    created_at TEXT DEFAULT (datetime('now')),
    FOREIGN KEY (cache_id) REFERENCES vision_cache(id) ON DELETE CASCADE
  );

  CREATE INDEX IF NOT EXISTS idx_vision_cache_site ON vision_cache(site_id);
  CREATE INDEX IF NOT EXISTS idx_vision_cache_hash ON vision_cache(screenshot_hash);
  CREATE INDEX IF NOT EXISTS idx_vision_cache_url ON vision_cache(url);
  CREATE INDEX IF NOT EXISTS idx_vision_cache_expires ON vision_cache(expires_at);
  CREATE INDEX IF NOT EXISTS idx_vision_elements_cache ON vision_elements(cache_id);
  CREATE INDEX IF NOT EXISTS idx_vision_elements_site ON vision_elements(site_id);
  CREATE INDEX IF NOT EXISTS idx_vision_elements_type ON vision_elements(element_type);
`);
63
+
64
+ // ═══════════════════════════════════════════════════════════════════════
65
+ // Encryption helpers (AES-256-GCM keyed from JWT_SECRET)
66
+ // ═══════════════════════════════════════════════════════════════════════
67
+
68
// Marker prepended to every stored ciphertext; decryptApiKey() rejects values without it.
const ENC_PREFIX = 'venc:';

// Secret used when JWT_SECRET is absent. It must stay unchanged so that values
// encrypted under the fallback can still be decrypted.
const _FALLBACK_KEY_SECRET = 'wab-vision-fallback-key';
let _fallbackKeyWarned = false;

/**
 * Derive the 32-byte AES-256 key as the SHA-256 digest of JWT_SECRET.
 *
 * When JWT_SECRET is unset or empty, the built-in fallback secret is used.
 * That secret ships in the source, so keys encrypted under it are not really
 * protected; a warning is logged the first time this happens.
 *
 * @returns {Buffer} 32-byte key.
 */
function _deriveKey() {
  let secret = process.env.JWT_SECRET;
  if (!secret) {
    if (!_fallbackKeyWarned) {
      _fallbackKeyWarned = true;
      console.warn('[Vision] JWT_SECRET is not set; API keys are encrypted with a built-in fallback secret. Set JWT_SECRET to protect them.');
    }
    secret = _FALLBACK_KEY_SECRET;
  }
  return crypto.createHash('sha256').update(secret).digest();
}
74
+
75
/**
 * Encrypt an API key with AES-256-GCM using a fresh random 12-byte IV.
 *
 * @param {*} plaintext - Value to encrypt; it is stringified before encryption.
 * @returns {string|null} `venc:<iv hex>:<auth tag hex>:<ciphertext hex>`, or
 *   null when `plaintext` is falsy.
 */
function encryptApiKey(plaintext) {
  if (!plaintext) {
    return null;
  }

  const nonce = crypto.randomBytes(12);
  const cipher = crypto.createCipheriv('aes-256-gcm', _deriveKey(), nonce);
  const ciphertext = Buffer.concat([
    cipher.update(String(plaintext), 'utf8'),
    cipher.final(),
  ]);

  // The auth tag is only available once final() has been called above.
  const segments = [nonce, cipher.getAuthTag(), ciphertext].map((buf) => buf.toString('hex'));
  return ENC_PREFIX + segments.join(':');
}
84
+
85
/**
 * Decrypt a value produced by encryptApiKey().
 *
 * The payload must be `venc:<iv hex>:<tag hex>:<ciphertext hex>` with a 12-byte
 * IV and a full 16-byte GCM auth tag. Shorter tags are rejected explicitly so a
 * truncated tag cannot weaken authentication.
 *
 * @param {*} encrypted - Stored value.
 * @returns {string|null} The plaintext, or null when the input is not a
 *   `venc:` string, is malformed, or fails authentication. Failures are logged.
 */
function decryptApiKey(encrypted) {
  if (typeof encrypted !== 'string' || !encrypted.startsWith(ENC_PREFIX)) return null;

  try {
    const segments = encrypted.slice(ENC_PREFIX.length).split(':');
    // Buffer.from(..., 'hex') silently stops at the first invalid character,
    // so validate the hex encoding up front.
    const isHex = (s) => /^(?:[0-9a-f]{2})*$/i.test(s);
    if (segments.length !== 3 || !segments.every(isHex)) {
      throw new Error('malformed payload');
    }

    const [iv, tag, data] = segments.map((hex) => Buffer.from(hex, 'hex'));
    if (iv.length !== 12 || tag.length !== 16) {
      throw new Error('unexpected IV or auth tag length');
    }

    const decipher = crypto.createDecipheriv('aes-256-gcm', _deriveKey(), iv, { authTagLength: 16 });
    decipher.setAuthTag(tag);
    return Buffer.concat([decipher.update(data), decipher.final()]).toString('utf8');
  } catch (e) {
    console.error('[Vision] Decrypt failed:', e.message);
    return null;
  }
}
102
+
103
+ // ═══════════════════════════════════════════════════════════════════════
104
+ // Prepared statements
105
+ // ═══════════════════════════════════════════════════════════════════════
106
+
107
// SQL expression for the current UTC time in the same text format that
// Date#toISOString() produces ("YYYY-MM-DDTHH:MM:SS.sssZ"). expires_at is
// written in that format, and the comparison is textual, so both sides must
// share one format. Comparing against datetime('now') ("YYYY-MM-DD HH:MM:SS")
// is wrong: 'T' sorts after ' ', so an entry never expires on its own UTC date.
const SQL_NOW_ISO = `strftime('%Y-%m-%dT%H:%M:%fZ', 'now')`;

// Prepared statements used by the vision service. Prepared once at module load.
const stmts = {
  // Insert a site's config, or update it in place when the site already has one.
  // A NULL api key on update keeps the previously stored key.
  upsertConfig: db.prepare(`
    INSERT INTO vision_configs (id, site_id, provider, model, endpoint, api_key_encrypted, max_resolution, cache_ttl, enabled)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, 1)
    ON CONFLICT(site_id) DO UPDATE SET
      provider = excluded.provider,
      model = excluded.model,
      endpoint = excluded.endpoint,
      api_key_encrypted = CASE WHEN excluded.api_key_encrypted IS NOT NULL THEN excluded.api_key_encrypted ELSE vision_configs.api_key_encrypted END,
      max_resolution = excluded.max_resolution,
      cache_ttl = excluded.cache_ttl,
      updated_at = datetime('now')
  `),
  getConfig: db.prepare(`SELECT * FROM vision_configs WHERE site_id = ?`),
  insertCache: db.prepare(`
    INSERT INTO vision_cache (id, site_id, url, screenshot_hash, analysis, elements_found, provider, model, tokens_used, latency_ms, expires_at)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
  `),
  // Newest unexpired cache entry for a screenshot hash.
  getCacheByHash: db.prepare(`
    SELECT * FROM vision_cache WHERE site_id = ? AND screenshot_hash = ? AND expires_at > ${SQL_NOW_ISO} ORDER BY created_at DESC LIMIT 1
  `),
  insertElement: db.prepare(`
    INSERT INTO vision_elements (id, cache_id, site_id, element_type, label, description, bounding_box, suggested_selector, confidence, interactable)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
  `),
  searchElements: db.prepare(`
    SELECT * FROM vision_elements WHERE site_id = ? ORDER BY confidence DESC
  `),
  searchElementsByType: db.prepare(`
    SELECT * FROM vision_elements WHERE site_id = ? AND element_type = ? ORDER BY confidence DESC
  `),
  getCacheById: db.prepare(`SELECT * FROM vision_cache WHERE id = ?`),
  // Newest cache entry for a hash regardless of expiry.
  getCacheBySiteAndHash: db.prepare(`
    SELECT * FROM vision_cache WHERE site_id = ? AND screenshot_hash = ? ORDER BY created_at DESC LIMIT 1
  `),
  getElementsByCache: db.prepare(`SELECT * FROM vision_elements WHERE cache_id = ?`),
  cacheStats: db.prepare(`
    SELECT
      COUNT(*) as total_cached,
      SUM(CASE WHEN expires_at > ${SQL_NOW_ISO} THEN 1 ELSE 0 END) as active_cached,
      SUM(CASE WHEN expires_at <= ${SQL_NOW_ISO} THEN 1 ELSE 0 END) as expired,
      SUM(tokens_used) as total_tokens,
      AVG(latency_ms) as avg_latency,
      SUM(LENGTH(analysis)) as total_bytes
    FROM vision_cache WHERE site_id = ?
  `),
  deleteExpiredCache: db.prepare(`DELETE FROM vision_cache WHERE site_id = ? AND expires_at <= ${SQL_NOW_ISO}`),
  // created_at is written by datetime('now') ("YYYY-MM-DD HH:MM:SS"); datetime(?)
  // normalises the cutoff parameter (ISO-8601 or SQLite format) to that format
  // so the textual comparison is valid.
  deleteOldCache: db.prepare(`DELETE FROM vision_cache WHERE site_id = ? AND created_at < datetime(?)`),
  deleteOrphanedElements: db.prepare(`DELETE FROM vision_elements WHERE cache_id NOT IN (SELECT id FROM vision_cache)`),
  visionHistory: db.prepare(`SELECT * FROM vision_cache WHERE site_id = ? ORDER BY created_at DESC LIMIT ?`),
  visionHistoryByUrl: db.prepare(`SELECT * FROM vision_cache WHERE site_id = ? AND url = ? ORDER BY created_at DESC LIMIT ?`),
};
159
+
160
+ // ═══════════════════════════════════════════════════════════════════════
161
+ // Provider API calls
162
+ // ═══════════════════════════════════════════════════════════════════════
163
+
164
// Upper bound, in milliseconds, for a single provider HTTP request.
const PROVIDER_TIMEOUT_MS = 60_000;

/**
 * Send an image and prompt to an Ollama server's /api/generate endpoint.
 *
 * @param {string} endpoint - Base URL of the Ollama server; trailing slashes are stripped.
 * @param {string} model - Model name forwarded in the request body.
 * @param {string} base64Image - Base64 image data, sent as the only entry of `images`.
 * @param {string} prompt - Prompt text.
 * @returns {Promise<{text: string, tokens: number}>} Response text and the sum
 *   of the reported prompt and eval token counts.
 * @throws {Error} On a non-2xx response (status plus up to 300 chars of the
 *   body); an aborted request rejects with fetch's abort error.
 */
async function _callOllama(endpoint, model, base64Image, prompt) {
  const baseUrl = endpoint.replace(/\/+$/, '');
  const aborter = new AbortController();
  const timeoutId = setTimeout(() => aborter.abort(), PROVIDER_TIMEOUT_MS);

  try {
    const payload = { model, prompt, images: [base64Image], stream: false };
    const response = await fetch(`${baseUrl}/api/generate`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(payload),
      signal: aborter.signal,
    });

    if (!response.ok) {
      const detail = await response.text().catch(() => '');
      throw new Error(`Ollama ${response.status}: ${detail.slice(0, 300)}`);
    }

    const reply = await response.json();
    const promptTokens = reply.prompt_eval_count || 0;
    const outputTokens = reply.eval_count || 0;
    return { text: reply.response || '', tokens: promptTokens + outputTokens };
  } finally {
    // Always cancel the timer so it cannot fire after the request settles.
    clearTimeout(timeoutId);
  }
}
196
+
197
/**
 * Send an image and prompt to the OpenAI chat-completions endpoint.
 *
 * The image MIME type in the data URL is sniffed from the base64 prefix
 * (JPEG, GIF, WebP; PNG otherwise) instead of always claiming PNG.
 *
 * @param {string} apiKey - Bearer token.
 * @param {string} model - Model id; falls back to 'gpt-4o' when falsy.
 * @param {string} base64Image - Base64 image data without a data-URL prefix.
 * @param {string} prompt - Prompt text.
 * @returns {Promise<{text: string, tokens: number}>} First choice's message
 *   content ('' when absent or null) and `usage.total_tokens` (0 when absent).
 * @throws {Error} On a non-2xx response (status plus up to 300 chars of the
 *   body); an aborted request rejects with fetch's abort error.
 */
async function _callOpenAI(apiKey, model, base64Image, prompt) {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), PROVIDER_TIMEOUT_MS);

  // Leading base64 characters of the JPEG, GIF and RIFF (WebP) signatures.
  const head = String(base64Image);
  let mediaType = 'image/png';
  if (head.startsWith('/9j/')) mediaType = 'image/jpeg';
  else if (head.startsWith('R0lGOD')) mediaType = 'image/gif';
  else if (head.startsWith('UklGR')) mediaType = 'image/webp';

  try {
    const res = await fetch('https://api.openai.com/v1/chat/completions', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        Authorization: `Bearer ${apiKey}`,
      },
      body: JSON.stringify({
        model: model || 'gpt-4o',
        messages: [
          {
            role: 'user',
            content: [
              { type: 'text', text: prompt },
              { type: 'image_url', image_url: { url: `data:${mediaType};base64,${base64Image}`, detail: 'high' } },
            ],
          },
        ],
        max_tokens: 4096,
      }),
      signal: controller.signal,
    });
    if (!res.ok) {
      const errBody = await res.text().catch(() => '');
      throw new Error(`OpenAI ${res.status}: ${errBody.slice(0, 300)}`);
    }
    const data = await res.json();
    return {
      // A choice may lack `message`, or carry null content; both become ''.
      text: data.choices?.[0]?.message?.content ?? '',
      tokens: data.usage?.total_tokens ?? 0,
    };
  } finally {
    clearTimeout(timer);
  }
}
237
+
238
/**
 * Send an image and prompt to the Anthropic Messages endpoint.
 *
 * The image `media_type` is sniffed from the base64 prefix (JPEG, GIF, WebP;
 * PNG otherwise) so a JPEG screenshot is not declared as PNG.
 *
 * @param {string} apiKey - Sent in the `x-api-key` header.
 * @param {string} model - Model id; falls back to 'claude-sonnet-4-20250514' when falsy.
 * @param {string} base64Image - Base64 image data.
 * @param {string} prompt - Prompt text.
 * @returns {Promise<{text: string, tokens: number}>} Text of the first `text`
 *   content block ('' when there is none) and input + output token counts
 *   (each treated as 0 when missing).
 * @throws {Error} On a non-2xx response (status plus up to 300 chars of the
 *   body); an aborted request rejects with fetch's abort error.
 */
async function _callAnthropic(apiKey, model, base64Image, prompt) {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), PROVIDER_TIMEOUT_MS);

  // Leading base64 characters of the JPEG, GIF and RIFF (WebP) signatures.
  const head = String(base64Image);
  let mediaType = 'image/png';
  if (head.startsWith('/9j/')) mediaType = 'image/jpeg';
  else if (head.startsWith('R0lGOD')) mediaType = 'image/gif';
  else if (head.startsWith('UklGR')) mediaType = 'image/webp';

  try {
    const res = await fetch('https://api.anthropic.com/v1/messages', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'x-api-key': apiKey,
        'anthropic-version': '2023-06-01',
      },
      body: JSON.stringify({
        model: model || 'claude-sonnet-4-20250514',
        max_tokens: 4096,
        messages: [
          {
            role: 'user',
            content: [
              { type: 'image', source: { type: 'base64', media_type: mediaType, data: base64Image } },
              { type: 'text', text: prompt },
            ],
          },
        ],
      }),
      signal: controller.signal,
    });
    if (!res.ok) {
      const errBody = await res.text().catch(() => '');
      throw new Error(`Anthropic ${res.status}: ${errBody.slice(0, 300)}`);
    }
    const data = await res.json();
    const textBlock = Array.isArray(data.content)
      ? data.content.find((block) => block && block.type === 'text')
      : undefined;
    // Default each counter on its own so a partial usage object cannot yield NaN.
    const inputTokens = data.usage?.input_tokens ?? 0;
    const outputTokens = data.usage?.output_tokens ?? 0;
    return {
      text: textBlock?.text ?? '',
      tokens: inputTokens + outputTokens,
    };
  } finally {
    clearTimeout(timer);
  }
}
281
+
282
+ // ═══════════════════════════════════════════════════════════════════════
283
+ // Prompt construction
284
+ // ═══════════════════════════════════════════════════════════════════════
285
+
286
/**
 * Build the prompt sent to a vision model.
 *
 * The base prompt asks for a fenced JSON array describing every interactive
 * element. Caller-supplied instructions, when present, are appended after it.
 *
 * @param {string} [customPrompt] - Extra instructions; ignored when falsy.
 * @returns {string} The complete prompt.
 */
function buildVisionPrompt(customPrompt) {
  const basePrompt = [
    'Analyze this screenshot of a web page. Identify every interactive UI element visible.',
    '',
    'For each element, return a JSON object with these fields:',
    '- "type": one of "button", "input", "link", "text", "image", "form", "nav", "dropdown"',
    '- "label": the visible text or aria-label of the element',
    '- "description": a short human-readable description of what the element does',
    '- "position": {"x": approximate x coordinate in pixels, "y": approximate y coordinate in pixels, "width": approximate width, "height": approximate height}',
    '- "selector": a suggested CSS selector that could target this element (e.g. "button.submit-btn", "#login-form input[type=email]")',
    '- "interactable": true if the element can be clicked, typed into, or otherwise interacted with',
    '- "confidence": a number from 0.0 to 1.0 indicating how confident you are in this identification',
    '',
    'Return ONLY a JSON array of these objects wrapped in a markdown code block like:',
    '```json',
    '[...]',
    '```',
    '',
    'Be thorough — include buttons, links, inputs, dropdowns, navigation items, forms, and any other interactive elements.',
  ].join('\n');

  return customPrompt
    ? `${basePrompt}\n\nAdditional instructions: ${customPrompt}`
    : basePrompt;
}
310
+
311
+ // ═══════════════════════════════════════════════════════════════════════
312
+ // Response parsing
313
+ // ═══════════════════════════════════════════════════════════════════════
314
+
315
// Element types accepted by the vision_elements.element_type CHECK constraint.
const VALID_ELEMENT_TYPES = new Set(['button', 'input', 'link', 'text', 'image', 'form', 'nav', 'dropdown']);

/**
 * Extract UI element descriptions from a model's raw text reply.
 *
 * Three strategies are tried in order, each only when the previous one yielded
 * nothing: (1) the first fenced code block parsed as JSON, (2) the first
 * bracketed array of objects found in the text, (3) every flat `{...}`
 * fragment that contains a "type" property. Whatever is found is passed
 * through _normalizeElement, and entries it rejects are dropped.
 *
 * @param {string} rawResponse - Raw model output.
 * @param {string} provider - Provider name, forwarded to _normalizeElement.
 * @returns {Array<object>} Normalised elements; empty when nothing parses or
 *   the input is not a non-empty string.
 */
function parseVisionResponse(rawResponse, provider) {
  if (typeof rawResponse !== 'string' || rawResponse === '') return [];

  // JSON.parse wrapper that reports failure instead of throwing.
  const attempt = (text) => {
    try {
      return { ok: true, value: JSON.parse(text) };
    } catch {
      return { ok: false, value: undefined };
    }
  };

  let found = [];

  // Strategy 1: fenced ```json ... ``` block.
  const fenced = /```(?:json)?\s*\n?([\s\S]*?)```/.exec(rawResponse);
  if (fenced) {
    const { ok, value } = attempt(fenced[1].trim());
    if (ok) {
      if (Array.isArray(value)) {
        found = value;
      } else if (value && typeof value === 'object') {
        found = [value];
      }
    }
  }

  // Strategy 2: a bare array of objects anywhere in the text.
  if (found.length === 0) {
    const bare = /\[\s*\{[\s\S]*?\}\s*\]/.exec(rawResponse);
    if (bare) {
      const { ok, value } = attempt(bare[0]);
      if (ok) found = value;
    }
  }

  // Strategy 3: individual flat objects carrying a "type" key.
  if (found.length === 0) {
    for (const [fragment] of rawResponse.matchAll(/\{[^{}]*"type"\s*:\s*"[^"]+?"[^{}]*\}/g)) {
      const { ok, value } = attempt(fragment);
      if (ok) found.push(value);
    }
  }

  return found
    .map((candidate) => _normalizeElement(candidate, provider))
    .filter(Boolean);
}
351
+
352
/**
 * Coerce one model-reported element into the canonical element shape.
 *
 * Model output is untrusted, so every field is coerced defensively: a
 * non-string `type` no longer throws, and an explicit confidence of 0 is
 * preserved instead of being replaced by the 0.5 default.
 *
 * @param {*} raw - Candidate element parsed from a model reply.
 * @param {string} [_provider] - Unused; kept for signature compatibility.
 * @returns {{type: string, label: string, description: string,
 *   boundingBox: {x: number, y: number, width: number, height: number},
 *   suggestedSelector: string, confidence: number, interactable: boolean}|null}
 *   The normalised element, or null when `raw` is not an object.
 */
function _normalizeElement(raw, _provider) {
  if (!raw || typeof raw !== 'object') return null;

  // String() guards against numeric/object "type" values from the model.
  let type = String(raw.type || raw.element_type || 'text').toLowerCase().trim();
  if (!VALID_ELEMENT_TYPES.has(type)) {
    // Map free-form type names onto the allowed set; the first match wins.
    if (/btn|button|submit/i.test(type)) type = 'button';
    else if (/input|field|text.?box|textarea/i.test(type)) type = 'input';
    else if (/link|anchor|href/i.test(type)) type = 'link';
    else if (/select|dropdown|combo/i.test(type)) type = 'dropdown';
    else if (/img|icon|logo/i.test(type)) type = 'image';
    else if (/form/i.test(type)) type = 'form';
    else if (/nav|menu|sidebar/i.test(type)) type = 'nav';
    else type = 'text';
  }

  const pos = raw.position || raw.bounding_box || raw.bbox || {};
  const boundingBox = {
    x: Number(pos.x) || 0,
    y: Number(pos.y) || 0,
    width: Number(pos.width || pos.w) || 0,
    height: Number(pos.height || pos.h) || 0,
  };

  // Missing or non-numeric confidence defaults to 0.5; a real 0 stays 0.
  const hasConfidence = raw.confidence != null && raw.confidence !== '';
  const numericConfidence = hasConfidence ? Number(raw.confidence) : Number.NaN;
  const confidence = Number.isFinite(numericConfidence)
    ? Math.max(0, Math.min(1, numericConfidence))
    : 0.5;

  const interactable = raw.interactable != null
    ? !!raw.interactable
    : ['button', 'input', 'link', 'dropdown', 'form'].includes(type);

  return {
    type,
    label: String(raw.label || raw.text || raw.name || '').slice(0, 500),
    description: String(raw.description || raw.desc || '').slice(0, 1000),
    boundingBox,
    suggestedSelector: String(raw.selector || raw.suggested_selector || raw.css_selector || '').slice(0, 500),
    confidence,
    interactable,
  };
}
391
+
392
/**
 * Derive element descriptions from free-form analysis text.
 *
 * Structured JSON (via parseVisionResponse) is preferred. When none is found,
 * each bullet line (`-`, `*` or `•`) becomes a low-confidence element whose
 * type is guessed from keywords and whose label is the first quoted phrase,
 * or the first 80 characters of the bullet.
 *
 * @param {string} analysisText - Analysis text.
 * @returns {Array<object>} Elements; empty for non-string or empty input.
 */
function extractElementsFromAnalysis(analysisText) {
  if (typeof analysisText !== 'string' || analysisText === '') return [];

  const structured = parseVisionResponse(analysisText, 'unknown');
  if (structured.length > 0) return structured;

  // Checked in insertion order; the first pattern that matches decides the type.
  const keywordRules = Object.entries({
    button: /\b(button|btn|submit|click)\b/i,
    input: /\b(input|field|text.?box|textarea|type|enter)\b/i,
    link: /\b(link|anchor|href|url|navigate)\b/i,
    dropdown: /\b(dropdown|select|combo|menu|option)\b/i,
    image: /\b(image|img|icon|logo|picture|photo)\b/i,
    form: /\b(form|login|signup|register|search.?bar)\b/i,
    nav: /\b(nav|menu|sidebar|header|footer|tab)\b/i,
  });
  const interactableTypes = ['button', 'input', 'link', 'dropdown', 'form'];
  const bullet = /^[\s]*[-*•]\s+(.+)/;

  const results = [];
  analysisText.split('\n').forEach((line) => {
    const hit = bullet.exec(line);
    if (hit === null) return;

    const body = hit[1].trim();
    if (body.length < 3) return;

    const rule = keywordRules.find(([, pattern]) => pattern.test(body));
    const kind = rule ? rule[0] : 'text';
    const quoted = /["']([^"']+)["']/.exec(body);

    results.push({
      type: kind,
      label: quoted ? quoted[1] : body.slice(0, 80),
      description: body.slice(0, 1000),
      boundingBox: { x: 0, y: 0, width: 0, height: 0 },
      suggestedSelector: '',
      confidence: 0.3,
      interactable: interactableTypes.includes(kind),
    });
  });

  return results;
}
440
+
441
+ // ═══════════════════════════════════════════════════════════════════════
442
+ // Core functions
443
+ // ═══════════════════════════════════════════════════════════════════════
444
+
445
/**
 * Create or update the vision configuration for a site.
 *
 * Missing options fall back to: provider 'local', model 'moondream', no
 * endpoint, resolution '1280x720', cache TTL 300 seconds. The API key, when
 * given, is encrypted before it is stored; omitting it passes NULL to the upsert.
 *
 * @param {string} siteId - Site identifier.
 * @param {object} [options]
 * @param {string} [options.provider]
 * @param {string} [options.model]
 * @param {string} [options.endpoint]
 * @param {string} [options.apiKey]
 * @param {string} [options.maxResolution]
 * @param {number} [options.cacheTtl] - Seconds; 0 is passed through as given.
 * @returns {object|null} The stored configuration as returned by _maskConfig.
 */
function configureVision(siteId, { provider, model, endpoint, apiKey, maxResolution, cacheTtl } = {}) {
  const upsertArgs = [
    uuidv4(),
    siteId,
    provider || 'local',
    model || 'moondream',
    endpoint || null,
    apiKey ? encryptApiKey(apiKey) : null,
    maxResolution || '1280x720',
    cacheTtl ?? 300,
  ];
  stmts.upsertConfig.run(...upsertArgs);

  // Read the row back so the caller sees what was actually persisted.
  return _maskConfig(stmts.getConfig.get(siteId));
}
463
+
464
/**
 * Fetch the stored vision configuration for a site.
 *
 * @param {string} siteId - Site identifier.
 * @returns {object|null} The configuration as returned by _maskConfig, or null
 *   when the site has no configuration row.
 */
function getVisionConfig(siteId) {
  const stored = stmts.getConfig.get(siteId);
  return stored ? _maskConfig(stored) : null;
}
469
+
470
/**
 * Produce a copy of a config row that is safe to return to callers.
 *
 * `api_key_encrypted` is removed and replaced by `api_key_masked`:
 *  - null when no key is stored;
 *  - first 4 + '****' + last 4 characters when the key is at least 12 chars;
 *  - '********' when the key is shorter (a 4+4 mask would reveal most or all
 *    of it) or when it cannot be decrypted.
 *
 * @param {object|null} row - Row from vision_configs.
 * @returns {object|null} Masked copy, or null when `row` is falsy.
 */
function _maskConfig(row) {
  if (!row) return null;

  // Destructure instead of delete so the input row is never mutated.
  const { api_key_encrypted: encryptedKey, ...safe } = row;

  if (!encryptedKey) {
    safe.api_key_masked = null;
    return safe;
  }

  const MIN_LENGTH_FOR_PARTIAL_REVEAL = 12;
  const decrypted = decryptApiKey(encryptedKey);
  safe.api_key_masked = decrypted && decrypted.length >= MIN_LENGTH_FOR_PARTIAL_REVEAL
    ? `${decrypted.slice(0, 4)}****${decrypted.slice(-4)}`
    : '********';
  return safe;
}
484
+
485
/**
 * Analyse a screenshot with the site's configured vision provider.
 *
 * An unexpired cached analysis for the same screenshot (keyed by the SHA-256
 * of the base64 string) is returned without calling the provider. Otherwise
 * the provider is called, the reply is parsed into elements, and both the
 * analysis and its elements are persisted.
 *
 * The configured cache TTL is honoured as stored, including 0, which makes
 * the new entry expire immediately. Only a missing TTL falls back to 300 s.
 *
 * @param {string} siteId - Site identifier.
 * @param {object} [options]
 * @param {string} options.screenshotBase64 - Base64 image data (required).
 * @param {string} [options.url] - Page URL stored with the cache entry.
 * @param {string} [options.prompt] - Extra instructions appended to the base prompt.
 * @returns {Promise<{analysis: string, elements: Array<object>, cached: boolean,
 *   latency_ms: number, tokens_used: number, cache_id: string}>}
 * @throws {Error} When the screenshot is missing, vision is not configured or
 *   disabled, a required API key is unavailable, the provider request fails,
 *   or the provider times out.
 */
async function analyzeScreenshot(siteId, { screenshotBase64, url, prompt } = {}) {
  if (!screenshotBase64) throw new Error('screenshotBase64 is required');

  const config = stmts.getConfig.get(siteId);
  if (!config || !config.enabled) throw new Error('Vision not configured or disabled for this site');

  const screenshotHash = crypto.createHash('sha256').update(screenshotBase64).digest('hex');

  const cached = stmts.getCacheByHash.get(siteId, screenshotHash);
  if (cached) {
    let elements = [];
    try { elements = JSON.parse(cached.elements_found || '[]'); } catch { /* corrupt cache row: report no elements */ }
    return {
      analysis: cached.analysis,
      elements,
      cached: true,
      latency_ms: cached.latency_ms,
      tokens_used: cached.tokens_used,
      cache_id: cached.id,
    };
  }

  const fullPrompt = buildVisionPrompt(prompt);
  const apiKey = config.api_key_encrypted ? decryptApiKey(config.api_key_encrypted) : null;
  const providerName = config.provider;
  const modelName = config.model;

  const startTime = Date.now();
  let result;

  try {
    switch (providerName) {
      case 'openai':
        if (!apiKey) throw new Error('OpenAI API key not configured');
        result = await _callOpenAI(apiKey, modelName, screenshotBase64, fullPrompt);
        break;
      case 'anthropic':
        if (!apiKey) throw new Error('Anthropic API key not configured');
        result = await _callAnthropic(apiKey, modelName, screenshotBase64, fullPrompt);
        break;
      case 'ollama':
      case 'local':
      default: {
        // 'local' and unknown providers are served by an Ollama endpoint.
        const ep = config.endpoint || 'http://localhost:11434';
        result = await _callOllama(ep, modelName, screenshotBase64, fullPrompt);
        break;
      }
    }
  } catch (err) {
    if (err.name === 'AbortError') throw new Error(`Vision provider timed out after ${PROVIDER_TIMEOUT_MS}ms`);
    throw err;
  }

  const latencyMs = Date.now() - startTime;
  const analysisText = result.text;
  const tokensUsed = result.tokens || 0;

  const elements = parseVisionResponse(analysisText, providerName);
  const cacheId = uuidv4();
  // `??` rather than `||`: a configured TTL of 0 must not be replaced by 300.
  const ttlSeconds = config.cache_ttl ?? 300;
  const expiresAt = new Date(Date.now() + ttlSeconds * 1000).toISOString();

  stmts.insertCache.run(
    cacheId, siteId, url || null, screenshotHash,
    analysisText, JSON.stringify(elements),
    providerName, modelName, tokensUsed, latencyMs, expiresAt
  );

  // Insert all element rows inside one transaction.
  const insertElements = db.transaction((elems) => {
    for (const el of elems) {
      stmts.insertElement.run(
        uuidv4(), cacheId, siteId,
        el.type, el.label, el.description,
        JSON.stringify(el.boundingBox),
        el.suggestedSelector,
        el.confidence,
        el.interactable ? 1 : 0
      );
    }
  });
  insertElements(elements);

  return {
    analysis: analysisText,
    elements,
    cached: false,
    latency_ms: latencyMs,
    tokens_used: tokensUsed,
    cache_id: cacheId,
  };
}
575
+
576
+ // ═══════════════════════════════════════════════════════════════════════
577
+ // Element search
578
+ // ═══════════════════════════════════════════════════════════════════════
579
+
580
/**
 * Search previously detected elements for a site.
 *
 * Candidates come back ordered by confidence (optionally restricted to one
 * element type) and are then narrowed step by step:
 *  - `url`: keep only elements whose cache entry was recorded for that URL;
 *    this filter is skipped when no cache entry exists for the URL;
 *  - `label`: case-insensitive substring match on the element label;
 *  - `description`: score each candidate as 40% confidence + 60% fraction of
 *    description terms (longer than one character) found in its label or
 *    description, sort by score, and drop scores of 0.1 or below.
 * At most 20 results are returned.
 *
 * @param {string} siteId - Site identifier.
 * @param {string} [url] - Page URL to restrict the search to.
 * @param {object} [criteria]
 * @param {string} [criteria.description]
 * @param {string} [criteria.type]
 * @param {string} [criteria.label]
 * @returns {Array<object>} Matching elements with `bounding_box` parsed and a `_score`.
 */
function findElement(siteId, url, { description, type, label } = {}) {
  let pool = type
    ? stmts.searchElementsByType.all(siteId, type)
    : stmts.searchElements.all(siteId);

  if (url) {
    const cacheRows = db
      .prepare(`SELECT id FROM vision_cache WHERE site_id = ? AND url = ?`)
      .all(siteId, url);
    if (cacheRows.length > 0) {
      const allowedCacheIds = new Set(cacheRows.map((row) => row.id));
      pool = pool.filter((el) => allowedCacheIds.has(el.cache_id));
    }
  }

  if (label) {
    const needle = label.toLowerCase();
    pool = pool.filter((el) => el.label && el.label.toLowerCase().includes(needle));
  }

  if (description) {
    const terms = description.toLowerCase().split(/\s+/).filter((term) => term.length > 1);
    const scoreOf = (el) => {
      const haystack = `${el.label || ''} ${el.description || ''}`.toLowerCase();
      const hits = terms.reduce((count, term) => (haystack.includes(term) ? count + 1 : count), 0);
      const termScore = terms.length > 0 ? hits / terms.length : 0;
      return (el.confidence * 0.4) + (termScore * 0.6);
    };

    pool = pool.map((el) => ({ ...el, _score: scoreOf(el) }));
    pool.sort((a, b) => b._score - a._score);
    pool = pool.filter((el) => el._score > 0.1);
  }

  const parseBox = (text) => {
    try {
      return JSON.parse(text || '{}');
    } catch {
      return {};
    }
  };

  return pool.slice(0, 20).map((el) => ({
    id: el.id,
    cache_id: el.cache_id,
    element_type: el.element_type,
    label: el.label,
    description: el.description,
    bounding_box: parseBox(el.bounding_box),
    suggested_selector: el.suggested_selector,
    confidence: el.confidence,
    interactable: !!el.interactable,
    _score: el._score || el.confidence,
  }));
}
640
+
641
+ // ═══════════════════════════════════════════════════════════════════════
642
+ // Screenshot comparison
643
+ // ═══════════════════════════════════════════════════════════════════════
644
+
645
/**
 * Diff the elements recorded for two cached screenshots of a site.
 *
 * Elements are matched on `type::lowercased label`. A matched pair counts as
 * "changed" when any bounding-box coordinate differs by more than 10 or the
 * descriptions differ. Elements that share a key within one screenshot
 * collapse to the last one.
 *
 * @param {string} siteId - Site identifier.
 * @param {string} url - Not used by the comparison.
 * @param {string} screenshotAHash - Hash of the "before" screenshot.
 * @param {string} screenshotBHash - Hash of the "after" screenshot.
 * @returns {object} `{ added, removed, changed, unchanged, summary }`, or an
 *   object with `error` and empty lists when either screenshot is not cached.
 */
function compareScreenshots(siteId, url, screenshotAHash, screenshotBHash) {
  const beforeRow = stmts.getCacheBySiteAndHash.get(siteId, screenshotAHash);
  const afterRow = stmts.getCacheBySiteAndHash.get(siteId, screenshotBHash);

  if (!(beforeRow && afterRow)) {
    return { error: 'One or both screenshots not found in cache', added: [], removed: [], changed: [], unchanged: [] };
  }

  const readElements = (row) => {
    try {
      return JSON.parse(row.elements_found || '[]');
    } catch {
      return [];
    }
  };
  const keyOf = (el) => `${el.type || el.element_type}::${(el.label || '').toLowerCase()}`;
  const indexByKey = (list) => {
    const index = new Map();
    for (const el of list) index.set(keyOf(el), el);
    return index;
  };
  const boxOf = (el) => el.position || el.boundingBox || {};
  const differsBy10 = (a, b, field) => Math.abs((a[field] || 0) - (b[field] || 0)) > 10;

  const beforeByKey = indexByKey(readElements(beforeRow));
  const afterByKey = indexByKey(readElements(afterRow));

  const added = [];
  const removed = [];
  const changed = [];
  const unchanged = [];

  afterByKey.forEach((afterEl, key) => {
    if (!beforeByKey.has(key)) {
      added.push(afterEl);
      return;
    }

    const beforeEl = beforeByKey.get(key);
    const boxBefore = boxOf(beforeEl);
    const boxAfter = boxOf(afterEl);
    const moved = ['x', 'y', 'width', 'height'].some((field) => differsBy10(boxBefore, boxAfter, field));
    const redescribed = (beforeEl.description || '') !== (afterEl.description || '');

    if (moved || redescribed) {
      changed.push({ before: beforeEl, after: afterEl });
    } else {
      unchanged.push(afterEl);
    }
  });

  beforeByKey.forEach((beforeEl, key) => {
    if (!afterByKey.has(key)) removed.push(beforeEl);
  });

  return {
    added,
    removed,
    changed,
    unchanged,
    summary: {
      added_count: added.length,
      removed_count: removed.length,
      changed_count: changed.length,
      unchanged_count: unchanged.length,
    },
  };
}
710
+
711
+ // ═══════════════════════════════════════════════════════════════════════
712
+ // Cache management
713
+ // ═══════════════════════════════════════════════════════════════════════
714
+
715
/**
 * Summarise the vision cache for a site.
 *
 * `hit_rate_pct` is the share of cache rows that are still unexpired,
 * rounded to one decimal place. Aggregates that come back NULL (for example
 * when the site has no rows) are reported as 0.
 *
 * @param {string} siteId - Site identifier.
 * @returns {{total_cached: number, active_cached: number, expired: number,
 *   hit_rate_pct: number, total_tokens_used: number, avg_latency_ms: number,
 *   storage_estimate_bytes: number}}
 */
function getCacheStats(siteId) {
  const row = stmts.cacheStats.get(siteId);
  const total = row.total_cached || 0;
  const active = row.active_cached || 0;

  let activeShare = '0.0';
  if (total > 0) {
    activeShare = ((active / total) * 100).toFixed(1);
  }

  return {
    total_cached: total,
    active_cached: active,
    expired: row.expired || 0,
    hit_rate_pct: parseFloat(activeShare),
    total_tokens_used: row.total_tokens || 0,
    avg_latency_ms: Math.round(row.avg_latency || 0),
    storage_estimate_bytes: row.total_bytes || 0,
  };
}
732
+
733
/**
 * Remove cache entries for a site, then delete element rows left without a
 * cache entry.
 *
 * With `olderThan`, entries created more than that many seconds ago are
 * deleted; otherwise only expired entries are deleted.
 *
 * The cutoff is formatted as "YYYY-MM-DD HH:MM:SS" (UTC), the format SQLite's
 * datetime('now') uses for vision_cache.created_at. The comparison is textual,
 * so an ISO-8601 cutoff ("...T...Z") would compare incorrectly against rows
 * created on the cutoff's own date.
 *
 * @param {string} siteId - Site identifier.
 * @param {object} [options]
 * @param {number} [options.olderThan] - Age threshold in seconds; a falsy
 *   value selects the expired-only mode.
 * @returns {{deleted: number, orphaned_elements_cleaned: number}}
 */
function clearCache(siteId, { olderThan } = {}) {
  let deleted = 0;

  if (olderThan) {
    const cutoffDate = new Date(Date.now() - olderThan * 1000);
    // "2025-01-31T12:34:56.789Z" -> "2025-01-31 12:34:56"
    const cutoff = cutoffDate.toISOString().slice(0, 19).replace('T', ' ');
    const result = stmts.deleteOldCache.run(siteId, cutoff);
    deleted = result.changes;
  } else {
    const result = stmts.deleteExpiredCache.run(siteId);
    deleted = result.changes;
  }

  const orphaned = stmts.deleteOrphanedElements.run();
  return { deleted, orphaned_elements_cleaned: orphaned.changes };
}
748
+
749
+ // ═══════════════════════════════════════════════════════════════════════
750
+ // Supported models
751
+ // ═══════════════════════════════════════════════════════════════════════
752
+
753
/**
 * List the vision models offered for each provider.
 *
 * A fresh structure is built on every call, so callers may mutate the result.
 *
 * @returns {Array<{provider: string, models: Array<{id: string, name: string,
 *   capabilities: string[], max_resolution: string, cost: string}>}>}
 */
function getSupportedModels() {
  // Capability tiers; each one extends the previous.
  const BASIC = ['element_detection', 'text_recognition', 'layout_analysis'];
  const REASONING = [...BASIC, 'reasoning'];
  const COMPLEX = [...REASONING, 'complex_ui'];
  const FULL = [...COMPLEX, 'accessibility'];

  const SD = '1280x720';
  const HD = '1920x1080';
  const MAX = '4096x4096';

  const entry = (id, name, capabilities, max_resolution, cost) => ({
    id,
    name,
    capabilities: [...capabilities],
    max_resolution,
    cost,
  });
  const free = (id, name, capabilities, resolution) => entry(id, name, capabilities, resolution, 'free');
  const paid = (id, name, capabilities) => entry(id, name, capabilities, MAX, 'paid');

  return [
    {
      provider: 'local',
      models: [
        free('moondream', 'Moondream', BASIC, SD),
        free('llava', 'LLaVA', REASONING, HD),
        free('llava:13b', 'LLaVA 13B', COMPLEX, HD),
      ],
    },
    {
      provider: 'ollama',
      models: [
        free('moondream', 'Moondream (Ollama)', BASIC, SD),
        free('llava', 'LLaVA (Ollama)', REASONING, HD),
        free('bakllava', 'BakLLaVA (Ollama)', BASIC, HD),
      ],
    },
    {
      provider: 'openai',
      models: [
        paid('gpt-4o', 'GPT-4o', FULL),
        paid('gpt-4o-mini', 'GPT-4o Mini', BASIC),
        paid('gpt-4-turbo', 'GPT-4 Turbo', COMPLEX),
      ],
    },
    {
      provider: 'anthropic',
      models: [
        paid('claude-sonnet-4-20250514', 'Claude Sonnet 4', FULL),
        paid('claude-3-5-sonnet-20241022', 'Claude 3.5 Sonnet', COMPLEX),
        paid('claude-3-haiku-20240307', 'Claude 3 Haiku', BASIC),
      ],
    },
  ];
}
789
+
790
+ // ═══════════════════════════════════════════════════════════════════════
791
+ // Token estimation
792
+ // ═══════════════════════════════════════════════════════════════════════
793
+
794
/**
 * Roughly estimate the token cost of sending an image to a vision model.
 *
 * Two estimates are computed and the larger one is returned:
 *  - tile-based: 85 + 170 per 512x512 tile, using the dimensions read from a
 *    PNG header when the data looks like a PNG, else an assumed 1280x720;
 *  - size-based: one token per 750 decoded bytes.
 *
 * @param {string} imageBase64 - Base64 image data.
 * @returns {number} Estimated tokens; 0 when the input is falsy.
 */
function estimateTokens(imageBase64) {
  if (!imageBase64) return 0;

  const size = { width: 1280, height: 720 };
  try {
    const looksJpeg = imageBase64.startsWith('/9j/');
    // JPEG input keeps the default dimensions; only PNG headers are decoded.
    if (!looksJpeg && imageBase64.startsWith('iVBOR')) {
      const header = Buffer.from(imageBase64.slice(0, 100), 'base64');
      if (header.length >= 24) {
        // PNG IHDR: width at byte offset 16, height at 20, both big-endian uint32.
        size.width = header.readUInt32BE(16);
        size.height = header.readUInt32BE(20);
      }
    }
  } catch { /* use defaults */ }

  const tileCount = Math.ceil(size.width / 512) * Math.ceil(size.height / 512);
  const tileEstimate = 85 + (tileCount * 170);

  const decodedBytes = Math.ceil(imageBase64.length * 0.75);
  const sizeEstimate = Math.ceil(decodedBytes / 750);

  return Math.max(tileEstimate, sizeEstimate);
}
819
+
820
+ // ═══════════════════════════════════════════════════════════════════════
821
+ // History
822
+ // ═══════════════════════════════════════════════════════════════════════
823
+
824
/**
 * List cached vision analyses for a site, optionally restricted to one URL.
 *
 * @param {string} siteId - Site whose history is requested.
 * @param {Object} [options]
 * @param {number|string} [options.limit] - Maximum rows to return. Anything
 *   that is not a positive integer falls back to 50.
 * @param {string} [options.url] - When set, only rows for this URL are read.
 * @returns {Array<Object>} One summary per row; `elements_count` is the number
 *   of entries in the row's stored element list (0 when it cannot be parsed).
 */
function getVisionHistory(siteId, { limit, url } = {}) {
  // Normalize before binding: SQLite treats a negative LIMIT as "no limit",
  // and a numeric string would otherwise be bound as text.
  const requested = Number.parseInt(limit, 10);
  const max = Number.isInteger(requested) && requested > 0 ? requested : 50;

  const rows = url
    ? stmts.visionHistoryByUrl.all(siteId, url, max)
    : stmts.visionHistory.all(siteId, max);

  return rows.map((row) => {
    let elements = [];
    try {
      const parsed = JSON.parse(row.elements_found || '[]');
      // Stored JSON may legally be `null` or an object; only arrays have a count.
      if (Array.isArray(parsed)) elements = parsed;
    } catch { /* malformed JSON — report zero elements */ }

    return {
      id: row.id,
      site_id: row.site_id,
      url: row.url,
      screenshot_hash: row.screenshot_hash,
      provider: row.provider,
      model: row.model,
      tokens_used: row.tokens_used,
      latency_ms: row.latency_ms,
      elements_count: elements.length,
      created_at: row.created_at,
      expires_at: row.expires_at,
    };
  });
}
850
+
851
+ // ═══════════════════════════════════════════════════════════════════════
852
+ // LOCAL VISION ENGINE — Self-contained, no external API needed
853
+ // DOM-based element detection, dark pattern analysis, ad detection,
854
+ // layout analysis, accessibility audit — all computed locally.
855
+ // ═══════════════════════════════════════════════════════════════════════
856
+
857
/**
 * Text signatures for dark-pattern detection, keyed by pattern name.
 * A node is reported for a pattern as soon as any one of its regexes matches
 * the node text.
 */
const DARK_PATTERN_SIGNATURES = {
  confirmshaming: [
    /no,? i (don'?t|do not) (want|like|need|care)/i,
    /no thanks,? i (prefer|like|want) (to )?(pay|miss|stay|lose)/i,
    /i('?d rather|'?ll pass)/i, /keep (paying|losing|missing)/i,
  ],
  urgency: [
    /only \d+ left/i, /limited (time|offer|stock|availability)/i,
    // Word boundary on hurry/rush so that "brush", "crush" etc. are not flagged.
    /\b(?:hurry|rush)|act now|don'?t miss|last chance|expires? (soon|in|today)/i,
    /\d+ (people|others|users) (are )?(viewing|watching|buying)/i,
    /selling fast|almost gone/i,
  ],
  hiddenCosts: [/service fee|handling fee|processing fee|convenience fee/i, /additional charge|extra charge|booking fee/i],
  forcedContinuity: [/free trial.*(auto|automatic).*(renew|bill|charge)/i, /will be charged after/i, /cancel anytime.*(before|or)/i],
  sneakIntoBasket: [/added to (your )?cart|included (in|with) (your )?(order|purchase)/i, /protection plan|warranty|insurance/i],
  privacyZuckering: [/share (your )?(data|info|details|location|contacts)/i, /personalize/i],
};

/** Patterns matched against an element's class and id to flag it as an ad. */
const AD_CLASS_PATTERNS = [
  /\bad[s]?\b/i, /\badvert/i, /\bsponsor/i, /\bpromo(tion|ted)?\b/i,
  /\bbanner[-_]?ad/i, /\bgoogle[-_]?ad/i, /\bdfp[-_]/i, /\badsense/i, /\btaboola/i, /\boutbrain/i,
];

/** [width, height] pairs, in pixels, that are treated as ad-slot sizes. */
const AD_SIZES = [
  [728, 90], [300, 250], [336, 280], [160, 600], [320, 50], [970, 250], [300, 600],
];
883
+
884
/**
 * Analyze a flattened DOM snapshot locally — no external API calls.
 *
 * For every node the function runs five independent heuristics: element
 * classification, dark-pattern text matching, ad detection (class/id, slot
 * size, iframe source), accessibility checks and layout-region detection.
 *
 * Input is treated as untrusted: a non-array `domNodes` is handled as an empty
 * list, non-object entries are skipped, and text/attribute values are coerced
 * to strings before string methods are applied.
 *
 * @param {Array} domNodes - Flattened DOM nodes from the extraction script;
 *   each node may carry `tag`, `text`, `selector`, `attributes` and `rect`.
 * @param {Object} viewport - { width, height }. Accepted for interface
 *   compatibility; the current heuristics do not read it.
 * @returns {Object} `{ text, tokens, elements, darkPatterns, ads,
 *   accessibility, layout }` where `text` is the JSON-serialized analysis
 *   (including a summary) and `tokens` is always 0.
 */
function analyzeLocally(domNodes, viewport = { width: 1280, height: 720 }) {
  const elements = [];
  const darkPatterns = [];
  const adElements = [];
  const accessibilityIssues = [];
  const layoutRegions = [];

  // [region, matching tags, matching ARIA roles]; checked in this order, first match wins.
  const LAYOUT_REGIONS = [
    ['header', ['header'], ['banner']],
    ['navigation', ['nav'], ['navigation']],
    ['main', ['main', 'article'], ['main']],
    ['sidebar', ['aside'], ['complementary']],
    ['footer', ['footer'], ['contentinfo']],
  ];
  const INTERACTABLE_TYPES = ['button', 'link', 'input', 'dropdown', 'form'];

  const asText = (value) => (value == null ? '' : String(value));
  const nodes = Array.isArray(domNodes) ? domNodes : [];

  for (const node of nodes) {
    if (!node || typeof node !== 'object') continue;

    const attrs = node.attributes && typeof node.attributes === 'object' ? node.attributes : {};
    const tag = asText(node.tag).toLowerCase();
    const cls = asText(attrs.class).toLowerCase();
    const id = asText(attrs.id).toLowerCase();
    const role = asText(attrs.role).toLowerCase();
    // HTML `type` values are case-insensitive.
    const inputType = asText(attrs.type).toLowerCase();
    const text = asText(node.text).trim();
    const rect = node.rect || {};
    const selector = node.selector || '';

    // ── Element detection ──
    // Semantic tag/role matches come first; class-name hints are lower-confidence fallbacks.
    let elType = null;
    let confidence = 0;

    if (tag === 'button' || role === 'button' || (tag === 'input' && ['submit', 'button'].includes(inputType))) {
      elType = 'button'; confidence = 0.95;
    } else if (tag === 'a' && attrs.href) {
      elType = 'link'; confidence = 0.9;
    } else if (['input', 'textarea'].includes(tag) || role === 'textbox') {
      elType = 'input'; confidence = 0.95;
    } else if (tag === 'select' || role === 'listbox' || role === 'combobox') {
      elType = 'dropdown'; confidence = 0.9;
    } else if (tag === 'form' || role === 'form') {
      elType = 'form'; confidence = 0.85;
    } else if (['img', 'picture', 'svg', 'video', 'canvas'].includes(tag) || role === 'img') {
      elType = 'image'; confidence = 0.8;
    } else if (['nav', 'header', 'footer'].includes(tag) || ['navigation', 'banner', 'contentinfo'].includes(role) || cls.includes('nav') || cls.includes('menu')) {
      elType = 'nav'; confidence = 0.75;
    } else if (cls.includes('btn') || cls.includes('button') || cls.includes('cta')) {
      elType = 'button'; confidence = 0.7;
    } else if (cls.includes('dropdown') || cls.includes('select')) {
      elType = 'dropdown'; confidence = 0.65;
    }

    if (elType) {
      elements.push({
        type: elType,
        label: asText(text || attrs.placeholder || attrs['aria-label'] || attrs.alt || '').slice(0, 200),
        description: `${tag} element${cls ? ' class=' + cls.slice(0, 80) : ''}`,
        boundingBox: { x: rect.x || 0, y: rect.y || 0, width: rect.width || 0, height: rect.height || 0 },
        suggestedSelector: node.selector || _buildFallbackSelector(node),
        confidence,
        interactable: INTERACTABLE_TYPES.includes(elType),
      });
    }

    // ── Dark pattern detection ──
    if (text.length > 5) {
      for (const [patternName, regexes] of Object.entries(DARK_PATTERN_SIGNATURES)) {
        // At most one report per pattern per node.
        if (regexes.some((rx) => rx.test(text))) {
          darkPatterns.push({ type: patternName, text: text.slice(0, 200), selector, severity: patternName === 'urgency' ? 'medium' : 'high', confidence: 0.85 });
        }
      }
    }

    // Pre-checked upsell checkbox detection.
    if (tag === 'input' && inputType === 'checkbox' && attrs.checked != null) {
      // <input> elements have no text content, so fall back to the attributes
      // that describe the checkbox when the node text is empty.
      const described = text || [attrs['aria-label'], attrs.name, attrs.value, attrs.id].filter(Boolean).map(asText).join(' ');
      if (/newsletter|marketing|promo|share|partner|third.party|sms|offer/i.test(described)) {
        darkPatterns.push({ type: 'misdirection', text: `Pre-checked: "${described.slice(0, 100)}"`, selector, severity: 'medium', confidence: 0.9 });
      }
    }

    // ── Ad detection ──
    let isAd = AD_CLASS_PATTERNS.some((rx) => rx.test(cls) || rx.test(id));
    if (!isAd && rect.width && rect.height) {
      // Within 10px of a known ad-slot size.
      isAd = AD_SIZES.some(([w, h]) => Math.abs(rect.width - w) < 10 && Math.abs(rect.height - h) < 10);
    }
    if (!isAd && tag === 'iframe' && attrs.src) {
      isAd = /doubleclick|googlesyndication|adnxs|criteo|taboola|outbrain/i.test(asText(attrs.src));
    }
    if (isAd) adElements.push({ tag, selector, rect, reason: `class/id/size match` });

    // ── Accessibility ──
    if (tag === 'img' && !attrs.alt) {
      accessibilityIssues.push({ type: 'missing-alt', severity: 'high', selector });
    }
    if (['button', 'a', 'input'].includes(tag) && rect.width > 0 && (rect.width < 44 || rect.height < 44)) {
      accessibilityIssues.push({ type: 'small-tap-target', severity: 'medium', selector, size: `${rect.width}x${rect.height}` });
    }
    if (['input', 'select', 'textarea'].includes(tag) && !attrs['aria-label'] && !attrs['aria-labelledby'] && !attrs.id) {
      accessibilityIssues.push({ type: 'missing-label', severity: 'high', selector });
    }

    // ── Layout regions ──
    const region = LAYOUT_REGIONS.find(([, tags, roles]) => tags.includes(tag) || roles.includes(role));
    if (region) {
      layoutRegions.push({ type: region[0], tag, rect, selector });
    }
  }

  // Count element types in a single pass for the summary.
  const typeCounts = {};
  for (const el of elements) {
    typeCounts[el.type] = (typeCounts[el.type] || 0) + 1;
  }

  // Built once so the serialized text and the returned object always agree.
  const accessibility = {
    issues: accessibilityIssues,
    score: Math.max(0, 100 - accessibilityIssues.length * 5),
  };
  const layout = {
    regions: layoutRegions,
    columns: layoutRegions.some((r) => r.type === 'sidebar') ? 2 : 1,
  };

  // Build analysis text (serialized summary + details, used for caching)
  const analysisText = JSON.stringify({
    summary: {
      totalElements: elements.length,
      buttons: typeCounts.button || 0,
      links: typeCounts.link || 0,
      inputs: typeCounts.input || 0,
      forms: typeCounts.form || 0,
      darkPatterns: darkPatterns.length,
      ads: adElements.length,
      accessibilityIssues: accessibilityIssues.length,
    },
    elements,
    darkPatterns,
    ads: adElements,
    accessibility,
    layout,
  });

  return {
    text: analysisText,
    tokens: 0, // Local analysis — no tokens used
    elements,
    darkPatterns,
    ads: adElements,
    accessibility,
    layout,
  };
}
1040
+
1041
/**
 * Escape a string for use as a CSS identifier, following the same rules as
 * the browser's `CSS.escape` (which is not available in Node).
 * Plain identifiers (letters, non-leading digits, `-`, `_`) are returned unchanged.
 *
 * @param {*} value - Value to escape; coerced to a string.
 * @returns {string} Escaped identifier.
 */
function _escapeCssIdentifier(value) {
  const str = String(value);
  let out = '';
  for (let i = 0; i < str.length; i++) {
    const ch = str.charAt(i);
    const code = str.charCodeAt(i);
    const isDigit = code >= 0x30 && code <= 0x39;

    if (code === 0) {
      out += '\uFFFD';
    } else if (
      (code >= 0x01 && code <= 0x1f) || code === 0x7f ||
      (i === 0 && isDigit) ||
      (i === 1 && isDigit && str.charCodeAt(0) === 0x2d)
    ) {
      // Control characters and leading digits are written as a hex escape
      // followed by a terminating space.
      out += `\\${code.toString(16)} `;
    } else if (i === 0 && code === 0x2d && str.length === 1) {
      // A lone "-" is not a valid identifier.
      out += `\\${ch}`;
    } else if (
      code >= 0x80 || code === 0x2d || code === 0x5f || isDigit ||
      (code >= 0x41 && code <= 0x5a) || (code >= 0x61 && code <= 0x7a)
    ) {
      out += ch;
    } else {
      out += `\\${ch}`;
    }
  }
  return out;
}

/**
 * Build a best-effort CSS selector for a node that has no `selector` of its own.
 *
 * Returns `#id` when the node has an id; otherwise the tag name (default
 * `div`) followed by up to the first two class names. Ids and class names are
 * CSS-escaped so values such as `a.b` or `md:flex` yield a valid selector.
 *
 * @param {Object} node - DOM snapshot node with optional `tag` and `attributes`.
 * @returns {string} CSS selector.
 */
function _buildFallbackSelector(node) {
  const attrs = node?.attributes;
  if (attrs?.id) return `#${_escapeCssIdentifier(attrs.id)}`;

  const tag = node?.tag || 'div';
  if (!attrs?.class) return tag;

  const classSuffix = String(attrs.class)
    .trim()
    .split(/\s+/)
    .filter(Boolean)
    .slice(0, 2)
    .map(_escapeCssIdentifier)
    .join('.');
  return classSuffix ? `${tag}.${classSuffix}` : tag;
}
1051
+
1052
/**
 * DOM Extraction Script — inject into pages to capture DOM for local analysis.
 *
 * Returns the source of a self-invoking browser script. When evaluated in a
 * page, the script walks the DOM from `document.body` (at most 8 levels deep),
 * skips script/style/meta-like tags and elements hidden via `display:none` or
 * `visibility:hidden`, and evaluates to a JSON string:
 * `{ url, title, viewport, dom, meta }`. `dom` is a flat, document-ordered list
 * of nodes, each with `tag`, `text` (first 200 chars of textContent),
 * `selector`, `attributes`, `rect` (rounded bounding box) and `visible`.
 *
 * Children are only visited for layout and interactive container tags.
 * `body` must be one of them: the walk starts there, and without it the
 * snapshot would contain the body node alone.
 * NOTE(review): other containers (ul/ol/li, p, span, table…) are still emitted
 * as leaves, so links nested inside them are not captured — confirm whether
 * that is intended.
 *
 * @returns {string} JavaScript source to evaluate in the page context.
 */
function getDomExtractionScript() {
  return `(function(){
  var MAX_D=8,INT=new Set(['a','button','input','select','textarea','details','summary','label']),
  LAY=new Set(['body','header','nav','main','aside','footer','article','section','div','form']),
  SKIP=new Set(['script','style','noscript','meta','link','br','hr']);
  function ext(el,d){
    if(d>MAX_D)return null;var t=el.tagName;if(!t)return null;t=t.toLowerCase();
    if(SKIP.has(t))return null;var r=el.getBoundingClientRect();
    if(r.width===0&&r.height===0&&!LAY.has(t)&&!INT.has(t))return null;
    var cs=window.getComputedStyle(el);if(cs.display==='none'||cs.visibility==='hidden')return null;
    var n={tag:t,text:(el.textContent||'').trim().substring(0,200),selector:sel(el),attributes:{},
      rect:{x:Math.round(r.x),y:Math.round(r.y),width:Math.round(r.width),height:Math.round(r.height)},
      visible:r.width>0&&r.height>0&&cs.opacity!=='0'};
    ['id','class','href','src','alt','type','name','value','placeholder','role','aria-label',
     'aria-labelledby','aria-checked','data-action','checked','disabled'].forEach(function(a){
      if(el.hasAttribute(a))n.attributes[a]=el.getAttribute(a);
    });if(el.checked)n.attributes.checked='checked';
    if(LAY.has(t)||INT.has(t)){n.children=[];var ch=Array.from(el.children);for(var j=0;j<ch.length;j++){var cn=ext(ch[j],d+1);if(cn)n.children.push(cn);}}
    return n;
  }
  function sel(el){if(!el||!el.tagName)return'unknown';if(el.id)return'#'+CSS.escape(el.id);var p=[];var c=el;
    for(var i=0;i<4&&c&&c!==document.body;i++){var s=c.tagName.toLowerCase();
      if(c.id){p.unshift('#'+CSS.escape(c.id));break;}
      if(c.className&&typeof c.className==='string'){var cl=c.className.trim().split(/\\s+/).slice(0,2).map(function(x){return'.'+CSS.escape(x);}).join('');if(cl)s+=cl;}
      p.unshift(s);c=c.parentElement;}return p.join(' > ');}
  function flat(n,r){if(!n)return;var ch=n.children;delete n.children;r.push(n);if(ch)ch.forEach(function(c){flat(c,r);});}
  var root=ext(document.body,0);var f=[];flat(root,f);
  return JSON.stringify({url:location.href,title:document.title,viewport:{width:innerWidth,height:innerHeight},dom:f,
    meta:{lang:document.documentElement.lang||'',charset:document.characterSet}});
})();`;
}
1087
+
1088
+ // ═══════════════════════════════════════════════════════════════════════
1089
+ // Enhanced analyzeScreenshot — use local engine when provider is 'local'
1090
+ // and DOM data is provided (no external API call needed)
1091
+ // ═══════════════════════════════════════════════════════════════════════
1092
+
1093
/**
 * Analyze a page's DOM snapshot with the local engine, caching the result.
 *
 * The cache key is the SHA-256 of the complete serialized `dom` array, scoped
 * by site. On a hit the stored analysis is returned; on a miss the snapshot is
 * analyzed with `analyzeLocally`, the result and its elements are persisted,
 * and the fresh analysis is returned.
 *
 * @param {string} siteId - Site the snapshot belongs to.
 * @param {Object} [options]
 * @param {Object} options.domSnapshot - Snapshot with a `dom` array and
 *   optional `viewport` and `url`.
 * @param {string} [options.url] - Page URL; falls back to `domSnapshot.url`.
 * @returns {Promise<Object>} Analysis with `analysis`, `elements`, `cached`,
 *   `latency_ms`, `tokens_used` (always 0) and `cache_id`; `darkPatterns`,
 *   `ads`, `accessibility` and `layout` are included when available, and
 *   `engine` on fresh results.
 * @throws {Error} When `domSnapshot` is missing or its `dom` is not an array.
 */
async function analyzePageDOM(siteId, { domSnapshot, url } = {}) {
  if (!domSnapshot || !Array.isArray(domSnapshot.dom)) {
    throw new Error('domSnapshot with dom array is required');
  }

  // Hash the whole snapshot. Hashing only a prefix makes pages that share the
  // same leading markup (header, navigation) collide and serve each other's
  // cached analysis.
  const serializedDom = JSON.stringify(domSnapshot.dom);
  const screenshotHash = crypto.createHash('sha256').update(serializedDom).digest('hex');

  // Check cache
  const cached = stmts.getCacheByHash.get(siteId, screenshotHash);
  if (cached) {
    let elements = [];
    try {
      const parsedElements = JSON.parse(cached.elements_found || '[]');
      if (Array.isArray(parsedElements)) elements = parsedElements;
    } catch { /* unreadable element list — return none */ }

    // The stored analysis text is the JSON produced by the local engine;
    // restore the detail sections so hits and misses expose the same fields.
    const details = {};
    try {
      const parsedAnalysis = JSON.parse(cached.analysis);
      if (parsedAnalysis && typeof parsedAnalysis === 'object') {
        for (const key of ['darkPatterns', 'ads', 'accessibility', 'layout']) {
          if (parsedAnalysis[key] !== undefined) details[key] = parsedAnalysis[key];
        }
      }
    } catch { /* analysis is not JSON — return the core fields only */ }

    return {
      analysis: cached.analysis,
      elements,
      ...details,
      cached: true,
      latency_ms: cached.latency_ms,
      tokens_used: 0,
      cache_id: cached.id,
    };
  }

  const startTime = Date.now();
  const result = analyzeLocally(domSnapshot.dom, domSnapshot.viewport);
  const latencyMs = Date.now() - startTime;

  const cacheId = uuidv4();
  const config = stmts.getConfig.get(siteId);
  // cache_ttl is in seconds; default to five minutes when unset.
  const cacheTtl = config?.cache_ttl || 300;
  const expiresAt = new Date(Date.now() + cacheTtl * 1000).toISOString();

  stmts.insertCache.run(cacheId, siteId, url || domSnapshot.url || null, screenshotHash, result.text, JSON.stringify(result.elements), 'local', 'dom-engine', 0, latencyMs, expiresAt);

  // Insert all element rows inside one transaction.
  const insertElements = db.transaction((elems) => {
    for (const el of elems) {
      stmts.insertElement.run(uuidv4(), cacheId, siteId, el.type, el.label, el.description, JSON.stringify(el.boundingBox), el.suggestedSelector, el.confidence, el.interactable ? 1 : 0);
    }
  });
  insertElements(result.elements);

  return {
    analysis: result.text,
    elements: result.elements,
    darkPatterns: result.darkPatterns,
    ads: result.ads,
    accessibility: result.accessibility,
    layout: result.layout,
    cached: false,
    latency_ms: latencyMs,
    tokens_used: 0,
    cache_id: cacheId,
    engine: 'local-dom',
  };
}
1139
+
1140
+ // ═══════════════════════════════════════════════════════════════════════
1141
+ // Exports
1142
+ // ═══════════════════════════════════════════════════════════════════════
1143
+
1144
+ module.exports = { // Public API of this module
1145
+ configureVision,
1146
+ getVisionConfig,
1147
+ analyzeScreenshot,
1148
+ analyzePageDOM, // local-engine analysis of a DOM snapshot, with result caching
1149
+ analyzeLocally, // synchronous analysis of flattened DOM nodes; no persistence
1150
+ getDomExtractionScript, // returns browser-side script source as a string
1151
+ buildVisionPrompt,
1152
+ parseVisionResponse,
1153
+ extractElementsFromAnalysis,
1154
+ findElement,
1155
+ compareScreenshots,
1156
+ getCacheStats,
1157
+ clearCache,
1158
+ encryptApiKey,
1159
+ decryptApiKey,
1160
+ getSupportedModels,
1161
+ estimateTokens, // token estimate for a base64-encoded image
1162
+ getVisionHistory, // per-site summaries of cached analyses
1163
+ };