web-agent-bridge 2.3.0 → 2.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.ar.md +506 -31
- package/README.md +574 -47
- package/bin/agent-runner.js +10 -1
- package/package.json +12 -4
- package/public/agent-workspace.html +347 -0
- package/public/browser.html +484 -0
- package/public/commander-dashboard.html +243 -0
- package/public/css/agent-workspace.css +1713 -0
- package/public/css/premium.css +317 -317
- package/public/demo.html +259 -259
- package/public/index.html +738 -644
- package/public/js/agent-workspace.js +1740 -0
- package/public/mesh-dashboard.html +309 -382
- package/public/premium-dashboard.html +2487 -2487
- package/public/premium.html +791 -791
- package/public/script/wab.min.js +124 -87
- package/script/ai-agent-bridge.js +154 -84
- package/sdk/agent-mesh.js +287 -171
- package/sdk/commander.js +262 -0
- package/sdk/index.d.ts +83 -0
- package/sdk/index.js +374 -260
- package/sdk/package.json +1 -1
- package/server/config/secrets.js +13 -5
- package/server/index.js +191 -5
- package/server/middleware/adminAuth.js +6 -1
- package/server/middleware/auth.js +11 -2
- package/server/middleware/rateLimits.js +78 -2
- package/server/migrations/002_premium_features.sql +418 -418
- package/server/migrations/003_ads_integer_cents.sql +33 -0
- package/server/models/db.js +121 -1
- package/server/routes/admin-premium.js +671 -671
- package/server/routes/admin.js +16 -2
- package/server/routes/ads.js +130 -0
- package/server/routes/agent-workspace.js +378 -0
- package/server/routes/api.js +21 -2
- package/server/routes/auth.js +26 -6
- package/server/routes/commander.js +316 -0
- package/server/routes/mesh.js +370 -201
- package/server/routes/premium-v2.js +686 -686
- package/server/routes/premium.js +724 -724
- package/server/routes/sovereign.js +78 -0
- package/server/routes/universal.js +177 -0
- package/server/routes/wab-api.js +20 -5
- package/server/services/agent-chat.js +506 -0
- package/server/services/agent-learning.js +230 -77
- package/server/services/agent-memory.js +625 -625
- package/server/services/agent-mesh.js +260 -67
- package/server/services/agent-symphony.js +553 -517
- package/server/services/agent-tasks.js +1807 -0
- package/server/services/commander.js +738 -0
- package/server/services/edge-compute.js +440 -0
- package/server/services/fairness-engine.js +409 -0
- package/server/services/local-ai.js +389 -0
- package/server/services/plugins.js +771 -747
- package/server/services/price-intelligence.js +565 -0
- package/server/services/price-shield.js +1137 -0
- package/server/services/search-engine.js +357 -0
- package/server/services/security.js +513 -0
- package/server/services/self-healing.js +843 -843
- package/server/services/swarm.js +788 -788
- package/server/services/universal-scraper.js +661 -0
- package/server/services/vision.js +871 -871
- package/server/ws.js +61 -1
- package/public/admin/dashboard.html +0 -848
- package/public/admin/login.html +0 -84
- package/public/video/tutorial.mp4 +0 -0
|
@@ -1,871 +1,871 @@
|
|
|
1
|
-
const { db } = require('../models/db');
|
|
2
|
-
const { v4: uuidv4 } = require('uuid');
|
|
3
|
-
const crypto = require('crypto');
|
|
4
|
-
|
|
5
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
6
|
-
// Schema
|
|
7
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
8
|
-
|
|
9
|
-
// Schema bootstrap for the vision service. Three tables are created on load if
// they do not already exist:
//   vision_configs  – one row per site (site_id is UNIQUE) with provider settings
//   vision_cache    – one row per analysed screenshot, keyed by screenshot hash
//   vision_elements – UI elements extracted from a cache row (cache_id FK, ON DELETE CASCADE)
const VISION_SCHEMA_SQL = `
  CREATE TABLE IF NOT EXISTS vision_configs (
    id TEXT PRIMARY KEY,
    site_id TEXT NOT NULL UNIQUE,
    provider TEXT DEFAULT 'local' CHECK(provider IN ('local','openai','anthropic','ollama')),
    model TEXT DEFAULT 'moondream',
    endpoint TEXT,
    api_key_encrypted TEXT,
    max_resolution TEXT DEFAULT '1280x720',
    cache_ttl INTEGER DEFAULT 300,
    enabled INTEGER DEFAULT 1,
    created_at TEXT DEFAULT (datetime('now')),
    updated_at TEXT DEFAULT (datetime('now'))
  );

  CREATE TABLE IF NOT EXISTS vision_cache (
    id TEXT PRIMARY KEY,
    site_id TEXT,
    url TEXT,
    screenshot_hash TEXT,
    analysis TEXT,
    elements_found TEXT,
    provider TEXT,
    model TEXT,
    tokens_used INTEGER,
    latency_ms INTEGER,
    created_at TEXT DEFAULT (datetime('now')),
    expires_at TEXT
  );

  CREATE TABLE IF NOT EXISTS vision_elements (
    id TEXT PRIMARY KEY,
    cache_id TEXT,
    site_id TEXT,
    element_type TEXT CHECK(element_type IN ('button','input','link','text','image','form','nav','dropdown')),
    label TEXT,
    description TEXT,
    bounding_box TEXT,
    suggested_selector TEXT,
    confidence REAL,
    interactable INTEGER DEFAULT 0,
    created_at TEXT DEFAULT (datetime('now')),
    FOREIGN KEY (cache_id) REFERENCES vision_cache(id) ON DELETE CASCADE
  );

  CREATE INDEX IF NOT EXISTS idx_vision_configs_site ON vision_configs(site_id);
  CREATE INDEX IF NOT EXISTS idx_vision_cache_site ON vision_cache(site_id);
  CREATE INDEX IF NOT EXISTS idx_vision_cache_hash ON vision_cache(screenshot_hash);
  CREATE INDEX IF NOT EXISTS idx_vision_cache_url ON vision_cache(url);
  CREATE INDEX IF NOT EXISTS idx_vision_cache_expires ON vision_cache(expires_at);
  CREATE INDEX IF NOT EXISTS idx_vision_elements_cache ON vision_elements(cache_id);
  CREATE INDEX IF NOT EXISTS idx_vision_elements_site ON vision_elements(site_id);
  CREATE INDEX IF NOT EXISTS idx_vision_elements_type ON vision_elements(element_type);
`;

db.exec(VISION_SCHEMA_SQL);
|
|
63
|
-
|
|
64
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
65
|
-
// Encryption helpers (AES-256-GCM keyed from JWT_SECRET)
|
|
66
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
67
|
-
|
|
68
|
-
const ENC_PREFIX = 'venc:';
|
|
69
|
-
|
|
70
|
-
/**
 * Derives the 32-byte AES key used to protect stored provider API keys.
 *
 * The key is the SHA-256 digest of `process.env.JWT_SECRET`. When that variable
 * is unset or empty, a fixed built-in string is hashed instead.
 * NOTE(review): that fallback is a constant visible in the source, so keys
 * encrypted without JWT_SECRET set are only obfuscated — confirm deployments
 * always provide JWT_SECRET.
 *
 * @returns {Buffer} 32-byte key material.
 */
function _deriveKey() {
  const hasher = crypto.createHash('sha256');
  hasher.update(process.env.JWT_SECRET || 'wab-vision-fallback-key');
  return hasher.digest();
}
|
|
74
|
-
|
|
75
|
-
/**
 * Encrypts a provider API key with AES-256-GCM for storage.
 *
 * Output layout: `<ENC_PREFIX><iv hex>:<auth tag hex>:<ciphertext hex>`, using a
 * fresh random 12-byte IV per call and the key from `_deriveKey()`.
 *
 * @param {*} plaintext - Key to encrypt; coerced with `String()`.
 * @returns {string|null} Encoded ciphertext, or `null` when `plaintext` is falsy.
 */
function encryptApiKey(plaintext) {
  if (!plaintext) {
    return null;
  }

  const secretKey = _deriveKey();
  const nonce = crypto.randomBytes(12);
  const aes = crypto.createCipheriv('aes-256-gcm', secretKey, nonce);

  const head = aes.update(String(plaintext), 'utf8');
  const tail = aes.final();
  const ciphertext = Buffer.concat([head, tail]);

  // The auth tag is only available once final() has been called.
  const segments = [nonce, aes.getAuthTag(), ciphertext].map((buf) => buf.toString('hex'));
  return ENC_PREFIX + segments.join(':');
}
|
|
84
|
-
|
|
85
|
-
/**
 * Decrypts a value produced by `encryptApiKey`.
 *
 * Expected layout: `<ENC_PREFIX><iv hex>:<auth tag hex>:<ciphertext hex>`.
 * The payload is rejected unless it has exactly three parts, a 12-byte IV and a
 * full 16-byte GCM authentication tag. Pinning `authTagLength` matters because
 * GCM otherwise accepts shorter tags, which are much easier to forge.
 *
 * Failures are logged with the `[Vision]` prefix and reported as `null`; this
 * function does not throw for malformed or tampered input.
 *
 * @param {*} encrypted - Stored ciphertext string.
 * @returns {string|null} Decrypted key, or `null` when the input is not a
 *   prefixed string or cannot be authenticated/decrypted.
 */
function decryptApiKey(encrypted) {
  if (!encrypted || typeof encrypted !== 'string' || !encrypted.startsWith(ENC_PREFIX)) return null;

  const IV_BYTES = 12;
  const TAG_BYTES = 16;
  const key = _deriveKey();

  try {
    const parts = encrypted.slice(ENC_PREFIX.length).split(':');
    if (parts.length !== 3) throw new Error('malformed payload');

    const [iv, tag, data] = parts.map((hex) => Buffer.from(hex, 'hex'));
    // Buffer.from(..., 'hex') silently truncates on invalid hex, so check sizes explicitly.
    if (iv.length !== IV_BYTES || tag.length !== TAG_BYTES) {
      throw new Error('malformed IV or authentication tag');
    }

    const decipher = crypto.createDecipheriv('aes-256-gcm', key, iv, { authTagLength: TAG_BYTES });
    decipher.setAuthTag(tag);
    return Buffer.concat([decipher.update(data), decipher.final()]).toString('utf8');
  } catch (e) {
    console.error('[Vision] Decrypt failed:', e.message);
    return null;
  }
}
|
|
102
|
-
|
|
103
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
104
|
-
// Prepared statements
|
|
105
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
106
|
-
|
|
107
|
-
// Prepared statements used by the vision service, keyed by name. The SQL text
// is listed first and each entry is prepared once, in declaration order.
const stmts = Object.fromEntries(
  Object.entries({
    // Insert a site's config, or update it in place when the site already has one.
    // An incoming NULL api key keeps the previously stored key.
    upsertConfig: `
    INSERT INTO vision_configs (id, site_id, provider, model, endpoint, api_key_encrypted, max_resolution, cache_ttl, enabled)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, 1)
    ON CONFLICT(site_id) DO UPDATE SET
      provider = excluded.provider,
      model = excluded.model,
      endpoint = excluded.endpoint,
      api_key_encrypted = CASE WHEN excluded.api_key_encrypted IS NOT NULL THEN excluded.api_key_encrypted ELSE vision_configs.api_key_encrypted END,
      max_resolution = excluded.max_resolution,
      cache_ttl = excluded.cache_ttl,
      updated_at = datetime('now')
  `,
    getConfig: `SELECT * FROM vision_configs WHERE site_id = ?`,
    insertCache: `
    INSERT INTO vision_cache (id, site_id, url, screenshot_hash, analysis, elements_found, provider, model, tokens_used, latency_ms, expires_at)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
  `,
    // Newest unexpired cache row for a screenshot hash.
    getCacheByHash: `
    SELECT * FROM vision_cache WHERE site_id = ? AND screenshot_hash = ? AND expires_at > datetime('now') ORDER BY created_at DESC LIMIT 1
  `,
    insertElement: `
    INSERT INTO vision_elements (id, cache_id, site_id, element_type, label, description, bounding_box, suggested_selector, confidence, interactable)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
  `,
    searchElements: `
    SELECT * FROM vision_elements WHERE site_id = ? ORDER BY confidence DESC
  `,
    searchElementsByType: `
    SELECT * FROM vision_elements WHERE site_id = ? AND element_type = ? ORDER BY confidence DESC
  `,
    getCacheById: `SELECT * FROM vision_cache WHERE id = ?`,
    // Newest cache row for a screenshot hash, expired or not.
    getCacheBySiteAndHash: `
    SELECT * FROM vision_cache WHERE site_id = ? AND screenshot_hash = ? ORDER BY created_at DESC LIMIT 1
  `,
    getElementsByCache: `SELECT * FROM vision_elements WHERE cache_id = ?`,
    cacheStats: `
    SELECT
      COUNT(*) as total_cached,
      SUM(CASE WHEN expires_at > datetime('now') THEN 1 ELSE 0 END) as active_cached,
      SUM(CASE WHEN expires_at <= datetime('now') THEN 1 ELSE 0 END) as expired,
      SUM(tokens_used) as total_tokens,
      AVG(latency_ms) as avg_latency,
      SUM(LENGTH(analysis)) as total_bytes
    FROM vision_cache WHERE site_id = ?
  `,
    deleteExpiredCache: `DELETE FROM vision_cache WHERE site_id = ? AND expires_at <= datetime('now')`,
    deleteOldCache: `DELETE FROM vision_cache WHERE site_id = ? AND created_at < ?`,
    deleteOrphanedElements: `DELETE FROM vision_elements WHERE cache_id NOT IN (SELECT id FROM vision_cache)`,
    visionHistory: `SELECT * FROM vision_cache WHERE site_id = ? ORDER BY created_at DESC LIMIT ?`,
    visionHistoryByUrl: `SELECT * FROM vision_cache WHERE site_id = ? AND url = ? ORDER BY created_at DESC LIMIT ?`,
  }).map(([name, sql]) => [name, db.prepare(sql)]),
);
|
|
159
|
-
|
|
160
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
161
|
-
// Provider API calls
|
|
162
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
163
|
-
|
|
164
|
-
const PROVIDER_TIMEOUT_MS = 60_000;
|
|
165
|
-
|
|
166
|
-
/**
 * Sends one screenshot and prompt to an Ollama-style `/api/generate` endpoint.
 *
 * Trailing slashes on `endpoint` are stripped before the path is appended. The
 * request is aborted after PROVIDER_TIMEOUT_MS; the timer is always cleared.
 *
 * @param {string} endpoint - Base URL of the server.
 * @param {string} model - Model name forwarded in the request body.
 * @param {string} base64Image - Screenshot, sent as the single entry of `images`.
 * @param {string} prompt - Prompt text.
 * @returns {Promise<{text: string, tokens: number}>} Response text (or '') and
 *   the sum of `prompt_eval_count` and `eval_count` (missing counts as 0).
 * @throws {Error} `Ollama <status>: <first 300 chars of body>` on a non-OK response.
 */
async function _callOllama(endpoint, model, base64Image, prompt) {
  const target = `${endpoint.replace(/\/+$/, '')}/api/generate`;
  const abort = new AbortController();
  const deadline = setTimeout(() => abort.abort(), PROVIDER_TIMEOUT_MS);
  const payload = {
    model,
    prompt,
    images: [base64Image],
    stream: false,
  };

  try {
    const response = await fetch(target, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(payload),
      signal: abort.signal,
    });

    if (!response.ok) {
      // Best effort: an unreadable error body is reported as empty.
      const detail = await response.text().catch(() => '');
      throw new Error(`Ollama ${response.status}: ${detail.slice(0, 300)}`);
    }

    const data = await response.json();
    const promptTokens = data.prompt_eval_count || 0;
    const outputTokens = data.eval_count || 0;
    return {
      text: data.response || '',
      tokens: promptTokens + outputTokens,
    };
  } finally {
    clearTimeout(deadline);
  }
}
|
|
196
|
-
|
|
197
|
-
/**
 * Sends one screenshot and prompt to the OpenAI chat completions endpoint.
 *
 * The image is embedded as a `data:image/png;base64,...` URL with `detail: 'high'`.
 * The request is aborted after PROVIDER_TIMEOUT_MS; the timer is always cleared.
 *
 * @param {string} apiKey - Bearer token.
 * @param {string} model - Model name; falls back to 'gpt-4o' when falsy.
 * @param {string} base64Image - Screenshot payload.
 * @param {string} prompt - Prompt text.
 * @returns {Promise<{text: *, tokens: number}>} Content of the first choice
 *   ('' when there is none) and `usage.total_tokens` (0 when usage is absent).
 * @throws {Error} `OpenAI <status>: <first 300 chars of body>` on a non-OK response.
 */
async function _callOpenAI(apiKey, model, base64Image, prompt) {
  const abort = new AbortController();
  const deadline = setTimeout(() => abort.abort(), PROVIDER_TIMEOUT_MS);

  try {
    const userMessage = {
      role: 'user',
      content: [
        { type: 'text', text: prompt },
        { type: 'image_url', image_url: { url: `data:image/png;base64,${base64Image}`, detail: 'high' } },
      ],
    };
    const requestBody = JSON.stringify({
      model: model || 'gpt-4o',
      messages: [userMessage],
      max_tokens: 4096,
    });

    const response = await fetch('https://api.openai.com/v1/chat/completions', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        Authorization: `Bearer ${apiKey}`,
      },
      body: requestBody,
      signal: abort.signal,
    });

    if (!response.ok) {
      // Best effort: an unreadable error body is reported as empty.
      const detail = await response.text().catch(() => '');
      throw new Error(`OpenAI ${response.status}: ${detail.slice(0, 300)}`);
    }

    const data = await response.json();
    const firstChoice = data.choices && data.choices[0];

    let text = '';
    if (firstChoice) {
      text = firstChoice.message.content;
    }
    let tokens = 0;
    if (data.usage) {
      tokens = data.usage.total_tokens;
    }
    return { text, tokens };
  } finally {
    clearTimeout(deadline);
  }
}
|
|
237
|
-
|
|
238
|
-
/**
 * Sends one screenshot and prompt to the Anthropic messages endpoint.
 *
 * The image is sent as a base64 source with media type `image/png`, followed by
 * the prompt text. The request is aborted after PROVIDER_TIMEOUT_MS; the timer
 * is always cleared.
 *
 * @param {string} apiKey - Value of the `x-api-key` header.
 * @param {string} model - Model name; falls back to 'claude-sonnet-4-20250514' when falsy.
 * @param {string} base64Image - Screenshot payload.
 * @param {string} prompt - Prompt text.
 * @returns {Promise<{text: string, tokens: number}>} Text of the first `text`
 *   content block ('' when none) and input + output token counts (0 each when
 *   `usage` is absent).
 * @throws {Error} `Anthropic <status>: <first 300 chars of body>` on a non-OK response.
 */
async function _callAnthropic(apiKey, model, base64Image, prompt) {
  const abort = new AbortController();
  const deadline = setTimeout(() => abort.abort(), PROVIDER_TIMEOUT_MS);

  try {
    const userMessage = {
      role: 'user',
      content: [
        { type: 'image', source: { type: 'base64', media_type: 'image/png', data: base64Image } },
        { type: 'text', text: prompt },
      ],
    };
    const requestBody = JSON.stringify({
      model: model || 'claude-sonnet-4-20250514',
      max_tokens: 4096,
      messages: [userMessage],
    });

    const response = await fetch('https://api.anthropic.com/v1/messages', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'x-api-key': apiKey,
        'anthropic-version': '2023-06-01',
      },
      body: requestBody,
      signal: abort.signal,
    });

    if (!response.ok) {
      // Best effort: an unreadable error body is reported as empty.
      const detail = await response.text().catch(() => '');
      throw new Error(`Anthropic ${response.status}: ${detail.slice(0, 300)}`);
    }

    const data = await response.json();
    const firstText = data.content && data.content.find((block) => block.type === 'text');

    let inputTokens = 0;
    let outputTokens = 0;
    if (data.usage) {
      inputTokens = data.usage.input_tokens;
      outputTokens = data.usage.output_tokens;
    }

    return {
      text: firstText ? firstText.text : '',
      tokens: inputTokens + outputTokens,
    };
  } finally {
    clearTimeout(deadline);
  }
}
|
|
281
|
-
|
|
282
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
283
|
-
// Prompt construction
|
|
284
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
285
|
-
|
|
286
|
-
/**
 * Builds the prompt sent to the vision provider.
 *
 * The base prompt asks for a fenced JSON array describing every interactive UI
 * element in the screenshot. A truthy `customPrompt` is appended after a blank
 * line as "Additional instructions: ...".
 *
 * @param {string} [customPrompt] - Optional extra instructions.
 * @returns {string} The full prompt text.
 */
function buildVisionPrompt(customPrompt) {
  const fence = '```';
  const instructions = [
    'Analyze this screenshot of a web page. Identify every interactive UI element visible.',
    '',
    'For each element, return a JSON object with these fields:',
    '- "type": one of "button", "input", "link", "text", "image", "form", "nav", "dropdown"',
    '- "label": the visible text or aria-label of the element',
    '- "description": a short human-readable description of what the element does',
    '- "position": {"x": approximate x coordinate in pixels, "y": approximate y coordinate in pixels, "width": approximate width, "height": approximate height}',
    '- "selector": a suggested CSS selector that could target this element (e.g. "button.submit-btn", "#login-form input[type=email]")',
    '- "interactable": true if the element can be clicked, typed into, or otherwise interacted with',
    '- "confidence": a number from 0.0 to 1.0 indicating how confident you are in this identification',
    '',
    'Return ONLY a JSON array of these objects wrapped in a markdown code block like:',
    `${fence}json`,
    '[...]',
    fence,
    '',
    'Be thorough — include buttons, links, inputs, dropdowns, navigation items, forms, and any other interactive elements.',
  ].join('\n');

  return customPrompt
    ? `${instructions}\n\nAdditional instructions: ${customPrompt}`
    : instructions;
}
|
|
310
|
-
|
|
311
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
312
|
-
// Response parsing
|
|
313
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
314
|
-
|
|
315
|
-
const VALID_ELEMENT_TYPES = new Set(['button', 'input', 'link', 'text', 'image', 'form', 'nav', 'dropdown']);
|
|
316
|
-
|
|
317
|
-
/**
 * Extracts UI element descriptions from a provider's raw text response.
 *
 * Three strategies are tried in order; the first that yields at least one
 * candidate wins:
 *   1. the first fenced code block (optionally tagged `json`), parsed as JSON
 *      (an array is used as-is, a single object is wrapped in an array);
 *   2. the first bare `[ { ... } ]` span in the text, parsed as JSON;
 *   3. every flat `{ ... "type": "..." ... }` object found in the text.
 * Candidates are passed through `_normalizeElement`; falsy results are dropped.
 *
 * @param {string} rawResponse - Provider output.
 * @param {string} provider - Provider name, forwarded to `_normalizeElement`.
 * @returns {Array<object>} Normalized elements; empty when nothing parses or
 *   the input is not a non-empty string.
 */
function parseVisionResponse(rawResponse, provider) {
  if (typeof rawResponse !== 'string' || rawResponse === '') return [];

  // Parse without throwing so each strategy can fall through on malformed JSON.
  const tryParse = (text) => {
    try {
      return { ok: true, value: JSON.parse(text) };
    } catch {
      return { ok: false, value: undefined };
    }
  };

  const fromFencedBlock = () => {
    const fenced = /```(?:json)?\s*\n?([\s\S]*?)```/.exec(rawResponse);
    if (!fenced) return [];
    const { ok, value } = tryParse(fenced[1].trim());
    if (!ok) return [];
    if (Array.isArray(value)) return value;
    return value && typeof value === 'object' ? [value] : [];
  };

  const fromBareArray = () => {
    const span = /\[\s*\{[\s\S]*?\}\s*\]/.exec(rawResponse);
    if (!span) return [];
    const { ok, value } = tryParse(span[0]);
    return ok ? value : [];
  };

  const fromLooseObjects = () => {
    const found = [];
    for (const hit of rawResponse.matchAll(/\{[^{}]*"type"\s*:\s*"[^"]+?"[^{}]*\}/g)) {
      const { ok, value } = tryParse(hit[0]);
      if (ok) found.push(value);
    }
    return found;
  };

  let candidates = [];
  for (const strategy of [fromFencedBlock, fromBareArray, fromLooseObjects]) {
    candidates = strategy();
    if (candidates.length > 0) break;
  }

  return candidates
    .map((candidate) => _normalizeElement(candidate, provider))
    .filter(Boolean);
}
|
|
351
|
-
|
|
352
|
-
/**
 * Coerces one provider-supplied element description into the internal shape.
 *
 * Provider output is untrusted, so every field is coerced defensively:
 *   - `type` / `element_type` is stringified before matching, so a non-string
 *     value maps to a known type instead of throwing;
 *   - `confidence` keeps an explicit 0; only a missing, empty or non-numeric
 *     value falls back to 0.5, and the result is clamped to [0, 1];
 *   - `interactable` given as the strings "false", "no", "0" or "" is false.
 *
 * @param {*} raw - Candidate element parsed from the provider response.
 * @param {string} [_provider] - Provider name (currently unused).
 * @returns {{type: string, label: string, description: string,
 *   boundingBox: {x: number, y: number, width: number, height: number},
 *   suggestedSelector: string, confidence: number, interactable: boolean}|null}
 *   The normalized element, or `null` when `raw` is not an object.
 */
function _normalizeElement(raw, _provider) {
  if (!raw || typeof raw !== 'object') return null;

  let type = String(raw.type || raw.element_type || 'text').toLowerCase().trim();
  if (!VALID_ELEMENT_TYPES.has(type)) {
    // Map common synonyms onto the supported types; first match wins.
    const synonyms = [
      [/btn|button|submit/i, 'button'],
      [/input|field|text.?box|textarea/i, 'input'],
      [/link|anchor|href/i, 'link'],
      [/select|dropdown|combo/i, 'dropdown'],
      [/img|icon|logo/i, 'image'],
      [/form/i, 'form'],
      [/nav|menu|sidebar/i, 'nav'],
    ];
    const hit = synonyms.find(([pattern]) => pattern.test(type));
    type = hit ? hit[1] : 'text';
  }

  const pos = raw.position || raw.bounding_box || raw.bbox || {};
  const boundingBox = {
    x: Number(pos.x) || 0,
    y: Number(pos.y) || 0,
    width: Number(pos.width || pos.w) || 0,
    height: Number(pos.height || pos.h) || 0,
  };

  // Only numbers and non-blank strings count as a supplied confidence;
  // Number(null) and Number('') are 0 and must not be mistaken for a real 0.
  const suppliedConfidence =
    typeof raw.confidence === 'number' ||
    (typeof raw.confidence === 'string' && raw.confidence.trim() !== '');
  const numericConfidence = suppliedConfidence ? Number(raw.confidence) : NaN;
  const confidence = Number.isNaN(numericConfidence)
    ? 0.5
    : Math.max(0, Math.min(1, numericConfidence));

  let interactable;
  if (raw.interactable == null) {
    interactable = ['button', 'input', 'link', 'dropdown', 'form'].includes(type);
  } else if (typeof raw.interactable === 'string') {
    interactable = !/^(false|no|0|)$/i.test(raw.interactable.trim());
  } else {
    interactable = !!raw.interactable;
  }

  return {
    type,
    label: String(raw.label || raw.text || raw.name || '').slice(0, 500),
    description: String(raw.description || raw.desc || '').slice(0, 1000),
    boundingBox,
    suggestedSelector: String(raw.selector || raw.suggested_selector || raw.css_selector || '').slice(0, 500),
    confidence,
    interactable,
  };
}
|
|
391
|
-
|
|
392
|
-
/**
 * Recovers element descriptions from free-form analysis text.
 *
 * Structured JSON is preferred: if `parseVisionResponse` finds anything, that
 * result is returned unchanged. Otherwise each bullet line (`-`, `*` or `•`)
 * with at least 3 characters of content becomes a low-confidence (0.3) element.
 * Its type is the first keyword category that matches (default 'text'), and its
 * label is the first quoted phrase or, failing that, the first 80 characters.
 *
 * @param {string} analysisText - Provider analysis text.
 * @returns {Array<object>} Extracted elements; empty when the input is not a
 *   non-empty string or contains no usable bullets.
 */
function extractElementsFromAnalysis(analysisText) {
  if (typeof analysisText !== 'string' || analysisText.length === 0) return [];

  const structured = parseVisionResponse(analysisText, 'unknown');
  if (structured.length > 0) return structured;

  // Checked in this order; the first matching category wins.
  const keywordTable = [
    ['button', /\b(button|btn|submit|click)\b/i],
    ['input', /\b(input|field|text.?box|textarea|type|enter)\b/i],
    ['link', /\b(link|anchor|href|url|navigate)\b/i],
    ['dropdown', /\b(dropdown|select|combo|menu|option)\b/i],
    ['image', /\b(image|img|icon|logo|picture|photo)\b/i],
    ['form', /\b(form|login|signup|register|search.?bar)\b/i],
    ['nav', /\b(nav|menu|sidebar|header|footer|tab)\b/i],
  ];
  const interactiveKinds = ['button', 'input', 'link', 'dropdown', 'form'];

  return analysisText
    .split('\n')
    .map((line) => /^[\s]*[-*•]\s+(.+)/.exec(line))
    .filter((bullet) => bullet !== null)
    .map((bullet) => bullet[1].trim())
    .filter((content) => content.length >= 3)
    .map((content) => {
      const category = keywordTable.find(([, pattern]) => pattern.test(content));
      const type = category ? category[0] : 'text';
      const quoted = /["']([^"']+)["']/.exec(content);

      return {
        type,
        label: quoted ? quoted[1] : content.slice(0, 80),
        description: content.slice(0, 1000),
        boundingBox: { x: 0, y: 0, width: 0, height: 0 },
        suggestedSelector: '',
        confidence: 0.3,
        interactable: interactiveKinds.includes(type),
      };
    });
}
|
|
440
|
-
|
|
441
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
442
|
-
// Core functions
|
|
443
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
444
|
-
|
|
445
|
-
/**
 * Creates or updates the vision configuration for a site.
 *
 * Missing options fall back to provider 'local', model 'moondream', no
 * endpoint, resolution '1280x720' and a 300 second cache TTL (an explicit
 * `cacheTtl` of 0 is kept). A supplied `apiKey` is encrypted before storage;
 * when none is supplied, NULL is passed to the upsert.
 *
 * @param {string} siteId - Site the configuration belongs to.
 * @param {object} [options]
 * @param {string} [options.provider]
 * @param {string} [options.model]
 * @param {string} [options.endpoint]
 * @param {string} [options.apiKey]
 * @param {string} [options.maxResolution]
 * @param {number} [options.cacheTtl] - Cache lifetime in seconds.
 * @returns {object|null} The stored row as returned by `_maskConfig`.
 */
function configureVision(siteId, { provider, model, endpoint, apiKey, maxResolution, cacheTtl } = {}) {
  // Positional order must match the column list of the upsertConfig statement.
  const params = [
    uuidv4(),
    siteId,
    provider || 'local',
    model || 'moondream',
    endpoint || null,
    apiKey ? encryptApiKey(apiKey) : null,
    maxResolution || '1280x720',
    cacheTtl == null ? 300 : cacheTtl,
  ];
  stmts.upsertConfig.run(...params);

  return _maskConfig(stmts.getConfig.get(siteId));
}
|
|
463
|
-
|
|
464
|
-
/**
 * Looks up a site's vision configuration.
 *
 * @param {string} siteId - Site to look up.
 * @returns {object|null} The row as returned by `_maskConfig`, or `null` when
 *   the site has no configuration.
 */
function getVisionConfig(siteId) {
  const stored = stmts.getConfig.get(siteId);
  return stored ? _maskConfig(stored) : null;
}
|
|
469
|
-
|
|
470
|
-
/**
 * Returns a copy of a config row that is safe to hand back to callers.
 *
 * `api_key_encrypted` is removed and replaced by `api_key_masked`:
 *   - `null` when no key is stored;
 *   - first 4 + `****` + last 4 characters when the key decrypts and is longer
 *     than 12 characters;
 *   - `********` otherwise (key cannot be decrypted, or is so short that
 *     showing 8 of its characters would reveal most or all of it).
 *
 * The input row is not modified.
 *
 * @param {object|null|undefined} row - Row from `vision_configs`.
 * @returns {object|null} Masked copy, or `null` when `row` is falsy.
 */
function _maskConfig(row) {
  if (!row) return null;

  const FULL_MASK = '********';
  // Below this length the 4+4 preview would expose too much of the secret.
  const MIN_LENGTH_FOR_PREVIEW = 13;

  const { api_key_encrypted: encryptedKey, ...out } = row;

  if (encryptedKey) {
    const decrypted = decryptApiKey(encryptedKey);
    out.api_key_masked = decrypted && decrypted.length >= MIN_LENGTH_FOR_PREVIEW
      ? `${decrypted.slice(0, 4)}****${decrypted.slice(-4)}`
      : FULL_MASK;
  } else {
    out.api_key_masked = null;
  }

  return out;
}
|
|
484
|
-
|
|
485
|
-
/**
 * Formats a Date the way SQLite's `datetime('now')` does: UTC,
 * `YYYY-MM-DD HH:MM:SS`. Stored timestamps must use this shape because the
 * cache queries compare `expires_at` with `datetime('now')` as text.
 */
function _toSqliteDatetime(date) {
  return date.toISOString().slice(0, 19).replace('T', ' ');
}

/**
 * Analyses a screenshot with the site's configured vision provider.
 *
 * An unexpired cache row for the same site and screenshot hash is returned
 * directly. Otherwise the provider is called, the response is parsed into
 * elements, and the cache row plus its element rows are written in a single
 * transaction.
 *
 * Expiry is written in SQLite's `datetime('now')` text format; an ISO-8601
 * value (`...T...Z`) sorts after every same-day `datetime('now')` value and
 * would keep the entry alive until the UTC date changes. A configured
 * `cache_ttl` of 0 is honoured (the entry is written already expired); only a
 * missing TTL falls back to 300 seconds.
 *
 * NOTE(review): the cache key is the screenshot hash only, so a different
 * `prompt` for the same screenshot is served the earlier analysis — confirm
 * this is intended.
 *
 * @param {string} siteId - Site whose configuration and cache are used.
 * @param {object} [options]
 * @param {string} options.screenshotBase64 - Screenshot payload (required).
 * @param {string} [options.url] - Page URL stored with the cache row.
 * @param {string} [options.prompt] - Extra instructions appended to the prompt.
 * @returns {Promise<{analysis: *, elements: Array, cached: boolean,
 *   latency_ms: number, tokens_used: number, cache_id: string}>}
 * @throws {Error} When the screenshot is missing, vision is not configured or
 *   disabled, a required API key is absent, or the provider fails or times out.
 */
async function analyzeScreenshot(siteId, { screenshotBase64, url, prompt } = {}) {
  if (!screenshotBase64) throw new Error('screenshotBase64 is required');

  const config = stmts.getConfig.get(siteId);
  if (!config || !config.enabled) throw new Error('Vision not configured or disabled for this site');

  const screenshotHash = crypto.createHash('sha256').update(screenshotBase64).digest('hex');

  const cached = stmts.getCacheByHash.get(siteId, screenshotHash);
  if (cached) {
    let cachedElements = [];
    try {
      cachedElements = JSON.parse(cached.elements_found || '[]');
    } catch {
      // A corrupt elements column still lets the cached analysis be served.
    }
    return {
      analysis: cached.analysis,
      elements: cachedElements,
      cached: true,
      latency_ms: cached.latency_ms,
      tokens_used: cached.tokens_used,
      cache_id: cached.id,
    };
  }

  const fullPrompt = buildVisionPrompt(prompt);
  const apiKey = config.api_key_encrypted ? decryptApiKey(config.api_key_encrypted) : null;
  const providerName = config.provider;
  const modelName = config.model;

  const startTime = Date.now();
  let result;

  try {
    switch (providerName) {
      case 'openai':
        if (!apiKey) throw new Error('OpenAI API key not configured');
        result = await _callOpenAI(apiKey, modelName, screenshotBase64, fullPrompt);
        break;
      case 'anthropic':
        if (!apiKey) throw new Error('Anthropic API key not configured');
        result = await _callAnthropic(apiKey, modelName, screenshotBase64, fullPrompt);
        break;
      case 'ollama':
      case 'local':
      default: {
        const ep = config.endpoint || 'http://localhost:11434';
        result = await _callOllama(ep, modelName, screenshotBase64, fullPrompt);
        break;
      }
    }
  } catch (err) {
    if (err.name === 'AbortError') throw new Error(`Vision provider timed out after ${PROVIDER_TIMEOUT_MS}ms`);
    throw err;
  }

  const latencyMs = Date.now() - startTime;
  const analysisText = result.text;
  const tokensUsed = result.tokens || 0;

  const elements = parseVisionResponse(analysisText, providerName);
  const cacheId = uuidv4();
  const ttlSeconds = config.cache_ttl ?? 300;
  const expiresAt = _toSqliteDatetime(new Date(Date.now() + ttlSeconds * 1000));

  // One transaction so a failed element insert cannot leave a cache row that
  // claims elements which were never stored.
  const persist = db.transaction((elems) => {
    stmts.insertCache.run(
      cacheId, siteId, url || null, screenshotHash,
      analysisText, JSON.stringify(elems),
      providerName, modelName, tokensUsed, latencyMs, expiresAt
    );
    for (const el of elems) {
      stmts.insertElement.run(
        uuidv4(), cacheId, siteId,
        el.type, el.label, el.description,
        JSON.stringify(el.boundingBox),
        el.suggestedSelector,
        el.confidence,
        el.interactable ? 1 : 0
      );
    }
  });
  persist(elements);

  return {
    analysis: analysisText,
    elements,
    cached: false,
    latency_ms: latencyMs,
    tokens_used: tokensUsed,
    cache_id: cacheId,
  };
}
|
|
575
|
-
|
|
576
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
577
|
-
// Element search
|
|
578
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
579
|
-
|
|
580
|
-
/**
 * Searches previously extracted elements for a site.
 *
 * Filters are applied in this order:
 *   1. `type`  – selects the by-type query instead of the all-elements query;
 *   2. `url`   – keeps elements whose cache row was recorded for that URL;
 *   3. `label` – case-insensitive substring match on the element label;
 *   4. `description` – scores each element as 0.4 * confidence + 0.6 * (share of
 *      description words found in label + description), sorts by score and
 *      drops scores of 0.1 or less.
 * At most 20 results are returned.
 *
 * NOTE(review): when `url` is given but no cache rows exist for it, the URL
 * filter is skipped and elements from every URL of the site are returned —
 * confirm this fallback is intended.
 *
 * @param {string} siteId - Site to search.
 * @param {string} [url] - Restrict to elements captured on this URL.
 * @param {object} [criteria]
 * @param {string} [criteria.description] - Free-text description to score against.
 * @param {string} [criteria.type] - Exact element type.
 * @param {string} [criteria.label] - Label substring.
 * @returns {Array<object>} Matching elements with a parsed `bounding_box`, a
 *   boolean `interactable`, and `_score` (the description score, or the
 *   element's confidence when that score is absent or zero).
 */
function findElement(siteId, url, { description, type, label } = {}) {
  let matches = type
    ? stmts.searchElementsByType.all(siteId, type)
    : stmts.searchElements.all(siteId);

  if (url) {
    const cacheRows = db
      .prepare(`SELECT id FROM vision_cache WHERE site_id = ? AND url = ?`)
      .all(siteId, url);

    if (cacheRows.length > 0) {
      const allowedCacheIds = new Set(cacheRows.map((row) => row.id));
      matches = matches.filter((row) => allowedCacheIds.has(row.cache_id));
    }
  }

  if (label) {
    const needle = label.toLowerCase();
    matches = matches.filter((row) => Boolean(row.label) && row.label.toLowerCase().includes(needle));
  }

  if (description) {
    // Single-character words are ignored when scoring.
    const words = description.toLowerCase().split(/\s+/).filter((word) => word.length > 1);

    matches = matches
      .map((row) => {
        const haystack = `${row.label || ''} ${row.description || ''}`.toLowerCase();
        const hits = words.filter((word) => haystack.includes(word)).length;
        const termScore = words.length > 0 ? hits / words.length : 0;
        return { ...row, _score: (row.confidence * 0.4) + (termScore * 0.6) };
      })
      .sort((left, right) => right._score - left._score)
      .filter((row) => row._score > 0.1);
  }

  const parseBox = (serialized) => {
    try {
      return JSON.parse(serialized || '{}');
    } catch {
      return {};
    }
  };

  return matches.slice(0, 20).map((row) => ({
    id: row.id,
    cache_id: row.cache_id,
    element_type: row.element_type,
    label: row.label,
    description: row.description,
    bounding_box: parseBox(row.bounding_box),
    suggested_selector: row.suggested_selector,
    confidence: row.confidence,
    interactable: !!row.interactable,
    _score: row._score || row.confidence,
  }));
}
|
|
640
|
-
|
|
641
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
642
|
-
// Screenshot comparison
|
|
643
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
644
|
-
|
|
645
|
-
/**
 * Diff the UI elements recorded for two cached screenshot analyses.
 *
 * Elements are paired by a key of `type::lowercased label`. A pair counts as
 * "changed" when any bounding-box field differs by more than 10 or when the
 * description text differs; otherwise it is "unchanged".
 *
 * @param {string} siteId - Site whose cache rows are looked up.
 * @param {string} url - Accepted for interface compatibility; not used by the lookup.
 * @param {string} screenshotAHash - Hash of the "before" screenshot.
 * @param {string} screenshotBHash - Hash of the "after" screenshot.
 * @returns {object} `{ added, removed, changed, unchanged, summary }`, or the
 *   same four empty lists plus an `error` string when either row is missing.
 */
function compareScreenshots(siteId, url, screenshotAHash, screenshotBHash) {
  const cacheA = stmts.getCacheBySiteAndHash.get(siteId, screenshotAHash);
  const cacheB = stmts.getCacheBySiteAndHash.get(siteId, screenshotBHash);

  if (!cacheA || !cacheB) {
    return { error: 'One or both screenshots not found in cache', added: [], removed: [], changed: [], unchanged: [] };
  }

  // Malformed cache content is treated as "no elements". That covers invalid
  // JSON, JSON that is not an array (e.g. `null`), and non-object entries,
  // all of which would otherwise throw further down.
  const readElements = (row) => {
    let parsed;
    try {
      parsed = JSON.parse(row.elements_found || '[]');
    } catch {
      return [];
    }
    if (!Array.isArray(parsed)) return [];
    return parsed.filter((el) => el !== null && typeof el === 'object');
  };

  // String() keeps a non-string label from crashing toLowerCase().
  const makeKey = (el) => `${el.type || el.element_type}::${String(el.label || '').toLowerCase()}`;

  const indexByKey = (elements) => {
    const index = new Map();
    for (const el of elements) index.set(makeKey(el), el);
    return index;
  };

  const boxOf = (el) => el.position || el.boundingBox || {};
  const hasMoved = (boxA, boxB) => ['x', 'y', 'width', 'height']
    .some((field) => Math.abs((boxA[field] || 0) - (boxB[field] || 0)) > 10);

  const mapA = indexByKey(readElements(cacheA));
  const mapB = indexByKey(readElements(cacheB));

  const added = [];
  const removed = [];
  const changed = [];
  const unchanged = [];

  for (const [key, elB] of mapB) {
    if (!mapA.has(key)) {
      added.push(elB);
      continue;
    }
    const elA = mapA.get(key);
    const descChanged = (elA.description || '') !== (elB.description || '');
    if (hasMoved(boxOf(elA), boxOf(elB)) || descChanged) {
      changed.push({ before: elA, after: elB });
    } else {
      unchanged.push(elB);
    }
  }

  for (const [key, elA] of mapA) {
    if (!mapB.has(key)) removed.push(elA);
  }

  return {
    added,
    removed,
    changed,
    unchanged,
    summary: {
      added_count: added.length,
      removed_count: removed.length,
      changed_count: changed.length,
      unchanged_count: unchanged.length,
    },
  };
}
|
|
710
|
-
|
|
711
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
712
|
-
// Cache management
|
|
713
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
714
|
-
|
|
715
|
-
/**
 * Summarise the vision cache for one site.
 *
 * Reads the aggregate row produced by `stmts.cacheStats` and replaces every
 * missing/NULL aggregate with 0. `hit_rate_pct` is the share of rows that are
 * still active (not expired), rounded to one decimal place.
 *
 * @param {string} siteId - Site to report on.
 * @returns {object} Counters, percentage, token total, average latency and byte estimate.
 */
function getCacheStats(siteId) {
  const aggregate = stmts.cacheStats.get(siteId);

  const total = aggregate.total_cached || 0;
  const active = aggregate.active_cached || 0;

  let activeShare = '0.0';
  if (total > 0) {
    activeShare = ((active / total) * 100).toFixed(1);
  }

  return {
    total_cached: total,
    active_cached: active,
    expired: aggregate.expired || 0,
    hit_rate_pct: parseFloat(activeShare),
    total_tokens_used: aggregate.total_tokens || 0,
    avg_latency_ms: Math.round(aggregate.avg_latency || 0),
    storage_estimate_bytes: aggregate.total_bytes || 0,
  };
}
|
|
732
|
-
|
|
733
|
-
/**
 * Remove cache rows for a site, then clean up element rows whose cache row is gone.
 *
 * @param {string} siteId - Site whose cache is pruned.
 * @param {object} [options]
 * @param {number} [options.olderThan] - Age in seconds. When truthy, rows
 *   created before `now - olderThan` are deleted; otherwise only rows that
 *   have already expired are deleted.
 * @returns {{deleted: number, orphaned_elements_cleaned: number}} Row counts
 *   reported by the delete statements.
 */
function clearCache(siteId, { olderThan } = {}) {
  let deleted = 0;

  if (olderThan) {
    // `created_at` is filled by SQLite's datetime('now'), i.e. UTC text in the
    // form 'YYYY-MM-DD HH:MM:SS'. The comparison is a plain string compare, so
    // the cutoff must use the same layout. A raw ISO string ('...T...Z') sorts
    // after every same-day SQLite timestamp because ' ' < 'T', which would
    // delete everything created on the cutoff's calendar day.
    const cutoff = new Date(Date.now() - olderThan * 1000)
      .toISOString()
      .slice(0, 19)
      .replace('T', ' ');
    deleted = stmts.deleteOldCache.run(siteId, cutoff).changes;
  } else {
    deleted = stmts.deleteExpiredCache.run(siteId).changes;
  }

  const orphaned = stmts.deleteOrphanedElements.run();
  return { deleted, orphaned_elements_cleaned: orphaned.changes };
}
|
|
748
|
-
|
|
749
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
750
|
-
// Supported models
|
|
751
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
752
|
-
|
|
753
|
-
/**
 * List the vision models this module advertises, grouped by provider.
 *
 * The catalogue is static. Every call builds fresh objects and arrays, so
 * callers may mutate the result without affecting later calls.
 *
 * @returns {Array<{provider: string, models: Array<object>}>} One entry per
 *   provider; each model has `id`, `name`, `capabilities`, `max_resolution`, `cost`.
 */
function getSupportedModels() {
  // Capability tiers: every model has the core three; richer tiers append to them.
  const core = () => ['element_detection', 'text_recognition', 'layout_analysis'];
  const withReasoning = () => [...core(), 'reasoning'];
  const withComplexUi = () => [...withReasoning(), 'complex_ui'];
  const withAccessibility = () => [...withComplexUi(), 'accessibility'];

  const entry = (id, name, capabilities, maxResolution, cost) => ({
    id,
    name,
    capabilities,
    max_resolution: maxResolution,
    cost,
  });
  const free = (id, name, capabilities, maxResolution) => entry(id, name, capabilities, maxResolution, 'free');
  const paid = (id, name, capabilities) => entry(id, name, capabilities, '4096x4096', 'paid');

  const catalogue = [];

  catalogue.push({
    provider: 'local',
    models: [
      free('moondream', 'Moondream', core(), '1280x720'),
      free('llava', 'LLaVA', withReasoning(), '1920x1080'),
      free('llava:13b', 'LLaVA 13B', withComplexUi(), '1920x1080'),
    ],
  });

  catalogue.push({
    provider: 'ollama',
    models: [
      free('moondream', 'Moondream (Ollama)', core(), '1280x720'),
      free('llava', 'LLaVA (Ollama)', withReasoning(), '1920x1080'),
      free('bakllava', 'BakLLaVA (Ollama)', core(), '1920x1080'),
    ],
  });

  catalogue.push({
    provider: 'openai',
    models: [
      paid('gpt-4o', 'GPT-4o', withAccessibility()),
      paid('gpt-4o-mini', 'GPT-4o Mini', core()),
      paid('gpt-4-turbo', 'GPT-4 Turbo', withComplexUi()),
    ],
  });

  catalogue.push({
    provider: 'anthropic',
    models: [
      paid('claude-sonnet-4-20250514', 'Claude Sonnet 4', withAccessibility()),
      paid('claude-3-5-sonnet-20241022', 'Claude 3.5 Sonnet', withComplexUi()),
      paid('claude-3-haiku-20240307', 'Claude 3 Haiku', core()),
    ],
  });

  return catalogue;
}
|
|
789
|
-
|
|
790
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
791
|
-
// Token estimation
|
|
792
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
793
|
-
|
|
794
|
-
/**
 * Estimate how many tokens a base64-encoded image will cost.
 *
 * Two estimates are computed and the larger one is returned:
 *  - a tile-based figure: 85 + 170 per 512x512 tile of the image dimensions
 *    (NOTE(review): this looks like a provider "high detail" pricing formula; confirm);
 *  - a size-based figure: one token per 750 decoded bytes.
 *
 * Dimensions are read from the PNG header when the payload starts with the
 * PNG base64 signature; for anything else 1280x720 is assumed.
 *
 * @param {string} imageBase64 - Raw base64 image data.
 * @returns {number} Estimated token count, or 0 for empty input.
 */
function estimateTokens(imageBase64) {
  if (!imageBase64) return 0;

  // base64 packs 3 bytes into 4 characters.
  const approxBytes = Math.ceil(imageBase64.length * 0.75);
  const dimensions = { width: 1280, height: 720 };

  try {
    // JPEG ('/9j/') carries no cheaply readable size here, so it keeps the defaults.
    const isJpeg = imageBase64.startsWith('/9j/');
    if (!isJpeg && imageBase64.startsWith('iVBOR')) {
      const header = Buffer.from(imageBase64.slice(0, 100), 'base64');
      if (header.length >= 24) {
        // The big-endian width and height are read at byte offsets 16 and 20.
        dimensions.width = header.readUInt32BE(16);
        dimensions.height = header.readUInt32BE(20);
      }
    }
  } catch {
    // Unreadable header: fall back to the default dimensions.
  }

  const tileCount = Math.ceil(dimensions.width / 512) * Math.ceil(dimensions.height / 512);
  const tileEstimate = 85 + (tileCount * 170);
  const byteEstimate = Math.ceil(approxBytes / 750);

  return Math.max(tileEstimate, byteEstimate);
}
|
|
819
|
-
|
|
820
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
821
|
-
// History
|
|
822
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
823
|
-
|
|
824
|
-
/**
 * Return recent cache rows for a site as lightweight summaries.
 *
 * Each summary carries the row's metadata plus the number of detected
 * elements; the stored analysis text and element list are not included.
 *
 * @param {string} siteId - Site to query.
 * @param {object} [options]
 * @param {number} [options.limit] - Maximum rows to return; falsy values mean 50.
 * @param {string} [options.url] - When given, only rows for this URL are returned.
 * @returns {Array<object>} One summary per row, in the order the query returns them.
 */
function getVisionHistory(siteId, { limit, url } = {}) {
  const pageSize = limit || 50;
  const records = url
    ? stmts.visionHistoryByUrl.all(siteId, url, pageSize)
    : stmts.visionHistory.all(siteId, pageSize);

  const summaries = [];
  for (const record of records) {
    let detected = [];
    try {
      detected = JSON.parse(record.elements_found || '[]');
    } catch {
      // Unparseable element list: the summary reports zero elements.
    }

    summaries.push({
      id: record.id,
      site_id: record.site_id,
      url: record.url,
      screenshot_hash: record.screenshot_hash,
      provider: record.provider,
      model: record.model,
      tokens_used: record.tokens_used,
      latency_ms: record.latency_ms,
      elements_count: detected.length,
      created_at: record.created_at,
      expires_at: record.expires_at,
    });
  }
  return summaries;
}
|
|
850
|
-
|
|
851
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
852
|
-
// Exports
|
|
853
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
854
|
-
|
|
855
|
-
module.exports = {
|
|
856
|
-
configureVision,
|
|
857
|
-
getVisionConfig,
|
|
858
|
-
analyzeScreenshot,
|
|
859
|
-
buildVisionPrompt,
|
|
860
|
-
parseVisionResponse,
|
|
861
|
-
extractElementsFromAnalysis,
|
|
862
|
-
findElement,
|
|
863
|
-
compareScreenshots,
|
|
864
|
-
getCacheStats,
|
|
865
|
-
clearCache,
|
|
866
|
-
encryptApiKey,
|
|
867
|
-
decryptApiKey,
|
|
868
|
-
getSupportedModels,
|
|
869
|
-
estimateTokens,
|
|
870
|
-
getVisionHistory,
|
|
871
|
-
};
|
|
1
|
+
const { db } = require('../models/db');
|
|
2
|
+
const { v4: uuidv4 } = require('uuid');
|
|
3
|
+
const crypto = require('crypto');
|
|
4
|
+
|
|
5
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
6
|
+
// Schema
|
|
7
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
8
|
+
|
|
9
|
+
db.exec(`
|
|
10
|
+
CREATE TABLE IF NOT EXISTS vision_configs (
|
|
11
|
+
id TEXT PRIMARY KEY,
|
|
12
|
+
site_id TEXT NOT NULL UNIQUE,
|
|
13
|
+
provider TEXT DEFAULT 'local' CHECK(provider IN ('local','openai','anthropic','ollama')),
|
|
14
|
+
model TEXT DEFAULT 'moondream',
|
|
15
|
+
endpoint TEXT,
|
|
16
|
+
api_key_encrypted TEXT,
|
|
17
|
+
max_resolution TEXT DEFAULT '1280x720',
|
|
18
|
+
cache_ttl INTEGER DEFAULT 300,
|
|
19
|
+
enabled INTEGER DEFAULT 1,
|
|
20
|
+
created_at TEXT DEFAULT (datetime('now')),
|
|
21
|
+
updated_at TEXT DEFAULT (datetime('now'))
|
|
22
|
+
);
|
|
23
|
+
|
|
24
|
+
CREATE TABLE IF NOT EXISTS vision_cache (
|
|
25
|
+
id TEXT PRIMARY KEY,
|
|
26
|
+
site_id TEXT,
|
|
27
|
+
url TEXT,
|
|
28
|
+
screenshot_hash TEXT,
|
|
29
|
+
analysis TEXT,
|
|
30
|
+
elements_found TEXT,
|
|
31
|
+
provider TEXT,
|
|
32
|
+
model TEXT,
|
|
33
|
+
tokens_used INTEGER,
|
|
34
|
+
latency_ms INTEGER,
|
|
35
|
+
created_at TEXT DEFAULT (datetime('now')),
|
|
36
|
+
expires_at TEXT
|
|
37
|
+
);
|
|
38
|
+
|
|
39
|
+
CREATE TABLE IF NOT EXISTS vision_elements (
|
|
40
|
+
id TEXT PRIMARY KEY,
|
|
41
|
+
cache_id TEXT,
|
|
42
|
+
site_id TEXT,
|
|
43
|
+
element_type TEXT CHECK(element_type IN ('button','input','link','text','image','form','nav','dropdown')),
|
|
44
|
+
label TEXT,
|
|
45
|
+
description TEXT,
|
|
46
|
+
bounding_box TEXT,
|
|
47
|
+
suggested_selector TEXT,
|
|
48
|
+
confidence REAL,
|
|
49
|
+
interactable INTEGER DEFAULT 0,
|
|
50
|
+
created_at TEXT DEFAULT (datetime('now')),
|
|
51
|
+
FOREIGN KEY (cache_id) REFERENCES vision_cache(id) ON DELETE CASCADE
|
|
52
|
+
);
|
|
53
|
+
|
|
54
|
+
CREATE INDEX IF NOT EXISTS idx_vision_configs_site ON vision_configs(site_id);
|
|
55
|
+
CREATE INDEX IF NOT EXISTS idx_vision_cache_site ON vision_cache(site_id);
|
|
56
|
+
CREATE INDEX IF NOT EXISTS idx_vision_cache_hash ON vision_cache(screenshot_hash);
|
|
57
|
+
CREATE INDEX IF NOT EXISTS idx_vision_cache_url ON vision_cache(url);
|
|
58
|
+
CREATE INDEX IF NOT EXISTS idx_vision_cache_expires ON vision_cache(expires_at);
|
|
59
|
+
CREATE INDEX IF NOT EXISTS idx_vision_elements_cache ON vision_elements(cache_id);
|
|
60
|
+
CREATE INDEX IF NOT EXISTS idx_vision_elements_site ON vision_elements(site_id);
|
|
61
|
+
CREATE INDEX IF NOT EXISTS idx_vision_elements_type ON vision_elements(element_type);
|
|
62
|
+
`);
|
|
63
|
+
|
|
64
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
65
|
+
// Encryption helpers (AES-256-GCM keyed from JWT_SECRET)
|
|
66
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
67
|
+
|
|
68
|
+
// Marker placed in front of every encrypted API key; decryptApiKey rejects
// any value that does not start with it.
const ENC_PREFIX = 'venc:';
|
|
69
|
+
|
|
70
|
+
/**
 * Derive the 32-byte AES key used for API-key encryption.
 *
 * The key is the SHA-256 digest of `process.env.JWT_SECRET`.
 * NOTE(review): when JWT_SECRET is unset or empty, a fixed fallback string
 * embedded in the source is hashed instead, so encrypted keys are then only
 * as secret as this file. Consider requiring the variable in production.
 *
 * @returns {Buffer} 32-byte key.
 */
function _deriveKey() {
  const hasher = crypto.createHash('sha256');
  hasher.update(process.env.JWT_SECRET || 'wab-vision-fallback-key');
  return hasher.digest();
}
|
|
74
|
+
|
|
75
|
+
/**
 * Encrypt an API key with AES-256-GCM for storage.
 *
 * Output layout: `<ENC_PREFIX><iv hex>:<auth tag hex>:<ciphertext hex>`,
 * with a fresh random 12-byte IV per call.
 *
 * @param {*} plaintext - Value to encrypt; it is stringified first.
 * @returns {string|null} Encoded ciphertext, or null for a falsy input.
 */
function encryptApiKey(plaintext) {
  if (!plaintext) return null;

  const secretKey = _deriveKey();
  const nonce = crypto.randomBytes(12);
  const cipher = crypto.createCipheriv('aes-256-gcm', secretKey, nonce);

  const head = cipher.update(String(plaintext), 'utf8');
  const tail = cipher.final();
  const ciphertext = Buffer.concat([head, tail]);

  // The auth tag is only available once final() has run.
  const segments = [nonce, cipher.getAuthTag(), ciphertext].map((part) => part.toString('hex'));
  return `${ENC_PREFIX}${segments.join(':')}`;
}
|
|
84
|
+
|
|
85
|
+
/**
 * Decrypt a value produced by encryptApiKey.
 *
 * Expects `<ENC_PREFIX><iv hex>:<auth tag hex>:<ciphertext hex>`. Any failure
 * (malformed payload, wrong key, tampered data or tag) is logged and reported
 * as null rather than thrown.
 *
 * @param {string} encrypted - Stored ciphertext string.
 * @returns {string|null} The plaintext key, or null when the input is not an
 *   encrypted value or cannot be authenticated.
 */
function decryptApiKey(encrypted) {
  if (!encrypted || typeof encrypted !== 'string' || !encrypted.startsWith(ENC_PREFIX)) return null;
  const key = _deriveKey();
  try {
    const rest = encrypted.slice(ENC_PREFIX.length);
    const [ivHex, tagHex, dataHex] = rest.split(':');
    if (ivHex === undefined || tagHex === undefined || dataHex === undefined) {
      throw new Error('malformed encrypted payload');
    }
    const iv = Buffer.from(ivHex, 'hex');
    const tag = Buffer.from(tagHex, 'hex');
    const data = Buffer.from(dataHex, 'hex');
    // Pin the tag length to the 16 bytes encryptApiKey emits. Without this,
    // GCM also accepts truncated tags, which weakens the integrity check.
    const decipher = crypto.createDecipheriv('aes-256-gcm', key, iv, { authTagLength: 16 });
    decipher.setAuthTag(tag);
    return Buffer.concat([decipher.update(data), decipher.final()]).toString('utf8');
  } catch (e) {
    console.error('[Vision] Decrypt failed:', e.message);
    return null;
  }
}
|
|
102
|
+
|
|
103
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
104
|
+
// Prepared statements
|
|
105
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
106
|
+
|
|
107
|
+
// Prepared statements shared by the functions in this module. They are
// compiled once at module load, after the schema above has been created.
const stmts = {
  // Insert a site's config, or update it when the site already has one.
  // On update the stored API key is kept unless a new non-NULL key is supplied.
  upsertConfig: db.prepare(`
    INSERT INTO vision_configs (id, site_id, provider, model, endpoint, api_key_encrypted, max_resolution, cache_ttl, enabled)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, 1)
    ON CONFLICT(site_id) DO UPDATE SET
      provider = excluded.provider,
      model = excluded.model,
      endpoint = excluded.endpoint,
      api_key_encrypted = CASE WHEN excluded.api_key_encrypted IS NOT NULL THEN excluded.api_key_encrypted ELSE vision_configs.api_key_encrypted END,
      max_resolution = excluded.max_resolution,
      cache_ttl = excluded.cache_ttl,
      updated_at = datetime('now')
  `),
  getConfig: db.prepare(`SELECT * FROM vision_configs WHERE site_id = ?`),
  insertCache: db.prepare(`
    INSERT INTO vision_cache (id, site_id, url, screenshot_hash, analysis, elements_found, provider, model, tokens_used, latency_ms, expires_at)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
  `),
  // Newest non-expired cache row for a screenshot hash.
  getCacheByHash: db.prepare(`
    SELECT * FROM vision_cache WHERE site_id = ? AND screenshot_hash = ? AND expires_at > datetime('now') ORDER BY created_at DESC LIMIT 1
  `),
  insertElement: db.prepare(`
    INSERT INTO vision_elements (id, cache_id, site_id, element_type, label, description, bounding_box, suggested_selector, confidence, interactable)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
  `),
  searchElements: db.prepare(`
    SELECT * FROM vision_elements WHERE site_id = ? ORDER BY confidence DESC
  `),
  searchElementsByType: db.prepare(`
    SELECT * FROM vision_elements WHERE site_id = ? AND element_type = ? ORDER BY confidence DESC
  `),
  getCacheById: db.prepare(`SELECT * FROM vision_cache WHERE id = ?`),
  // Newest cache row for a screenshot hash, expired or not.
  getCacheBySiteAndHash: db.prepare(`
    SELECT * FROM vision_cache WHERE site_id = ? AND screenshot_hash = ? ORDER BY created_at DESC LIMIT 1
  `),
  getElementsByCache: db.prepare(`SELECT * FROM vision_elements WHERE cache_id = ?`),
  // Per-site aggregates; the SUM/AVG columns are NULL when the site has no rows.
  cacheStats: db.prepare(`
    SELECT
      COUNT(*) as total_cached,
      SUM(CASE WHEN expires_at > datetime('now') THEN 1 ELSE 0 END) as active_cached,
      SUM(CASE WHEN expires_at <= datetime('now') THEN 1 ELSE 0 END) as expired,
      SUM(tokens_used) as total_tokens,
      AVG(latency_ms) as avg_latency,
      SUM(LENGTH(analysis)) as total_bytes
    FROM vision_cache WHERE site_id = ?
  `),
  deleteExpiredCache: db.prepare(`DELETE FROM vision_cache WHERE site_id = ? AND expires_at <= datetime('now')`),
  // The second parameter is compared as text against created_at.
  deleteOldCache: db.prepare(`DELETE FROM vision_cache WHERE site_id = ? AND created_at < ?`),
  deleteOrphanedElements: db.prepare(`DELETE FROM vision_elements WHERE cache_id NOT IN (SELECT id FROM vision_cache)`),
  visionHistory: db.prepare(`SELECT * FROM vision_cache WHERE site_id = ? ORDER BY created_at DESC LIMIT ?`),
  visionHistoryByUrl: db.prepare(`SELECT * FROM vision_cache WHERE site_id = ? AND url = ? ORDER BY created_at DESC LIMIT ?`),
};
|
|
159
|
+
|
|
160
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
161
|
+
// Provider API calls
|
|
162
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
163
|
+
|
|
164
|
+
// Maximum time, in milliseconds, a provider HTTP call may run before its
// AbortController cancels the request.
const PROVIDER_TIMEOUT_MS = 60_000;
|
|
165
|
+
|
|
166
|
+
/**
 * Send an image plus prompt to an Ollama-compatible `/api/generate` endpoint.
 *
 * The request is aborted after PROVIDER_TIMEOUT_MS; the timer is always
 * cleared, whether the call succeeds or fails.
 *
 * @param {string} endpoint - Base URL of the server; trailing slashes are stripped.
 * @param {string} model - Model name forwarded in the request body.
 * @param {string} base64Image - Base64 image data, sent as the single entry of `images`.
 * @param {string} prompt - Instruction text.
 * @returns {Promise<{text: string, tokens: number}>} Response text and the sum
 *   of prompt and generation token counts (missing counts are treated as 0).
 * @throws {Error} `Ollama <status>: <body excerpt>` on a non-2xx response;
 *   fetch/abort errors propagate unchanged.
 */
async function _callOllama(endpoint, model, base64Image, prompt) {
  const target = `${endpoint.replace(/\/+$/, '')}/api/generate`;
  const abortControl = new AbortController();
  const timeoutHandle = setTimeout(() => abortControl.abort(), PROVIDER_TIMEOUT_MS);

  try {
    const payload = {
      model,
      prompt,
      images: [base64Image],
      stream: false,
    };
    const response = await fetch(target, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(payload),
      signal: abortControl.signal,
    });

    if (response.ok) {
      const reply = await response.json();
      const promptTokens = reply.prompt_eval_count || 0;
      const outputTokens = reply.eval_count || 0;
      return {
        text: reply.response || '',
        tokens: promptTokens + outputTokens,
      };
    }

    // Only the first 300 characters of the error body go into the message.
    const detail = await response.text().catch(() => '');
    throw new Error(`Ollama ${response.status}: ${detail.slice(0, 300)}`);
  } finally {
    clearTimeout(timeoutHandle);
  }
}
|
|
196
|
+
|
|
197
|
+
/**
 * Send an image plus prompt to the OpenAI chat-completions endpoint.
 *
 * The image is embedded as a data URL. Its MIME type is inferred from the
 * base64 signature (JPEG, GIF, WebP), falling back to PNG, so non-PNG
 * screenshots are no longer mislabelled as `image/png`.
 * The request is aborted after PROVIDER_TIMEOUT_MS.
 *
 * @param {string} apiKey - Bearer token for the API.
 * @param {string} model - Model id; falsy values default to `gpt-4o`.
 * @param {string} base64Image - Raw base64 image data (no data-URL prefix).
 * @param {string} prompt - Instruction text.
 * @returns {Promise<{text: string, tokens: number}>} First choice's message
 *   text ('' when absent) and the reported total token count (0 when absent).
 * @throws {Error} `OpenAI <status>: <body excerpt>` on a non-2xx response;
 *   fetch/abort errors propagate unchanged.
 */
async function _callOpenAI(apiKey, model, base64Image, prompt) {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), PROVIDER_TIMEOUT_MS);

  // Base64 prefixes of common image signatures; anything else is assumed PNG.
  const signatures = [['/9j/', 'image/jpeg'], ['R0lGOD', 'image/gif'], ['UklGR', 'image/webp']];
  const sniffed = signatures.find(([prefix]) => String(base64Image).startsWith(prefix));
  const mediaType = sniffed ? sniffed[1] : 'image/png';

  try {
    const res = await fetch('https://api.openai.com/v1/chat/completions', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        Authorization: `Bearer ${apiKey}`,
      },
      body: JSON.stringify({
        model: model || 'gpt-4o',
        messages: [
          {
            role: 'user',
            content: [
              { type: 'text', text: prompt },
              { type: 'image_url', image_url: { url: `data:${mediaType};base64,${base64Image}`, detail: 'high' } },
            ],
          },
        ],
        max_tokens: 4096,
      }),
      signal: controller.signal,
    });
    if (!res.ok) {
      const errBody = await res.text().catch(() => '');
      throw new Error(`OpenAI ${res.status}: ${errBody.slice(0, 300)}`);
    }
    const data = await res.json();
    // A choice without a message, or with null content, yields '' rather
    // than throwing or returning null.
    const content = data.choices?.[0]?.message?.content;
    return {
      text: content ?? '',
      tokens: data.usage?.total_tokens ?? 0,
    };
  } finally {
    clearTimeout(timer);
  }
}
|
|
237
|
+
|
|
238
|
+
/**
 * Send an image plus prompt to the Anthropic Messages endpoint.
 *
 * The image's `media_type` is inferred from the base64 signature (JPEG, GIF,
 * WebP), falling back to PNG, instead of always declaring `image/png`.
 * The request is aborted after PROVIDER_TIMEOUT_MS.
 *
 * @param {string} apiKey - Value for the `x-api-key` header.
 * @param {string} model - Model id; falsy values default to `claude-sonnet-4-20250514`.
 * @param {string} base64Image - Raw base64 image data.
 * @param {string} prompt - Instruction text.
 * @returns {Promise<{text: string, tokens: number}>} Text of the first text
 *   block ('' when absent) and input + output token counts (missing counts are 0).
 * @throws {Error} `Anthropic <status>: <body excerpt>` on a non-2xx response;
 *   fetch/abort errors propagate unchanged.
 */
async function _callAnthropic(apiKey, model, base64Image, prompt) {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), PROVIDER_TIMEOUT_MS);

  // Base64 prefixes of common image signatures; anything else is assumed PNG.
  const signatures = [['/9j/', 'image/jpeg'], ['R0lGOD', 'image/gif'], ['UklGR', 'image/webp']];
  const sniffed = signatures.find(([prefix]) => String(base64Image).startsWith(prefix));
  const mediaType = sniffed ? sniffed[1] : 'image/png';

  try {
    const res = await fetch('https://api.anthropic.com/v1/messages', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'x-api-key': apiKey,
        'anthropic-version': '2023-06-01',
      },
      body: JSON.stringify({
        model: model || 'claude-sonnet-4-20250514',
        max_tokens: 4096,
        messages: [
          {
            role: 'user',
            content: [
              { type: 'image', source: { type: 'base64', media_type: mediaType, data: base64Image } },
              { type: 'text', text: prompt },
            ],
          },
        ],
      }),
      signal: controller.signal,
    });
    if (!res.ok) {
      const errBody = await res.text().catch(() => '');
      throw new Error(`Anthropic ${res.status}: ${errBody.slice(0, 300)}`);
    }
    const data = await res.json();
    const textBlock = data.content && data.content.find((b) => b.type === 'text');
    // Default each counter separately so a partial `usage` object cannot
    // produce NaN.
    const inputTokens = data.usage?.input_tokens ?? 0;
    const outputTokens = data.usage?.output_tokens ?? 0;
    return {
      text: textBlock ? textBlock.text : '',
      tokens: inputTokens + outputTokens,
    };
  } finally {
    clearTimeout(timer);
  }
}
|
|
281
|
+
|
|
282
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
283
|
+
// Prompt construction
|
|
284
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
285
|
+
|
|
286
|
+
/**
 * Build the instruction text sent to the vision model.
 *
 * The base prompt asks for a fenced JSON array describing every interactive
 * element. Caller-supplied instructions, when present, are appended under an
 * "Additional instructions:" heading.
 *
 * @param {string} [customPrompt] - Extra instructions; falsy values are ignored.
 * @returns {string} The complete prompt.
 */
function buildVisionPrompt(customPrompt) {
  const instructions = `Analyze this screenshot of a web page. Identify every interactive UI element visible.

For each element, return a JSON object with these fields:
- "type": one of "button", "input", "link", "text", "image", "form", "nav", "dropdown"
- "label": the visible text or aria-label of the element
- "description": a short human-readable description of what the element does
- "position": {"x": approximate x coordinate in pixels, "y": approximate y coordinate in pixels, "width": approximate width, "height": approximate height}
- "selector": a suggested CSS selector that could target this element (e.g. "button.submit-btn", "#login-form input[type=email]")
- "interactable": true if the element can be clicked, typed into, or otherwise interacted with
- "confidence": a number from 0.0 to 1.0 indicating how confident you are in this identification

Return ONLY a JSON array of these objects wrapped in a markdown code block like:
\`\`\`json
[...]
\`\`\`

Be thorough — include buttons, links, inputs, dropdowns, navigation items, forms, and any other interactive elements.`;

  return customPrompt
    ? `${instructions}\n\nAdditional instructions: ${customPrompt}`
    : instructions;
}
|
|
310
|
+
|
|
311
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
312
|
+
// Response parsing
|
|
313
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
314
|
+
|
|
315
|
+
// Element types accepted as-is by _normalizeElement; the list matches the
// CHECK constraint on vision_elements.element_type.
const VALID_ELEMENT_TYPES = new Set(['button', 'input', 'link', 'text', 'image', 'form', 'nav', 'dropdown']);
|
|
316
|
+
|
|
317
|
+
/**
 * Extract UI element descriptions from a model's free-form reply.
 *
 * Three strategies are tried in order until one yields something:
 *  1. the first fenced code block (optionally tagged `json`);
 *  2. the first bare `[ { ... } ]` array in the text;
 *  3. every flat `{ ... "type": "..." ... }` object found in the text.
 * Each candidate then goes through `_normalizeElement`; candidates it rejects
 * are dropped.
 *
 * @param {string} rawResponse - Text returned by the provider.
 * @param {string} provider - Provider name, forwarded to `_normalizeElement`.
 * @returns {Array<object>} Normalised elements; empty when nothing parses.
 */
function parseVisionResponse(rawResponse, provider) {
  if (!rawResponse || typeof rawResponse !== 'string') return [];

  // JSON.parse never yields undefined, so undefined can signal "did not parse".
  const parseOrUndefined = (text) => {
    try {
      return JSON.parse(text);
    } catch {
      return undefined;
    }
  };

  let candidates = [];

  // Strategy 1: fenced code block.
  const fenced = rawResponse.match(/```(?:json)?\s*\n?([\s\S]*?)```/);
  if (fenced) {
    const payload = parseOrUndefined(fenced[1].trim());
    if (Array.isArray(payload)) {
      candidates = payload;
    } else if (payload && typeof payload === 'object') {
      candidates = [payload];
    }
  }

  // Strategy 2: first bare array of objects.
  if (candidates.length === 0) {
    const bareArray = rawResponse.match(/\[\s*\{[\s\S]*?\}\s*\]/);
    if (bareArray) {
      const payload = parseOrUndefined(bareArray[0]);
      if (payload !== undefined) candidates = payload;
    }
  }

  // Strategy 3: individual flat objects that carry a "type" key.
  if (candidates.length === 0) {
    const looseObjects = [...rawResponse.matchAll(/\{[^{}]*"type"\s*:\s*"[^"]+?"[^{}]*\}/g)];
    for (const [snippet] of looseObjects) {
      const payload = parseOrUndefined(snippet);
      if (payload !== undefined) candidates.push(payload);
    }
  }

  const normalized = [];
  for (const candidate of candidates) {
    const element = _normalizeElement(candidate, provider);
    if (element) normalized.push(element);
  }
  return normalized;
}
|
|
351
|
+
|
|
352
|
+
/**
 * Coerce one model-reported element into the module's canonical shape.
 *
 * Accepts several field spellings (`type`/`element_type`,
 * `position`/`bounding_box`/`bbox`, `selector`/`suggested_selector`/`css_selector`,
 * `label`/`text`/`name`, `description`/`desc`). Unknown types are mapped by
 * keyword onto the supported set and default to `text`.
 *
 * @param {*} raw - Parsed object from the model output.
 * @param {string} [_provider] - Unused; kept for interface compatibility.
 * @returns {object|null} `{ type, label, description, boundingBox,
 *   suggestedSelector, confidence, interactable }`, or null for non-object input.
 */
function _normalizeElement(raw, _provider) {
  if (!raw || typeof raw !== 'object') return null;

  // Model output is untrusted: a numeric or object `type` must not crash
  // toLowerCase(), so stringify it first.
  let type = String(raw.type || raw.element_type || 'text').toLowerCase().trim();
  if (!VALID_ELEMENT_TYPES.has(type)) {
    // First matching keyword wins; the order mirrors the original if/else chain.
    const aliases = [
      [/btn|button|submit/i, 'button'],
      [/input|field|text.?box|textarea/i, 'input'],
      [/link|anchor|href/i, 'link'],
      [/select|dropdown|combo/i, 'dropdown'],
      [/img|icon|logo/i, 'image'],
      [/form/i, 'form'],
      [/nav|menu|sidebar/i, 'nav'],
    ];
    const alias = aliases.find(([pattern]) => pattern.test(type));
    type = alias ? alias[1] : 'text';
  }

  const pos = raw.position || raw.bounding_box || raw.bbox || {};
  const boundingBox = {
    x: Number(pos.x) || 0,
    y: Number(pos.y) || 0,
    width: Number(pos.width || pos.w) || 0,
    height: Number(pos.height || pos.h) || 0,
  };

  // Only a missing or non-numeric confidence falls back to 0.5. An explicit 0
  // is a legitimate value (the prompt asks for 0.0-1.0) and is preserved.
  const hasConfidence = typeof raw.confidence === 'number'
    || (typeof raw.confidence === 'string' && raw.confidence.trim() !== '');
  const reported = hasConfidence ? Number(raw.confidence) : NaN;
  const confidence = Number.isNaN(reported) ? 0.5 : Math.max(0, Math.min(1, reported));

  const interactable = raw.interactable != null
    ? !!raw.interactable
    : ['button', 'input', 'link', 'dropdown', 'form'].includes(type);

  return {
    type,
    label: String(raw.label || raw.text || raw.name || '').slice(0, 500),
    description: String(raw.description || raw.desc || '').slice(0, 1000),
    boundingBox,
    suggestedSelector: String(raw.selector || raw.suggested_selector || raw.css_selector || '').slice(0, 500),
    confidence,
    interactable,
  };
}
|
|
391
|
+
|
|
392
|
+
/**
 * Recover element descriptions from an analysis text.
 *
 * Structured JSON is preferred: if `parseVisionResponse` finds anything, that
 * result is returned. Otherwise each bullet line (`-`, `*` or `•`) becomes a
 * low-confidence (0.3) element whose type is guessed from keywords in the
 * line and whose label is the first quoted phrase, or else the first 80
 * characters of the line.
 *
 * @param {string} analysisText - Raw analysis text.
 * @returns {Array<object>} Elements in canonical shape; empty for non-string or empty input.
 */
function extractElementsFromAnalysis(analysisText) {
  if (!analysisText || typeof analysisText !== 'string') return [];

  const structured = parseVisionResponse(analysisText, 'unknown');
  if (structured.length > 0) return structured;

  // Ordered rules: the first pattern that matches decides the type.
  const keywordRules = [
    ['button', /\b(button|btn|submit|click)\b/i],
    ['input', /\b(input|field|text.?box|textarea|type|enter)\b/i],
    ['link', /\b(link|anchor|href|url|navigate)\b/i],
    ['dropdown', /\b(dropdown|select|combo|menu|option)\b/i],
    ['image', /\b(image|img|icon|logo|picture|photo)\b/i],
    ['form', /\b(form|login|signup|register|search.?bar)\b/i],
    ['nav', /\b(nav|menu|sidebar|header|footer|tab)\b/i],
  ];
  const interactiveTypes = ['button', 'input', 'link', 'dropdown', 'form'];
  const bulletLine = /^[\s]*[-*•]\s+(.+)/;

  const guessed = [];
  analysisText.split('\n').forEach((line) => {
    const bullet = line.match(bulletLine);
    if (!bullet) return;

    const body = bullet[1].trim();
    if (body.length < 3) return;

    const rule = keywordRules.find(([, pattern]) => pattern.test(body));
    const kind = rule ? rule[0] : 'text';

    const quoted = body.match(/["']([^"']+)["']/);

    guessed.push({
      type: kind,
      label: quoted ? quoted[1] : body.slice(0, 80),
      description: body.slice(0, 1000),
      boundingBox: { x: 0, y: 0, width: 0, height: 0 },
      suggestedSelector: '',
      confidence: 0.3,
      interactable: interactiveTypes.includes(kind),
    });
  });

  return guessed;
}
|
|
440
|
+
|
|
441
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
442
|
+
// Core functions
|
|
443
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
444
|
+
|
|
445
|
+
/**
 * Create or update the vision configuration for a site.
 *
 * Omitted options fall back to: provider `local`, model `moondream`, no
 * endpoint, resolution `1280x720`, cache TTL 300. An API key, when given, is
 * encrypted before storage; when omitted, NULL is passed and the upsert
 * statement keeps any key already stored.
 *
 * @param {string} siteId - Site being configured.
 * @param {object} [options]
 * @param {string} [options.provider]
 * @param {string} [options.model]
 * @param {string} [options.endpoint]
 * @param {string} [options.apiKey] - Plaintext key; never stored as-is.
 * @param {string} [options.maxResolution]
 * @param {number} [options.cacheTtl] - Only null/undefined trigger the default; 0 is kept.
 * @returns {object|null} The stored row as returned by `_maskConfig`.
 */
function configureVision(siteId, { provider, model, endpoint, apiKey, maxResolution, cacheTtl } = {}) {
  const upsertParams = [
    uuidv4(),
    siteId,
    provider || 'local',
    model || 'moondream',
    endpoint || null,
    apiKey ? encryptApiKey(apiKey) : null,
    maxResolution || '1280x720',
    cacheTtl ?? 300,
  ];
  stmts.upsertConfig.run(...upsertParams);

  // Read the row back so the caller sees what is actually stored.
  return _maskConfig(stmts.getConfig.get(siteId));
}
|
|
463
|
+
|
|
464
|
+
/**
 * Fetch a site's vision configuration in its client-safe (masked) form.
 *
 * @param {string} siteId - Site to look up.
 * @returns {object|null} Result of `_maskConfig` for the stored row, or null
 *   when the site has no configuration.
 */
function getVisionConfig(siteId) {
  const stored = stmts.getConfig.get(siteId);
  return stored ? _maskConfig(stored) : null;
}
|
|
469
|
+
|
|
470
|
+
/**
 * Produce a copy of a config row that is safe to hand to callers.
 *
 * `api_key_encrypted` is removed and replaced by `api_key_masked`:
 *  - null when no key is stored;
 *  - first 4 + `****` + last 4 characters when the key decrypts and is at
 *    least 12 characters long;
 *  - `********` otherwise. Shorter keys are never partially shown, because
 *    their first and last four characters would reveal most or all of the key.
 *
 * @param {object|null} row - Row from `vision_configs`.
 * @returns {object|null} Masked copy, or null for a falsy row.
 */
function _maskConfig(row) {
  if (!row) return null;

  // Below this length, showing 8 characters leaves too little hidden.
  const MIN_LENGTH_FOR_PARTIAL_MASK = 12;

  const { api_key_encrypted: encryptedKey, ...out } = row;

  if (encryptedKey) {
    const decrypted = decryptApiKey(encryptedKey);
    out.api_key_masked = decrypted && decrypted.length >= MIN_LENGTH_FOR_PARTIAL_MASK
      ? `${decrypted.slice(0, 4)}****${decrypted.slice(-4)}`
      : '********';
  } else {
    out.api_key_masked = null;
  }

  return out;
}
|
|
484
|
+
|
|
485
|
+
/**
 * Analyze a screenshot with the vision provider configured for a site,
 * returning a cached result when one exists for the same screenshot hash.
 *
 * Provider dispatch:
 *   - 'openai'    -> _callOpenAI (requires a stored API key)
 *   - 'anthropic' -> _callAnthropic (requires a stored API key)
 *   - anything else ('ollama', 'local', ...) -> _callOllama against
 *     config.endpoint, defaulting to 'http://localhost:11434'
 *
 * On a cache miss the analysis and its parsed elements are persisted before
 * the result is returned.
 *
 * @param {string} siteId - Site whose vision configuration is used.
 * @param {object} [options]
 * @param {string} options.screenshotBase64 - Screenshot payload; required.
 * @param {string} [options.url] - Stored with the cache row (null when omitted).
 * @param {string} [options.prompt] - Passed to buildVisionPrompt.
 * @returns {Promise<{analysis: string, elements: Array, cached: boolean,
 *   latency_ms: number, tokens_used: number, cache_id: string}>}
 * @throws {Error} When screenshotBase64 is missing, vision is not configured
 *   or disabled, a required API key is absent, or the provider call fails
 *   (an AbortError is rethrown as a timeout error).
 */
async function analyzeScreenshot(siteId, { screenshotBase64, url, prompt } = {}) {
  if (!screenshotBase64) throw new Error('screenshotBase64 is required');

  const config = stmts.getConfig.get(siteId);
  if (!config || !config.enabled) throw new Error('Vision not configured or disabled for this site');

  const screenshotHash = crypto.createHash('sha256').update(screenshotBase64).digest('hex');

  // NOTE(review): the cache lookup is keyed on the screenshot only, so a
  // different prompt for the same image gets the earlier analysis — confirm
  // this is intended.
  const cached = stmts.getCacheByHash.get(siteId, screenshotHash);
  if (cached) {
    let cachedElements = [];
    try {
      cachedElements = JSON.parse(cached.elements_found || '[]');
    } catch {
      /* unreadable cached JSON: best effort, report no elements */
    }
    return {
      analysis: cached.analysis,
      elements: cachedElements,
      cached: true,
      latency_ms: cached.latency_ms,
      tokens_used: cached.tokens_used,
      cache_id: cached.id,
    };
  }

  const fullPrompt = buildVisionPrompt(prompt);
  const apiKey = config.api_key_encrypted ? decryptApiKey(config.api_key_encrypted) : null;
  const providerName = config.provider;
  const modelName = config.model;

  const startTime = Date.now();
  let result;

  try {
    switch (providerName) {
      case 'openai':
        if (!apiKey) throw new Error('OpenAI API key not configured');
        result = await _callOpenAI(apiKey, modelName, screenshotBase64, fullPrompt);
        break;
      case 'anthropic':
        if (!apiKey) throw new Error('Anthropic API key not configured');
        result = await _callAnthropic(apiKey, modelName, screenshotBase64, fullPrompt);
        break;
      case 'ollama':
      case 'local':
      default: {
        const ep = config.endpoint || 'http://localhost:11434';
        result = await _callOllama(ep, modelName, screenshotBase64, fullPrompt);
        break;
      }
    }
  } catch (err) {
    if (err.name === 'AbortError') {
      // Keep the original error reachable for debugging.
      throw new Error(`Vision provider timed out after ${PROVIDER_TIMEOUT_MS}ms`, { cause: err });
    }
    throw err;
  }

  const latencyMs = Date.now() - startTime;
  const analysisText = result.text;
  const tokensUsed = result.tokens || 0;

  const elements = parseVisionResponse(analysisText, providerName);
  const cacheId = uuidv4();

  // Null check, not truthiness: configureVision stores an explicit 0, and
  // `|| 300` would silently turn that into a 300 second lifetime.
  const ttlSeconds = config.cache_ttl != null ? config.cache_ttl : 300;
  const expiresAt = new Date(Date.now() + ttlSeconds * 1000).toISOString();

  // Write the cache row and its element rows in one transaction so a failed
  // element insert cannot leave a cache row without its elements (assumes
  // db.transaction rolls back when the callback throws — confirm).
  const persistAnalysis = db.transaction(() => {
    stmts.insertCache.run(
      cacheId, siteId, url || null, screenshotHash,
      analysisText, JSON.stringify(elements),
      providerName, modelName, tokensUsed, latencyMs, expiresAt
    );
    for (const el of elements) {
      stmts.insertElement.run(
        uuidv4(), cacheId, siteId,
        el.type, el.label, el.description,
        JSON.stringify(el.boundingBox),
        el.suggestedSelector,
        el.confidence,
        el.interactable ? 1 : 0
      );
    }
  });
  persistAnalysis();

  return {
    analysis: analysisText,
    elements,
    cached: false,
    latency_ms: latencyMs,
    tokens_used: tokensUsed,
    cache_id: cacheId,
  };
}
|
|
575
|
+
|
|
576
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
577
|
+
// Element search
|
|
578
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
579
|
+
|
|
580
|
+
/**
 * Search stored vision elements for a site, optionally narrowed by URL,
 * element type, label substring and free-text description.
 *
 * Filtering steps, in order:
 *   1. type        - selects searchElementsByType instead of searchElements.
 *   2. url         - keeps elements whose cache row has that URL; when no cache
 *                    row exists for the URL the candidates are left unfiltered.
 *   3. label       - case-insensitive substring match on the element label.
 *   4. description - scores each candidate as 0.4 * confidence +
 *                    0.6 * (fraction of description terms found in
 *                    label + description), sorts descending and drops
 *                    scores of 0.1 or lower.
 *
 * @param {string} siteId
 * @param {string} [url]
 * @param {object} [criteria]
 * @param {string} [criteria.description]
 * @param {string} [criteria.type]
 * @param {string} [criteria.label]
 * @returns {Array<object>} At most 20 elements with parsed bounding_box and a `_score`.
 */
function findElement(siteId, url, { description, type, label } = {}) {
  let matches = type
    ? stmts.searchElementsByType.all(siteId, type)
    : stmts.searchElements.all(siteId);

  if (url) {
    const cacheRows = db.prepare(
      `SELECT id FROM vision_cache WHERE site_id = ? AND url = ?`
    ).all(siteId, url);
    const idsForUrl = new Set(cacheRows.map((r) => r.id));

    // Nothing cached for this URL: fall through with the unfiltered list.
    if (idsForUrl.size > 0) {
      matches = matches.filter((el) => idsForUrl.has(el.cache_id));
    }
  }

  if (label) {
    const needle = label.toLowerCase();
    matches = matches.filter((el) => el.label && el.label.toLowerCase().includes(needle));
  }

  if (description) {
    // Single-character tokens are ignored as search terms.
    const terms = description.toLowerCase().split(/\s+/).filter((t) => t.length > 1);

    const scoreOf = (el) => {
      const haystack = `${el.label || ''} ${el.description || ''}`.toLowerCase();
      const hits = terms.filter((term) => haystack.includes(term)).length;
      const termScore = terms.length > 0 ? hits / terms.length : 0;
      return (el.confidence * 0.4) + (termScore * 0.6);
    };

    const scored = matches.map((el) => ({ ...el, _score: scoreOf(el) }));
    scored.sort((a, b) => b._score - a._score);
    matches = scored.filter((el) => el._score > 0.1);
  }

  const parseBox = (raw) => {
    try {
      return JSON.parse(raw || '{}');
    } catch {
      return {};
    }
  };

  return matches.slice(0, 20).map((el) => ({
    id: el.id,
    cache_id: el.cache_id,
    element_type: el.element_type,
    label: el.label,
    description: el.description,
    bounding_box: parseBox(el.bounding_box),
    suggested_selector: el.suggested_selector,
    confidence: el.confidence,
    interactable: !!el.interactable,
    // Without a description search there is no computed score; fall back to confidence.
    _score: el._score || el.confidence,
  }));
}
|
|
640
|
+
|
|
641
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
642
|
+
// Screenshot comparison
|
|
643
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
644
|
+
|
|
645
|
+
/**
 * Diff the detected elements of two cached screenshot analyses.
 *
 * Elements are matched on `type` (or `element_type`) plus lower-cased label.
 * When several elements share that key they are paired by order of
 * appearance, so repeated elements (e.g. several unlabeled buttons) are each
 * counted instead of collapsing into one.
 *
 * A matched pair is "changed" when x, y, width or height differ by more than
 * 10, or when the description text differs; otherwise it is "unchanged".
 *
 * @param {string} siteId
 * @param {string} url - Accepted for interface compatibility; not used by this function.
 * @param {string} screenshotAHash - Hash of the "before" screenshot.
 * @param {string} screenshotBHash - Hash of the "after" screenshot.
 * @returns {object} `{ added, removed, changed, unchanged, summary }`, or an
 *   object with an `error` message and empty lists when either screenshot is
 *   not in the cache.
 */
function compareScreenshots(siteId, url, screenshotAHash, screenshotBHash) {
  const cacheA = stmts.getCacheBySiteAndHash.get(siteId, screenshotAHash);
  const cacheB = stmts.getCacheBySiteAndHash.get(siteId, screenshotBHash);

  if (!cacheA || !cacheB) {
    return { error: 'One or both screenshots not found in cache', added: [], removed: [], changed: [], unchanged: [] };
  }

  // Unreadable JSON, or JSON that is not a list (e.g. "null"), counts as no elements.
  const readElements = (cacheRow) => {
    try {
      const parsed = JSON.parse(cacheRow.elements_found || '[]');
      return Array.isArray(parsed) ? parsed : [];
    } catch {
      return [];
    }
  };

  const makeKey = (el) => `${el.type || el.element_type}::${(el.label || '').toLowerCase()}`;

  // Bucket elements by key, keeping every occurrence in original order.
  const groupByKey = (elements) => {
    const groups = new Map();
    for (const el of elements) {
      const key = makeKey(el);
      const bucket = groups.get(key);
      if (bucket) {
        bucket.push(el);
      } else {
        groups.set(key, [el]);
      }
    }
    return groups;
  };

  const hasChanged = (elA, elB) => {
    const posA = elA.position || elA.boundingBox || {};
    const posB = elB.position || elB.boundingBox || {};
    const moved = Math.abs((posA.x || 0) - (posB.x || 0)) > 10
      || Math.abs((posA.y || 0) - (posB.y || 0)) > 10
      || Math.abs((posA.width || 0) - (posB.width || 0)) > 10
      || Math.abs((posA.height || 0) - (posB.height || 0)) > 10;
    const descChanged = (elA.description || '') !== (elB.description || '');
    return moved || descChanged;
  };

  const groupsA = groupByKey(readElements(cacheA));
  const groupsB = groupByKey(readElements(cacheB));

  const added = [];
  const removed = [];
  const changed = [];
  const unchanged = [];

  for (const [key, bucketB] of groupsB) {
    const bucketA = groupsA.get(key) || [];
    bucketB.forEach((elB, i) => {
      if (i >= bucketA.length) {
        // More occurrences in B than in A: the surplus is new.
        added.push(elB);
        return;
      }
      const elA = bucketA[i];
      if (hasChanged(elA, elB)) {
        changed.push({ before: elA, after: elB });
      } else {
        unchanged.push(elB);
      }
    });
  }

  for (const [key, bucketA] of groupsA) {
    // Occurrences in A with no counterpart in B were removed.
    const matchedCount = (groupsB.get(key) || []).length;
    for (const elA of bucketA.slice(matchedCount)) {
      removed.push(elA);
    }
  }

  return {
    added,
    removed,
    changed,
    unchanged,
    summary: {
      added_count: added.length,
      removed_count: removed.length,
      changed_count: changed.length,
      unchanged_count: unchanged.length,
    },
  };
}
|
|
710
|
+
|
|
711
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
712
|
+
// Cache management
|
|
713
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
714
|
+
|
|
715
|
+
/**
 * Summarize the vision cache for a site from the cacheStats query.
 *
 * Missing or null aggregate columns are reported as 0. `hit_rate_pct` is the
 * share of active entries among all cached entries, rounded to one decimal.
 *
 * @param {string} siteId
 * @returns {{total_cached: number, active_cached: number, expired: number,
 *   hit_rate_pct: number, total_tokens_used: number, avg_latency_ms: number,
 *   storage_estimate_bytes: number}}
 */
function getCacheStats(siteId) {
  const {
    total_cached: rawTotal,
    active_cached: rawActive,
    expired: rawExpired,
    total_tokens: rawTokens,
    avg_latency: rawLatency,
    total_bytes: rawBytes,
  } = stmts.cacheStats.get(siteId);

  const total = rawTotal || 0;
  const active = rawActive || 0;

  // Round via toFixed(1) and convert back to a number; empty cache reports 0.
  let hitRatePct = 0;
  if (total > 0) {
    hitRatePct = parseFloat(((active / total) * 100).toFixed(1));
  }

  return {
    total_cached: total,
    active_cached: active,
    expired: rawExpired || 0,
    hit_rate_pct: hitRatePct,
    total_tokens_used: rawTokens || 0,
    avg_latency_ms: Math.round(rawLatency || 0),
    storage_estimate_bytes: rawBytes || 0,
  };
}
|
|
732
|
+
|
|
733
|
+
/**
 * Remove vision cache rows for a site, then clean up orphaned element rows.
 *
 * With a truthy `olderThan` (seconds) the deleteOldCache statement runs with
 * a cutoff of now minus that many seconds; otherwise deleteExpiredCache runs.
 * deleteOrphanedElements is then run with no arguments.
 *
 * @param {string} siteId
 * @param {object} [options]
 * @param {number} [options.olderThan] - Age threshold in seconds.
 * @returns {{deleted: number, orphaned_elements_cleaned: number}} Row counts
 *   reported by the two statements.
 */
function clearCache(siteId, { olderThan } = {}) {
  const removal = olderThan
    ? stmts.deleteOldCache.run(siteId, new Date(Date.now() - olderThan * 1000).toISOString())
    : stmts.deleteExpiredCache.run(siteId);

  const orphanCleanup = stmts.deleteOrphanedElements.run();

  return {
    deleted: removal.changes,
    orphaned_elements_cleaned: orphanCleanup.changes,
  };
}
|
|
748
|
+
|
|
749
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
750
|
+
// Supported models
|
|
751
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
752
|
+
|
|
753
|
+
/**
 * List the vision models known to this module, grouped by provider.
 *
 * Every model advertises the three base capabilities (element detection,
 * text recognition, layout analysis); some add more. A fresh structure is
 * built on each call, so callers may mutate the result freely.
 *
 * @returns {Array<{provider: string, models: Array<{id: string, name: string,
 *   capabilities: string[], max_resolution: string, cost: string}>}>}
 */
function getSupportedModels() {
  const baseCapabilities = ['element_detection', 'text_recognition', 'layout_analysis'];

  // Builds one catalog entry; `extras` are appended after the base capabilities.
  const entry = (id, name, extras, max_resolution, cost) => ({
    id,
    name,
    capabilities: [...baseCapabilities, ...extras],
    max_resolution,
    cost,
  });
  const free = (id, name, extras, resolution) => entry(id, name, extras, resolution, 'free');
  const paid = (id, name, extras) => entry(id, name, extras, '4096x4096', 'paid');

  const localModels = [
    free('moondream', 'Moondream', [], '1280x720'),
    free('llava', 'LLaVA', ['reasoning'], '1920x1080'),
    free('llava:13b', 'LLaVA 13B', ['reasoning', 'complex_ui'], '1920x1080'),
  ];

  const ollamaModels = [
    free('moondream', 'Moondream (Ollama)', [], '1280x720'),
    free('llava', 'LLaVA (Ollama)', ['reasoning'], '1920x1080'),
    free('bakllava', 'BakLLaVA (Ollama)', [], '1920x1080'),
  ];

  const openaiModels = [
    paid('gpt-4o', 'GPT-4o', ['reasoning', 'complex_ui', 'accessibility']),
    paid('gpt-4o-mini', 'GPT-4o Mini', []),
    paid('gpt-4-turbo', 'GPT-4 Turbo', ['reasoning', 'complex_ui']),
  ];

  const anthropicModels = [
    paid('claude-sonnet-4-20250514', 'Claude Sonnet 4', ['reasoning', 'complex_ui', 'accessibility']),
    paid('claude-3-5-sonnet-20241022', 'Claude 3.5 Sonnet', ['reasoning', 'complex_ui']),
    paid('claude-3-haiku-20240307', 'Claude 3 Haiku', []),
  ];

  return [
    { provider: 'local', models: localModels },
    { provider: 'ollama', models: ollamaModels },
    { provider: 'openai', models: openaiModels },
    { provider: 'anthropic', models: anthropicModels },
  ];
}
|
|
789
|
+
|
|
790
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
791
|
+
// Token estimation
|
|
792
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
793
|
+
|
|
794
|
+
/**
 * Roughly estimate how many tokens an image will cost a vision model.
 *
 * Two estimates are computed and the larger one is returned:
 *   - tile based: 85 + 170 per 512x512 tile of the image dimensions
 *     (NOTE(review): resembles OpenAI's published high-detail image
 *     formula — confirm the source of these constants);
 *   - size based: one token per 750 decoded bytes.
 *
 * Dimensions are read from the header only for PNG data (base64 starting
 * with 'iVBOR'); everything else, including JPEG, assumes 1280x720.
 *
 * @param {string} imageBase64 - Base64 image payload.
 * @returns {number} Estimated token count; 0 for empty input.
 */
function estimateTokens(imageBase64) {
  if (!imageBase64) return 0;

  // Base64 expands data by 4/3, so decoded size is about 3/4 of the length.
  const approxBytes = Math.ceil(imageBase64.length * 0.75);

  const dims = { width: 1280, height: 720 };
  try {
    const isJpeg = imageBase64.startsWith('/9j/');
    // JPEG keeps the default dimensions and relies on the byte-size heuristic.
    if (!isJpeg && imageBase64.startsWith('iVBOR')) {
      const header = Buffer.from(imageBase64.slice(0, 100), 'base64');
      // Width and height are read as big-endian uint32 at byte offsets 16 and 20.
      if (header.length >= 24) {
        dims.width = header.readUInt32BE(16);
        dims.height = header.readUInt32BE(20);
      }
    }
  } catch {
    /* use defaults */
  }

  const tileCount = Math.ceil(dims.width / 512) * Math.ceil(dims.height / 512);
  const tileEstimate = 85 + (tileCount * 170);
  const sizeEstimate = Math.ceil(approxBytes / 750);

  return Math.max(tileEstimate, sizeEstimate);
}
|
|
819
|
+
|
|
820
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
821
|
+
// History
|
|
822
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
823
|
+
|
|
824
|
+
/**
 * Return recent vision analyses for a site as lightweight summaries.
 *
 * Uses the visionHistoryByUrl statement when `url` is given and
 * visionHistory otherwise. Each summary carries the number of detected
 * elements (`elements_count`) instead of the element list.
 *
 * @param {string} siteId
 * @param {object} [options]
 * @param {number} [options.limit] - Maximum rows; falsy values fall back to 50.
 * @param {string} [options.url] - Restrict history to this URL.
 * @returns {Array<object>} One summary object per row returned by the statement.
 */
function getVisionHistory(siteId, { limit, url } = {}) {
  const max = limit || 50;

  const rows = url
    ? stmts.visionHistoryByUrl.all(siteId, url, max)
    : stmts.visionHistory.all(siteId, max);

  return rows.map((row) => {
    const {
      id,
      site_id,
      screenshot_hash,
      provider,
      model,
      tokens_used,
      latency_ms,
      created_at,
      expires_at,
    } = row;

    // Unreadable element JSON counts as an empty list.
    let detected = [];
    try {
      detected = JSON.parse(row.elements_found || '[]');
    } catch {
      /* ignore */
    }

    return {
      id,
      site_id,
      url: row.url,
      screenshot_hash,
      provider,
      model,
      tokens_used,
      latency_ms,
      elements_count: detected.length,
      created_at,
      expires_at,
    };
  });
}
|
|
850
|
+
|
|
851
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
852
|
+
// Exports
|
|
853
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
854
|
+
|
|
855
|
+
// Public surface of this module, listed in the order the functions are exported.
const visionApi = {
  // Configuration
  configureVision,
  getVisionConfig,
  // Analysis and response parsing
  analyzeScreenshot,
  buildVisionPrompt,
  parseVisionResponse,
  extractElementsFromAnalysis,
  // Element search and screenshot diffing
  findElement,
  compareScreenshots,
  // Cache management
  getCacheStats,
  clearCache,
  // API key helpers
  encryptApiKey,
  decryptApiKey,
  // Model catalog, token estimation and history
  getSupportedModels,
  estimateTokens,
  getVisionHistory,
};

module.exports = visionApi;
|