web-agent-bridge 2.3.0 → 2.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +12 -4
- package/public/commander-dashboard.html +243 -0
- package/public/css/premium.css +317 -317
- package/public/demo.html +259 -259
- package/public/index.html +644 -644
- package/public/mesh-dashboard.html +309 -382
- package/public/premium-dashboard.html +2487 -2487
- package/public/premium.html +791 -791
- package/public/script/wab.min.js +124 -87
- package/script/ai-agent-bridge.js +154 -84
- package/sdk/agent-mesh.js +287 -171
- package/sdk/commander.js +262 -0
- package/sdk/index.js +260 -260
- package/server/index.js +8 -1
- package/server/migrations/002_premium_features.sql +418 -418
- package/server/models/db.js +24 -5
- package/server/routes/admin-premium.js +671 -671
- package/server/routes/commander.js +316 -0
- package/server/routes/mesh.js +370 -201
- package/server/routes/premium-v2.js +686 -686
- package/server/routes/premium.js +724 -724
- package/server/services/agent-learning.js +230 -77
- package/server/services/agent-memory.js +625 -625
- package/server/services/agent-mesh.js +260 -67
- package/server/services/agent-symphony.js +548 -518
- package/server/services/commander.js +738 -0
- package/server/services/edge-compute.js +440 -0
- package/server/services/local-ai.js +389 -0
- package/server/services/plugins.js +747 -747
- package/server/services/self-healing.js +843 -843
- package/server/services/swarm.js +788 -788
- package/server/services/vision.js +871 -871
- package/public/admin/dashboard.html +0 -848
- package/public/admin/login.html +0 -84
- package/public/video/tutorial.mp4 +0 -0
|
@@ -1,843 +1,843 @@
|
|
|
1
|
-
const { db } = require('../models/db');
|
|
2
|
-
const crypto = require('crypto');
|
|
3
|
-
|
|
4
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
5
|
-
// Schema
|
|
6
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
7
|
-
|
|
8
|
-
db.exec(`
|
|
9
|
-
CREATE TABLE IF NOT EXISTS selector_registry (
|
|
10
|
-
id TEXT PRIMARY KEY,
|
|
11
|
-
site_id TEXT NOT NULL,
|
|
12
|
-
action_name TEXT NOT NULL,
|
|
13
|
-
original_selector TEXT NOT NULL,
|
|
14
|
-
current_selector TEXT NOT NULL,
|
|
15
|
-
selector_type TEXT,
|
|
16
|
-
element_signature TEXT DEFAULT '{}',
|
|
17
|
-
confidence REAL DEFAULT 1.0,
|
|
18
|
-
verified INTEGER DEFAULT 1,
|
|
19
|
-
heal_count INTEGER DEFAULT 0,
|
|
20
|
-
last_verified TEXT,
|
|
21
|
-
last_healed TEXT,
|
|
22
|
-
created_at TEXT DEFAULT (datetime('now')),
|
|
23
|
-
updated_at TEXT DEFAULT (datetime('now'))
|
|
24
|
-
);
|
|
25
|
-
|
|
26
|
-
CREATE TABLE IF NOT EXISTS selector_corrections (
|
|
27
|
-
id TEXT PRIMARY KEY,
|
|
28
|
-
registry_id TEXT,
|
|
29
|
-
site_id TEXT NOT NULL,
|
|
30
|
-
old_selector TEXT NOT NULL,
|
|
31
|
-
new_selector TEXT NOT NULL,
|
|
32
|
-
corrected_by TEXT,
|
|
33
|
-
reason TEXT,
|
|
34
|
-
shared INTEGER DEFAULT 0,
|
|
35
|
-
applied_count INTEGER DEFAULT 0,
|
|
36
|
-
created_at TEXT DEFAULT (datetime('now'))
|
|
37
|
-
);
|
|
38
|
-
|
|
39
|
-
CREATE TABLE IF NOT EXISTS healing_log (
|
|
40
|
-
id TEXT PRIMARY KEY,
|
|
41
|
-
registry_id TEXT,
|
|
42
|
-
site_id TEXT NOT NULL,
|
|
43
|
-
old_selector TEXT NOT NULL,
|
|
44
|
-
new_selector TEXT,
|
|
45
|
-
strategy TEXT,
|
|
46
|
-
confidence REAL,
|
|
47
|
-
success INTEGER,
|
|
48
|
-
created_at TEXT DEFAULT (datetime('now'))
|
|
49
|
-
);
|
|
50
|
-
|
|
51
|
-
CREATE TABLE IF NOT EXISTS element_snapshots (
|
|
52
|
-
id TEXT PRIMARY KEY,
|
|
53
|
-
site_id TEXT NOT NULL,
|
|
54
|
-
url TEXT NOT NULL,
|
|
55
|
-
selector TEXT NOT NULL,
|
|
56
|
-
snapshot TEXT DEFAULT '{}',
|
|
57
|
-
captured_at TEXT DEFAULT (datetime('now'))
|
|
58
|
-
);
|
|
59
|
-
|
|
60
|
-
CREATE INDEX IF NOT EXISTS idx_selector_registry_site ON selector_registry(site_id);
|
|
61
|
-
CREATE INDEX IF NOT EXISTS idx_selector_registry_action ON selector_registry(action_name);
|
|
62
|
-
CREATE INDEX IF NOT EXISTS idx_selector_registry_site_action ON selector_registry(site_id, action_name);
|
|
63
|
-
CREATE INDEX IF NOT EXISTS idx_selector_corrections_site ON selector_corrections(site_id);
|
|
64
|
-
CREATE INDEX IF NOT EXISTS idx_healing_log_site ON healing_log(site_id);
|
|
65
|
-
CREATE INDEX IF NOT EXISTS idx_element_snapshots_site ON element_snapshots(site_id);
|
|
66
|
-
CREATE INDEX IF NOT EXISTS idx_element_snapshots_site_url ON element_snapshots(site_id, url);
|
|
67
|
-
`);
|
|
68
|
-
|
|
69
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
70
|
-
// Prepared Statements
|
|
71
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
72
|
-
|
|
73
|
-
// Prepared statements used by the service functions in this file. Each one is
// compiled once, when the module is loaded, through db.prepare(); the `?`
// placeholders are bound positionally in the order they appear in the SQL.
const stmts = {
  // Insert a registry row. original_selector and current_selector are bound
  // separately; confidence is fixed at 1.0 and last_verified is set to now.
  insertRegistry: db.prepare(`
    INSERT INTO selector_registry (id, site_id, action_name, original_selector, current_selector, selector_type, element_signature, confidence, last_verified)
    VALUES (?, ?, ?, ?, ?, ?, ?, 1.0, datetime('now'))
  `),
  // Look up registry rows by (site_id, action_name). No ORDER BY: if several
  // rows match, which one a single-row fetch returns is not specified here.
  findRegistry: db.prepare(`
    SELECT * FROM selector_registry WHERE site_id = ? AND action_name = ?
  `),
  // Look up one registry row by primary key.
  findRegistryById: db.prepare(`
    SELECT * FROM selector_registry WHERE id = ?
  `),
  // Replace the current selector with the given confidence, bump heal_count
  // and stamp last_healed / updated_at.
  updateRegistrySelector: db.prepare(`
    UPDATE selector_registry
    SET current_selector = ?, confidence = ?, heal_count = heal_count + 1,
        last_healed = datetime('now'), updated_at = datetime('now')
    WHERE id = ?
  `),
  // Store a verification outcome; updates every row matching
  // (site_id, action_name), not a single id.
  updateRegistryVerified: db.prepare(`
    UPDATE selector_registry
    SET verified = ?, confidence = ?, last_verified = datetime('now'), updated_at = datetime('now')
    WHERE site_id = ? AND action_name = ?
  `),
  // Insert a correction row; applied_count and created_at use column defaults.
  insertCorrection: db.prepare(`
    INSERT INTO selector_corrections (id, registry_id, site_id, old_selector, new_selector, corrected_by, reason, shared)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
  `),
  // Same as updateRegistrySelector, except confidence is reset to 1.0.
  updateRegistryFromCorrection: db.prepare(`
    UPDATE selector_registry
    SET current_selector = ?, confidence = 1.0, heal_count = heal_count + 1,
        last_healed = datetime('now'), updated_at = datetime('now')
    WHERE id = ?
  `),
  // Shared (shared = 1) corrections for an exact old_selector on a site,
  // most-applied first.
  findSharedCorrections: db.prepare(`
    SELECT * FROM selector_corrections
    WHERE site_id = ? AND old_selector = ? AND shared = 1
    ORDER BY applied_count DESC
  `),
  // Increment applied_count on one correction.
  incrementCorrectionApplied: db.prepare(`
    UPDATE selector_corrections SET applied_count = applied_count + 1 WHERE id = ?
  `),
  // Insert one healing-log row; created_at uses the column default.
  insertHealingLog: db.prepare(`
    INSERT INTO healing_log (id, registry_id, site_id, old_selector, new_selector, strategy, confidence, success)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
  `),
  // Newest-first healing log for a site; the second parameter is the LIMIT.
  getHealingLogBySite: db.prepare(`
    SELECT * FROM healing_log WHERE site_id = ? ORDER BY created_at DESC LIMIT ?
  `),
  // Newest-first healing log for one action on a site. The JOIN is an inner
  // join, so log rows whose registry_id is NULL or matches no registry row
  // are not returned.
  getHealingLogBySiteAction: db.prepare(`
    SELECT hl.* FROM healing_log hl
    JOIN selector_registry sr ON hl.registry_id = sr.id
    WHERE hl.site_id = ? AND sr.action_name = ?
    ORDER BY hl.created_at DESC LIMIT ?
  `),
  // All registry rows for a site.
  getSelectorsBySite: db.prepare(`
    SELECT * FROM selector_registry WHERE site_id = ?
  `),
  // Insert a snapshot row with captured_at set to now.
  insertSnapshot: db.prepare(`
    INSERT INTO element_snapshots (id, site_id, url, selector, snapshot, captured_at)
    VALUES (?, ?, ?, ?, ?, datetime('now'))
  `),
  // Most recent snapshot (by captured_at) for a site/url pair.
  getLatestSnapshot: db.prepare(`
    SELECT * FROM element_snapshots WHERE site_id = ? AND url = ? ORDER BY captured_at DESC LIMIT 1
  `),
};
|
|
137
|
-
|
|
138
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
139
|
-
// 1. Register Selector
|
|
140
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
141
|
-
|
|
142
|
-
/**
 * Store a new selector for an action on a site.
 *
 * The selector is written as both the original and the current selector.
 * The selector type falls back to 'css' when none is supplied, and the
 * element signature is stored as given when it is already a string,
 * otherwise serialized with JSON.stringify (an absent signature becomes '{}').
 *
 * @param {string} siteId
 * @param {{actionName: string, selector: string, selectorType?: string, elementSignature?: (object|string)}} details
 * @returns {{id: string, siteId: string, actionName: string, selector: string, selectorType: string}}
 */
function registerSelector(siteId, { actionName, selector, selectorType, elementSignature }) {
  const registryId = crypto.randomUUID();
  const resolvedType = selectorType || 'css';

  let serializedSignature;
  if (typeof elementSignature === 'string') {
    serializedSignature = elementSignature;
  } else {
    serializedSignature = JSON.stringify(elementSignature || {});
  }

  const params = [
    registryId,
    siteId,
    actionName,
    selector, // original_selector
    selector, // current_selector starts out identical
    resolvedType,
    serializedSignature,
  ];
  stmts.insertRegistry.run(...params);

  return {
    id: registryId,
    siteId,
    actionName,
    selector,
    selectorType: resolvedType,
  };
}
|
|
155
|
-
|
|
156
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
157
|
-
// 2. Capture Element Signature
|
|
158
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
159
|
-
|
|
160
|
-
/**
 * Turn a class list into a sorted array of non-empty class names.
 * Accepts an array (`classes`) or a whitespace-separated string (`className`);
 * anything else (including a non-string className) yields [].
 */
function normalizeClassList(classes, className) {
  if (Array.isArray(classes)) {
    return classes.filter(Boolean).sort();
  }
  if (typeof className === 'string') {
    return className.split(/\s+/).filter(Boolean).sort();
  }
  return [];
}

/** Reduce a node description to its lowercase tag, id (or null) and sorted classes. */
function summarizeNode(node) {
  return {
    tag: String(node.tag || node.tagName || '').toLowerCase(),
    id: node.id || null,
    classes: normalizeClassList(node.classes, node.className),
  };
}

/**
 * Build a normalized signature for an element description.
 *
 * @param {object|null|undefined} elementData - Element description. Reads
 *   `tag`/`tagName`, `id`, `classes`/`className`, `attributes`/`attrs`,
 *   `text`/`textContent`/`innerText`, `parent` and `siblings`.
 * @returns {object} `{}` when `elementData` is falsy; otherwise
 *   `{ tag, id, classes, attributes, text, parent, siblings, capturedAt }` where
 *   - `tag` is lowercased, `id` is the id or null,
 *   - `classes` is sorted with empty entries removed,
 *   - `attributes` has lowercased keys and omits class/id/style,
 *   - `text` is trimmed and cut to 200 characters,
 *   - `parent` is `{ tag, id, classes }` or null,
 *   - `siblings` is an array of `{ tag, id, classes }`,
 *   - `capturedAt` is the current time as an ISO string.
 */
function captureElementSignature(elementData) {
  if (!elementData) return {};

  const { tag, id, classes } = summarizeNode(elementData);

  // Keys are visited in sorted order so the resulting object is built
  // deterministically regardless of the input's key order.
  const attributes = {};
  const rawAttrs = elementData.attributes || elementData.attrs || {};
  for (const key of Object.keys(rawAttrs).sort()) {
    const lower = key.toLowerCase();
    if (lower === 'class' || lower === 'id' || lower === 'style') continue;
    attributes[lower] = rawAttrs[key];
  }

  // Coerce before trimming: a numeric or otherwise non-string text value
  // would make .trim() throw.
  const rawText = elementData.text || elementData.textContent || elementData.innerText || '';
  const text = String(rawText).trim().substring(0, 200);

  const parent = elementData.parent ? summarizeNode(elementData.parent) : null;

  // Null/undefined sibling entries carry no information and are dropped
  // instead of crashing the whole capture.
  const siblings = Array.isArray(elementData.siblings)
    ? elementData.siblings.filter(Boolean).map(summarizeNode)
    : [];

  return {
    tag,
    id,
    classes,
    attributes,
    text,
    parent,
    siblings,
    capturedAt: new Date().toISOString(),
  };
}
|
|
219
|
-
|
|
220
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
221
|
-
// 3. Levenshtein Distance & Text Similarity
|
|
222
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
223
|
-
|
|
224
|
-
/**
 * Levenshtein (edit) distance between two strings: the minimum number of
 * single-character insertions, deletions and substitutions turning `a` into `b`.
 *
 * Only two rows of the dynamic-programming table are kept, so memory is
 * proportional to the shorter input rather than to the product of both
 * lengths.
 *
 * @param {string|null|undefined} a
 * @param {string|null|undefined} b
 * @returns {number} 0 when the inputs are identical; the other input's length
 *   (or 0) when one input is empty or missing.
 */
function levenshteinDistance(a, b) {
  if (a === b) return 0;
  if (!a || !a.length) return b ? b.length : 0;
  if (!b || !b.length) return a.length;

  // The distance is symmetric, so iterate rows over the longer input and keep
  // the working rows as short as possible.
  const [longer, shorter] = a.length >= b.length ? [a, b] : [b, a];
  const cols = shorter.length;

  // previousRow[j] = distance between the first i-1 chars of `longer` and the
  // first j chars of `shorter`; row 0 is simply 0..cols.
  let previousRow = Array.from({ length: cols + 1 }, (_, j) => j);
  let currentRow = new Array(cols + 1);

  for (let i = 1; i <= longer.length; i++) {
    currentRow[0] = i;
    for (let j = 1; j <= cols; j++) {
      const cost = longer[i - 1] === shorter[j - 1] ? 0 : 1;
      currentRow[j] = Math.min(
        previousRow[j] + 1,        // deletion
        currentRow[j - 1] + 1,     // insertion
        previousRow[j - 1] + cost  // substitution / match
      );
    }
    [previousRow, currentRow] = [currentRow, previousRow];
  }

  return previousRow[cols];
}
|
|
254
|
-
|
|
255
|
-
/**
 * Similarity of two strings on a 0..1 scale, derived from their edit distance:
 * 1 - distance / (length of the longer string).
 *
 * @param {string|null|undefined} a
 * @param {string|null|undefined} b
 * @returns {number} 1 when both inputs are empty/missing, 0 when exactly one
 *   is, otherwise the normalized similarity.
 */
function textSimilarity(a, b) {
  const aMissing = !a;
  const bMissing = !b;

  if (aMissing && bMissing) {
    return 1;
  }
  if (aMissing || bMissing) {
    return 0;
  }

  const longest = Math.max(a.length, b.length);
  if (longest === 0) {
    return 1;
  }

  const distance = levenshteinDistance(a, b);
  return 1 - (distance / longest);
}
|
|
262
|
-
|
|
263
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
264
|
-
// 4. Heal Selector (Core Algorithm)
|
|
265
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
266
|
-
|
|
267
|
-
/**
 * Try to find a replacement for a selector that no longer matches.
 *
 * Every page element is scored against the signature stored for
 * (siteId, actionName) with five strategies, and shared corrections for the
 * failed selector are added as a sixth. The highest-confidence candidate at or
 * above 0.6 wins. Each call writes one healing_log row; on success the
 * registry row (if any) is updated and, for a community match, the
 * correction's applied counter is incremented.
 *
 * Confidence weights: attribute_match x0.95, id_match x0.98, text_match x0.85,
 * structural_match x0.80, class_match x0.75, community_match 0.70 plus up to
 * 0.15 depending on applied_count.
 *
 * @param {string} siteId
 * @param {string} actionName
 * @param {string} failedSelector - Selector that stopped matching.
 * @param {Array<object>} pageElements - Element descriptions from the current page.
 * @returns {{healed: boolean, newSelector: (string|null), strategy: (string|null), confidence: number}}
 *   `confidence` is rounded to three decimals on success and 0 on failure.
 */
function healSelector(siteId, actionName, failedSelector, pageElements) {
  const registry = stmts.findRegistry.get(siteId, actionName);
  const registryId = registry ? registry.id : null;

  // The stored signature is free-form text in the database: tolerate malformed
  // JSON as well as valid JSON that is not an object (e.g. "null"), which
  // would otherwise crash on the first property access.
  let storedSignature = {};
  if (registry) {
    try {
      const parsed = JSON.parse(registry.element_signature || '{}');
      if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
        storedSignature = parsed;
      }
    } catch { /* malformed JSON: keep the empty signature */ }
  }

  // Shared failure path: log the failed attempt and return the "not healed" result.
  const recordFailure = () => {
    stmts.insertHealingLog.run(crypto.randomUUID(), registryId, siteId, failedSelector, null, 'none', 0, 0);
    return { healed: false, newSelector: null, strategy: null, confidence: 0 };
  };

  if (!Array.isArray(pageElements) || pageElements.length === 0) {
    return recordFailure();
  }

  // Everything derived from the stored signature is loop-invariant; compute it once.
  const storedAttrs = storedSignature.attributes || {};
  const trackedAttrKeys = Object.keys(storedAttrs)
    .filter(key => key.startsWith('data-') || key.startsWith('aria-'));
  const storedText = typeof storedSignature.text === 'string'
    ? storedSignature.text.toLowerCase()
    : '';
  const storedParent = storedSignature.parent || null;
  const storedParentClasses = storedParent && Array.isArray(storedParent.classes)
    ? storedParent.classes
    : [];
  const storedSiblingTags = Array.isArray(storedSignature.siblings)
    ? storedSignature.siblings.map(s => (s ? s.tag : undefined)).sort().join(',')
    : null;
  const storedClasses = Array.isArray(storedSignature.classes) ? storedSignature.classes : [];

  const candidates = [];

  for (const el of pageElements) {
    const sig = captureElementSignature(el);

    // Strategy 1: attribute_match — share of stored data-*/aria-* attributes
    // that the element carries with an identical value.
    if (trackedAttrKeys.length > 0) {
      const elAttrs = sig.attributes || {};
      const attrMatches = trackedAttrKeys
        .filter(key => elAttrs[key] === storedAttrs[key]).length;
      if (attrMatches > 0) {
        const conf = attrMatches / trackedAttrKeys.length;
        candidates.push({
          element: el,
          signature: sig,
          strategy: 'attribute_match',
          confidence: conf * 0.95,
          selector: buildCSSPath(el),
        });
      }
    }

    // Strategy 2: id_match — fuzzy comparison of the stored and current id.
    // NOTE(review): the id is interpolated unescaped; an id containing
    // CSS-special characters produces an invalid selector.
    if (storedSignature.id && sig.id) {
      const sim = textSimilarity(storedSignature.id, sig.id);
      if (sim > 0.5) {
        candidates.push({
          element: el,
          signature: sig,
          strategy: 'id_match',
          confidence: sim * 0.98,
          selector: `#${sig.id}`,
        });
      }
    }

    // Strategy 3: text_match — case-insensitive text similarity.
    if (storedText && sig.text) {
      const sim = textSimilarity(storedText, sig.text.toLowerCase());
      if (sim > 0.5) {
        candidates.push({
          element: el,
          signature: sig,
          strategy: 'text_match',
          confidence: sim * 0.85,
          selector: buildCSSPath(el),
        });
      }
    }

    // Strategy 4: structural_match — average of the checks that apply:
    // own tag, parent tag, parent id, parent classes, sibling tag sequence.
    if (storedParent && sig.parent) {
      let structScore = 0;
      let structChecks = 0;

      if (storedSignature.tag && sig.tag) {
        structChecks++;
        if (storedSignature.tag === sig.tag) structScore++;
      }

      // The parent tag is always compared, so structChecks is never 0 below.
      structChecks++;
      if (storedParent.tag === sig.parent.tag) structScore++;

      if (storedParent.id && sig.parent.id) {
        structChecks++;
        if (storedParent.id === sig.parent.id) structScore++;
      }

      if (storedParentClasses.length > 0) {
        const elParentClasses = sig.parent.classes || [];
        structChecks++;
        const overlap = storedParentClasses.filter(c => elParentClasses.includes(c));
        structScore += overlap.length / storedParentClasses.length;
      }

      if (storedSiblingTags !== null && Array.isArray(sig.siblings)) {
        structChecks++;
        const elSibTags = sig.siblings.map(s => s.tag).sort().join(',');
        structScore += textSimilarity(storedSiblingTags, elSibTags);
      }

      const conf = structScore / structChecks;
      if (conf > 0.4) {
        // el.index is treated as 0-based; missing index falls back to first child.
        const nthChild = el.index != null ? el.index + 1 : 1;
        const parentSel = sig.parent.id ? `#${sig.parent.id}` : sig.parent.tag;
        candidates.push({
          element: el,
          signature: sig,
          strategy: 'structural_match',
          confidence: conf * 0.80,
          selector: `${parentSel} > ${sig.tag}:nth-child(${nthChild})`,
        });
      }
    }

    // Strategy 5: class_match — overlap relative to the larger class list.
    const elClasses = sig.classes || [];
    if (storedClasses.length > 0 && elClasses.length > 0) {
      const overlap = storedClasses.filter(c => elClasses.includes(c));
      if (overlap.length > 0) {
        const conf = overlap.length / Math.max(storedClasses.length, elClasses.length);
        candidates.push({
          element: el,
          signature: sig,
          strategy: 'class_match',
          confidence: conf * 0.75,
          selector: `${sig.tag || '*'}.${overlap.join('.')}`,
        });
      }
    }
  }

  // Strategy 6: community_match — shared corrections recorded for this exact
  // failed selector; frequently applied corrections get a capped boost.
  const communityCorrections = stmts.findSharedCorrections.all(siteId, failedSelector);
  for (const corr of communityCorrections) {
    const appliedBoost = Math.min(corr.applied_count * 0.02, 0.15);
    candidates.push({
      element: null,
      signature: null,
      strategy: 'community_match',
      confidence: 0.70 + appliedBoost,
      selector: corr.new_selector,
      correctionId: corr.id,
    });
  }

  // Pick the highest-confidence candidate that clears the 0.6 threshold.
  candidates.sort((a, b) => b.confidence - a.confidence);
  const best = candidates.find(c => c.confidence >= 0.6);

  if (!best) {
    return recordFailure();
  }

  stmts.insertHealingLog.run(
    crypto.randomUUID(), registryId, siteId, failedSelector,
    best.selector, best.strategy, best.confidence, 1
  );

  if (registryId) {
    stmts.updateRegistrySelector.run(best.selector, best.confidence, registryId);
  }

  if (best.correctionId) {
    stmts.incrementCorrectionApplied.run(best.correctionId);
  }

  return {
    healed: true,
    newSelector: best.selector,
    strategy: best.strategy,
    confidence: Math.round(best.confidence * 1000) / 1000,
  };
}
|
|
455
|
-
|
|
456
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
457
|
-
// 5. Submit Correction
|
|
458
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
459
|
-
|
|
460
|
-
/**
 * Record a manually supplied replacement selector and, when it targets a
 * registry entry of the same site, make it that entry's current selector.
 *
 * The correction row is always inserted. The registry row is only updated
 * when `registryId` resolves to a row whose site_id equals `siteId`, so a
 * correction submitted for one site cannot overwrite another site's selector.
 *
 * @param {string} siteId
 * @param {string|null|undefined} registryId - Registry row the correction applies to, if any.
 * @param {{oldSelector: string, newSelector: string, correctedBy?: string, reason?: string, shared?: boolean}} correction
 *   `correctedBy` defaults to 'user'; `shared` is stored as 1 or 0.
 * @returns {{id: string, siteId: string, registryId: *, oldSelector: string, newSelector: string}}
 */
function submitCorrection(siteId, registryId, { oldSelector, newSelector, correctedBy, reason, shared }) {
  const id = crypto.randomUUID();
  stmts.insertCorrection.run(
    id, registryId || null, siteId,
    oldSelector, newSelector,
    correctedBy || 'user',
    reason || null,
    shared ? 1 : 0
  );

  if (registryId) {
    const reg = stmts.findRegistryById.get(registryId);
    // Compare as strings: site_id is stored as TEXT while callers may pass a number.
    if (reg && String(reg.site_id) === String(siteId)) {
      stmts.updateRegistryFromCorrection.run(newSelector, registryId);
    }
  }

  return { id, siteId, registryId, oldSelector, newSelector };
}
|
|
479
|
-
|
|
480
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
481
|
-
// 6. Community Suggestions
|
|
482
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
483
|
-
|
|
484
|
-
/**
 * List the shared corrections recorded for a failed selector on a site.
 *
 * @param {string} siteId
 * @param {string} failedSelector - Matched exactly against old_selector.
 * @returns {Array<object>} selector_corrections rows, most-applied first.
 */
function getCommunitySuggestions(siteId, failedSelector) {
  const sharedCorrections = stmts.findSharedCorrections.all(siteId, failedSelector);
  return sharedCorrections;
}
|
|
487
|
-
|
|
488
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
489
|
-
// 7. Verify Selector
|
|
490
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
491
|
-
|
|
492
|
-
/**
 * Compare the element currently matched by a registered selector with the
 * signature stored at registration time and persist the outcome.
 *
 * Up to five checks contribute equally to the confidence: tag, id, classes
 * (share of stored classes still present), attributes (share of identical
 * values over the union of keys) and text (case-insensitive similarity).
 * Checks whose stored value is absent are skipped. The selector counts as
 * valid at confidence >= 0.7, and the result is written back to the registry.
 *
 * @param {string} siteId
 * @param {string} actionName
 * @param {object|null} elementData - Description of the element found now.
 * @returns {{valid: boolean, confidence: number, drift: (object|null), error?: string}}
 *   `drift` lists the differing aspects (tag, id, classes, attributes, text)
 *   or is null when nothing differs; `error` is set only when the selector is
 *   not registered.
 */
function verifySelector(siteId, actionName, elementData) {
  const registry = stmts.findRegistry.get(siteId, actionName);
  if (!registry) {
    return { valid: false, confidence: 0, drift: null, error: 'Selector not registered' };
  }

  // Tolerate malformed JSON and valid JSON that is not an object (e.g. "null").
  let storedSignature = {};
  try {
    const parsed = JSON.parse(registry.element_signature || '{}');
    if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
      storedSignature = parsed;
    }
  } catch { /* malformed JSON: keep the empty signature */ }

  const currentSignature = captureElementSignature(elementData);

  let totalChecks = 0;
  let matchScore = 0;
  const driftDetails = {};

  if (storedSignature.tag) {
    totalChecks++;
    if (storedSignature.tag === currentSignature.tag) {
      matchScore++;
    } else {
      driftDetails.tag = { expected: storedSignature.tag, actual: currentSignature.tag };
    }
  }

  if (storedSignature.id) {
    totalChecks++;
    if (storedSignature.id === currentSignature.id) {
      matchScore++;
    } else {
      driftDetails.id = { expected: storedSignature.id, actual: currentSignature.id };
    }
  }

  const storedClasses = Array.isArray(storedSignature.classes) ? storedSignature.classes : [];
  const currentClasses = Array.isArray(currentSignature.classes) ? currentSignature.classes : [];
  if (storedClasses.length > 0) {
    totalChecks++;
    const overlap = storedClasses.filter(c => currentClasses.includes(c));
    const classRatio = overlap.length / storedClasses.length;
    matchScore += classRatio;
    if (classRatio < 1) {
      const removed = storedClasses.filter(c => !currentClasses.includes(c));
      const added = currentClasses.filter(c => !storedClasses.includes(c));
      driftDetails.classes = { removed, added, overlapRatio: classRatio };
    }
  }

  const storedAttrs = storedSignature.attributes || {};
  const currentAttrs = currentSignature.attributes || {};
  const allAttrKeys = [...new Set([...Object.keys(storedAttrs), ...Object.keys(currentAttrs)])];
  if (allAttrKeys.length > 0) {
    totalChecks++;
    let attrMatch = 0;
    const changedAttrs = {};
    for (const key of allAttrKeys) {
      if (storedAttrs[key] === currentAttrs[key]) {
        attrMatch++;
      } else {
        // Only a missing attribute is reported as null; falsy values such as
        // '' or 0 are real values and are reported as they are.
        changedAttrs[key] = {
          expected: storedAttrs[key] === undefined ? null : storedAttrs[key],
          actual: currentAttrs[key] === undefined ? null : currentAttrs[key],
        };
      }
    }
    matchScore += attrMatch / allAttrKeys.length;
    if (Object.keys(changedAttrs).length > 0) {
      driftDetails.attributes = changedAttrs;
    }
  }

  if (typeof storedSignature.text === 'string' && storedSignature.text) {
    totalChecks++;
    const currentText = typeof currentSignature.text === 'string' ? currentSignature.text : '';
    const sim = textSimilarity(storedSignature.text.toLowerCase(), currentText.toLowerCase());
    matchScore += sim;
    if (sim < 0.95) {
      driftDetails.text = {
        expected: storedSignature.text.substring(0, 50),
        actual: currentText.substring(0, 50),
        similarity: Math.round(sim * 1000) / 1000,
      };
    }
  }

  const confidence = totalChecks > 0 ? matchScore / totalChecks : 0;
  const valid = confidence >= 0.7;
  const hasDrift = Object.keys(driftDetails).length > 0;

  stmts.updateRegistryVerified.run(valid ? 1 : 0, confidence, siteId, actionName);

  return {
    valid,
    confidence: Math.round(confidence * 1000) / 1000,
    drift: hasDrift ? driftDetails : null,
  };
}
|
|
587
|
-
|
|
588
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
589
|
-
// 8. Selector Health
|
|
590
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
591
|
-
|
|
592
|
-
/**
 * Summarize the state of every selector registered for a site.
 *
 * @param {string} siteId
 * @returns {{total: number, verified: number, healed: number, avgConfidence: number, broken: Array<object>}}
 *   `verified` counts rows with a truthy verified flag, `healed` counts rows
 *   healed at least once, `avgConfidence` is rounded to three decimals, and
 *   `broken` lists rows that are unverified or below 0.6 confidence.
 */
function getSelectorHealth(siteId) {
  const rows = stmts.getSelectorsBySite.all(siteId);
  const rowCount = rows.length;

  if (rowCount === 0) {
    return { total: 0, verified: 0, healed: 0, avgConfidence: 0, broken: [] };
  }

  const confidenceTotal = rows.reduce((sum, row) => sum + row.confidence, 0);
  const verified = rows.filter(row => row.verified).length;
  const healed = rows.filter(row => row.heal_count > 0).length;

  const broken = rows
    .filter(row => !row.verified || row.confidence < 0.6)
    .map(row => ({
      id: row.id,
      actionName: row.action_name,
      currentSelector: row.current_selector,
      confidence: row.confidence,
      healCount: row.heal_count,
      lastHealed: row.last_healed,
    }));

  return {
    total: rowCount,
    verified,
    healed,
    avgConfidence: Math.round((confidenceTotal / rowCount) * 1000) / 1000,
    broken,
  };
}
|
|
629
|
-
|
|
630
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
631
|
-
// 9. Healing History
|
|
632
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
633
|
-
|
|
634
|
-
/**
 * Fetch healing-log rows for a site, newest first.
 *
 * @param {string} siteId
 * @param {{limit?: (number|string), actionName?: string}} [options]
 *   `limit` is truncated to an integer; anything that is not a finite number
 *   of at least 1 falls back to 50. This matters because a negative LIMIT
 *   removes the upper bound in SQLite. When `actionName` is given only
 *   entries for that action are returned.
 * @returns {Array<object>} healing_log rows.
 */
function getHealingHistory(siteId, { limit, actionName } = {}) {
  const requested = Math.trunc(Number(limit));
  const lim = Number.isFinite(requested) && requested >= 1 ? requested : 50;

  if (actionName) {
    return stmts.getHealingLogBySiteAction.all(siteId, actionName, lim);
  }
  return stmts.getHealingLogBySite.all(siteId, lim);
}
|
|
641
|
-
|
|
642
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
643
|
-
// 10. Snapshot Elements
|
|
644
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
645
|
-
|
|
646
|
-
/**
 * Persist a snapshot of the elements seen on a page.
 *
 * Each element is reduced to its signature and the list is stored as JSON
 * with the selector column set to '*'. Non-array input is stored as an
 * empty list.
 *
 * @param {string} siteId
 * @param {string} url
 * @param {Array<object>} elements
 * @returns {{id: string, siteId: string, url: string, elementCount: number}}
 */
function snapshotElements(siteId, url, elements) {
  const snapshotId = crypto.randomUUID();

  let signatures;
  if (Array.isArray(elements)) {
    signatures = elements.map((element) => captureElementSignature(element));
  } else {
    signatures = [];
  }

  const serialized = JSON.stringify(signatures);
  stmts.insertSnapshot.run(snapshotId, siteId, url, '*', serialized);

  return {
    id: snapshotId,
    siteId,
    url,
    elementCount: signatures.length,
  };
}
|
|
659
|
-
|
|
660
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
661
|
-
// 11. Detect Drift
|
|
662
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
663
|
-
|
|
664
|
-
/**
 * Compare the elements currently on a page with the latest stored snapshot.
 *
 * Elements are identified by a fingerprint of tag, id, classes and text.
 * A current element whose fingerprint is new counts as "changed" when the
 * previous element at the same index has the same tag and a fingerprint
 * similarity strictly between 0.3 and 1.0; otherwise it counts as "added".
 * Previous elements whose fingerprint disappeared count as "removed" unless
 * their index was already reported as changed.
 *
 * @param {string} siteId
 * @param {string} url
 * @param {Array<object>} currentElements
 * @returns {object} Without a previous snapshot:
 *   `{ hasDrift: false, message, changed: [], added: [], removed: [] }`.
 *   Otherwise `{ hasDrift, snapshotDate, changed, added, removed, summary }`.
 */
function detectDrift(siteId, url, currentElements) {
  const lastSnapshot = stmts.getLatestSnapshot.get(siteId, url);
  if (!lastSnapshot) {
    return { hasDrift: false, message: 'No previous snapshot found', changed: [], added: [], removed: [] };
  }

  // The snapshot column is free-form text: treat malformed JSON and any JSON
  // value that is not an array (e.g. "null" or the column default "{}") as an
  // empty snapshot.
  let previousElements = [];
  try {
    const parsed = JSON.parse(lastSnapshot.snapshot || '[]');
    if (Array.isArray(parsed)) previousElements = parsed;
  } catch { /* malformed JSON: keep the empty snapshot */ }

  const currentSigs = (Array.isArray(currentElements) ? currentElements : [])
    .map(el => captureElementSignature(el));

  // Null entries and non-array class lists fingerprint as empty fields.
  const fingerprint = (sig) => {
    const s = sig || {};
    const classes = Array.isArray(s.classes) ? s.classes : [];
    return `${s.tag || ''}|${s.id || ''}|${classes.join(',')}|${s.text || ''}`;
  };

  // fingerprint -> index; duplicate fingerprints keep the last index.
  const prevFingerprints = new Map();
  previousElements.forEach((el, i) => prevFingerprints.set(fingerprint(el), i));

  const currFingerprints = new Map();
  currentSigs.forEach((sig, i) => currFingerprints.set(fingerprint(sig), i));

  const changed = [];
  const added = [];
  const removed = [];
  const changedIndexes = new Set();

  for (const [fp, idx] of currFingerprints) {
    if (prevFingerprints.has(fp)) continue;

    const prevAtSameIndex = previousElements[idx];
    const current = currentSigs[idx];
    if (prevAtSameIndex && current && prevAtSameIndex.tag === current.tag) {
      const sim = textSimilarity(fingerprint(prevAtSameIndex), fp);
      if (sim > 0.3 && sim < 1.0) {
        changed.push({
          index: idx,
          previous: prevAtSameIndex,
          current,
          similarity: Math.round(sim * 1000) / 1000,
        });
        changedIndexes.add(idx);
        continue;
      }
    }
    added.push({ index: idx, element: current });
  }

  for (const [fp, idx] of prevFingerprints) {
    if (!currFingerprints.has(fp) && !changedIndexes.has(idx)) {
      removed.push({ index: idx, element: previousElements[idx] });
    }
  }

  return {
    hasDrift: changed.length > 0 || added.length > 0 || removed.length > 0,
    snapshotDate: lastSnapshot.captured_at,
    changed,
    added,
    removed,
    summary: {
      changedCount: changed.length,
      addedCount: added.length,
      removedCount: removed.length,
    },
  };
}
|
|
735
|
-
|
|
736
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
737
|
-
// 12. Build CSS Path
|
|
738
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
739
|
-
|
|
740
|
-
/**
 * Build a CSS selector string from a plain element description.
 *
 * Reads `tag`/`tagName`, `id`, `classes` (array) or `className` (string) and
 * `attributes`/`attrs`. An element that has an id short-circuits to `#id`.
 * The `class`, `id` and `style` attributes are never emitted as attribute
 * selectors.
 *
 * @param {object} elementData - Element description; a falsy value yields '*'.
 * @returns {string} The CSS selector.
 */
function buildCSSPath(elementData) {
  if (!elementData) return '*';

  const tag = (elementData.tag || elementData.tagName || '').toLowerCase();
  const id = elementData.id;

  if (id) return `#${id}`;

  let selector = tag || '*';

  let classes = [];
  if (Array.isArray(elementData.classes)) {
    classes = elementData.classes.filter(Boolean);
  } else if (typeof elementData.className === 'string') {
    classes = elementData.className.split(/\s+/).filter(Boolean);
  }
  if (classes.length > 0) {
    selector += '.' + classes.join('.');
  }

  const attrs = elementData.attributes || elementData.attrs || {};
  for (const key of Object.keys(attrs)) {
    const lower = key.toLowerCase();
    if (lower === 'class' || lower === 'id' || lower === 'style') continue;

    const val = attrs[key];
    if (val == null || val === '') {
      // Valueless attribute: match on presence only.
      selector += `[${lower}]`;
      continue;
    }

    // Coerce first: attribute values may arrive as numbers or booleans, and
    // calling .replace on those throws. Backslashes are escaped before quotes
    // so that the quote escapes added afterwards are not themselves doubled.
    const escaped = String(val).replace(/\\/g, '\\\\').replace(/"/g, '\\"');
    selector += `[${lower}="${escaped}"]`;
  }

  return selector;
}
|
|
774
|
-
|
|
775
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
776
|
-
// 13. Build XPath
|
|
777
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
778
|
-
|
|
779
|
-
/**
 * Quote a value as an XPath 1.0 string literal.
 *
 * XPath 1.0 has no escape syntax inside literals, so a value containing both
 * quote kinds is assembled with concat() from double-quoted pieces joined by
 * a single-quoted '"'.
 *
 * @param {*} value - Value to quote; coerced with String().
 * @returns {string} An XPath expression evaluating to the value.
 */
function toXPathLiteral(value) {
  const str = String(value);
  if (!str.includes('"')) return `"${str}"`;
  if (!str.includes("'")) return `'${str}'`;
  const pieces = str.split('"').map((part) => `"${part}"`);
  return `concat(${pieces.join(`, '"', `)})`;
}

/**
 * Build an XPath expression from a plain element description.
 *
 * Reads `tag`/`tagName`, `id`, `classes` (array) or `className` (string),
 * `attributes`/`attrs` and `text`/`textContent`/`innerText`. An element that
 * has an id short-circuits to `//tag[@id=...]`. Trimmed text is only used as
 * a predicate when it is non-empty and at most 80 characters long.
 *
 * @param {object} elementData - Element description; a falsy value yields '//*'.
 * @returns {string} The XPath expression.
 */
function buildXPath(elementData) {
  if (!elementData) return '//*';

  const tag = (elementData.tag || elementData.tagName || '').toLowerCase() || '*';
  const id = elementData.id;

  if (id) return `//${tag}[@id=${toXPathLiteral(id)}]`;

  const predicates = [];

  let classes = [];
  if (Array.isArray(elementData.classes)) {
    classes = elementData.classes.filter(Boolean);
  } else if (typeof elementData.className === 'string') {
    classes = elementData.className.split(/\s+/).filter(Boolean);
  }
  for (const cls of classes) {
    predicates.push(`contains(@class, ${toXPathLiteral(cls)})`);
  }

  const attrs = elementData.attributes || elementData.attrs || {};
  for (const key of Object.keys(attrs)) {
    const lower = key.toLowerCase();
    if (lower === 'class' || lower === 'id' || lower === 'style') continue;
    const val = attrs[key];
    if (val != null && val !== '') {
      predicates.push(`@${lower}=${toXPathLiteral(val)}`);
    } else {
      // Valueless attribute: match on presence only.
      predicates.push(`@${lower}`);
    }
  }

  // String() guards against numeric text, which has no .trim().
  const text = String(
    elementData.text || elementData.textContent || elementData.innerText || ''
  ).trim();
  if (text && text.length <= 80) {
    predicates.push(`normalize-space(text())=${toXPathLiteral(text)}`);
  }

  if (predicates.length === 0) return `//${tag}`;
  return `//${tag}[${predicates.join(' and ')}]`;
}
|
|
823
|
-
|
|
824
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
825
|
-
// Exports
|
|
826
|
-
// ═══════════════════════════════════════════════════════════════════════
|
|
827
|
-
|
|
828
|
-
module.exports = {
|
|
829
|
-
registerSelector,
|
|
830
|
-
captureElementSignature,
|
|
831
|
-
healSelector,
|
|
832
|
-
levenshteinDistance,
|
|
833
|
-
textSimilarity,
|
|
834
|
-
submitCorrection,
|
|
835
|
-
getCommunitySuggestions,
|
|
836
|
-
verifySelector,
|
|
837
|
-
getSelectorHealth,
|
|
838
|
-
getHealingHistory,
|
|
839
|
-
snapshotElements,
|
|
840
|
-
detectDrift,
|
|
841
|
-
buildCSSPath,
|
|
842
|
-
buildXPath,
|
|
843
|
-
};
|
|
1
|
+
const { db } = require('../models/db');
|
|
2
|
+
const crypto = require('crypto');
|
|
3
|
+
|
|
4
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
5
|
+
// Schema
|
|
6
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
7
|
+
|
|
8
|
+
db.exec(`
|
|
9
|
+
CREATE TABLE IF NOT EXISTS selector_registry (
|
|
10
|
+
id TEXT PRIMARY KEY,
|
|
11
|
+
site_id TEXT NOT NULL,
|
|
12
|
+
action_name TEXT NOT NULL,
|
|
13
|
+
original_selector TEXT NOT NULL,
|
|
14
|
+
current_selector TEXT NOT NULL,
|
|
15
|
+
selector_type TEXT,
|
|
16
|
+
element_signature TEXT DEFAULT '{}',
|
|
17
|
+
confidence REAL DEFAULT 1.0,
|
|
18
|
+
verified INTEGER DEFAULT 1,
|
|
19
|
+
heal_count INTEGER DEFAULT 0,
|
|
20
|
+
last_verified TEXT,
|
|
21
|
+
last_healed TEXT,
|
|
22
|
+
created_at TEXT DEFAULT (datetime('now')),
|
|
23
|
+
updated_at TEXT DEFAULT (datetime('now'))
|
|
24
|
+
);
|
|
25
|
+
|
|
26
|
+
CREATE TABLE IF NOT EXISTS selector_corrections (
|
|
27
|
+
id TEXT PRIMARY KEY,
|
|
28
|
+
registry_id TEXT,
|
|
29
|
+
site_id TEXT NOT NULL,
|
|
30
|
+
old_selector TEXT NOT NULL,
|
|
31
|
+
new_selector TEXT NOT NULL,
|
|
32
|
+
corrected_by TEXT,
|
|
33
|
+
reason TEXT,
|
|
34
|
+
shared INTEGER DEFAULT 0,
|
|
35
|
+
applied_count INTEGER DEFAULT 0,
|
|
36
|
+
created_at TEXT DEFAULT (datetime('now'))
|
|
37
|
+
);
|
|
38
|
+
|
|
39
|
+
CREATE TABLE IF NOT EXISTS healing_log (
|
|
40
|
+
id TEXT PRIMARY KEY,
|
|
41
|
+
registry_id TEXT,
|
|
42
|
+
site_id TEXT NOT NULL,
|
|
43
|
+
old_selector TEXT NOT NULL,
|
|
44
|
+
new_selector TEXT,
|
|
45
|
+
strategy TEXT,
|
|
46
|
+
confidence REAL,
|
|
47
|
+
success INTEGER,
|
|
48
|
+
created_at TEXT DEFAULT (datetime('now'))
|
|
49
|
+
);
|
|
50
|
+
|
|
51
|
+
CREATE TABLE IF NOT EXISTS element_snapshots (
|
|
52
|
+
id TEXT PRIMARY KEY,
|
|
53
|
+
site_id TEXT NOT NULL,
|
|
54
|
+
url TEXT NOT NULL,
|
|
55
|
+
selector TEXT NOT NULL,
|
|
56
|
+
snapshot TEXT DEFAULT '{}',
|
|
57
|
+
captured_at TEXT DEFAULT (datetime('now'))
|
|
58
|
+
);
|
|
59
|
+
|
|
60
|
+
CREATE INDEX IF NOT EXISTS idx_selector_registry_site ON selector_registry(site_id);
|
|
61
|
+
CREATE INDEX IF NOT EXISTS idx_selector_registry_action ON selector_registry(action_name);
|
|
62
|
+
CREATE INDEX IF NOT EXISTS idx_selector_registry_site_action ON selector_registry(site_id, action_name);
|
|
63
|
+
CREATE INDEX IF NOT EXISTS idx_selector_corrections_site ON selector_corrections(site_id);
|
|
64
|
+
CREATE INDEX IF NOT EXISTS idx_healing_log_site ON healing_log(site_id);
|
|
65
|
+
CREATE INDEX IF NOT EXISTS idx_element_snapshots_site ON element_snapshots(site_id);
|
|
66
|
+
CREATE INDEX IF NOT EXISTS idx_element_snapshots_site_url ON element_snapshots(site_id, url);
|
|
67
|
+
`);
|
|
68
|
+
|
|
69
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
70
|
+
// Prepared Statements
|
|
71
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
72
|
+
|
|
73
|
+
const stmts = {
|
|
74
|
+
insertRegistry: db.prepare(`
|
|
75
|
+
INSERT INTO selector_registry (id, site_id, action_name, original_selector, current_selector, selector_type, element_signature, confidence, last_verified)
|
|
76
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, 1.0, datetime('now'))
|
|
77
|
+
`),
|
|
78
|
+
findRegistry: db.prepare(`
|
|
79
|
+
SELECT * FROM selector_registry WHERE site_id = ? AND action_name = ?
|
|
80
|
+
`),
|
|
81
|
+
findRegistryById: db.prepare(`
|
|
82
|
+
SELECT * FROM selector_registry WHERE id = ?
|
|
83
|
+
`),
|
|
84
|
+
updateRegistrySelector: db.prepare(`
|
|
85
|
+
UPDATE selector_registry
|
|
86
|
+
SET current_selector = ?, confidence = ?, heal_count = heal_count + 1,
|
|
87
|
+
last_healed = datetime('now'), updated_at = datetime('now')
|
|
88
|
+
WHERE id = ?
|
|
89
|
+
`),
|
|
90
|
+
updateRegistryVerified: db.prepare(`
|
|
91
|
+
UPDATE selector_registry
|
|
92
|
+
SET verified = ?, confidence = ?, last_verified = datetime('now'), updated_at = datetime('now')
|
|
93
|
+
WHERE site_id = ? AND action_name = ?
|
|
94
|
+
`),
|
|
95
|
+
insertCorrection: db.prepare(`
|
|
96
|
+
INSERT INTO selector_corrections (id, registry_id, site_id, old_selector, new_selector, corrected_by, reason, shared)
|
|
97
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
98
|
+
`),
|
|
99
|
+
updateRegistryFromCorrection: db.prepare(`
|
|
100
|
+
UPDATE selector_registry
|
|
101
|
+
SET current_selector = ?, confidence = 1.0, heal_count = heal_count + 1,
|
|
102
|
+
last_healed = datetime('now'), updated_at = datetime('now')
|
|
103
|
+
WHERE id = ?
|
|
104
|
+
`),
|
|
105
|
+
findSharedCorrections: db.prepare(`
|
|
106
|
+
SELECT * FROM selector_corrections
|
|
107
|
+
WHERE site_id = ? AND old_selector = ? AND shared = 1
|
|
108
|
+
ORDER BY applied_count DESC
|
|
109
|
+
`),
|
|
110
|
+
incrementCorrectionApplied: db.prepare(`
|
|
111
|
+
UPDATE selector_corrections SET applied_count = applied_count + 1 WHERE id = ?
|
|
112
|
+
`),
|
|
113
|
+
insertHealingLog: db.prepare(`
|
|
114
|
+
INSERT INTO healing_log (id, registry_id, site_id, old_selector, new_selector, strategy, confidence, success)
|
|
115
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
|
116
|
+
`),
|
|
117
|
+
getHealingLogBySite: db.prepare(`
|
|
118
|
+
SELECT * FROM healing_log WHERE site_id = ? ORDER BY created_at DESC LIMIT ?
|
|
119
|
+
`),
|
|
120
|
+
getHealingLogBySiteAction: db.prepare(`
|
|
121
|
+
SELECT hl.* FROM healing_log hl
|
|
122
|
+
JOIN selector_registry sr ON hl.registry_id = sr.id
|
|
123
|
+
WHERE hl.site_id = ? AND sr.action_name = ?
|
|
124
|
+
ORDER BY hl.created_at DESC LIMIT ?
|
|
125
|
+
`),
|
|
126
|
+
getSelectorsBySite: db.prepare(`
|
|
127
|
+
SELECT * FROM selector_registry WHERE site_id = ?
|
|
128
|
+
`),
|
|
129
|
+
insertSnapshot: db.prepare(`
|
|
130
|
+
INSERT INTO element_snapshots (id, site_id, url, selector, snapshot, captured_at)
|
|
131
|
+
VALUES (?, ?, ?, ?, ?, datetime('now'))
|
|
132
|
+
`),
|
|
133
|
+
getLatestSnapshot: db.prepare(`
|
|
134
|
+
SELECT * FROM element_snapshots WHERE site_id = ? AND url = ? ORDER BY captured_at DESC LIMIT 1
|
|
135
|
+
`),
|
|
136
|
+
};
|
|
137
|
+
|
|
138
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
139
|
+
// 1. Register Selector
|
|
140
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
141
|
+
|
|
142
|
+
/**
 * Insert a new row into `selector_registry` for a site action.
 *
 * The given selector is stored as both the original and the current selector.
 * A string `elementSignature` is stored as-is; anything else is serialized
 * with JSON.stringify (a missing signature becomes `{}`). The selector type
 * defaults to 'css'.
 *
 * @param {string} siteId
 * @param {{actionName: string, selector: string, selectorType?: string, elementSignature?: (string|object)}} details
 * @returns {{id: string, siteId: string, actionName: string, selector: string, selectorType: string}}
 */
function registerSelector(siteId, { actionName, selector, selectorType, elementSignature }) {
  const id = crypto.randomUUID();
  const resolvedType = selectorType || 'css';

  let signatureJson;
  if (typeof elementSignature === 'string') {
    signatureJson = elementSignature;
  } else {
    signatureJson = JSON.stringify(elementSignature || {});
  }

  // `selector` is passed twice: once for original_selector, once for current_selector.
  stmts.insertRegistry.run(id, siteId, actionName, selector, selector, resolvedType, signatureJson);

  return { id, siteId, actionName, selector, selectorType: resolvedType };
}
|
|
155
|
+
|
|
156
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
157
|
+
// 2. Capture Element Signature
|
|
158
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
159
|
+
|
|
160
|
+
/**
 * Return a node's class names as a sorted array with empty entries removed.
 * Accepts either a `classes` array or a string `className`; anything else
 * (for example a non-string `className` object) yields [].
 */
function sortedClassList(node) {
  if (Array.isArray(node.classes)) {
    return node.classes.filter(Boolean).sort();
  }
  if (typeof node.className === 'string') {
    return node.className.split(/\s+/).filter(Boolean).sort();
  }
  return [];
}

/** Reduce a parent or sibling description to its lower-cased tag, id and sorted classes. */
function summarizeRelative(node) {
  return {
    tag: (node.tag || node.tagName || '').toLowerCase(),
    id: node.id || null,
    classes: sortedClassList(node),
  };
}

/**
 * Normalize a raw element description into a comparable signature.
 *
 * The signature holds the lower-cased tag, id (or null), sorted classes,
 * attributes with lower-cased keys (excluding class/id/style), trimmed text
 * capped at 200 characters, a parent summary (or null), sibling summaries and
 * a `capturedAt` ISO timestamp.
 *
 * @param {object} elementData - Raw element description; a falsy value yields {}.
 * @returns {object} The element signature.
 */
function captureElementSignature(elementData) {
  if (!elementData) return {};

  const tag = (elementData.tag || elementData.tagName || '').toLowerCase();
  const id = elementData.id || null;
  const classes = sortedClassList(elementData);

  const attrs = {};
  const rawAttrs = elementData.attributes || elementData.attrs || {};
  for (const key of Object.keys(rawAttrs).sort()) {
    const lower = key.toLowerCase();
    if (lower === 'class' || lower === 'id' || lower === 'style') continue;
    attrs[lower] = rawAttrs[key];
  }

  // String() guards against non-string text values, which have no .trim().
  const text = String(elementData.text || elementData.textContent || elementData.innerText || '')
    .trim()
    .substring(0, 200);

  const parent = elementData.parent ? summarizeRelative(elementData.parent) : null;

  // Null or non-object sibling entries are dropped instead of throwing.
  const siblings = Array.isArray(elementData.siblings)
    ? elementData.siblings
      .filter((sib) => sib && typeof sib === 'object')
      .map(summarizeRelative)
    : [];

  return {
    tag,
    id,
    classes,
    attributes: attrs,
    text,
    parent,
    siblings,
    capturedAt: new Date().toISOString(),
  };
}
|
|
219
|
+
|
|
220
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
221
|
+
// 3. Levenshtein Distance & Text Similarity
|
|
222
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
223
|
+
|
|
224
|
+
/**
 * Compute the Levenshtein edit distance between two strings.
 *
 * Characters are compared by index, i.e. per UTF-16 code unit. Only two
 * rolling rows sized to the shorter input are kept, rather than the full
 * (m+1) x (n+1) matrix.
 *
 * @param {string} a
 * @param {string} b
 * @returns {number} Minimum number of single-character insertions, deletions
 *   or substitutions turning `a` into `b`. A falsy or empty input yields the
 *   length of the other input (0 if that one is falsy too).
 */
function levenshteinDistance(a, b) {
  if (a === b) return 0;
  if (!a || !a.length) return b ? b.length : 0;
  if (!b || !b.length) return a.length;

  // The distance is symmetric, so iterate over the longer input and size the
  // rows by the shorter one.
  let longer = a;
  let shorter = b;
  if (a.length < b.length) {
    longer = b;
    shorter = a;
  }
  const width = shorter.length;

  let prev = new Array(width + 1);
  let curr = new Array(width + 1);
  for (let j = 0; j <= width; j++) {
    prev[j] = j;
  }

  for (let i = 1; i <= longer.length; i++) {
    curr[0] = i;
    for (let j = 1; j <= width; j++) {
      const cost = longer[i - 1] === shorter[j - 1] ? 0 : 1;
      curr[j] = Math.min(
        prev[j] + 1,        // deletion
        curr[j - 1] + 1,    // insertion
        prev[j - 1] + cost  // substitution (or match)
      );
    }
    [prev, curr] = [curr, prev];
  }

  return prev[width];
}
|
|
254
|
+
|
|
255
|
+
/**
 * Normalized similarity between two strings, derived from their edit distance.
 *
 * @param {string} a
 * @param {string} b
 * @returns {number} 1 when both inputs are falsy, 0 when exactly one is,
 *   otherwise 1 - levenshteinDistance(a, b) / max(a.length, b.length).
 */
function textSimilarity(a, b) {
  const missingA = !a;
  const missingB = !b;

  if (missingA && missingB) {
    return 1;
  }
  if (missingA || missingB) {
    return 0;
  }

  const longest = Math.max(a.length, b.length);
  if (longest === 0) {
    return 1;
  }

  const distance = levenshteinDistance(a, b);
  return 1 - (distance / longest);
}
|
|
262
|
+
|
|
263
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
264
|
+
// 4. Heal Selector (Core Algorithm)
|
|
265
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
266
|
+
|
|
267
|
+
/**
 * Try to find a replacement for a selector that no longer matches.
 *
 * Every page element is scored against the signature stored in the registry
 * for (siteId, actionName) using five strategies (attribute, id, text,
 * structural and class matching). Shared community corrections recorded for
 * the failed selector are added as further candidates. The highest-confidence
 * candidate at or above 0.6 wins. Every attempt is written to `healing_log`;
 * on success the registry row (if any) is updated and, for a community match,
 * the correction's applied counter is incremented.
 *
 * @param {string} siteId
 * @param {string} actionName
 * @param {string} failedSelector - The selector that stopped matching.
 * @param {object[]} pageElements - Raw element descriptions from the page.
 * @returns {{healed: boolean, newSelector: (string|null), strategy: (string|null), confidence: number}}
 */
function healSelector(siteId, actionName, failedSelector, pageElements) {
  const registry = stmts.findRegistry.get(siteId, actionName);
  const registryId = registry ? registry.id : null;

  let storedSignature = {};
  if (registry) {
    try {
      const parsed = JSON.parse(registry.element_signature || '{}');
      // A stored "null" or primitive would crash the property reads below.
      if (parsed && typeof parsed === 'object') storedSignature = parsed;
    } catch { /* malformed signature: continue with an empty one */ }
  }

  // Logs an unsuccessful attempt and builds the "not healed" result.
  const recordFailure = () => {
    stmts.insertHealingLog.run(
      crypto.randomUUID(), registryId, siteId, failedSelector, null, 'none', 0, 0
    );
    return { healed: false, newSelector: null, strategy: null, confidence: 0 };
  };

  if (!Array.isArray(pageElements) || pageElements.length === 0) {
    return recordFailure();
  }

  // Stored-signature data is the same for every page element, so derive it once.
  const storedAttrs = storedSignature.attributes || {};
  const trackedAttrKeys = Object.keys(storedAttrs)
    .filter((key) => key.startsWith('data-') || key.startsWith('aria-'));
  const storedClasses = storedSignature.classes || [];
  const storedParent = storedSignature.parent || null;
  const storedParentClasses = storedParent ? (storedParent.classes || []) : [];
  const storedSiblingTags = storedParent && Array.isArray(storedSignature.siblings)
    ? storedSignature.siblings.map((s) => s.tag).sort().join(',')
    : null;

  const candidates = [];

  for (const el of pageElements) {
    const sig = captureElementSignature(el);

    // Strategy 1: attribute_match — data-* and aria-* attributes
    const elAttrs = sig.attributes || {};
    let attrMatches = 0;
    for (const key of trackedAttrKeys) {
      if (elAttrs[key] === storedAttrs[key]) attrMatches++;
    }
    if (trackedAttrKeys.length > 0 && attrMatches > 0) {
      candidates.push({
        element: el,
        signature: sig,
        strategy: 'attribute_match',
        confidence: (attrMatches / trackedAttrKeys.length) * 0.95,
        selector: buildCSSPath(el),
      });
    }

    // Strategy 2: id_match — partial ID matching
    if (storedSignature.id && sig.id) {
      const sim = textSimilarity(storedSignature.id, sig.id);
      if (sim > 0.5) {
        candidates.push({
          element: el,
          signature: sig,
          strategy: 'id_match',
          confidence: sim * 0.98,
          selector: `#${sig.id}`,
        });
      }
    }

    // Strategy 3: text_match — Levenshtein-based text similarity
    if (storedSignature.text && sig.text) {
      const sim = textSimilarity(
        storedSignature.text.toLowerCase(),
        sig.text.toLowerCase()
      );
      if (sim > 0.5) {
        candidates.push({
          element: el,
          signature: sig,
          strategy: 'text_match',
          confidence: sim * 0.85,
          selector: buildCSSPath(el),
        });
      }
    }

    // Strategy 4: structural_match — parent > nth-child path matching
    if (storedParent && sig.parent) {
      let structScore = 0;
      let structChecks = 0;

      if (storedSignature.tag && sig.tag) {
        structChecks++;
        if (storedSignature.tag === sig.tag) structScore++;
      }

      structChecks++;
      if (storedParent.tag === sig.parent.tag) structScore++;

      if (storedParent.id && sig.parent.id) {
        structChecks++;
        if (storedParent.id === sig.parent.id) structScore++;
      }

      if (storedParentClasses.length > 0) {
        const elParentClasses = sig.parent.classes || [];
        structChecks++;
        const overlap = storedParentClasses.filter((c) => elParentClasses.includes(c));
        structScore += overlap.length / storedParentClasses.length;
      }

      if (storedSiblingTags !== null && sig.siblings) {
        structChecks++;
        const elSibTags = sig.siblings.map((s) => s.tag).sort().join(',');
        structScore += textSimilarity(storedSiblingTags, elSibTags);
      }

      // structChecks is at least 1 here because the parent tag is always compared.
      const conf = structScore / structChecks;
      if (conf > 0.4) {
        const nthChild = el.index != null ? el.index + 1 : 1;
        const parentSel = sig.parent.id ? `#${sig.parent.id}` : sig.parent.tag;
        candidates.push({
          element: el,
          signature: sig,
          strategy: 'structural_match',
          confidence: conf * 0.80,
          selector: `${parentSel} > ${sig.tag}:nth-child(${nthChild})`,
        });
      }
    }

    // Strategy 5: class_match — overlapping CSS classes
    const elClasses = sig.classes || [];
    if (storedClasses.length > 0 && elClasses.length > 0) {
      const overlap = storedClasses.filter((c) => elClasses.includes(c));
      if (overlap.length > 0) {
        const conf = overlap.length / Math.max(storedClasses.length, elClasses.length);
        candidates.push({
          element: el,
          signature: sig,
          strategy: 'class_match',
          confidence: conf * 0.75,
          selector: `${sig.tag || '*'}.${overlap.join('.')}`,
        });
      }
    }
  }

  // Strategy 6: community_match — known shared corrections for this selector.
  // Base confidence 0.70 plus up to 0.15 depending on how often it was applied.
  const communityCorrections = stmts.findSharedCorrections.all(siteId, failedSelector);
  for (const corr of communityCorrections) {
    const appliedBoost = Math.min(corr.applied_count * 0.02, 0.15);
    candidates.push({
      element: null,
      signature: null,
      strategy: 'community_match',
      confidence: 0.70 + appliedBoost,
      selector: corr.new_selector,
      correctionId: corr.id,
    });
  }

  // Pick the highest-confidence candidate at or above the 0.6 threshold.
  candidates.sort((x, y) => y.confidence - x.confidence);
  const best = candidates.find((c) => c.confidence >= 0.6);

  if (!best) {
    return recordFailure();
  }

  stmts.insertHealingLog.run(
    crypto.randomUUID(), registryId, siteId, failedSelector,
    best.selector, best.strategy, best.confidence, 1
  );

  if (registryId) {
    stmts.updateRegistrySelector.run(best.selector, best.confidence, registryId);
  }

  if (best.correctionId) {
    stmts.incrementCorrectionApplied.run(best.correctionId);
  }

  return {
    healed: true,
    newSelector: best.selector,
    strategy: best.strategy,
    confidence: Math.round(best.confidence * 1000) / 1000,
  };
}
|
|
455
|
+
|
|
456
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
457
|
+
// 5. Submit Correction
|
|
458
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
459
|
+
|
|
460
|
+
/**
 * Record a manual selector correction and, when it is linked to an existing
 * registry entry, make the corrected selector that entry's current selector.
 *
 * Defaults applied before inserting: `correctedBy` falls back to 'user',
 * `reason` and `registryId` to null, and `shared` is stored as 1 or 0.
 *
 * @param {string} siteId
 * @param {string|null} registryId - Registry row to update, if any.
 * @param {{oldSelector: string, newSelector: string, correctedBy?: string, reason?: string, shared?: boolean}} correction
 * @returns {{id: string, siteId: string, registryId: (string|null), oldSelector: string, newSelector: string}}
 */
function submitCorrection(siteId, registryId, { oldSelector, newSelector, correctedBy, reason, shared }) {
  const id = crypto.randomUUID();
  const linkedRegistryId = registryId || null;
  const author = correctedBy || 'user';
  const note = reason || null;
  const sharedFlag = shared ? 1 : 0;

  stmts.insertCorrection.run(
    id, linkedRegistryId, siteId, oldSelector, newSelector, author, note, sharedFlag
  );

  // Only touch the registry when the referenced row actually exists.
  if (registryId && stmts.findRegistryById.get(registryId)) {
    stmts.updateRegistryFromCorrection.run(newSelector, registryId);
  }

  return { id, siteId, registryId, oldSelector, newSelector };
}
|
|
479
|
+
|
|
480
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
481
|
+
// 6. Community Suggestions
|
|
482
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
483
|
+
|
|
484
|
+
/**
 * List shared corrections recorded for a failed selector on a site.
 *
 * @param {string} siteId
 * @param {string} failedSelector - Matched against `old_selector`.
 * @returns {object[]} `selector_corrections` rows with `shared = 1`, ordered
 *   by `applied_count` descending.
 */
function getCommunitySuggestions(siteId, failedSelector) {
  const sharedCorrections = stmts.findSharedCorrections.all(siteId, failedSelector);
  return sharedCorrections;
}
|
|
487
|
+
|
|
488
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
489
|
+
// 7. Verify Selector
|
|
490
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
491
|
+
|
|
492
|
+
/**
 * Compare an element's current state with the signature stored for a
 * registered selector and persist the outcome on the registry row.
 *
 * Up to five checks contribute a score between 0 and 1 each: tag, id, class
 * overlap, attribute equality and text similarity. A check is only counted
 * when the stored signature has data for it (attributes are counted when
 * either side has any). Confidence is the mean score; the selector is
 * considered valid at 0.7 or above.
 *
 * @param {string} siteId
 * @param {string} actionName
 * @param {object} elementData - Raw description of the element currently matched.
 * @returns {{valid: boolean, confidence: number, drift: (object|null), error?: string}}
 *   `drift` lists the differences per checked aspect, or is null when none
 *   were found. `error` is set only when the selector is not registered.
 */
function verifySelector(siteId, actionName, elementData) {
  const registry = stmts.findRegistry.get(siteId, actionName);
  if (!registry) {
    return { valid: false, confidence: 0, drift: null, error: 'Selector not registered' };
  }

  let storedSignature = {};
  try {
    const parsed = JSON.parse(registry.element_signature || '{}');
    // A stored "null" or primitive would crash the property reads below.
    if (parsed && typeof parsed === 'object') storedSignature = parsed;
  } catch { /* malformed signature: continue with an empty one */ }

  const currentSignature = captureElementSignature(elementData);

  // Keeps legitimate falsy values ('', 0, false) in the drift report; only a
  // genuinely absent value is reported as null.
  const orNull = (value) => (value === undefined ? null : value);

  let totalChecks = 0;
  let matchScore = 0;
  const driftDetails = {};

  if (storedSignature.tag) {
    totalChecks++;
    if (storedSignature.tag === currentSignature.tag) {
      matchScore++;
    } else {
      driftDetails.tag = { expected: storedSignature.tag, actual: currentSignature.tag };
    }
  }

  if (storedSignature.id) {
    totalChecks++;
    if (storedSignature.id === currentSignature.id) {
      matchScore++;
    } else {
      driftDetails.id = { expected: storedSignature.id, actual: currentSignature.id };
    }
  }

  const storedClasses = storedSignature.classes || [];
  const currentClasses = currentSignature.classes || [];
  if (storedClasses.length > 0) {
    totalChecks++;
    const overlap = storedClasses.filter((c) => currentClasses.includes(c));
    const classRatio = overlap.length / storedClasses.length;
    matchScore += classRatio;
    if (classRatio < 1) {
      const removed = storedClasses.filter((c) => !currentClasses.includes(c));
      const added = currentClasses.filter((c) => !storedClasses.includes(c));
      driftDetails.classes = { removed, added, overlapRatio: classRatio };
    }
  }

  const storedAttrs = storedSignature.attributes || {};
  const currentAttrs = currentSignature.attributes || {};
  const allAttrKeys = [...new Set([...Object.keys(storedAttrs), ...Object.keys(currentAttrs)])];
  if (allAttrKeys.length > 0) {
    totalChecks++;
    let attrMatch = 0;
    const changedAttrs = {};
    for (const key of allAttrKeys) {
      if (storedAttrs[key] === currentAttrs[key]) {
        attrMatch++;
      } else {
        changedAttrs[key] = {
          expected: orNull(storedAttrs[key]),
          actual: orNull(currentAttrs[key]),
        };
      }
    }
    matchScore += attrMatch / allAttrKeys.length;
    if (Object.keys(changedAttrs).length > 0) {
      driftDetails.attributes = changedAttrs;
    }
  }

  if (storedSignature.text) {
    totalChecks++;
    const currentText = currentSignature.text || '';
    const sim = textSimilarity(
      storedSignature.text.toLowerCase(),
      currentText.toLowerCase()
    );
    matchScore += sim;
    if (sim < 0.95) {
      driftDetails.text = {
        expected: storedSignature.text.substring(0, 50),
        actual: currentText.substring(0, 50),
        similarity: Math.round(sim * 1000) / 1000,
      };
    }
  }

  const confidence = totalChecks > 0 ? matchScore / totalChecks : 0;
  const valid = confidence >= 0.7;
  const hasDrift = Object.keys(driftDetails).length > 0;

  stmts.updateRegistryVerified.run(valid ? 1 : 0, confidence, siteId, actionName);

  return {
    valid,
    confidence: Math.round(confidence * 1000) / 1000,
    drift: hasDrift ? driftDetails : null,
  };
}
|
|
587
|
+
|
|
588
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
589
|
+
// 8. Selector Health
|
|
590
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
591
|
+
|
|
592
|
+
/**
 * Summarize the state of all registered selectors for a site.
 *
 * @param {string} siteId
 * @returns {{total: number, verified: number, healed: number, avgConfidence: number, broken: object[]}}
 *   `verified` counts rows with a truthy `verified` flag, `healed` counts rows
 *   with `heal_count > 0`, `avgConfidence` is the mean confidence rounded to
 *   three decimals, and `broken` lists rows that are unverified or whose
 *   confidence is below 0.6.
 */
function getSelectorHealth(siteId) {
  const rows = stmts.getSelectorsBySite.all(siteId);
  const total = rows.length;

  if (total === 0) {
    return { total: 0, verified: 0, healed: 0, avgConfidence: 0, broken: [] };
  }

  const verified = rows.filter((row) => row.verified).length;
  const healed = rows.filter((row) => row.heal_count > 0).length;
  const confidenceTotal = rows.reduce((sum, row) => sum + row.confidence, 0);

  const broken = rows
    .filter((row) => !row.verified || row.confidence < 0.6)
    .map((row) => ({
      id: row.id,
      actionName: row.action_name,
      currentSelector: row.current_selector,
      confidence: row.confidence,
      healCount: row.heal_count,
      lastHealed: row.last_healed,
    }));

  return {
    total,
    verified,
    healed,
    avgConfidence: Math.round((confidenceTotal / total) * 1000) / 1000,
    broken,
  };
}
|
|
629
|
+
|
|
630
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
631
|
+
// 9. Healing History
|
|
632
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
633
|
+
|
|
634
|
+
/**
 * Fetch the most recent healing-log entries for a site, newest first.
 *
 * @param {string} siteId
 * @param {{limit?: number, actionName?: string}} [options] - `limit` defaults
 *   to 50 when falsy; `actionName` restricts the result to log entries whose
 *   registry row has that action name.
 * @returns {object[]} `healing_log` rows.
 */
function getHealingHistory(siteId, { limit, actionName } = {}) {
  const rowLimit = limit ? limit : 50;

  return actionName
    ? stmts.getHealingLogBySiteAction.all(siteId, actionName, rowLimit)
    : stmts.getHealingLogBySite.all(siteId, rowLimit);
}
|
|
641
|
+
|
|
642
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
643
|
+
// 10. Snapshot Elements
|
|
644
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
645
|
+
|
|
646
|
+
/**
 * Store a snapshot of a page's elements in `element_snapshots`.
 *
 * Each element is reduced to its signature and the list is saved as JSON
 * under the selector '*'. A non-array `elements` argument stores an empty
 * list.
 *
 * @param {string} siteId
 * @param {string} url
 * @param {object[]} elements - Raw element descriptions.
 * @returns {{id: string, siteId: string, url: string, elementCount: number}}
 */
function snapshotElements(siteId, url, elements) {
  const id = crypto.randomUUID();

  let signatures = [];
  if (Array.isArray(elements)) {
    signatures = elements.map((element) => captureElementSignature(element));
  }

  const serialized = JSON.stringify(signatures);
  stmts.insertSnapshot.run(id, siteId, url, '*', serialized);

  return { id, siteId, url, elementCount: signatures.length };
}
|
|
659
|
+
|
|
660
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
661
|
+
// 11. Detect Drift
|
|
662
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
663
|
+
|
|
664
|
+
/**
 * Compare the current elements of a page against the most recent stored
 * snapshot for the same site and URL.
 *
 * Elements are matched by a fingerprint string built from tag, id, classes
 * and text. A current element whose fingerprint is absent from the snapshot
 * is reported as `changed` when the snapshot element at the same index has
 * the same tag and the two fingerprints have a `textSimilarity` strictly
 * between 0.3 and 1.0; otherwise it is reported as `added`. Snapshot elements
 * whose fingerprint is absent from the current set, and whose index was not
 * already paired as `changed`, are reported as `removed`.
 *
 * Elements sharing an identical fingerprint collapse to a single map entry
 * (the last index wins), so a change in the number of identical elements is
 * not reported.
 *
 * @param {*} siteId - Site identifier used to look up the latest snapshot.
 * @param {string} url - Page URL used to look up the latest snapshot.
 * @param {Array<object>} currentElements - Raw element descriptions; anything
 *   that is not an array is treated as an empty list.
 * @returns {object} `{ hasDrift, message, changed, added, removed }` when no
 *   snapshot exists, otherwise
 *   `{ hasDrift, snapshotDate, changed, added, removed, summary }`.
 */
function detectDrift(siteId, url, currentElements) {
  const lastSnapshot = stmts.getLatestSnapshot.get(siteId, url);
  if (!lastSnapshot) {
    return { hasDrift: false, message: 'No previous snapshot found', changed: [], added: [], removed: [] };
  }

  let previousElements;
  try {
    previousElements = JSON.parse(lastSnapshot.snapshot || '[]');
  } catch {
    previousElements = [];
  }
  // Valid JSON that is not a list (`null`, `{}`, a number, ...) parses without
  // throwing; treat it like an unreadable snapshot instead of crashing below.
  if (!Array.isArray(previousElements)) {
    previousElements = [];
  }

  const currentSigs = (Array.isArray(currentElements) ? currentElements : [])
    .map((el) => captureElementSignature(el));

  // Tolerates null entries and a non-array `classes` field so one malformed
  // record cannot abort the whole comparison.
  const fingerprint = (sig) => {
    const safe = sig || {};
    const classes = Array.isArray(safe.classes) ? safe.classes : [];
    return `${safe.tag || ''}|${safe.id || ''}|${classes.join(',')}|${safe.text || ''}`;
  };

  const prevFingerprints = new Map();
  for (let i = 0; i < previousElements.length; i++) {
    prevFingerprints.set(fingerprint(previousElements[i]), i);
  }

  const currFingerprints = new Map();
  for (let i = 0; i < currentSigs.length; i++) {
    currFingerprints.set(fingerprint(currentSigs[i]), i);
  }

  const changed = [];
  const added = [];
  const removed = [];
  // Indices already reported as changed; avoids rescanning `changed` per entry.
  const changedIndices = new Set();

  for (const [fp, idx] of currFingerprints) {
    if (prevFingerprints.has(fp)) continue;

    const previous = previousElements[idx];
    const current = currentSigs[idx];
    if (previous && current && previous.tag === current.tag) {
      const sim = textSimilarity(fingerprint(previous), fp);
      if (sim > 0.3 && sim < 1.0) {
        changed.push({
          index: idx,
          previous,
          current,
          similarity: Math.round(sim * 1000) / 1000,
        });
        changedIndices.add(idx);
        continue;
      }
    }
    added.push({ index: idx, element: current });
  }

  for (const [fp, idx] of prevFingerprints) {
    if (currFingerprints.has(fp) || changedIndices.has(idx)) continue;
    removed.push({ index: idx, element: previousElements[idx] });
  }

  return {
    hasDrift: changed.length > 0 || added.length > 0 || removed.length > 0,
    snapshotDate: lastSnapshot.captured_at,
    changed,
    added,
    removed,
    summary: {
      changedCount: changed.length,
      addedCount: added.length,
      removedCount: removed.length,
    },
  };
}
|
|
735
|
+
|
|
736
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
737
|
+
// 12. Build CSS Path
|
|
738
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
739
|
+
|
|
740
|
+
/**
 * Build a CSS selector string from a plain element description.
 *
 * Resolution order:
 *   1. falsy input            -> '*'
 *   2. truthy `id`            -> '#<id>' (nothing else is appended)
 *   3. otherwise              -> '<tag or *>' + '.class' per class
 *                                + one attribute selector per attribute
 *
 * Classes come from `classes` (array) or, failing that, `className`
 * (whitespace-separated string). Attributes come from `attributes` or
 * `attrs`; `class`, `id` and `style` keys are skipped. Attributes with a
 * null/undefined/empty value become presence selectors (`[name]`).
 *
 * @param {object} elementData - Element description (tag/tagName, id,
 *   classes/className, attributes/attrs).
 * @returns {string} The selector.
 */
function buildCSSPath(elementData) {
  if (!elementData) return '*';

  const tag = (elementData.tag || elementData.tagName || '').toLowerCase();
  const id = elementData.id;

  // NOTE(review): id and class names are interpolated verbatim; values that
  // contain CSS-special characters (e.g. ':' or '/') would need identifier
  // escaping to be valid selectors — confirm whether callers can send those.
  if (id) return `#${id}`;

  let selector = tag || '*';

  let classes = [];
  if (Array.isArray(elementData.classes)) {
    classes = elementData.classes.filter(Boolean);
  } else if (typeof elementData.className === 'string') {
    classes = elementData.className.split(/\s+/).filter(Boolean);
  }
  if (classes.length > 0) {
    selector += `.${classes.join('.')}`;
  }

  const attrs = elementData.attributes || elementData.attrs || {};
  for (const key of Object.keys(attrs)) {
    const lower = key.toLowerCase();
    if (lower === 'class' || lower === 'id' || lower === 'style') continue;

    const val = attrs[key];
    if (val != null && val !== '') {
      // Coerce first: numeric/boolean attribute values have no `.replace`.
      // Escape backslashes together with quotes so a trailing `\` cannot
      // swallow the closing quote of the CSS string.
      const escaped = String(val).replace(/["\\]/g, '\\$&');
      selector += `[${lower}="${escaped}"]`;
    } else {
      selector += `[${lower}]`;
    }
  }

  return selector;
}
|
|
774
|
+
|
|
775
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
776
|
+
// 13. Build XPath
|
|
777
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
778
|
+
|
|
779
|
+
/**
 * Build an XPath expression from a plain element description.
 *
 * Resolution order:
 *   1. falsy input  -> '//*'
 *   2. truthy `id`  -> '//<tag>[@id=<literal>]'
 *   3. otherwise    -> '//<tag>' with predicates joined by ' and ':
 *        - contains(@class, <literal>) per class
 *        - @name=<literal> per attribute with a value, @name when the value
 *          is null/undefined/empty (`class`, `id`, `style` are skipped)
 *        - normalize-space(text())=<literal> when the trimmed text is
 *          non-empty and at most 80 characters long
 *
 * String values are emitted as XPath 1.0 literals: double-quoted when
 * possible, single-quoted when the value contains `"`, and as a `concat()`
 * call when it contains both quote characters (XPath 1.0 has no escape
 * sequence inside string literals).
 *
 * @param {object} elementData - Element description (tag/tagName, id,
 *   classes/className, attributes/attrs, text/textContent/innerText).
 * @returns {string} The XPath expression.
 */
function buildXPath(elementData) {
  if (!elementData) return '//*';

  // Render any value as a syntactically valid XPath 1.0 string literal.
  const toLiteral = (value) => {
    const str = String(value);
    if (!str.includes('"')) return `"${str}"`;
    if (!str.includes("'")) return `'${str}'`;
    const pieces = str.split('"').map((piece) => `"${piece}"`);
    return `concat(${pieces.join(', \'"\', ')})`;
  };

  const tag = (elementData.tag || elementData.tagName || '').toLowerCase() || '*';
  const id = elementData.id;

  if (id) return `//${tag}[@id=${toLiteral(id)}]`;

  const predicates = [];

  let classes = [];
  if (Array.isArray(elementData.classes)) {
    classes = elementData.classes.filter(Boolean);
  } else if (typeof elementData.className === 'string') {
    classes = elementData.className.split(/\s+/).filter(Boolean);
  }
  for (const cls of classes) {
    predicates.push(`contains(@class, ${toLiteral(cls)})`);
  }

  const attrs = elementData.attributes || elementData.attrs || {};
  for (const key of Object.keys(attrs)) {
    const lower = key.toLowerCase();
    if (lower === 'class' || lower === 'id' || lower === 'style') continue;

    const val = attrs[key];
    if (val != null && val !== '') {
      predicates.push(`@${lower}=${toLiteral(val)}`);
    } else {
      predicates.push(`@${lower}`);
    }
  }

  // Coerce before trimming so a numeric text value does not throw.
  const rawText = elementData.text || elementData.textContent || elementData.innerText || '';
  const text = String(rawText).trim();
  if (text && text.length <= 80) {
    predicates.push(`normalize-space(text())=${toLiteral(text)}`);
  }

  if (predicates.length === 0) return `//${tag}`;
  return `//${tag}[${predicates.join(' and ')}]`;
}
|
|
823
|
+
|
|
824
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
825
|
+
// Exports
|
|
826
|
+
// ═══════════════════════════════════════════════════════════════════════
|
|
827
|
+
|
|
828
|
+
// Public API of this module. Every name listed here is a function defined in
// this file; the list order follows the numbered sections of the file.
module.exports = {
  registerSelector,
  captureElementSignature,
  healSelector,
  levenshteinDistance,
  textSimilarity,
  submitCorrection,
  getCommunitySuggestions,
  verifySelector,
  getSelectorHealth,
  getHealingHistory,
  snapshotElements,
  detectDrift,
  buildCSSPath,
  buildXPath,
};
|