web-agent-bridge 3.2.0 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202) hide show
  1. package/LICENSE +72 -72
  2. package/README.ar.md +1286 -1152
  3. package/README.md +1764 -1635
  4. package/bin/agent-runner.js +474 -474
  5. package/bin/cli.js +237 -138
  6. package/bin/wab.js +80 -80
  7. package/examples/bidi-agent.js +119 -119
  8. package/examples/cross-site-agent.js +91 -91
  9. package/examples/mcp-agent.js +94 -94
  10. package/examples/next-app-router/README.md +44 -44
  11. package/examples/puppeteer-agent.js +108 -108
  12. package/examples/saas-dashboard/README.md +55 -55
  13. package/examples/shopify-hydrogen/README.md +74 -74
  14. package/examples/vision-agent.js +171 -171
  15. package/examples/wordpress-elementor/README.md +77 -77
  16. package/package.json +16 -3
  17. package/public/.well-known/agent-tools.json +180 -180
  18. package/public/.well-known/ai-assets.json +59 -59
  19. package/public/.well-known/security.txt +8 -0
  20. package/public/agent-workspace.html +349 -349
  21. package/public/ai.html +198 -198
  22. package/public/api.html +413 -412
  23. package/public/browser.html +486 -486
  24. package/public/commander-dashboard.html +243 -243
  25. package/public/cookies.html +210 -210
  26. package/public/css/agent-workspace.css +1713 -1713
  27. package/public/css/premium.css +317 -317
  28. package/public/css/styles.css +1235 -1235
  29. package/public/dashboard.html +706 -706
  30. package/public/dns.html +507 -0
  31. package/public/docs.html +587 -587
  32. package/public/feed.xml +89 -89
  33. package/public/growth.html +463 -463
  34. package/public/index.html +1070 -982
  35. package/public/integrations.html +556 -0
  36. package/public/js/agent-workspace.js +1740 -1740
  37. package/public/js/auth-nav.js +31 -31
  38. package/public/js/auth-redirect.js +12 -12
  39. package/public/js/cookie-consent.js +56 -56
  40. package/public/js/wab-demo-page.js +721 -721
  41. package/public/js/ws-client.js +74 -74
  42. package/public/llms-full.txt +360 -360
  43. package/public/llms.txt +125 -125
  44. package/public/login.html +85 -85
  45. package/public/mesh-dashboard.html +328 -328
  46. package/public/openapi.json +580 -580
  47. package/public/phone-shield.html +281 -0
  48. package/public/premium-dashboard.html +2489 -2489
  49. package/public/premium.html +793 -793
  50. package/public/privacy.html +297 -297
  51. package/public/register.html +105 -105
  52. package/public/robots.txt +87 -87
  53. package/public/script/wab-consent.d.ts +36 -36
  54. package/public/script/wab-consent.js +104 -104
  55. package/public/script/wab-schema.js +131 -131
  56. package/public/script/wab.d.ts +108 -108
  57. package/public/script/wab.min.js +580 -580
  58. package/public/security.txt +8 -0
  59. package/public/terms.html +256 -256
  60. package/script/ai-agent-bridge.js +1754 -1754
  61. package/sdk/README.md +99 -99
  62. package/sdk/agent-mesh.js +449 -449
  63. package/sdk/commander.js +262 -262
  64. package/sdk/index.d.ts +464 -464
  65. package/sdk/index.js +12 -1
  66. package/sdk/multi-agent.js +318 -318
  67. package/sdk/package.json +1 -1
  68. package/sdk/safety-shield.js +219 -0
  69. package/sdk/schema-discovery.js +83 -83
  70. package/server/adapters/index.js +520 -520
  71. package/server/config/plans.js +367 -367
  72. package/server/config/secrets.js +102 -102
  73. package/server/control-plane/index.js +301 -301
  74. package/server/data-plane/index.js +354 -354
  75. package/server/index.js +531 -427
  76. package/server/llm/index.js +404 -404
  77. package/server/middleware/adminAuth.js +35 -35
  78. package/server/middleware/auth.js +50 -50
  79. package/server/middleware/featureGate.js +88 -88
  80. package/server/middleware/rateLimits.js +100 -100
  81. package/server/middleware/sensitiveAction.js +157 -0
  82. package/server/migrations/001_add_analytics_indexes.sql +7 -7
  83. package/server/migrations/002_premium_features.sql +418 -418
  84. package/server/migrations/003_ads_integer_cents.sql +33 -33
  85. package/server/migrations/004_agent_os.sql +158 -158
  86. package/server/migrations/005_marketplace_metering.sql +126 -126
  87. package/server/models/adapters/index.js +33 -33
  88. package/server/models/adapters/mysql.js +183 -183
  89. package/server/models/adapters/postgresql.js +172 -172
  90. package/server/models/adapters/sqlite.js +7 -7
  91. package/server/models/db.js +681 -681
  92. package/server/observability/failure-analysis.js +337 -337
  93. package/server/observability/index.js +394 -394
  94. package/server/protocol/capabilities.js +223 -223
  95. package/server/protocol/index.js +243 -243
  96. package/server/protocol/schema.js +584 -584
  97. package/server/registry/certification.js +271 -271
  98. package/server/registry/index.js +326 -326
  99. package/server/routes/admin-premium.js +671 -671
  100. package/server/routes/admin.js +261 -261
  101. package/server/routes/ads.js +130 -130
  102. package/server/routes/agent-workspace.js +540 -540
  103. package/server/routes/api.js +150 -150
  104. package/server/routes/auth.js +71 -71
  105. package/server/routes/billing.js +45 -45
  106. package/server/routes/commander.js +316 -316
  107. package/server/routes/demo-showcase.js +332 -332
  108. package/server/routes/demo-store.js +154 -0
  109. package/server/routes/discovery.js +417 -417
  110. package/server/routes/gateway.js +173 -157
  111. package/server/routes/license.js +251 -240
  112. package/server/routes/mesh.js +469 -469
  113. package/server/routes/noscript.js +543 -543
  114. package/server/routes/premium-v2.js +686 -686
  115. package/server/routes/premium.js +724 -724
  116. package/server/routes/runtime.js +2148 -2147
  117. package/server/routes/sovereign.js +465 -385
  118. package/server/routes/universal.js +200 -185
  119. package/server/routes/wab-api.js +850 -501
  120. package/server/runtime/container-worker.js +111 -111
  121. package/server/runtime/container.js +448 -448
  122. package/server/runtime/distributed-worker.js +362 -362
  123. package/server/runtime/event-bus.js +210 -210
  124. package/server/runtime/index.js +253 -253
  125. package/server/runtime/queue.js +599 -599
  126. package/server/runtime/replay.js +666 -666
  127. package/server/runtime/sandbox.js +266 -266
  128. package/server/runtime/scheduler.js +534 -534
  129. package/server/runtime/session-engine.js +293 -293
  130. package/server/runtime/state-manager.js +188 -188
  131. package/server/security/cross-site-redactor.js +196 -0
  132. package/server/security/dry-run.js +180 -0
  133. package/server/security/human-gate-rate-limit.js +147 -0
  134. package/server/security/human-gate-transports.js +178 -0
  135. package/server/security/human-gate.js +281 -0
  136. package/server/security/index.js +368 -368
  137. package/server/security/intent-engine.js +245 -0
  138. package/server/security/reward-guard.js +171 -0
  139. package/server/security/rollback-store.js +239 -0
  140. package/server/security/token-scope.js +404 -0
  141. package/server/security/url-policy.js +139 -0
  142. package/server/services/agent-chat.js +506 -506
  143. package/server/services/agent-learning.js +601 -575
  144. package/server/services/agent-memory.js +625 -625
  145. package/server/services/agent-mesh.js +555 -539
  146. package/server/services/agent-symphony.js +717 -717
  147. package/server/services/agent-tasks.js +1807 -1807
  148. package/server/services/api-key-engine.js +292 -261
  149. package/server/services/cluster.js +894 -894
  150. package/server/services/commander.js +738 -738
  151. package/server/services/edge-compute.js +440 -440
  152. package/server/services/email.js +204 -204
  153. package/server/services/hosted-runtime.js +205 -205
  154. package/server/services/lfd.js +635 -635
  155. package/server/services/local-ai.js +389 -389
  156. package/server/services/marketplace.js +270 -270
  157. package/server/services/metering.js +182 -182
  158. package/server/services/modules/affiliate-intelligence.js +93 -93
  159. package/server/services/modules/agent-firewall.js +90 -90
  160. package/server/services/modules/bounty.js +89 -89
  161. package/server/services/modules/collective-bargaining.js +92 -92
  162. package/server/services/modules/dark-pattern.js +66 -66
  163. package/server/services/modules/gov-intelligence.js +45 -45
  164. package/server/services/modules/neural.js +55 -55
  165. package/server/services/modules/notary.js +49 -49
  166. package/server/services/modules/price-time-machine.js +86 -86
  167. package/server/services/modules/protocol.js +104 -104
  168. package/server/services/negotiation.js +439 -439
  169. package/server/services/plugins.js +771 -771
  170. package/server/services/price-intelligence.js +566 -566
  171. package/server/services/price-shield.js +1137 -1137
  172. package/server/services/reputation.js +465 -465
  173. package/server/services/search-engine.js +357 -357
  174. package/server/services/security.js +513 -513
  175. package/server/services/self-healing.js +843 -843
  176. package/server/services/sovereign-shield.js +542 -0
  177. package/server/services/stripe.js +192 -192
  178. package/server/services/swarm.js +788 -788
  179. package/server/services/universal-scraper.js +662 -661
  180. package/server/services/verification.js +481 -481
  181. package/server/services/vision.js +1163 -1163
  182. package/server/utils/cache.js +125 -125
  183. package/server/utils/migrate.js +81 -81
  184. package/server/utils/safe-fetch.js +228 -0
  185. package/server/utils/secureFields.js +50 -50
  186. package/server/ws.js +161 -161
  187. package/templates/artisan-marketplace.yaml +104 -104
  188. package/templates/book-price-scout.yaml +98 -98
  189. package/templates/electronics-price-tracker.yaml +108 -108
  190. package/templates/flight-deal-hunter.yaml +113 -113
  191. package/templates/freelancer-direct.yaml +116 -116
  192. package/templates/grocery-price-compare.yaml +93 -93
  193. package/templates/hotel-direct-booking.yaml +113 -113
  194. package/templates/local-services.yaml +98 -98
  195. package/templates/olive-oil-tunisia.yaml +88 -88
  196. package/templates/organic-farm-fresh.yaml +101 -101
  197. package/templates/restaurant-direct.yaml +97 -97
  198. package/public/score.html +0 -263
  199. package/server/migrations/006_growth_suite.sql +0 -138
  200. package/server/routes/growth.js +0 -962
  201. package/server/services/fairness-engine.js +0 -409
  202. package/server/services/fairness.js +0 -420
@@ -1,843 +1,843 @@
1
- const { db } = require('../models/db');
2
- const crypto = require('crypto');
3
-
4
- // ═══════════════════════════════════════════════════════════════════════
5
- // Schema
6
- // ═══════════════════════════════════════════════════════════════════════
7
-
8
- db.exec(`
9
- CREATE TABLE IF NOT EXISTS selector_registry (
10
- id TEXT PRIMARY KEY,
11
- site_id TEXT NOT NULL,
12
- action_name TEXT NOT NULL,
13
- original_selector TEXT NOT NULL,
14
- current_selector TEXT NOT NULL,
15
- selector_type TEXT,
16
- element_signature TEXT DEFAULT '{}',
17
- confidence REAL DEFAULT 1.0,
18
- verified INTEGER DEFAULT 1,
19
- heal_count INTEGER DEFAULT 0,
20
- last_verified TEXT,
21
- last_healed TEXT,
22
- created_at TEXT DEFAULT (datetime('now')),
23
- updated_at TEXT DEFAULT (datetime('now'))
24
- );
25
-
26
- CREATE TABLE IF NOT EXISTS selector_corrections (
27
- id TEXT PRIMARY KEY,
28
- registry_id TEXT,
29
- site_id TEXT NOT NULL,
30
- old_selector TEXT NOT NULL,
31
- new_selector TEXT NOT NULL,
32
- corrected_by TEXT,
33
- reason TEXT,
34
- shared INTEGER DEFAULT 0,
35
- applied_count INTEGER DEFAULT 0,
36
- created_at TEXT DEFAULT (datetime('now'))
37
- );
38
-
39
- CREATE TABLE IF NOT EXISTS healing_log (
40
- id TEXT PRIMARY KEY,
41
- registry_id TEXT,
42
- site_id TEXT NOT NULL,
43
- old_selector TEXT NOT NULL,
44
- new_selector TEXT,
45
- strategy TEXT,
46
- confidence REAL,
47
- success INTEGER,
48
- created_at TEXT DEFAULT (datetime('now'))
49
- );
50
-
51
- CREATE TABLE IF NOT EXISTS element_snapshots (
52
- id TEXT PRIMARY KEY,
53
- site_id TEXT NOT NULL,
54
- url TEXT NOT NULL,
55
- selector TEXT NOT NULL,
56
- snapshot TEXT DEFAULT '{}',
57
- captured_at TEXT DEFAULT (datetime('now'))
58
- );
59
-
60
- CREATE INDEX IF NOT EXISTS idx_selector_registry_site ON selector_registry(site_id);
61
- CREATE INDEX IF NOT EXISTS idx_selector_registry_action ON selector_registry(action_name);
62
- CREATE INDEX IF NOT EXISTS idx_selector_registry_site_action ON selector_registry(site_id, action_name);
63
- CREATE INDEX IF NOT EXISTS idx_selector_corrections_site ON selector_corrections(site_id);
64
- CREATE INDEX IF NOT EXISTS idx_healing_log_site ON healing_log(site_id);
65
- CREATE INDEX IF NOT EXISTS idx_element_snapshots_site ON element_snapshots(site_id);
66
- CREATE INDEX IF NOT EXISTS idx_element_snapshots_site_url ON element_snapshots(site_id, url);
67
- `);
68
-
69
- // ═══════════════════════════════════════════════════════════════════════
70
- // Prepared Statements
71
- // ═══════════════════════════════════════════════════════════════════════
72
-
73
// Prepared statements shared by the functions in this module. They are
// prepared once at load time; every value is bound through `?` placeholders.
const stmts = {
  // New registry row: original and current selector are bound separately,
  // confidence starts at 1.0 and last_verified is set to "now".
  insertRegistry: db.prepare(`
    INSERT INTO selector_registry (id, site_id, action_name, original_selector, current_selector, selector_type, element_signature, confidence, last_verified)
    VALUES (?, ?, ?, ?, ?, ?, ?, 1.0, datetime('now'))
  `),
  // Lookup by (site_id, action_name). NOTE(review): the schema has no UNIQUE
  // constraint on that pair, so more than one row may match - confirm callers
  // that use .get() are fine with receiving just one of them.
  findRegistry: db.prepare(`
    SELECT * FROM selector_registry WHERE site_id = ? AND action_name = ?
  `),
  // Lookup by primary key.
  findRegistryById: db.prepare(`
    SELECT * FROM selector_registry WHERE id = ?
  `),
  // Record an automatic heal: new selector + its confidence, heal_count + 1.
  updateRegistrySelector: db.prepare(`
    UPDATE selector_registry
    SET current_selector = ?, confidence = ?, heal_count = heal_count + 1,
        last_healed = datetime('now'), updated_at = datetime('now')
    WHERE id = ?
  `),
  // Store the outcome of a verification run (verified flag + confidence).
  updateRegistryVerified: db.prepare(`
    UPDATE selector_registry
    SET verified = ?, confidence = ?, last_verified = datetime('now'), updated_at = datetime('now')
    WHERE site_id = ? AND action_name = ?
  `),
  // Insert a correction row.
  insertCorrection: db.prepare(`
    INSERT INTO selector_corrections (id, registry_id, site_id, old_selector, new_selector, corrected_by, reason, shared)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
  `),
  // Apply a correction to the registry: confidence is reset to 1.0.
  updateRegistryFromCorrection: db.prepare(`
    UPDATE selector_registry
    SET current_selector = ?, confidence = 1.0, heal_count = heal_count + 1,
        last_healed = datetime('now'), updated_at = datetime('now')
    WHERE id = ?
  `),
  // Shared corrections for a given site + failed selector, most-applied first.
  findSharedCorrections: db.prepare(`
    SELECT * FROM selector_corrections
    WHERE site_id = ? AND old_selector = ? AND shared = 1
    ORDER BY applied_count DESC
  `),
  // Bump the usage counter of a correction.
  incrementCorrectionApplied: db.prepare(`
    UPDATE selector_corrections SET applied_count = applied_count + 1 WHERE id = ?
  `),
  // Append one heal attempt to the log.
  insertHealingLog: db.prepare(`
    INSERT INTO healing_log (id, registry_id, site_id, old_selector, new_selector, strategy, confidence, success)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
  `),
  // Most recent heal attempts for a site; the last parameter is the LIMIT.
  getHealingLogBySite: db.prepare(`
    SELECT * FROM healing_log WHERE site_id = ? ORDER BY created_at DESC LIMIT ?
  `),
  // Same, restricted to one action via the registry join; log rows whose
  // registry_id does not match a registry row are excluded by the inner JOIN.
  getHealingLogBySiteAction: db.prepare(`
    SELECT hl.* FROM healing_log hl
    JOIN selector_registry sr ON hl.registry_id = sr.id
    WHERE hl.site_id = ? AND sr.action_name = ?
    ORDER BY hl.created_at DESC LIMIT ?
  `),
  // All registry rows for a site.
  getSelectorsBySite: db.prepare(`
    SELECT * FROM selector_registry WHERE site_id = ?
  `),
  // Store a snapshot; captured_at is set to "now".
  insertSnapshot: db.prepare(`
    INSERT INTO element_snapshots (id, site_id, url, selector, snapshot, captured_at)
    VALUES (?, ?, ?, ?, ?, datetime('now'))
  `),
  // Newest snapshot for a site + URL.
  getLatestSnapshot: db.prepare(`
    SELECT * FROM element_snapshots WHERE site_id = ? AND url = ? ORDER BY captured_at DESC LIMIT 1
  `),
};
137
-
138
- // ═══════════════════════════════════════════════════════════════════════
139
- // 1. Register Selector
140
- // ═══════════════════════════════════════════════════════════════════════
141
-
142
/**
 * Store a selector for a site action in the registry.
 *
 * The selector is written as both the original and the current selector.
 * A missing selector type is stored as 'css'. A string signature is stored
 * as given; anything else is serialised with JSON.stringify ({} when absent).
 *
 * @param {string} siteId - Site the selector belongs to.
 * @param {object} details
 * @param {string} details.actionName - Action the selector drives.
 * @param {string} details.selector - Selector to register.
 * @param {string} [details.selectorType] - Selector kind; defaults to 'css'.
 * @param {object|string} [details.elementSignature] - Signature object or JSON text.
 * @returns {{id: string, siteId: string, actionName: string, selector: string, selectorType: string}}
 */
function registerSelector(siteId, { actionName, selector, selectorType, elementSignature }) {
  const id = crypto.randomUUID();
  const resolvedType = selectorType || 'css';

  let serializedSignature;
  if (typeof elementSignature === 'string') {
    serializedSignature = elementSignature;
  } else {
    serializedSignature = JSON.stringify(elementSignature || {});
  }

  // Bound twice on purpose: once as original_selector, once as current_selector.
  const insertParams = [id, siteId, actionName, selector, selector, resolvedType, serializedSignature];
  stmts.insertRegistry.run(...insertParams);

  return { id, siteId, actionName, selector, selectorType: resolvedType };
}
155
-
156
- // ═══════════════════════════════════════════════════════════════════════
157
- // 2. Capture Element Signature
158
- // ═══════════════════════════════════════════════════════════════════════
159
-
160
/**
 * Build a normalised, comparable signature from raw element data.
 *
 * Accepts either `tag`/`classes`/`attributes`/`text` style keys or their DOM
 * style aliases (`tagName`, `className`, `attrs`, `textContent`, `innerText`).
 * Class lists are de-blanked and sorted, attribute names are lower-cased
 * (`class`, `id` and `style` are dropped), and text is trimmed and capped at
 * 200 characters.
 *
 * The element itself, its parent and its siblings all go through the same
 * class normalisation, so a non-string `className` yields an empty class list
 * instead of throwing. Sibling entries that are not objects are skipped.
 *
 * @param {object|null|undefined} elementData - Raw element description.
 * @returns {object} `{}` when no data is given, otherwise
 *   `{ tag, id, classes, attributes, text, parent, siblings, capturedAt }`.
 */
function captureElementSignature(elementData) {
  if (!elementData) return {};

  // Lower-cased tag name, '' when absent; String() guards non-string values.
  const tagOf = (node) => String(node.tag || node.tagName || '').toLowerCase();

  // Sorted class list taken from `classes` (array) or `className` (string).
  const classesOf = (node) => {
    if (Array.isArray(node.classes)) {
      return node.classes.filter(Boolean).sort();
    }
    if (typeof node.className === 'string') {
      return node.className.split(/\s+/).filter(Boolean).sort();
    }
    return [];
  };

  // Reduced signature used for the parent and for each sibling.
  const relativeOf = (node) => ({
    tag: tagOf(node),
    id: node.id || null,
    classes: classesOf(node),
  });

  const attributes = {};
  const rawAttrs = elementData.attributes || elementData.attrs || {};
  for (const key of Object.keys(rawAttrs).sort()) {
    const lower = key.toLowerCase();
    // class/id are captured separately; inline style is too volatile to match on.
    if (lower === 'class' || lower === 'id' || lower === 'style') continue;
    attributes[lower] = rawAttrs[key];
  }

  const rawText = elementData.text || elementData.textContent || elementData.innerText || '';
  const text = String(rawText).trim().substring(0, 200);

  const parent = elementData.parent ? relativeOf(elementData.parent) : null;

  const siblings = Array.isArray(elementData.siblings)
    ? elementData.siblings
      .filter((sib) => sib !== null && typeof sib === 'object')
      .map(relativeOf)
    : [];

  return {
    tag: tagOf(elementData),
    id: elementData.id || null,
    classes: classesOf(elementData),
    attributes,
    text,
    parent,
    siblings,
    capturedAt: new Date().toISOString(),
  };
}
219
-
220
- // ═══════════════════════════════════════════════════════════════════════
221
- // 3. Levenshtein Distance & Text Similarity
222
- // ═══════════════════════════════════════════════════════════════════════
223
-
224
/**
 * Levenshtein edit distance between two strings (insert, delete and
 * substitute each cost 1), compared by UTF-16 code unit.
 *
 * Only two rows of the dynamic-programming table are kept, and the shorter
 * input is used for the row, so memory is O(min(|a|, |b|)) rather than a full
 * (|a|+1) x (|b|+1) matrix. The distance is symmetric, so swapping the inputs
 * does not change the result.
 *
 * @param {string} a
 * @param {string} b
 * @returns {number} The edit distance. A missing/empty input yields the length
 *   of the other one (0 when that is missing too).
 */
function levenshteinDistance(a, b) {
  if (a === b) return 0;
  if (!a || !a.length) return b ? b.length : 0;
  if (!b || !b.length) return a.length;

  const [longer, shorter] = a.length >= b.length ? [a, b] : [b, a];
  const rowLength = shorter.length;

  // previousRow[j] = distance between the first i-1 units of `longer`
  // and the first j units of `shorter`.
  let previousRow = Array.from({ length: rowLength + 1 }, (_, j) => j);
  let currentRow = new Array(rowLength + 1);

  for (let i = 1; i <= longer.length; i++) {
    currentRow[0] = i;
    for (let j = 1; j <= rowLength; j++) {
      const substitutionCost = longer[i - 1] === shorter[j - 1] ? 0 : 1;
      currentRow[j] = Math.min(
        previousRow[j] + 1,
        currentRow[j - 1] + 1,
        previousRow[j - 1] + substitutionCost
      );
    }
    [previousRow, currentRow] = [currentRow, previousRow];
  }

  return previousRow[rowLength];
}
254
-
255
/**
 * Similarity of two strings as a ratio: 1 minus the Levenshtein distance
 * divided by the length of the longer string.
 *
 * @param {string} a
 * @param {string} b
 * @returns {number} 1 when both inputs are empty/missing, 0 when exactly one
 *   of them is, otherwise the ratio described above.
 */
function textSimilarity(a, b) {
  const aMissing = !a;
  const bMissing = !b;

  if (aMissing && bMissing) {
    return 1;
  }
  if (aMissing || bMissing) {
    return 0;
  }

  const longest = Math.max(a.length, b.length);
  if (longest === 0) {
    return 1;
  }

  const distance = levenshteinDistance(a, b);
  return 1 - (distance / longest);
}
262
-
263
- // ═══════════════════════════════════════════════════════════════════════
264
- // 4. Heal Selector (Core Algorithm)
265
- // ═══════════════════════════════════════════════════════════════════════
266
-
267
/**
 * Escape a value so it can be embedded in a selector as a CSS identifier
 * (after `#` or `.`). Follows the CSSOM "serialize an identifier" rules:
 * control characters and a leading digit become hex escapes, and any other
 * ASCII character that is not a letter, digit, `-` or `_` gets a backslash.
 *
 * @param {*} value - Identifier text (coerced to string).
 * @returns {string} Escaped identifier.
 */
function escapeCssIdentifier(value) {
  const str = String(value);
  let escaped = '';

  for (let i = 0; i < str.length; i++) {
    const ch = str[i];
    const code = str.charCodeAt(i);
    const isDigit = code >= 0x30 && code <= 0x39;

    if (code === 0) {
      escaped += '\uFFFD';
    } else if (
      (code >= 0x01 && code <= 0x1f) ||
      code === 0x7f ||
      (i === 0 && isDigit) ||
      (i === 1 && isDigit && str[0] === '-')
    ) {
      // Hex escape, terminated by a space so following hex digits are not absorbed.
      escaped += `\\${code.toString(16)} `;
    } else if (i === 0 && str.length === 1 && ch === '-') {
      escaped += '\\-';
    } else if (code >= 0x80 || ch === '-' || ch === '_' || isDigit || /[A-Za-z]/.test(ch)) {
      escaped += ch;
    } else {
      escaped += `\\${ch}`;
    }
  }

  return escaped;
}

/**
 * Try to find a working replacement for a selector that stopped matching.
 *
 * Every page element is scored against the signature stored for
 * (siteId, actionName) with five strategies, each weighted by how much it is
 * trusted: id_match (0.98), attribute_match (0.95), text_match (0.85),
 * structural_match (0.80) and class_match (0.75). Shared corrections recorded
 * for the failed selector are added as community_match candidates
 * (0.70, plus up to 0.15 depending on how often they were applied).
 * The best candidate with confidence >= 0.6 wins.
 *
 * Community corrections do not depend on page elements, so they are still
 * considered when `pageElements` is empty or missing.
 *
 * Side effects: always writes one healing_log row; on success also updates
 * the registry row (when one exists) and bumps the applied counter of a
 * winning community correction.
 *
 * @param {string} siteId
 * @param {string} actionName
 * @param {string} failedSelector - Selector that no longer matches.
 * @param {object[]} pageElements - Raw element descriptions from the page.
 * @returns {{healed: boolean, newSelector: (string|null), strategy: (string|null), confidence: number}}
 */
function healSelector(siteId, actionName, failedSelector, pageElements) {
  const registry = stmts.findRegistry.get(siteId, actionName);
  const registryId = registry ? registry.id : null;

  let storedSignature = {};
  if (registry) {
    try {
      const parsed = JSON.parse(registry.element_signature || '{}');
      // JSON such as "null" or "3" parses fine but is not a usable signature.
      if (parsed !== null && typeof parsed === 'object') storedSignature = parsed;
    } catch {
      /* unreadable signature: fall back to the empty one */
    }
  }

  const elements = Array.isArray(pageElements) ? pageElements : [];
  const candidates = [];

  for (const el of elements) {
    if (!el) continue;
    const sig = captureElementSignature(el);

    // Strategy 1: attribute_match — data-* and aria-* attributes
    const storedAttrs = storedSignature.attributes || {};
    const elAttrs = sig.attributes || {};
    let attrMatches = 0;
    let attrTotal = 0;
    for (const key of Object.keys(storedAttrs)) {
      if (key.startsWith('data-') || key.startsWith('aria-')) {
        attrTotal++;
        if (elAttrs[key] === storedAttrs[key]) attrMatches++;
      }
    }
    if (attrTotal > 0 && attrMatches > 0) {
      candidates.push({
        element: el,
        signature: sig,
        strategy: 'attribute_match',
        confidence: (attrMatches / attrTotal) * 0.95,
        selector: buildCSSPath(el),
      });
    }

    // Strategy 2: id_match — partial ID matching
    if (storedSignature.id && sig.id) {
      const sim = textSimilarity(storedSignature.id, sig.id);
      if (sim > 0.5) {
        candidates.push({
          element: el,
          signature: sig,
          strategy: 'id_match',
          confidence: sim * 0.98,
          selector: `#${escapeCssIdentifier(sig.id)}`,
        });
      }
    }

    // Strategy 3: text_match — Levenshtein-based text similarity
    if (storedSignature.text && sig.text) {
      const sim = textSimilarity(
        storedSignature.text.toLowerCase(),
        sig.text.toLowerCase()
      );
      if (sim > 0.5) {
        candidates.push({
          element: el,
          signature: sig,
          strategy: 'text_match',
          confidence: sim * 0.85,
          selector: buildCSSPath(el),
        });
      }
    }

    // Strategy 4: structural_match — parent > nth-child path matching
    if (storedSignature.parent && sig.parent) {
      let structScore = 0;
      let structChecks = 0;

      if (storedSignature.tag && sig.tag) {
        structChecks++;
        if (storedSignature.tag === sig.tag) structScore++;
      }

      structChecks++;
      if (storedSignature.parent.tag === sig.parent.tag) structScore++;

      if (storedSignature.parent.id && sig.parent.id) {
        structChecks++;
        if (storedSignature.parent.id === sig.parent.id) structScore++;
      }

      const storedParentClasses = storedSignature.parent.classes || [];
      const elParentClasses = sig.parent.classes || [];
      if (storedParentClasses.length > 0) {
        structChecks++;
        const overlap = storedParentClasses.filter((c) => elParentClasses.includes(c));
        structScore += overlap.length / storedParentClasses.length;
      }

      if (Array.isArray(storedSignature.siblings) && Array.isArray(sig.siblings)) {
        structChecks++;
        // Compare the sorted sibling tag lists as comma-joined strings.
        const storedSibTags = storedSignature.siblings.map((s) => s.tag).sort();
        const elSibTags = sig.siblings.map((s) => s.tag).sort();
        structScore += textSimilarity(storedSibTags.join(','), elSibTags.join(','));
      }

      const conf = structScore / structChecks;
      if (conf > 0.4) {
        const nthChild = el.index != null ? el.index + 1 : 1;
        const parentSel = sig.parent.id
          ? `#${escapeCssIdentifier(sig.parent.id)}`
          : (sig.parent.tag || '*');
        candidates.push({
          element: el,
          signature: sig,
          strategy: 'structural_match',
          confidence: conf * 0.80,
          selector: `${parentSel} > ${sig.tag || '*'}:nth-child(${nthChild})`,
        });
      }
    }

    // Strategy 5: class_match — overlapping CSS classes
    const storedClasses = storedSignature.classes || [];
    const elClasses = sig.classes || [];
    if (storedClasses.length > 0 && elClasses.length > 0) {
      const overlap = storedClasses.filter((c) => elClasses.includes(c));
      if (overlap.length > 0) {
        const conf = overlap.length / Math.max(storedClasses.length, elClasses.length);
        candidates.push({
          element: el,
          signature: sig,
          strategy: 'class_match',
          confidence: conf * 0.75,
          // Escape each class: names such as "md:flex" are not valid as-is.
          selector: `${sig.tag || '*'}.${overlap.map(escapeCssIdentifier).join('.')}`,
        });
      }
    }
  }

  // Strategy 6: community_match — known shared corrections for this selector
  const communityCorrections = stmts.findSharedCorrections.all(siteId, failedSelector);
  for (const corr of communityCorrections) {
    const appliedBoost = Math.min(corr.applied_count * 0.02, 0.15);
    candidates.push({
      element: null,
      signature: null,
      strategy: 'community_match',
      confidence: 0.70 + appliedBoost,
      selector: corr.new_selector,
      correctionId: corr.id,
    });
  }

  // Pick the highest-confidence candidate that clears the threshold.
  candidates.sort((x, y) => y.confidence - x.confidence);
  const best = candidates.find((c) => c.confidence >= 0.6);

  const logId = crypto.randomUUID();

  if (!best) {
    stmts.insertHealingLog.run(logId, registryId, siteId, failedSelector, null, 'none', 0, 0);
    return { healed: false, newSelector: null, strategy: null, confidence: 0 };
  }

  stmts.insertHealingLog.run(
    logId, registryId, siteId, failedSelector,
    best.selector, best.strategy, best.confidence, 1
  );

  if (registryId) {
    stmts.updateRegistrySelector.run(best.selector, best.confidence, registryId);
  }

  if (best.correctionId) {
    stmts.incrementCorrectionApplied.run(best.correctionId);
  }

  return {
    healed: true,
    newSelector: best.selector,
    strategy: best.strategy,
    confidence: Math.round(best.confidence * 1000) / 1000,
  };
}
455
-
456
- // ═══════════════════════════════════════════════════════════════════════
457
- // 5. Submit Correction
458
- // ═══════════════════════════════════════════════════════════════════════
459
-
460
/**
 * Record a corrected selector and, when it targets a known registry entry,
 * apply it to that entry.
 *
 * The correction row is always inserted. The registry is only updated when a
 * registryId is given and a row with that id exists.
 *
 * @param {string} siteId
 * @param {string|null|undefined} registryId - Registry row the correction is for, if any.
 * @param {object} correction
 * @param {string} correction.oldSelector
 * @param {string} correction.newSelector
 * @param {string} [correction.correctedBy] - Stored as 'user' when omitted.
 * @param {string} [correction.reason]
 * @param {boolean} [correction.shared] - Stored as 1/0.
 * @returns {{id: string, siteId: string, registryId: *, oldSelector: string, newSelector: string}}
 */
function submitCorrection(siteId, registryId, { oldSelector, newSelector, correctedBy, reason, shared }) {
  const id = crypto.randomUUID();
  const author = correctedBy || 'user';
  const sharedFlag = shared ? 1 : 0;

  stmts.insertCorrection.run(
    id,
    registryId || null,
    siteId,
    oldSelector,
    newSelector,
    author,
    reason || null,
    sharedFlag
  );

  const targetEntry = registryId ? stmts.findRegistryById.get(registryId) : null;
  if (targetEntry) {
    stmts.updateRegistryFromCorrection.run(newSelector, registryId);
  }

  return { id, siteId, registryId, oldSelector, newSelector };
}
479
-
480
- // ═══════════════════════════════════════════════════════════════════════
481
- // 6. Community Suggestions
482
- // ═══════════════════════════════════════════════════════════════════════
483
-
484
/**
 * List the shared corrections recorded for a failed selector on a site,
 * in the order the underlying query returns them (most-applied first).
 *
 * @param {string} siteId
 * @param {string} failedSelector
 * @returns {object[]} Matching selector_corrections rows.
 */
function getCommunitySuggestions(siteId, failedSelector) {
  const sharedCorrections = stmts.findSharedCorrections.all(siteId, failedSelector);
  return sharedCorrections;
}
487
-
488
- // ═══════════════════════════════════════════════════════════════════════
489
- // 7. Verify Selector
490
- // ═══════════════════════════════════════════════════════════════════════
491
-
492
/**
 * Compare the element a selector currently resolves to with the signature
 * stored at registration time, and persist the outcome.
 *
 * Up to five checks contribute equally to the confidence score: tag, id,
 * class overlap, attribute equality (over the union of stored and current
 * attribute names) and text similarity. A check only counts when the stored
 * signature has data for it (attributes count when either side has any).
 * The selector is considered valid at confidence >= 0.7.
 *
 * Side effect: writes the verified flag and the unrounded confidence to the
 * registry for (siteId, actionName).
 *
 * @param {string} siteId
 * @param {string} actionName
 * @param {object} elementData - Raw description of the element currently matched.
 * @returns {{valid: boolean, confidence: number, drift: (object|null), error: (string|undefined)}}
 *   `drift` lists every field that differs, or is null when nothing drifted.
 *   `error` is only present when the selector is not registered.
 */
function verifySelector(siteId, actionName, elementData) {
  const registry = stmts.findRegistry.get(siteId, actionName);
  if (!registry) {
    return { valid: false, confidence: 0, drift: null, error: 'Selector not registered' };
  }

  let storedSignature = {};
  try {
    const parsed = JSON.parse(registry.element_signature || '{}');
    // JSON such as "null" parses fine but is not a usable signature.
    if (parsed !== null && typeof parsed === 'object') storedSignature = parsed;
  } catch {
    /* unreadable signature: verify against the empty one */
  }

  const currentSignature = captureElementSignature(elementData);

  let totalChecks = 0;
  let matchScore = 0;
  const driftDetails = {};

  if (storedSignature.tag) {
    totalChecks++;
    if (storedSignature.tag === currentSignature.tag) {
      matchScore++;
    } else {
      driftDetails.tag = { expected: storedSignature.tag, actual: currentSignature.tag };
    }
  }

  if (storedSignature.id) {
    totalChecks++;
    if (storedSignature.id === currentSignature.id) {
      matchScore++;
    } else {
      driftDetails.id = { expected: storedSignature.id, actual: currentSignature.id };
    }
  }

  const storedClasses = Array.isArray(storedSignature.classes) ? storedSignature.classes : [];
  const currentClasses = Array.isArray(currentSignature.classes) ? currentSignature.classes : [];
  if (storedClasses.length > 0) {
    totalChecks++;
    const overlap = storedClasses.filter((c) => currentClasses.includes(c));
    const classRatio = overlap.length / storedClasses.length;
    matchScore += classRatio;
    if (classRatio < 1) {
      const removed = storedClasses.filter((c) => !currentClasses.includes(c));
      const added = currentClasses.filter((c) => !storedClasses.includes(c));
      driftDetails.classes = { removed, added, overlapRatio: classRatio };
    }
  }

  const storedAttrs = storedSignature.attributes || {};
  const currentAttrs = currentSignature.attributes || {};
  const allAttrKeys = [...new Set([...Object.keys(storedAttrs), ...Object.keys(currentAttrs)])];
  if (allAttrKeys.length > 0) {
    totalChecks++;
    let attrMatch = 0;
    const changedAttrs = {};
    for (const key of allAttrKeys) {
      if (storedAttrs[key] === currentAttrs[key]) {
        attrMatch++;
      } else {
        // `??` keeps real falsy values ('' / 0 / false) visible in the report;
        // only a missing attribute is reported as null.
        changedAttrs[key] = {
          expected: storedAttrs[key] ?? null,
          actual: currentAttrs[key] ?? null,
        };
      }
    }
    matchScore += attrMatch / allAttrKeys.length;
    if (Object.keys(changedAttrs).length > 0) {
      driftDetails.attributes = changedAttrs;
    }
  }

  if (storedSignature.text) {
    totalChecks++;
    const currentText = currentSignature.text || '';
    const sim = textSimilarity(storedSignature.text.toLowerCase(), currentText.toLowerCase());
    matchScore += sim;
    if (sim < 0.95) {
      driftDetails.text = {
        expected: storedSignature.text.substring(0, 50),
        actual: currentText.substring(0, 50),
        similarity: Math.round(sim * 1000) / 1000,
      };
    }
  }

  const confidence = totalChecks > 0 ? matchScore / totalChecks : 0;
  const valid = confidence >= 0.7;
  const hasDrift = Object.keys(driftDetails).length > 0;

  stmts.updateRegistryVerified.run(valid ? 1 : 0, confidence, siteId, actionName);

  return {
    valid,
    confidence: Math.round(confidence * 1000) / 1000,
    drift: hasDrift ? driftDetails : null,
  };
}
587
-
588
- // ═══════════════════════════════════════════════════════════════════════
589
- // 8. Selector Health
590
- // ═══════════════════════════════════════════════════════════════════════
591
-
592
/**
 * Summarise the state of every selector registered for a site.
 *
 * A selector is reported as broken when it is not verified or its
 * confidence is below 0.6.
 *
 * @param {string} siteId
 * @returns {{total: number, verified: number, healed: number, avgConfidence: number, broken: object[]}}
 *   `healed` counts selectors healed at least once; `avgConfidence` is
 *   rounded to three decimals.
 */
function getSelectorHealth(siteId) {
  const rows = stmts.getSelectorsBySite.all(siteId);
  const total = rows.length;

  if (total === 0) {
    return { total: 0, verified: 0, healed: 0, avgConfidence: 0, broken: [] };
  }

  const verified = rows.filter((row) => row.verified).length;
  const healed = rows.filter((row) => row.heal_count > 0).length;
  const confidenceTotal = rows.reduce((sum, row) => sum + row.confidence, 0);

  const broken = rows
    .filter((row) => !row.verified || row.confidence < 0.6)
    .map((row) => ({
      id: row.id,
      actionName: row.action_name,
      currentSelector: row.current_selector,
      confidence: row.confidence,
      healCount: row.heal_count,
      lastHealed: row.last_healed,
    }));

  const avgConfidence = Math.round((confidenceTotal / total) * 1000) / 1000;

  return { total, verified, healed, avgConfidence, broken };
}
629
-
630
- // ═══════════════════════════════════════════════════════════════════════
631
- // 9. Healing History
632
- // ═══════════════════════════════════════════════════════════════════════
633
-
634
/**
 * Fetch healing-log rows for a site, newest first.
 *
 * @param {string} siteId - Site whose healing log is read.
 * @param {object} [options]
 * @param {number|string} [options.limit=50] - Maximum number of rows. Values
 *   that are not a finite number >= 1 fall back to 50; fractions are truncated.
 * @param {string} [options.actionName] - When given, only rows whose registry
 *   entry has this action name are returned.
 * @returns {Array<object>} Matching healing_log rows.
 */
function getHealingHistory(siteId, { limit, actionName } = {}) {
  // SQLite treats a negative LIMIT as "no upper bound", so a caller-supplied
  // -1 would dump the whole table. Only accept a positive whole number.
  const requested = Number(limit);
  const lim = Number.isFinite(requested) && requested >= 1 ? Math.trunc(requested) : 50;

  if (actionName) {
    return stmts.getHealingLogBySiteAction.all(siteId, actionName, lim);
  }
  return stmts.getHealingLogBySite.all(siteId, lim);
}
641
-
642
- // ═══════════════════════════════════════════════════════════════════════
643
- // 10. Snapshot Elements
644
- // ═══════════════════════════════════════════════════════════════════════
645
-
646
/**
 * Store a snapshot of the signatures of the given page elements.
 *
 * @param {string} siteId - Site the snapshot belongs to.
 * @param {string} url - Page URL the elements were read from.
 * @param {Array<object>} elements - Raw element descriptors; anything that is
 *   not an array is stored as an empty snapshot.
 * @returns {{id: string, siteId: string, url: string, elementCount: number}}
 *   Identifier of the new snapshot row and the number of signatures stored.
 */
function snapshotElements(siteId, url, elements) {
  const id = crypto.randomUUID();

  const source = Array.isArray(elements) ? elements : [];
  const signatures = source.map((el) => captureElementSignature(el));
  const serialized = JSON.stringify(signatures);

  // The selector column is always written as the wildcard '*'.
  stmts.insertSnapshot.run(id, siteId, url, '*', serialized);

  return { id, siteId, url, elementCount: signatures.length };
}
659
-
660
- // ═══════════════════════════════════════════════════════════════════════
661
- // 11. Detect Drift
662
- // ═══════════════════════════════════════════════════════════════════════
663
-
664
/**
 * Compare the elements currently on a page with the latest stored snapshot.
 *
 * Elements are identified by a fingerprint of tag, id, classes and text.
 * A current element with an unknown fingerprint is reported as "changed"
 * when the previous element at the same index has the same tag and a
 * fingerprint similarity strictly between 0.3 and 1.0; otherwise it is
 * "added". Previous fingerprints that no longer occur are "removed" unless
 * their index was already reported as changed.
 *
 * Note: elements sharing an identical fingerprint collapse to one map entry
 * (the last index wins), so duplicates are reported at most once.
 *
 * @param {string} siteId - Site the page belongs to.
 * @param {string} url - Page URL used to look up the latest snapshot.
 * @param {Array<object>} currentElements - Raw descriptors of current elements.
 * @returns {object} Drift report with `hasDrift`, `changed`, `added`,
 *   `removed`, and (when a snapshot exists) `snapshotDate` and `summary`.
 */
function detectDrift(siteId, url, currentElements) {
  const lastSnapshot = stmts.getLatestSnapshot.get(siteId, url);
  if (!lastSnapshot) {
    return { hasDrift: false, message: 'No previous snapshot found', changed: [], added: [], removed: [] };
  }

  // The snapshot column defaults to '{}' and may hold anything that was
  // written to it: accept only arrays, and replace non-object entries with
  // an empty signature so fingerprinting cannot throw.
  let previousElements = [];
  try {
    const parsed = JSON.parse(lastSnapshot.snapshot || '[]');
    if (Array.isArray(parsed)) {
      previousElements = parsed.map((entry) => (entry && typeof entry === 'object' ? entry : {}));
    }
  } catch {
    // Unparsable snapshot JSON is treated as an empty snapshot.
  }

  const currentSigs = (Array.isArray(currentElements) ? currentElements : [])
    .map((el) => captureElementSignature(el));

  const fingerprint = (sig) => {
    const classes = Array.isArray(sig.classes) ? sig.classes : [];
    return `${sig.tag || ''}|${sig.id || ''}|${classes.join(',')}|${sig.text || ''}`;
  };

  const prevFingerprints = new Map();
  for (let i = 0; i < previousElements.length; i++) {
    prevFingerprints.set(fingerprint(previousElements[i]), i);
  }

  const currFingerprints = new Map();
  for (let i = 0; i < currentSigs.length; i++) {
    currFingerprints.set(fingerprint(currentSigs[i]), i);
  }

  const changed = [];
  const added = [];
  const removed = [];
  // Indices already reported as changed; a Set avoids rescanning `changed`.
  const changedIndices = new Set();

  for (const [fp, idx] of currFingerprints) {
    if (prevFingerprints.has(fp)) continue;

    const previousAtIndex = previousElements[idx];
    if (previousAtIndex && previousAtIndex.tag === currentSigs[idx].tag) {
      const sim = textSimilarity(fingerprint(previousAtIndex), fp);
      if (sim > 0.3 && sim < 1.0) {
        changed.push({
          index: idx,
          previous: previousAtIndex,
          current: currentSigs[idx],
          similarity: Math.round(sim * 1000) / 1000,
        });
        changedIndices.add(idx);
        continue;
      }
    }
    added.push({ index: idx, element: currentSigs[idx] });
  }

  for (const [fp, idx] of prevFingerprints) {
    if (!currFingerprints.has(fp) && !changedIndices.has(idx)) {
      removed.push({ index: idx, element: previousElements[idx] });
    }
  }

  return {
    hasDrift: changed.length > 0 || added.length > 0 || removed.length > 0,
    snapshotDate: lastSnapshot.captured_at,
    changed,
    added,
    removed,
    summary: {
      changedCount: changed.length,
      addedCount: added.length,
      removedCount: removed.length,
    },
  };
}
735
-
736
- // ═══════════════════════════════════════════════════════════════════════
737
- // 12. Build CSS Path
738
- // ═══════════════════════════════════════════════════════════════════════
739
-
740
/**
 * Build a CSS selector string from an element descriptor.
 *
 * An element with an id yields `#id`. Otherwise the selector is the
 * lower-cased tag (or `*`), followed by every class and by one attribute
 * selector per attribute except `class`, `id` and `style`.
 *
 * @param {object} elementData - Descriptor with optional `tag`/`tagName`,
 *   `id`, `classes` (array) or `className` (string), and
 *   `attributes`/`attrs` (object).
 * @returns {string} CSS selector; `*` when no descriptor is given.
 */
function buildCSSPath(elementData) {
  if (!elementData) return '*';

  const tag = (elementData.tag || elementData.tagName || '').toLowerCase();
  const id = elementData.id;

  if (id) return `#${id}`;

  let selector = tag || '*';

  let classes = [];
  if (Array.isArray(elementData.classes)) {
    classes = elementData.classes.filter(Boolean);
  } else if (typeof elementData.className === 'string') {
    classes = elementData.className.split(/\s+/).filter(Boolean);
  }
  if (classes.length > 0) {
    selector += `.${classes.join('.')}`;
  }

  const attrs = elementData.attributes || elementData.attrs || {};
  for (const key of Object.keys(attrs)) {
    const lower = key.toLowerCase();
    if (lower === 'class' || lower === 'id' || lower === 'style') continue;

    const val = attrs[key];
    if (val == null || val === '') {
      // Valueless attribute: match on presence only.
      selector += `[${lower}]`;
      continue;
    }

    // Values may arrive as numbers or booleans, so stringify first.
    // Backslashes are escaped before quotes so that a trailing backslash
    // cannot swallow the closing quote of the CSS string.
    const escaped = String(val).replace(/\\/g, '\\\\').replace(/"/g, '\\"');
    selector += `[${lower}="${escaped}"]`;
  }

  return selector;
}
774
-
775
- // ═══════════════════════════════════════════════════════════════════════
776
- // 13. Build XPath
777
- // ═══════════════════════════════════════════════════════════════════════
778
-
779
/**
 * Build an XPath expression from an element descriptor.
 *
 * An element with an id yields `//tag[@id=...]`. Otherwise the expression
 * combines, with `and`: one `contains(@class, ...)` test per class, one
 * test per attribute except `class`, `id` and `style`, and an exact
 * normalised-text test when the trimmed text is at most 80 characters long.
 *
 * @param {object} elementData - Descriptor with optional `tag`/`tagName`,
 *   `id`, `classes`/`className`, `attributes`/`attrs`, and
 *   `text`/`textContent`/`innerText`.
 * @returns {string} XPath expression; `//*` when no descriptor is given.
 */
function buildXPath(elementData) {
  if (!elementData) return '//*';

  // XPath 1.0 string literals have no escape sequences. Use whichever quote
  // the value does not contain; when it contains both, stitch the pieces
  // together with concat().
  const toLiteral = (value) => {
    const str = String(value);
    if (!str.includes('"')) return `"${str}"`;
    if (!str.includes("'")) return `'${str}'`;
    const pieces = str.split('"').map((piece) => `"${piece}"`);
    return `concat(${pieces.join(`, '"', `)})`;
  };

  const tag = (elementData.tag || elementData.tagName || '').toLowerCase() || '*';
  const id = elementData.id;

  if (id) return `//${tag}[@id=${toLiteral(id)}]`;

  const predicates = [];

  let classes = [];
  if (Array.isArray(elementData.classes)) {
    classes = elementData.classes.filter(Boolean);
  } else if (typeof elementData.className === 'string') {
    classes = elementData.className.split(/\s+/).filter(Boolean);
  }
  for (const cls of classes) {
    predicates.push(`contains(@class, ${toLiteral(cls)})`);
  }

  const attrs = elementData.attributes || elementData.attrs || {};
  for (const key of Object.keys(attrs)) {
    const lower = key.toLowerCase();
    if (lower === 'class' || lower === 'id' || lower === 'style') continue;
    const val = attrs[key];
    if (val != null && val !== '') {
      predicates.push(`@${lower}=${toLiteral(val)}`);
    } else {
      predicates.push(`@${lower}`);
    }
  }

  // String() keeps numeric text from crashing on .trim().
  const text = String(elementData.text || elementData.textContent || elementData.innerText || '').trim();
  if (text && text.length <= 80) {
    predicates.push(`normalize-space(text())=${toLiteral(text)}`);
  }

  if (predicates.length === 0) return `//${tag}`;
  return `//${tag}[${predicates.join(' and ')}]`;
}
823
-
824
- // ═══════════════════════════════════════════════════════════════════════
825
- // Exports
826
- // ═══════════════════════════════════════════════════════════════════════
827
-
828
// Public API of the module: selector registration, signature capture,
// healing, corrections, verification, health/history reporting, snapshots
// and drift detection, plus the string-distance and selector-building
// helpers.
module.exports = {
  registerSelector,
  captureElementSignature,
  healSelector,
  levenshteinDistance,
  textSimilarity,
  submitCorrection,
  getCommunitySuggestions,
  verifySelector,
  getSelectorHealth,
  getHealingHistory,
  snapshotElements,
  detectDrift,
  buildCSSPath,
  buildXPath,
};
1
+ const { db } = require('../models/db');
2
+ const crypto = require('crypto');
3
+
4
+ // ═══════════════════════════════════════════════════════════════════════
5
+ // Schema
6
+ // ═══════════════════════════════════════════════════════════════════════
7
+
8
+ db.exec(`
9
+ CREATE TABLE IF NOT EXISTS selector_registry (
10
+ id TEXT PRIMARY KEY,
11
+ site_id TEXT NOT NULL,
12
+ action_name TEXT NOT NULL,
13
+ original_selector TEXT NOT NULL,
14
+ current_selector TEXT NOT NULL,
15
+ selector_type TEXT,
16
+ element_signature TEXT DEFAULT '{}',
17
+ confidence REAL DEFAULT 1.0,
18
+ verified INTEGER DEFAULT 1,
19
+ heal_count INTEGER DEFAULT 0,
20
+ last_verified TEXT,
21
+ last_healed TEXT,
22
+ created_at TEXT DEFAULT (datetime('now')),
23
+ updated_at TEXT DEFAULT (datetime('now'))
24
+ );
25
+
26
+ CREATE TABLE IF NOT EXISTS selector_corrections (
27
+ id TEXT PRIMARY KEY,
28
+ registry_id TEXT,
29
+ site_id TEXT NOT NULL,
30
+ old_selector TEXT NOT NULL,
31
+ new_selector TEXT NOT NULL,
32
+ corrected_by TEXT,
33
+ reason TEXT,
34
+ shared INTEGER DEFAULT 0,
35
+ applied_count INTEGER DEFAULT 0,
36
+ created_at TEXT DEFAULT (datetime('now'))
37
+ );
38
+
39
+ CREATE TABLE IF NOT EXISTS healing_log (
40
+ id TEXT PRIMARY KEY,
41
+ registry_id TEXT,
42
+ site_id TEXT NOT NULL,
43
+ old_selector TEXT NOT NULL,
44
+ new_selector TEXT,
45
+ strategy TEXT,
46
+ confidence REAL,
47
+ success INTEGER,
48
+ created_at TEXT DEFAULT (datetime('now'))
49
+ );
50
+
51
+ CREATE TABLE IF NOT EXISTS element_snapshots (
52
+ id TEXT PRIMARY KEY,
53
+ site_id TEXT NOT NULL,
54
+ url TEXT NOT NULL,
55
+ selector TEXT NOT NULL,
56
+ snapshot TEXT DEFAULT '{}',
57
+ captured_at TEXT DEFAULT (datetime('now'))
58
+ );
59
+
60
+ CREATE INDEX IF NOT EXISTS idx_selector_registry_site ON selector_registry(site_id);
61
+ CREATE INDEX IF NOT EXISTS idx_selector_registry_action ON selector_registry(action_name);
62
+ CREATE INDEX IF NOT EXISTS idx_selector_registry_site_action ON selector_registry(site_id, action_name);
63
+ CREATE INDEX IF NOT EXISTS idx_selector_corrections_site ON selector_corrections(site_id);
64
+ CREATE INDEX IF NOT EXISTS idx_healing_log_site ON healing_log(site_id);
65
+ CREATE INDEX IF NOT EXISTS idx_element_snapshots_site ON element_snapshots(site_id);
66
+ CREATE INDEX IF NOT EXISTS idx_element_snapshots_site_url ON element_snapshots(site_id, url);
67
+ `);
68
+
69
+ // ═══════════════════════════════════════════════════════════════════════
70
+ // Prepared Statements
71
+ // ═══════════════════════════════════════════════════════════════════════
72
+
73
// Statements prepared once at module load and reused by every function
// below. Parameters are positional, in the order of the `?` placeholders.
const stmts = {
  // New registry row; confidence starts at 1.0 and last_verified at "now".
  insertRegistry: db.prepare(`
    INSERT INTO selector_registry (id, site_id, action_name, original_selector, current_selector, selector_type, element_signature, confidence, last_verified)
    VALUES (?, ?, ?, ?, ?, ?, ?, 1.0, datetime('now'))
  `),
  // Registry lookup by (site_id, action_name).
  findRegistry: db.prepare(`
    SELECT * FROM selector_registry WHERE site_id = ? AND action_name = ?
  `),
  // Registry lookup by primary key.
  findRegistryById: db.prepare(`
    SELECT * FROM selector_registry WHERE id = ?
  `),
  // Record a healed selector: new selector + confidence, heal_count + 1.
  updateRegistrySelector: db.prepare(`
    UPDATE selector_registry
    SET current_selector = ?, confidence = ?, heal_count = heal_count + 1,
        last_healed = datetime('now'), updated_at = datetime('now')
    WHERE id = ?
  `),
  // Record a verification result for every row matching (site_id, action_name).
  updateRegistryVerified: db.prepare(`
    UPDATE selector_registry
    SET verified = ?, confidence = ?, last_verified = datetime('now'), updated_at = datetime('now')
    WHERE site_id = ? AND action_name = ?
  `),
  // New correction row.
  insertCorrection: db.prepare(`
    INSERT INTO selector_corrections (id, registry_id, site_id, old_selector, new_selector, corrected_by, reason, shared)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
  `),
  // Apply a correction to a registry row: confidence is reset to 1.0 and
  // heal_count is incremented.
  updateRegistryFromCorrection: db.prepare(`
    UPDATE selector_registry
    SET current_selector = ?, confidence = 1.0, heal_count = heal_count + 1,
        last_healed = datetime('now'), updated_at = datetime('now')
    WHERE id = ?
  `),
  // Shared corrections for a given site and old selector, most applied first.
  findSharedCorrections: db.prepare(`
    SELECT * FROM selector_corrections
    WHERE site_id = ? AND old_selector = ? AND shared = 1
    ORDER BY applied_count DESC
  `),
  // Bump the usage counter of one correction.
  incrementCorrectionApplied: db.prepare(`
    UPDATE selector_corrections SET applied_count = applied_count + 1 WHERE id = ?
  `),
  // New healing-log row (one per healing attempt).
  insertHealingLog: db.prepare(`
    INSERT INTO healing_log (id, registry_id, site_id, old_selector, new_selector, strategy, confidence, success)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
  `),
  // Healing log for a site, newest first, limited to N rows.
  getHealingLogBySite: db.prepare(`
    SELECT * FROM healing_log WHERE site_id = ? ORDER BY created_at DESC LIMIT ?
  `),
  // Same, restricted to log rows whose registry entry has the given action
  // name. The inner join drops log rows with no matching registry row.
  getHealingLogBySiteAction: db.prepare(`
    SELECT hl.* FROM healing_log hl
    JOIN selector_registry sr ON hl.registry_id = sr.id
    WHERE hl.site_id = ? AND sr.action_name = ?
    ORDER BY hl.created_at DESC LIMIT ?
  `),
  // All registry rows of a site.
  getSelectorsBySite: db.prepare(`
    SELECT * FROM selector_registry WHERE site_id = ?
  `),
  // New snapshot row; captured_at is set to "now".
  insertSnapshot: db.prepare(`
    INSERT INTO element_snapshots (id, site_id, url, selector, snapshot, captured_at)
    VALUES (?, ?, ?, ?, ?, datetime('now'))
  `),
  // Most recent snapshot for (site_id, url).
  getLatestSnapshot: db.prepare(`
    SELECT * FROM element_snapshots WHERE site_id = ? AND url = ? ORDER BY captured_at DESC LIMIT 1
  `),
};
137
+
138
+ // ═══════════════════════════════════════════════════════════════════════
139
+ // 1. Register Selector
140
+ // ═══════════════════════════════════════════════════════════════════════
141
+
142
/**
 * Register a selector for an action on a site.
 *
 * The selector is stored as both the original and the current selector.
 *
 * @param {string} siteId - Site the selector belongs to.
 * @param {object} details
 * @param {string} details.actionName - Action the selector is used for.
 * @param {string} details.selector - The selector text.
 * @param {string} [details.selectorType] - Selector kind; 'css' when falsy.
 * @param {object|string} [details.elementSignature] - Signature object, or
 *   an already serialized string which is stored unchanged.
 * @returns {{id: string, siteId: string, actionName: string, selector: string, selectorType: string}}
 */
function registerSelector(siteId, { actionName, selector, selectorType, elementSignature }) {
  const id = crypto.randomUUID();
  const type = selectorType || 'css';

  let signatureJson;
  if (typeof elementSignature === 'string') {
    signatureJson = elementSignature;
  } else {
    signatureJson = JSON.stringify(elementSignature || {});
  }

  stmts.insertRegistry.run(id, siteId, actionName, selector, selector, type, signatureJson);

  return { id, siteId, actionName, selector, selectorType: type };
}
155
+
156
+ // ═══════════════════════════════════════════════════════════════════════
157
+ // 2. Capture Element Signature
158
+ // ═══════════════════════════════════════════════════════════════════════
159
+
160
/**
 * Normalise a raw element descriptor into a comparable signature.
 *
 * @param {object} elementData - Descriptor with optional `tag`/`tagName`,
 *   `id`, `classes` (array) or `className` (string), `attributes`/`attrs`,
 *   `text`/`textContent`/`innerText`, `parent` and `siblings`.
 * @returns {object} `{}` when no descriptor is given; otherwise an object
 *   with lower-cased `tag`, `id` (or null), sorted `classes`, `attributes`
 *   (lower-cased keys, without class/id/style), `text` (trimmed, at most
 *   200 characters), `parent` (or null), `siblings`, and `capturedAt`
 *   (ISO timestamp of this call).
 */
function captureElementSignature(elementData) {
  if (!elementData) return {};

  // Shared normalisers. They tolerate missing or malformed nodes so a single
  // bad sibling entry or a non-string className cannot abort the capture.
  const tagOf = (node) => String(node.tag || node.tagName || '').toLowerCase();
  const classesOf = (node) => {
    if (Array.isArray(node.classes)) return node.classes.filter(Boolean).sort();
    if (typeof node.className === 'string') return node.className.split(/\s+/).filter(Boolean).sort();
    return [];
  };
  const relativeOf = (node) => {
    const safe = node && typeof node === 'object' ? node : {};
    return { tag: tagOf(safe), id: safe.id || null, classes: classesOf(safe) };
  };

  const attrs = {};
  const rawAttrs = elementData.attributes || elementData.attrs || {};
  for (const key of Object.keys(rawAttrs).sort()) {
    const lower = key.toLowerCase();
    // class, id and style are captured separately or deliberately ignored.
    if (lower === 'class' || lower === 'id' || lower === 'style') continue;
    attrs[lower] = rawAttrs[key];
  }

  // String() keeps numeric text from crashing on .trim().
  const text = String(elementData.text || elementData.textContent || elementData.innerText || '')
    .trim()
    .substring(0, 200);

  const parent = elementData.parent ? relativeOf(elementData.parent) : null;
  const siblings = Array.isArray(elementData.siblings)
    ? elementData.siblings.map((sib) => relativeOf(sib))
    : [];

  return {
    tag: tagOf(elementData),
    id: elementData.id || null,
    classes: classesOf(elementData),
    attributes: attrs,
    text,
    parent,
    siblings,
    capturedAt: new Date().toISOString(),
  };
}
219
+
220
+ // ═══════════════════════════════════════════════════════════════════════
221
+ // 3. Levenshtein Distance & Text Similarity
222
+ // ═══════════════════════════════════════════════════════════════════════
223
+
224
/**
 * Levenshtein edit distance between two strings.
 *
 * Uses two rolling rows instead of a full (m+1) x (n+1) matrix, so memory
 * is proportional to the shorter input rather than to the product of both
 * lengths. The result is unchanged.
 *
 * @param {string} a - First string; null/undefined/empty counts as empty.
 * @param {string} b - Second string; null/undefined/empty counts as empty.
 * @returns {number} Minimum number of single-character insertions,
 *   deletions and substitutions turning `a` into `b`.
 */
function levenshteinDistance(a, b) {
  if (a === b) return 0;
  if (!a || !a.length) return b ? b.length : 0;
  if (!b || !b.length) return a.length;

  // The distance is symmetric, so iterate rows over the longer input and
  // keep the row width at the shorter one.
  const [longer, shorter] = a.length >= b.length ? [a, b] : [b, a];
  const width = shorter.length;

  let previousRow = Array.from({ length: width + 1 }, (_, j) => j);
  let currentRow = new Array(width + 1);

  for (let i = 1; i <= longer.length; i++) {
    currentRow[0] = i;
    for (let j = 1; j <= width; j++) {
      const cost = longer[i - 1] === shorter[j - 1] ? 0 : 1;
      currentRow[j] = Math.min(
        previousRow[j] + 1, // deletion
        currentRow[j - 1] + 1, // insertion
        previousRow[j - 1] + cost // substitution or match
      );
    }
    [previousRow, currentRow] = [currentRow, previousRow];
  }

  // After the final swap the last computed row is in previousRow.
  return previousRow[width];
}
254
+
255
/**
 * Similarity of two strings as `1 - editDistance / longerLength`.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} 1 when both inputs are falsy, 0 when exactly one is
 *   falsy, otherwise a ratio where 1 means identical.
 */
function textSimilarity(a, b) {
  const missingA = !a;
  const missingB = !b;
  if (missingA && missingB) return 1;
  if (missingA || missingB) return 0;

  const longest = Math.max(a.length, b.length);
  return longest === 0 ? 1 : 1 - (levenshteinDistance(a, b) / longest);
}
262
+
263
+ // ═══════════════════════════════════════════════════════════════════════
264
+ // 4. Heal Selector (Core Algorithm)
265
+ // ═══════════════════════════════════════════════════════════════════════
266
+
267
/**
 * Try to find a replacement for a selector that no longer matches.
 *
 * Every page element is scored against the signature stored for
 * (siteId, actionName) using five strategies, each scaled by a weight:
 *   attribute_match  (x0.95) - matching data-* / aria-* attribute values
 *   id_match         (x0.98) - id similarity above 0.5
 *   text_match       (x0.85) - case-insensitive text similarity above 0.5
 *   structural_match (x0.80) - tag, parent and sibling-tag agreement above 0.4
 *   class_match      (x0.75) - overlapping class names
 * Shared corrections recorded for the failed selector are added as
 * community_match candidates (0.70 plus up to 0.15 by usage count).
 * The highest-scoring candidate with confidence >= 0.6 wins.
 *
 * Side effects: always writes one healing_log row; on success it also
 * updates the registry row (when one exists) and bumps the applied counter
 * of the winning correction (when the winner is a community match).
 *
 * @param {string} siteId - Site the selector belongs to.
 * @param {string} actionName - Action whose selector failed.
 * @param {string} failedSelector - The selector that stopped matching.
 * @param {Array<object>} pageElements - Raw descriptors of candidate elements.
 * @returns {{healed: boolean, newSelector: ?string, strategy: ?string, confidence: number}}
 */
function healSelector(siteId, actionName, failedSelector, pageElements) {
  const registry = stmts.findRegistry.get(siteId, actionName);
  const registryId = registry ? registry.id : null;

  let storedSignature = {};
  if (registry) {
    try {
      const parsed = JSON.parse(registry.element_signature || '{}');
      // A stored "null" or primitive would otherwise crash the property reads below.
      if (parsed && typeof parsed === 'object') storedSignature = parsed;
    } catch {
      // Unparsable signature: continue with an empty one.
    }
  }

  // Shared failure path: log the unsuccessful attempt and report it.
  const recordFailure = () => {
    stmts.insertHealingLog.run(crypto.randomUUID(), registryId, siteId, failedSelector, null, 'none', 0, 0);
    return { healed: false, newSelector: null, strategy: null, confidence: 0 };
  };

  if (!Array.isArray(pageElements) || pageElements.length === 0) {
    return recordFailure();
  }

  // Everything derived from the stored signature is loop-invariant.
  const storedAttrs = storedSignature.attributes || {};
  // 'aria-label' is already covered by the 'aria-' prefix test.
  const stableAttrKeys = Object.keys(storedAttrs)
    .filter((key) => key.startsWith('data-') || key.startsWith('aria-'));
  const storedTextLower = typeof storedSignature.text === 'string' && storedSignature.text
    ? storedSignature.text.toLowerCase()
    : null;
  const storedParent = storedSignature.parent;
  const storedParentClasses = (storedParent && storedParent.classes) || [];
  const storedSiblingTags = Array.isArray(storedSignature.siblings)
    ? storedSignature.siblings.map((s) => s.tag).sort().join(',')
    : null;
  const storedClasses = storedSignature.classes || [];

  const candidates = [];

  for (const el of pageElements) {
    const sig = captureElementSignature(el);
    const addCandidate = (strategy, confidence, selector) => {
      candidates.push({ element: el, signature: sig, strategy, confidence, selector });
    };

    // Strategy 1: attribute_match
    if (stableAttrKeys.length > 0) {
      const elAttrs = sig.attributes || {};
      const attrMatches = stableAttrKeys.filter((key) => elAttrs[key] === storedAttrs[key]).length;
      if (attrMatches > 0) {
        addCandidate('attribute_match', (attrMatches / stableAttrKeys.length) * 0.95, buildCSSPath(el));
      }
    }

    // Strategy 2: id_match
    if (storedSignature.id && sig.id) {
      const sim = textSimilarity(storedSignature.id, sig.id);
      if (sim > 0.5) {
        addCandidate('id_match', sim * 0.98, `#${sig.id}`);
      }
    }

    // Strategy 3: text_match
    if (storedTextLower && sig.text) {
      const sim = textSimilarity(storedTextLower, sig.text.toLowerCase());
      if (sim > 0.5) {
        addCandidate('text_match', sim * 0.85, buildCSSPath(el));
      }
    }

    // Strategy 4: structural_match
    if (storedParent && sig.parent) {
      let structScore = 0;
      let structChecks = 0;

      if (storedSignature.tag && sig.tag) {
        structChecks++;
        if (storedSignature.tag === sig.tag) structScore++;
      }

      // The parent tag is always compared, so structChecks is never zero.
      structChecks++;
      if (storedParent.tag === sig.parent.tag) structScore++;

      if (storedParent.id && sig.parent.id) {
        structChecks++;
        if (storedParent.id === sig.parent.id) structScore++;
      }

      if (storedParentClasses.length > 0) {
        structChecks++;
        const elParentClasses = sig.parent.classes || [];
        const overlap = storedParentClasses.filter((c) => elParentClasses.includes(c));
        structScore += overlap.length / storedParentClasses.length;
      }

      if (storedSiblingTags !== null && sig.siblings) {
        structChecks++;
        const elSiblingTags = sig.siblings.map((s) => s.tag).sort().join(',');
        structScore += textSimilarity(storedSiblingTags, elSiblingTags);
      }

      const conf = structScore / structChecks;
      if (conf > 0.4) {
        // el.index is used as a zero-based position when the caller provides it.
        const nthChild = el.index != null ? el.index + 1 : 1;
        const parentSel = sig.parent.id ? `#${sig.parent.id}` : sig.parent.tag;
        addCandidate('structural_match', conf * 0.80, `${parentSel} > ${sig.tag}:nth-child(${nthChild})`);
      }
    }

    // Strategy 5: class_match
    const elClasses = sig.classes || [];
    if (storedClasses.length > 0 && elClasses.length > 0) {
      const overlap = storedClasses.filter((c) => elClasses.includes(c));
      if (overlap.length > 0) {
        const conf = overlap.length / Math.max(storedClasses.length, elClasses.length);
        addCandidate('class_match', conf * 0.75, `${sig.tag || '*'}.${overlap.join('.')}`);
      }
    }
  }

  // Strategy 6: community_match
  for (const corr of stmts.findSharedCorrections.all(siteId, failedSelector)) {
    const appliedBoost = Math.min(corr.applied_count * 0.02, 0.15);
    candidates.push({
      element: null,
      signature: null,
      strategy: 'community_match',
      confidence: 0.70 + appliedBoost,
      selector: corr.new_selector,
      correctionId: corr.id,
    });
  }

  // Highest confidence first; the sort is stable, so ties keep push order.
  candidates.sort((x, y) => y.confidence - x.confidence);
  const best = candidates.find((c) => c.confidence >= 0.6);

  if (!best) {
    return recordFailure();
  }

  stmts.insertHealingLog.run(
    crypto.randomUUID(), registryId, siteId, failedSelector,
    best.selector, best.strategy, best.confidence, 1
  );

  if (registryId) {
    stmts.updateRegistrySelector.run(best.selector, best.confidence, registryId);
  }

  if (best.correctionId) {
    stmts.incrementCorrectionApplied.run(best.correctionId);
  }

  return {
    healed: true,
    newSelector: best.selector,
    strategy: best.strategy,
    confidence: Math.round(best.confidence * 1000) / 1000,
  };
}
455
+
456
+ // ═══════════════════════════════════════════════════════════════════════
457
+ // 5. Submit Correction
458
+ // ═══════════════════════════════════════════════════════════════════════
459
+
460
/**
 * Record a manual selector correction and, when it targets a registry row
 * of the same site, make the corrected selector the current one.
 *
 * @param {string} siteId - Site the correction is submitted for.
 * @param {?string} registryId - Registry row to update, if any.
 * @param {object} correction
 * @param {string} correction.oldSelector - Selector being replaced.
 * @param {string} correction.newSelector - Replacement selector.
 * @param {string} [correction.correctedBy] - Author; 'user' when falsy.
 * @param {string} [correction.reason] - Free-text reason; null when falsy.
 * @param {boolean} [correction.shared] - Whether the correction is shared.
 * @returns {{id: string, siteId: string, registryId: ?string, oldSelector: string, newSelector: string}}
 */
function submitCorrection(siteId, registryId, { oldSelector, newSelector, correctedBy, reason, shared }) {
  const id = crypto.randomUUID();
  stmts.insertCorrection.run(
    id, registryId || null, siteId,
    oldSelector, newSelector,
    correctedBy || 'user',
    reason || null,
    shared ? 1 : 0
  );

  if (registryId) {
    const reg = stmts.findRegistryById.get(registryId);
    // Only rewrite a registry row owned by the submitting site; otherwise a
    // correction for one site could overwrite another site's selector.
    // Compared as strings because site_id is a TEXT column.
    if (reg && String(reg.site_id) === String(siteId)) {
      stmts.updateRegistryFromCorrection.run(newSelector, registryId);
    }
  }

  return { id, siteId, registryId, oldSelector, newSelector };
}
479
+
480
+ // ═══════════════════════════════════════════════════════════════════════
481
+ // 6. Community Suggestions
482
+ // ═══════════════════════════════════════════════════════════════════════
483
+
484
/**
 * List the shared corrections recorded for a failed selector on a site.
 *
 * @param {string} siteId - Site to search.
 * @param {string} failedSelector - Selector the corrections were filed against.
 * @returns {Array<object>} selector_corrections rows, most applied first.
 */
function getCommunitySuggestions(siteId, failedSelector) {
  const sharedCorrections = stmts.findSharedCorrections.all(siteId, failedSelector);
  return sharedCorrections;
}
487
+
488
+ // ═══════════════════════════════════════════════════════════════════════
489
+ // 7. Verify Selector
490
+ // ═══════════════════════════════════════════════════════════════════════
491
+
492
/**
 * Check how closely an element still matches the signature stored for
 * (siteId, actionName) and persist the result.
 *
 * Up to five aspects are compared when they can be: tag, id, classes,
 * attributes and text. Tag, id, classes and text are only checked when the
 * stored signature has them; attributes are checked when either side has
 * any. Each check contributes a score between 0 and 1; confidence is the
 * mean, and the selector is valid at confidence >= 0.7. The registry row's
 * verified flag and confidence are updated.
 *
 * @param {string} siteId - Site the selector belongs to.
 * @param {string} actionName - Action whose selector is verified.
 * @param {object} elementData - Raw descriptor of the element found.
 * @returns {{valid: boolean, confidence: number, drift: ?object, error?: string}}
 *   `drift` lists the aspects that differ, or null when none do. `error` is
 *   set only when the selector is not registered.
 */
function verifySelector(siteId, actionName, elementData) {
  const registry = stmts.findRegistry.get(siteId, actionName);
  if (!registry) {
    return { valid: false, confidence: 0, drift: null, error: 'Selector not registered' };
  }

  let storedSignature = {};
  try {
    const parsed = JSON.parse(registry.element_signature || '{}');
    // A stored "null" or primitive would otherwise crash the property reads below.
    if (parsed && typeof parsed === 'object') storedSignature = parsed;
  } catch {
    // Unparsable signature: verify against an empty one.
  }

  const currentSignature = captureElementSignature(elementData);

  let totalChecks = 0;
  let matchScore = 0;
  const driftDetails = {};

  if (storedSignature.tag) {
    totalChecks++;
    if (storedSignature.tag === currentSignature.tag) {
      matchScore++;
    } else {
      driftDetails.tag = { expected: storedSignature.tag, actual: currentSignature.tag };
    }
  }

  if (storedSignature.id) {
    totalChecks++;
    if (storedSignature.id === currentSignature.id) {
      matchScore++;
    } else {
      driftDetails.id = { expected: storedSignature.id, actual: currentSignature.id };
    }
  }

  const storedClasses = storedSignature.classes || [];
  const currentClasses = currentSignature.classes || [];
  if (storedClasses.length > 0) {
    totalChecks++;
    const overlap = storedClasses.filter((c) => currentClasses.includes(c));
    const classRatio = overlap.length / storedClasses.length;
    matchScore += classRatio;
    if (classRatio < 1) {
      const removed = storedClasses.filter((c) => !currentClasses.includes(c));
      const added = currentClasses.filter((c) => !storedClasses.includes(c));
      driftDetails.classes = { removed, added, overlapRatio: classRatio };
    }
  }

  const storedAttrs = storedSignature.attributes || {};
  const currentAttrs = currentSignature.attributes || {};
  const allAttrKeys = [...new Set([...Object.keys(storedAttrs), ...Object.keys(currentAttrs)])];
  if (allAttrKeys.length > 0) {
    totalChecks++;
    let attrMatch = 0;
    const changedAttrs = {};
    for (const key of allAttrKeys) {
      if (storedAttrs[key] === currentAttrs[key]) {
        attrMatch++;
      } else {
        // `??` keeps legitimate falsy values ('' for boolean attributes, 0)
        // in the report; only a missing attribute is shown as null.
        changedAttrs[key] = { expected: storedAttrs[key] ?? null, actual: currentAttrs[key] ?? null };
      }
    }
    matchScore += attrMatch / allAttrKeys.length;
    if (Object.keys(changedAttrs).length > 0) {
      driftDetails.attributes = changedAttrs;
    }
  }

  if (storedSignature.text) {
    totalChecks++;
    const currentText = currentSignature.text || '';
    const sim = textSimilarity(storedSignature.text.toLowerCase(), currentText.toLowerCase());
    matchScore += sim;
    if (sim < 0.95) {
      driftDetails.text = {
        expected: storedSignature.text.substring(0, 50),
        actual: currentText.substring(0, 50),
        similarity: Math.round(sim * 1000) / 1000,
      };
    }
  }

  const confidence = totalChecks > 0 ? matchScore / totalChecks : 0;
  const valid = confidence >= 0.7;
  const hasDrift = Object.keys(driftDetails).length > 0;

  // The unrounded confidence is stored; only the returned value is rounded.
  stmts.updateRegistryVerified.run(valid ? 1 : 0, confidence, siteId, actionName);

  return {
    valid,
    confidence: Math.round(confidence * 1000) / 1000,
    drift: hasDrift ? driftDetails : null,
  };
}
587
+
588
+ // ═══════════════════════════════════════════════════════════════════════
589
+ // 8. Selector Health
590
+ // ═══════════════════════════════════════════════════════════════════════
591
+
592
/**
 * Summarise the registry rows stored for one site.
 *
 * @param {string} siteId - Site whose selector_registry rows are inspected.
 * @returns {{total: number, verified: number, healed: number, avgConfidence: number, broken: Array<object>}}
 *   Row count, number of rows flagged verified, number of rows healed at
 *   least once, mean confidence rounded to three decimals, and the rows
 *   that are unverified or whose confidence is below 0.6.
 */
function getSelectorHealth(siteId) {
  const rows = stmts.getSelectorsBySite.all(siteId);
  if (rows.length === 0) {
    return { total: 0, verified: 0, healed: 0, avgConfidence: 0, broken: [] };
  }

  const report = { total: rows.length, verified: 0, healed: 0, avgConfidence: 0, broken: [] };
  let confidenceTotal = 0;

  rows.forEach((row) => {
    confidenceTotal += row.confidence;
    const isVerified = Boolean(row.verified);
    if (isVerified) report.verified += 1;
    if (row.heal_count > 0) report.healed += 1;

    // Healthy rows are both verified and at or above the 0.6 confidence floor.
    if (isVerified && !(row.confidence < 0.6)) return;

    report.broken.push({
      id: row.id,
      actionName: row.action_name,
      currentSelector: row.current_selector,
      confidence: row.confidence,
      healCount: row.heal_count,
      lastHealed: row.last_healed,
    });
  });

  report.avgConfidence = Math.round((confidenceTotal / rows.length) * 1000) / 1000;
  return report;
}
629
+
630
+ // ═══════════════════════════════════════════════════════════════════════
631
+ // 9. Healing History
632
+ // ═══════════════════════════════════════════════════════════════════════
633
+
634
/**
 * Fetch healing-log rows for a site, newest first.
 *
 * @param {string} siteId - Site whose healing log is read.
 * @param {object} [options]
 * @param {number|string} [options.limit=50] - Maximum number of rows. Values
 *   that are not a finite number >= 1 fall back to 50; fractions are truncated.
 * @param {string} [options.actionName] - When given, only rows whose registry
 *   entry has this action name are returned.
 * @returns {Array<object>} Matching healing_log rows.
 */
function getHealingHistory(siteId, { limit, actionName } = {}) {
  // SQLite treats a negative LIMIT as "no upper bound", so a caller-supplied
  // -1 would dump the whole table. Only accept a positive whole number.
  const requested = Number(limit);
  const lim = Number.isFinite(requested) && requested >= 1 ? Math.trunc(requested) : 50;

  if (actionName) {
    return stmts.getHealingLogBySiteAction.all(siteId, actionName, lim);
  }
  return stmts.getHealingLogBySite.all(siteId, lim);
}
641
+
642
+ // ═══════════════════════════════════════════════════════════════════════
643
+ // 10. Snapshot Elements
644
+ // ═══════════════════════════════════════════════════════════════════════
645
+
646
/**
 * Store a snapshot of the signatures of the given page elements.
 *
 * @param {string} siteId - Site the snapshot belongs to.
 * @param {string} url - Page URL the elements were read from.
 * @param {Array<object>} elements - Raw element descriptors; anything that is
 *   not an array is stored as an empty snapshot.
 * @returns {{id: string, siteId: string, url: string, elementCount: number}}
 *   Identifier of the new snapshot row and the number of signatures stored.
 */
function snapshotElements(siteId, url, elements) {
  const id = crypto.randomUUID();

  const source = Array.isArray(elements) ? elements : [];
  const signatures = source.map((el) => captureElementSignature(el));
  const serialized = JSON.stringify(signatures);

  // The selector column is always written as the wildcard '*'.
  stmts.insertSnapshot.run(id, siteId, url, '*', serialized);

  return { id, siteId, url, elementCount: signatures.length };
}
659
+
660
+ // ═══════════════════════════════════════════════════════════════════════
661
+ // 11. Detect Drift
662
+ // ═══════════════════════════════════════════════════════════════════════
663
+
664
/**
 * Compare the elements currently on a page with the latest stored snapshot.
 *
 * Elements are identified by a fingerprint of tag, id, classes and text.
 * A current element with an unknown fingerprint is reported as "changed"
 * when the previous element at the same index has the same tag and a
 * fingerprint similarity strictly between 0.3 and 1.0; otherwise it is
 * "added". Previous fingerprints that no longer occur are "removed" unless
 * their index was already reported as changed.
 *
 * Note: elements sharing an identical fingerprint collapse to one map entry
 * (the last index wins), so duplicates are reported at most once.
 *
 * @param {string} siteId - Site the page belongs to.
 * @param {string} url - Page URL used to look up the latest snapshot.
 * @param {Array<object>} currentElements - Raw descriptors of current elements.
 * @returns {object} Drift report with `hasDrift`, `changed`, `added`,
 *   `removed`, and (when a snapshot exists) `snapshotDate` and `summary`.
 */
function detectDrift(siteId, url, currentElements) {
  const lastSnapshot = stmts.getLatestSnapshot.get(siteId, url);
  if (!lastSnapshot) {
    return { hasDrift: false, message: 'No previous snapshot found', changed: [], added: [], removed: [] };
  }

  // The snapshot column defaults to '{}' and may hold anything that was
  // written to it: accept only arrays, and replace non-object entries with
  // an empty signature so fingerprinting cannot throw.
  let previousElements = [];
  try {
    const parsed = JSON.parse(lastSnapshot.snapshot || '[]');
    if (Array.isArray(parsed)) {
      previousElements = parsed.map((entry) => (entry && typeof entry === 'object' ? entry : {}));
    }
  } catch {
    // Unparsable snapshot JSON is treated as an empty snapshot.
  }

  const currentSigs = (Array.isArray(currentElements) ? currentElements : [])
    .map((el) => captureElementSignature(el));

  const fingerprint = (sig) => {
    const classes = Array.isArray(sig.classes) ? sig.classes : [];
    return `${sig.tag || ''}|${sig.id || ''}|${classes.join(',')}|${sig.text || ''}`;
  };

  const prevFingerprints = new Map();
  for (let i = 0; i < previousElements.length; i++) {
    prevFingerprints.set(fingerprint(previousElements[i]), i);
  }

  const currFingerprints = new Map();
  for (let i = 0; i < currentSigs.length; i++) {
    currFingerprints.set(fingerprint(currentSigs[i]), i);
  }

  const changed = [];
  const added = [];
  const removed = [];
  // Indices already reported as changed; a Set avoids rescanning `changed`.
  const changedIndices = new Set();

  for (const [fp, idx] of currFingerprints) {
    if (prevFingerprints.has(fp)) continue;

    const previousAtIndex = previousElements[idx];
    if (previousAtIndex && previousAtIndex.tag === currentSigs[idx].tag) {
      const sim = textSimilarity(fingerprint(previousAtIndex), fp);
      if (sim > 0.3 && sim < 1.0) {
        changed.push({
          index: idx,
          previous: previousAtIndex,
          current: currentSigs[idx],
          similarity: Math.round(sim * 1000) / 1000,
        });
        changedIndices.add(idx);
        continue;
      }
    }
    added.push({ index: idx, element: currentSigs[idx] });
  }

  for (const [fp, idx] of prevFingerprints) {
    if (!currFingerprints.has(fp) && !changedIndices.has(idx)) {
      removed.push({ index: idx, element: previousElements[idx] });
    }
  }

  return {
    hasDrift: changed.length > 0 || added.length > 0 || removed.length > 0,
    snapshotDate: lastSnapshot.captured_at,
    changed,
    added,
    removed,
    summary: {
      changedCount: changed.length,
      addedCount: added.length,
      removedCount: removed.length,
    },
  };
}
735
+
736
+ // ═══════════════════════════════════════════════════════════════════════
737
+ // 12. Build CSS Path
738
+ // ═══════════════════════════════════════════════════════════════════════
739
+
740
/**
 * Build a CSS selector string from a plain element description.
 *
 * Resolution order:
 *   1. No description at all      -> '*'
 *   2. Truthy `id`                -> '#<id>' (nothing else is appended)
 *   3. Otherwise                  -> tag (or '*') + '.class' for each class
 *                                    + one '[attr]' / '[attr="value"]' per attribute
 *
 * Attribute names are lower-cased; `class`, `id` and `style` attributes are
 * skipped. Attribute values are coerced to strings, and embedded `"` and `\`
 * are backslash-escaped so the quoted value stays a valid CSS string.
 * Ids and class names are inserted verbatim (no identifier escaping).
 *
 * @param {Object} elementData - Description with optional `tag`/`tagName`,
 *   `id`, `classes` (array) or `className` (string), and
 *   `attributes`/`attrs` (object).
 * @returns {string} The selector.
 */
function buildCSSPath(elementData) {
  if (!elementData) return '*';

  const tag = (elementData.tag || elementData.tagName || '').toLowerCase();
  const id = elementData.id;

  if (id) return `#${id}`;

  let selector = tag || '*';

  let classes = [];
  if (Array.isArray(elementData.classes)) {
    classes = elementData.classes.filter(Boolean);
  } else if (typeof elementData.className === 'string') {
    classes = elementData.className.split(/\s+/).filter(Boolean);
  }
  if (classes.length > 0) {
    selector += `.${classes.join('.')}`;
  }

  const attrs = elementData.attributes || elementData.attrs || {};
  for (const [key, val] of Object.entries(attrs)) {
    const lower = key.toLowerCase();
    if (lower === 'class' || lower === 'id' || lower === 'style') continue;

    if (val == null || val === '') {
      // Valueless attribute: presence selector only.
      selector += `[${lower}]`;
    } else {
      // String() keeps numeric/boolean values (e.g. tabindex: 0) from
      // throwing; backslash must be escaped along with the quote.
      const escaped = String(val).replace(/["\\]/g, '\\$&');
      selector += `[${lower}="${escaped}"]`;
    }
  }

  return selector;
}
774
+
775
+ // ═══════════════════════════════════════════════════════════════════════
776
+ // 13. Build XPath
777
+ // ═══════════════════════════════════════════════════════════════════════
778
+
779
/**
 * Render a value as an XPath 1.0 string literal.
 *
 * XPath 1.0 has no escape character, so the delimiter is chosen to avoid the
 * quotes present in the value; when the value contains both kinds, it is
 * assembled with concat() from double-quoted pieces joined by '"'.
 *
 * @param {*} value - Value to quote (coerced with String()).
 * @returns {string} An XPath expression that evaluates to the value.
 */
function xpathStringLiteral(value) {
  const str = String(value);
  if (!str.includes('"')) return `"${str}"`;
  if (!str.includes("'")) return `'${str}'`;
  const pieces = str.split('"').map((piece) => `"${piece}"`);
  return `concat(${pieces.join(', \'"\', ')})`;
}

/**
 * Build an XPath expression from a plain element description.
 *
 * Resolution order:
 *   1. No description at all -> '//*'
 *   2. Truthy `id`           -> '//<tag>[@id=<literal>]' (nothing else is added)
 *   3. Otherwise             -> '//<tag>' plus predicates joined with ' and ':
 *        - contains(@class, <literal>) for each class
 *        - @attr or @attr=<literal> for each attribute (names lower-cased;
 *          `class`, `id` and `style` are skipped)
 *        - normalize-space(text())=<literal> when the trimmed text is
 *          non-empty and at most 80 characters long
 *
 * All values go through xpathStringLiteral(), so embedded quotes cannot break
 * the expression. Values without quotes keep the double-quoted form.
 *
 * @param {Object} elementData - Description with optional `tag`/`tagName`,
 *   `id`, `classes` (array) or `className` (string), `attributes`/`attrs`
 *   (object), and `text`/`textContent`/`innerText`.
 * @returns {string} The XPath expression.
 */
function buildXPath(elementData) {
  if (!elementData) return '//*';

  const tag = (elementData.tag || elementData.tagName || '').toLowerCase() || '*';
  const id = elementData.id;

  if (id) return `//${tag}[@id=${xpathStringLiteral(id)}]`;

  const predicates = [];

  let classes = [];
  if (Array.isArray(elementData.classes)) {
    classes = elementData.classes.filter(Boolean);
  } else if (typeof elementData.className === 'string') {
    classes = elementData.className.split(/\s+/).filter(Boolean);
  }
  for (const cls of classes) {
    predicates.push(`contains(@class, ${xpathStringLiteral(cls)})`);
  }

  const attrs = elementData.attributes || elementData.attrs || {};
  for (const [key, val] of Object.entries(attrs)) {
    const lower = key.toLowerCase();
    if (lower === 'class' || lower === 'id' || lower === 'style') continue;

    if (val != null && val !== '') {
      predicates.push(`@${lower}=${xpathStringLiteral(val)}`);
    } else {
      // Valueless attribute: test for presence only.
      predicates.push(`@${lower}`);
    }
  }

  // String() keeps a numeric text value from throwing on .trim().
  const rawText = elementData.text || elementData.textContent || elementData.innerText || '';
  const text = String(rawText).trim();
  if (text && text.length <= 80) {
    predicates.push(`normalize-space(text())=${xpathStringLiteral(text)}`);
  }

  if (predicates.length === 0) return `//${tag}`;
  return `//${tag}[${predicates.join(' and ')}]`;
}
823
+
824
+ // ═══════════════════════════════════════════════════════════════════════
825
+ // Exports
826
+ // ═══════════════════════════════════════════════════════════════════════
827
+
828
+ module.exports = {
829
+ registerSelector,
830
+ captureElementSignature,
831
+ healSelector,
832
+ levenshteinDistance,
833
+ textSimilarity,
834
+ submitCorrection,
835
+ getCommunitySuggestions,
836
+ verifySelector,
837
+ getSelectorHealth,
838
+ getHealingHistory,
839
+ snapshotElements,
840
+ detectDrift,
841
+ buildCSSPath,
842
+ buildXPath,
843
+ };