web-agent-bridge 2.6.0 → 2.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -848,6 +848,295 @@ function getVisionHistory(siteId, { limit, url } = {}) {
848
848
  });
849
849
  }
850
850
 
851
+ // ═══════════════════════════════════════════════════════════════════════
852
+ // LOCAL VISION ENGINE — Self-contained, no external API needed
853
+ // DOM-based element detection, dark pattern analysis, ad detection,
854
+ // layout analysis, accessibility audit — all computed locally.
855
+ // ═══════════════════════════════════════════════════════════════════════
856
+
857
/**
 * Text signatures for common dark patterns, keyed by pattern type.
 *
 * Every entry is a list of case-insensitive regexes that are tested against a
 * node's visible text. Signatures that begin or end with a short bare word are
 * anchored with \b so they cannot fire inside unrelated words (for example
 * "rush" in "toothbrush", "limited time" in "unlimited time", or "i'll pass"
 * in "will pass").
 */
const DARK_PATTERN_SIGNATURES = {
  confirmshaming: [
    /\bno,? i (don'?t|do not) (want|like|need|care)/i,
    /\bno thanks,? i (prefer|like|want) (to )?(pay|miss|stay|lose)/i,
    /\bi('?d rather|'?ll pass)\b/i,
    /\bkeep (paying|losing|missing)\b/i,
  ],
  urgency: [
    /\bonly \d+ left\b/i,
    /\blimited (time|offer|stock|availability)\b/i,
    /\b(hurry|rush|act now|don'?t miss|last chance|expires? (soon|in|today))\b/i,
    /\d+ (people|others|users) (are )?(viewing|watching|buying)/i,
    /\b(selling fast|almost gone)\b/i,
  ],
  hiddenCosts: [/service fee|handling fee|processing fee|convenience fee/i, /additional charge|extra charge|booking fee/i],
  forcedContinuity: [/free trial.*(auto|automatic).*(renew|bill|charge)/i, /will be charged after/i, /cancel anytime.*(before|or)/i],
  sneakIntoBasket: [/added to (your )?cart|included (in|with) (your )?(order|purchase)/i, /protection plan|warranty|insurance/i],
  privacyZuckering: [/share (your )?(data|info|details|location|contacts)/i, /personalize/i],
};

/**
 * Regexes matched against an element's class and id to spot ad containers
 * (generic "ad"/"sponsor"/"promo" tokens plus well-known ad network names).
 */
const AD_CLASS_PATTERNS = [
  /\bad[s]?\b/i, /\badvert/i, /\bsponsor/i, /\bpromo(tion|ted)?\b/i,
  /\bbanner[\-_]?ad/i, /\bgoogle[\-_]?ad/i, /\bdfp[\-_]/i, /\badsense/i, /\btaboola/i, /\boutbrain/i,
];

/** [width, height] pairs, in pixels, that are treated as ad slot dimensions. */
const AD_SIZES = [
  [728, 90], [300, 250], [336, 280], [160, 600], [320, 50], [970, 250], [300, 600],
];
883
+
884
/**
 * Analyze a DOM snapshot locally — no external API calls.
 *
 * Walks the flattened node list once and, per node, runs element
 * classification, dark pattern text matching, ad detection, accessibility
 * checks and layout region detection.
 *
 * @param {Array} domNodes - Flattened DOM nodes from the extraction script.
 *   A missing or non-iterable value is treated as an empty snapshot, and
 *   entries that are not objects are skipped.
 * @param {Object} viewport - { width, height }. Accepted for interface
 *   compatibility; the current heuristics do not consult it.
 * @returns {Object} Full analysis result:
 *   { text, tokens, elements, darkPatterns, ads, accessibility, layout },
 *   where `text` is the JSON serialization of the same data plus a summary.
 */
function analyzeLocally(domNodes, viewport = { width: 1280, height: 720 }) {
  const lower = (value) => (typeof value === 'string' ? value.toLowerCase() : '');
  const nodes = domNodes != null && typeof domNodes[Symbol.iterator] === 'function' ? domNodes : [];

  const elements = [];
  const darkPatterns = [];
  const adElements = [];
  const accessibilityIssues = [];
  const layoutRegions = [];

  // Checked in order; the first region whose tag or ARIA role matches wins.
  const LAYOUT_REGIONS = [
    { type: 'header', tags: ['header'], roles: ['banner'] },
    { type: 'navigation', tags: ['nav'], roles: ['navigation'] },
    { type: 'main', tags: ['main', 'article'], roles: ['main'] },
    { type: 'sidebar', tags: ['aside'], roles: ['complementary'] },
    { type: 'footer', tags: ['footer'], roles: ['contentinfo'] },
  ];

  for (const node of nodes) {
    if (node === null || typeof node !== 'object') continue;

    const attrs = node.attributes ?? {};
    const tag = lower(node.tag);
    const cls = lower(attrs.class);
    const id = lower(attrs.id);
    const role = lower(attrs.role);
    // The HTML `type` attribute is case-insensitive, so compare it lowercased.
    const inputType = lower(attrs.type);
    const text = typeof node.text === 'string' ? node.text.trim() : '';
    const rect = node.rect || {};
    const selector = node.selector || '';

    // ── Element detection ──
    // Semantic tag/role matches come first; class-name heuristics are the
    // lower-confidence fallback.
    let elType = null;
    let confidence = 0;

    if (tag === 'button' || role === 'button' || (tag === 'input' && ['submit', 'button'].includes(inputType))) {
      elType = 'button'; confidence = 0.95;
    } else if (tag === 'a' && attrs.href) {
      elType = 'link'; confidence = 0.9;
    } else if (['input', 'textarea'].includes(tag) || role === 'textbox') {
      elType = 'input'; confidence = 0.95;
    } else if (tag === 'select' || role === 'listbox' || role === 'combobox') {
      elType = 'dropdown'; confidence = 0.9;
    } else if (tag === 'form' || role === 'form') {
      elType = 'form'; confidence = 0.85;
    } else if (['img', 'picture', 'svg', 'video', 'canvas'].includes(tag) || role === 'img') {
      elType = 'image'; confidence = 0.8;
    } else if (['nav', 'header', 'footer'].includes(tag) || ['navigation', 'banner', 'contentinfo'].includes(role) || cls.includes('nav') || cls.includes('menu')) {
      elType = 'nav'; confidence = 0.75;
    } else if (cls.includes('btn') || cls.includes('button') || cls.includes('cta')) {
      elType = 'button'; confidence = 0.7;
    } else if (cls.includes('dropdown') || cls.includes('select')) {
      elType = 'dropdown'; confidence = 0.65;
    }

    if (elType) {
      elements.push({
        type: elType,
        label: String(text || attrs.placeholder || attrs['aria-label'] || attrs.alt || '').slice(0, 200),
        description: `${tag} element${cls ? ' class=' + cls.slice(0, 80) : ''}`,
        boundingBox: { x: rect.x || 0, y: rect.y || 0, width: rect.width || 0, height: rect.height || 0 },
        suggestedSelector: node.selector || _buildFallbackSelector(node),
        confidence,
        interactable: ['button', 'link', 'input', 'dropdown', 'form'].includes(elType),
      });
    }

    // ── Dark pattern detection ──
    // At most one hit per pattern type per node (first matching regex wins).
    if (text.length > 5) {
      for (const [patternName, regexes] of Object.entries(DARK_PATTERN_SIGNATURES)) {
        for (const rx of regexes) {
          if (rx.test(text)) {
            darkPatterns.push({ type: patternName, text: text.slice(0, 200), selector, severity: patternName === 'urgency' ? 'medium' : 'high', confidence: 0.85 });
            break;
          }
        }
      }
    }

    // Pre-checked upsell checkbox detection.
    // An <input> has no text content of its own, so the purpose of the box is
    // also looked up in its aria-label, name and value attributes.
    if (tag === 'input' && inputType === 'checkbox' && attrs.checked != null) {
      const labelText = [text, attrs['aria-label'], attrs.name, attrs.value]
        .filter((part) => typeof part === 'string' && part !== '')
        .join(' ');
      if (/newsletter|marketing|promo|share|partner|third.party|sms|offer/i.test(labelText)) {
        darkPatterns.push({ type: 'misdirection', text: `Pre-checked: "${(text || labelText).slice(0, 100)}"`, selector, severity: 'medium', confidence: 0.9 });
      }
    }

    // ── Ad detection ──
    // Three signals, cheapest first: class/id tokens, slot size, iframe source.
    let isAd = AD_CLASS_PATTERNS.some((rx) => rx.test(cls) || rx.test(id));
    if (!isAd && rect.width && rect.height) {
      isAd = AD_SIZES.some(([w, h]) => Math.abs(rect.width - w) < 10 && Math.abs(rect.height - h) < 10);
    }
    if (!isAd && tag === 'iframe' && attrs.src) {
      isAd = /doubleclick|googlesyndication|adnxs|criteo|taboola|outbrain/i.test(attrs.src);
    }
    if (isAd) adElements.push({ tag, selector, rect, reason: 'class/id/size match' });

    // ── Accessibility ──
    // Only a *missing* alt attribute is an issue; alt="" is the standard way
    // to mark an image as decorative.
    if (tag === 'img' && attrs.alt == null) {
      accessibilityIssues.push({ type: 'missing-alt', severity: 'high', selector });
    }
    if (['button', 'a', 'input'].includes(tag) && rect.width > 0 && (rect.width < 44 || rect.height < 44)) {
      accessibilityIssues.push({ type: 'small-tap-target', severity: 'medium', selector, size: `${rect.width}x${rect.height}` });
    }
    // An id is treated as a hint that a <label for> may exist elsewhere.
    if (['input', 'select', 'textarea'].includes(tag) && !attrs['aria-label'] && !attrs['aria-labelledby'] && !attrs.id) {
      accessibilityIssues.push({ type: 'missing-label', severity: 'high', selector });
    }

    // ── Layout regions ──
    const region = LAYOUT_REGIONS.find(({ tags, roles }) => tags.includes(tag) || roles.includes(role));
    if (region) {
      layoutRegions.push({ type: region.type, tag, rect, selector });
    }
  }

  // Tally the element types reported in the summary in a single pass.
  const typeCounts = { button: 0, link: 0, input: 0, form: 0 };
  for (const { type } of elements) {
    if (typeCounts[type] !== undefined) typeCounts[type] += 1;
  }

  const accessibility = {
    issues: accessibilityIssues,
    score: Math.max(0, 100 - accessibilityIssues.length * 5),
  };
  const layout = {
    regions: layoutRegions,
    columns: layoutRegions.some((r) => r.type === 'sidebar') ? 2 : 1,
  };

  // Build analysis text (JSON summary, stored by callers for caching)
  const analysisText = JSON.stringify({
    summary: {
      totalElements: elements.length,
      buttons: typeCounts.button,
      links: typeCounts.link,
      inputs: typeCounts.input,
      forms: typeCounts.form,
      darkPatterns: darkPatterns.length,
      ads: adElements.length,
      accessibilityIssues: accessibilityIssues.length,
    },
    elements,
    darkPatterns,
    ads: adElements,
    accessibility,
    layout,
  });

  return {
    text: analysisText,
    tokens: 0, // Local analysis — no tokens used
    elements,
    darkPatterns,
    ads: adElements,
    accessibility,
    layout,
  };
}
1040
+
1041
/**
 * Escape a string so it can be used as a CSS identifier (after `#` or `.`).
 * Follows the CSSOM "serialize an identifier" rules that browsers implement
 * as CSS.escape(), which is not available in the server-side runtime.
 *
 * @param {*} value - Raw id or class name; coerced to a string.
 * @returns {string} The escaped identifier.
 */
function _escapeCssIdentifier(value) {
  const str = String(value);
  let escaped = '';
  for (let i = 0; i < str.length; i++) {
    const ch = str[i];
    const code = str.charCodeAt(i);
    const isDigit = code >= 0x30 && code <= 0x39;

    if (code === 0) {
      escaped += '\uFFFD';
    } else if (
      (code >= 0x01 && code <= 0x1f) || code === 0x7f ||
      (i === 0 && isDigit) ||
      (i === 1 && isDigit && str[0] === '-')
    ) {
      // Control characters and leading digits need a code point escape.
      escaped += `\\${code.toString(16)} `;
    } else if (i === 0 && str.length === 1 && ch === '-') {
      escaped += '\\-';
    } else if (
      code >= 0x80 || ch === '-' || ch === '_' || isDigit ||
      (code >= 0x41 && code <= 0x5a) || (code >= 0x61 && code <= 0x7a)
    ) {
      escaped += ch;
    } else {
      escaped += `\\${ch}`;
    }
  }
  return escaped;
}

/**
 * Build a best-effort CSS selector for a snapshot node that carries no
 * `selector` of its own.
 *
 * Prefers `#id`; otherwise returns the tag name followed by at most the first
 * two class names. Id and class values are escaped so that characters such as
 * ":" or "/" or a leading digit still yield a valid selector.
 *
 * @param {Object} node - Snapshot node: { tag?, attributes?: { id?, class? } }.
 * @returns {string} The selector; "div" when nothing usable is present.
 */
function _buildFallbackSelector(node) {
  const attrs = node?.attributes ?? {};
  if (attrs.id) return '#' + _escapeCssIdentifier(attrs.id);

  const tag = node?.tag || 'div';
  const classNames = typeof attrs.class === 'string'
    ? attrs.class.trim().split(/\s+/).filter(Boolean).slice(0, 2)
    : [];
  return classNames.reduce((sel, name) => `${sel}.${_escapeCssIdentifier(name)}`, tag);
}
1051
+
1052
/**
 * DOM Extraction Script — inject into pages to capture DOM for local analysis.
 *
 * The returned source is a self-invoking function whose value is a JSON string
 * of the form { url, title, viewport, dom, meta }, where `dom` is a flattened,
 * pre-order list of nodes ({ tag, text, selector, attributes, rect, visible }).
 *
 * Traversal starts at <body> and descends only through layout containers
 * (LAY) and interactive elements (INT), to a maximum depth of MAX_D. <body> is
 * part of LAY so that the walk actually leaves the root; elements hidden with
 * display:none / visibility:hidden and tags in SKIP are dropped.
 *
 * NOTE(review): children of other tags (ul, li, p, span, table, ...) are
 * recorded as leaves but not descended into — confirm this pruning is intended.
 *
 * @returns {string} JavaScript source to evaluate in the page context.
 */
function getDomExtractionScript() {
  return `(function(){
var MAX_D=8,INT=new Set(['a','button','input','select','textarea','details','summary','label']),
LAY=new Set(['body','header','nav','main','aside','footer','article','section','div','form']),
SKIP=new Set(['script','style','noscript','meta','link','br','hr']);
function ext(el,d){
if(!el||d>MAX_D)return null;var t=el.tagName;if(!t)return null;t=t.toLowerCase();
if(SKIP.has(t))return null;var r=el.getBoundingClientRect();
if(r.width===0&&r.height===0&&!LAY.has(t)&&!INT.has(t))return null;
var cs=window.getComputedStyle(el);if(cs.display==='none'||cs.visibility==='hidden')return null;
var n={tag:t,text:(el.textContent||'').trim().substring(0,200),selector:sel(el),attributes:{},
rect:{x:Math.round(r.x),y:Math.round(r.y),width:Math.round(r.width),height:Math.round(r.height)},
visible:r.width>0&&r.height>0&&cs.opacity!=='0'};
['id','class','href','src','alt','type','name','value','placeholder','role','aria-label',
'aria-labelledby','aria-checked','data-action','checked','disabled'].forEach(function(a){
if(el.hasAttribute(a))n.attributes[a]=el.getAttribute(a);
});if(el.checked)n.attributes.checked='checked';
if(LAY.has(t)||INT.has(t)){n.children=[];for(var c of el.children){var cn=ext(c,d+1);if(cn)n.children.push(cn);}}
return n;
}
function sel(el){if(el.id)return'#'+CSS.escape(el.id);var p=[];var c=el;
for(var i=0;i<4&&c&&c!==document.body;i++){var s=c.tagName.toLowerCase();
if(c.id){p.unshift('#'+CSS.escape(c.id));break;}
if(c.className&&typeof c.className==='string'){var cl=c.className.trim().split(/\\s+/).slice(0,2).map(function(x){return'.'+CSS.escape(x);}).join('');if(cl)s+=cl;}
p.unshift(s);c=c.parentElement;}return p.join(' > ');}
function flat(n,r){if(!n)return;var ch=n.children;delete n.children;r.push(n);if(ch)ch.forEach(function(c){flat(c,r);});}
var root=ext(document.body,0);var f=[];flat(root,f);
return JSON.stringify({url:location.href,title:document.title,viewport:{width:innerWidth,height:innerHeight},dom:f,
meta:{lang:document.documentElement.lang||'',charset:document.characterSet}});
})();`;
}
1087
+
1088
+ // ═══════════════════════════════════════════════════════════════════════
1089
+ // Enhanced analyzeScreenshot — use local engine when provider is 'local'
1090
+ // and DOM data is provided (no external API call needed)
1091
+ // ═══════════════════════════════════════════════════════════════════════
1092
+
1093
/**
 * Analyze a DOM snapshot with the local engine, with per-site caching.
 *
 * The cache key is the SHA-256 of the complete serialized `dom` array, so two
 * pages that merely share their leading markup (a common header, navigation,
 * ...) can never be served each other's result.
 *
 * On a cache miss the snapshot is run through analyzeLocally(), the result is
 * written through `stmts.insertCache`, and every detected element is written
 * through `stmts.insertElement` inside one `db.transaction`.
 *
 * @param {string} siteId - Site the snapshot belongs to; part of the cache lookup.
 * @param {Object} [options]
 * @param {Object} options.domSnapshot - { dom: Array, viewport?, url? }, as
 *   produced by the DOM extraction script.
 * @param {string} [options.url] - Page URL; falls back to `domSnapshot.url`.
 * @returns {Promise<Object>} { analysis, elements, darkPatterns, ads,
 *   accessibility, layout, cached, latency_ms, tokens_used, cache_id } plus
 *   `engine` for fresh results. On a cache hit the detail fields are restored
 *   from the stored analysis text when it can be parsed.
 * @throws {Error} When `domSnapshot` is missing or `domSnapshot.dom` is not an array.
 */
async function analyzePageDOM(siteId, { domSnapshot, url } = {}) {
  if (!domSnapshot || !Array.isArray(domSnapshot.dom)) {
    throw new Error('domSnapshot with dom array is required');
  }

  // Hash the whole snapshot: truncating the input before hashing would make
  // pages with an identical prefix collide on the same cache entry.
  const domHash = crypto.createHash('sha256').update(JSON.stringify(domSnapshot.dom)).digest('hex');

  // Check cache
  const cached = stmts.getCacheByHash.get(siteId, domHash);
  if (cached) {
    let elements = [];
    try {
      elements = JSON.parse(cached.elements_found || '[]');
    } catch {
      // Best effort: an unreadable elements column degrades to an empty list.
    }

    // The stored analysis text carries the full result; surface the same
    // detail fields a fresh run returns so both paths have one shape.
    const details = {};
    try {
      const stored = JSON.parse(cached.analysis);
      if (stored !== null && typeof stored === 'object') {
        for (const key of ['darkPatterns', 'ads', 'accessibility', 'layout']) {
          if (stored[key] !== undefined) details[key] = stored[key];
        }
      }
    } catch {
      // Analysis text that is not JSON: return the row without detail fields.
    }

    return { analysis: cached.analysis, elements, ...details, cached: true, latency_ms: cached.latency_ms, tokens_used: 0, cache_id: cached.id };
  }

  const startTime = Date.now();
  const result = analyzeLocally(domSnapshot.dom, domSnapshot.viewport);
  const latencyMs = Date.now() - startTime;

  const cacheId = uuidv4();
  const config = stmts.getConfig.get(siteId);
  const cacheTtl = config?.cache_ttl || 300; // seconds; 300 when unset
  const expiresAt = new Date(Date.now() + cacheTtl * 1000).toISOString();

  stmts.insertCache.run(cacheId, siteId, url || domSnapshot.url || null, domHash, result.text, JSON.stringify(result.elements), 'local', 'dom-engine', 0, latencyMs, expiresAt);

  const insertElements = db.transaction((elems) => {
    for (const el of elems) {
      stmts.insertElement.run(uuidv4(), cacheId, siteId, el.type, el.label, el.description, JSON.stringify(el.boundingBox), el.suggestedSelector, el.confidence, el.interactable ? 1 : 0);
    }
  });
  insertElements(result.elements);

  return {
    analysis: result.text,
    elements: result.elements,
    darkPatterns: result.darkPatterns,
    ads: result.ads,
    accessibility: result.accessibility,
    layout: result.layout,
    cached: false,
    latency_ms: latencyMs,
    tokens_used: 0,
    cache_id: cacheId,
    engine: 'local-dom',
  };
}
1139
+
851
1140
  // ═══════════════════════════════════════════════════════════════════════
852
1141
  // Exports
853
1142
  // ═══════════════════════════════════════════════════════════════════════
@@ -856,6 +1145,9 @@ module.exports = {
856
1145
  configureVision,
857
1146
  getVisionConfig,
858
1147
  analyzeScreenshot,
1148
+ analyzePageDOM,
1149
+ analyzeLocally,
1150
+ getDomExtractionScript,
859
1151
  buildVisionPrompt,
860
1152
  parseVisionResponse,
861
1153
  extractElementsFromAnalysis,