sentienceapi 0.90.6 → 0.90.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43):
  1. package/README.md +81 -0
  2. package/dist/index.d.ts +1 -0
  3. package/dist/index.d.ts.map +1 -1
  4. package/dist/index.js +3 -1
  5. package/dist/index.js.map +1 -1
  6. package/dist/textSearch.d.ts +64 -0
  7. package/dist/textSearch.d.ts.map +1 -0
  8. package/dist/textSearch.js +109 -0
  9. package/dist/textSearch.js.map +1 -0
  10. package/dist/tracing/cloud-sink.d.ts +4 -0
  11. package/dist/tracing/cloud-sink.d.ts.map +1 -1
  12. package/dist/tracing/cloud-sink.js +15 -0
  13. package/dist/tracing/cloud-sink.js.map +1 -1
  14. package/dist/tracing/index-schema.d.ts +182 -0
  15. package/dist/tracing/index-schema.d.ts.map +1 -0
  16. package/dist/tracing/index-schema.js +150 -0
  17. package/dist/tracing/index-schema.js.map +1 -0
  18. package/dist/tracing/indexer.d.ts +17 -0
  19. package/dist/tracing/indexer.d.ts.map +1 -0
  20. package/dist/tracing/indexer.js +282 -0
  21. package/dist/tracing/indexer.js.map +1 -0
  22. package/dist/tracing/jsonl-sink.d.ts +4 -0
  23. package/dist/tracing/jsonl-sink.d.ts.map +1 -1
  24. package/dist/tracing/jsonl-sink.js +15 -0
  25. package/dist/tracing/jsonl-sink.js.map +1 -1
  26. package/dist/tracing/tracer-factory.d.ts +11 -5
  27. package/dist/tracing/tracer-factory.d.ts.map +1 -1
  28. package/dist/tracing/tracer-factory.js +16 -7
  29. package/dist/tracing/tracer-factory.js.map +1 -1
  30. package/dist/types.d.ts +98 -0
  31. package/dist/types.d.ts.map +1 -1
  32. package/package.json +1 -1
  33. package/src/extension/background.js +222 -52
  34. package/src/extension/content.js +285 -9
  35. package/src/extension/injected_api.js +1224 -189
  36. package/src/extension/manifest.json +10 -4
  37. package/src/extension/pkg/README.md +163 -2
  38. package/src/extension/pkg/sentience_core.d.ts +9 -0
  39. package/src/extension/pkg/sentience_core.js +16 -0
  40. package/src/extension/pkg/sentience_core_bg.wasm +0 -0
  41. package/src/extension/pkg/sentience_core_bg.wasm.d.ts +1 -0
  42. package/src/extension/release.json +115 -0
  43. package/src/extension/test-content.js +4 -0
@@ -1,33 +1,56 @@
1
- // injected_api.js - MAIN WORLD
1
+ // injected_api.js - MAIN WORLD (NO WASM! CSP-Resistant!)
2
+ // This script ONLY collects raw DOM data and sends it to background for processing
2
3
  (async () => {
3
- // 1. Get Extension ID (Wait for content.js to set it)
4
+ // console.log('[SentienceAPI] Initializing (CSP-Resistant Mode)...');
5
+
6
+ // Wait for Extension ID from content.js
4
7
  const getExtensionId = () => document.documentElement.dataset.sentienceExtensionId;
5
8
  let extId = getExtensionId();
6
-
7
- // Safety poller for async loading race conditions
9
+
8
10
// Poll every 50ms for the extension ID that content.js publishes on <html>,
// giving up after 5 seconds. Whichever path finishes first tears down the
// other timer, so the poll never keeps running after the wait has ended.
if (!extId) {
  await new Promise((resolve) => {
    let giveUpTimer = null;

    const pollTimer = setInterval(() => {
      extId = getExtensionId();
      if (extId) {
        clearInterval(pollTimer);
        clearTimeout(giveUpTimer);
        resolve();
      }
    }, 50);

    giveUpTimer = setTimeout(() => {
      // Stop polling on timeout; the caller checks extId and reports the failure.
      clearInterval(pollTimer);
      resolve();
    }, 5000); // Max 5s wait
  });
}
16
19
 
17
- const EXT_URL = `chrome-extension://${extId}/`;
18
- console.log('[SentienceAPI.com] Initializing from:', EXT_URL);
20
+ if (!extId) {
21
+ console.error('[SentienceAPI] Failed to get extension ID');
22
+ return;
23
+ }
24
+
25
+ // console.log('[SentienceAPI] Extension ID:', extId);
19
26
 
27
+ // Registry for click actions (still needed for click() function)
20
28
  window.sentience_registry = [];
21
- let wasmModule = null;
22
29
 
23
- // --- HELPER: Deep Walker ---
30
+ // --- HELPER: Deep Walker with Native Filter ---
24
31
/**
 * Collect the connected elements under `root`, descending into open shadow roots.
 *
 * Script/style/metadata elements are rejected together with their subtrees, and
 * so is anything directly inside an <svg> other than a nested <svg>.
 * Tag names are compared case-insensitively because SVG elements in an HTML
 * document report a lowercase `tagName` ("svg"), unlike HTML elements.
 *
 * @param {Document|ShadowRoot|Element} [root=document] - Node to walk from.
 * @returns {Element[]} Accepted elements in document order; each shadow tree
 *   follows its host element.
 */
function getAllElements(root = document) {
  const skippedTags = new Set(['SCRIPT', 'STYLE', 'NOSCRIPT', 'META', 'LINK', 'HEAD']);
  // Shadow roots and documents have no tagName; treat them as "no tag".
  const tagOf = (node) => (typeof node?.tagName === 'string' ? node.tagName.toUpperCase() : '');

  const filter = {
    acceptNode(node) {
      const tag = tagOf(node);
      // Skip metadata and script/style tags
      if (skippedTags.has(tag)) {
        return NodeFilter.FILTER_REJECT;
      }
      // Skip deep SVG children (FILTER_REJECT also drops their descendants)
      if (tag !== 'SVG' && tagOf(node.parentNode) === 'SVG') {
        return NodeFilter.FILTER_REJECT;
      }
      return NodeFilter.FILTER_ACCEPT;
    },
  };

  const elements = [];
  const walker = document.createTreeWalker(root, NodeFilter.SHOW_ELEMENT, filter);
  while (walker.nextNode()) {
    const node = walker.currentNode;
    if (!node.isConnected) continue;

    elements.push(node);
    if (node.shadowRoot) {
      // Push one by one: spreading a very large array into push() can exceed
      // the engine's argument limit.
      for (const inner of getAllElements(node.shadowRoot)) {
        elements.push(inner);
      }
    }
  }
  return elements;
}
@@ -40,7 +63,163 @@
40
63
  return (el.innerText || '').replace(/\s+/g, ' ').trim().substring(0, 100);
41
64
  }
42
65
 
43
- // --- HELPER: Viewport Check (NEW) ---
66
+ // --- HELPER: Safe Class Name Extractor (Handles SVGAnimatedString) ---
67
/**
 * Read an element's class list as a plain string.
 *
 * HTML elements expose `className` as a string; SVG elements expose an object
 * with `baseVal` / `animVal` string fields instead.
 *
 * @param {Element|null|undefined} el - Element to inspect.
 * @returns {string} The class string, or '' when there is none.
 */
function getClassName(el) {
  const raw = el ? el.className : null;
  if (!raw) return '';

  switch (typeof raw) {
    case 'string':
      return raw;

    case 'object': {
      // Prefer baseVal, then animVal, when either holds a string.
      for (const field of ['baseVal', 'animVal']) {
        if (field in raw && typeof raw[field] === 'string') {
          return raw[field];
        }
      }
      // Fallback: convert to string
      try {
        return String(raw);
      } catch (e) {
        return '';
      }
    }

    default:
      return '';
  }
}
91
+
92
+ // --- HELPER: Paranoid String Converter (Handles SVGAnimatedString) ---
93
/**
 * Coerce an arbitrary value to a string without ever throwing.
 *
 * Objects carrying a string `baseVal` or `animVal` (the SVG animated-value
 * shape) yield that field; anything else goes through `String()`.
 *
 * @param {*} value - Value to convert.
 * @returns {string|null} The string form, or null for null/undefined input or
 *   when conversion throws.
 */
function toSafeString(value) {
  if (value === null || value === undefined) return null;
  if (typeof value === 'string') return value;

  if (typeof value === 'object') {
    // baseVal wins over animVal; both must actually be strings to count.
    const field = ['baseVal', 'animVal'].find(
      (name) => name in value && typeof value[name] === 'string'
    );
    if (field !== undefined) return value[field];
  }

  // Last resort for objects and primitives alike: force to string. The result
  // may be as unhelpful as "[object Object]", but it is always a string.
  try {
    return String(value);
  } catch (e) {
    return null;
  }
}
125
+
126
+ // --- HELPER: Get SVG Fill/Stroke Color ---
127
+ // For SVG elements, get the fill or stroke color (SVGs use fill/stroke, not backgroundColor)
128
/**
 * Get the effective paint color of an <svg> element from its computed
 * `fill`, falling back to `stroke`.
 *
 * The tag check is case-insensitive: SVG elements inside an HTML document
 * report `tagName` as lowercase "svg", so a strict comparison with "SVG"
 * never matches them.
 *
 * @param {Element|null|undefined} el - Candidate element.
 * @returns {string|null} `rgb(r, g, b)` when fill or stroke is a color with
 *   alpha >= 0.9, otherwise null (also null for non-SVG elements).
 */
function getSVGColor(el) {
  if (!el || typeof el.tagName !== 'string' || el.tagName.toLowerCase() !== 'svg') {
    return null;
  }

  const rgbPattern = /rgba?\((\d+),\s*(\d+),\s*(\d+)(?:,\s*([\d.]+))?\)/;

  // Shared by fill and stroke: returns a usable rgb() string or null.
  const toOpaqueRgb = (paint) => {
    if (!paint || paint === 'none' || paint === 'transparent' || paint === 'rgba(0, 0, 0, 0)') {
      return null;
    }
    const match = paint.match(rgbPattern);
    if (match) {
      const alpha = match[4] ? parseFloat(match[4]) : 1.0;
      // Mostly-transparent paints are treated as "no color".
      return alpha >= 0.9 ? `rgb(${match[1]}, ${match[2]}, ${match[3]})` : null;
    }
    return paint.startsWith('rgb(') ? paint : null;
  };

  const style = window.getComputedStyle(el);
  // Try fill first, then fall back to stroke.
  return toOpaqueRgb(style.fill) ?? toOpaqueRgb(style.stroke);
}

/**
 * Find the nearest non-transparent background color for an element by walking
 * up through at most 10 ancestors (the element itself included).
 *
 * For <svg> nodes on the path, the fill/stroke color from getSVGColor is used
 * in preference to `backgroundColor`.
 *
 * @param {Element|null|undefined} el - Element to start from.
 * @returns {string|null} `rgb(r, g, b)` for rgb/rgba backgrounds with
 *   alpha >= 0.9, the raw computed value for any other color format, or null
 *   if nothing suitable is found within the depth limit.
 */
function getEffectiveBackgroundColor(el) {
  const maxDepth = 10; // Bound the climb on very deep trees
  const rgbPattern = /rgba?\((\d+),\s*(\d+),\s*(\d+)(?:,\s*([\d.]+))?\)/;

  let current = el;
  for (let depth = 0; current && depth < maxDepth; depth++) {
    // getSVGColor returns null for anything that is not an <svg>.
    const svgColor = getSVGColor(current);
    if (svgColor) return svgColor;

    const bgColor = window.getComputedStyle(current).backgroundColor;
    if (bgColor && bgColor !== 'transparent' && bgColor !== 'rgba(0, 0, 0, 0)') {
      const match = bgColor.match(rgbPattern);
      if (!match) {
        // Named color or other format, return as-is
        return bgColor;
      }
      const alpha = match[4] ? parseFloat(match[4]) : 1.0;
      if (alpha >= 0.9) {
        // Normalize to rgb() so consumers see a single format
        return `rgb(${match[1]}, ${match[2]}, ${match[3]})`;
      }
      // Semi-transparent: keep climbing to whatever shows through.
    }

    current = current.parentElement;
  }

  return null;
}
221
+
222
+ // --- HELPER: Viewport Check ---
44
223
  function isInViewport(rect) {
45
224
  return (
46
225
  rect.top < window.innerHeight && rect.bottom > 0 &&
@@ -48,19 +227,30 @@
48
227
  );
49
228
  }
50
229
 
51
- // --- HELPER: Occlusion Check (NEW) ---
52
- function isOccluded(el, rect) {
53
- // Fast center-point check
230
+ // --- HELPER: Occlusion Check (Optimized to avoid layout thrashing) ---
231
+ // Only checks occlusion for elements likely to be occluded (high z-index, positioned)
232
+ // This avoids forced reflow for most elements, dramatically improving performance
233
/**
 * Report whether another element covers the center point of `el`.
 *
 * As a performance shortcut, statically positioned elements whose z-index is
 * not above 10 are assumed visible and never hit-tested. Only the remaining
 * elements pay for `document.elementFromPoint`.
 *
 * @param {Element} el - Element under test.
 * @param {{x: number, y: number, width: number, height: number}} rect -
 *   The element's bounding rect in viewport coordinates.
 * @param {CSSStyleDeclaration} [style] - Computed style of `el`. Optional so
 *   two-argument callers keep working; it is looked up when omitted.
 * @returns {boolean} True only when the topmost element at the center point is
 *   neither `el`, a descendant of `el`, nor an ancestor of `el`.
 */
function isOccluded(el, rect, style = window.getComputedStyle(el)) {
  const zIndex = Number.parseInt(style.zIndex, 10);
  const hasHighZIndex = !Number.isNaN(zIndex) && zIndex > 10;

  // Fast path: normal-flow elements (the vast majority) skip the hit test.
  if (style.position === 'static' && !hasHighZIndex) {
    return false; // Assume not occluded for performance
  }

  const cx = rect.x + rect.width / 2;
  const cy = rect.y + rect.height / 2;

  // A center outside the viewport cannot be hit-tested; assume not occluded.
  if (cx < 0 || cx > window.innerWidth || cy < 0 || cy > window.innerHeight) return false;

  const topEl = document.elementFromPoint(cx, cy);
  if (!topEl) return false;

  return !(el === topEl || el.contains(topEl) || topEl.contains(el));
}
66
256
 
@@ -76,45 +266,91 @@
76
266
  };
77
267
  window.addEventListener('message', listener);
78
268
  window.postMessage({ type: 'SENTIENCE_SCREENSHOT_REQUEST', requestId, options }, '*');
269
+ setTimeout(() => {
270
+ window.removeEventListener('message', listener);
271
+ resolve(null);
272
+ }, 10000); // 10s timeout
273
+ });
274
+ }
275
+
276
+ // --- HELPER: Snapshot Processing Bridge (NEW!) ---
277
/**
 * Post raw snapshot data as a `SENTIENCE_SNAPSHOT_REQUEST` window message and
 * wait for the matching `SENTIENCE_SNAPSHOT_RESULT`.
 *
 * @param {*} rawData - Collected element data, forwarded as-is.
 * @param {*} options - Snapshot options, forwarded as-is.
 * @returns {Promise<{elements: *, raw_elements: *, duration: *}>} Resolves with
 *   the fields of the result message. Rejects with an Error when the result
 *   carries `error`, when posting the request throws, or when no result
 *   arrives within 25 seconds.
 */
function processSnapshotInBackground(rawData, options) {
  return new Promise((resolve, reject) => {
    const requestId = Math.random().toString(36).substring(7);
    const TIMEOUT_MS = 25000; // 25 seconds (longer than content.js timeout)
    let settled = false;
    let timeoutId = null;

    // Single exit point: whichever outcome arrives first wins, and the timer
    // and listener are always released together.
    const settle = (finish) => {
      if (settled) return;
      settled = true;
      clearTimeout(timeoutId);
      window.removeEventListener('message', onMessage);
      finish();
    };

    function onMessage(event) {
      // Every message on the page passes through here, including ones whose
      // data is null or a primitive; ignore anything that is not our reply.
      const data = event.data;
      if (data?.type !== 'SENTIENCE_SNAPSHOT_RESULT' || data.requestId !== requestId) {
        return;
      }
      settle(() => {
        if (data.error) {
          reject(new Error(data.error));
        } else {
          resolve({
            elements: data.elements,
            raw_elements: data.raw_elements,
            duration: data.duration,
          });
        }
      });
    }

    timeoutId = setTimeout(() => {
      settle(() => {
        reject(new Error('WASM processing timeout - extension may be unresponsive. Try reloading the extension.'));
      });
    }, TIMEOUT_MS);

    window.addEventListener('message', onMessage);

    try {
      window.postMessage({
        type: 'SENTIENCE_SNAPSHOT_REQUEST',
        requestId,
        rawData,
        options,
      }, '*');
    } catch (error) {
      settle(() => {
        reject(new Error(`Failed to send snapshot request: ${error.message}`));
      });
    }
  });
}
81
329
 
82
- // --- HELPER: Get Raw HTML for Turndown/External Processing ---
83
- // Returns cleaned HTML that can be processed by Turndown or other Node.js libraries
330
+ // --- HELPER: Raw HTML Extractor (unchanged) ---
84
331
  function getRawHTML(root) {
85
332
  const sourceRoot = root || document.body;
86
333
  const clone = sourceRoot.cloneNode(true);
87
-
88
- // Remove unwanted elements by tag name (simple and reliable)
334
+
89
335
  const unwantedTags = ['nav', 'footer', 'header', 'script', 'style', 'noscript', 'iframe', 'svg'];
90
336
  unwantedTags.forEach(tag => {
91
337
  const elements = clone.querySelectorAll(tag);
92
338
  elements.forEach(el => {
93
- if (el.parentNode) {
94
- el.parentNode.removeChild(el);
95
- }
339
+ if (el.parentNode) el.parentNode.removeChild(el);
96
340
  });
97
341
  });
98
342
 
99
- // Remove invisible elements from original DOM and find matching ones in clone
100
- // We'll use a simple approach: mark elements in original, then remove from clone
343
+ // Remove invisible elements
101
344
  const invisibleSelectors = [];
102
- const walker = document.createTreeWalker(
103
- sourceRoot,
104
- NodeFilter.SHOW_ELEMENT,
105
- null,
106
- false
107
- );
108
-
345
+ const walker = document.createTreeWalker(sourceRoot, NodeFilter.SHOW_ELEMENT, null, false);
109
346
  let node;
110
347
  while (node = walker.nextNode()) {
111
348
  const tag = node.tagName.toLowerCase();
112
349
  if (tag === 'head' || tag === 'title') continue;
113
-
350
+
114
351
  const style = window.getComputedStyle(node);
115
352
  if (style.display === 'none' || style.visibility === 'hidden' ||
116
353
  (node.offsetWidth === 0 && node.offsetHeight === 0)) {
117
- // Build a selector for this element
118
354
  let selector = tag;
119
355
  if (node.id) {
120
356
  selector = `#${node.id}`;
@@ -128,30 +364,25 @@
128
364
  }
129
365
  }
130
366
 
131
- // Remove invisible elements from clone (if we can find them)
132
367
  invisibleSelectors.forEach(selector => {
133
368
  try {
134
369
  const elements = clone.querySelectorAll(selector);
135
370
  elements.forEach(el => {
136
- if (el.parentNode) {
137
- el.parentNode.removeChild(el);
138
- }
371
+ if (el.parentNode) el.parentNode.removeChild(el);
139
372
  });
140
373
  } catch (e) {
141
374
  // Invalid selector, skip
142
375
  }
143
376
  });
144
377
 
145
- // Resolve relative URLs in links and images
378
+ // Resolve relative URLs
146
379
  const links = clone.querySelectorAll('a[href]');
147
380
  links.forEach(link => {
148
381
  const href = link.getAttribute('href');
149
382
  if (href && !href.startsWith('http://') && !href.startsWith('https://') && !href.startsWith('#')) {
150
383
  try {
151
384
  link.setAttribute('href', new URL(href, document.baseURI).href);
152
- } catch (e) {
153
- // Keep original href if URL parsing fails
154
- }
385
+ } catch (e) {}
155
386
  }
156
387
  });
157
388
 
@@ -161,32 +392,24 @@
161
392
  if (src && !src.startsWith('http://') && !src.startsWith('https://') && !src.startsWith('data:')) {
162
393
  try {
163
394
  img.setAttribute('src', new URL(src, document.baseURI).href);
164
- } catch (e) {
165
- // Keep original src if URL parsing fails
166
- }
395
+ } catch (e) {}
167
396
  }
168
397
  });
169
398
 
170
399
  return clone.innerHTML;
171
400
  }
172
401
 
173
- // --- HELPER: Simple Markdown Converter (Lightweight) ---
174
- // Uses getRawHTML() and then converts to markdown for consistency
402
+ // --- HELPER: Markdown Converter (unchanged) ---
175
403
  function convertToMarkdown(root) {
176
- // Get cleaned HTML first
177
404
  const rawHTML = getRawHTML(root);
178
-
179
- // Create a temporary container to parse the HTML
180
405
  const tempDiv = document.createElement('div');
181
406
  tempDiv.innerHTML = rawHTML;
182
-
407
+
183
408
  let markdown = '';
184
- let insideLink = false; // Track if we're inside an <a> tag
409
+ let insideLink = false;
185
410
 
186
411
  function walk(node) {
187
412
  if (node.nodeType === Node.TEXT_NODE) {
188
- // Keep minimal whitespace to prevent words merging
189
- // Strip newlines inside text nodes to prevent broken links
190
413
  const text = node.textContent.replace(/[\r\n]+/g, ' ').replace(/\s+/g, ' ');
191
414
  if (text.trim()) markdown += text;
192
415
  return;
@@ -201,13 +424,12 @@
201
424
  if (tag === 'h2') markdown += '\n## ';
202
425
  if (tag === 'h3') markdown += '\n### ';
203
426
  if (tag === 'li') markdown += '\n- ';
204
- // IMPORTANT: Don't add newlines for block elements when inside a link
205
427
  if (!insideLink && (tag === 'p' || tag === 'div' || tag === 'br')) markdown += '\n';
206
428
  if (tag === 'strong' || tag === 'b') markdown += '**';
207
429
  if (tag === 'em' || tag === 'i') markdown += '_';
208
430
  if (tag === 'a') {
209
431
  markdown += '[';
210
- insideLink = true; // Mark that we're entering a link
432
+ insideLink = true;
211
433
  }
212
434
 
213
435
  // Children
@@ -219,25 +441,21 @@
219
441
 
220
442
  // Suffix
221
443
  if (tag === 'a') {
222
- // Get absolute URL from href attribute (already resolved in getRawHTML)
223
444
  const href = node.getAttribute('href');
224
445
  if (href) markdown += `](${href})`;
225
446
  else markdown += ']';
226
- insideLink = false; // Mark that we're exiting the link
447
+ insideLink = false;
227
448
  }
228
449
  if (tag === 'strong' || tag === 'b') markdown += '**';
229
450
  if (tag === 'em' || tag === 'i') markdown += '_';
230
- // IMPORTANT: Don't add newlines for block elements when inside a link (suffix section too)
231
451
  if (!insideLink && (tag === 'h1' || tag === 'h2' || tag === 'h3' || tag === 'p' || tag === 'div')) markdown += '\n';
232
452
  }
233
453
 
234
454
  walk(tempDiv);
235
-
236
- // Cleanup: remove excessive newlines
237
455
  return markdown.replace(/\n{3,}/g, '\n\n').trim();
238
456
  }
239
457
 
240
- // --- HELPER: Raw Text Extractor ---
458
+ // --- HELPER: Text Extractor (unchanged) ---
241
459
  function convertToText(root) {
242
460
  let text = '';
243
461
  function walk(node) {
@@ -247,22 +465,20 @@
247
465
  }
248
466
  if (node.nodeType === Node.ELEMENT_NODE) {
249
467
  const tag = node.tagName.toLowerCase();
250
- // Skip nav/footer/header/script/style/noscript/iframe/svg
251
468
  if (['nav', 'footer', 'header', 'script', 'style', 'noscript', 'iframe', 'svg'].includes(tag)) return;
252
469
 
253
470
  const style = window.getComputedStyle(node);
254
471
  if (style.display === 'none' || style.visibility === 'hidden') return;
255
-
256
- // Block level elements get a newline
472
+
257
473
  const isBlock = style.display === 'block' || style.display === 'flex' || node.tagName === 'P' || node.tagName === 'DIV';
258
474
  if (isBlock) text += ' ';
259
-
475
+
260
476
  if (node.shadowRoot) {
261
477
  Array.from(node.shadowRoot.childNodes).forEach(walk);
262
478
  } else {
263
479
  node.childNodes.forEach(walk);
264
480
  }
265
-
481
+
266
482
  if (isBlock) text += '\n';
267
483
  }
268
484
  }
@@ -270,155 +486,597 @@
270
486
  return text.replace(/\n{3,}/g, '\n\n').trim();
271
487
  }
272
488
 
273
- // Load WASM
274
- try {
275
- const wasmUrl = EXT_URL + 'pkg/sentience_core.js';
276
- const module = await import(wasmUrl);
277
- const imports = {
278
- env: {
279
- js_click_element: (id) => {
280
- const el = window.sentience_registry[id];
281
- if (el) { el.click(); el.focus(); }
489
+ // --- HELPER: Clean null/undefined fields ---
490
/**
 * Recursively drop null/undefined properties from a value.
 *
 * Arrays are mapped element by element (null entries inside an array are
 * kept). For objects, null/undefined properties are removed, and nested
 * objects or arrays that end up with no keys are removed as well. Anything
 * that is not an object is returned unchanged.
 *
 * @param {*} obj - Value to clean.
 * @returns {*} A cleaned copy for arrays and objects, otherwise the input.
 */
function cleanElement(obj) {
  if (Array.isArray(obj)) {
    return obj.map((item) => cleanElement(item));
  }
  if (obj === null || typeof obj !== 'object') {
    return obj;
  }

  return Object.entries(obj).reduce((cleaned, [key, value]) => {
    if (value === null || value === undefined) {
      return cleaned;
    }
    if (typeof value !== 'object') {
      cleaned[key] = value;
      return cleaned;
    }
    // Nested object or array: keep it only if something survives cleaning.
    const nested = cleanElement(value);
    if (Object.keys(nested).length > 0) {
      cleaned[key] = nested;
    }
    return cleaned;
  }, {});
}
298
512
 
299
- // REMOVED: Headless detection - no longer needed (license system removed)
300
-
301
- // --- GLOBAL API ---
302
- window.sentience = {
303
- // 1. Geometry snapshot (existing)
304
- snapshot: async (options = {}) => {
305
- if (!wasmModule) return { error: "WASM not ready" };
513
+ // --- HELPER: Extract Raw Element Data (for Golden Set) ---
514
+ function extractRawElementData(el) {
515
+ const style = window.getComputedStyle(el);
516
+ const rect = el.getBoundingClientRect();
517
+
518
+ return {
519
+ tag: el.tagName,
520
+ rect: {
521
+ x: Math.round(rect.x),
522
+ y: Math.round(rect.y),
523
+ width: Math.round(rect.width),
524
+ height: Math.round(rect.height)
525
+ },
526
+ styles: {
527
+ cursor: style.cursor || null,
528
+ backgroundColor: style.backgroundColor || null,
529
+ color: style.color || null,
530
+ fontWeight: style.fontWeight || null,
531
+ fontSize: style.fontSize || null,
532
+ display: style.display || null,
533
+ position: style.position || null,
534
+ zIndex: style.zIndex || null,
535
+ opacity: style.opacity || null,
536
+ visibility: style.visibility || null
537
+ },
538
+ attributes: {
539
+ role: el.getAttribute('role') || null,
540
+ type: el.getAttribute('type') || null,
541
+ ariaLabel: el.getAttribute('aria-label') || null,
542
+ id: el.id || null,
543
+ className: el.className || null
544
+ }
545
+ };
546
+ }
306
547
 
307
- const rawData = [];
308
- // Remove textMap as we include text in rawData
309
- window.sentience_registry = [];
548
+ // --- HELPER: Generate Unique CSS Selector (for Golden Set) ---
549
/**
 * Build a CSS selector for an element.
 *
 * Strategy, in order:
 *   1. `#id` when the element has an id.
 *   2. `tag[attr="value"]` for its first `data-*` or `aria-label` attribute.
 *   3. A `parent > child` path of `tag.firstClass:nth-of-type(n)` steps,
 *      climbing until an ancestor with an id, <body>, or <html> is reached.
 *
 * Ids and class names are escaped as CSS identifiers, and attribute values are
 * escaped for a double-quoted string, so the result is a valid selector even
 * for values such as "1st", "a.b" or ones containing quotes and backslashes.
 *
 * NOTE(review): despite the name, the result is not checked against the
 * document, so steps 1 and 2 can match more than one element.
 *
 * @param {Element|null|undefined} el - Element to describe.
 * @returns {string} The selector, or '' when `el` is missing or has no tag.
 */
function getUniqueSelector(el) {
  if (!el || !el.tagName) return '';

  // Prefer the platform's CSS.escape; otherwise backslash-escape every ASCII
  // character that is not identifier-safe and hex-escape a leading digit.
  const escapeIdent = (value) => {
    const text = String(value);
    if (typeof CSS !== 'undefined' && typeof CSS.escape === 'function') {
      return CSS.escape(text);
    }
    return text
      .replace(/[^a-zA-Z0-9_\u00A0-\uFFFF-]/g, (ch) => `\\${ch}`)
      .replace(/^(-?)(\d)/, (match, dash, digit) => `${dash}\\3${digit} `);
  };

  // Backslashes first, so the ones added for quotes are not doubled.
  const escapeAttrValue = (value) =>
    (value ? String(value).replace(/\\/g, '\\\\').replace(/"/g, '\\"') : '');

  // If element has an ID, use it
  if (el.id) {
    return `#${escapeIdent(el.id)}`;
  }

  // Try data attributes or aria-label
  for (const attr of el.attributes) {
    if (attr.name.startsWith('data-') || attr.name === 'aria-label') {
      return `${el.tagName.toLowerCase()}[${attr.name}="${escapeAttrValue(attr.value)}"]`;
    }
  }

  // Build path with classes and nth-of-type
  const path = [];
  let current = el;

  while (current && current !== document.body && current !== document.documentElement) {
    // An ancestor with an ID anchors the path; stop climbing there.
    if (current.id) {
      path.unshift(`#${escapeIdent(current.id)}`);
      break;
    }

    let selector = current.tagName.toLowerCase();

    // Use the first class only, for simplicity. Non-string className values
    // (SVG elements) are skipped.
    if (current.className && typeof current.className === 'string') {
      const [firstClass] = current.className.trim().split(/\s+/).filter(Boolean);
      if (firstClass) {
        selector += `.${escapeIdent(firstClass)}`;
      }
    }

    // Disambiguate among same-tag siblings
    if (current.parentElement) {
      const sameTagSiblings = Array.from(current.parentElement.children)
        .filter((sibling) => sibling.tagName === current.tagName);
      if (sameTagSiblings.length > 1) {
        selector += `:nth-of-type(${sameTagSiblings.indexOf(current) + 1})`;
      }
    }

    path.unshift(selector);
    current = current.parentElement;
  }

  return path.join(' > ') || el.tagName.toLowerCase();
}
604
+
605
+ // --- HELPER: Wait for DOM Stability (SPA Hydration) ---
606
+ // Waits for the DOM to stabilize before taking a snapshot
607
+ // Useful for React/Vue apps that render empty skeletons before hydration
608
/**
 * Wait for the DOM to settle before a snapshot is taken (e.g. while an SPA is
 * still hydrating).
 *
 * The wait ends when the document holds at least `minNodeCount` elements AND
 * no child-list mutation has been observed for `quietPeriod` ms. If that has
 * not happened after `maxWait` ms, a warning is logged and the wait ends
 * anyway. The promise never rejects.
 *
 * One observer and one polling timer drive both phases, and both are released
 * when the wait ends, so nothing fires after the promise has resolved.
 *
 * @param {Object} [options]
 * @param {number} [options.minNodeCount=500] - Elements required before the
 *   quiet period counts.
 * @param {number} [options.quietPeriod=200] - Mutation-free time in ms.
 * @param {number} [options.maxWait=5000] - Upper bound on the total wait in ms.
 * @returns {Promise<void>}
 */
async function waitForStability(options = {}) {
  const {
    minNodeCount = 500,
    quietPeriod = 200, // milliseconds
    maxWait = 5000 // maximum wait time
  } = options;

  // <body> may not exist yet if this runs very early; observe <html> instead.
  const root = document.body || document.documentElement;
  if (!root) return;

  const POLL_INTERVAL_MS = 50;
  const startTime = Date.now();
  const countNodes = () => document.querySelectorAll('*').length;

  return new Promise((resolve) => {
    let hasEnoughNodes = countNodes() >= minNodeCount;
    let lastChange = Date.now();
    let pollTimer = null;
    let finished = false;

    const observer = new MutationObserver(() => {
      lastChange = Date.now();
      // Count nodes only while still below the threshold; once reached, the
      // wait is governed by the quiet period alone.
      if (!hasEnoughNodes && countNodes() >= minNodeCount) {
        hasEnoughNodes = true;
      }
    });

    const finish = (warning) => {
      if (finished) return;
      finished = true;
      observer.disconnect();
      clearTimeout(pollTimer);
      if (warning) console.warn(warning);
      resolve();
    };

    const poll = () => {
      const now = Date.now();
      if (hasEnoughNodes && now - lastChange >= quietPeriod) {
        finish();
      } else if (now - startTime >= maxWait) {
        finish(hasEnoughNodes
          ? '[SentienceAPI] DOM stability timeout - proceeding anyway'
          : '[SentienceAPI] DOM node count timeout - proceeding anyway');
      } else {
        pollTimer = setTimeout(poll, POLL_INTERVAL_MS);
      }
    };

    observer.observe(root, {
      childList: true,
      subtree: true,
      attributes: false
    });

    poll();
  });
}
709
+
710
// --- HELPER: Collect Iframe Snapshots (Frame Stitching) ---
/**
 * Request a snapshot from every <iframe> in the current document.
 *
 * Each iframe is sent a SENTIENCE_IFRAME_SNAPSHOT_REQUEST via postMessage and
 * is given a fixed time window to answer with a matching
 * SENTIENCE_IFRAME_SNAPSHOT_RESPONSE (same requestId). Frames that time out,
 * answer with an error, or cannot be messaged are skipped.
 *
 * NOTE(review): a frame can only answer if the snapshot request handler is
 * installed inside it; whether that is the case for cross-origin frames
 * depends on how the extension injects this script - confirm.
 *
 * @param {Object} [options={}] Snapshot options forwarded to each child frame.
 *     `collectIframes` is forced to `true` in the forwarded copy so that
 *     nested iframes are collected recursively by the child.
 * @returns {Promise<Map<HTMLIFrameElement, Object>>} Map of iframe element to
 *     the snapshot object that frame returned. Empty when there are no
 *     iframes or none answered.
 */
async function collectIframeSnapshots(options = {}) {
    const RESPONSE_TYPE = 'SENTIENCE_IFRAME_SNAPSHOT_RESPONSE';
    const RESPONSE_TIMEOUT_MS = 5000; // generous, to tolerate slow child frames
    // Substrings of common ad-serving URLs; such frames are skipped to save time.
    const AD_URL_MARKERS = ['doubleclick', 'googleadservices', 'adsystem'];

    const iframeData = new Map(); // iframe element -> snapshot data

    const iframes = Array.from(document.querySelectorAll('iframe'));
    if (iframes.length === 0) {
        return iframeData;
    }

    console.log(`[SentienceAPI] Found ${iframes.length} iframe(s), requesting snapshots...`);

    // Resolves to { iframe, data, error } on success, or null when the frame
    // is skipped, fails, or does not answer in time. Never rejects.
    const requestSnapshot = (iframe, idx) => {
        const src = iframe.src || '';
        if (AD_URL_MARKERS.some((marker) => src.includes(marker))) {
            console.log(`[SentienceAPI] Skipping ad iframe: ${src.substring(0, 30)}...`);
            return Promise.resolve(null);
        }

        return new Promise((resolve) => {
            const requestId = `iframe-${idx}-${Date.now()}`;
            let timeoutId = null;
            let targetWindow = null;

            const listener = (event) => {
                const message = event.data;
                if (message?.type !== RESPONSE_TYPE || message.requestId !== requestId) {
                    return;
                }
                // Only accept the answer from the frame we actually asked, so
                // another window cannot inject a forged snapshot.
                if (event.source !== targetWindow) {
                    return;
                }

                if (message.error) {
                    console.warn(`[SentienceAPI] Iframe ${idx} returned error:`, message.error);
                    finish(null);
                    return;
                }

                const elementCount = message.snapshot?.raw_elements?.length || 0;
                console.log(`[SentienceAPI] ✓ Received ${elementCount} elements from Iframe ${idx} (id: ${requestId})`);
                finish({
                    iframe: iframe,
                    data: message.snapshot,
                    error: null
                });
            };

            // Single exit point: always release the timer and the listener,
            // including on timeout (otherwise the listener would leak).
            const finish = (value) => {
                clearTimeout(timeoutId);
                window.removeEventListener('message', listener);
                resolve(value);
            };

            timeoutId = setTimeout(() => {
                console.warn(`[SentienceAPI] ⚠️ Iframe ${idx} snapshot TIMEOUT (id: ${requestId})`);
                finish(null);
            }, RESPONSE_TIMEOUT_MS);

            window.addEventListener('message', listener);

            try {
                targetWindow = iframe.contentWindow;
                if (!targetWindow) {
                    console.warn(`[SentienceAPI] Iframe ${idx} contentWindow is inaccessible (Cross-Origin?)`);
                    finish(null);
                    return;
                }

                // '*' places no restriction on the receiving frame's origin;
                // the payload contains only snapshot options.
                targetWindow.postMessage({
                    type: 'SENTIENCE_IFRAME_SNAPSHOT_REQUEST',
                    requestId: requestId,
                    options: {
                        ...options,
                        collectIframes: true // Enable recursion for nested iframes
                    }
                }, '*');
            } catch (error) {
                console.error(`[SentienceAPI] Failed to postMessage to Iframe ${idx}:`, error);
                finish(null);
            }
        });
    };

    // Wait for all iframe responses (each promise settles by itself on timeout)
    const results = await Promise.all(iframes.map(requestSnapshot));

    results.forEach((result, idx) => {
        if (result && result.data) {
            iframeData.set(iframes[idx], result.data);
            console.log(`[SentienceAPI] ✓ Collected snapshot from iframe ${idx}`);
        } else if (!result) {
            console.warn(`[SentienceAPI] Iframe ${idx} returned no data (timeout or error)`);
        }
    });

    return iframeData;
}
355
825
 
356
- // FREE TIER: No license checks - extension provides basic geometry data
357
- // Pro/Enterprise tiers will be handled server-side (future work)
358
-
359
- // 1. Get Geometry from WASM
360
- let result;
361
- try {
362
- if (options.limit || options.filter) {
363
- result = wasmModule.analyze_page_with_options(rawData, options);
364
- } else {
365
- result = wasmModule.analyze_page(rawData);
826
// --- HELPER: Handle Iframe Snapshot Request (for child frames) ---
/**
 * Install a `message` listener that answers SENTIENCE_IFRAME_SNAPSHOT_REQUEST
 * messages with a snapshot of this window.
 *
 * For each request the handler calls `window.sentience.snapshot()` with the
 * requested options, forcing `collectIframes: true` (so nested frames are
 * collected too) and `waitForStability: false` (the child does not wait for
 * DOM stability, which keeps the round trip fast). The result, or the error
 * message if the snapshot throws, is posted back to `event.source` as a
 * SENTIENCE_IFRAME_SNAPSHOT_RESPONSE carrying the same `requestId`.
 *
 * NOTE(review): the sender is not verified - any window that can post a
 * message here will get an answer. Restricting this to `window.parent` may be
 * desirable; confirm against the other callers before tightening.
 */
function setupIframeSnapshotHandler() {
    const REQUEST_TYPE = 'SENTIENCE_IFRAME_SNAPSHOT_REQUEST';
    const RESPONSE_TYPE = 'SENTIENCE_IFRAME_SNAPSHOT_RESPONSE';

    window.addEventListener('message', async (event) => {
        if (event.data?.type !== REQUEST_TYPE) {
            return;
        }

        // A request without options is treated as "use defaults" instead of failing.
        const { requestId, options = {} } = event.data;

        const reply = (snapshot, error) => {
            if (event.source && event.source.postMessage) {
                event.source.postMessage({
                    type: RESPONSE_TYPE,
                    requestId: requestId,
                    snapshot: snapshot,
                    error: error
                }, '*');
            }
        };

        try {
            const snapshotOptions = { ...options, collectIframes: true, waitForStability: false };
            const snapshot = await window.sentience.snapshot(snapshotOptions);
            reply(snapshot, null);
        } catch (error) {
            // Also reached when posting the snapshot itself fails (e.g. it is not cloneable).
            reply(null, error?.message ?? String(error));
        }
    });
}
867
+
868
// Install the iframe snapshot request handler once per window. The flag on
// `window` survives repeated evaluation of this script, so the listener is
// never registered twice.
const iframeHandlerInstalled = Boolean(window.sentience_iframe_handler_setup);
if (!iframeHandlerInstalled) {
    setupIframeSnapshotHandler();
    window.sentience_iframe_handler_setup = true;
}
370
873
 
371
- // Hydration step removed as WASM now returns populated structs
874
+ // --- GLOBAL API ---
875
+ window.sentience = {
876
+ // 1. Geometry snapshot (NEW ARCHITECTURE - No WASM in Main World!)
877
+ snapshot: async (options = {}) => {
878
+ try {
879
+ // Step 0: Wait for DOM stability if requested (for SPA hydration)
880
+ if (options.waitForStability !== false) {
881
+ await waitForStability(options.waitForStability || {});
882
+ }
883
+
884
+ // Step 1: Collect raw DOM data (Main World - CSP can't block this!)
885
+ const rawData = [];
886
+ window.sentience_registry = [];
372
887
 
373
- // Capture Screenshot
374
- let screenshot = null;
375
- if (options.screenshot) {
376
- screenshot = await captureScreenshot(options.screenshot);
377
- }
888
+ const nodes = getAllElements();
889
+
890
+ nodes.forEach((el, idx) => {
891
+ if (!el.getBoundingClientRect) return;
892
+ const rect = el.getBoundingClientRect();
893
+ if (rect.width < 5 || rect.height < 5) return;
894
+
895
+ window.sentience_registry[idx] = el;
896
+
897
+ const textVal = getText(el);
898
+ const inView = isInViewport(rect);
899
+
900
+ // Get computed style once (needed for both occlusion check and data collection)
901
+ const style = window.getComputedStyle(el);
902
+
903
+ // Only check occlusion for elements likely to be occluded (optimized)
904
+ // This avoids layout thrashing for the vast majority of elements
905
+ const occluded = inView ? isOccluded(el, rect, style) : false;
906
+
907
+ // Get effective background color (traverses DOM to find non-transparent color)
908
+ const effectiveBgColor = getEffectiveBackgroundColor(el);
909
+
910
+ rawData.push({
911
+ id: idx,
912
+ tag: el.tagName.toLowerCase(),
913
+ rect: { x: rect.x, y: rect.y, width: rect.width, height: rect.height },
914
+ styles: {
915
+ display: toSafeString(style.display),
916
+ visibility: toSafeString(style.visibility),
917
+ opacity: toSafeString(style.opacity),
918
+ z_index: toSafeString(style.zIndex || "auto"),
919
+ position: toSafeString(style.position),
920
+ bg_color: toSafeString(effectiveBgColor || style.backgroundColor),
921
+ color: toSafeString(style.color),
922
+ cursor: toSafeString(style.cursor),
923
+ font_weight: toSafeString(style.fontWeight),
924
+ font_size: toSafeString(style.fontSize)
925
+ },
926
+ attributes: {
927
+ role: toSafeString(el.getAttribute('role')),
928
+ type_: toSafeString(el.getAttribute('type')),
929
+ aria_label: toSafeString(el.getAttribute('aria-label')),
930
+ href: toSafeString(el.href || el.getAttribute('href') || null),
931
+ class: toSafeString(getClassName(el)),
932
+ // Capture dynamic input state (not just initial attributes)
933
+ value: el.value !== undefined ? toSafeString(el.value) : toSafeString(el.getAttribute('value')),
934
+ checked: el.checked !== undefined ? String(el.checked) : null
935
+ },
936
+ text: toSafeString(textVal),
937
+ in_viewport: inView,
938
+ is_occluded: occluded
939
+ });
940
+ });
941
+
942
+ console.log(`[SentienceAPI] Collected ${rawData.length} elements from main frame`);
378
943
 
379
- // C. Clean up null/undefined fields to save tokens (Your existing cleaner)
380
- const cleanElement = (obj) => {
381
- if (Array.isArray(obj)) {
382
- return obj.map(cleanElement);
383
- } else if (obj !== null && typeof obj === 'object') {
384
- const cleaned = {};
385
- for (const [key, value] of Object.entries(obj)) {
386
- // Keep boolean false for critical flags if desired, or remove to match Rust defaults
387
- if (value !== null && value !== undefined) {
388
- cleaned[key] = cleanElement(value);
944
+ // Step 1.5: Collect iframe snapshots and FLATTEN immediately
945
+ // "Flatten Early" architecture: Merge iframe elements into main array before WASM
946
+ // This allows WASM to process all elements uniformly (no recursion needed)
947
+ let allRawElements = [...rawData]; // Start with main frame elements
948
+ let totalIframeElements = 0;
949
+
950
+ if (options.collectIframes !== false) {
951
+ try {
952
+ console.log(`[SentienceAPI] Starting iframe collection...`);
953
+ const iframeSnapshots = await collectIframeSnapshots(options);
954
+ console.log(`[SentienceAPI] Iframe collection complete. Received ${iframeSnapshots.size} snapshot(s)`);
955
+
956
+ if (iframeSnapshots.size > 0) {
957
+ // FLATTEN IMMEDIATELY: Don't nest them. Just append them with coordinate translation.
958
+ iframeSnapshots.forEach((iframeSnapshot, iframeEl) => {
959
+ // Debug: Log structure to verify data is correct
960
+ // console.log(`[SentienceAPI] Processing iframe snapshot:`, iframeSnapshot);
961
+
962
+ if (iframeSnapshot && iframeSnapshot.raw_elements) {
963
+ const rawElementsCount = iframeSnapshot.raw_elements.length;
964
+ console.log(`[SentienceAPI] Processing ${rawElementsCount} elements from iframe (src: ${iframeEl.src || 'unknown'})`);
965
+ // Get iframe's bounding rect (offset for coordinate translation)
966
+ const iframeRect = iframeEl.getBoundingClientRect();
967
+ const offset = { x: iframeRect.x, y: iframeRect.y };
968
+
969
+ // Get iframe context for frame switching (Playwright needs this)
970
+ const iframeSrc = iframeEl.src || iframeEl.getAttribute('src') || '';
971
+ let isSameOrigin = false;
972
+ try {
973
+ // Try to access contentWindow to check if same-origin
974
+ isSameOrigin = iframeEl.contentWindow !== null;
975
+ } catch (e) {
976
+ isSameOrigin = false;
977
+ }
978
+
979
+ // Adjust coordinates and add iframe context to each element
980
+ const adjustedElements = iframeSnapshot.raw_elements.map(el => {
981
+ const adjusted = { ...el };
982
+
983
+ // Adjust rect coordinates to parent viewport
984
+ if (adjusted.rect) {
985
+ adjusted.rect = {
986
+ ...adjusted.rect,
987
+ x: adjusted.rect.x + offset.x,
988
+ y: adjusted.rect.y + offset.y
989
+ };
990
+ }
991
+
992
+ // Add iframe context so agents can switch frames in Playwright
993
+ adjusted.iframe_context = {
994
+ src: iframeSrc,
995
+ is_same_origin: isSameOrigin
996
+ };
997
+
998
+ return adjusted;
999
+ });
1000
+
1001
+ // Append flattened iframe elements to main array
1002
+ allRawElements.push(...adjustedElements);
1003
+ totalIframeElements += adjustedElements.length;
1004
+ }
1005
+ });
1006
+
1007
+ // console.log(`[SentienceAPI] Merged ${iframeSnapshots.size} iframe(s). Total elements: ${allRawElements.length} (${rawData.length} main + ${totalIframeElements} iframe)`);
389
1008
  }
1009
+ } catch (error) {
1010
+ console.warn('[SentienceAPI] Iframe collection failed:', error);
390
1011
  }
391
- return cleaned;
392
1012
  }
393
- return obj;
394
- };
395
1013
 
396
- const cleanedElements = cleanElement(result);
1014
+ // Step 2: Send EVERYTHING to WASM (One giant flat list)
1015
+ // Now WASM prunes iframe elements and main elements in one pass!
1016
+ // No recursion needed - everything is already flat
1017
+ console.log(`[SentienceAPI] Sending ${allRawElements.length} total elements to WASM (${rawData.length} main + ${totalIframeElements} iframe)`);
1018
+ const processed = await processSnapshotInBackground(allRawElements, options);
1019
+
1020
+ if (!processed || !processed.elements) {
1021
+ throw new Error('WASM processing returned invalid result');
1022
+ }
1023
+
1024
+ // Step 3: Capture screenshot if requested
1025
+ let screenshot = null;
1026
+ if (options.screenshot) {
1027
+ screenshot = await captureScreenshot(options.screenshot);
1028
+ }
397
1029
 
398
- return {
399
- status: "success",
400
- url: window.location.href,
401
- elements: cleanedElements,
402
- raw_elements: rawData, // Include raw data for server-side processing (safe to expose - no proprietary value)
403
- screenshot: screenshot
404
- };
1030
+ // Step 4: Clean and return
1031
+ const cleanedElements = cleanElement(processed.elements);
1032
+ const cleanedRawElements = cleanElement(processed.raw_elements);
1033
+
1034
+ // FIXED: Removed undefined 'totalIframeRawElements'
1035
+ // FIXED: Logic updated for "Flatten Early" architecture.
1036
+ // processed.elements ALREADY contains the merged iframe elements,
1037
+ // so we simply use .length. No addition needed.
1038
+
1039
+ const totalCount = cleanedElements.length;
1040
+ const totalRaw = cleanedRawElements.length;
1041
+ const iframeCount = totalIframeElements || 0;
1042
+
1043
+ console.log(`[SentienceAPI] ✓ Complete: ${totalCount} Smart Elements, ${totalRaw} Raw Elements (includes ${iframeCount} from iframes) (WASM took ${processed.duration?.toFixed(1)}ms)`);
1044
+
1045
+ return {
1046
+ status: "success",
1047
+ url: window.location.href,
1048
+ viewport: {
1049
+ width: window.innerWidth,
1050
+ height: window.innerHeight
1051
+ },
1052
+ elements: cleanedElements,
1053
+ raw_elements: cleanedRawElements,
1054
+ screenshot: screenshot
1055
+ };
1056
+ } catch (error) {
1057
+ console.error('[SentienceAPI] snapshot() failed:', error);
1058
+ console.error('[SentienceAPI] Error stack:', error.stack);
1059
+ return {
1060
+ status: "error",
1061
+ error: error.message || 'Unknown error',
1062
+ stack: error.stack
1063
+ };
1064
+ }
405
1065
  },
406
- // 2. Read Content (New)
1066
+
1067
+ // 2. Read Content (unchanged)
407
1068
  read: (options = {}) => {
408
- const format = options.format || 'raw'; // 'raw', 'text', or 'markdown'
1069
+ const format = options.format || 'raw';
409
1070
  let content;
410
-
1071
+
411
1072
  if (format === 'raw') {
412
- // Return raw HTML suitable for Turndown or other Node.js libraries
413
1073
  content = getRawHTML(document.body);
414
1074
  } else if (format === 'markdown') {
415
- // Return lightweight markdown conversion
416
1075
  content = convertToMarkdown(document.body);
417
1076
  } else {
418
- // Default to text
419
1077
  content = convertToText(document.body);
420
1078
  }
421
-
1079
+
422
1080
  return {
423
1081
  status: "success",
424
1082
  url: window.location.href,
@@ -428,11 +1086,388 @@
428
1086
  };
429
1087
  },
430
1088
 
431
- // 3. Action
1089
+ // 2b. Find Text Rectangle - Get exact pixel coordinates of specific text
1090
+ findTextRect: (options = {}) => {
1091
+ const {
1092
+ text,
1093
+ containerElement = document.body,
1094
+ caseSensitive = false,
1095
+ wholeWord = false,
1096
+ maxResults = 10
1097
+ } = options;
1098
+
1099
+ if (!text || text.trim().length === 0) {
1100
+ return {
1101
+ status: "error",
1102
+ error: "Text parameter is required"
1103
+ };
1104
+ }
1105
+
1106
+ const results = [];
1107
+ const searchText = caseSensitive ? text : text.toLowerCase();
1108
+
1109
+ // Helper function to find text in a single text node
1110
+ function findInTextNode(textNode) {
1111
+ const nodeText = textNode.nodeValue;
1112
+ const searchableText = caseSensitive ? nodeText : nodeText.toLowerCase();
1113
+
1114
+ let startIndex = 0;
1115
+ while (startIndex < nodeText.length && results.length < maxResults) {
1116
+ const foundIndex = searchableText.indexOf(searchText, startIndex);
1117
+
1118
+ if (foundIndex === -1) break;
1119
+
1120
+ // Check whole word matching if required
1121
+ if (wholeWord) {
1122
+ const before = foundIndex > 0 ? nodeText[foundIndex - 1] : ' ';
1123
+ const after = foundIndex + text.length < nodeText.length
1124
+ ? nodeText[foundIndex + text.length]
1125
+ : ' ';
1126
+
1127
+ // Check if surrounded by word boundaries
1128
+ if (!/\s/.test(before) || !/\s/.test(after)) {
1129
+ startIndex = foundIndex + 1;
1130
+ continue;
1131
+ }
1132
+ }
1133
+
1134
+ try {
1135
+ // Create range for this occurrence
1136
+ const range = document.createRange();
1137
+ range.setStart(textNode, foundIndex);
1138
+ range.setEnd(textNode, foundIndex + text.length);
1139
+
1140
+ const rect = range.getBoundingClientRect();
1141
+
1142
+ // Only include visible rectangles
1143
+ if (rect.width > 0 && rect.height > 0) {
1144
+ results.push({
1145
+ text: nodeText.substring(foundIndex, foundIndex + text.length),
1146
+ rect: {
1147
+ x: rect.left + window.scrollX,
1148
+ y: rect.top + window.scrollY,
1149
+ width: rect.width,
1150
+ height: rect.height,
1151
+ left: rect.left + window.scrollX,
1152
+ top: rect.top + window.scrollY,
1153
+ right: rect.right + window.scrollX,
1154
+ bottom: rect.bottom + window.scrollY
1155
+ },
1156
+ viewport_rect: {
1157
+ x: rect.left,
1158
+ y: rect.top,
1159
+ width: rect.width,
1160
+ height: rect.height
1161
+ },
1162
+ context: {
1163
+ before: nodeText.substring(Math.max(0, foundIndex - 20), foundIndex),
1164
+ after: nodeText.substring(foundIndex + text.length, Math.min(nodeText.length, foundIndex + text.length + 20))
1165
+ },
1166
+ in_viewport: (
1167
+ rect.top >= 0 &&
1168
+ rect.left >= 0 &&
1169
+ rect.bottom <= window.innerHeight &&
1170
+ rect.right <= window.innerWidth
1171
+ )
1172
+ });
1173
+ }
1174
+ } catch (e) {
1175
+ console.warn('[SentienceAPI] Failed to get rect for text:', e);
1176
+ }
1177
+
1178
+ startIndex = foundIndex + 1;
1179
+ }
1180
+ }
1181
+
1182
+ // Tree walker to find all text nodes
1183
+ const walker = document.createTreeWalker(
1184
+ containerElement,
1185
+ NodeFilter.SHOW_TEXT,
1186
+ {
1187
+ acceptNode: function(node) {
1188
+ // Skip script, style, and empty text nodes
1189
+ const parent = node.parentElement;
1190
+ if (!parent) return NodeFilter.FILTER_REJECT;
1191
+
1192
+ const tagName = parent.tagName.toLowerCase();
1193
+ if (tagName === 'script' || tagName === 'style' || tagName === 'noscript') {
1194
+ return NodeFilter.FILTER_REJECT;
1195
+ }
1196
+
1197
+ // Skip whitespace-only nodes
1198
+ if (!node.nodeValue || node.nodeValue.trim().length === 0) {
1199
+ return NodeFilter.FILTER_REJECT;
1200
+ }
1201
+
1202
+ // Check if element is visible
1203
+ const computedStyle = window.getComputedStyle(parent);
1204
+ if (computedStyle.display === 'none' ||
1205
+ computedStyle.visibility === 'hidden' ||
1206
+ computedStyle.opacity === '0') {
1207
+ return NodeFilter.FILTER_REJECT;
1208
+ }
1209
+
1210
+ return NodeFilter.FILTER_ACCEPT;
1211
+ }
1212
+ }
1213
+ );
1214
+
1215
+ // Walk through all text nodes
1216
+ let currentNode;
1217
+ while ((currentNode = walker.nextNode()) && results.length < maxResults) {
1218
+ findInTextNode(currentNode);
1219
+ }
1220
+
1221
+ return {
1222
+ status: "success",
1223
+ query: text,
1224
+ case_sensitive: caseSensitive,
1225
+ whole_word: wholeWord,
1226
+ matches: results.length,
1227
+ results: results,
1228
+ viewport: {
1229
+ width: window.innerWidth,
1230
+ height: window.innerHeight,
1231
+ scroll_x: window.scrollX,
1232
+ scroll_y: window.scrollY
1233
+ }
1234
+ };
1235
+ },
1236
+
1237
+ // 3. Click Action (unchanged)
432
1238
  click: (id) => {
433
1239
  const el = window.sentience_registry[id];
434
- if (el) { el.click(); el.focus(); return true; }
1240
+ if (el) {
1241
+ el.click();
1242
+ el.focus();
1243
+ return true;
1244
+ }
435
1245
  return false;
1246
+ },
1247
+
1248
// 4. Inspector Mode: Start Recording for Golden Set Collection
/**
 * Enter recording mode: hovering highlights the element under the pointer and
 * clicking an element copies a "ground truth" JSON snippet describing it to
 * the clipboard (the page's own click handling is suppressed while recording).
 *
 * Requires a populated `window.sentience_registry`, i.e. snapshot() must have
 * been run first; otherwise an alert is shown and a no-op function returned.
 * Starting a new session stops any session that is still active.
 *
 * @param {Object} [options={}]
 * @param {string} [options.highlightColor='#ff0000'] Border colour of the hover box
 *     and colour of the recording bar.
 * @param {string} [options.successColor='#00ff00'] Border colour flashed after a copy.
 * @param {number} [options.autoDisableTimeout=1800000] Milliseconds after which
 *     recording stops by itself; values <= 0 disable the timer.
 * @param {string} [options.keyboardShortcut='Ctrl+Shift+I'] Shown in the console
 *     hint only; the stop shortcut actually handled is always Ctrl/Cmd+Shift+I.
 * @returns {Function} Call it to stop recording and remove all listeners.
 */
startRecording: (options = {}) => {
    const {
        highlightColor = '#ff0000',
        successColor = '#00ff00',
        autoDisableTimeout = 30 * 60 * 1000, // 30 minutes default
        keyboardShortcut = 'Ctrl+Shift+I'
    } = options;

    console.log("🔴 [Sentience] Recording Mode STARTED. Click an element to copy its Ground Truth JSON.");
    console.log(` Press ${keyboardShortcut} or call stopRecording() to stop.`);

    // Validate registry is populated
    if (!window.sentience_registry || window.sentience_registry.length === 0) {
        console.warn("⚠️ Registry empty. Call `await window.sentience.snapshot()` first to populate registry.");
        alert("Registry empty. Run `await window.sentience.snapshot()` first!");
        return () => {}; // Return no-op cleanup function
    }

    // Stop a session that is still running so its listeners do not pile up
    // next to the ones registered below.
    if (typeof window.sentience_stopRecording === 'function') {
        window.sentience_stopRecording();
    }

    // Reverse mapping element -> registry index for O(1) lookup on click
    window.sentience_registry_map = new Map();
    window.sentience_registry.forEach((el, idx) => {
        if (el) window.sentience_registry_map.set(el, idx);
    });

    // Hover highlight overlay (reused if a previous session created it)
    let highlightBox = document.getElementById('sentience-highlight-box');
    if (!highlightBox) {
        highlightBox = document.createElement('div');
        highlightBox.id = 'sentience-highlight-box';
        highlightBox.style.cssText = `
            position: fixed;
            pointer-events: none;
            z-index: 2147483647;
            border: 2px solid ${highlightColor};
            background: rgba(255, 0, 0, 0.1);
            display: none;
            transition: all 0.1s ease;
            box-sizing: border-box;
        `;
        document.body.appendChild(highlightBox);
    }

    // Thin bar at the top of the page that signals "recording is on"
    let recordingIndicator = document.getElementById('sentience-recording-indicator');
    if (!recordingIndicator) {
        recordingIndicator = document.createElement('div');
        recordingIndicator.id = 'sentience-recording-indicator';
        recordingIndicator.style.cssText = `
            position: fixed;
            top: 0;
            left: 0;
            right: 0;
            height: 3px;
            background: ${highlightColor};
            z-index: 2147483646;
            pointer-events: none;
        `;
        document.body.appendChild(recordingIndicator);
    }
    recordingIndicator.style.display = 'block';

    // Hover handler (visual feedback)
    const mouseOverHandler = (e) => {
        const el = e.target;
        if (!el || el === highlightBox || el === recordingIndicator) return;

        // The box is position: fixed, so it takes viewport coordinates as
        // returned by getBoundingClientRect(); adding the scroll offset would
        // shift it away from the element on a scrolled page.
        const rect = el.getBoundingClientRect();
        highlightBox.style.display = 'block';
        highlightBox.style.top = `${rect.top}px`;
        highlightBox.style.left = `${rect.left}px`;
        highlightBox.style.width = `${rect.width}px`;
        highlightBox.style.height = `${rect.height}px`;
    };

    // Click handler (capture ground truth data)
    const clickHandler = (e) => {
        // Swallow the click so the page does not act on it while recording
        e.preventDefault();
        e.stopPropagation();

        const el = e.target;
        if (!el || el === highlightBox || el === recordingIndicator) return;

        const sentienceId = window.sentience_registry_map.get(el);
        if (sentienceId === undefined) {
            console.warn("⚠️ Element not found in Sentience Registry. Did you run snapshot() first?");
            alert("Element not in registry. Run `await window.sentience.snapshot()` first!");
            return;
        }

        const rawData = extractRawElementData(el);
        const selector = getUniqueSelector(el);
        const role = el.getAttribute('role') || el.tagName.toLowerCase();
        const text = getText(el);

        // Golden-set snippet: target identification plus the raw element data
        const snippet = {
            task: `Interact with ${text.substring(0, 20)}${text.length > 20 ? '...' : ''}`,
            url: window.location.href,
            timestamp: new Date().toISOString(),
            target_criteria: {
                id: sentienceId,
                selector: selector,
                role: role,
                text: text.substring(0, 50)
            },
            debug_snapshot: rawData
        };

        const jsonString = JSON.stringify(snippet, null, 2);
        navigator.clipboard.writeText(jsonString).then(() => {
            console.log("✅ Copied Ground Truth to clipboard:", snippet);

            // Flash green briefly, then restore the hover styling
            highlightBox.style.border = `2px solid ${successColor}`;
            highlightBox.style.background = 'rgba(0, 255, 0, 0.2)';
            setTimeout(() => {
                highlightBox.style.border = `2px solid ${highlightColor}`;
                highlightBox.style.background = 'rgba(255, 0, 0, 0.1)';
            }, 500);
        }).catch(err => {
            console.error("❌ Failed to copy to clipboard:", err);
            alert("Failed to copy to clipboard. Check console for JSON.");
        });
    };

    // Auto-disable timer handle (set below when enabled)
    let timeoutId = null;

    // Undo everything this call set up
    const stopRecording = () => {
        document.removeEventListener('mouseover', mouseOverHandler, true);
        document.removeEventListener('click', clickHandler, true);
        document.removeEventListener('keydown', keyboardHandler, true);

        if (timeoutId) {
            clearTimeout(timeoutId);
            timeoutId = null;
        }

        if (highlightBox) {
            highlightBox.style.display = 'none';
        }

        if (recordingIndicator) {
            recordingIndicator.style.display = 'none';
        }

        if (window.sentience_registry_map) {
            window.sentience_registry_map.clear();
        }

        // Only drop the global handle if it still points at this session
        if (window.sentience_stopRecording === stopRecording) {
            delete window.sentience_stopRecording;
        }

        console.log("⚪ [Sentience] Recording Mode STOPPED.");
    };

    // Ctrl+Shift+I or Cmd+Shift+I stops recording. The key is compared
    // case-insensitively because its case depends on the Caps Lock state.
    const keyboardHandler = (e) => {
        const isStopKey = typeof e.key === 'string' && e.key.toLowerCase() === 'i';
        if ((e.ctrlKey || e.metaKey) && e.shiftKey && isStopKey) {
            e.preventDefault();
            stopRecording();
        }
    };

    // Capture phase, so these run before the page's own handlers
    document.addEventListener('mouseover', mouseOverHandler, true);
    document.addEventListener('click', clickHandler, true);
    document.addEventListener('keydown', keyboardHandler, true);

    if (autoDisableTimeout > 0) {
        timeoutId = setTimeout(() => {
            console.log("⏰ [Sentience] Recording Mode auto-disabled after timeout.");
            stopRecording();
        }, autoDisableTimeout);
    }

    // Expose the stop function globally so it can be called without the return value
    window.sentience_stopRecording = stopRecording;

    return stopRecording;
}
1439
+ };
1440
+
1441
/**
 * Ask for an overlay highlighting the given elements.
 *
 * The request is broadcast with window.postMessage as a SENTIENCE_SHOW_OVERLAY
 * message; this function does not draw anything itself.
 *
 * @param {Array} elements - Elements to highlight (forwarded unchanged).
 *     Anything that is not an array is rejected with a console warning.
 * @param {number|null} [targetElementId=null] - Optional id of the target element.
 */
window.sentience.showOverlay = function(elements, targetElementId = null) {
    const isElementList = Array.isArray(elements);
    if (!isElementList) {
        console.warn('[Sentience] showOverlay: elements must be an array');
        return;
    }

    const overlayRequest = {
        type: 'SENTIENCE_SHOW_OVERLAY',
        elements: elements,
        targetElementId: targetElementId,
        timestamp: Date.now()
    };
    window.postMessage(overlayRequest, '*');

    console.log(`[Sentience] Overlay requested for ${elements.length} elements`);
};
438
- })();
1461
+
1462
/**
 * Ask for the overlay to be removed by broadcasting a
 * SENTIENCE_CLEAR_OVERLAY message with window.postMessage.
 */
window.sentience.clearOverlay = function() {
    const clearRequest = { type: 'SENTIENCE_CLEAR_OVERLAY' };
    window.postMessage(clearRequest, '*');
    console.log('[Sentience] Overlay cleared');
};
1471
+
1472
+ console.log('[SentienceAPI] ✓ Ready! (CSP-Resistant - WASM runs in background)');
1473
+ })();