sentienceapi 0.90.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sentienceapi might be problematic. Click here for more details.

Files changed (46) hide show
  1. sentience/__init__.py +153 -0
  2. sentience/actions.py +439 -0
  3. sentience/agent.py +687 -0
  4. sentience/agent_config.py +43 -0
  5. sentience/base_agent.py +101 -0
  6. sentience/browser.py +409 -0
  7. sentience/cli.py +130 -0
  8. sentience/cloud_tracing.py +292 -0
  9. sentience/conversational_agent.py +509 -0
  10. sentience/expect.py +92 -0
  11. sentience/extension/background.js +233 -0
  12. sentience/extension/content.js +298 -0
  13. sentience/extension/injected_api.js +1473 -0
  14. sentience/extension/manifest.json +36 -0
  15. sentience/extension/pkg/sentience_core.d.ts +51 -0
  16. sentience/extension/pkg/sentience_core.js +529 -0
  17. sentience/extension/pkg/sentience_core_bg.wasm +0 -0
  18. sentience/extension/pkg/sentience_core_bg.wasm.d.ts +10 -0
  19. sentience/extension/release.json +115 -0
  20. sentience/extension/test-content.js +4 -0
  21. sentience/formatting.py +59 -0
  22. sentience/generator.py +202 -0
  23. sentience/inspector.py +185 -0
  24. sentience/llm_provider.py +431 -0
  25. sentience/models.py +406 -0
  26. sentience/overlay.py +115 -0
  27. sentience/query.py +303 -0
  28. sentience/read.py +96 -0
  29. sentience/recorder.py +369 -0
  30. sentience/schemas/trace_v1.json +216 -0
  31. sentience/screenshot.py +54 -0
  32. sentience/snapshot.py +282 -0
  33. sentience/text_search.py +107 -0
  34. sentience/trace_indexing/__init__.py +27 -0
  35. sentience/trace_indexing/index_schema.py +111 -0
  36. sentience/trace_indexing/indexer.py +363 -0
  37. sentience/tracer_factory.py +211 -0
  38. sentience/tracing.py +285 -0
  39. sentience/utils.py +296 -0
  40. sentience/wait.py +73 -0
  41. sentienceapi-0.90.9.dist-info/METADATA +878 -0
  42. sentienceapi-0.90.9.dist-info/RECORD +46 -0
  43. sentienceapi-0.90.9.dist-info/WHEEL +5 -0
  44. sentienceapi-0.90.9.dist-info/entry_points.txt +2 -0
  45. sentienceapi-0.90.9.dist-info/licenses/LICENSE.md +43 -0
  46. sentienceapi-0.90.9.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1473 @@
1
+ // injected_api.js - MAIN WORLD (NO WASM! CSP-Resistant!)
2
+ // This script ONLY collects raw DOM data and sends it to background for processing
3
+ (async () => {
4
+ // console.log('[SentienceAPI] Initializing (CSP-Resistant Mode)...');
5
+
6
+ // Wait for Extension ID from content.js
7
+ const getExtensionId = () => document.documentElement.dataset.sentienceExtensionId;
8
+ let extId = getExtensionId();
9
+
10
+ if (!extId) {
11
+ await new Promise(resolve => {
12
+ const check = setInterval(() => {
13
+ extId = getExtensionId();
14
+ if (extId) { clearInterval(check); resolve(); }
15
+ }, 50);
16
+ setTimeout(() => resolve(), 5000); // Max 5s wait
17
+ });
18
+ }
19
+
20
+ if (!extId) {
21
+ console.error('[SentienceAPI] Failed to get extension ID');
22
+ return;
23
+ }
24
+
25
+ // console.log('[SentienceAPI] Extension ID:', extId);
26
+
27
+ // Registry for click actions (still needed for click() function)
28
+ window.sentience_registry = [];
29
+
30
+ // --- HELPER: Deep Walker with Native Filter ---
31
/**
 * Collects every connected element below `root`, descending into open shadow roots.
 *
 * Metadata/script/style elements are rejected together with their subtrees, and
 * so are the children of an <svg> element (the <svg> itself is kept).
 *
 * @param {Node} [root=document] - Node whose descendants are walked.
 * @returns {Element[]} Elements in tree order; shadow-root content follows its host.
 */
function getAllElements(root = document) {
    const SKIPPED_TAGS = ['SCRIPT', 'STYLE', 'NOSCRIPT', 'META', 'LINK', 'HEAD'];

    // SVG elements keep their lowercase tag name ('svg') inside HTML documents,
    // so the comparison has to be case-insensitive to ever match.
    const isSvgRoot = (node) =>
        typeof node?.tagName === 'string' && node.tagName.toLowerCase() === 'svg';

    const filter = {
        acceptNode(node) {
            // Skip metadata and script/style tags
            if (SKIPPED_TAGS.includes(node.tagName)) {
                return NodeFilter.FILTER_REJECT;
            }
            // Skip deep SVG children (FILTER_REJECT drops the whole subtree)
            if (isSvgRoot(node.parentNode) && !isSvgRoot(node)) {
                return NodeFilter.FILTER_REJECT;
            }
            return NodeFilter.FILTER_ACCEPT;
        }
    };

    const elements = [];
    const walker = document.createTreeWalker(root, NodeFilter.SHOW_ELEMENT, filter);
    while (walker.nextNode()) {
        const node = walker.currentNode;
        if (!node.isConnected) continue;
        elements.push(node);
        if (node.shadowRoot) {
            // Push one by one: spreading a very large array into push() can
            // exceed the engine's argument limit.
            for (const inner of getAllElements(node.shadowRoot)) elements.push(inner);
        }
    }
    return elements;
}
57
+
58
+ // --- HELPER: Smart Text Extractor ---
59
/**
 * Picks the most descriptive text for an element.
 *
 * Priority: a non-empty aria-label, then value/placeholder for INPUT, then alt
 * for IMG, otherwise the whitespace-collapsed innerText capped at 100 characters.
 *
 * @param {Element} el - Element to describe.
 * @returns {string} Text for the element ('' when nothing is available).
 */
function getText(el) {
    const ariaLabel = el.getAttribute('aria-label');
    if (ariaLabel) return ariaLabel;

    switch (el.tagName) {
        case 'INPUT':
            return el.value || el.placeholder || '';
        case 'IMG':
            return el.alt || '';
        default: {
            const collapsed = (el.innerText || '').replace(/\s+/g, ' ');
            return collapsed.trim().substring(0, 100);
        }
    }
}
65
+
66
+ // --- HELPER: Safe Class Name Extractor (Handles SVGAnimatedString) ---
67
/**
 * Returns an element's class list as a plain string.
 *
 * HTML elements expose `className` as a string; SVG elements expose an
 * SVGAnimatedString-like object, from which `baseVal` (then `animVal`) is used.
 *
 * @param {Element|null|undefined} el - Element to inspect.
 * @returns {string} The class string, or '' when missing or not convertible.
 */
function getClassName(el) {
    const raw = el ? el.className : null;
    if (!raw) return '';

    // Plain HTML element
    if (typeof raw === 'string') return raw;

    // Anything that is neither a string nor an object has no usable class value
    if (typeof raw !== 'object') return '';

    // SVGAnimatedString: prefer baseVal, then animVal
    for (const field of ['baseVal', 'animVal']) {
        if (field in raw && typeof raw[field] === 'string') {
            return raw[field];
        }
    }

    // Last resort: stringify whatever object we were given
    try {
        return String(raw);
    } catch (e) {
        return '';
    }
}
91
+
92
+ // --- HELPER: Paranoid String Converter (Handles SVGAnimatedString) ---
93
/**
 * Coerces an arbitrary value to a primitive string.
 *
 * Strings pass through. Objects exposing a string `baseVal` or `animVal`
 * (SVGAnimatedString and friends) yield that field. Everything else goes
 * through String(); the result may be as unhelpful as "[object Object]",
 * but it is always a string.
 *
 * @param {*} value - Value to convert.
 * @returns {string|null} The string form, or null for null/undefined input or
 *   when conversion throws.
 */
function toSafeString(value) {
    if (value === null || value === undefined) return null;
    if (typeof value === 'string') return value;

    if (typeof value === 'object') {
        // SVG animated values: baseVal first, animVal as fallback
        const svgField = ['baseVal', 'animVal'].find(
            (field) => field in value && typeof value[field] === 'string'
        );
        if (svgField !== undefined) return value[svgField];
    }

    // Forced cast for remaining objects and for primitives
    try {
        return String(value);
    } catch (e) {
        return null;
    }
}
125
+
126
+ // --- HELPER: Get SVG Fill/Stroke Color ---
127
+ // For SVG elements, get the fill or stroke color (SVGs use fill/stroke, not backgroundColor)
128
/**
 * Returns the effective paint colour of an <svg> element.
 *
 * SVGs are painted with fill/stroke rather than backgroundColor. The computed
 * fill is tried first and the stroke second. A paint is accepted only when it
 * is essentially opaque (alpha >= 0.9) and is normalised to `rgb(r, g, b)`.
 *
 * @param {Element|null|undefined} el - Candidate element.
 * @returns {string|null} `rgb(...)` colour, or null when `el` is not an <svg>
 *   or has no usable fill/stroke.
 */
function getSVGColor(el) {
    // SVG elements report a lowercase tagName ('svg') in HTML documents, so an
    // exact comparison against 'SVG' would reject every real <svg>.
    if (!el || typeof el.tagName !== 'string' || el.tagName.toLowerCase() !== 'svg') {
        return null;
    }

    // Normalises one computed paint value, or returns null when it is unusable.
    const toOpaqueRgb = (paint) => {
        if (!paint || paint === 'none' || paint === 'transparent' || paint === 'rgba(0, 0, 0, 0)') {
            return null;
        }
        const match = paint.match(/rgba?\((\d+),\s*(\d+),\s*(\d+)(?:,\s*([\d.]+))?\)/);
        if (match) {
            const alpha = match[4] ? parseFloat(match[4]) : 1.0;
            return alpha >= 0.9 ? `rgb(${match[1]}, ${match[2]}, ${match[3]})` : null;
        }
        // rgb(...) in a form the pattern does not parse is passed through as-is
        return paint.startsWith('rgb(') ? paint : null;
    };

    const style = window.getComputedStyle(el);

    // Fill is the common case for icons; stroke is the fallback
    return toOpaqueRgb(style.fill) ?? toOpaqueRgb(style.stroke);
}
164
+
165
+ // --- HELPER: Get Effective Background Color ---
166
+ // Traverses up the DOM tree to find the nearest non-transparent background color
167
+ // For SVGs, also checks fill/stroke properties
168
+ // This handles rgba(0,0,0,0) and transparent values that browsers commonly return
169
/**
 * Finds the colour that is effectively behind/in an element.
 *
 * Starting at `el` and moving up through at most 10 ancestors, returns the
 * first usable colour: for <svg> nodes the fill/stroke reported by
 * getSVGColor, otherwise the computed backgroundColor. Fully transparent and
 * semi-transparent (alpha < 0.9) backgrounds are skipped so the search
 * continues upwards.
 *
 * @param {Element|null|undefined} el - Element to start from.
 * @returns {string|null} `rgb(r, g, b)` for parsed colours, the raw computed
 *   value for formats that are not rgb/rgba, or null when nothing is found.
 */
function getEffectiveBackgroundColor(el) {
    if (!el) return null;

    // SVG elements report a lowercase tagName in HTML documents
    const isSvg = (node) =>
        typeof node.tagName === 'string' && node.tagName.toLowerCase() === 'svg';

    const MAX_DEPTH = 10; // Bounds the ancestor walk
    let current = el;

    for (let depth = 0; current && depth < MAX_DEPTH; depth++, current = current.parentElement) {
        // SVGs use fill/stroke instead of backgroundColor
        if (isSvg(current)) {
            const svgColor = getSVGColor(current);
            if (svgColor) return svgColor;
        }

        const bgColor = window.getComputedStyle(current).backgroundColor;
        if (!bgColor || bgColor === 'transparent' || bgColor === 'rgba(0, 0, 0, 0)') {
            continue;
        }

        const match = bgColor.match(/rgba?\((\d+),\s*(\d+),\s*(\d+)(?:,\s*([\d.]+))?\)/);
        if (!match) {
            // Named colour or another format: return as-is
            return bgColor;
        }

        const alpha = match[4] ? parseFloat(match[4]) : 1.0;
        if (alpha >= 0.9) {
            // Opaque enough; normalise to rgb() for Gateway compatibility
            return `rgb(${match[1]}, ${match[2]}, ${match[3]})`;
        }
        // Semi-transparent: keep climbing
    }

    return null;
}
221
+
222
+ // --- HELPER: Viewport Check ---
223
/**
 * Tells whether a rectangle overlaps the visible viewport at all.
 *
 * @param {{top: number, bottom: number, left: number, right: number}} rect -
 *   Viewport-relative rectangle (e.g. from getBoundingClientRect()).
 * @returns {boolean} true when any part of the rectangle is inside the viewport.
 */
function isInViewport(rect) {
    const overlapsVertically = rect.top < window.innerHeight && rect.bottom > 0;
    const overlapsHorizontally = rect.left < window.innerWidth && rect.right > 0;
    return overlapsVertically && overlapsHorizontally;
}
229
+
230
+ // --- HELPER: Occlusion Check (Optimized to avoid layout thrashing) ---
231
+ // Only checks occlusion for elements likely to be occluded (high z-index, positioned)
232
+ // This avoids forced reflow for most elements, dramatically improving performance
233
/**
 * Cheap occlusion test based on what is rendered at the element's centre point.
 *
 * To avoid hit-testing every element, statically positioned elements whose
 * z-index is unset or <= 10 are assumed to be unobstructed. For the rest, the
 * element counts as occluded when the topmost element at its centre is neither
 * the element itself, one of its descendants, nor one of its ancestors.
 *
 * @param {Element} el - Element being tested.
 * @param {{x: number, y: number, width: number, height: number}} rect - Its bounding rect.
 * @param {CSSStyleDeclaration} style - Its computed style (position, zIndex are read).
 * @returns {boolean} true when another element covers the centre point.
 */
function isOccluded(el, rect, style) {
    // Fast path: normal-flow elements without a notable z-index are skipped
    const zIndex = parseInt(style.zIndex, 10);
    const hasLowStacking = isNaN(zIndex) || zIndex <= 10;
    if (style.position === 'static' && hasLowStacking) {
        return false; // Assume not occluded for performance
    }

    // Expensive path: hit-test the centre of the element
    const centerX = rect.x + rect.width / 2;
    const centerY = rect.y + rect.height / 2;

    const outsideViewport =
        centerX < 0 || centerX > window.innerWidth ||
        centerY < 0 || centerY > window.innerHeight;
    if (outsideViewport) return false;

    const hit = document.elementFromPoint(centerX, centerY);
    if (!hit) return false;

    const isSameBranch = el === hit || el.contains(hit) || hit.contains(el);
    return !isSameBranch;
}
256
+
257
+ // --- HELPER: Screenshot Bridge ---
258
/**
 * Asks the extension for a screenshot over window.postMessage.
 *
 * Posts a SENTIENCE_SCREENSHOT_REQUEST tagged with a random request id and
 * waits for the SENTIENCE_SCREENSHOT_RESULT carrying the same id.
 *
 * @param {*} options - Passed through unchanged in the request message.
 * @returns {Promise<*>} Resolves with the `screenshot` field of the result, or
 *   with null if no result arrives within 10 seconds. Rejects if the request
 *   cannot be posted.
 */
function captureScreenshot(options) {
    return new Promise(resolve => {
        const requestId = Math.random().toString(36).substring(7);
        let timer;

        // Single exit path: always release both the timer and the listener
        const finish = (screenshot) => {
            clearTimeout(timer);
            window.removeEventListener('message', listener);
            resolve(screenshot);
        };

        const listener = (e) => {
            // Other scripts may post messages with a null/primitive payload
            const data = e.data;
            if (data?.type === 'SENTIENCE_SCREENSHOT_RESULT' && data.requestId === requestId) {
                finish(data.screenshot);
            }
        };

        window.addEventListener('message', listener);
        try {
            window.postMessage({ type: 'SENTIENCE_SCREENSHOT_REQUEST', requestId, options }, '*');
        } catch (error) {
            // e.g. options that cannot be cloned: don't leave the listener behind
            window.removeEventListener('message', listener);
            throw error;
        }
        timer = setTimeout(() => finish(null), 10000); // 10s timeout
    });
}
275
+
276
+ // --- HELPER: Snapshot Processing Bridge (NEW!) ---
277
/**
 * Sends raw DOM data to the extension for processing and waits for the result.
 *
 * Posts a SENTIENCE_SNAPSHOT_REQUEST via window.postMessage and settles when the
 * SENTIENCE_SNAPSHOT_RESULT with the same request id arrives.
 *
 * @param {*} rawData - Collected element data, forwarded unchanged.
 * @param {*} options - Snapshot options, forwarded unchanged.
 * @returns {Promise<{elements: *, raw_elements: *, duration: *}>} Fields copied
 *   from the result message. Rejects with an Error when the result carries an
 *   `error`, when posting fails, or when nothing arrives within 25 seconds.
 */
function processSnapshotInBackground(rawData, options) {
    return new Promise((resolve, reject) => {
        const requestId = Math.random().toString(36).substring(7);
        const TIMEOUT_MS = 25000; // 25 seconds (longer than content.js timeout)
        let settled = false;

        // Single exit path: first caller wins, and cleanup always happens
        const settle = (outcome, value) => {
            if (settled) return;
            settled = true;
            clearTimeout(timeout);
            window.removeEventListener('message', listener);
            outcome(value);
        };

        const timeout = setTimeout(() => {
            settle(reject, new Error('WASM processing timeout - extension may be unresponsive. Try reloading the extension.'));
        }, TIMEOUT_MS);

        const listener = (e) => {
            // Other scripts may post messages with a null/primitive payload
            const data = e.data;
            if (data?.type !== 'SENTIENCE_SNAPSHOT_RESULT' || data.requestId !== requestId) return;

            if (data.error) {
                settle(reject, new Error(data.error));
            } else {
                settle(resolve, {
                    elements: data.elements,
                    raw_elements: data.raw_elements,
                    duration: data.duration
                });
            }
        };

        window.addEventListener('message', listener);

        try {
            window.postMessage({
                type: 'SENTIENCE_SNAPSHOT_REQUEST',
                requestId,
                rawData,
                options
            }, '*');
        } catch (error) {
            settle(reject, new Error(`Failed to send snapshot request: ${error.message}`));
        }
    });
}
329
+
330
+ // --- HELPER: Raw HTML Extractor (unchanged) ---
331
/**
 * Produces cleaned-up HTML for a subtree without touching the live DOM.
 *
 * Works on a deep clone of `root` and removes:
 *   - elements that are invisible in the live DOM (display:none,
 *     visibility:hidden, or zero offsetWidth and offsetHeight), and
 *   - nav/footer/header/script/style/noscript/iframe/svg elements.
 * Relative link hrefs and image srcs are rewritten to absolute URLs.
 *
 * @param {Element} [root] - Subtree to serialise; defaults to document.body.
 * @returns {string} innerHTML of the cleaned clone.
 */
function getRawHTML(root) {
    const sourceRoot = root || document.body;
    const clone = sourceRoot.cloneNode(true);
    const toRemove = new Set();

    // Computed styles only exist for live nodes, so visibility is measured on
    // the original tree. A deep clone has the same element structure, which
    // lets us walk both trees in lockstep and mark the exact clone
    // counterpart. Matching by a re-derived selector would also hit unrelated
    // elements sharing the same tag, id or class. This pass must run before
    // anything is removed from the clone.
    const liveWalker = document.createTreeWalker(sourceRoot, NodeFilter.SHOW_ELEMENT);
    const cloneWalker = document.createTreeWalker(clone, NodeFilter.SHOW_ELEMENT);
    let live = liveWalker.nextNode();
    let copy = cloneWalker.nextNode();
    while (live && copy) {
        const tag = live.tagName.toLowerCase();
        if (tag !== 'head' && tag !== 'title') {
            const style = window.getComputedStyle(live);
            const isInvisible =
                style.display === 'none' ||
                style.visibility === 'hidden' ||
                (live.offsetWidth === 0 && live.offsetHeight === 0);
            if (isInvisible) toRemove.add(copy);
        }
        live = liveWalker.nextNode();
        copy = cloneWalker.nextNode();
    }

    const unwantedTags = ['nav', 'footer', 'header', 'script', 'style', 'noscript', 'iframe', 'svg'];
    clone.querySelectorAll(unwantedTags.join(', ')).forEach(el => toRemove.add(el));

    toRemove.forEach(el => {
        if (el.parentNode) el.parentNode.removeChild(el);
    });

    // Resolve relative URLs
    clone.querySelectorAll('a[href]').forEach(link => {
        const href = link.getAttribute('href');
        if (href && !href.startsWith('http://') && !href.startsWith('https://') && !href.startsWith('#')) {
            try {
                link.setAttribute('href', new URL(href, document.baseURI).href);
            } catch (e) {
                // Unparseable URL: best effort, keep the original attribute
            }
        }
    });

    clone.querySelectorAll('img[src]').forEach(img => {
        const src = img.getAttribute('src');
        if (src && !src.startsWith('http://') && !src.startsWith('https://') && !src.startsWith('data:')) {
            try {
                img.setAttribute('src', new URL(src, document.baseURI).href);
            } catch (e) {
                // Unparseable URL: best effort, keep the original attribute
            }
        }
    });

    return clone.innerHTML;
}
401
+
402
+ // --- HELPER: Markdown Converter (unchanged) ---
403
/**
 * Renders a subtree as lightweight Markdown.
 *
 * The subtree is first cleaned with getRawHTML and re-parsed into a detached
 * <div>, then walked depth-first. Supported constructs: h1-h3, list items,
 * bold (strong/b), italic (em/i), links, and paragraph/div/br line breaks.
 * Line breaks are suppressed while inside a link.
 *
 * @param {Element} [root] - Subtree to convert (forwarded to getRawHTML).
 * @returns {string} Markdown with runs of 3+ newlines collapsed to 2, trimmed.
 */
function convertToMarkdown(root) {
    const container = document.createElement('div');
    container.innerHTML = getRawHTML(root);

    const BLOCK_PREFIX = new Map([
        ['h1', '\n# '],
        ['h2', '\n## '],
        ['h3', '\n### '],
        ['li', '\n- ']
    ]);
    const INLINE_MARK = new Map([
        ['strong', '**'],
        ['b', '**'],
        ['em', '_'],
        ['i', '_']
    ]);
    const BREAK_BEFORE = new Set(['p', 'div', 'br']);
    const BREAK_AFTER = new Set(['h1', 'h2', 'h3', 'p', 'div']);

    let output = '';
    let insideLink = false;

    const emit = (node) => {
        if (node.nodeType === Node.TEXT_NODE) {
            const collapsed = node.textContent.replace(/[\r\n]+/g, ' ').replace(/\s+/g, ' ');
            if (collapsed.trim()) output += collapsed;
            return;
        }
        if (node.nodeType !== Node.ELEMENT_NODE) return;

        const tag = node.tagName.toLowerCase();
        const isAnchor = tag === 'a';
        const mark = INLINE_MARK.get(tag) ?? '';

        // Opening markers (a tag matches at most one of these groups)
        output += BLOCK_PREFIX.get(tag) ?? '';
        if (!insideLink && BREAK_BEFORE.has(tag)) output += '\n';
        output += mark;
        if (isAnchor) {
            output += '[';
            insideLink = true;
        }

        // Children: shadow content takes precedence over light DOM children
        const children = node.shadowRoot ? Array.from(node.shadowRoot.childNodes) : node.childNodes;
        children.forEach((child) => emit(child));

        // Closing markers
        if (isAnchor) {
            const href = node.getAttribute('href');
            output += href ? `](${href})` : ']';
            insideLink = false;
        }
        output += mark;
        if (!insideLink && BREAK_AFTER.has(tag)) output += '\n';
    };

    emit(container);
    return output.replace(/\n{3,}/g, '\n\n').trim();
}
457
+
458
+ // --- HELPER: Text Extractor (unchanged) ---
459
/**
 * Extracts the visible text of a subtree.
 *
 * Walks the live DOM depth-first, skipping nav/footer/header/script/style/
 * noscript/iframe/svg elements and anything with display:none or
 * visibility:hidden. Block-like elements (display block/flex, or P/DIV tags)
 * are preceded by a space and followed by a newline.
 *
 * @param {Element} [root] - Subtree to read; defaults to document.body.
 * @returns {string} Text with runs of 3+ newlines collapsed to 2, trimmed.
 */
function convertToText(root) {
    const SKIPPED_TAGS = new Set(['nav', 'footer', 'header', 'script', 'style', 'noscript', 'iframe', 'svg']);
    let text = '';

    const visit = (node) => {
        if (node.nodeType === Node.TEXT_NODE) {
            text += node.textContent;
            return;
        }
        if (node.nodeType !== Node.ELEMENT_NODE) return;
        if (SKIPPED_TAGS.has(node.tagName.toLowerCase())) return;

        const style = window.getComputedStyle(node);
        if (style.display === 'none' || style.visibility === 'hidden') return;

        const isBlock =
            style.display === 'block' ||
            style.display === 'flex' ||
            node.tagName === 'P' ||
            node.tagName === 'DIV';

        if (isBlock) text += ' ';

        // Shadow content takes precedence over light DOM children
        const children = node.shadowRoot ? Array.from(node.shadowRoot.childNodes) : node.childNodes;
        children.forEach((child) => visit(child));

        if (isBlock) text += '\n';
    };

    visit(root || document.body);
    return text.replace(/\n{3,}/g, '\n\n').trim();
}
488
+
489
+ // --- HELPER: Clean null/undefined fields ---
490
/**
 * Recursively strips null/undefined properties from plain data.
 *
 * Arrays are mapped element by element (null entries are kept). For objects,
 * properties whose value is null/undefined are dropped, and so are nested
 * objects/arrays that end up with no keys after cleaning.
 *
 * @param {*} obj - Value to clean.
 * @returns {*} A cleaned copy for arrays/objects; other values are returned as-is.
 */
function cleanElement(obj) {
    if (Array.isArray(obj)) {
        return obj.map((item) => cleanElement(item));
    }
    if (obj === null || typeof obj !== 'object') {
        return obj;
    }

    const result = {};
    Object.keys(obj).forEach((key) => {
        const value = obj[key];
        if (value === null || value === undefined) return;

        if (typeof value !== 'object') {
            result[key] = value;
            return;
        }

        // Nested object/array: keep only if something survives cleaning
        const nested = cleanElement(value);
        if (Object.keys(nested).length > 0) {
            result[key] = nested;
        }
    });
    return result;
}
512
+
513
+ // --- HELPER: Extract Raw Element Data (for Golden Set) ---
514
/**
 * Captures a plain-data description of an element (for the Golden Set).
 *
 * @param {Element} el - Element to describe.
 * @returns {{tag: string, rect: Object, styles: Object, attributes: Object}}
 *   `rect` holds the bounding rect rounded to integers; `styles` holds selected
 *   computed-style values; `attributes` holds role/type/aria-label/id/class.
 *   Missing or empty values are reported as null.
 */
function extractRawElementData(el) {
    const style = window.getComputedStyle(el);
    const rect = el.getBoundingClientRect();

    return {
        tag: el.tagName,
        rect: {
            x: Math.round(rect.x),
            y: Math.round(rect.y),
            width: Math.round(rect.width),
            height: Math.round(rect.height)
        },
        styles: {
            cursor: style.cursor || null,
            backgroundColor: style.backgroundColor || null,
            color: style.color || null,
            fontWeight: style.fontWeight || null,
            fontSize: style.fontSize || null,
            display: style.display || null,
            position: style.position || null,
            zIndex: style.zIndex || null,
            opacity: style.opacity || null,
            visibility: style.visibility || null
        },
        attributes: {
            role: el.getAttribute('role') || null,
            type: el.getAttribute('type') || null,
            ariaLabel: el.getAttribute('aria-label') || null,
            id: el.id || null,
            // SVG elements expose className as an SVGAnimatedString object, not
            // a string; go through getClassName so this stays plain data.
            className: getClassName(el) || null
        }
    };
}
547
+
548
+ // --- HELPER: Generate Unique CSS Selector (for Golden Set) ---
549
/**
 * Builds a CSS selector for an element (for the Golden Set).
 *
 * Strategy, in order:
 *   1. `#id` when the element has an id;
 *   2. `tag[attr="value"]` for its first data-* or aria-label attribute
 *      (not checked for uniqueness);
 *   3. a `parent > child` path built upwards until an ancestor with an id,
 *      <body> or <html> is reached, using the first class and :nth-of-type
 *      where same-tag siblings exist.
 *
 * Ids and class names are escaped with CSS.escape (when available) so values
 * such as `a:b` or ones starting with a digit still give a valid selector.
 *
 * @param {Element|null|undefined} el - Element to describe.
 * @returns {string} Selector string, or '' when `el` is not an element.
 */
function getUniqueSelector(el) {
    if (!el || !el.tagName) return '';

    const escapeIdent = (ident) =>
        (typeof CSS !== 'undefined' && typeof CSS.escape === 'function') ? CSS.escape(ident) : ident;

    // If element has an ID, use it
    if (el.id) {
        return `#${escapeIdent(el.id)}`;
    }

    // Try data attributes or aria-label
    for (const attr of el.attributes) {
        if (attr.name.startsWith('data-') || attr.name === 'aria-label') {
            // Inside a double-quoted CSS string both " and \ must be escaped
            const value = attr.value ? attr.value.replace(/["\\]/g, '\\$&') : '';
            return `${el.tagName.toLowerCase()}[${attr.name}="${value}"]`;
        }
    }

    // Build path with classes and nth-of-type
    const path = [];
    let current = el;

    while (current && current !== document.body && current !== document.documentElement) {
        // An ancestor with an ID anchors the path; stop there
        if (current.id) {
            path.unshift(`#${escapeIdent(current.id)}`);
            break;
        }

        let selector = current.tagName.toLowerCase();

        // Add the first class, if any, for simplicity
        if (current.className && typeof current.className === 'string') {
            const [firstClass] = current.className.trim().split(/\s+/).filter(Boolean);
            if (firstClass) {
                selector += `.${escapeIdent(firstClass)}`;
            }
        }

        // Disambiguate among same-tag siblings
        const parent = current.parentElement;
        if (parent) {
            const sameTagSiblings = Array.from(parent.children).filter(s => s.tagName === current.tagName);
            if (sameTagSiblings.length > 1) {
                selector += `:nth-of-type(${sameTagSiblings.indexOf(current) + 1})`;
            }
        }

        path.unshift(selector);
        current = parent;
    }

    return path.join(' > ') || el.tagName.toLowerCase();
}
604
+
605
+ // --- HELPER: Wait for DOM Stability (SPA Hydration) ---
606
+ // Waits for the DOM to stabilize before taking a snapshot
607
+ // Useful for React/Vue apps that render empty skeletons before hydration
608
/**
 * Waits for the DOM to settle before a snapshot is taken (SPA hydration).
 *
 * The DOM counts as stable once the document holds at least `minNodeCount`
 * elements AND no child-list mutation has been observed for `quietPeriod` ms.
 * The wait is abandoned with a console warning after `maxWait` ms. The promise
 * never rejects.
 *
 * @param {Object} [options]
 * @param {number} [options.minNodeCount=500] - Minimum element count.
 * @param {number} [options.quietPeriod=200] - Required mutation-free time (ms).
 * @param {number} [options.maxWait=5000] - Upper bound on the total wait (ms).
 * @returns {Promise<void>} Resolves when stable or when `maxWait` has elapsed.
 */
async function waitForStability(options = {}) {
    const {
        minNodeCount = 500,
        quietPeriod = 200, // milliseconds
        maxWait = 5000 // maximum wait time
    } = options;

    const POLL_INTERVAL_MS = 50;
    const startTime = Date.now();

    return new Promise((resolve) => {
        let lastChange = startTime;
        let hasEnoughNodes = false;

        // One observer for the whole wait; it only records when the DOM last changed
        const observer = new MutationObserver(() => {
            lastChange = Date.now();
        });

        // document.body can still be null very early in page load
        const target = document.body || document.documentElement;
        if (target) {
            observer.observe(target, {
                childList: true,
                subtree: true,
                attributes: false
            });
        }

        // Single exit path, so the observer is always disconnected and no
        // timer is left behind to emit a late warning.
        const finish = (warning) => {
            observer.disconnect();
            if (warning) console.warn(warning);
            resolve();
        };

        const poll = () => {
            const now = Date.now();

            // Stop counting once the threshold has been reached
            if (!hasEnoughNodes) {
                hasEnoughNodes = document.querySelectorAll('*').length >= minNodeCount;
            }

            if (hasEnoughNodes && now - lastChange >= quietPeriod) {
                finish();
                return;
            }

            const elapsed = now - startTime;
            if (elapsed >= maxWait) {
                finish(hasEnoughNodes
                    ? '[SentienceAPI] DOM stability timeout - proceeding anyway'
                    : '[SentienceAPI] DOM node count timeout - proceeding anyway');
                return;
            }

            // Never sleep past the deadline
            setTimeout(poll, Math.min(POLL_INTERVAL_MS, maxWait - elapsed));
        };

        poll();
    });
}
709
+
710
+ // --- HELPER: Collect Iframe Snapshots (Frame Stitching) ---
711
+ // Recursively collects snapshot data from all child iframes
712
+ // This enables detection of elements inside iframes (e.g., Stripe forms)
713
+ //
714
+ // NOTE: Cross-origin iframes cannot be accessed due to browser security (Same-Origin Policy).
715
+ // Only same-origin iframes will return snapshot data. Cross-origin iframes will be skipped
716
+ // with a warning. For cross-origin iframes, users must manually switch frames using
717
+ // Playwright's page.frame() API.
718
/**
 * Requests a snapshot from every iframe in the current document (frame stitching).
 *
 * Each iframe is sent a SENTIENCE_IFRAME_SNAPSHOT_REQUEST via postMessage and
 * is expected to answer with a SENTIENCE_IFRAME_SNAPSHOT_RESPONSE carrying the
 * same request id. Iframes that do not answer within 5 seconds, answer with an
 * error, or cannot be messaged are skipped. Iframes whose src looks like a
 * known ad domain are not contacted at all.
 *
 * @param {Object} [options={}] - Snapshot options forwarded to each iframe
 *   (with `collectIframes: true` added so nested iframes are collected too).
 * @returns {Promise<Map<HTMLIFrameElement, *>>} Map of iframe element to the
 *   snapshot it returned; iframes without data are absent.
 */
async function collectIframeSnapshots(options = {}) {
    const iframeData = new Map(); // Map of iframe element -> snapshot data

    // Find all iframe elements in current document
    const iframes = Array.from(document.querySelectorAll('iframe'));
    if (iframes.length === 0) {
        return iframeData;
    }

    console.log(`[SentienceAPI] Found ${iframes.length} iframe(s), requesting snapshots...`);

    // NOTE(review): the original third marker was 'ads system' (with a space),
    // which can never occur in a resolved URL; 'adsystem' (as in
    // amazon-adsystem.com) is assumed to be the intent. Confirm.
    const AD_MARKERS = ['doubleclick', 'googleadservices', 'adsystem'];
    const IFRAME_TIMEOUT_MS = 5000; // Generous, to handle slow child frames

    const requestSnapshot = (iframe, idx) => {
        // OPTIMIZATION: Skip common ad domains to save time
        const src = iframe.src || '';
        if (AD_MARKERS.some(marker => src.includes(marker))) {
            console.log(`[SentienceAPI] Skipping ad iframe: ${src.substring(0, 30)}...`);
            return Promise.resolve(null);
        }

        return new Promise((resolve) => {
            const requestId = `iframe-${idx}-${Date.now()}`;

            // Single exit path: the timer and the listener are always released,
            // including on timeout.
            const finish = (value) => {
                clearTimeout(timeout);
                window.removeEventListener('message', listener);
                resolve(value);
            };

            const timeout = setTimeout(() => {
                console.warn(`[SentienceAPI] ⚠️ Iframe ${idx} snapshot TIMEOUT (id: ${requestId})`);
                finish(null);
            }, IFRAME_TIMEOUT_MS);

            const listener = (event) => {
                const data = event.data;
                if (data?.type !== 'SENTIENCE_IFRAME_SNAPSHOT_RESPONSE' || data.requestId !== requestId) {
                    return;
                }
                // Only accept the answer from the frame we actually asked;
                // request ids are guessable, so any other window could forge one.
                if (event.source !== iframe.contentWindow) {
                    return;
                }

                if (data.error) {
                    console.warn(`[SentienceAPI] Iframe ${idx} returned error:`, data.error);
                    finish(null);
                    return;
                }

                const elementCount = data.snapshot?.raw_elements?.length || 0;
                console.log(`[SentienceAPI] ✓ Received ${elementCount} elements from Iframe ${idx} (id: ${requestId})`);
                finish({ iframe, data: data.snapshot });
            };

            window.addEventListener('message', listener);

            try {
                if (!iframe.contentWindow) {
                    console.warn(`[SentienceAPI] Iframe ${idx} contentWindow is inaccessible (Cross-Origin?)`);
                    finish(null);
                    return;
                }
                iframe.contentWindow.postMessage({
                    type: 'SENTIENCE_IFRAME_SNAPSHOT_REQUEST',
                    requestId: requestId,
                    options: {
                        ...options,
                        collectIframes: true // Enable recursion for nested iframes
                    }
                }, '*');
            } catch (error) {
                console.error(`[SentienceAPI] Failed to postMessage to Iframe ${idx}:`, error);
                finish(null);
            }
        });
    };

    // Wait for all iframe responses
    const results = await Promise.all(iframes.map((iframe, idx) => requestSnapshot(iframe, idx)));

    // Store iframe data
    results.forEach((result, idx) => {
        if (result && result.data) {
            iframeData.set(iframes[idx], result.data);
            console.log(`[SentienceAPI] ✓ Collected snapshot from iframe ${idx}`);
        } else {
            console.warn(`[SentienceAPI] Iframe ${idx} returned no data (timeout or error)`);
        }
    });

    return iframeData;
}
825
+
826
+ // --- HELPER: Handle Iframe Snapshot Request (for child frames) ---
827
+ // When a parent frame requests snapshot, this handler responds with local snapshot
828
+ // NOTE: Recursion is safe because querySelectorAll('iframe') only finds direct children.
829
+ // Iframe A can ask Iframe B, but won't go back up to parent (no circular dependency risk).
830
/**
 * Installs the message listener that answers snapshot requests inside a frame.
 *
 * On SENTIENCE_IFRAME_SNAPSHOT_REQUEST the handler takes a local snapshot via
 * window.sentience.snapshot and posts a SENTIENCE_IFRAME_SNAPSHOT_RESPONSE
 * (same request id) back to the window the request came from. Failures are
 * reported in the response's `error` field with `snapshot: null`.
 *
 * The snapshot is always taken with `collectIframes: true` (nested frames are
 * included) and `waitForStability: false` (child frames do not wait, for speed),
 * regardless of the values in the request.
 *
 * NOTE: the sender is not verified; any window able to post a message to this
 * frame receives a response.
 */
function setupIframeSnapshotHandler() {
    window.addEventListener('message', async (event) => {
        if (event.data?.type !== 'SENTIENCE_IFRAME_SNAPSHOT_REQUEST') return;

        // A request without options is treated as "no options", not as an error
        const { requestId, options = {} } = event.data;

        const reply = (snapshot, error) => {
            if (event.source && event.source.postMessage) {
                event.source.postMessage({
                    type: 'SENTIENCE_IFRAME_SNAPSHOT_RESPONSE',
                    requestId: requestId,
                    snapshot: snapshot,
                    error: error
                }, '*');
            }
        };

        try {
            const snapshotOptions = { ...options, collectIframes: true, waitForStability: false };
            const snapshot = await window.sentience.snapshot(snapshotOptions);
            reply(snapshot, null);
        } catch (error) {
            // Also covers a response that could not be posted
            reply(null, error?.message ?? String(error));
        }
    });
}
867
+
868
// Install the iframe snapshot handler exactly once per window. The flag lives
// on `window` so a repeated evaluation of this script sees it and skips setup.
if (window.sentience_iframe_handler_setup) {
    // Handler already installed by an earlier evaluation - nothing to do.
} else {
    setupIframeSnapshotHandler();
    window.sentience_iframe_handler_setup = true;
}
873
+
874
+ // --- GLOBAL API ---
875
+ window.sentience = {
876
+ // 1. Geometry snapshot (NEW ARCHITECTURE - No WASM in Main World!)
877
+ snapshot: async (options = {}) => {
878
+ try {
879
+ // Step 0: Wait for DOM stability if requested (for SPA hydration)
880
+ if (options.waitForStability !== false) {
881
+ await waitForStability(options.waitForStability || {});
882
+ }
883
+
884
+ // Step 1: Collect raw DOM data (Main World - CSP can't block this!)
885
+ const rawData = [];
886
+ window.sentience_registry = [];
887
+
888
+ const nodes = getAllElements();
889
+
890
+ nodes.forEach((el, idx) => {
891
+ if (!el.getBoundingClientRect) return;
892
+ const rect = el.getBoundingClientRect();
893
+ if (rect.width < 5 || rect.height < 5) return;
894
+
895
+ window.sentience_registry[idx] = el;
896
+
897
+ const textVal = getText(el);
898
+ const inView = isInViewport(rect);
899
+
900
+ // Get computed style once (needed for both occlusion check and data collection)
901
+ const style = window.getComputedStyle(el);
902
+
903
+ // Only check occlusion for elements likely to be occluded (optimized)
904
+ // This avoids layout thrashing for the vast majority of elements
905
+ const occluded = inView ? isOccluded(el, rect, style) : false;
906
+
907
+ // Get effective background color (traverses DOM to find non-transparent color)
908
+ const effectiveBgColor = getEffectiveBackgroundColor(el);
909
+
910
+ rawData.push({
911
+ id: idx,
912
+ tag: el.tagName.toLowerCase(),
913
+ rect: { x: rect.x, y: rect.y, width: rect.width, height: rect.height },
914
+ styles: {
915
+ display: toSafeString(style.display),
916
+ visibility: toSafeString(style.visibility),
917
+ opacity: toSafeString(style.opacity),
918
+ z_index: toSafeString(style.zIndex || "auto"),
919
+ position: toSafeString(style.position),
920
+ bg_color: toSafeString(effectiveBgColor || style.backgroundColor),
921
+ color: toSafeString(style.color),
922
+ cursor: toSafeString(style.cursor),
923
+ font_weight: toSafeString(style.fontWeight),
924
+ font_size: toSafeString(style.fontSize)
925
+ },
926
+ attributes: {
927
+ role: toSafeString(el.getAttribute('role')),
928
+ type_: toSafeString(el.getAttribute('type')),
929
+ aria_label: toSafeString(el.getAttribute('aria-label')),
930
+ href: toSafeString(el.href || el.getAttribute('href') || null),
931
+ class: toSafeString(getClassName(el)),
932
+ // Capture dynamic input state (not just initial attributes)
933
+ value: el.value !== undefined ? toSafeString(el.value) : toSafeString(el.getAttribute('value')),
934
+ checked: el.checked !== undefined ? String(el.checked) : null
935
+ },
936
+ text: toSafeString(textVal),
937
+ in_viewport: inView,
938
+ is_occluded: occluded
939
+ });
940
+ });
941
+
942
+ console.log(`[SentienceAPI] Collected ${rawData.length} elements from main frame`);
943
+
944
+ // Step 1.5: Collect iframe snapshots and FLATTEN immediately
945
+ // "Flatten Early" architecture: Merge iframe elements into main array before WASM
946
+ // This allows WASM to process all elements uniformly (no recursion needed)
947
+ let allRawElements = [...rawData]; // Start with main frame elements
948
+ let totalIframeElements = 0;
949
+
950
+ if (options.collectIframes !== false) {
951
+ try {
952
+ console.log(`[SentienceAPI] Starting iframe collection...`);
953
+ const iframeSnapshots = await collectIframeSnapshots(options);
954
+ console.log(`[SentienceAPI] Iframe collection complete. Received ${iframeSnapshots.size} snapshot(s)`);
955
+
956
+ if (iframeSnapshots.size > 0) {
957
+ // FLATTEN IMMEDIATELY: Don't nest them. Just append them with coordinate translation.
958
+ iframeSnapshots.forEach((iframeSnapshot, iframeEl) => {
959
+ // Debug: Log structure to verify data is correct
960
+ // console.log(`[SentienceAPI] Processing iframe snapshot:`, iframeSnapshot);
961
+
962
+ if (iframeSnapshot && iframeSnapshot.raw_elements) {
963
+ const rawElementsCount = iframeSnapshot.raw_elements.length;
964
+ console.log(`[SentienceAPI] Processing ${rawElementsCount} elements from iframe (src: ${iframeEl.src || 'unknown'})`);
965
+ // Get iframe's bounding rect (offset for coordinate translation)
966
+ const iframeRect = iframeEl.getBoundingClientRect();
967
+ const offset = { x: iframeRect.x, y: iframeRect.y };
968
+
969
+ // Get iframe context for frame switching (Playwright needs this)
970
+ const iframeSrc = iframeEl.src || iframeEl.getAttribute('src') || '';
971
+ let isSameOrigin = false;
972
+ try {
973
+ // Try to access contentWindow to check if same-origin
974
+ isSameOrigin = iframeEl.contentWindow !== null;
975
+ } catch (e) {
976
+ isSameOrigin = false;
977
+ }
978
+
979
+ // Adjust coordinates and add iframe context to each element
980
+ const adjustedElements = iframeSnapshot.raw_elements.map(el => {
981
+ const adjusted = { ...el };
982
+
983
+ // Adjust rect coordinates to parent viewport
984
+ if (adjusted.rect) {
985
+ adjusted.rect = {
986
+ ...adjusted.rect,
987
+ x: adjusted.rect.x + offset.x,
988
+ y: adjusted.rect.y + offset.y
989
+ };
990
+ }
991
+
992
+ // Add iframe context so agents can switch frames in Playwright
993
+ adjusted.iframe_context = {
994
+ src: iframeSrc,
995
+ is_same_origin: isSameOrigin
996
+ };
997
+
998
+ return adjusted;
999
+ });
1000
+
1001
+ // Append flattened iframe elements to main array
1002
+ allRawElements.push(...adjustedElements);
1003
+ totalIframeElements += adjustedElements.length;
1004
+ }
1005
+ });
1006
+
1007
+ // console.log(`[SentienceAPI] Merged ${iframeSnapshots.size} iframe(s). Total elements: ${allRawElements.length} (${rawData.length} main + ${totalIframeElements} iframe)`);
1008
+ }
1009
+ } catch (error) {
1010
+ console.warn('[SentienceAPI] Iframe collection failed:', error);
1011
+ }
1012
+ }
1013
+
1014
+ // Step 2: Send EVERYTHING to WASM (One giant flat list)
1015
+ // Now WASM prunes iframe elements and main elements in one pass!
1016
+ // No recursion needed - everything is already flat
1017
+ console.log(`[SentienceAPI] Sending ${allRawElements.length} total elements to WASM (${rawData.length} main + ${totalIframeElements} iframe)`);
1018
+ const processed = await processSnapshotInBackground(allRawElements, options);
1019
+
1020
+ if (!processed || !processed.elements) {
1021
+ throw new Error('WASM processing returned invalid result');
1022
+ }
1023
+
1024
+ // Step 3: Capture screenshot if requested
1025
+ let screenshot = null;
1026
+ if (options.screenshot) {
1027
+ screenshot = await captureScreenshot(options.screenshot);
1028
+ }
1029
+
1030
+ // Step 4: Clean and return
1031
+ const cleanedElements = cleanElement(processed.elements);
1032
+ const cleanedRawElements = cleanElement(processed.raw_elements);
1033
+
1034
+ // FIXED: Removed undefined 'totalIframeRawElements'
1035
+ // FIXED: Logic updated for "Flatten Early" architecture.
1036
+ // processed.elements ALREADY contains the merged iframe elements,
1037
+ // so we simply use .length. No addition needed.
1038
+
1039
+ const totalCount = cleanedElements.length;
1040
+ const totalRaw = cleanedRawElements.length;
1041
+ const iframeCount = totalIframeElements || 0;
1042
+
1043
+ console.log(`[SentienceAPI] ✓ Complete: ${totalCount} Smart Elements, ${totalRaw} Raw Elements (includes ${iframeCount} from iframes) (WASM took ${processed.duration?.toFixed(1)}ms)`);
1044
+
1045
+ return {
1046
+ status: "success",
1047
+ url: window.location.href,
1048
+ viewport: {
1049
+ width: window.innerWidth,
1050
+ height: window.innerHeight
1051
+ },
1052
+ elements: cleanedElements,
1053
+ raw_elements: cleanedRawElements,
1054
+ screenshot: screenshot
1055
+ };
1056
+ } catch (error) {
1057
+ console.error('[SentienceAPI] snapshot() failed:', error);
1058
+ console.error('[SentienceAPI] Error stack:', error.stack);
1059
+ return {
1060
+ status: "error",
1061
+ error: error.message || 'Unknown error',
1062
+ stack: error.stack
1063
+ };
1064
+ }
1065
+ },
1066
+
1067
+ // 2. Read Content (unchanged)
1068
+ read: (options = {}) => {
1069
+ const format = options.format || 'raw';
1070
+ let content;
1071
+
1072
+ if (format === 'raw') {
1073
+ content = getRawHTML(document.body);
1074
+ } else if (format === 'markdown') {
1075
+ content = convertToMarkdown(document.body);
1076
+ } else {
1077
+ content = convertToText(document.body);
1078
+ }
1079
+
1080
+ return {
1081
+ status: "success",
1082
+ url: window.location.href,
1083
+ format: format,
1084
+ content: content,
1085
+ length: content.length
1086
+ };
1087
+ },
1088
+
1089
+ // 2b. Find Text Rectangle - Get exact pixel coordinates of specific text
1090
+ findTextRect: (options = {}) => {
1091
+ const {
1092
+ text,
1093
+ containerElement = document.body,
1094
+ caseSensitive = false,
1095
+ wholeWord = false,
1096
+ maxResults = 10
1097
+ } = options;
1098
+
1099
+ if (!text || text.trim().length === 0) {
1100
+ return {
1101
+ status: "error",
1102
+ error: "Text parameter is required"
1103
+ };
1104
+ }
1105
+
1106
+ const results = [];
1107
+ const searchText = caseSensitive ? text : text.toLowerCase();
1108
+
1109
+ // Helper function to find text in a single text node
1110
+ function findInTextNode(textNode) {
1111
+ const nodeText = textNode.nodeValue;
1112
+ const searchableText = caseSensitive ? nodeText : nodeText.toLowerCase();
1113
+
1114
+ let startIndex = 0;
1115
+ while (startIndex < nodeText.length && results.length < maxResults) {
1116
+ const foundIndex = searchableText.indexOf(searchText, startIndex);
1117
+
1118
+ if (foundIndex === -1) break;
1119
+
1120
+ // Check whole word matching if required
1121
+ if (wholeWord) {
1122
+ const before = foundIndex > 0 ? nodeText[foundIndex - 1] : ' ';
1123
+ const after = foundIndex + text.length < nodeText.length
1124
+ ? nodeText[foundIndex + text.length]
1125
+ : ' ';
1126
+
1127
+ // Check if surrounded by word boundaries
1128
+ if (!/\s/.test(before) || !/\s/.test(after)) {
1129
+ startIndex = foundIndex + 1;
1130
+ continue;
1131
+ }
1132
+ }
1133
+
1134
+ try {
1135
+ // Create range for this occurrence
1136
+ const range = document.createRange();
1137
+ range.setStart(textNode, foundIndex);
1138
+ range.setEnd(textNode, foundIndex + text.length);
1139
+
1140
+ const rect = range.getBoundingClientRect();
1141
+
1142
+ // Only include visible rectangles
1143
+ if (rect.width > 0 && rect.height > 0) {
1144
+ results.push({
1145
+ text: nodeText.substring(foundIndex, foundIndex + text.length),
1146
+ rect: {
1147
+ x: rect.left + window.scrollX,
1148
+ y: rect.top + window.scrollY,
1149
+ width: rect.width,
1150
+ height: rect.height,
1151
+ left: rect.left + window.scrollX,
1152
+ top: rect.top + window.scrollY,
1153
+ right: rect.right + window.scrollX,
1154
+ bottom: rect.bottom + window.scrollY
1155
+ },
1156
+ viewport_rect: {
1157
+ x: rect.left,
1158
+ y: rect.top,
1159
+ width: rect.width,
1160
+ height: rect.height
1161
+ },
1162
+ context: {
1163
+ before: nodeText.substring(Math.max(0, foundIndex - 20), foundIndex),
1164
+ after: nodeText.substring(foundIndex + text.length, Math.min(nodeText.length, foundIndex + text.length + 20))
1165
+ },
1166
+ in_viewport: (
1167
+ rect.top >= 0 &&
1168
+ rect.left >= 0 &&
1169
+ rect.bottom <= window.innerHeight &&
1170
+ rect.right <= window.innerWidth
1171
+ )
1172
+ });
1173
+ }
1174
+ } catch (e) {
1175
+ console.warn('[SentienceAPI] Failed to get rect for text:', e);
1176
+ }
1177
+
1178
+ startIndex = foundIndex + 1;
1179
+ }
1180
+ }
1181
+
1182
+ // Tree walker to find all text nodes
1183
+ const walker = document.createTreeWalker(
1184
+ containerElement,
1185
+ NodeFilter.SHOW_TEXT,
1186
+ {
1187
+ acceptNode: function(node) {
1188
+ // Skip script, style, and empty text nodes
1189
+ const parent = node.parentElement;
1190
+ if (!parent) return NodeFilter.FILTER_REJECT;
1191
+
1192
+ const tagName = parent.tagName.toLowerCase();
1193
+ if (tagName === 'script' || tagName === 'style' || tagName === 'noscript') {
1194
+ return NodeFilter.FILTER_REJECT;
1195
+ }
1196
+
1197
+ // Skip whitespace-only nodes
1198
+ if (!node.nodeValue || node.nodeValue.trim().length === 0) {
1199
+ return NodeFilter.FILTER_REJECT;
1200
+ }
1201
+
1202
+ // Check if element is visible
1203
+ const computedStyle = window.getComputedStyle(parent);
1204
+ if (computedStyle.display === 'none' ||
1205
+ computedStyle.visibility === 'hidden' ||
1206
+ computedStyle.opacity === '0') {
1207
+ return NodeFilter.FILTER_REJECT;
1208
+ }
1209
+
1210
+ return NodeFilter.FILTER_ACCEPT;
1211
+ }
1212
+ }
1213
+ );
1214
+
1215
+ // Walk through all text nodes
1216
+ let currentNode;
1217
+ while ((currentNode = walker.nextNode()) && results.length < maxResults) {
1218
+ findInTextNode(currentNode);
1219
+ }
1220
+
1221
+ return {
1222
+ status: "success",
1223
+ query: text,
1224
+ case_sensitive: caseSensitive,
1225
+ whole_word: wholeWord,
1226
+ matches: results.length,
1227
+ results: results,
1228
+ viewport: {
1229
+ width: window.innerWidth,
1230
+ height: window.innerHeight,
1231
+ scroll_x: window.scrollX,
1232
+ scroll_y: window.scrollY
1233
+ }
1234
+ };
1235
+ },
1236
+
1237
+ // 3. Click Action (unchanged)
1238
+ click: (id) => {
1239
+ const el = window.sentience_registry[id];
1240
+ if (el) {
1241
+ el.click();
1242
+ el.focus();
1243
+ return true;
1244
+ }
1245
+ return false;
1246
+ },
1247
+
1248
// 4. Inspector Mode: Start Recording for Golden Set Collection
/**
 * Start the inspector recording mode: hovering highlights elements, and
 * clicking an element that is in window.sentience_registry copies a
 * "ground truth" JSON snippet for it to the clipboard.
 *
 * Requires a populated window.sentience_registry (run snapshot() first).
 * A recording session that is still active is stopped before the new one
 * starts, so listeners are never registered twice.
 *
 * @param {Object} [options]
 * @param {string} [options.highlightColor='#ff0000'] - border/indicator color.
 * @param {string} [options.successColor='#00ff00'] - flash color after a copy.
 * @param {number} [options.autoDisableTimeout=1800000] - ms until recording
 *   stops by itself; values <= 0 disable the timer.
 * @param {string} [options.keyboardShortcut='Ctrl+Shift+I'] - only used in the
 *   console hint; the handler itself always listens for Ctrl/Cmd+Shift+I.
 * @returns {Function} function that stops recording (a no-op when the
 *   registry is empty and recording never started).
 */
startRecording: (options = {}) => {
    const {
        highlightColor = '#ff0000',
        successColor = '#00ff00',
        autoDisableTimeout = 30 * 60 * 1000, // 30 minutes default
        keyboardShortcut = 'Ctrl+Shift+I'
    } = options;

    console.log("🔴 [Sentience] Recording Mode STARTED. Click an element to copy its Ground Truth JSON.");
    console.log(` Press ${keyboardShortcut} or call stopRecording() to stop.`);

    // Validate registry is populated
    if (!window.sentience_registry || window.sentience_registry.length === 0) {
        console.warn("⚠️ Registry empty. Call `await window.sentience.snapshot()` first to populate registry.");
        alert("Registry empty. Run `await window.sentience.snapshot()` first!");
        return () => {}; // Return no-op cleanup function
    }

    // Stop a session that is still running so its listeners and timer do not
    // pile up next to the ones registered below.
    if (typeof window.sentience_stopRecording === 'function') {
        window.sentience_stopRecording();
    }

    // Create reverse mapping (element -> registry index) for O(1) lookup
    window.sentience_registry_map = new Map();
    window.sentience_registry.forEach((el, idx) => {
        if (el) window.sentience_registry_map.set(el, idx);
    });

    // Create highlight box overlay
    let highlightBox = document.getElementById('sentience-highlight-box');
    if (!highlightBox) {
        highlightBox = document.createElement('div');
        highlightBox.id = 'sentience-highlight-box';
        highlightBox.style.cssText = `
            position: fixed;
            pointer-events: none;
            z-index: 2147483647;
            border: 2px solid ${highlightColor};
            background: rgba(255, 0, 0, 0.1);
            display: none;
            transition: all 0.1s ease;
            box-sizing: border-box;
        `;
        document.body.appendChild(highlightBox);
    }

    // Create visual indicator (colored bar at the top of the page while recording)
    let recordingIndicator = document.getElementById('sentience-recording-indicator');
    if (!recordingIndicator) {
        recordingIndicator = document.createElement('div');
        recordingIndicator.id = 'sentience-recording-indicator';
        recordingIndicator.style.cssText = `
            position: fixed;
            top: 0;
            left: 0;
            right: 0;
            height: 3px;
            background: ${highlightColor};
            z-index: 2147483646;
            pointer-events: none;
        `;
        document.body.appendChild(recordingIndicator);
    }
    recordingIndicator.style.display = 'block';

    // Hover handler (visual feedback)
    const mouseOverHandler = (e) => {
        const el = e.target;
        if (!el || el === highlightBox || el === recordingIndicator) return;

        const rect = el.getBoundingClientRect();
        highlightBox.style.display = 'block';
        // The box is position: fixed, so it takes viewport coordinates.
        // getBoundingClientRect() already returns those; adding the scroll
        // offsets would push the box away from the element on scrolled pages.
        highlightBox.style.top = rect.top + 'px';
        highlightBox.style.left = rect.left + 'px';
        highlightBox.style.width = rect.width + 'px';
        highlightBox.style.height = rect.height + 'px';
    };

    // Click handler (capture ground truth data)
    const clickHandler = (e) => {
        // Swallow the click so the page does not react while recording
        e.preventDefault();
        e.stopPropagation();

        const el = e.target;
        if (!el || el === highlightBox || el === recordingIndicator) return;

        // Use Map for reliable O(1) lookup
        const sentienceId = window.sentience_registry_map.get(el);
        if (sentienceId === undefined) {
            console.warn("⚠️ Element not found in Sentience Registry. Did you run snapshot() first?");
            alert("Element not in registry. Run `await window.sentience.snapshot()` first!");
            return;
        }

        // Extract raw data (ground truth + raw signals, NOT model outputs)
        const rawData = extractRawElementData(el);
        const selector = getUniqueSelector(el);
        const role = el.getAttribute('role') || el.tagName.toLowerCase();
        const text = getText(el);

        // Build golden set JSON (ground truth + raw signals only)
        const snippet = {
            task: `Interact with ${text.substring(0, 20)}${text.length > 20 ? '...' : ''}`,
            url: window.location.href,
            timestamp: new Date().toISOString(),
            target_criteria: {
                id: sentienceId,
                selector: selector,
                role: role,
                text: text.substring(0, 50)
            },
            debug_snapshot: rawData
        };

        // Copy to clipboard. navigator.clipboard is missing in insecure
        // contexts; treat that like any other copy failure instead of throwing.
        const jsonString = JSON.stringify(snippet, null, 2);
        const copyAttempt = navigator.clipboard?.writeText
            ? navigator.clipboard.writeText(jsonString)
            : Promise.reject(new Error('Clipboard API is not available'));

        copyAttempt.then(() => {
            console.log("✅ Copied Ground Truth to clipboard:", snippet);

            // Flash green to indicate success
            highlightBox.style.border = `2px solid ${successColor}`;
            highlightBox.style.background = 'rgba(0, 255, 0, 0.2)';
            setTimeout(() => {
                highlightBox.style.border = `2px solid ${highlightColor}`;
                highlightBox.style.background = 'rgba(255, 0, 0, 0.1)';
            }, 500);
        }).catch(err => {
            console.error("❌ Failed to copy to clipboard:", err);
            // The alert points the user at the console, so the JSON must be there
            console.log(jsonString);
            alert("Failed to copy to clipboard. Check console for JSON.");
        });
    };

    // Auto-disable timeout
    let timeoutId = null;

    // Cleanup function to stop recording. It refers to keyboardHandler, which is
    // declared below; that is fine because it only runs after both are initialized.
    const stopRecording = () => {
        document.removeEventListener('mouseover', mouseOverHandler, true);
        document.removeEventListener('click', clickHandler, true);
        document.removeEventListener('keydown', keyboardHandler, true);

        if (timeoutId) {
            clearTimeout(timeoutId);
            timeoutId = null;
        }

        if (highlightBox) {
            highlightBox.style.display = 'none';
        }

        if (recordingIndicator) {
            recordingIndicator.style.display = 'none';
        }

        // Clean up registry map (optional, but good practice)
        if (window.sentience_registry_map) {
            window.sentience_registry_map.clear();
        }

        // Remove global reference, but only if it still points at this session
        if (window.sentience_stopRecording === stopRecording) {
            delete window.sentience_stopRecording;
        }

        console.log("⚪ [Sentience] Recording Mode STOPPED.");
    };

    // Keyboard shortcut handler (defined after stopRecording)
    const keyboardHandler = (e) => {
        // Ctrl+Shift+I or Cmd+Shift+I
        if ((e.ctrlKey || e.metaKey) && e.shiftKey && e.key === 'I') {
            e.preventDefault();
            stopRecording();
        }
    };

    // Attach event listeners (use capture phase to intercept early)
    document.addEventListener('mouseover', mouseOverHandler, true);
    document.addEventListener('click', clickHandler, true);
    document.addEventListener('keydown', keyboardHandler, true);

    // Set up auto-disable timeout
    if (autoDisableTimeout > 0) {
        timeoutId = setTimeout(() => {
            console.log("⏰ [Sentience] Recording Mode auto-disabled after timeout.");
            stopRecording();
        }, autoDisableTimeout);
    }

    // Store stop function globally so it can be reached without the return value
    window.sentience_stopRecording = stopRecording;

    return stopRecording;
}
1439
+ };
1440
+
1441
/**
 * Request an overlay for the given elements by posting a
 * SENTIENCE_SHOW_OVERLAY message to this window.
 * @param {Array} elements - List of elements with bbox, importance, visual_cues
 * @param {number} targetElementId - Optional ID of target element (shown in red)
 */
window.sentience.showOverlay = function(elements, targetElementId = null) {
    // Array.isArray() is false for null/undefined as well, so one check covers all invalid input
    if (Array.isArray(elements)) {
        const overlayRequest = {
            type: 'SENTIENCE_SHOW_OVERLAY',
            elements: elements,
            targetElementId: targetElementId,
            timestamp: Date.now()
        };
        window.postMessage(overlayRequest, '*');

        console.log(`[Sentience] Overlay requested for ${elements.length} elements`);
    } else {
        console.warn('[Sentience] showOverlay: elements must be an array');
    }
};
1461
+
1462
/**
 * Ask for the overlay to be removed by posting a SENTIENCE_CLEAR_OVERLAY
 * message to this window.
 */
window.sentience.clearOverlay = function() {
    const clearRequest = { type: 'SENTIENCE_CLEAR_OVERLAY' };
    window.postMessage(clearRequest, '*');
    console.log('[Sentience] Overlay cleared');
};
1471
+
1472
+ console.log('[SentienceAPI] ✓ Ready! (CSP-Resistant - WASM runs in background)');
1473
+ })();