sentienceapi 0.90.6 → 0.90.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +81 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +3 -1
- package/dist/index.js.map +1 -1
- package/dist/textSearch.d.ts +64 -0
- package/dist/textSearch.d.ts.map +1 -0
- package/dist/textSearch.js +109 -0
- package/dist/textSearch.js.map +1 -0
- package/dist/tracing/cloud-sink.d.ts +4 -0
- package/dist/tracing/cloud-sink.d.ts.map +1 -1
- package/dist/tracing/cloud-sink.js +15 -0
- package/dist/tracing/cloud-sink.js.map +1 -1
- package/dist/tracing/index-schema.d.ts +182 -0
- package/dist/tracing/index-schema.d.ts.map +1 -0
- package/dist/tracing/index-schema.js +150 -0
- package/dist/tracing/index-schema.js.map +1 -0
- package/dist/tracing/indexer.d.ts +17 -0
- package/dist/tracing/indexer.d.ts.map +1 -0
- package/dist/tracing/indexer.js +282 -0
- package/dist/tracing/indexer.js.map +1 -0
- package/dist/tracing/jsonl-sink.d.ts +4 -0
- package/dist/tracing/jsonl-sink.d.ts.map +1 -1
- package/dist/tracing/jsonl-sink.js +15 -0
- package/dist/tracing/jsonl-sink.js.map +1 -1
- package/dist/tracing/tracer-factory.d.ts +11 -5
- package/dist/tracing/tracer-factory.d.ts.map +1 -1
- package/dist/tracing/tracer-factory.js +16 -7
- package/dist/tracing/tracer-factory.js.map +1 -1
- package/dist/types.d.ts +98 -0
- package/dist/types.d.ts.map +1 -1
- package/package.json +1 -1
- package/src/extension/background.js +222 -52
- package/src/extension/content.js +285 -9
- package/src/extension/injected_api.js +1224 -189
- package/src/extension/manifest.json +10 -4
- package/src/extension/pkg/README.md +163 -2
- package/src/extension/pkg/sentience_core.d.ts +9 -0
- package/src/extension/pkg/sentience_core.js +16 -0
- package/src/extension/pkg/sentience_core_bg.wasm +0 -0
- package/src/extension/pkg/sentience_core_bg.wasm.d.ts +1 -0
- package/src/extension/release.json +115 -0
- package/src/extension/test-content.js +4 -0
|
@@ -1,33 +1,56 @@
|
|
|
1
|
-
// injected_api.js - MAIN WORLD
|
|
1
|
+
// injected_api.js - MAIN WORLD (NO WASM! CSP-Resistant!)
|
|
2
|
+
// This script ONLY collects raw DOM data and sends it to background for processing
|
|
2
3
|
(async () => {
|
|
3
|
-
//
|
|
4
|
+
// console.log('[SentienceAPI] Initializing (CSP-Resistant Mode)...');
|
|
5
|
+
|
|
6
|
+
// Wait for Extension ID from content.js
|
|
4
7
|
const getExtensionId = () => document.documentElement.dataset.sentienceExtensionId;
|
|
5
8
|
let extId = getExtensionId();
|
|
6
|
-
|
|
7
|
-
// Safety poller for async loading race conditions
|
|
9
|
+
|
|
8
10
|
if (!extId) {
|
|
9
11
|
await new Promise(resolve => {
|
|
10
12
|
const check = setInterval(() => {
|
|
11
13
|
extId = getExtensionId();
|
|
12
14
|
if (extId) { clearInterval(check); resolve(); }
|
|
13
15
|
}, 50);
|
|
16
|
+
setTimeout(() => resolve(), 5000); // Max 5s wait
|
|
14
17
|
});
|
|
15
18
|
}
|
|
16
19
|
|
|
17
|
-
|
|
18
|
-
|
|
20
|
+
if (!extId) {
|
|
21
|
+
console.error('[SentienceAPI] Failed to get extension ID');
|
|
22
|
+
return;
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
// console.log('[SentienceAPI] Extension ID:', extId);
|
|
19
26
|
|
|
27
|
+
// Registry for click actions (still needed for click() function)
|
|
20
28
|
window.sentience_registry = [];
|
|
21
|
-
let wasmModule = null;
|
|
22
29
|
|
|
23
|
-
// --- HELPER: Deep Walker ---
|
|
30
|
+
// --- HELPER: Deep Walker with Native Filter ---
|
|
24
31
|
/**
 * Collects the connected elements under `root`, descending into open shadow roots.
 *
 * Elements rejected by the filter are skipped together with their whole subtree:
 *   - SCRIPT, STYLE, NOSCRIPT, META, LINK and HEAD;
 *   - any non-<svg> element whose parent is an <svg> (icon internals such as <path>).
 *
 * Tag names are compared case-insensitively: in an HTML document only HTML-namespace
 * elements report an upper-case `tagName`, SVG elements report lower-case `svg`.
 *
 * @param {Node} [root=document] Node to start walking from (the root itself is not included).
 * @returns {Element[]} Accepted elements in document order; shadow-tree elements follow their host.
 */
function getAllElements(root = document) {
    const SKIPPED_TAGS = ['SCRIPT', 'STYLE', 'NOSCRIPT', 'META', 'LINK', 'HEAD'];
    const upperTag = (node) => (typeof node.tagName === 'string' ? node.tagName.toUpperCase() : '');

    const filter = {
        acceptNode(node) {
            const tag = upperTag(node);
            // Skip metadata and script/style tags
            if (SKIPPED_TAGS.includes(tag)) {
                return NodeFilter.FILTER_REJECT;
            }
            // Skip children of an <svg>; a nested <svg> is still accepted
            if (tag !== 'SVG' && node.parentNode && upperTag(node.parentNode) === 'SVG') {
                return NodeFilter.FILTER_REJECT;
            }
            return NodeFilter.FILTER_ACCEPT;
        }
    };

    const elements = [];
    const walker = document.createTreeWalker(root, NodeFilter.SHOW_ELEMENT, filter);
    while (walker.nextNode()) {
        const node = walker.currentNode;
        if (!node.isConnected) continue;

        elements.push(node);
        if (node.shadowRoot) {
            // Push one by one: spreading a very large array into push() can exceed
            // the engine's argument limit and throw a RangeError.
            for (const inner of getAllElements(node.shadowRoot)) {
                elements.push(inner);
            }
        }
    }
    return elements;
}
|
|
@@ -40,7 +63,163 @@
|
|
|
40
63
|
return (el.innerText || '').replace(/\s+/g, ' ').trim().substring(0, 100);
|
|
41
64
|
}
|
|
42
65
|
|
|
43
|
-
// --- HELPER:
|
|
66
|
+
// --- HELPER: Safe Class Name Extractor (Handles SVGAnimatedString) ---
|
|
67
|
+
/**
 * Returns an element's class list as a plain string.
 *
 * HTML elements expose `className` as a string; SVG elements expose an object
 * with `baseVal` / `animVal` string fields. Anything else yields ''.
 *
 * @param {Element|null|undefined} el
 * @returns {string} The class string, or '' when there is none.
 */
function getClassName(el) {
    const raw = el ? el.className : null;
    if (!raw) return '';

    switch (typeof raw) {
        case 'string':
            // Plain HTML element
            return raw;
        case 'object':
            break;
        default:
            return '';
    }

    // SVGAnimatedString-like object: prefer baseVal, then animVal
    for (const field of ['baseVal', 'animVal']) {
        if (field in raw && typeof raw[field] === 'string') {
            return raw[field];
        }
    }

    // Unknown object shape: stringify it, tolerating objects that cannot be converted
    try {
        return String(raw);
    } catch (e) {
        return '';
    }
}
|
|
91
|
+
|
|
92
|
+
// --- HELPER: Paranoid String Converter (Handles SVGAnimatedString) ---
|
|
93
|
+
/**
 * Coerces an arbitrary value to a string, or null when there is nothing to convert.
 *
 * - null / undefined        -> null
 * - string                  -> returned unchanged
 * - object with a string `baseVal` (or, failing that, `animVal`) -> that field
 * - everything else         -> String(value), or null if the conversion throws
 *
 * @param {*} value
 * @returns {string|null}
 */
function toSafeString(value) {
    if (value === null || value === undefined) return null;
    if (typeof value === 'string') return value;

    if (typeof value === 'object') {
        // SVGAnimatedString-style objects carry their text in baseVal / animVal
        const svgField = ['baseVal', 'animVal'].find(
            (field) => field in value && typeof value[field] === 'string'
        );
        if (svgField !== undefined) return value[svgField];
    }

    // Last resort for remaining objects and primitives. The result may be as
    // uninformative as "[object SVGAnimatedString]", but it is always a string.
    try {
        return String(value);
    } catch (e) {
        return null;
    }
}
|
|
125
|
+
|
|
126
|
+
// --- HELPER: Get SVG Fill/Stroke Color ---
|
|
127
|
+
// For SVG elements, get the fill or stroke color (SVGs use fill/stroke, not backgroundColor)
|
|
128
|
+
/**
 * Returns the paint colour of an <svg> element as an `rgb(r, g, b)` string.
 *
 * The computed `fill` is tried first, then `stroke`. A paint is used only when it
 * is not 'none' / 'transparent' / fully transparent and its alpha is at least 0.9.
 *
 * The tag check is case-insensitive: in an HTML document an <svg> element reports
 * `tagName === 'svg'`, so a strict comparison with 'SVG' would reject every real SVG.
 *
 * @param {Element|null|undefined} el
 * @returns {string|null} `rgb(...)` string, or null for non-SVG elements or unusable paints.
 */
function getSVGColor(el) {
    if (!el || typeof el.tagName !== 'string' || el.tagName.toUpperCase() !== 'SVG') return null;

    const style = window.getComputedStyle(el);

    // Normalises one computed paint value to rgb(), or null when it is absent or too transparent.
    const toOpaqueRgb = (paint) => {
        if (!paint || paint === 'none' || paint === 'transparent' || paint === 'rgba(0, 0, 0, 0)') {
            return null;
        }
        const rgbaMatch = paint.match(/rgba?\((\d+),\s*(\d+),\s*(\d+)(?:,\s*([\d.]+))?\)/);
        if (rgbaMatch) {
            const alpha = rgbaMatch[4] ? parseFloat(rgbaMatch[4]) : 1.0;
            return alpha >= 0.9 ? `rgb(${rgbaMatch[1]}, ${rgbaMatch[2]}, ${rgbaMatch[3]})` : null;
        }
        return paint.startsWith('rgb(') ? paint : null;
    };

    // Fill is the common case for icons; stroke is the fallback.
    return toOpaqueRgb(style.fill) || toOpaqueRgb(style.stroke);
}
|
|
164
|
+
|
|
165
|
+
// --- HELPER: Get Effective Background Color ---
|
|
166
|
+
// Traverses up the DOM tree to find the nearest non-transparent background color
|
|
167
|
+
// For SVGs, also checks fill/stroke properties
|
|
168
|
+
// This handles rgba(0,0,0,0) and transparent values that browsers commonly return
|
|
169
|
+
/**
 * Finds the nearest sufficiently opaque background colour for an element.
 *
 * Starting at `el` and walking up through `parentElement` (at most 10 levels,
 * including `el` itself), returns the first of:
 *   - for an <svg> element, a non-null result of getSVGColor();
 *   - a computed backgroundColor in rgb()/rgba() form with alpha >= 0.9, as `rgb(r, g, b)`;
 *   - any other non-transparent backgroundColor value, returned unchanged.
 * Semi-transparent backgrounds (alpha < 0.9) are skipped and the walk continues.
 *
 * @param {Element|null|undefined} el
 * @returns {string|null} Colour string, or null when nothing suitable is found.
 */
function getEffectiveBackgroundColor(el) {
    if (!el) return null;

    const MAX_DEPTH = 10; // Bounds the walk up the tree
    // SVG elements report a lower-case tagName in HTML documents, so compare case-insensitively.
    const isSvg = (node) => typeof node.tagName === 'string' && node.tagName.toUpperCase() === 'SVG';

    let current = el;
    for (let depth = 0; current && depth < MAX_DEPTH; depth++) {
        // SVGs paint with fill/stroke rather than backgroundColor
        if (isSvg(current)) {
            const svgColor = getSVGColor(current);
            if (svgColor) return svgColor;
        }

        const bgColor = window.getComputedStyle(current).backgroundColor;
        if (bgColor && bgColor !== 'transparent' && bgColor !== 'rgba(0, 0, 0, 0)') {
            const rgbaMatch = bgColor.match(/rgba?\((\d+),\s*(\d+),\s*(\d+)(?:,\s*([\d.]+))?\)/);
            if (!rgbaMatch) {
                // Named colour or other format, return as-is
                return bgColor;
            }
            const alpha = rgbaMatch[4] ? parseFloat(rgbaMatch[4]) : 1.0;
            if (alpha >= 0.9) {
                // Convert to rgb() format for Gateway compatibility
                return `rgb(${rgbaMatch[1]}, ${rgbaMatch[2]}, ${rgbaMatch[3]})`;
            }
            // Semi-transparent: keep climbing
        }

        current = current.parentElement;
    }

    return null;
}
|
|
221
|
+
|
|
222
|
+
// --- HELPER: Viewport Check ---
|
|
44
223
|
function isInViewport(rect) {
|
|
45
224
|
return (
|
|
46
225
|
rect.top < window.innerHeight && rect.bottom > 0 &&
|
|
@@ -48,19 +227,30 @@
|
|
|
48
227
|
);
|
|
49
228
|
}
|
|
50
229
|
|
|
51
|
-
// --- HELPER: Occlusion Check (
|
|
52
|
-
|
|
53
|
-
|
|
230
|
+
// --- HELPER: Occlusion Check (Optimized to avoid layout thrashing) ---
|
|
231
|
+
// Only checks occlusion for elements likely to be occluded (high z-index, positioned)
|
|
232
|
+
// This avoids forced reflow for most elements, dramatically improving performance
|
|
233
|
+
/**
 * Reports whether another element covers the centre point of `el`.
 *
 * Elements with `position: static` and a z-index that is not a number or is <= 10
 * are assumed visible without any hit-testing. For all others the element at the
 * rect's centre is looked up with document.elementFromPoint().
 *
 * @param {Element} el Element under test.
 * @param {{x: number, y: number, width: number, height: number}} rect Its bounding rect.
 * @param {{zIndex: string, position: string}} style Its computed style.
 * @returns {boolean} true when an unrelated element is on top at the centre point.
 */
function isOccluded(el, rect, style) {
    const stackLevel = parseInt(style.zIndex, 10);
    const lowStacking = isNaN(stackLevel) || stackLevel <= 10;

    // Fast path: normal-flow elements skip the expensive hit test
    if (style.position === 'static' && lowStacking) {
        return false;
    }

    const centerX = rect.x + rect.width / 2;
    const centerY = rect.y + rect.height / 2;

    // A centre point outside the viewport cannot be hit-tested; treat as not occluded
    const offScreen =
        centerX < 0 || centerX > window.innerWidth ||
        centerY < 0 || centerY > window.innerHeight;
    if (offScreen) {
        return false;
    }

    const hit = document.elementFromPoint(centerX, centerY);
    if (!hit) {
        return false;
    }

    // Visible when the hit element is el itself, a descendant, or an ancestor
    const related = el === hit || el.contains(hit) || hit.contains(el);
    return !related;
}
|
|
66
256
|
|
|
@@ -76,45 +266,91 @@
|
|
|
76
266
|
};
|
|
77
267
|
window.addEventListener('message', listener);
|
|
78
268
|
window.postMessage({ type: 'SENTIENCE_SCREENSHOT_REQUEST', requestId, options }, '*');
|
|
269
|
+
setTimeout(() => {
|
|
270
|
+
window.removeEventListener('message', listener);
|
|
271
|
+
resolve(null);
|
|
272
|
+
}, 10000); // 10s timeout
|
|
273
|
+
});
|
|
274
|
+
}
|
|
275
|
+
|
|
276
|
+
// --- HELPER: Snapshot Processing Bridge (NEW!) ---
|
|
277
|
+
/**
 * Posts raw snapshot data to the page window as a SENTIENCE_SNAPSHOT_REQUEST
 * message and waits for the matching SENTIENCE_SNAPSHOT_RESULT message.
 *
 * @param {*} rawData Raw DOM data, forwarded unchanged in the request.
 * @param {*} options Processing options, forwarded unchanged in the request.
 * @returns {Promise<{elements: *, raw_elements: *, duration: *}>} Resolves with the
 *   fields of the result message. Rejects with an Error when the result carries
 *   an `error`, when posting the request throws, or after 25 seconds without a reply.
 */
function processSnapshotInBackground(rawData, options) {
    return new Promise((resolve, reject) => {
        // Opaque correlation id. slice(2) drops the "0." prefix; the previous
        // substring(7) could produce a very short or even empty id.
        const requestId = Math.random().toString(36).slice(2) + Date.now().toString(36);
        const TIMEOUT_MS = 25000; // 25 seconds (longer than content.js timeout)
        let settled = false;
        let timeout = null;

        // Single exit point: settles once and always releases the timer and listener.
        const finish = (settle, value) => {
            if (settled) return;
            settled = true;
            clearTimeout(timeout);
            window.removeEventListener('message', listener);
            settle(value);
        };

        function listener(e) {
            // Other scripts on the page may post arbitrary payloads (including null);
            // ignore anything that is not our result message.
            const data = e ? e.data : null;
            if (!data || typeof data !== 'object') return;
            if (data.type !== 'SENTIENCE_SNAPSHOT_RESULT' || data.requestId !== requestId) return;

            if (data.error) {
                finish(reject, new Error(data.error));
            } else {
                finish(resolve, {
                    elements: data.elements,
                    raw_elements: data.raw_elements,
                    duration: data.duration
                });
            }
        }

        timeout = setTimeout(() => {
            finish(reject, new Error('WASM processing timeout - extension may be unresponsive. Try reloading the extension.'));
        }, TIMEOUT_MS);

        window.addEventListener('message', listener);

        try {
            window.postMessage({
                type: 'SENTIENCE_SNAPSHOT_REQUEST',
                requestId,
                rawData,
                options
            }, '*');
        } catch (error) {
            finish(reject, new Error(`Failed to send snapshot request: ${error.message}`));
        }
    });
}
|
|
81
329
|
|
|
82
|
-
// --- HELPER:
|
|
83
|
-
// Returns cleaned HTML that can be processed by Turndown or other Node.js libraries
|
|
330
|
+
// --- HELPER: Raw HTML Extractor (unchanged) ---
|
|
84
331
|
function getRawHTML(root) {
|
|
85
332
|
const sourceRoot = root || document.body;
|
|
86
333
|
const clone = sourceRoot.cloneNode(true);
|
|
87
|
-
|
|
88
|
-
// Remove unwanted elements by tag name (simple and reliable)
|
|
334
|
+
|
|
89
335
|
const unwantedTags = ['nav', 'footer', 'header', 'script', 'style', 'noscript', 'iframe', 'svg'];
|
|
90
336
|
unwantedTags.forEach(tag => {
|
|
91
337
|
const elements = clone.querySelectorAll(tag);
|
|
92
338
|
elements.forEach(el => {
|
|
93
|
-
if (el.parentNode)
|
|
94
|
-
el.parentNode.removeChild(el);
|
|
95
|
-
}
|
|
339
|
+
if (el.parentNode) el.parentNode.removeChild(el);
|
|
96
340
|
});
|
|
97
341
|
});
|
|
98
342
|
|
|
99
|
-
// Remove invisible elements
|
|
100
|
-
// We'll use a simple approach: mark elements in original, then remove from clone
|
|
343
|
+
// Remove invisible elements
|
|
101
344
|
const invisibleSelectors = [];
|
|
102
|
-
const walker = document.createTreeWalker(
|
|
103
|
-
sourceRoot,
|
|
104
|
-
NodeFilter.SHOW_ELEMENT,
|
|
105
|
-
null,
|
|
106
|
-
false
|
|
107
|
-
);
|
|
108
|
-
|
|
345
|
+
const walker = document.createTreeWalker(sourceRoot, NodeFilter.SHOW_ELEMENT, null, false);
|
|
109
346
|
let node;
|
|
110
347
|
while (node = walker.nextNode()) {
|
|
111
348
|
const tag = node.tagName.toLowerCase();
|
|
112
349
|
if (tag === 'head' || tag === 'title') continue;
|
|
113
|
-
|
|
350
|
+
|
|
114
351
|
const style = window.getComputedStyle(node);
|
|
115
352
|
if (style.display === 'none' || style.visibility === 'hidden' ||
|
|
116
353
|
(node.offsetWidth === 0 && node.offsetHeight === 0)) {
|
|
117
|
-
// Build a selector for this element
|
|
118
354
|
let selector = tag;
|
|
119
355
|
if (node.id) {
|
|
120
356
|
selector = `#${node.id}`;
|
|
@@ -128,30 +364,25 @@
|
|
|
128
364
|
}
|
|
129
365
|
}
|
|
130
366
|
|
|
131
|
-
// Remove invisible elements from clone (if we can find them)
|
|
132
367
|
invisibleSelectors.forEach(selector => {
|
|
133
368
|
try {
|
|
134
369
|
const elements = clone.querySelectorAll(selector);
|
|
135
370
|
elements.forEach(el => {
|
|
136
|
-
if (el.parentNode)
|
|
137
|
-
el.parentNode.removeChild(el);
|
|
138
|
-
}
|
|
371
|
+
if (el.parentNode) el.parentNode.removeChild(el);
|
|
139
372
|
});
|
|
140
373
|
} catch (e) {
|
|
141
374
|
// Invalid selector, skip
|
|
142
375
|
}
|
|
143
376
|
});
|
|
144
377
|
|
|
145
|
-
// Resolve relative URLs
|
|
378
|
+
// Resolve relative URLs
|
|
146
379
|
const links = clone.querySelectorAll('a[href]');
|
|
147
380
|
links.forEach(link => {
|
|
148
381
|
const href = link.getAttribute('href');
|
|
149
382
|
if (href && !href.startsWith('http://') && !href.startsWith('https://') && !href.startsWith('#')) {
|
|
150
383
|
try {
|
|
151
384
|
link.setAttribute('href', new URL(href, document.baseURI).href);
|
|
152
|
-
} catch (e) {
|
|
153
|
-
// Keep original href if URL parsing fails
|
|
154
|
-
}
|
|
385
|
+
} catch (e) {}
|
|
155
386
|
}
|
|
156
387
|
});
|
|
157
388
|
|
|
@@ -161,32 +392,24 @@
|
|
|
161
392
|
if (src && !src.startsWith('http://') && !src.startsWith('https://') && !src.startsWith('data:')) {
|
|
162
393
|
try {
|
|
163
394
|
img.setAttribute('src', new URL(src, document.baseURI).href);
|
|
164
|
-
} catch (e) {
|
|
165
|
-
// Keep original src if URL parsing fails
|
|
166
|
-
}
|
|
395
|
+
} catch (e) {}
|
|
167
396
|
}
|
|
168
397
|
});
|
|
169
398
|
|
|
170
399
|
return clone.innerHTML;
|
|
171
400
|
}
|
|
172
401
|
|
|
173
|
-
// --- HELPER:
|
|
174
|
-
// Uses getRawHTML() and then converts to markdown for consistency
|
|
402
|
+
// --- HELPER: Markdown Converter (unchanged) ---
|
|
175
403
|
function convertToMarkdown(root) {
|
|
176
|
-
// Get cleaned HTML first
|
|
177
404
|
const rawHTML = getRawHTML(root);
|
|
178
|
-
|
|
179
|
-
// Create a temporary container to parse the HTML
|
|
180
405
|
const tempDiv = document.createElement('div');
|
|
181
406
|
tempDiv.innerHTML = rawHTML;
|
|
182
|
-
|
|
407
|
+
|
|
183
408
|
let markdown = '';
|
|
184
|
-
let insideLink = false;
|
|
409
|
+
let insideLink = false;
|
|
185
410
|
|
|
186
411
|
function walk(node) {
|
|
187
412
|
if (node.nodeType === Node.TEXT_NODE) {
|
|
188
|
-
// Keep minimal whitespace to prevent words merging
|
|
189
|
-
// Strip newlines inside text nodes to prevent broken links
|
|
190
413
|
const text = node.textContent.replace(/[\r\n]+/g, ' ').replace(/\s+/g, ' ');
|
|
191
414
|
if (text.trim()) markdown += text;
|
|
192
415
|
return;
|
|
@@ -201,13 +424,12 @@
|
|
|
201
424
|
if (tag === 'h2') markdown += '\n## ';
|
|
202
425
|
if (tag === 'h3') markdown += '\n### ';
|
|
203
426
|
if (tag === 'li') markdown += '\n- ';
|
|
204
|
-
// IMPORTANT: Don't add newlines for block elements when inside a link
|
|
205
427
|
if (!insideLink && (tag === 'p' || tag === 'div' || tag === 'br')) markdown += '\n';
|
|
206
428
|
if (tag === 'strong' || tag === 'b') markdown += '**';
|
|
207
429
|
if (tag === 'em' || tag === 'i') markdown += '_';
|
|
208
430
|
if (tag === 'a') {
|
|
209
431
|
markdown += '[';
|
|
210
|
-
insideLink = true;
|
|
432
|
+
insideLink = true;
|
|
211
433
|
}
|
|
212
434
|
|
|
213
435
|
// Children
|
|
@@ -219,25 +441,21 @@
|
|
|
219
441
|
|
|
220
442
|
// Suffix
|
|
221
443
|
if (tag === 'a') {
|
|
222
|
-
// Get absolute URL from href attribute (already resolved in getRawHTML)
|
|
223
444
|
const href = node.getAttribute('href');
|
|
224
445
|
if (href) markdown += `](${href})`;
|
|
225
446
|
else markdown += ']';
|
|
226
|
-
insideLink = false;
|
|
447
|
+
insideLink = false;
|
|
227
448
|
}
|
|
228
449
|
if (tag === 'strong' || tag === 'b') markdown += '**';
|
|
229
450
|
if (tag === 'em' || tag === 'i') markdown += '_';
|
|
230
|
-
// IMPORTANT: Don't add newlines for block elements when inside a link (suffix section too)
|
|
231
451
|
if (!insideLink && (tag === 'h1' || tag === 'h2' || tag === 'h3' || tag === 'p' || tag === 'div')) markdown += '\n';
|
|
232
452
|
}
|
|
233
453
|
|
|
234
454
|
walk(tempDiv);
|
|
235
|
-
|
|
236
|
-
// Cleanup: remove excessive newlines
|
|
237
455
|
return markdown.replace(/\n{3,}/g, '\n\n').trim();
|
|
238
456
|
}
|
|
239
457
|
|
|
240
|
-
// --- HELPER:
|
|
458
|
+
// --- HELPER: Text Extractor (unchanged) ---
|
|
241
459
|
function convertToText(root) {
|
|
242
460
|
let text = '';
|
|
243
461
|
function walk(node) {
|
|
@@ -247,22 +465,20 @@
|
|
|
247
465
|
}
|
|
248
466
|
if (node.nodeType === Node.ELEMENT_NODE) {
|
|
249
467
|
const tag = node.tagName.toLowerCase();
|
|
250
|
-
// Skip nav/footer/header/script/style/noscript/iframe/svg
|
|
251
468
|
if (['nav', 'footer', 'header', 'script', 'style', 'noscript', 'iframe', 'svg'].includes(tag)) return;
|
|
252
469
|
|
|
253
470
|
const style = window.getComputedStyle(node);
|
|
254
471
|
if (style.display === 'none' || style.visibility === 'hidden') return;
|
|
255
|
-
|
|
256
|
-
// Block level elements get a newline
|
|
472
|
+
|
|
257
473
|
const isBlock = style.display === 'block' || style.display === 'flex' || node.tagName === 'P' || node.tagName === 'DIV';
|
|
258
474
|
if (isBlock) text += ' ';
|
|
259
|
-
|
|
475
|
+
|
|
260
476
|
if (node.shadowRoot) {
|
|
261
477
|
Array.from(node.shadowRoot.childNodes).forEach(walk);
|
|
262
478
|
} else {
|
|
263
479
|
node.childNodes.forEach(walk);
|
|
264
480
|
}
|
|
265
|
-
|
|
481
|
+
|
|
266
482
|
if (isBlock) text += '\n';
|
|
267
483
|
}
|
|
268
484
|
}
|
|
@@ -270,155 +486,597 @@
|
|
|
270
486
|
return text.replace(/\n{3,}/g, '\n\n').trim();
|
|
271
487
|
}
|
|
272
488
|
|
|
273
|
-
//
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
489
|
+
// --- HELPER: Clean null/undefined fields ---
|
|
490
|
+
/**
 * Recursively strips null/undefined properties from plain data.
 *
 * - Arrays are mapped element by element (null entries inside arrays are kept).
 * - Object properties whose value is null or undefined are dropped.
 * - Nested objects/arrays that end up with no keys are dropped from their parent.
 * - Non-object values are returned unchanged.
 *
 * @param {*} obj
 * @returns {*} A cleaned copy for arrays/objects, otherwise `obj` itself.
 */
function cleanElement(obj) {
    if (Array.isArray(obj)) {
        return obj.map((item) => cleanElement(item));
    }
    if (obj === null || typeof obj !== 'object') {
        return obj;
    }

    const result = {};
    Object.entries(obj).forEach(([key, value]) => {
        if (value === null || value === undefined) return;

        if (typeof value !== 'object') {
            result[key] = value;
            return;
        }

        // Keep nested containers only when something survived the cleaning
        const nested = cleanElement(value);
        if (Object.keys(nested).length !== 0) {
            result[key] = nested;
        }
    });
    return result;
}
|
|
298
512
|
|
|
299
|
-
//
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
513
|
+
// --- HELPER: Extract Raw Element Data (for Golden Set) ---
|
|
514
|
+
/**
 * Captures an element's tag, rounded bounding rect, selected computed styles and
 * selected attributes as a plain object. Missing/empty values are reported as null.
 *
 * `className` goes through getClassName() so that SVG elements, whose `className`
 * is an SVGAnimatedString object rather than a string, still yield a plain string.
 *
 * @param {Element} el
 * @returns {{tag: string, rect: Object, styles: Object, attributes: Object}}
 */
function extractRawElementData(el) {
    const style = window.getComputedStyle(el);
    const rect = el.getBoundingClientRect();

    return {
        tag: el.tagName,
        rect: {
            x: Math.round(rect.x),
            y: Math.round(rect.y),
            width: Math.round(rect.width),
            height: Math.round(rect.height)
        },
        styles: {
            cursor: style.cursor || null,
            backgroundColor: style.backgroundColor || null,
            color: style.color || null,
            fontWeight: style.fontWeight || null,
            fontSize: style.fontSize || null,
            display: style.display || null,
            position: style.position || null,
            zIndex: style.zIndex || null,
            opacity: style.opacity || null,
            visibility: style.visibility || null
        },
        attributes: {
            role: el.getAttribute('role') || null,
            type: el.getAttribute('type') || null,
            ariaLabel: el.getAttribute('aria-label') || null,
            id: el.id || null,
            // Always a string or null, never a live SVGAnimatedString object
            className: getClassName(el) || null
        }
    };
}
|
|
306
547
|
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
548
|
+
// --- HELPER: Generate Unique CSS Selector (for Golden Set) ---
|
|
549
|
+
/**
 * Builds a CSS selector for an element.
 *
 * Strategy, in order:
 *   1. `#id` when the element has an id;
 *   2. `tag[attr="value"]` for its first `data-*` or `aria-label` attribute
 *      (NOTE(review): this is not checked for uniqueness in the document);
 *   3. a `parent > child` path built from tag names, the first class and, when a
 *      parent has several children with the same tag, `:nth-of-type(n)`. The path
 *      stops at the first ancestor with an id, or below <body> / <html>.
 *
 * Ids and class names are escaped with CSS.escape() when it is available, and
 * attribute values escape both `"` and `\`, so unusual values still give a
 * syntactically valid selector.
 *
 * @param {Element|null|undefined} el
 * @returns {string} Selector string, or '' when `el` is not an element.
 */
function getUniqueSelector(el) {
    if (!el || !el.tagName) return '';

    const escapeIdent = (ident) =>
        (typeof CSS !== 'undefined' && typeof CSS.escape === 'function') ? CSS.escape(ident) : ident;

    // If element has an ID, use it
    if (el.id) {
        return `#${escapeIdent(el.id)}`;
    }

    // Try data attributes or aria-label
    for (const attr of el.attributes) {
        if (attr.name.startsWith('data-') || attr.name === 'aria-label') {
            const value = attr.value ? attr.value.replace(/["\\]/g, '\\$&') : '';
            return `${el.tagName.toLowerCase()}[${attr.name}="${value}"]`;
        }
    }

    // Build path with classes and nth-of-type
    const path = [];
    for (
        let current = el;
        current && current !== document.body && current !== document.documentElement;
        current = current.parentElement
    ) {
        // An ancestor with an ID anchors the path; stop there
        if (current.id) {
            path.unshift(`#${escapeIdent(current.id)}`);
            break;
        }

        let selector = current.tagName.toLowerCase();

        // Use first class for simplicity (SVG className objects are skipped)
        if (typeof current.className === 'string') {
            const firstClass = current.className.trim().split(/\s+/).find(Boolean);
            if (firstClass) {
                selector += `.${escapeIdent(firstClass)}`;
            }
        }

        // Disambiguate among same-tag siblings
        const parent = current.parentElement;
        if (parent) {
            const sameTagSiblings = Array.from(parent.children).filter(s => s.tagName === current.tagName);
            if (sameTagSiblings.length > 1) {
                selector += `:nth-of-type(${sameTagSiblings.indexOf(current) + 1})`;
            }
        }

        path.unshift(selector);
    }

    return path.join(' > ') || el.tagName.toLowerCase();
}
|
|
604
|
+
|
|
605
|
+
// --- HELPER: Wait for DOM Stability (SPA Hydration) ---
|
|
606
|
+
// Waits for the DOM to stabilize before taking a snapshot
|
|
607
|
+
// Useful for React/Vue apps that render empty skeletons before hydration
|
|
608
|
+
/**
 * Waits for the DOM to stabilise before a snapshot is taken.
 *
 * Phase 1 (only when the document currently has fewer than `minNodeCount`
 * elements): watch child-list mutations until the element count reaches
 * `minNodeCount`.
 * Phase 2: wait until no child-list mutation has been observed for `quietPeriod` ms.
 *
 * The promise always resolves (never rejects on timeout): once `maxWait` ms have
 * elapsed since the call, a warning is logged and the wait ends. Every exit path
 * disconnects the active observer and cancels pending timers, so no warning is
 * logged after the promise has already resolved.
 *
 * @param {Object} [options]
 * @param {number} [options.minNodeCount=500] Element count considered "rendered".
 * @param {number} [options.quietPeriod=200] Required mutation-free time, in ms.
 * @param {number} [options.maxWait=5000] Upper bound on the total wait, in ms.
 * @returns {Promise<void>}
 */
async function waitForStability(options = {}) {
    const {
        minNodeCount = 500,
        quietPeriod = 200,
        maxWait = 5000
    } = options;

    const POLL_INTERVAL_MS = 50;
    const OBSERVE_OPTIONS = { childList: true, subtree: true, attributes: false };
    const startTime = Date.now();
    // <body> can be missing very early in page load; fall back to the root element.
    const target = document.body || document.documentElement;
    const countNodes = () => document.querySelectorAll('*').length;

    return new Promise((resolve) => {
        let settled = false;
        let activeObserver = null;
        let pollTimer = null;
        let fallbackTimer = null;

        // Single exit point: tears everything down, optionally warns, resolves once.
        const finish = (warning) => {
            if (settled) return;
            settled = true;
            if (activeObserver) activeObserver.disconnect();
            clearTimeout(pollTimer);
            clearTimeout(fallbackTimer);
            if (warning) console.warn(warning);
            resolve();
        };

        // Phase 2: resolve once the DOM has been quiet for quietPeriod.
        const waitForQuiet = () => {
            let lastChange = Date.now();
            activeObserver = new MutationObserver(() => {
                lastChange = Date.now();
            });
            activeObserver.observe(target, OBSERVE_OPTIONS);

            const checkQuiet = () => {
                if (settled) return;
                const now = Date.now();
                if (now - lastChange >= quietPeriod) {
                    finish();
                } else if (now - startTime >= maxWait) {
                    finish('[SentienceAPI] DOM stability timeout - proceeding anyway');
                } else {
                    pollTimer = setTimeout(checkQuiet, POLL_INTERVAL_MS);
                }
            };
            checkQuiet();
        };

        if (countNodes() >= minNodeCount) {
            waitForQuiet();
            return;
        }

        // Phase 1: DOM doesn't have enough nodes yet, wait for them.
        const countObserver = new MutationObserver(() => {
            if (settled) return;
            if (countNodes() >= minNodeCount) {
                countObserver.disconnect();
                waitForQuiet();
            } else if (Date.now() - startTime >= maxWait) {
                finish('[SentienceAPI] DOM node count timeout - proceeding anyway');
            }
        });
        activeObserver = countObserver;
        countObserver.observe(target, OBSERVE_OPTIONS);

        // Covers the case where no mutation ever fires; cancelled by finish().
        fallbackTimer = setTimeout(() => {
            finish('[SentienceAPI] DOM stability max wait reached - proceeding');
        }, maxWait);
    });
}
|
|
709
|
+
|
|
710
|
+
// --- HELPER: Collect Iframe Snapshots (Frame Stitching) ---
|
|
711
|
+
// Recursively collects snapshot data from all child iframes
|
|
712
|
+
// This enables detection of elements inside iframes (e.g., Stripe forms)
|
|
713
|
+
//
|
|
714
|
+
// NOTE: Cross-origin iframes cannot be accessed due to browser security (Same-Origin Policy).
|
|
715
|
+
// Only same-origin iframes will return snapshot data. Cross-origin iframes will be skipped
|
|
716
|
+
// with a warning. For cross-origin iframes, users must manually switch frames using
|
|
717
|
+
// Playwright's page.frame() API.
|
|
718
|
+
/**
 * Requests a snapshot from every iframe in the current document via
 * postMessage and gathers the responses (frame stitching).
 *
 * Each child frame is expected to answer a SENTIENCE_IFRAME_SNAPSHOT_REQUEST
 * with a SENTIENCE_IFRAME_SNAPSHOT_RESPONSE carrying the same requestId.
 * Frames that do not answer within 5 seconds, report an error, or have no
 * contentWindow are skipped.
 *
 * @param {Object} [options] - Snapshot options forwarded to each child frame
 *   (collectIframes is forced to true so nested iframes are collected too).
 * @returns {Promise<Map<HTMLIFrameElement, Object>>} Map of iframe element to
 *   the snapshot object that frame returned. Empty when nothing was collected.
 */
async function collectIframeSnapshots(options = {}) {
    const iframeData = new Map(); // iframe element -> snapshot data

    const iframes = Array.from(document.querySelectorAll('iframe'));
    if (iframes.length === 0) {
        return iframeData;
    }

    console.log(`[SentienceAPI] Found ${iframes.length} iframe(s), requesting snapshots...`);

    // Substrings of well-known ad hosts; matching iframes are skipped to save time.
    // ('adsystem' replaces the former 'ads system', which could never match a URL.)
    const adHostMarkers = ['doubleclick', 'googleadservices', 'adsystem'];
    // How long a child frame gets to answer before it is given up on.
    const responseTimeoutMs = 5000;

    const iframePromises = iframes.map((iframe, idx) => {
        const src = iframe.src || '';
        if (adHostMarkers.some((marker) => src.includes(marker))) {
            console.log(`[SentienceAPI] Skipping ad iframe: ${src.substring(0, 30)}...`);
            return Promise.resolve(null);
        }

        return new Promise((resolve) => {
            const requestId = `iframe-${idx}-${Date.now()}`;
            let timeout = null;

            // Single exit point: always release the timer AND the listener so a
            // slow or silent iframe does not leave a 'message' listener behind.
            const finish = (result) => {
                clearTimeout(timeout);
                window.removeEventListener('message', listener);
                resolve(result);
            };

            const listener = (event) => {
                const isOurResponse =
                    event.data?.type === 'SENTIENCE_IFRAME_SNAPSHOT_RESPONSE' &&
                    event.data?.requestId === requestId;
                if (!isOurResponse) {
                    return;
                }
                // Only trust a response that comes from the frame we asked;
                // the requestId alone is guessable by other windows.
                if (event.source !== iframe.contentWindow) {
                    return;
                }

                if (event.data.error) {
                    console.warn(`[SentienceAPI] Iframe ${idx} returned error:`, event.data.error);
                    finish(null);
                    return;
                }

                const elementCount = event.data.snapshot?.raw_elements?.length || 0;
                console.log(`[SentienceAPI] ✓ Received ${elementCount} elements from Iframe ${idx} (id: ${requestId})`);
                finish({
                    iframe: iframe,
                    data: event.data.snapshot,
                    error: null
                });
            };

            timeout = setTimeout(() => {
                console.warn(`[SentienceAPI] ⚠️ Iframe ${idx} snapshot TIMEOUT (id: ${requestId})`);
                finish(null);
            }, responseTimeoutMs);

            window.addEventListener('message', listener);

            try {
                if (iframe.contentWindow) {
                    iframe.contentWindow.postMessage({
                        type: 'SENTIENCE_IFRAME_SNAPSHOT_REQUEST',
                        requestId: requestId,
                        options: {
                            ...options,
                            collectIframes: true // Enable recursion for nested iframes
                        }
                    }, '*'); // Target origin is unknown up front, so '*' is used
                } else {
                    console.warn(`[SentienceAPI] Iframe ${idx} contentWindow is inaccessible (Cross-Origin?)`);
                    finish(null);
                }
            } catch (error) {
                console.error(`[SentienceAPI] Failed to postMessage to Iframe ${idx}:`, error);
                finish(null);
            }
        });
    });

    // Every promise above resolves (never rejects), so Promise.all cannot fail fast.
    const results = await Promise.all(iframePromises);

    results.forEach((result, idx) => {
        if (result && result.data) {
            iframeData.set(iframes[idx], result.data);
            console.log(`[SentienceAPI] ✓ Collected snapshot from iframe ${idx}`);
        } else if (!result) {
            console.warn(`[SentienceAPI] Iframe ${idx} returned no data (timeout or error)`);
        }
    });

    return iframeData;
}
|
|
355
825
|
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
}
|
|
365
|
-
|
|
826
|
+
// --- HELPER: Handle Iframe Snapshot Request (for child frames) ---
|
|
827
|
+
// When a parent frame requests snapshot, this handler responds with local snapshot
|
|
828
|
+
// NOTE: Recursion is safe because querySelectorAll('iframe') only matches iframes inside this frame's own document.
|
|
829
|
+
// Iframe A can ask Iframe B, but won't go back up to parent (no circular dependency risk).
|
|
830
|
+
/**
 * Installs a 'message' listener that answers SENTIENCE_IFRAME_SNAPSHOT_REQUEST
 * messages with a snapshot of this frame.
 *
 * The reply is a SENTIENCE_IFRAME_SNAPSHOT_RESPONSE posted to event.source,
 * echoing the request's requestId. On success `snapshot` holds the result of
 * window.sentience.snapshot() and `error` is null; on failure `snapshot` is
 * null and `error` holds the error message.
 */
function setupIframeSnapshotHandler() {
    window.addEventListener('message', async (event) => {
        // Messages are filtered by type only.
        // NOTE(review): the sender (event.source / event.origin) is not verified,
        // so any window holding a reference to this frame can request a snapshot.
        if (event.data?.type !== 'SENTIENCE_IFRAME_SNAPSHOT_REQUEST') {
            return;
        }

        const { requestId, options } = event.data;

        // Posts the response back to whoever sent the request, if it can receive one.
        const reply = (snapshot, error) => {
            if (event.source && event.source.postMessage) {
                event.source.postMessage({
                    type: 'SENTIENCE_IFRAME_SNAPSHOT_RESPONSE',
                    requestId: requestId,
                    snapshot: snapshot,
                    error: error
                }, '*');
            }
        };

        try {
            // collectIframes: true lets this frame recurse into its own iframes.
            // waitForStability is always disabled here so the requesting frame
            // is not held up waiting on child frames. Spreading a missing
            // `options` is a no-op, so requests without options are accepted.
            const snapshotOptions = { ...options, collectIframes: true, waitForStability: false };
            const snapshot = await window.sentience.snapshot(snapshotOptions);
            reply(snapshot, null);
        } catch (error) {
            reply(null, error.message);
        }
    });
}
|
|
867
|
+
|
|
868
|
+
// Setup iframe handler when script loads (only once)
|
|
869
|
+
// The flag on `window` records that the message handler is already installed,
// so running this script again in the same window does not add a second one.
if (window.sentience_iframe_handler_setup) {
    // Handler was registered by an earlier run; nothing to do.
} else {
    setupIframeSnapshotHandler();
    window.sentience_iframe_handler_setup = true;
}
|
|
370
873
|
|
|
371
|
-
|
|
874
|
+
// --- GLOBAL API ---
|
|
875
|
+
window.sentience = {
|
|
876
|
+
// 1. Geometry snapshot (NEW ARCHITECTURE - No WASM in Main World!)
|
|
877
|
+
snapshot: async (options = {}) => {
|
|
878
|
+
try {
|
|
879
|
+
// Step 0: Wait for DOM stability if requested (for SPA hydration)
|
|
880
|
+
if (options.waitForStability !== false) {
|
|
881
|
+
await waitForStability(options.waitForStability || {});
|
|
882
|
+
}
|
|
883
|
+
|
|
884
|
+
// Step 1: Collect raw DOM data (Main World - CSP can't block this!)
|
|
885
|
+
const rawData = [];
|
|
886
|
+
window.sentience_registry = [];
|
|
372
887
|
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
888
|
+
const nodes = getAllElements();
|
|
889
|
+
|
|
890
|
+
nodes.forEach((el, idx) => {
|
|
891
|
+
if (!el.getBoundingClientRect) return;
|
|
892
|
+
const rect = el.getBoundingClientRect();
|
|
893
|
+
if (rect.width < 5 || rect.height < 5) return;
|
|
894
|
+
|
|
895
|
+
window.sentience_registry[idx] = el;
|
|
896
|
+
|
|
897
|
+
const textVal = getText(el);
|
|
898
|
+
const inView = isInViewport(rect);
|
|
899
|
+
|
|
900
|
+
// Get computed style once (needed for both occlusion check and data collection)
|
|
901
|
+
const style = window.getComputedStyle(el);
|
|
902
|
+
|
|
903
|
+
// Only check occlusion for elements likely to be occluded (optimized)
|
|
904
|
+
// This avoids layout thrashing for the vast majority of elements
|
|
905
|
+
const occluded = inView ? isOccluded(el, rect, style) : false;
|
|
906
|
+
|
|
907
|
+
// Get effective background color (traverses DOM to find non-transparent color)
|
|
908
|
+
const effectiveBgColor = getEffectiveBackgroundColor(el);
|
|
909
|
+
|
|
910
|
+
rawData.push({
|
|
911
|
+
id: idx,
|
|
912
|
+
tag: el.tagName.toLowerCase(),
|
|
913
|
+
rect: { x: rect.x, y: rect.y, width: rect.width, height: rect.height },
|
|
914
|
+
styles: {
|
|
915
|
+
display: toSafeString(style.display),
|
|
916
|
+
visibility: toSafeString(style.visibility),
|
|
917
|
+
opacity: toSafeString(style.opacity),
|
|
918
|
+
z_index: toSafeString(style.zIndex || "auto"),
|
|
919
|
+
position: toSafeString(style.position),
|
|
920
|
+
bg_color: toSafeString(effectiveBgColor || style.backgroundColor),
|
|
921
|
+
color: toSafeString(style.color),
|
|
922
|
+
cursor: toSafeString(style.cursor),
|
|
923
|
+
font_weight: toSafeString(style.fontWeight),
|
|
924
|
+
font_size: toSafeString(style.fontSize)
|
|
925
|
+
},
|
|
926
|
+
attributes: {
|
|
927
|
+
role: toSafeString(el.getAttribute('role')),
|
|
928
|
+
type_: toSafeString(el.getAttribute('type')),
|
|
929
|
+
aria_label: toSafeString(el.getAttribute('aria-label')),
|
|
930
|
+
href: toSafeString(el.href || el.getAttribute('href') || null),
|
|
931
|
+
class: toSafeString(getClassName(el)),
|
|
932
|
+
// Capture dynamic input state (not just initial attributes)
|
|
933
|
+
value: el.value !== undefined ? toSafeString(el.value) : toSafeString(el.getAttribute('value')),
|
|
934
|
+
checked: el.checked !== undefined ? String(el.checked) : null
|
|
935
|
+
},
|
|
936
|
+
text: toSafeString(textVal),
|
|
937
|
+
in_viewport: inView,
|
|
938
|
+
is_occluded: occluded
|
|
939
|
+
});
|
|
940
|
+
});
|
|
941
|
+
|
|
942
|
+
console.log(`[SentienceAPI] Collected ${rawData.length} elements from main frame`);
|
|
378
943
|
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
944
|
+
// Step 1.5: Collect iframe snapshots and FLATTEN immediately
|
|
945
|
+
// "Flatten Early" architecture: Merge iframe elements into main array before WASM
|
|
946
|
+
// This allows WASM to process all elements uniformly (no recursion needed)
|
|
947
|
+
let allRawElements = [...rawData]; // Start with main frame elements
|
|
948
|
+
let totalIframeElements = 0;
|
|
949
|
+
|
|
950
|
+
if (options.collectIframes !== false) {
|
|
951
|
+
try {
|
|
952
|
+
console.log(`[SentienceAPI] Starting iframe collection...`);
|
|
953
|
+
const iframeSnapshots = await collectIframeSnapshots(options);
|
|
954
|
+
console.log(`[SentienceAPI] Iframe collection complete. Received ${iframeSnapshots.size} snapshot(s)`);
|
|
955
|
+
|
|
956
|
+
if (iframeSnapshots.size > 0) {
|
|
957
|
+
// FLATTEN IMMEDIATELY: Don't nest them. Just append them with coordinate translation.
|
|
958
|
+
iframeSnapshots.forEach((iframeSnapshot, iframeEl) => {
|
|
959
|
+
// Debug: Log structure to verify data is correct
|
|
960
|
+
// console.log(`[SentienceAPI] Processing iframe snapshot:`, iframeSnapshot);
|
|
961
|
+
|
|
962
|
+
if (iframeSnapshot && iframeSnapshot.raw_elements) {
|
|
963
|
+
const rawElementsCount = iframeSnapshot.raw_elements.length;
|
|
964
|
+
console.log(`[SentienceAPI] Processing ${rawElementsCount} elements from iframe (src: ${iframeEl.src || 'unknown'})`);
|
|
965
|
+
// Get iframe's bounding rect (offset for coordinate translation)
|
|
966
|
+
const iframeRect = iframeEl.getBoundingClientRect();
|
|
967
|
+
const offset = { x: iframeRect.x, y: iframeRect.y };
|
|
968
|
+
|
|
969
|
+
// Get iframe context for frame switching (Playwright needs this)
|
|
970
|
+
const iframeSrc = iframeEl.src || iframeEl.getAttribute('src') || '';
|
|
971
|
+
let isSameOrigin = false;
|
|
972
|
+
try {
|
|
973
|
+
// Try to access contentWindow to check if same-origin
|
|
974
|
+
isSameOrigin = iframeEl.contentWindow !== null;
|
|
975
|
+
} catch (e) {
|
|
976
|
+
isSameOrigin = false;
|
|
977
|
+
}
|
|
978
|
+
|
|
979
|
+
// Adjust coordinates and add iframe context to each element
|
|
980
|
+
const adjustedElements = iframeSnapshot.raw_elements.map(el => {
|
|
981
|
+
const adjusted = { ...el };
|
|
982
|
+
|
|
983
|
+
// Adjust rect coordinates to parent viewport
|
|
984
|
+
if (adjusted.rect) {
|
|
985
|
+
adjusted.rect = {
|
|
986
|
+
...adjusted.rect,
|
|
987
|
+
x: adjusted.rect.x + offset.x,
|
|
988
|
+
y: adjusted.rect.y + offset.y
|
|
989
|
+
};
|
|
990
|
+
}
|
|
991
|
+
|
|
992
|
+
// Add iframe context so agents can switch frames in Playwright
|
|
993
|
+
adjusted.iframe_context = {
|
|
994
|
+
src: iframeSrc,
|
|
995
|
+
is_same_origin: isSameOrigin
|
|
996
|
+
};
|
|
997
|
+
|
|
998
|
+
return adjusted;
|
|
999
|
+
});
|
|
1000
|
+
|
|
1001
|
+
// Append flattened iframe elements to main array
|
|
1002
|
+
allRawElements.push(...adjustedElements);
|
|
1003
|
+
totalIframeElements += adjustedElements.length;
|
|
1004
|
+
}
|
|
1005
|
+
});
|
|
1006
|
+
|
|
1007
|
+
// console.log(`[SentienceAPI] Merged ${iframeSnapshots.size} iframe(s). Total elements: ${allRawElements.length} (${rawData.length} main + ${totalIframeElements} iframe)`);
|
|
389
1008
|
}
|
|
1009
|
+
} catch (error) {
|
|
1010
|
+
console.warn('[SentienceAPI] Iframe collection failed:', error);
|
|
390
1011
|
}
|
|
391
|
-
return cleaned;
|
|
392
1012
|
}
|
|
393
|
-
return obj;
|
|
394
|
-
};
|
|
395
1013
|
|
|
396
|
-
|
|
1014
|
+
// Step 2: Send EVERYTHING to WASM (One giant flat list)
|
|
1015
|
+
// Now WASM prunes iframe elements and main elements in one pass!
|
|
1016
|
+
// No recursion needed - everything is already flat
|
|
1017
|
+
console.log(`[SentienceAPI] Sending ${allRawElements.length} total elements to WASM (${rawData.length} main + ${totalIframeElements} iframe)`);
|
|
1018
|
+
const processed = await processSnapshotInBackground(allRawElements, options);
|
|
1019
|
+
|
|
1020
|
+
if (!processed || !processed.elements) {
|
|
1021
|
+
throw new Error('WASM processing returned invalid result');
|
|
1022
|
+
}
|
|
1023
|
+
|
|
1024
|
+
// Step 3: Capture screenshot if requested
|
|
1025
|
+
let screenshot = null;
|
|
1026
|
+
if (options.screenshot) {
|
|
1027
|
+
screenshot = await captureScreenshot(options.screenshot);
|
|
1028
|
+
}
|
|
397
1029
|
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
1030
|
+
// Step 4: Clean and return
|
|
1031
|
+
const cleanedElements = cleanElement(processed.elements);
|
|
1032
|
+
const cleanedRawElements = cleanElement(processed.raw_elements);
|
|
1033
|
+
|
|
1034
|
+
// FIXED: Removed undefined 'totalIframeRawElements'
|
|
1035
|
+
// FIXED: Logic updated for "Flatten Early" architecture.
|
|
1036
|
+
// processed.elements ALREADY contains the merged iframe elements,
|
|
1037
|
+
// so we simply use .length. No addition needed.
|
|
1038
|
+
|
|
1039
|
+
const totalCount = cleanedElements.length;
|
|
1040
|
+
const totalRaw = cleanedRawElements.length;
|
|
1041
|
+
const iframeCount = totalIframeElements || 0;
|
|
1042
|
+
|
|
1043
|
+
console.log(`[SentienceAPI] ✓ Complete: ${totalCount} Smart Elements, ${totalRaw} Raw Elements (includes ${iframeCount} from iframes) (WASM took ${processed.duration?.toFixed(1)}ms)`);
|
|
1044
|
+
|
|
1045
|
+
return {
|
|
1046
|
+
status: "success",
|
|
1047
|
+
url: window.location.href,
|
|
1048
|
+
viewport: {
|
|
1049
|
+
width: window.innerWidth,
|
|
1050
|
+
height: window.innerHeight
|
|
1051
|
+
},
|
|
1052
|
+
elements: cleanedElements,
|
|
1053
|
+
raw_elements: cleanedRawElements,
|
|
1054
|
+
screenshot: screenshot
|
|
1055
|
+
};
|
|
1056
|
+
} catch (error) {
|
|
1057
|
+
console.error('[SentienceAPI] snapshot() failed:', error);
|
|
1058
|
+
console.error('[SentienceAPI] Error stack:', error.stack);
|
|
1059
|
+
return {
|
|
1060
|
+
status: "error",
|
|
1061
|
+
error: error.message || 'Unknown error',
|
|
1062
|
+
stack: error.stack
|
|
1063
|
+
};
|
|
1064
|
+
}
|
|
405
1065
|
},
|
|
406
|
-
|
|
1066
|
+
|
|
1067
|
+
// 2. Read Content (unchanged)
|
|
407
1068
|
read: (options = {}) => {
|
|
408
|
-
const format = options.format || 'raw';
|
|
1069
|
+
const format = options.format || 'raw';
|
|
409
1070
|
let content;
|
|
410
|
-
|
|
1071
|
+
|
|
411
1072
|
if (format === 'raw') {
|
|
412
|
-
// Return raw HTML suitable for Turndown or other Node.js libraries
|
|
413
1073
|
content = getRawHTML(document.body);
|
|
414
1074
|
} else if (format === 'markdown') {
|
|
415
|
-
// Return lightweight markdown conversion
|
|
416
1075
|
content = convertToMarkdown(document.body);
|
|
417
1076
|
} else {
|
|
418
|
-
// Default to text
|
|
419
1077
|
content = convertToText(document.body);
|
|
420
1078
|
}
|
|
421
|
-
|
|
1079
|
+
|
|
422
1080
|
return {
|
|
423
1081
|
status: "success",
|
|
424
1082
|
url: window.location.href,
|
|
@@ -428,11 +1086,388 @@
|
|
|
428
1086
|
};
|
|
429
1087
|
},
|
|
430
1088
|
|
|
431
|
-
//
|
|
1089
|
+
// 2b. Find Text Rectangle - Get exact pixel coordinates of specific text
|
|
1090
|
+
findTextRect: (options = {}) => {
|
|
1091
|
+
const {
|
|
1092
|
+
text,
|
|
1093
|
+
containerElement = document.body,
|
|
1094
|
+
caseSensitive = false,
|
|
1095
|
+
wholeWord = false,
|
|
1096
|
+
maxResults = 10
|
|
1097
|
+
} = options;
|
|
1098
|
+
|
|
1099
|
+
if (!text || text.trim().length === 0) {
|
|
1100
|
+
return {
|
|
1101
|
+
status: "error",
|
|
1102
|
+
error: "Text parameter is required"
|
|
1103
|
+
};
|
|
1104
|
+
}
|
|
1105
|
+
|
|
1106
|
+
const results = [];
|
|
1107
|
+
const searchText = caseSensitive ? text : text.toLowerCase();
|
|
1108
|
+
|
|
1109
|
+
// Helper function to find text in a single text node
|
|
1110
|
+
function findInTextNode(textNode) {
|
|
1111
|
+
const nodeText = textNode.nodeValue;
|
|
1112
|
+
const searchableText = caseSensitive ? nodeText : nodeText.toLowerCase();
|
|
1113
|
+
|
|
1114
|
+
let startIndex = 0;
|
|
1115
|
+
while (startIndex < nodeText.length && results.length < maxResults) {
|
|
1116
|
+
const foundIndex = searchableText.indexOf(searchText, startIndex);
|
|
1117
|
+
|
|
1118
|
+
if (foundIndex === -1) break;
|
|
1119
|
+
|
|
1120
|
+
// Check whole word matching if required
|
|
1121
|
+
if (wholeWord) {
|
|
1122
|
+
const before = foundIndex > 0 ? nodeText[foundIndex - 1] : ' ';
|
|
1123
|
+
const after = foundIndex + text.length < nodeText.length
|
|
1124
|
+
? nodeText[foundIndex + text.length]
|
|
1125
|
+
: ' ';
|
|
1126
|
+
|
|
1127
|
+
// Check if surrounded by word boundaries
|
|
1128
|
+
if (!/\s/.test(before) || !/\s/.test(after)) {
|
|
1129
|
+
startIndex = foundIndex + 1;
|
|
1130
|
+
continue;
|
|
1131
|
+
}
|
|
1132
|
+
}
|
|
1133
|
+
|
|
1134
|
+
try {
|
|
1135
|
+
// Create range for this occurrence
|
|
1136
|
+
const range = document.createRange();
|
|
1137
|
+
range.setStart(textNode, foundIndex);
|
|
1138
|
+
range.setEnd(textNode, foundIndex + text.length);
|
|
1139
|
+
|
|
1140
|
+
const rect = range.getBoundingClientRect();
|
|
1141
|
+
|
|
1142
|
+
// Only include visible rectangles
|
|
1143
|
+
if (rect.width > 0 && rect.height > 0) {
|
|
1144
|
+
results.push({
|
|
1145
|
+
text: nodeText.substring(foundIndex, foundIndex + text.length),
|
|
1146
|
+
rect: {
|
|
1147
|
+
x: rect.left + window.scrollX,
|
|
1148
|
+
y: rect.top + window.scrollY,
|
|
1149
|
+
width: rect.width,
|
|
1150
|
+
height: rect.height,
|
|
1151
|
+
left: rect.left + window.scrollX,
|
|
1152
|
+
top: rect.top + window.scrollY,
|
|
1153
|
+
right: rect.right + window.scrollX,
|
|
1154
|
+
bottom: rect.bottom + window.scrollY
|
|
1155
|
+
},
|
|
1156
|
+
viewport_rect: {
|
|
1157
|
+
x: rect.left,
|
|
1158
|
+
y: rect.top,
|
|
1159
|
+
width: rect.width,
|
|
1160
|
+
height: rect.height
|
|
1161
|
+
},
|
|
1162
|
+
context: {
|
|
1163
|
+
before: nodeText.substring(Math.max(0, foundIndex - 20), foundIndex),
|
|
1164
|
+
after: nodeText.substring(foundIndex + text.length, Math.min(nodeText.length, foundIndex + text.length + 20))
|
|
1165
|
+
},
|
|
1166
|
+
in_viewport: (
|
|
1167
|
+
rect.top >= 0 &&
|
|
1168
|
+
rect.left >= 0 &&
|
|
1169
|
+
rect.bottom <= window.innerHeight &&
|
|
1170
|
+
rect.right <= window.innerWidth
|
|
1171
|
+
)
|
|
1172
|
+
});
|
|
1173
|
+
}
|
|
1174
|
+
} catch (e) {
|
|
1175
|
+
console.warn('[SentienceAPI] Failed to get rect for text:', e);
|
|
1176
|
+
}
|
|
1177
|
+
|
|
1178
|
+
startIndex = foundIndex + 1;
|
|
1179
|
+
}
|
|
1180
|
+
}
|
|
1181
|
+
|
|
1182
|
+
// Tree walker to find all text nodes
|
|
1183
|
+
const walker = document.createTreeWalker(
|
|
1184
|
+
containerElement,
|
|
1185
|
+
NodeFilter.SHOW_TEXT,
|
|
1186
|
+
{
|
|
1187
|
+
acceptNode: function(node) {
|
|
1188
|
+
// Skip script, style, and empty text nodes
|
|
1189
|
+
const parent = node.parentElement;
|
|
1190
|
+
if (!parent) return NodeFilter.FILTER_REJECT;
|
|
1191
|
+
|
|
1192
|
+
const tagName = parent.tagName.toLowerCase();
|
|
1193
|
+
if (tagName === 'script' || tagName === 'style' || tagName === 'noscript') {
|
|
1194
|
+
return NodeFilter.FILTER_REJECT;
|
|
1195
|
+
}
|
|
1196
|
+
|
|
1197
|
+
// Skip whitespace-only nodes
|
|
1198
|
+
if (!node.nodeValue || node.nodeValue.trim().length === 0) {
|
|
1199
|
+
return NodeFilter.FILTER_REJECT;
|
|
1200
|
+
}
|
|
1201
|
+
|
|
1202
|
+
// Check if element is visible
|
|
1203
|
+
const computedStyle = window.getComputedStyle(parent);
|
|
1204
|
+
if (computedStyle.display === 'none' ||
|
|
1205
|
+
computedStyle.visibility === 'hidden' ||
|
|
1206
|
+
computedStyle.opacity === '0') {
|
|
1207
|
+
return NodeFilter.FILTER_REJECT;
|
|
1208
|
+
}
|
|
1209
|
+
|
|
1210
|
+
return NodeFilter.FILTER_ACCEPT;
|
|
1211
|
+
}
|
|
1212
|
+
}
|
|
1213
|
+
);
|
|
1214
|
+
|
|
1215
|
+
// Walk through all text nodes
|
|
1216
|
+
let currentNode;
|
|
1217
|
+
while ((currentNode = walker.nextNode()) && results.length < maxResults) {
|
|
1218
|
+
findInTextNode(currentNode);
|
|
1219
|
+
}
|
|
1220
|
+
|
|
1221
|
+
return {
|
|
1222
|
+
status: "success",
|
|
1223
|
+
query: text,
|
|
1224
|
+
case_sensitive: caseSensitive,
|
|
1225
|
+
whole_word: wholeWord,
|
|
1226
|
+
matches: results.length,
|
|
1227
|
+
results: results,
|
|
1228
|
+
viewport: {
|
|
1229
|
+
width: window.innerWidth,
|
|
1230
|
+
height: window.innerHeight,
|
|
1231
|
+
scroll_x: window.scrollX,
|
|
1232
|
+
scroll_y: window.scrollY
|
|
1233
|
+
}
|
|
1234
|
+
};
|
|
1235
|
+
},
|
|
1236
|
+
|
|
1237
|
+
// 3. Click Action (unchanged)
|
|
432
1238
|
click: (id) => {
|
|
433
1239
|
const el = window.sentience_registry[id];
|
|
434
|
-
if (el) {
|
|
1240
|
+
if (el) {
|
|
1241
|
+
el.click();
|
|
1242
|
+
el.focus();
|
|
1243
|
+
return true;
|
|
1244
|
+
}
|
|
435
1245
|
return false;
|
|
1246
|
+
},
|
|
1247
|
+
|
|
1248
|
+
// 4. Inspector Mode: Start Recording for Golden Set Collection
|
|
1249
|
+
startRecording: (options = {}) => {
|
|
1250
|
+
const {
|
|
1251
|
+
highlightColor = '#ff0000',
|
|
1252
|
+
successColor = '#00ff00',
|
|
1253
|
+
autoDisableTimeout = 30 * 60 * 1000, // 30 minutes default
|
|
1254
|
+
keyboardShortcut = 'Ctrl+Shift+I'
|
|
1255
|
+
} = options;
|
|
1256
|
+
|
|
1257
|
+
console.log("🔴 [Sentience] Recording Mode STARTED. Click an element to copy its Ground Truth JSON.");
|
|
1258
|
+
console.log(` Press ${keyboardShortcut} or call stopRecording() to stop.`);
|
|
1259
|
+
|
|
1260
|
+
// Validate registry is populated
|
|
1261
|
+
if (!window.sentience_registry || window.sentience_registry.length === 0) {
|
|
1262
|
+
console.warn("⚠️ Registry empty. Call `await window.sentience.snapshot()` first to populate registry.");
|
|
1263
|
+
alert("Registry empty. Run `await window.sentience.snapshot()` first!");
|
|
1264
|
+
return () => {}; // Return no-op cleanup function
|
|
1265
|
+
}
|
|
1266
|
+
|
|
1267
|
+
// Create reverse mapping for O(1) lookup (fixes registry lookup bug)
|
|
1268
|
+
window.sentience_registry_map = new Map();
|
|
1269
|
+
window.sentience_registry.forEach((el, idx) => {
|
|
1270
|
+
if (el) window.sentience_registry_map.set(el, idx);
|
|
1271
|
+
});
|
|
1272
|
+
|
|
1273
|
+
// Create highlight box overlay
|
|
1274
|
+
let highlightBox = document.getElementById('sentience-highlight-box');
|
|
1275
|
+
if (!highlightBox) {
|
|
1276
|
+
highlightBox = document.createElement('div');
|
|
1277
|
+
highlightBox.id = 'sentience-highlight-box';
|
|
1278
|
+
highlightBox.style.cssText = `
|
|
1279
|
+
position: fixed;
|
|
1280
|
+
pointer-events: none;
|
|
1281
|
+
z-index: 2147483647;
|
|
1282
|
+
border: 2px solid ${highlightColor};
|
|
1283
|
+
background: rgba(255, 0, 0, 0.1);
|
|
1284
|
+
display: none;
|
|
1285
|
+
transition: all 0.1s ease;
|
|
1286
|
+
box-sizing: border-box;
|
|
1287
|
+
`;
|
|
1288
|
+
document.body.appendChild(highlightBox);
|
|
1289
|
+
}
|
|
1290
|
+
|
|
1291
|
+
// Create visual indicator (red border on page when recording)
|
|
1292
|
+
let recordingIndicator = document.getElementById('sentience-recording-indicator');
|
|
1293
|
+
if (!recordingIndicator) {
|
|
1294
|
+
recordingIndicator = document.createElement('div');
|
|
1295
|
+
recordingIndicator.id = 'sentience-recording-indicator';
|
|
1296
|
+
recordingIndicator.style.cssText = `
|
|
1297
|
+
position: fixed;
|
|
1298
|
+
top: 0;
|
|
1299
|
+
left: 0;
|
|
1300
|
+
right: 0;
|
|
1301
|
+
height: 3px;
|
|
1302
|
+
background: ${highlightColor};
|
|
1303
|
+
z-index: 2147483646;
|
|
1304
|
+
pointer-events: none;
|
|
1305
|
+
`;
|
|
1306
|
+
document.body.appendChild(recordingIndicator);
|
|
1307
|
+
}
|
|
1308
|
+
recordingIndicator.style.display = 'block';
|
|
1309
|
+
|
|
1310
|
+
// Hover handler (visual feedback)
|
|
1311
|
+
const mouseOverHandler = (e) => {
|
|
1312
|
+
const el = e.target;
|
|
1313
|
+
if (!el || el === highlightBox || el === recordingIndicator) return;
|
|
1314
|
+
|
|
1315
|
+
const rect = el.getBoundingClientRect();
|
|
1316
|
+
highlightBox.style.display = 'block';
|
|
1317
|
+
highlightBox.style.top = (rect.top + window.scrollY) + 'px';
|
|
1318
|
+
highlightBox.style.left = (rect.left + window.scrollX) + 'px';
|
|
1319
|
+
highlightBox.style.width = rect.width + 'px';
|
|
1320
|
+
highlightBox.style.height = rect.height + 'px';
|
|
1321
|
+
};
|
|
1322
|
+
|
|
1323
|
+
// Click handler (capture ground truth data)
|
|
1324
|
+
const clickHandler = (e) => {
|
|
1325
|
+
e.preventDefault();
|
|
1326
|
+
e.stopPropagation();
|
|
1327
|
+
|
|
1328
|
+
const el = e.target;
|
|
1329
|
+
if (!el || el === highlightBox || el === recordingIndicator) return;
|
|
1330
|
+
|
|
1331
|
+
// Use Map for reliable O(1) lookup
|
|
1332
|
+
const sentienceId = window.sentience_registry_map.get(el);
|
|
1333
|
+
if (sentienceId === undefined) {
|
|
1334
|
+
console.warn("⚠️ Element not found in Sentience Registry. Did you run snapshot() first?");
|
|
1335
|
+
alert("Element not in registry. Run `await window.sentience.snapshot()` first!");
|
|
1336
|
+
return;
|
|
1337
|
+
}
|
|
1338
|
+
|
|
1339
|
+
// Extract raw data (ground truth + raw signals, NOT model outputs)
|
|
1340
|
+
const rawData = extractRawElementData(el);
|
|
1341
|
+
const selector = getUniqueSelector(el);
|
|
1342
|
+
const role = el.getAttribute('role') || el.tagName.toLowerCase();
|
|
1343
|
+
const text = getText(el);
|
|
1344
|
+
|
|
1345
|
+
// Build golden set JSON (ground truth + raw signals only)
|
|
1346
|
+
const snippet = {
|
|
1347
|
+
task: `Interact with ${text.substring(0, 20)}${text.length > 20 ? '...' : ''}`,
|
|
1348
|
+
url: window.location.href,
|
|
1349
|
+
timestamp: new Date().toISOString(),
|
|
1350
|
+
target_criteria: {
|
|
1351
|
+
id: sentienceId,
|
|
1352
|
+
selector: selector,
|
|
1353
|
+
role: role,
|
|
1354
|
+
text: text.substring(0, 50)
|
|
1355
|
+
},
|
|
1356
|
+
debug_snapshot: rawData
|
|
1357
|
+
};
|
|
1358
|
+
|
|
1359
|
+
// Copy to clipboard
|
|
1360
|
+
const jsonString = JSON.stringify(snippet, null, 2);
|
|
1361
|
+
navigator.clipboard.writeText(jsonString).then(() => {
|
|
1362
|
+
console.log("✅ Copied Ground Truth to clipboard:", snippet);
|
|
1363
|
+
|
|
1364
|
+
// Flash green to indicate success
|
|
1365
|
+
highlightBox.style.border = `2px solid ${successColor}`;
|
|
1366
|
+
highlightBox.style.background = 'rgba(0, 255, 0, 0.2)';
|
|
1367
|
+
setTimeout(() => {
|
|
1368
|
+
highlightBox.style.border = `2px solid ${highlightColor}`;
|
|
1369
|
+
highlightBox.style.background = 'rgba(255, 0, 0, 0.1)';
|
|
1370
|
+
}, 500);
|
|
1371
|
+
}).catch(err => {
|
|
1372
|
+
console.error("❌ Failed to copy to clipboard:", err);
|
|
1373
|
+
alert("Failed to copy to clipboard. Check console for JSON.");
|
|
1374
|
+
});
|
|
1375
|
+
};
|
|
1376
|
+
|
|
1377
|
+
// Auto-disable timeout
|
|
1378
|
+
let timeoutId = null;
|
|
1379
|
+
|
|
1380
|
+
// Tear down recording mode. Detaches the capture-phase listeners, cancels the
// pending auto-disable timer, hides the recording UI, empties the registry map
// and drops the global stop hook. Declared before the listeners are attached
// so both the keyboard shortcut and the auto-disable timer can call it.
const stopRecording = () => {
    // The listeners were registered with capture = true, so the same flag
    // must be passed here for removeEventListener to match them.
    const captureListeners = [
        ['mouseover', mouseOverHandler],
        ['click', clickHandler],
        ['keydown', keyboardHandler],
    ];
    for (const [eventName, listener] of captureListeners) {
        document.removeEventListener(eventName, listener, true);
    }

    // Cancel the auto-disable timer if one is still pending.
    if (timeoutId) {
        clearTimeout(timeoutId);
        timeoutId = null;
    }

    // Hide (rather than remove) the recording UI nodes, when they exist.
    const hideIfPresent = (node) => {
        if (node) {
            node.style.display = 'none';
        }
    };
    hideIfPresent(highlightBox);
    hideIfPresent(recordingIndicator);

    // Clean up registry map (optional, but good practice)
    const registry = window.sentience_registry_map;
    if (registry) {
        registry.clear();
    }

    // Only remove the global hook if it still points at this closure.
    if (window.sentience_stopRecording === stopRecording) {
        delete window.sentience_stopRecording;
    }

    console.log("⚪ [Sentience] Recording Mode STOPPED.");
};
|
|
1411
|
+
|
|
1412
|
+
// Keyboard shortcut handler (defined after stopRecording).
// Stops recording when Ctrl+Shift+I or Cmd+Shift+I is pressed.
const keyboardHandler = (e) => {
    // Compare the key case-insensitively: with Shift held, the reported
    // letter case still depends on Caps Lock state (and can vary by platform),
    // so a strict 'I' comparison can miss the shortcut. The typeof guard
    // keeps the handler safe for events that carry no string `key`.
    const isShortcutKey = typeof e.key === 'string' && e.key.toLowerCase() === 'i';

    if ((e.ctrlKey || e.metaKey) && e.shiftKey && isShortcutKey) {
        e.preventDefault();
        stopRecording();
    }
};
|
|
1420
|
+
|
|
1421
|
+
// Attach event listeners (use capture phase to intercept early)
|
|
1422
|
+
document.addEventListener('mouseover', mouseOverHandler, true);
|
|
1423
|
+
document.addEventListener('click', clickHandler, true);
|
|
1424
|
+
document.addEventListener('keydown', keyboardHandler, true);
|
|
1425
|
+
|
|
1426
|
+
// Set up auto-disable timeout
|
|
1427
|
+
if (autoDisableTimeout > 0) {
|
|
1428
|
+
timeoutId = setTimeout(() => {
|
|
1429
|
+
console.log("⏰ [Sentience] Recording Mode auto-disabled after timeout.");
|
|
1430
|
+
stopRecording();
|
|
1431
|
+
}, autoDisableTimeout);
|
|
1432
|
+
}
|
|
1433
|
+
|
|
1434
|
+
// Store stop function globally for keyboard shortcut access
|
|
1435
|
+
window.sentience_stopRecording = stopRecording;
|
|
1436
|
+
|
|
1437
|
+
return stopRecording;
|
|
1438
|
+
}
|
|
1439
|
+
};
|
|
1440
|
+
|
|
1441
|
+
/**
 * Request an overlay for the given elements by posting a
 * 'SENTIENCE_SHOW_OVERLAY' message to this window. This function does no
 * drawing itself; rendering is left to whatever listens for that message
 * (presumably the extension's content script, which is not visible here).
 *
 * @param {Array} elements - List of elements with bbox, importance, visual_cues.
 *     A non-array value logs a warning and nothing is posted.
 * @param {number|null} [targetElementId=null] - Optional ID of the target
 *     element (per the original note, shown in red by the renderer).
 * @returns {void}
 */
window.sentience.showOverlay = function(elements, targetElementId = null) {
    // Array.isArray already rejects null, undefined and every other non-array.
    if (!Array.isArray(elements)) {
        console.warn('[Sentience] showOverlay: elements must be an array');
        return;
    }

    const overlayRequest = {
        type: 'SENTIENCE_SHOW_OVERLAY',
        elements,
        targetElementId,
        timestamp: Date.now()
    };
    window.postMessage(overlayRequest, '*');

    console.log(`[Sentience] Overlay requested for ${elements.length} elements`);
};
|
|
438
|
-
|
|
1461
|
+
|
|
1462
|
+
/**
 * Ask for the overlay to be removed by posting a 'SENTIENCE_CLEAR_OVERLAY'
 * message to this window. The log line is written right after the message is
 * posted; this function does not wait for any acknowledgement.
 *
 * @returns {void}
 */
window.sentience.clearOverlay = function() {
    const clearRequest = { type: 'SENTIENCE_CLEAR_OVERLAY' };
    window.postMessage(clearRequest, '*');

    console.log('[Sentience] Overlay cleared');
};
|
|
1471
|
+
|
|
1472
|
+
console.log('[SentienceAPI] ✓ Ready! (CSP-Resistant - WASM runs in background)');
|
|
1473
|
+
})();
|