lobster-cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/README.md +389 -0
  2. package/dist/agent/core.js +1013 -0
  3. package/dist/agent/core.js.map +1 -0
  4. package/dist/agent/index.js +1027 -0
  5. package/dist/agent/index.js.map +1 -0
  6. package/dist/brain/index.js +60 -0
  7. package/dist/brain/index.js.map +1 -0
  8. package/dist/browser/dom/index.js +1096 -0
  9. package/dist/browser/dom/index.js.map +1 -0
  10. package/dist/browser/index.js +2034 -0
  11. package/dist/browser/index.js.map +1 -0
  12. package/dist/browser/manager.js +86 -0
  13. package/dist/browser/manager.js.map +1 -0
  14. package/dist/browser/page-adapter.js +1345 -0
  15. package/dist/browser/page-adapter.js.map +1 -0
  16. package/dist/cascade/index.js +138 -0
  17. package/dist/cascade/index.js.map +1 -0
  18. package/dist/config/index.js +110 -0
  19. package/dist/config/index.js.map +1 -0
  20. package/dist/config/schema.js +66 -0
  21. package/dist/config/schema.js.map +1 -0
  22. package/dist/discover/index.js +545 -0
  23. package/dist/discover/index.js.map +1 -0
  24. package/dist/index.js +5529 -0
  25. package/dist/index.js.map +1 -0
  26. package/dist/lib.js +4206 -0
  27. package/dist/lib.js.map +1 -0
  28. package/dist/llm/client.js +379 -0
  29. package/dist/llm/client.js.map +1 -0
  30. package/dist/llm/index.js +397 -0
  31. package/dist/llm/index.js.map +1 -0
  32. package/dist/llm/openai-client.js +214 -0
  33. package/dist/llm/openai-client.js.map +1 -0
  34. package/dist/output/index.js +93 -0
  35. package/dist/output/index.js.map +1 -0
  36. package/dist/pipeline/index.js +802 -0
  37. package/dist/pipeline/index.js.map +1 -0
  38. package/dist/router/decision.js +80 -0
  39. package/dist/router/decision.js.map +1 -0
  40. package/dist/router/index.js +3443 -0
  41. package/dist/router/index.js.map +1 -0
  42. package/dist/types/index.js +23 -0
  43. package/dist/types/index.js.map +1 -0
  44. package/logo.svg +11 -0
  45. package/package.json +65 -0
@@ -0,0 +1,2034 @@
1
+ // src/browser/manager.ts
2
+ import puppeteer from "puppeteer-core";
3
+ import { existsSync } from "fs";
4
+
5
+ // src/utils/logger.ts
6
+ import chalk from "chalk";
7
/**
 * Tiny console logger shared by the browser helpers.
 *
 * Every method prints a chalk-coloured glyph (or tag) followed by the message.
 * Only `error` writes through `console.error`; everything else goes through
 * `console.log`. `debug` is silent unless the LOBSTER_DEBUG environment
 * variable is set to a non-empty value.
 */
var log = {
  info(msg) {
    console.log(chalk.blue("\u2139"), msg);
  },
  success(msg) {
    console.log(chalk.green("\u2713"), msg);
  },
  warn(msg) {
    console.log(chalk.yellow("\u26A0"), msg);
  },
  error(msg) {
    console.error(chalk.red("\u2717"), msg);
  },
  debug(msg) {
    // Guard clause: debug output is opt-in via the environment.
    if (!process.env.LOBSTER_DEBUG) return;
    console.log(chalk.gray("\u22EF"), msg);
  },
  step(n, msg) {
    console.log(chalk.cyan(`[${n}]`), msg);
  },
  dim(msg) {
    console.log(chalk.dim(msg));
  }
};
18
+
19
+ // src/browser/manager.ts
20
/**
 * Lazily creates and caches a single puppeteer browser.
 *
 * Config fields read by this class:
 *  - `cdpEndpoint`    – when set, attach to an existing browser over this
 *                       WebSocket endpoint instead of launching one.
 *  - `executablePath` – browser binary to launch; falls back to `findChrome()`.
 *  - `headless`       – launch mode, defaults to `true` when null/undefined.
 */
var BrowserManager = class {
  browser = null;
  config;

  /**
   * @param {object} [config] - Browser options (see class description).
   */
  constructor(config = {}) {
    this.config = config;
  }

  /**
   * Returns the cached browser if it is still connected, otherwise attaches to
   * the configured CDP endpoint or launches a local Chrome/Chromium.
   *
   * @returns {Promise<object>} The puppeteer browser instance.
   * @throws {Error} When no endpoint is configured and no executable is found.
   */
  async connect() {
    if (this.browser?.connected) return this.browser;

    const { cdpEndpoint } = this.config;
    if (cdpEndpoint) {
      log.debug(`Connecting to CDP endpoint: ${cdpEndpoint}`);
      this.browser = await puppeteer.connect({
        browserWSEndpoint: cdpEndpoint
      });
      return this.browser;
    }

    const executablePath = this.config.executablePath || findChrome();
    if (!executablePath) {
      throw new Error(
        "Chrome/Chromium not found. Set LOBSTER_BROWSER_PATH or config browser.executablePath"
      );
    }

    log.debug(`Launching Chrome: ${executablePath}`);
    this.browser = await puppeteer.launch({
      executablePath,
      headless: this.config.headless ?? true,
      args: [
        "--no-sandbox",
        "--disable-setuid-sandbox",
        "--disable-dev-shm-usage",
        "--disable-gpu"
      ]
    });
    return this.browser;
  }

  /**
   * Opens a new page, connecting or launching first if necessary.
   *
   * @returns {Promise<object>} The new puppeteer page.
   */
  async newPage() {
    const browser = await this.connect();
    return browser.newPage();
  }

  /**
   * Closes the cached browser, if any, and forgets it.
   *
   * Closing stays best-effort (it never rejects), but a failure is now
   * reported through `log.debug` instead of being discarded silently, so a
   * leaked browser process can be diagnosed with LOBSTER_DEBUG.
   *
   * NOTE(review): for a browser attached via `cdpEndpoint` this still calls
   * `close()`, which presumably terminates the remote browser; confirm
   * whether `disconnect()` is the intended behaviour there.
   *
   * @returns {Promise<void>}
   */
  async close() {
    if (!this.browser) return;
    try {
      await this.browser.close();
    } catch (err) {
      log.debug(`Ignoring error while closing browser: ${err?.message ?? err}`);
    } finally {
      this.browser = null;
    }
  }
};
66
/**
 * Locates a Chrome/Chromium executable on this machine.
 *
 * Lookup order:
 *  1. The LOBSTER_BROWSER_PATH environment variable, when it names an existing
 *     file. The launch error message tells users to set this variable, so
 *     discovery honours it directly.
 *     NOTE(review): the config loader may already map this variable onto
 *     `executablePath`; checking it here is harmless in that case.
 *  2. A fixed list of well-known install locations for the current platform
 *     (macOS, Windows, otherwise Linux-style paths).
 *
 * @returns {string | undefined} First existing path, or undefined if none.
 */
function findChrome() {
  const fromEnv = process.env.LOBSTER_BROWSER_PATH;
  if (fromEnv && existsSync(fromEnv)) return fromEnv;

  let candidates;
  switch (process.platform) {
    case "darwin":
      candidates = [
        "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
        "/Applications/Chromium.app/Contents/MacOS/Chromium",
        "/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary"
      ];
      break;
    case "win32":
      candidates = [
        "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe",
        "C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe"
      ];
      break;
    default:
      candidates = [
        "/usr/bin/google-chrome",
        "/usr/bin/google-chrome-stable",
        "/usr/bin/chromium-browser",
        "/usr/bin/chromium",
        "/snap/bin/chromium"
      ];
  }
  return candidates.find((p) => existsSync(p));
}
83
+
84
+ // src/browser/dom/flat-tree.ts
85
/**
 * Browser-side script (evaluated in the page) that flattens the visible DOM
 * under document.body into an id-keyed node map.
 *
 * Evaluates to `{ rootId, map, selectorMap }`:
 *  - `map`         – node id -> { id, tagName, attributes, parentId, children,
 *                    isInteractive, highlightIndex?, scrollable?, text? }
 *  - `selectorMap` – highlight index -> node id, for interactive elements.
 *
 * Node ids are produced by a counter (`<tag>_<n>`), so they are unique within
 * a run and identical across runs on an unchanged page. The earlier random
 * suffix could collide, silently overwriting a node in `map`.
 */
var FLAT_TREE_SCRIPT = `
(() => {
  const INTERACTIVE_TAGS = new Set([
    'a', 'button', 'input', 'select', 'textarea', 'details', 'summary',
    'label', 'option', 'fieldset', 'legend',
  ]);

  const INTERACTIVE_ROLES = new Set([
    'button', 'link', 'textbox', 'checkbox', 'radio', 'combobox',
    'listbox', 'menu', 'menuitem', 'tab', 'switch', 'slider',
    'searchbox', 'spinbutton', 'option', 'menuitemcheckbox', 'menuitemradio',
  ]);

  const ATTR_WHITELIST = [
    'type', 'role', 'aria-label', 'aria-expanded', 'aria-selected',
    'aria-checked', 'aria-disabled', 'placeholder', 'title', 'href',
    'value', 'name', 'alt', 'src',
  ];

  let highlightIndex = 0;
  // Monotonic counter backing node ids: unique and reproducible.
  let nodeSeq = 0;
  const nodes = {};
  const selectorMap = {};

  function nextId(prefix) {
    return prefix + '_' + (nodeSeq++);
  }

  // Zero-sized or CSS-hidden elements are left out of the tree.
  function isVisible(el) {
    if (el.offsetWidth === 0 && el.offsetHeight === 0) return false;
    const style = getComputedStyle(el);
    if (style.display === 'none' || style.visibility === 'hidden' || style.opacity === '0') return false;
    return true;
  }

  // Interactive = native control, interactive ARIA role, contenteditable,
  // non-negative tabindex, or an onclick handler/attribute.
  function isInteractive(el) {
    const tag = el.tagName.toLowerCase();
    if (INTERACTIVE_TAGS.has(tag)) return true;
    const role = el.getAttribute('role');
    if (role && INTERACTIVE_ROLES.has(role)) return true;
    if (el.getAttribute('contenteditable') === 'true') return true;
    const tabindex = el.getAttribute('tabindex');
    if (tabindex !== null && Number.parseInt(tabindex, 10) >= 0) return true;
    if (el.onclick || el.getAttribute('onclick')) return true;
    return false;
  }

  // Copies only the whitelisted, non-empty attributes.
  function getAttributes(el) {
    const attrs = {};
    for (const attr of ATTR_WHITELIST) {
      const val = el.getAttribute(attr);
      if (val !== null && val !== '') attrs[attr] = val;
    }
    return attrs;
  }

  // Remaining scroll distance per direction, or null when the element
  // cannot scroll on either axis.
  function getScrollable(el) {
    const style = getComputedStyle(el);
    const overflowY = style.overflowY;
    const overflowX = style.overflowX;
    const isScrollableY = (overflowY === 'auto' || overflowY === 'scroll') && el.scrollHeight > el.clientHeight;
    const isScrollableX = (overflowX === 'auto' || overflowX === 'scroll') && el.scrollWidth > el.clientWidth;
    if (!isScrollableY && !isScrollableX) return null;
    return {
      left: el.scrollLeft,
      top: el.scrollTop,
      right: el.scrollWidth - el.clientWidth - el.scrollLeft,
      bottom: el.scrollHeight - el.clientHeight - el.scrollTop,
    };
  }

  function walk(el, parentId) {
    if (!el || el.nodeType === 8) return; // skip comments

    if (el.nodeType === 3) { // text node
      const text = el.textContent.trim();
      if (!text) return;
      const id = nextId('text');
      nodes[id] = { id, tagName: '#text', text, parentId };
      if (parentId && nodes[parentId]) {
        nodes[parentId].children = nodes[parentId].children || [];
        nodes[parentId].children.push(id);
      }
      return;
    }

    if (el.nodeType !== 1) return; // only elements

    const tag = el.tagName.toLowerCase();
    if (['script', 'style', 'noscript', 'svg', 'path'].includes(tag)) return;
    if (!isVisible(el)) return;

    const id = nextId(tag);
    const interactive = isInteractive(el);
    const node = {
      id,
      tagName: tag,
      attributes: getAttributes(el),
      parentId,
      children: [],
      isInteractive: interactive,
    };

    if (interactive) {
      node.highlightIndex = highlightIndex;
      selectorMap[highlightIndex] = id;
      highlightIndex++;
    }

    const scrollable = getScrollable(el);
    if (scrollable) node.scrollable = scrollable;

    // Direct text children are folded into the element's own text field
    // (capped at 200 characters) rather than emitted as separate nodes.
    const text = [];
    for (const child of el.childNodes) {
      if (child.nodeType === 3 && child.textContent.trim()) {
        text.push(child.textContent.trim());
      }
    }
    if (text.length > 0) node.text = text.join(' ').slice(0, 200);

    nodes[id] = node;

    if (parentId && nodes[parentId]) {
      nodes[parentId].children.push(id);
    }

    for (const child of el.children) {
      walk(child, id);
    }
  }

  const rootId = 'root';
  nodes[rootId] = { id: rootId, tagName: 'body', children: [], attributes: {} };
  for (const child of document.body.children) {
    walk(child, rootId);
  }

  return { rootId, map: nodes, selectorMap };
})()
`;
219
/**
 * Renders a flat DOM tree (`{ rootId, map }`, as produced by the flat-tree
 * page script) into an indented, tag-like text outline.
 *
 * Rules:
 *  - `#text` nodes print their text on a line of their own.
 *  - An element is printed when it has a highlight index, text, or children;
 *    an element with none of those contributes nothing.
 *  - Leaf elements are printed as `<tag attrs>text</>`; elements with
 *    children print an opening line followed by the children one level deeper.
 *  - Interactive elements are prefixed with `[highlightIndex]`; scrollable
 *    ones carry a `|scroll: Npx up, Npx down|` note.
 *
 * The previous version carried two dead paths: a `length === 0 && text`
 * clause that could never change the outcome because of operator precedence,
 * and an else-branch loop that only ran when the child list was empty.
 * Both are removed; the output is unchanged.
 *
 * @param {{rootId: string, map: Object<string, object>}} tree
 * @returns {string} Newline-joined outline.
 */
function flatTreeToString(tree) {
  const lines = [];

  const render = (nodeId, depth) => {
    const node = tree.map[nodeId];
    if (!node) return;

    const indent = " ".repeat(depth);
    if (node.tagName === "#text") {
      if (node.text) lines.push(`${indent}${node.text}`);
      return;
    }

    const children = node.children || [];
    const prefix = node.highlightIndex !== void 0 ? `[${node.highlightIndex}]` : "";
    const text = node.text || "";
    // Nothing to show for an unmarked, empty, childless element.
    if (!prefix && !text && children.length === 0) return;

    const attrStr = Object.entries(node.attributes || {})
      .map(([k, v]) => (v === "" ? k : `${k}="${v}"`))
      .join(" ");
    const scrollInfo = node.scrollable
      ? ` |scroll: ${Math.round(node.scrollable.top)}px up, ${Math.round(node.scrollable.bottom)}px down|`
      : "";
    const opening = `${indent}${prefix}<${node.tagName}${attrStr ? " " + attrStr : ""}${scrollInfo}>`;

    if (children.length === 0) {
      lines.push(`${opening}${text}</>`);
      return;
    }

    lines.push(`${opening}${text}`);
    for (const childId of children) {
      render(childId, depth + 1);
    }
  };

  render(tree.rootId, 0);
  return lines.join("\n");
}
254
+
255
+ // src/browser/dom/snapshot.ts
256
/**
 * Builds a page script that takes a snapshot while diffing against an
 * explicit list of element hashes from a previous snapshot.
 *
 * @param {string[]} [previousHashes] - Hashes recorded by the prior snapshot;
 *   a missing or falsy value is treated as an empty list.
 * @returns {string} JavaScript source to evaluate in the page.
 */
function buildSnapshotScript(previousHashes) {
  return SNAPSHOT_SCRIPT_FN(previousHashes || []);
}

/**
 * Produces the snapshot script source for a given list of previous hashes.
 *
 * The earlier implementation returned only the opening lines of an IIFE,
 * which is a syntax error if evaluated. This version returns a complete
 * expression: it stores the hashes in `window.__lobster_prev_hashes`, the
 * variable SNAPSHOT_SCRIPT reads for diff tracking, and then evaluates
 * SNAPSHOT_SCRIPT itself.
 *
 * @param {string[]} prevHashes - Hashes to seed the diff with.
 * @returns {string} JavaScript source evaluating to the snapshot text.
 */
function SNAPSHOT_SCRIPT_FN(prevHashes) {
  return `
(() => {
  window.__lobster_prev_hashes = ${JSON.stringify(prevHashes)};
  return ${SNAPSHOT_SCRIPT.trim()};
})()
`;
}
267
/**
 * Browser-side script (evaluated in the page) that renders the visible DOM as
 * an indented text snapshot.
 *
 * Output: a header line (viewport size, page height, scroll position), a
 * `---` separator, then one line per retained element or text node.
 * Interactive elements are numbered `[n]` (`*[n]` when their hash was absent
 * from the previous snapshot) and tagged in the DOM with `data-ref="n"`.
 *
 * Side effects in the page: writes `data-ref` attributes and stores the
 * current element hashes in `window.__lobster_prev_hashes`.
 *
 * Changes from the earlier version:
 *  - Interactive elements covered by a much higher z-index element are now
 *    marked ` {occluded}`; the check used to run and its result was dropped.
 *  - `data-ref` attributes left by earlier snapshots are removed before the
 *    walk so a stale element cannot share an index with a current one.
 *  - An iframe whose body yields no content no longer emits a blank entry.
 */
var SNAPSHOT_SCRIPT = `
(() => {
  let idx = 0;
  const __prevHashes = (window.__lobster_prev_hashes) ? new Set(window.__lobster_prev_hashes) : null;
  const __currentHashes = [];

  const SKIP_TAGS = new Set([
    'script','style','noscript','svg','path','meta','link','head',
    'template','slot','colgroup','col',
  ]);

  const INTERACTIVE_TAGS = new Set([
    'a','button','input','select','textarea','details','summary','label',
  ]);

  const INTERACTIVE_ROLES = new Set([
    'button','link','textbox','checkbox','radio','combobox','listbox',
    'menu','menuitem','tab','switch','slider','searchbox','spinbutton',
    'option','menuitemcheckbox','menuitemradio','treeitem',
  ]);

  const ATTR_WHITELIST = [
    'type','role','aria-label','aria-expanded','aria-selected','aria-checked',
    'aria-disabled','aria-haspopup','aria-pressed','placeholder','title',
    'href','value','name','alt','src','action','method','for',
    'data-testid','data-id','contenteditable','tabindex',
  ];

  const AD_PATTERNS = /ad[-_]?banner|ad[-_]?container|google[-_]?ad|doubleclick|adsbygoogle|sponsored|^ad$/i;

  // -- Visibility check --
  // Zero-sized inputs are kept: the INPUT tag is exempt from the size test.
  function isVisible(el) {
    if (el.offsetWidth === 0 && el.offsetHeight === 0 && el.tagName !== 'INPUT') return false;
    const s = getComputedStyle(el);
    if (s.display === 'none') return false;
    if (s.visibility === 'hidden' || s.visibility === 'collapse') return false;
    if (s.opacity === '0') return false;
    if (s.clipPath === 'inset(100%)') return false;
    // Entirely left of or above the viewport origin counts as hidden.
    const rect = el.getBoundingClientRect();
    if (rect.right < 0 || rect.bottom < 0) return false;
    return true;
  }

  // -- Interactive detection --
  function isInteractive(el) {
    const tag = el.tagName.toLowerCase();
    if (INTERACTIVE_TAGS.has(tag)) {
      // Skip disabled elements
      if (el.disabled) return false;
      // Skip hidden inputs
      if (tag === 'input' && el.type === 'hidden') return false;
      return true;
    }
    const role = el.getAttribute('role');
    if (role && INTERACTIVE_ROLES.has(role)) return true;
    if (el.contentEditable === 'true') return true;
    if (el.tabIndex >= 0 && el.getAttribute('tabindex') !== null) return true;
    if (el.onclick) return true;
    return false;
  }

  // -- Attribute filtering --
  // Whitelisted attributes only; values over 80 chars are truncated and
  // javascript: hrefs are dropped.
  function getAttrs(el) {
    const parts = [];
    for (const name of ATTR_WHITELIST) {
      let v = el.getAttribute(name);
      if (v === null || v === '') continue;
      if (v.length > 80) v = v.slice(0, 77) + '...';
      if (name === 'href' && v.startsWith('javascript:')) continue;
      parts.push(name + '=' + v);
    }
    return parts.length ? ' ' + parts.join(' ') : '';
  }

  // -- Ad filtering --
  function isAd(el) {
    const id = el.id || '';
    const cls = el.className || '';
    if (typeof cls === 'string' && AD_PATTERNS.test(cls)) return true;
    if (AD_PATTERNS.test(id)) return true;
    if (el.tagName === 'IFRAME' && AD_PATTERNS.test(el.src || '')) return true;
    return false;
  }

  // -- Scroll info --
  // Returns a note with the remaining scroll distance per direction, or ''
  // when the element is not scrollable or has nowhere left to scroll.
  function getScrollInfo(el) {
    const s = getComputedStyle(el);
    const overflowY = s.overflowY;
    const overflowX = s.overflowX;
    const scrollableY = (overflowY === 'auto' || overflowY === 'scroll') && el.scrollHeight > el.clientHeight;
    const scrollableX = (overflowX === 'auto' || overflowX === 'scroll') && el.scrollWidth > el.clientWidth;
    if (!scrollableY && !scrollableX) return '';

    const parts = [];
    if (scrollableY) {
      const up = Math.round(el.scrollTop);
      const down = Math.round(el.scrollHeight - el.clientHeight - el.scrollTop);
      if (up > 0) parts.push(up + 'px up');
      if (down > 0) parts.push(down + 'px down');
    }
    if (scrollableX) {
      const left = Math.round(el.scrollLeft);
      const right = Math.round(el.scrollWidth - el.clientWidth - el.scrollLeft);
      if (left > 0) parts.push(left + 'px left');
      if (right > 0) parts.push(right + 'px right');
    }
    return parts.length ? ' |scroll: ' + parts.join(', ') + '|' : '';
  }

  // -- Bounding-box dedup --
  // If a parent and child are both interactive and the child covers more
  // than 85% of the parent, the parent is skipped
  // (e.g., <a><button>Click</button></a>).
  function isWrappingInteractive(el) {
    if (!isInteractive(el)) return false;
    const rect = el.getBoundingClientRect();
    if (rect.width === 0 || rect.height === 0) return false;
    for (const child of el.children) {
      if (!isInteractive(child)) continue;
      const cr = child.getBoundingClientRect();
      const overlapX = Math.min(rect.right, cr.right) - Math.max(rect.left, cr.left);
      const overlapY = Math.min(rect.bottom, cr.bottom) - Math.max(rect.top, cr.top);
      const overlapArea = Math.max(0, overlapX) * Math.max(0, overlapY);
      const parentArea = rect.width * rect.height;
      if (parentArea > 0 && overlapArea / parentArea > 0.85) return true;
    }
    return false;
  }

  // -- Occlusion detection --
  // True when the element at this element's centre point is unrelated to it
  // and sits more than 10 z-index levels higher (modal/overlay heuristic).
  function isOccluded(el) {
    // Hit-testing uses the top document, so coordinates of elements living
    // in an iframe document would be meaningless here.
    if (el.ownerDocument !== document) return false;
    const rect = el.getBoundingClientRect();
    if (rect.width === 0 || rect.height === 0) return false;
    const cx = rect.left + rect.width / 2;
    const cy = rect.top + rect.height / 2;
    const topEl = document.elementFromPoint(cx, cy);
    if (!topEl) return false;
    if (topEl === el || el.contains(topEl) || topEl.contains(el)) return false;
    const topZ = Number.parseInt(getComputedStyle(topEl).zIndex, 10) || 0;
    const elZ = Number.parseInt(getComputedStyle(el).zIndex, 10) || 0;
    return topZ > elZ + 10;
  }

  // -- Iframe content extraction --
  // Inaccessible (cross-origin) frames throw and yield ''.
  function getIframeContent(iframe, depth, maxDepth) {
    try {
      const doc = iframe.contentDocument;
      if (!doc || !doc.body) return '';
      const inner = walkNode(doc.body, depth, maxDepth);
      // Return '' rather than a bare newline so callers can detect "empty".
      return inner ? '\\n' + inner : '';
    } catch { return ''; }
  }

  // -- Shadow DOM traversal --
  function getShadowContent(el, depth, maxDepth) {
    if (!el.shadowRoot) return '';
    let out = '';
    for (const child of el.shadowRoot.childNodes) {
      out += walkNode(child, depth, maxDepth);
    }
    return out;
  }

  // -- Input value hint --
  // Live state of form controls: checked flag, current value, or selected
  // option text (values capped at 50 chars, option text at 40).
  function getInputHint(el) {
    const tag = el.tagName.toLowerCase();
    if (tag === 'input') {
      const type = el.type || 'text';
      const val = el.value || '';
      const checked = el.checked;
      if (type === 'checkbox' || type === 'radio') {
        return checked ? ' [checked]' : ' [unchecked]';
      }
      if (val) return ' value="' + val.slice(0, 50) + '"';
    }
    if (tag === 'textarea' && el.value) {
      return ' value="' + el.value.slice(0, 50) + '"';
    }
    if (tag === 'select' && el.selectedOptions?.length) {
      return ' selected="' + el.selectedOptions[0].text.slice(0, 40) + '"';
    }
    return '';
  }

  const MAX_DEPTH = 25;
  const MAX_TEXT = 150;

  function walkNode(node, depth, maxDepth) {
    if (depth > maxDepth) return '';
    if (!node) return '';

    // Text node
    if (node.nodeType === 3) {
      const t = node.textContent.trim();
      if (!t) return '';
      const text = t.length > MAX_TEXT ? t.slice(0, MAX_TEXT) + '...' : t;
      return ' '.repeat(depth) + text + '\\n';
    }

    // Comment node: skip
    if (node.nodeType === 8) return '';

    // Only element nodes from here
    if (node.nodeType !== 1) return '';

    const el = node;
    const tag = el.tagName.toLowerCase();

    if (SKIP_TAGS.has(tag)) return '';
    if (!isVisible(el)) return '';
    if (isAd(el)) return '';

    // Bbox dedup: a wrapping interactive parent is not listed itself.
    const skipSelf = isWrappingInteractive(el);

    const indent = ' '.repeat(depth);
    const inter = !skipSelf && isInteractive(el);
    let prefix = '';
    if (inter) {
      const thisIdx = idx++;
      // Hash: tag + text + key attributes for diff tracking
      const hashText = tag + ':' + (el.textContent || '').trim().slice(0, 40) + ':' + (el.getAttribute('href') || '') + ':' + (el.getAttribute('aria-label') || '');
      __currentHashes.push(hashText);
      const isNew = __prevHashes && __prevHashes.size > 0 && !__prevHashes.has(hashText);
      prefix = isNew ? '*[' + thisIdx + ']' : '[' + thisIdx + ']';
      // Annotate the element so it can be found again by its index.
      try { el.dataset.ref = String(thisIdx); } catch {}
    }

    // Occluded elements are still listed (the agent needs to know they
    // exist) but flagged, since a modal may have to be closed first.
    const occluded = inter && isOccluded(el) ? ' {occluded}' : '';

    const a = getAttrs(el);
    const scrollInfo = getScrollInfo(el);
    const inputHint = inter ? getInputHint(el) : '';

    // Leaf text extraction: only when the sole child is a text node.
    let leafText = '';
    if (el.childNodes.length === 1 && el.childNodes[0].nodeType === 3) {
      const t = el.childNodes[0].textContent.trim();
      if (t) leafText = t.length > MAX_TEXT ? t.slice(0, MAX_TEXT) + '...' : t;
    }

    // Iframe: emitted only when its document produced content.
    if (tag === 'iframe') {
      const iframeContent = getIframeContent(el, depth + 1, maxDepth);
      if (iframeContent) {
        return indent + prefix + '<iframe' + a + '>\\n' + iframeContent;
      }
      return '';
    }

    // Build output
    let out = '';

    if (skipSelf) {
      // Skip self but render children at the same depth
      for (const c of el.childNodes) out += walkNode(c, depth, maxDepth);
      out += getShadowContent(el, depth, maxDepth);
      return out;
    }

    if (inter || leafText || el.children.length === 0) {
      if (leafText) {
        out = indent + prefix + '<' + tag + a + scrollInfo + inputHint + occluded + '>' + leafText + '</' + tag + '>\\n';
      } else {
        out = indent + prefix + '<' + tag + a + scrollInfo + inputHint + occluded + '>\\n';
        for (const c of el.childNodes) out += walkNode(c, depth + 1, maxDepth);
        out += getShadowContent(el, depth + 1, maxDepth);
      }
    } else {
      // Non-interactive container: flattened unless it carries scroll info
      if (scrollInfo) {
        out = indent + '<' + tag + scrollInfo + '>\\n';
        for (const c of el.childNodes) out += walkNode(c, depth + 1, maxDepth);
        out += getShadowContent(el, depth + 1, maxDepth);
      } else {
        for (const c of el.childNodes) out += walkNode(c, depth, maxDepth);
        out += getShadowContent(el, depth, maxDepth);
      }
    }

    return out;
  }

  // -- Page-level scroll info header --
  const scrollY = window.scrollY;
  const scrollMax = document.documentElement.scrollHeight - window.innerHeight;
  const scrollPct = scrollMax > 0 ? Math.round((scrollY / scrollMax) * 100) : 0;
  const vpW = window.innerWidth;
  const vpH = window.innerHeight;
  const pageH = document.documentElement.scrollHeight;

  let header = '';
  header += 'viewport: ' + vpW + 'x' + vpH + ' | page_height: ' + pageH + 'px';
  header += ' | scroll: ' + scrollPct + '%';
  if (scrollY > 50) header += ' (' + Math.round(scrollY) + 'px from top)';
  if (scrollMax - scrollY > 50) header += ' (' + Math.round(scrollMax - scrollY) + 'px more below)';
  header += '\\n---\\n';

  // Store current hashes for next diff comparison. The array is shared by
  // reference, so entries pushed during the walk below are included.
  window.__lobster_prev_hashes = __currentHashes;

  // Drop data-ref marks from earlier snapshots so an element that is no
  // longer listed cannot keep an index now assigned to another element.
  // NOTE(review): only covers the main document, not shadow roots or iframe
  // documents; assumes data-ref is owned by this tool.
  for (const stale of document.querySelectorAll('[data-ref]')) {
    stale.removeAttribute('data-ref');
  }

  return header + walkNode(document.body, 0, MAX_DEPTH);
})()
`;
586
+
587
+ // src/browser/dom/semantic-tree.ts
588
/**
 * Browser-side script (evaluated in the page) that renders the DOM as an
 * accessibility-style outline: one line per element with its role (explicit
 * `role` attribute, else a tag-based default, else the tag name), accessible
 * name, interactivity classes, disabled flag, XPath and form-control state.
 * Text nodes appear as `text "..."` lines. Traversal starts at document.body
 * and stops below depth 20.
 *
 * Change from the earlier version: the associated-label lookup no longer
 * builds a `label[for="..."]` selector by string concatenation. An element id
 * containing a quote or backslash made that selector invalid, so
 * `querySelector` threw and aborted the whole script. Labels are now matched
 * by comparing their `for` attribute directly.
 */
var SEMANTIC_TREE_SCRIPT = `
(() => {
  const SKIP = new Set(['script','style','noscript','svg','head','meta','link','template']);

  const ROLE_MAP = {
    a: 'link', button: 'button', input: 'textbox', select: 'combobox',
    textarea: 'textbox', h1: 'heading', h2: 'heading', h3: 'heading',
    h4: 'heading', h5: 'heading', h6: 'heading', nav: 'navigation',
    main: 'main', header: 'banner', footer: 'contentinfo', aside: 'complementary',
    form: 'form', table: 'table', img: 'img', ul: 'list', ol: 'list', li: 'listitem',
    section: 'region', article: 'article', dialog: 'dialog', details: 'group',
    summary: 'button', progress: 'progressbar', meter: 'meter', output: 'status',
    label: 'label', legend: 'legend', fieldset: 'group', option: 'option',
    tr: 'row', td: 'cell', th: 'columnheader', caption: 'caption',
  };

  const INTERACTIVE_ROLES = new Set([
    'button','link','textbox','checkbox','radio','combobox','listbox',
    'menu','menuitem','tab','switch','slider','searchbox','spinbutton',
    'option','menuitemcheckbox','menuitemradio','treeitem',
  ]);

  // -- Accessible name (simplified take on the W3C algorithm) --
  // Sources are tried in priority order; the first non-empty one wins and
  // is capped at 120 characters.
  function getAccessibleName(el) {
    // 1. aria-labelledby (highest priority)
    const labelledBy = el.getAttribute('aria-labelledby');
    if (labelledBy) {
      const ids = labelledBy.split(/\\s+/);
      const parts = ids.map(id => {
        const ref = document.getElementById(id);
        return ref ? ref.textContent.trim() : '';
      }).filter(Boolean);
      if (parts.length > 0) return parts.join(' ').slice(0, 120);
    }

    // 2. aria-label
    const ariaLabel = el.getAttribute('aria-label');
    if (ariaLabel) return ariaLabel.slice(0, 120);

    // 3. alt (for images)
    const alt = el.getAttribute('alt');
    if (alt) return alt.slice(0, 120);

    // 4. title
    const title = el.getAttribute('title');
    if (title) return title.slice(0, 120);

    // 5. placeholder (for inputs)
    const placeholder = el.getAttribute('placeholder');
    if (placeholder) return placeholder.slice(0, 120);

    // 6. value (for buttons)
    if (el.tagName === 'INPUT' && (el.type === 'submit' || el.type === 'button')) {
      const val = el.getAttribute('value');
      if (val) return val.slice(0, 120);
    }

    // 7. Associated label: first label whose for attribute equals this id.
    // Compared as plain strings so ids with quotes or other selector
    // metacharacters cannot produce an invalid CSS selector.
    if (el.id) {
      for (const label of document.getElementsByTagName('label')) {
        if (label.getAttribute('for') === el.id) {
          return label.textContent.trim().slice(0, 120);
        }
      }
    }

    // 8. Direct text content (only for leaf-ish elements)
    if (el.children.length <= 2) {
      const text = el.textContent.trim();
      if (text && text.length < 120) return text;
    }

    return '';
  }

  // -- XPath generation --
  // Absolute path of tag[index] steps, index counted among same-tag
  // preceding siblings (1-based).
  function getXPath(el) {
    const parts = [];
    let current = el;
    while (current && current.nodeType === 1) {
      let index = 1;
      let sibling = current.previousElementSibling;
      while (sibling) {
        if (sibling.tagName === current.tagName) index++;
        sibling = sibling.previousElementSibling;
      }
      const tag = current.tagName.toLowerCase();
      parts.unshift(tag + '[' + index + ']');
      current = current.parentElement;
    }
    return '/' + parts.join('/');
  }

  // -- Interactivity classification --
  // Returns every reason the element counts as interactive:
  // native, aria, contenteditable, focusable, listener.
  function classifyInteractivity(el) {
    const types = [];
    const tag = el.tagName.toLowerCase();

    // Native
    if (['a','button','input','select','textarea','details','summary'].includes(tag)) {
      if (tag === 'a' && !el.href) {} // anchor without href is not interactive
      else if (tag === 'input' && el.type === 'hidden') {} // hidden inputs
      else types.push('native');
    }

    // ARIA role
    const role = el.getAttribute('role');
    if (role && INTERACTIVE_ROLES.has(role)) types.push('aria');

    // Contenteditable
    if (el.contentEditable === 'true') types.push('contenteditable');

    // Focusable
    if (el.tabIndex >= 0 && el.getAttribute('tabindex') !== null) types.push('focusable');

    // Event listeners (only inline/property handlers are detectable)
    if (el.onclick || el.onmousedown || el.onkeydown || el.onkeypress ||
        el.getAttribute('onclick') || el.getAttribute('onmousedown')) {
      types.push('listener');
    }

    return types;
  }

  // -- Disabled state with fieldset inheritance --
  function isDisabled(el) {
    if (el.disabled) return true;
    let parent = el.parentElement;
    while (parent) {
      if (parent.tagName === 'FIELDSET' && parent.disabled) {
        // Exception: elements inside the first legend child are NOT disabled
        const firstLegend = parent.querySelector(':scope > legend');
        if (firstLegend && firstLegend.contains(el)) return false;
        return true;
      }
      parent = parent.parentElement;
    }
    return false;
  }

  // -- Walk the DOM --
  function walk(el, depth, maxDepth) {
    if (!el || depth > maxDepth) return '';

    if (el.nodeType === 3) {
      const t = el.textContent.trim();
      return t ? ' '.repeat(depth) + 'text "' + t.slice(0, 100) + '"\\n' : '';
    }

    if (el.nodeType !== 1) return '';
    const tag = el.tagName.toLowerCase();
    if (SKIP.has(tag)) return '';

    const style = getComputedStyle(el);
    if (style.display === 'none' || style.visibility === 'hidden') return '';

    const indent = ' '.repeat(depth);
    const role = el.getAttribute('role') || ROLE_MAP[tag] || '';
    const name = getAccessibleName(el);
    const interTypes = classifyInteractivity(el);
    const interactive = interTypes.length > 0;
    const disabled = interactive && isDisabled(el);

    let line = indent;
    line += role || tag;

    if (name) line += ' "' + name.slice(0, 80) + '"';

    if (interactive) {
      line += ' [' + interTypes.join(',') + ']';
      if (disabled) line += ' {disabled}';
      line += ' xpath=' + getXPath(el);
    }

    // Input state
    if (tag === 'input') {
      const type = el.type || 'text';
      line += ' type=' + type;
      if (type === 'checkbox' || type === 'radio') {
        line += el.checked ? ' [checked]' : ' [unchecked]';
      } else if (el.value) {
        line += ' value="' + el.value.slice(0, 50) + '"';
      }
    }
    if (tag === 'textarea' && el.value) {
      line += ' value="' + el.value.slice(0, 50) + '"';
    }
    if (tag === 'select') {
      const opts = Array.from(el.options || []).map(o => ({
        text: o.text.slice(0, 30),
        value: o.value,
        selected: o.selected,
      }));
      const selected = opts.find(o => o.selected);
      if (selected) line += ' selected="' + selected.text + '"';
      // Option lists are only spelled out when short.
      if (opts.length <= 10) {
        line += ' options=[' + opts.map(o => o.text).join('|') + ']';
      }
    }

    line += '\\n';

    let out = line;
    for (const c of el.childNodes) {
      out += walk(c, depth + 1, maxDepth);
    }

    // Shadow DOM
    if (el.shadowRoot) {
      for (const c of el.shadowRoot.childNodes) {
        out += walk(c, depth + 1, maxDepth);
      }
    }

    return out;
  }

  return walk(document.body, 0, 20);
})()
`;
806
+
807
+ // src/browser/dom/markdown.ts
808
// MARKDOWN_SCRIPT: source text of an in-page IIFE that serialises document.body to Markdown.
// It is passed as a string to page.evaluate() by PuppeteerPage.markdown(); the IIFE's return
// value is a single string with runs of 3+ newlines collapsed to two and the ends trimmed.
//
// Behaviour visible in the script:
//   - script/style/noscript/svg/head/template subtrees are skipped, as are elements whose
//     computed style is display:none or visibility:hidden (getComputedStyle errors are ignored).
//   - Text nodes have whitespace collapsed to single spaces, except inside <pre>.
//   - href/src values are resolved against location.href; empty, '#'-prefixed and
//     'javascript:' hrefs are left unresolved, and links with such targets are emitted as plain text.
//   - <li> numbering uses a per-list counter stack; the bullet style depends on the parent tag.
//   - Tables emit a '---' separator after the first row containing a <th>, or after the very
//     first row when it has no <th>.
//
// Because the script lives inside a template literal, every backslash is doubled here
// (e.g. '\\n' reaches the page as the two-character escape \n).
//
// NOTE(review): escapeText() is defined but never called, and inTable is assigned but never
// read, so text is emitted without Markdown escaping. Confirm whether that is intentional.
// NOTE(review): el.querySelectorAll('tr') also matches rows of nested tables — verify that
// nested tables are not expected in practice.
+ var MARKDOWN_SCRIPT = `
809
+ (() => {
810
+ const SKIP = new Set(['script','style','noscript','svg','head','template']);
811
+ const baseUrl = location.href;
812
+
813
+ // Resolve relative URLs to absolute
814
+ function resolveUrl(href) {
815
+ if (!href || href.startsWith('javascript:') || href.startsWith('#')) return href;
816
+ try { return new URL(href, baseUrl).href; } catch { return href; }
817
+ }
818
+
819
+ // Escape Markdown special chars in text
820
+ function escapeText(text) {
821
+ return text
822
+ .replace(/\\\\/g, '\\\\\\\\')
823
+ .replace(/([*_~\`\\[\\]|])/g, '\\\\$1');
824
+ }
825
+
826
+ // State tracking
827
+ let listDepth = 0;
828
+ let orderedCounters = [];
829
+ let inPre = false;
830
+ let inTable = false;
831
+
832
+ function listIndent() { return ' '.repeat(listDepth); }
833
+
834
+ function walk(el) {
835
+ if (!el) return '';
836
+
837
+ // Text node
838
+ if (el.nodeType === 3) {
839
+ const text = el.textContent || '';
840
+ if (inPre) return text;
841
+ // Collapse whitespace
842
+ const collapsed = text.replace(/\\s+/g, ' ');
843
+ return collapsed === ' ' && !el.previousSibling && !el.nextSibling ? '' : collapsed;
844
+ }
845
+
846
+ if (el.nodeType !== 1) return '';
847
+ const tag = el.tagName.toLowerCase();
848
+ if (SKIP.has(tag)) return '';
849
+
850
+ // Visibility check
851
+ try {
852
+ const s = getComputedStyle(el);
853
+ if (s.display === 'none' || s.visibility === 'hidden') return '';
854
+ } catch {}
855
+
856
+ // Get children content
857
+ function childContent() {
858
+ let out = '';
859
+ for (const c of el.childNodes) out += walk(c);
860
+ return out;
861
+ }
862
+
863
+ switch (tag) {
864
+ // \u2500\u2500 Headings \u2500\u2500
865
+ case 'h1': return '\\n\\n# ' + childContent().trim() + '\\n\\n';
866
+ case 'h2': return '\\n\\n## ' + childContent().trim() + '\\n\\n';
867
+ case 'h3': return '\\n\\n### ' + childContent().trim() + '\\n\\n';
868
+ case 'h4': return '\\n\\n#### ' + childContent().trim() + '\\n\\n';
869
+ case 'h5': return '\\n\\n##### ' + childContent().trim() + '\\n\\n';
870
+ case 'h6': return '\\n\\n###### ' + childContent().trim() + '\\n\\n';
871
+
872
+ // \u2500\u2500 Block elements \u2500\u2500
873
+ case 'p': return '\\n\\n' + childContent().trim() + '\\n\\n';
874
+ case 'br': return '\\n';
875
+ case 'hr': return '\\n\\n---\\n\\n';
876
+
877
+ // \u2500\u2500 Inline formatting \u2500\u2500
878
+ case 'strong': case 'b': {
879
+ const inner = childContent().trim();
880
+ return inner ? '**' + inner + '**' : '';
881
+ }
882
+ case 'em': case 'i': {
883
+ const inner = childContent().trim();
884
+ return inner ? '*' + inner + '*' : '';
885
+ }
886
+ case 's': case 'del': case 'strike': {
887
+ const inner = childContent().trim();
888
+ return inner ? '~~' + inner + '~~' : '';
889
+ }
890
+ case 'code': {
891
+ if (inPre) return childContent();
892
+ const inner = childContent();
893
+ return inner ? '\\x60' + inner + '\\x60' : '';
894
+ }
895
+
896
+ // \u2500\u2500 Code blocks \u2500\u2500
897
+ case 'pre': {
898
+ inPre = true;
899
+ const inner = childContent();
900
+ inPre = false;
901
+ const lang = el.querySelector('code')?.className?.match(/language-(\\w+)/)?.[1] || '';
902
+ return '\\n\\n\\x60\\x60\\x60' + lang + '\\n' + inner.trim() + '\\n\\x60\\x60\\x60\\n\\n';
903
+ }
904
+
905
+ // \u2500\u2500 Links \u2500\u2500
906
+ case 'a': {
907
+ const href = resolveUrl(el.getAttribute('href') || '');
908
+ const inner = childContent().trim();
909
+ const name = inner || el.getAttribute('aria-label') || el.getAttribute('title') || '';
910
+ if (!name) return '';
911
+ if (!href || href === '#' || href.startsWith('javascript:')) return name;
912
+ return '[' + name + '](' + href + ')';
913
+ }
914
+
915
+ // \u2500\u2500 Images \u2500\u2500
916
+ case 'img': {
917
+ const alt = el.getAttribute('alt') || '';
918
+ const src = resolveUrl(el.getAttribute('src') || '');
919
+ return src ? '![' + alt + '](' + src + ')' : '';
920
+ }
921
+
922
+ // \u2500\u2500 Lists \u2500\u2500
923
+ case 'ul': {
924
+ listDepth++;
925
+ orderedCounters.push(0);
926
+ const inner = childContent();
927
+ listDepth--;
928
+ orderedCounters.pop();
929
+ return '\\n' + inner;
930
+ }
931
+ case 'ol': {
932
+ listDepth++;
933
+ orderedCounters.push(0);
934
+ const inner = childContent();
935
+ listDepth--;
936
+ orderedCounters.pop();
937
+ return '\\n' + inner;
938
+ }
939
+ case 'li': {
940
+ const parent = el.parentElement?.tagName?.toLowerCase();
941
+ const isOrdered = parent === 'ol';
942
+ const inner = childContent().trim();
943
+ if (!inner) return '';
944
+ if (isOrdered) {
945
+ const counter = orderedCounters.length > 0
946
+ ? ++orderedCounters[orderedCounters.length - 1] : 1;
947
+ return listIndent() + counter + '. ' + inner + '\\n';
948
+ }
949
+ return listIndent() + '- ' + inner + '\\n';
950
+ }
951
+
952
+ // \u2500\u2500 Blockquote \u2500\u2500
953
+ case 'blockquote': {
954
+ const inner = childContent().trim();
955
+ if (!inner) return '';
956
+ return '\\n\\n' + inner.split('\\n').map(line => '> ' + line).join('\\n') + '\\n\\n';
957
+ }
958
+
959
+ // \u2500\u2500 Tables \u2500\u2500
960
+ case 'table': {
961
+ inTable = true;
962
+ let out = '\\n\\n';
963
+ const rows = el.querySelectorAll('tr');
964
+ let headerDone = false;
965
+
966
+ for (let i = 0; i < rows.length; i++) {
967
+ const cells = rows[i].querySelectorAll('th, td');
968
+ const isHeader = rows[i].querySelector('th') !== null;
969
+ const cellTexts = [];
970
+ for (const cell of cells) {
971
+ let cellText = '';
972
+ for (const c of cell.childNodes) cellText += walk(c);
973
+ cellTexts.push(cellText.trim().replace(/\\|/g, '\\\\|').replace(/\\n/g, ' '));
974
+ }
975
+
976
+ out += '| ' + cellTexts.join(' | ') + ' |\\n';
977
+
978
+ if (isHeader && !headerDone) {
979
+ out += '| ' + cellTexts.map(() => '---').join(' | ') + ' |\\n';
980
+ headerDone = true;
981
+ }
982
+
983
+ // First data row without headers \u2014 synthesize separator
984
+ if (i === 0 && !isHeader && !headerDone) {
985
+ out += '| ' + cellTexts.map(() => '---').join(' | ') + ' |\\n';
986
+ headerDone = true;
987
+ }
988
+ }
989
+
990
+ inTable = false;
991
+ return out + '\\n';
992
+ }
993
+ case 'thead': case 'tbody': case 'tfoot':
994
+ return childContent();
995
+ case 'tr': case 'td': case 'th':
996
+ // Handled by table walker above; fallback for orphaned elements
997
+ return childContent();
998
+
999
+ // \u2500\u2500 Definition lists \u2500\u2500
1000
+ case 'dl': return '\\n\\n' + childContent() + '\\n\\n';
1001
+ case 'dt': return '\\n**' + childContent().trim() + '**\\n';
1002
+ case 'dd': return ': ' + childContent().trim() + '\\n';
1003
+
1004
+ // \u2500\u2500 Figure \u2500\u2500
1005
+ case 'figure': return '\\n\\n' + childContent().trim() + '\\n\\n';
1006
+ case 'figcaption': return '\\n*' + childContent().trim() + '*\\n';
1007
+
1008
+ // \u2500\u2500 Details/Summary \u2500\u2500
1009
+ case 'details': return '\\n\\n' + childContent() + '\\n\\n';
1010
+ case 'summary': return '**' + childContent().trim() + '**\\n\\n';
1011
+
1012
+ // \u2500\u2500 Generic blocks \u2500\u2500
1013
+ case 'div': case 'section': case 'article': case 'main': case 'aside':
1014
+ case 'header': case 'footer': case 'nav':
1015
+ return '\\n' + childContent() + '\\n';
1016
+
1017
+ case 'span': case 'small': case 'sub': case 'sup': case 'abbr':
1018
+ case 'time': case 'mark': case 'cite': case 'q':
1019
+ return childContent();
1020
+
1021
+ default:
1022
+ return childContent();
1023
+ }
1024
+ }
1025
+
1026
+ const raw = walk(document.body);
1027
+ // Clean up: collapse 3+ newlines to 2, trim
1028
+ return raw.replace(/\\n{3,}/g, '\\n\\n').replace(/^\\n+|\\n+$/g, '').trim();
1029
+ })()
1030
+ `;
1031
+
1032
+ // src/browser/dom/form-state.ts
1033
// FORM_STATE_SCRIPT: source text of an in-page IIFE that reports the state of form fields.
// It is passed as a string to page.evaluate() by PuppeteerPage.formState().
//
// The IIFE returns { forms: [...], orphanFields: [...] }:
//   - forms: one entry per document.forms item with id, name, action, upper-cased method
//     (defaulting to 'GET') and the list of extracted fields.
//   - orphanFields: input/textarea/select/[contenteditable="true"] elements whose .form is falsy.
// Each field is { tag, type, name, label, value, required, disabled, ref }:
//   - type is the lower-cased type attribute, falling back to the tag name.
//   - hidden/submit/button/reset/image inputs are skipped (extractField returns null).
//   - label is the first non-empty of aria-label, a matching <label for>, an enclosing <label>,
//     or the placeholder, cut to 80 characters.
//   - value is a boolean for checkbox/radio, the selected option's text for <select>, a fixed
//     four-bullet mask for non-empty passwords, otherwise the text value cut to 200 characters.
//   - ref is the element's data-ref attribute or null.
//
// NOTE(review): el.id is concatenated into the 'label[for="..."]' selector without escaping;
// an id containing a double quote would make querySelector throw and abort the whole script.
+ var FORM_STATE_SCRIPT = `
1034
+ (() => {
1035
+ function extractField(el) {
1036
+ const tag = el.tagName.toLowerCase();
1037
+ const type = (el.getAttribute('type') || tag).toLowerCase();
1038
+
1039
+ // Skip non-user-facing inputs
1040
+ if (['hidden', 'submit', 'button', 'reset', 'image'].includes(type)) return null;
1041
+
1042
+ const name = el.name || el.id || '';
1043
+
1044
+ // Find label via multiple strategies
1045
+ const label =
1046
+ el.getAttribute('aria-label') ||
1047
+ (el.id ? document.querySelector('label[for="' + el.id + '"]')?.textContent?.trim() : null) ||
1048
+ el.closest('label')?.textContent?.trim() ||
1049
+ el.placeholder ||
1050
+ '';
1051
+
1052
+ // Extract value based on type
1053
+ let value;
1054
+ if (tag === 'select') {
1055
+ const selected = el.options[el.selectedIndex];
1056
+ value = selected ? selected.textContent.trim() : '';
1057
+ } else if (type === 'checkbox' || type === 'radio') {
1058
+ value = el.checked;
1059
+ } else if (type === 'password') {
1060
+ value = el.value ? '\u2022\u2022\u2022\u2022' : '';
1061
+ } else if (el.isContentEditable) {
1062
+ value = el.textContent?.trim()?.slice(0, 200) || '';
1063
+ } else {
1064
+ value = el.value || '';
1065
+ }
1066
+
1067
+ return {
1068
+ tag,
1069
+ type,
1070
+ name,
1071
+ label: label.slice(0, 80),
1072
+ value: typeof value === 'string' ? value.slice(0, 200) : value,
1073
+ required: !!el.required,
1074
+ disabled: !!el.disabled,
1075
+ ref: el.dataset?.ref || null,
1076
+ };
1077
+ }
1078
+
1079
+ const result = { forms: [], orphanFields: [] };
1080
+
1081
+ // Collect forms
1082
+ for (const form of document.forms) {
1083
+ const fields = [];
1084
+ for (const el of form.elements) {
1085
+ const field = extractField(el);
1086
+ if (field) fields.push(field);
1087
+ }
1088
+ result.forms.push({
1089
+ id: form.id || '',
1090
+ name: form.name || '',
1091
+ action: form.action || '',
1092
+ method: (form.method || 'get').toUpperCase(),
1093
+ fields,
1094
+ });
1095
+ }
1096
+
1097
+ // Collect orphan fields (not in a <form>)
1098
+ const allInputs = document.querySelectorAll(
1099
+ 'input, textarea, select, [contenteditable="true"]'
1100
+ );
1101
+ for (const el of allInputs) {
1102
+ if (!el.form) {
1103
+ const field = extractField(el);
1104
+ if (field) result.orphanFields.push(field);
1105
+ }
1106
+ }
1107
+
1108
+ return result;
1109
+ })()
1110
+ `;
1111
+
1112
+ // src/browser/interceptor.ts
1113
/**
 * Builds the source text of an in-page script that records JSON responses of
 * fetch and XMLHttpRequest calls whose URL contains `pattern`.
 *
 * The generated script is idempotent per page: it returns immediately when
 * `window.__lobster_interceptor__` already exists. Captured entries are pushed
 * to `window.__lobster_interceptor__.requests` as
 * `{ url, method, status, body, timestamp }`. Responses whose body is not
 * valid JSON are silently skipped (best-effort capture).
 *
 * @param {string} pattern - Substring to look for in request URLs. It is
 *   embedded with JSON.stringify, so quotes and backslashes are safe.
 * @returns {string} JavaScript source of an IIFE, suitable for page.evaluate().
 */
function buildInterceptorScript(pattern) {
  return `
    (() => {
      if (window.__lobster_interceptor__) return;
      window.__lobster_interceptor__ = { requests: [] };
      const store = window.__lobster_interceptor__;
      const pattern = ${JSON.stringify(pattern)};

      // Patch fetch
      const origFetch = window.fetch;
      window.fetch = async function(...args) {
        const input = args[0];
        const init = args[1];
        // fetch() accepts a string, a URL (has .href) or a Request (has .url).
        const url = typeof input === 'string'
          ? input
          : String((input && (input.url || input.href)) || '');
        // The method comes from the init object first, then from a Request input.
        const method = String(
          (init && init.method) ||
          (input && typeof input === 'object' && input.method) ||
          'GET'
        ).toUpperCase();
        const resp = await origFetch.apply(this, args);
        if (url.includes(pattern)) {
          const clone = resp.clone();
          try {
            const body = await clone.json();
            store.requests.push({ url, method, status: resp.status, body, timestamp: Date.now() });
          } catch {}
        }
        return resp;
      };

      // Patch XHR
      const origOpen = XMLHttpRequest.prototype.open;
      const origSend = XMLHttpRequest.prototype.send;
      XMLHttpRequest.prototype.open = function(method, url, ...rest) {
        // open() also accepts URL objects; keep a string so .includes() works
        // and the captured entry stays serialisable.
        this.__url = url == null ? '' : String(url);
        this.__method = method;
        return origOpen.call(this, method, url, ...rest);
      };
      XMLHttpRequest.prototype.send = function(...args) {
        this.addEventListener('load', function() {
          if (this.__url && this.__url.includes(pattern)) {
            try {
              const body = JSON.parse(this.responseText);
              store.requests.push({ url: this.__url, method: this.__method, status: this.status, body, timestamp: Date.now() });
            } catch {}
          }
        });
        return origSend.apply(this, args);
      };
    })()
  `;
}
1158
// GET_INTERCEPTED_SCRIPT: source text of an in-page IIFE that drains the capture store
// created by the script from buildInterceptorScript(). It is passed as a string to
// page.evaluate() by PuppeteerPage.getInterceptedRequests().
// The IIFE returns a shallow copy of window.__lobster_interceptor__.requests and then
// replaces the stored list with an empty array, so each entry is returned only once.
// It returns [] when no interceptor has been installed on the page.
+ var GET_INTERCEPTED_SCRIPT = `
1159
+ (() => {
1160
+ const store = window.__lobster_interceptor__;
1161
+ if (!store) return [];
1162
+ const reqs = [...store.requests];
1163
+ store.requests = [];
1164
+ return reqs;
1165
+ })()
1166
+ `;
1167
+
1168
+ // src/browser/page-adapter.ts
1169
/**
 * Adapter that exposes navigation, DOM extraction and interaction helpers on
 * top of a Puppeteer `Page`.
 *
 * Element references (`ref`) are either a number, which is matched against the
 * `data-ref` attribute of an element in the page, or a CSS selector string.
 * The callbacks handed to `page.evaluate` run inside the browser page, not in
 * Node, so they may only use their explicit arguments and page globals.
 */
var PuppeteerPage = class {
  page;

  /** @param {object} page - Puppeteer Page instance to wrap. */
  constructor(page) {
    this.page = page;
  }

  /** The wrapped Puppeteer page. */
  get raw() {
    return this.page;
  }

  /**
   * Navigates to `url`.
   * @param {string} url
   * @param {{waitUntil?: string, timeout?: number}} [options] - `waitUntil`
   *   defaults to "networkidle2"; `timeout` (ms) defaults to 30000. An explicit
   *   `timeout: 0` is forwarded unchanged (Puppeteer treats 0 as "no timeout").
   */
  async goto(url, options) {
    await this.page.goto(url, {
      waitUntil: options?.waitUntil || "networkidle2",
      // `??` rather than `||` so that an explicit 0 is not replaced by the default.
      timeout: options?.timeout ?? 3e4
    });
  }

  /** Navigates back in history and waits for "networkidle2". */
  async goBack() {
    await this.page.goBack({ waitUntil: "networkidle2" });
  }

  /** @returns {Promise<string>} Current page URL. */
  async url() {
    return this.page.url();
  }

  /** @returns {Promise<string>} Current document title. */
  async title() {
    return this.page.title();
  }

  /** Evaluates `js` (source string or function) in the page and returns the result. */
  async evaluate(js) {
    return this.page.evaluate(js);
  }

  /** Evaluates SNAPSHOT_SCRIPT in the page; `_opts` is currently unused. */
  async snapshot(_opts) {
    return this.page.evaluate(SNAPSHOT_SCRIPT);
  }

  /** Evaluates SEMANTIC_TREE_SCRIPT in the page; `_opts` is currently unused. */
  async semanticTree(_opts) {
    return this.page.evaluate(SEMANTIC_TREE_SCRIPT);
  }

  /** Evaluates FLAT_TREE_SCRIPT in the page and returns its result unchanged. */
  async flatTree() {
    const raw = await this.page.evaluate(FLAT_TREE_SCRIPT);
    return raw;
  }

  /** @returns {Promise<string>} Markdown rendering produced by MARKDOWN_SCRIPT. */
  async markdown() {
    return this.page.evaluate(MARKDOWN_SCRIPT);
  }

  /**
   * Reports URL, title, viewport/page dimensions and scroll position.
   * `scrollPercent` is 0 when the page is not vertically scrollable.
   */
  async browserState() {
    const state = await this.page.evaluate(`
      (() => {
        const scrollY = window.scrollY;
        const scrollX = window.scrollX;
        const vpW = window.innerWidth;
        const vpH = window.innerHeight;
        const pageW = document.documentElement.scrollWidth;
        const pageH = document.documentElement.scrollHeight;
        const maxScrollY = pageH - vpH;
        return {
          url: location.href,
          title: document.title,
          viewportWidth: vpW,
          viewportHeight: vpH,
          pageWidth: pageW,
          pageHeight: pageH,
          scrollX: scrollX,
          scrollY: scrollY,
          scrollPercent: maxScrollY > 0 ? Math.round((scrollY / maxScrollY) * 100) : 0,
          pixelsAbove: Math.round(scrollY),
          pixelsBelow: Math.round(Math.max(0, maxScrollY - scrollY)),
        };
      })()
    `);
    return state;
  }

  /** @returns {Promise<object>} Result of FORM_STATE_SCRIPT ({ forms, orphanFields }). */
  async formState() {
    return this.page.evaluate(FORM_STATE_SCRIPT);
  }

  /**
   * Clicks an element.
   * Numeric refs are clicked with synthetic DOM events dispatched inside the
   * page (the previously focused element is blurred first), followed by a
   * 200 ms pause; string refs are delegated to Puppeteer's `page.click`.
   * @param {number|string} ref
   * @throws {Error} (from the page) when no element carries the numeric ref.
   */
  async click(ref) {
    if (typeof ref === "number") {
      await this.page.evaluate((idx) => {
        const el = document.querySelector('[data-ref="' + idx + '"]');
        if (!el) throw new Error("Element with index " + idx + " not found");
        const prev = document.activeElement;
        if (prev && prev !== el && prev !== document.body) {
          prev.blur();
          prev.dispatchEvent(new MouseEvent("mouseout", { bubbles: true, cancelable: true }));
          prev.dispatchEvent(new MouseEvent("mouseleave", { bubbles: false, cancelable: true }));
        }
        if (typeof el.scrollIntoViewIfNeeded === "function") {
          el.scrollIntoViewIfNeeded();
        } else {
          el.scrollIntoView({ behavior: "auto", block: "center", inline: "nearest" });
        }
        el.dispatchEvent(new MouseEvent("mouseenter", { bubbles: true, cancelable: true }));
        el.dispatchEvent(new MouseEvent("mouseover", { bubbles: true, cancelable: true }));
        el.dispatchEvent(new MouseEvent("mousedown", { bubbles: true, cancelable: true }));
        el.focus();
        el.dispatchEvent(new MouseEvent("mouseup", { bubbles: true, cancelable: true }));
        el.dispatchEvent(new MouseEvent("click", { bubbles: true, cancelable: true }));
      }, ref);
      await new Promise((r) => setTimeout(r, 200));
    } else {
      await this.page.click(ref);
    }
  }

  /**
   * Replaces the content of an element with `text`.
   * Numeric refs: the element is clicked, then its content is set inside the
   * page. Contenteditable elements are cleared and filled via beforeinput/input
   * events, falling back to execCommand when the text did not stick;
   * input/textarea elements are written through the prototype's `value` setter
   * before input/change events are dispatched. String refs: triple-click to
   * select, then type.
   * @param {number|string} ref
   * @param {string} text
   */
  async typeText(ref, text) {
    if (typeof ref === "number") {
      await this.click(ref);
      await this.page.evaluate((idx, txt) => {
        const el = document.querySelector('[data-ref="' + idx + '"]');
        if (!el) throw new Error("Element with index " + idx + " not found");
        const isInput = el.tagName === "INPUT" || el.tagName === "TEXTAREA";
        const isContentEditable = el.isContentEditable;
        if (isContentEditable) {
          // Plan A: emulate the native input event sequence; each step is
          // skipped if a page listener cancels the beforeinput event.
          if (el.dispatchEvent(new InputEvent("beforeinput", {
            bubbles: true,
            cancelable: true,
            inputType: "deleteContent"
          }))) {
            el.innerText = "";
            el.dispatchEvent(new InputEvent("input", {
              bubbles: true,
              inputType: "deleteContent"
            }));
          }
          if (el.dispatchEvent(new InputEvent("beforeinput", {
            bubbles: true,
            cancelable: true,
            inputType: "insertText",
            data: txt
          }))) {
            el.innerText = txt;
            el.dispatchEvent(new InputEvent("input", {
              bubbles: true,
              inputType: "insertText",
              data: txt
            }));
          }
          const planAOk = el.innerText.trim() === txt.trim();
          if (!planAOk) {
            // Plan B: select everything and let execCommand perform the edit.
            el.focus();
            const doc = el.ownerDocument;
            const sel = (doc.defaultView || window).getSelection();
            const range = doc.createRange();
            range.selectNodeContents(el);
            sel?.removeAllRanges();
            sel?.addRange(range);
            doc.execCommand("delete", false);
            doc.execCommand("insertText", false, txt);
          }
          el.dispatchEvent(new Event("change", { bubbles: true }));
          el.blur();
        } else if (isInput) {
          const inputEl = el;
          const proto = Object.getPrototypeOf(inputEl);
          // Use the prototype's setter instead of a plain assignment in case
          // the instance's `value` property has been overridden.
          const descriptor = Object.getOwnPropertyDescriptor(proto, "value") || Object.getOwnPropertyDescriptor(HTMLInputElement.prototype, "value") || Object.getOwnPropertyDescriptor(HTMLTextAreaElement.prototype, "value");
          if (descriptor?.set) {
            descriptor.set.call(inputEl, txt);
          } else {
            inputEl.value = txt;
          }
          inputEl.dispatchEvent(new Event("input", { bubbles: true }));
          inputEl.dispatchEvent(new Event("change", { bubbles: true }));
        } else {
          el.value = txt;
          el.dispatchEvent(new Event("input", { bubbles: true }));
          el.dispatchEvent(new Event("change", { bubbles: true }));
        }
      }, ref, text);
    } else {
      await this.page.click(ref, { count: 3 });
      await this.page.keyboard.type(text);
    }
  }

  /** Presses a single keyboard key via Puppeteer. */
  async pressKey(key) {
    await this.page.keyboard.press(key);
  }

  /** Selects `value` in the <select> identified by `ref`. */
  async selectOption(ref, value) {
    const selector = typeof ref === "number" ? '[data-ref="' + ref + '"]' : ref;
    await this.page.select(selector, value);
  }

  /**
   * Scrolls by `amount` pixels (default 500) in `direction`
   * ("up" | "down" | "left" | "right").
   * The scroll target is the nearest scrollable ancestor of the focused
   * element, else the first scrollable element in the document, else the
   * window. An element only counts as scrollable when it overflows on the
   * relevant axis and spans at least 30% of the viewport on that axis.
   * Waits 150 ms afterwards.
   */
  async scroll(direction, amount) {
    const distance = amount || 500;
    const isVertical = direction === "up" || direction === "down";
    const positive = direction === "down" || direction === "right";
    const delta = positive ? distance : -distance;
    await this.page.evaluate((dy, dx, isVert) => {
      const canScroll = (el2) => {
        if (!el2) return false;
        const s = getComputedStyle(el2);
        if (isVert) {
          return /(auto|scroll|overlay)/.test(s.overflowY) && el2.scrollHeight > el2.clientHeight && el2.clientHeight >= window.innerHeight * 0.3;
        } else {
          return /(auto|scroll|overlay)/.test(s.overflowX) && el2.scrollWidth > el2.clientWidth && el2.clientWidth >= window.innerWidth * 0.3;
        }
      };
      let el = document.activeElement;
      while (el && !canScroll(el) && el !== document.body) {
        el = el.parentElement;
      }
      if (!canScroll(el)) {
        el = Array.from(document.querySelectorAll("*")).find(canScroll) || null;
      }
      const isPageLevel = !el || el === document.body || el === document.documentElement || el === document.scrollingElement;
      if (isPageLevel) {
        if (isVert) {
          window.scrollBy(0, dy);
        } else {
          window.scrollBy(dx, 0);
        }
      } else {
        if (isVert) {
          el.scrollBy({ top: dy, behavior: "smooth" });
        } else {
          el.scrollBy({ left: dx, behavior: "smooth" });
        }
      }
    }, isVertical ? delta : 0, isVertical ? 0 : delta, isVertical);
    await new Promise((r) => setTimeout(r, 150));
  }

  /** Scrolls the referenced element into view; does nothing when it is not found. */
  async scrollToElement(ref) {
    const selector = typeof ref === "number" ? '[data-ref="' + ref + '"]' : ref;
    await this.page.evaluate((sel) => {
      const el = document.querySelector(sel);
      if (!el) return;
      if (typeof el.scrollIntoViewIfNeeded === "function") {
        el.scrollIntoViewIfNeeded();
      } else {
        el.scrollIntoView({ behavior: "auto", block: "center", inline: "nearest" });
      }
    }, selector);
  }

  /**
   * Returns the page's cookies as plain objects.
   * @param {{domain?: string}} [opts] - When `domain` is set, only cookies
   *   whose domain contains that substring are returned.
   */
  async getCookies(opts) {
    const cookies = await this.page.cookies();
    const filtered = opts?.domain ? cookies.filter((c) => c.domain.includes(opts.domain)) : cookies;
    return filtered.map((c) => ({
      name: c.name,
      value: c.value,
      domain: c.domain,
      path: c.path,
      expires: c.expires,
      httpOnly: c.httpOnly,
      secure: c.secure,
      sameSite: c.sameSite
    }));
  }

  /**
   * Waits for a fixed time and/or for text to appear.
   * @param {number|{time?: number, text?: string, timeout?: number}} options -
   *   A number is a delay in seconds. With an object, `time` (seconds) is
   *   awaited first, then `text` is awaited in `document.body.innerText` with
   *   `timeout` ms (default 30000; an explicit 0 is forwarded unchanged, which
   *   Puppeteer treats as "no timeout").
   */
  async wait(options) {
    if (typeof options === "number") {
      await new Promise((r) => setTimeout(r, options * 1e3));
      return;
    }
    if (options.time) {
      await new Promise((r) => setTimeout(r, options.time * 1e3));
    }
    if (options.text) {
      await this.page.waitForFunction(
        (t) => document.body.innerText.includes(t),
        // `??` rather than `||` so that an explicit 0 is not replaced by the default.
        { timeout: options.timeout ?? 3e4 },
        options.text
      );
    }
  }

  /**
   * Lists resource-timing entries of the page.
   * `method` and `status` are fixed placeholders ('GET', 200): the entries are
   * read from `performance.getEntriesByType('resource')`, and this code does
   * not read a method or status from them.
   * @param {boolean} [includeStatic] - Include img/font/css/script/link initiators.
   */
  async networkRequests(includeStatic) {
    const entries = await this.page.evaluate(`
      (() => {
        const entries = performance.getEntriesByType('resource');
        const staticTypes = new Set(['img', 'font', 'css', 'script', 'link']);
        const includeStatic = ${!!includeStatic};

        return entries
          .filter(e => includeStatic || !staticTypes.has(e.initiatorType))
          .map(e => ({
            url: e.name,
            method: 'GET',
            status: 200,
            type: e.initiatorType || 'other',
            size: e.transferSize || e.encodedBodySize || 0,
            duration: Math.round(e.duration),
          }));
      })()
    `);
    return entries || [];
  }

  /** Installs the fetch/XHR interceptor for URLs containing `pattern`. */
  async installInterceptor(pattern) {
    await this.page.evaluate(buildInterceptorScript(pattern));
  }

  /** Returns and clears the requests captured by the interceptor. */
  async getInterceptedRequests() {
    return this.page.evaluate(GET_INTERCEPTED_SCRIPT);
  }

  /**
   * Takes a screenshot.
   * @param {{format?: string, fullPage?: boolean}} [opts] - Defaults: "png", viewport only.
   * @returns {Promise<Buffer>}
   */
  async screenshot(opts) {
    const result = await this.page.screenshot({
      type: opts?.format || "png",
      fullPage: opts?.fullPage ?? false
    });
    return Buffer.from(result);
  }

  /**
   * Lists the browser's open pages. `id` is the index in `browser.pages()`.
   * NOTE(review): `title` is always the empty string here — confirm callers do
   * not rely on it.
   */
  async tabs() {
    const browser = this.page.browser();
    const pages = await browser.pages();
    return pages.map((p, i) => ({
      id: i,
      url: p.url(),
      title: "",
      active: p === this.page
    }));
  }

  /** Closes the wrapped page. */
  async close() {
    await this.page.close();
  }
};
1471
+
1472
+ // src/browser/dom/interactive.ts
1473
// INTERACTIVE_ELEMENTS_SCRIPT: source text of an in-page IIFE that lists the interactive
// elements found under document.body (its caller is not shown in this part of the file).
//
// classify() tags an element with any of:
//   'native'          - a/button/input/select/textarea/details/summary
//   'aria'            - role is one of the listed interactive ARIA roles
//   'contenteditable' - el.contentEditable === 'true'
//   'focusable'       - has an explicit tabindex attribute and tabIndex >= 0
//   'listener'        - el.onclick is set; handlers added with addEventListener are not
//                       detected, hence the "(approximate)" note in the script
// Elements with no tag, or with computed display:none / visibility:hidden, are skipped.
// Each result is { index, tag, role, text, types, ariaLabel, rect }, where index counts only
// the emitted entries, text is cut to 100 characters and rect comes from getBoundingClientRect().
+ var INTERACTIVE_ELEMENTS_SCRIPT = `
1474
+ (() => {
1475
+ const results = [];
1476
+
1477
+ function classify(el) {
1478
+ const tag = el.tagName.toLowerCase();
1479
+ const role = el.getAttribute('role');
1480
+ const types = [];
1481
+
1482
+ // Native interactive
1483
+ if (['a', 'button', 'input', 'select', 'textarea', 'details', 'summary'].includes(tag)) {
1484
+ types.push('native');
1485
+ }
1486
+
1487
+ // ARIA role interactive
1488
+ if (role && ['button', 'link', 'textbox', 'checkbox', 'radio', 'combobox', 'tab', 'switch', 'menuitem', 'slider'].includes(role)) {
1489
+ types.push('aria');
1490
+ }
1491
+
1492
+ // Contenteditable
1493
+ if (el.contentEditable === 'true') types.push('contenteditable');
1494
+
1495
+ // Focusable
1496
+ if (el.tabIndex >= 0 && el.getAttribute('tabindex') !== null) types.push('focusable');
1497
+
1498
+ // Has click listener (approximate)
1499
+ if (el.onclick) types.push('listener');
1500
+
1501
+ return types;
1502
+ }
1503
+
1504
+ let idx = 0;
1505
+ const walker = document.createTreeWalker(document.body, NodeFilter.SHOW_ELEMENT);
1506
+ let node;
1507
+ while (node = walker.nextNode()) {
1508
+ const types = classify(node);
1509
+ if (types.length === 0) continue;
1510
+
1511
+ const style = getComputedStyle(node);
1512
+ if (style.display === 'none' || style.visibility === 'hidden') continue;
1513
+
1514
+ const rect = node.getBoundingClientRect();
1515
+ results.push({
1516
+ index: idx++,
1517
+ tag: node.tagName.toLowerCase(),
1518
+ role: node.getAttribute('role') || '',
1519
+ text: (node.textContent || '').trim().slice(0, 100),
1520
+ types,
1521
+ ariaLabel: node.getAttribute('aria-label') || '',
1522
+ rect: { x: rect.x, y: rect.y, width: rect.width, height: rect.height },
1523
+ });
1524
+ }
1525
+
1526
+ return results;
1527
+ })()
1528
+ `;
1529
+
1530
+ // src/browser/wait.ts
1531
/**
 * Waits for a navigation on `page` to reach the given lifecycle event.
 *
 * Only "load", "domcontentloaded", "networkidle0" and "networkidle2" are
 * acted upon; any other condition resolves immediately. A rejected wait
 * (for example a timeout) is deliberately ignored, so this never rejects
 * because of the navigation itself.
 *
 * @param {object} page - Object exposing Puppeteer's `waitForNavigation`.
 * @param {string} condition - Lifecycle event name.
 * @param {number} [timeout=30000] - Maximum wait in milliseconds.
 * @returns {Promise<void>}
 */
async function waitForCondition(page, condition, timeout = 3e4) {
  const lifecycleEvents = ["load", "domcontentloaded", "networkidle0", "networkidle2"];
  if (!lifecycleEvents.includes(condition)) {
    return;
  }
  const ignoreFailure = () => {
  };
  await page.waitForNavigation({ waitUntil: condition, timeout }).catch(ignoreFailure);
}
1551
+
1552
+ // src/browser/lightpanda.ts
1553
// Tags that parseHtml treats as having no children and no closing tag.
var SELF_CLOSING = /* @__PURE__ */ new Set([
  "area", "base", "br", "col", "embed", "hr", "img",
  "input", "link", "meta", "param", "source", "track", "wbr"
]);

// Tags whose content parseHtml keeps as one raw text node instead of parsing it as markup.
var RAWTEXT_TAGS = /* @__PURE__ */ new Set([
  "script",
  "style",
  "textarea",
  "title"
]);
1570
/**
 * Parses an HTML string into a lightweight node tree (a forgiving,
 * non-spec-complete parser).
 *
 * Nodes are either `{ type: "text", text }` or
 * `{ type: "element", tag, attributes, children, selfClosing }`, where
 * `children` is undefined for self-closing elements. Comments, doctypes and
 * processing instructions are dropped. Text is entity-decoded, and
 * whitespace-only text is kept only when it contains a newline. Content of
 * RAWTEXT_TAGS elements is stored verbatim as a single text node.
 *
 * Compared with a naive scan this handles three edge cases:
 *  - an opening p/li/td/th/dt/dd implicitly closes an open element of the
 *    same tag, so `<li>a<li>b` yields two siblings rather than nested items;
 *  - a raw-text element that is never closed consumes the rest of the input;
 *  - a raw-text closing tag without a terminating ">" ends parsing instead of
 *    rewinding to the start of the input.
 *
 * @param {string} html - Markup to parse.
 * @returns {Array<object>} Top-level nodes.
 */
function parseHtml(html) {
  // Elements whose end tag may be omitted when the same tag opens again.
  const impliedEndTags = new Set(["p", "li", "td", "th", "dt", "dd"]);
  const root = [];
  const stack = [{ node: { type: "element", tag: "root", children: root }, children: root }];
  let pos = 0;
  function current() {
    return stack[stack.length - 1];
  }
  function addText(text) {
    if (!text) return;
    const decoded = decodeEntities(text);
    if (decoded.trim() || decoded.includes("\n")) {
      current().children.push({ type: "text", text: decoded });
    }
  }
  while (pos < html.length) {
    const nextTag = html.indexOf("<", pos);
    if (nextTag === -1) {
      addText(html.slice(pos));
      break;
    }
    if (nextTag > pos) {
      addText(html.slice(pos, nextTag));
    }
    // Comments
    if (html.startsWith("<!--", nextTag)) {
      const endComment = html.indexOf("-->", nextTag + 4);
      pos = endComment === -1 ? html.length : endComment + 3;
      continue;
    }
    // Doctype / processing instruction
    if (html.startsWith("<!", nextTag) || html.startsWith("<?", nextTag)) {
      const endDoctype = html.indexOf(">", nextTag);
      pos = endDoctype === -1 ? html.length : endDoctype + 1;
      continue;
    }
    // Closing tag: unwind to the nearest matching open element, if any.
    if (html[nextTag + 1] === "/") {
      const endClose = html.indexOf(">", nextTag);
      if (endClose === -1) {
        pos = html.length;
        break;
      }
      const closeTag = html.slice(nextTag + 2, endClose).trim().toLowerCase();
      pos = endClose + 1;
      for (let i = stack.length - 1; i > 0; i--) {
        if (stack[i].node.tag === closeTag) {
          stack.length = i;
          break;
        }
      }
      continue;
    }
    // Opening tag
    const tagEnd = html.indexOf(">", nextTag);
    if (tagEnd === -1) {
      pos = html.length;
      break;
    }
    const tagContent = html.slice(nextTag + 1, tagEnd);
    const selfClose = tagContent.endsWith("/");
    const cleanContent = selfClose ? tagContent.slice(0, -1).trim() : tagContent.trim();
    const spaceIdx = cleanContent.search(/[\s/]/);
    const tagName = (spaceIdx === -1 ? cleanContent : cleanContent.slice(0, spaceIdx)).toLowerCase();
    const attrStr = spaceIdx === -1 ? "" : cleanContent.slice(spaceIdx);
    if (!tagName || tagName.startsWith("!")) {
      pos = tagEnd + 1;
      continue;
    }
    // Close the open sibling BEFORE attaching the new node, so the new
    // element lands in the parent's child list and not inside its sibling.
    if (impliedEndTags.has(tagName) && stack.length >= 2 && current().node.tag === tagName) {
      stack.pop();
    }
    const attributes = parseAttributes(attrStr);
    const isSelfClosing = selfClose || SELF_CLOSING.has(tagName);
    const node = {
      type: "element",
      tag: tagName,
      attributes,
      children: isSelfClosing ? void 0 : [],
      selfClosing: isSelfClosing
    };
    current().children.push(node);
    pos = tagEnd + 1;
    if (isSelfClosing) continue;
    if (RAWTEXT_TAGS.has(tagName)) {
      // Case-insensitive search on the original string: lower-casing the
      // whole document per tag is wasteful and can shift indices for
      // characters whose lower-case form has a different length.
      const closeRe = new RegExp(`</${tagName}`, "gi");
      closeRe.lastIndex = pos;
      const closeMatch = closeRe.exec(html);
      const endRaw = closeMatch ? closeMatch.index : html.length;
      const rawText = html.slice(pos, endRaw);
      if (rawText.trim()) {
        node.children.push({ type: "text", text: rawText });
      }
      if (!closeMatch) {
        // Unclosed raw-text element: everything up to EOF was its content.
        pos = html.length;
        break;
      }
      const closeEnd = html.indexOf(">", endRaw);
      // A missing ">" must end the parse; `indexOf(...) + 1` would yield 0
      // here and restart the scan from the beginning forever.
      pos = closeEnd === -1 ? html.length : closeEnd + 1;
      continue;
    }
    stack.push({ node, children: node.children });
  }
  return root;
}
1666
/**
 * Parses the attribute portion of an opening tag into a plain object.
 *
 * Names are lower-cased. Values may be double-quoted, single-quoted or
 * unquoted and are passed through decodeEntities; attributes without a
 * value map to the empty string. A later duplicate overwrites an earlier one.
 *
 * @param {string} str - Text following the tag name inside `<...>`.
 * @returns {Object<string, string>} Attribute name → decoded value.
 */
function parseAttributes(str) {
  const attributePattern = /(\w[\w-]*)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|(\S+)))?/g;
  const result = {};
  for (const [, name, doubleQuoted, singleQuoted, bare] of str.matchAll(attributePattern)) {
    const rawValue = doubleQuoted ?? singleQuoted ?? bare ?? "";
    result[name.toLowerCase()] = decodeEntities(rawValue);
  }
  return result;
}
1675
/**
 * Decodes the HTML character references this parser supports: the named
 * entities amp, lt, gt, quot, apos and nbsp (nbsp becomes a regular space),
 * plus decimal (`&#NN;`) and hexadecimal (`&#xHH;`) references.
 *
 * Decoding happens in a single pass, so text produced by one replacement is
 * never decoded again (`&amp;lt;` yields `&lt;`, not `<`). Numeric references
 * are converted by code point, so characters outside the Basic Multilingual
 * Plane survive; references above U+10FFFF are left untouched.
 *
 * @param {string} text - Text possibly containing character references.
 * @returns {string} Decoded text.
 */
function decodeEntities(text) {
  const named = { amp: "&", lt: "<", gt: ">", quot: '"', apos: "'", nbsp: " " };
  const reference = /&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|(amp|lt|gt|quot|apos|nbsp));/g;
  return text.replace(reference, (match, decimal, hex, name) => {
    if (name !== undefined) return named[name];
    const codePoint = decimal !== undefined ? Number.parseInt(decimal, 10) : Number.parseInt(hex, 16);
    // String.fromCodePoint throws above U+10FFFF; keep such references as written.
    return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : match;
  });
}
1678
// Elements whose whole subtree is ignored when extracting text or Markdown.
var SKIP_TAGS = /* @__PURE__ */ new Set([
  "script", "style", "noscript", "svg", "head", "template", "iframe"
]);

// Elements that extractText surrounds with newlines.
var BLOCK_TAGS = /* @__PURE__ */ new Set([
  "div", "p", "section", "article", "main", "aside", "blockquote", "pre",
  "ul", "ol", "li",
  "table", "tr", "td", "th",
  "h1", "h2", "h3", "h4", "h5", "h6",
  "br", "hr",
  "figure", "figcaption", "details", "summary",
  "dl", "dt", "dd",
  "header", "footer", "nav", "form"
]);

// Markdown heading prefix for each heading tag.
var HEADING_LEVELS = {
  h1: "#",
  h2: "##",
  h3: "###",
  h4: "####",
  h5: "#####",
  h6: "######"
};

// Tag names treated as interactive.
var INTERACTIVE_TAGS = /* @__PURE__ */ new Set([
  "a", "button", "input", "select", "textarea", "details", "summary"
]);

// ARIA role values treated as interactive.
var INTERACTIVE_ROLES = /* @__PURE__ */ new Set([
  "button", "link", "textbox", "checkbox", "radio", "combobox", "tab", "switch", "menuitem"
]);
1683
/**
 * Flattens a parsed node list into plain text.
 *
 * Text nodes are appended as-is. Elements listed in SKIP_TAGS are dropped
 * together with their subtree. Elements in BLOCK_TAGS contribute their
 * trimmed inner text on a line of its own; all other elements contribute
 * their inner text inline. Runs of three or more newlines are collapsed to
 * two and the result is trimmed (this also applies to every nested call).
 *
 * @param {Array<object>} nodes - Nodes as produced by parseHtml.
 * @returns {string} Extracted text.
 */
function extractText(nodes) {
  let text = "";
  for (const node of nodes) {
    if (node.type === "text") {
      text += node.text;
    } else if (node.type === "element" && node.tag && !SKIP_TAGS.has(node.tag)) {
      const inner = node.children ? extractText(node.children) : "";
      text += BLOCK_TAGS.has(node.tag) ? "\n" + inner.trim() + "\n" : inner;
    }
  }
  return text.replace(/\n{3,}/g, "\n\n").trim();
}
1701
/**
 * Render a parsed node tree as Markdown.
 *
 * Handles headings, paragraphs, inline emphasis, code, links, images, lists,
 * blockquotes, tables, definition lists, figcaption and summary. Any other
 * element is rendered as its children, surrounded by newlines when its tag is
 * in BLOCK_TAGS. Elements whose tag is in SKIP_TAGS are dropped.
 *
 * @param {Array<{type: string, tag?: string, text?: string, attributes?: Object, children?: Array}>} nodes
 *   Parsed nodes to render.
 * @param {string} [baseUrl] Base used to make relative `href`/`src` values absolute.
 * @returns {string} Trimmed Markdown with runs of 3+ newlines collapsed to a blank line.
 */
function extractMarkdown(nodes, baseUrl) {
  let listDepth = 0;
  // One entry per open list: the running item number for <ol>, or the
  // sentinel -1 for <ul> so that its items are rendered with "- " bullets.
  const olCounter = [];

  // Make a link/image target absolute when possible; otherwise return it unchanged.
  function resolveUrl(href) {
    if (!href || href.startsWith("javascript:") || href.startsWith("#")) return href;
    if (href.startsWith("http://") || href.startsWith("https://") || href.startsWith("//")) return href;
    if (baseUrl) {
      try {
        return new URL(href, baseUrl).href;
      } catch {
        // Best effort: an unparsable URL is emitted as written.
      }
    }
    return href;
  }

  // Concatenate descendant text verbatim (no whitespace collapsing, no inline
  // markup). Used for <pre>, where line breaks and spacing are significant.
  function rawText(list) {
    let text = "";
    for (const node of list) {
      if (node.type === "text") {
        text += node.text ?? "";
      } else if (node.type === "element" && node.tag && !SKIP_TAGS.has(node.tag)) {
        if (node.tag === "br") text += "\n";
        else if (node.children) text += rawText(node.children);
      }
    }
    return text;
  }

  function walk(nodes2) {
    let out = "";
    for (const node of nodes2) {
      if (node.type === "text") {
        out += node.text?.replace(/\s+/g, " ") || "";
        continue;
      }
      if (node.type !== "element" || !node.tag) continue;
      if (SKIP_TAGS.has(node.tag)) continue;
      const tag = node.tag;
      const children = node.children || [];
      // Children are rendered on demand, exactly once per element. Rendering
      // them eagerly and then again in the list/table/default branches makes
      // the work exponential in nesting depth, and the discarded pass would
      // advance the enclosing list's item counter.
      const renderInner = () => walk(children).trim();

      // Own-property check: tag names come from untrusted markup and must not
      // match inherited keys such as "constructor".
      if (Object.hasOwn(HEADING_LEVELS, tag)) {
        out += `\n\n${HEADING_LEVELS[tag]} ${renderInner()}\n\n`;
        continue;
      }
      switch (tag) {
        case "p":
          out += `\n\n${renderInner()}\n\n`;
          break;
        case "br":
          out += "\n";
          break;
        case "hr":
          out += "\n\n---\n\n";
          break;
        case "strong":
        case "b": {
          const inner = renderInner();
          if (inner) out += `**${inner}**`;
          break;
        }
        case "em":
        case "i": {
          const inner = renderInner();
          if (inner) out += `*${inner}*`;
          break;
        }
        case "s":
        case "del":
        case "strike": {
          const inner = renderInner();
          if (inner) out += `~~${inner}~~`;
          break;
        }
        case "code": {
          const inner = renderInner();
          if (inner) out += `\`${inner}\``;
          break;
        }
        case "pre": {
          const lang = children.find((c) => c.tag === "code")?.attributes?.class?.match(/language-(\w+)/)?.[1] || "";
          // Drop leading blank lines and trailing whitespace only; the first
          // line's indentation and all interior line breaks are preserved.
          const code = rawText(children).replace(/^\s*\n/, "").trimEnd();
          out += `\n\n\`\`\`${lang}\n${code}\n\`\`\`\n\n`;
          break;
        }
        case "a": {
          const href = resolveUrl(node.attributes?.href || "");
          const text = renderInner() || node.attributes?.["aria-label"] || node.attributes?.title || "";
          if (!text) break;
          // Links without a usable target degrade to their text.
          if (!href || href === "#" || href.startsWith("javascript:")) {
            out += text;
            break;
          }
          out += `[${text}](${href})`;
          break;
        }
        case "img": {
          const alt = node.attributes?.alt || "";
          const src = resolveUrl(node.attributes?.src || "");
          if (src) out += `![${alt}](${src})`;
          break;
        }
        case "ul":
        case "ol":
          listDepth++;
          olCounter.push(tag === "ol" ? 0 : -1);
          out += "\n" + walk(children);
          listDepth--;
          olCounter.pop();
          break;
        case "li": {
          const inner = renderInner();
          const indent = " ".repeat(Math.max(0, listDepth - 1));
          const top = olCounter.length - 1;
          const isOrdered = top >= 0 && olCounter[top] >= 0;
          const bullet = isOrdered ? `${++olCounter[top]}. ` : "- ";
          out += `${indent}${bullet}${inner}\n`;
          break;
        }
        case "blockquote": {
          const inner = renderInner();
          if (inner) out += "\n\n" + inner.split("\n").map((l) => `> ${l}`).join("\n") + "\n\n";
          break;
        }
        case "table": {
          const rows = collectTableRows(children);
          if (rows.length > 0) {
            out += "\n\n";
            for (let i = 0; i < rows.length; i++) {
              out += "| " + rows[i].join(" | ") + " |\n";
              // The first row is treated as the header and followed by the separator row.
              if (i === 0) out += "| " + rows[i].map(() => "---").join(" | ") + " |\n";
            }
            out += "\n";
          }
          break;
        }
        case "dt":
          out += `\n**${renderInner()}**\n`;
          break;
        case "dd":
          out += `: ${renderInner()}\n`;
          break;
        case "figcaption":
          out += `\n*${renderInner()}*\n`;
          break;
        case "summary":
          out += `**${renderInner()}**\n\n`;
          break;
        default:
          if (BLOCK_TAGS.has(tag)) {
            out += "\n" + walk(children) + "\n";
          } else {
            out += walk(children);
          }
      }
    }
    return out;
  }

  // Collect table cells row by row, descending through thead/tbody/tfoot.
  function collectTableRows(nodes2) {
    const rows = [];
    for (const node of nodes2) {
      if (node.tag === "tr") {
        const cells = [];
        for (const cell of node.children || []) {
          if (cell.tag === "td" || cell.tag === "th") {
            // Escape pipes and flatten newlines so a cell cannot break the row.
            cells.push(walk(cell.children || []).trim().replace(/\|/g, "\\|").replace(/\n/g, " "));
          }
        }
        if (cells.length > 0) rows.push(cells);
      } else if (node.tag === "thead" || node.tag === "tbody" || node.tag === "tfoot") {
        rows.push(...collectTableRows(node.children || []));
      }
    }
    return rows;
  }

  const raw = walk(nodes);
  return raw.replace(/\n{3,}/g, "\n\n").trim();
}
1882
/**
 * Produce an indented, line-oriented outline of a parsed node tree.
 *
 * Interactive elements are prefixed with a running `[n]` index. Elements that
 * are interactive, childless, or wrap a single non-empty text node get their
 * own line; every other element is transparent and its children are emitted at
 * the same depth. Elements whose tag is in SKIP_TAGS are dropped.
 *
 * @param {Array<{type: string, tag?: string, text?: string, attributes?: Object, children?: Array}>} nodes
 *   Parsed nodes to outline.
 * @returns {string} One line per emitted node, each terminated by a newline.
 */
function extractSnapshot(nodes) {
  // Attributes echoed in the outline, in this order.
  const SHOWN_ATTRS = ["type", "role", "aria-label", "placeholder", "href", "value", "name", "alt"];
  let nextIndex = 0;

  const isInteractive = (el) => {
    if (!el.tag) return false;
    if (INTERACTIVE_TAGS.has(el.tag)) return true;
    const role = el.attributes?.role;
    if (role && INTERACTIVE_ROLES.has(role)) return true;
    if (el.attributes?.contenteditable === "true") return true;
    return Boolean(el.attributes?.tabindex && parseInt(el.attributes.tabindex) >= 0);
  };

  // " name=value ..." for the whitelisted attributes that are set; values are capped at 60 chars.
  const describeAttrs = (el) => {
    const shown = [];
    for (const attrName of SHOWN_ATTRS) {
      const value = el.attributes?.[attrName];
      if (value) shown.push(`${attrName}=${value.slice(0, 60)}`);
    }
    return shown.length ? " " + shown.join(" ") : "";
  };

  const render = (list, depth) => {
    const pad = " ".repeat(depth);
    let out = "";
    for (const node of list) {
      if (node.type === "text") {
        const trimmed = node.text?.trim();
        if (trimmed) out += pad + trimmed.slice(0, 150) + "\n";
        continue;
      }
      if (node.type !== "element" || !node.tag) continue;
      if (SKIP_TAGS.has(node.tag)) continue;

      const interactive = isInteractive(node);
      // The index is consumed before children are rendered (pre-order numbering).
      const marker = interactive ? `[${nextIndex++}]` : "";
      const attrText = describeAttrs(node);
      const kids = node.children;

      let leaf = "";
      if (kids?.length === 1 && kids[0].type === "text") {
        leaf = kids[0].text?.trim().slice(0, 150) || "";
      }
      if (leaf) {
        out += `${pad}${marker}<${node.tag}${attrText}>${leaf}</${node.tag}>\n`;
        continue;
      }

      const ownLine = interactive || !kids?.length;
      if (ownLine) out += `${pad}${marker}<${node.tag}${attrText}>\n`;
      if (kids) out += render(kids, ownLine ? depth + 1 : depth);
    }
    return out;
  };

  return render(nodes, 0);
}
1934
/**
 * Collect every `<a href>` in a parsed node tree, in document order.
 *
 * @param {Array<{type: string, tag?: string, attributes?: Object, children?: Array}>} nodes
 *   Parsed nodes to scan.
 * @param {string} [baseUrl] Base used to resolve hrefs that do not start with "http".
 * @returns {Array<{text: string, href: string}>} Links with non-empty text
 *   (capped at 200 chars) whose href is not a `javascript:` URL.
 */
function extractLinks(nodes, baseUrl) {
  const found = [];
  // Explicit stack, filled in reverse so that nodes pop in pre-order.
  const pending = [...nodes].reverse();
  while (pending.length > 0) {
    const node = pending.pop();
    const isAnchor = node.type === "element" && node.tag === "a";
    if (isAnchor && node.attributes?.href) {
      let target = node.attributes.href;
      if (baseUrl && !target.startsWith("http")) {
        try {
          target = new URL(target, baseUrl).href;
        } catch {
        }
      }
      const label = extractText(node.children || []).trim();
      if (label && target && !target.startsWith("javascript:")) {
        found.push({ text: label.slice(0, 200), href: target });
      }
    }
    // Anchors are descended into as well, so nested anchors are also reported.
    if (node.children) pending.push(...[...node.children].reverse());
  }
  return found;
}
1957
/**
 * Fetch a URL over HTTP and return its content in the requested representation.
 *
 * @param {string} url Address to fetch.
 * @param {Object} [options]
 * @param {number} [options.timeout] Abort the request after this many milliseconds (default 30000).
 * @param {"markdown"|"text"|"snapshot"|"html"|"links"} [options.dump]
 *   Output representation (default "markdown"; unknown values fall back to markdown).
 * @param {Object} [options.headers] Extra request headers; these override the defaults.
 * @param {boolean} [options.followRedirects] Pass `false` to use `redirect: "manual"`.
 * @returns {Promise<{url: string, finalUrl: string, status: number, title: string,
 *   content: string, links: (Array<{text: string, href: string}>|undefined), duration: number}>}
 *   `links` is only populated for `dump: "links"`; `duration` covers the request
 *   and body download in milliseconds.
 * @throws {Error} `HTTP <status> <statusText>` when the response is not ok.
 */
async function lobsterFetch(url, options) {
  const timeout = options?.timeout || 3e4;
  const dump = options?.dump || "markdown";
  const start = Date.now();
  const resp = await fetch(url, {
    headers: {
      "User-Agent": "LobsterCLI/0.1 (+https://github.com/iexcalibur/lobster-cli)",
      "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
      "Accept-Language": "en-US,en;q=0.5",
      ...options?.headers || {}
    },
    redirect: options?.followRedirects !== false ? "follow" : "manual",
    signal: AbortSignal.timeout(timeout)
  });
  if (!resp.ok) {
    throw new Error(`HTTP ${resp.status} ${resp.statusText}`);
  }
  const html = await resp.text();
  const duration = Date.now() - start;
  const finalUrl = resp.url || url;
  const nodes = parseHtml(html);

  // Depth-first search for the first <title> with text, in document order.
  // The result is propagated through every recursion level so the search
  // really stops at the first hit; otherwise a later <title> (for example one
  // inside an inline <svg>) would overwrite the document title.
  function findTitle(nodes2) {
    for (const node of nodes2) {
      if (node.tag === "title" && node.children?.[0]?.text) {
        return node.children[0].text.trim();
      }
      if (node.children) {
        const found = findTitle(node.children);
        if (found !== undefined) return found;
      }
    }
    return undefined;
  }
  const title = findTitle(nodes) ?? "";

  let content;
  let links;
  switch (dump) {
    case "markdown":
      content = extractMarkdown(nodes, finalUrl);
      break;
    case "text":
      content = extractText(nodes);
      break;
    case "snapshot":
      content = extractSnapshot(nodes);
      break;
    case "html":
      content = html;
      break;
    case "links":
      links = extractLinks(nodes, finalUrl);
      content = links.map((l, i) => `${i + 1}. [${l.text}](${l.href})`).join("\n");
      break;
    default:
      content = extractMarkdown(nodes, finalUrl);
  }
  return { url, finalUrl, status: resp.status, title, content, links, duration };
}
2013
+ export {
2014
+ BrowserManager,
2015
+ FLAT_TREE_SCRIPT,
2016
+ FORM_STATE_SCRIPT,
2017
+ GET_INTERCEPTED_SCRIPT,
2018
+ INTERACTIVE_ELEMENTS_SCRIPT,
2019
+ MARKDOWN_SCRIPT,
2020
+ PuppeteerPage,
2021
+ SEMANTIC_TREE_SCRIPT,
2022
+ SNAPSHOT_SCRIPT,
2023
+ buildInterceptorScript,
2024
+ buildSnapshotScript,
2025
+ extractLinks,
2026
+ extractMarkdown,
2027
+ extractSnapshot,
2028
+ extractText,
2029
+ flatTreeToString,
2030
+ lobsterFetch,
2031
+ parseHtml,
2032
+ waitForCondition
2033
+ };
2034
+ //# sourceMappingURL=index.js.map