lobster-cli 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  // src/browser/manager.ts
2
2
  import puppeteer from "puppeteer-core";
3
- import { existsSync } from "fs";
3
+ import { existsSync as existsSync3 } from "fs";
4
4
 
5
5
  // src/utils/logger.ts
6
6
  import chalk from "chalk";
@@ -16,20 +16,509 @@ var log = {
16
16
  dim: (msg) => console.log(chalk.dim(msg))
17
17
  };
18
18
 
19
+ // src/browser/profiles.ts
20
+ import { existsSync as existsSync2, mkdirSync as mkdirSync2, readFileSync as readFileSync2, writeFileSync as writeFileSync2, readdirSync, rmSync, statSync } from "fs";
21
+ import { join as join2 } from "path";
22
+
23
+ // src/config/index.ts
24
+ import { readFileSync, writeFileSync, mkdirSync, existsSync } from "fs";
25
+ import { join } from "path";
26
+ import { homedir } from "os";
27
+ import yaml from "js-yaml";
28
+
29
+ // src/config/schema.ts
30
+ import { z } from "zod";
31
/**
 * Zod schema for the lobster configuration object.
 *
 * Every section and every field carries a default, so parsing an empty
 * object yields a fully populated configuration.
 */
var configSchema = (() => {
  // LLM provider settings.
  const llm = z
    .object({
      provider: z.enum(["openai", "anthropic", "gemini", "ollama"]).default("openai"),
      baseURL: z.string().default("https://api.openai.com/v1"),
      model: z.string().default("gpt-4o"),
      apiKey: z.string().default(""),
      temperature: z.number().min(0).max(2).default(0.1),
      maxRetries: z.number().int().min(0).default(3)
    })
    .default({});

  // Browser launch / connection settings.
  const browser = z
    .object({
      executablePath: z.string().default(""),
      headless: z.boolean().default(true),
      connectTimeout: z.number().default(30),
      commandTimeout: z.number().default(60),
      cdpEndpoint: z.string().default(""),
      profile: z.string().default(""),
      stealth: z.boolean().default(false)
    })
    .default({});

  // Agent loop limits.
  const agent = z
    .object({
      maxSteps: z.number().int().default(40),
      stepDelay: z.number().default(0.4)
    })
    .default({});

  // Domain allow/block lists.
  const domains = z
    .object({
      allow: z.array(z.string()).default([]),
      block: z.array(z.string()).default([]),
      blockMessage: z.string().default("")
    })
    .default({});

  // Output formatting preferences.
  const output = z
    .object({
      defaultFormat: z.enum(["table", "json", "yaml", "markdown", "csv"]).default("table"),
      color: z.boolean().default(true)
    })
    .default({});

  return z.object({ llm, browser, agent, domains, output });
})();
63
+
64
+ // src/config/index.ts
65
// Directory holding all lobster state, located under the user's home directory.
var CONFIG_DIR = join(homedir(), ".lobster");
// Path of the YAML configuration file inside CONFIG_DIR.
var CONFIG_FILE = join(homedir(), ".lobster", "config.yaml");

/**
 * Returns the lobster configuration directory.
 * @returns {string} The value of CONFIG_DIR.
 */
function getConfigDir() {
  const configDir = CONFIG_DIR;
  return configDir;
}
70
+
71
+ // src/browser/profiles.ts
72
// Resolved lazily on each call: "<config dir>/profiles".
var PROFILES_DIR = () => {
  const configDir = getConfigDir();
  return join2(configDir, "profiles");
};

// Name of the metadata file stored inside each profile directory.
var META_FILE = ".lobster-meta.json";

// One alphanumeric character followed by up to 63 letters, digits, "_" or "-".
var VALID_NAME = /^[a-zA-Z0-9][a-zA-Z0-9_-]{0,63}$/;

// Lower-case names that validateName refuses: two generic names plus the
// con/prn/aux/nul, com1-com9 and lpt1-lpt9 device-style names.
var RESERVED_NAMES = /* @__PURE__ */ new Set([
  "default",
  "system",
  "con",
  "prn",
  "aux",
  "nul",
  ...Array.from({ length: 9 }, (_, i) => `com${i + 1}`),
  ...Array.from({ length: 9 }, (_, i) => `lpt${i + 1}`)
]);

// Sub-directories removed by resetProfileCache.
var CACHE_DIRS = [
  "Cache", "Code Cache", "GPUCache",
  "GrShaderCache", "ShaderCache",
  "Service Worker",
  "Sessions", "Session Storage",
  "blob_storage"
];
112
/**
 * Creates the profiles root directory (and any missing parents) when it
 * does not exist yet.
 */
function ensureProfilesDir() {
  const profilesRoot = PROFILES_DIR();
  if (existsSync2(profilesRoot)) {
    return;
  }
  mkdirSync2(profilesRoot, { recursive: true });
}
116
/**
 * Checks that a profile name is well formed and not reserved.
 *
 * @param {string} name - Candidate profile name.
 * @throws {Error} When the name does not match VALID_NAME, or when its
 *   lower-cased form is listed in RESERVED_NAMES.
 */
function validateName(name) {
  const wellFormed = VALID_NAME.test(name);
  if (!wellFormed) {
    throw new Error(`Invalid profile name "${name}". Use only letters, numbers, hyphens, underscores (max 64 chars).`);
  }

  // Reserved names are compared case-insensitively.
  const lowered = name.toLowerCase();
  if (RESERVED_NAMES.has(lowered)) {
    throw new Error(`"${name}" is a reserved name. Choose a different profile name.`);
  }
}
124
/**
 * Builds the directory path for a profile. The name is not validated here.
 *
 * @param {string} name - Profile name.
 * @returns {string} The profiles root joined with `name`.
 */
function getProfileDir(name) {
  const profilesRoot = PROFILES_DIR();
  return join2(profilesRoot, name);
}
127
/**
 * Loads the metadata file of a profile directory.
 *
 * @param {string} profileDir - Directory of the profile.
 * @returns {*} The parsed JSON content, or null when the file is missing,
 *   unreadable or not valid JSON.
 */
function readMeta(profileDir) {
  const metaFile = join2(profileDir, META_FILE);
  let parsed = null;
  if (existsSync2(metaFile)) {
    try {
      const raw = readFileSync2(metaFile, "utf-8");
      parsed = JSON.parse(raw);
    } catch {
      // Unreadable or corrupt metadata is treated the same as no metadata.
      parsed = null;
    }
  }
  return parsed;
}
136
/**
 * Writes profile metadata as 2-space indented JSON into the profile directory.
 *
 * @param {string} profileDir - Directory of the profile.
 * @param {object} meta - Metadata to serialise.
 */
function writeMeta(profileDir, meta) {
  const metaFile = join2(profileDir, META_FILE);
  const serialized = JSON.stringify(meta, null, 2);
  writeFileSync2(metaFile, serialized);
}
139
/**
 * Computes the total size of all regular files below a directory.
 *
 * Sizes are accumulated in bytes across the whole tree and rounded once at
 * the end. Rounding at every directory level would make each small
 * sub-directory contribute 0 MB and under-report the total.
 *
 * @param {string} dirPath - Directory to measure.
 * @returns {number} Size in MiB, rounded to one decimal place. Directories
 *   that cannot be read contribute whatever was counted before the failure.
 */
function getDirSizeMB(dirPath) {
  const BYTES_PER_MB = 1024 * 1024;

  // Recursively sums file sizes in bytes; best effort on unreadable entries.
  const sumBytes = (dir) => {
    let bytes = 0;
    try {
      for (const entry of readdirSync(dir, { withFileTypes: true })) {
        const fullPath = join2(dir, entry.name);
        if (entry.isFile()) {
          bytes += statSync(fullPath).size;
        } else if (entry.isDirectory() && entry.name !== ".lobster-meta.json") {
          bytes += sumBytes(fullPath);
        }
      }
    } catch {
      // Deliberately best effort: a size estimate should never fail a listing.
    }
    return bytes;
  };

  return Math.round(sumBytes(dirPath) / BYTES_PER_MB * 10) / 10;
}
155
/**
 * Creates a new, empty profile directory with its metadata file.
 *
 * @param {string} name - Profile name; validated with validateName.
 * @returns {{name: string, createdAt: string, lastUsed: string}} The
 *   metadata that was written.
 * @throws {Error} When the name is invalid or the profile already exists.
 */
function createProfile(name) {
  validateName(name);
  ensureProfilesDir();

  const profilePath = getProfileDir(name);
  const alreadyThere = existsSync2(profilePath);
  if (alreadyThere) {
    throw new Error(`Profile "${name}" already exists.`);
  }
  mkdirSync2(profilePath, { recursive: true });

  const createdAt = (/* @__PURE__ */ new Date()).toISOString();
  const lastUsed = (/* @__PURE__ */ new Date()).toISOString();
  const meta = { name, createdAt, lastUsed };

  writeMeta(profilePath, meta);
  log.success(`Profile "${name}" created at ${profilePath}`);
  return meta;
}
172
/**
 * Lists every profile directory together with its metadata and disk usage.
 *
 * Metadata read from disk is untrusted: it may be missing, a non-object
 * JSON value, or lack a `name`. Each entry is normalised so the result
 * always has a string `name` (falling back to the directory name) and
 * `createdAt` / `lastUsed` values (falling back to "unknown"). Without
 * that, a single malformed metadata file breaks the final sort.
 *
 * @returns {Array<object>} Profile entries sorted by name; empty when the
 *   profiles directory cannot be read.
 */
function listProfiles() {
  ensureProfilesDir();
  const dir = PROFILES_DIR();
  const profiles = [];
  try {
    for (const entry of readdirSync(dir, { withFileTypes: true })) {
      if (!entry.isDirectory()) continue;
      const profileDir = join2(dir, entry.name);
      const stored = readMeta(profileDir);
      const isRecord = stored !== null && typeof stored === "object" && !Array.isArray(stored);
      const meta = isRecord ? stored : {};
      profiles.push({
        createdAt: "unknown",
        lastUsed: "unknown",
        ...meta,
        name: typeof meta.name === "string" ? meta.name : entry.name,
        sizeMB: getDirSizeMB(profileDir)
      });
    }
  } catch {
    // Best effort: an unreadable profiles directory yields what was collected.
  }
  return profiles.sort((a, b) => a.name.localeCompare(b.name));
}
198
/**
 * Deletes a profile directory and everything inside it.
 *
 * The name is validated first. It is joined onto the profiles root and
 * handed to a recursive, forced delete, so an unvalidated value such as
 * "../.." would remove directories outside the profiles root.
 *
 * @param {string} name - Name of the profile to delete.
 * @throws {Error} When the name is invalid or the profile does not exist.
 */
function removeProfile(name) {
  validateName(name);
  const dir = getProfileDir(name);
  if (!existsSync2(dir)) {
    throw new Error(`Profile "${name}" does not exist.`);
  }
  rmSync(dir, { recursive: true, force: true });
  log.success(`Profile "${name}" deleted.`);
}
206
/**
 * Returns the data directory for a profile, creating the profile on first
 * use and refreshing its `lastUsed` timestamp otherwise.
 *
 * @param {string} name - Profile name; validated with validateName.
 * @returns {string} Path of the profile directory.
 * @throws {Error} When the name is invalid.
 */
function getProfileDataDir(name) {
  validateName(name);
  const profilePath = getProfileDir(name);

  if (!existsSync2(profilePath)) {
    // First use: createProfile writes fresh metadata itself.
    createProfile(name);
    return profilePath;
  }

  // Existing profile: fall back to placeholder metadata when none is readable.
  const meta = readMeta(profilePath) || { name, createdAt: "unknown", lastUsed: "" };
  meta.lastUsed = (/* @__PURE__ */ new Date()).toISOString();
  writeMeta(profilePath, meta);
  return profilePath;
}
218
/**
 * Removes the known cache directories of a profile, both at the profile
 * root and inside its "Default" sub-directory.
 *
 * The name is validated first because it is joined onto the profiles root
 * and used as the base for recursive deletes; an unvalidated "../x" would
 * target directories outside the profiles root.
 *
 * @param {string} name - Name of the profile whose caches are cleared.
 * @throws {Error} When the name is invalid or the profile does not exist.
 */
function resetProfileCache(name) {
  validateName(name);
  const dir = getProfileDir(name);
  if (!existsSync2(dir)) {
    throw new Error(`Profile "${name}" does not exist.`);
  }
  const bases = [dir, join2(dir, "Default")];
  let cleaned = 0;
  for (const cacheDir of CACHE_DIRS) {
    for (const base of bases) {
      const target = join2(base, cacheDir);
      if (existsSync2(target)) {
        rmSync(target, { recursive: true, force: true });
        cleaned++;
      }
    }
  }
  log.success(`Profile "${name}" cache reset (${cleaned} directories cleaned).`);
}
235
+
236
+ // src/browser/chrome-attach.ts
237
+ import http from "http";
238
// Ports scanned by discoverChrome when the caller supplies none.
var DEFAULT_PORTS = [9222, 9229, 9333, 9515];
// Socket timeout, in milliseconds, for a single probe request.
var PROBE_TIMEOUT = 1500;

/**
 * Probes a local port for a DevTools endpoint by requesting
 * http://127.0.0.1:<port>/json/version.
 *
 * The returned promise never rejects: any failure (connection error,
 * timeout, a response dropped mid-body, invalid JSON, or a payload
 * without `webSocketDebuggerUrl`) resolves to null.
 *
 * @param {number} port - Port to probe on 127.0.0.1.
 * @returns {Promise<{wsEndpoint: string, port: number, version: string, browser: string} | null>}
 */
function probePort(port) {
  return new Promise((resolve) => {
    const req = http.get(`http://127.0.0.1:${port}/json/version`, {
      timeout: PROBE_TIMEOUT
    }, (res) => {
      let data = "";
      // Decode as a stream so multi-byte characters split across chunks survive.
      res.setEncoding("utf8");
      res.on("data", (chunk) => {
        data += chunk;
      });
      // A connection dropped after the headers surfaces as an "error" on the
      // response; without a listener it would be an unhandled error event.
      res.on("error", () => resolve(null));
      res.on("end", () => {
        try {
          const info = JSON.parse(data);
          if (info.webSocketDebuggerUrl) {
            resolve({
              wsEndpoint: info.webSocketDebuggerUrl,
              port,
              version: info["Protocol-Version"] || "",
              browser: info.Browser || ""
            });
          } else {
            resolve(null);
          }
        } catch {
          resolve(null);
        }
      });
    });
    req.on("error", () => resolve(null));
    req.on("timeout", () => {
      // The timeout event only signals idleness; the request must be torn down here.
      req.destroy();
      resolve(null);
    });
  });
}
274
/**
 * Probes a list of ports in parallel and returns the first successful probe
 * in list order.
 *
 * @param {number[]} [ports] - Ports to probe; DEFAULT_PORTS when falsy.
 * @returns {Promise<object|null>} The first truthy probe result, or null.
 */
async function discoverChrome(ports) {
  const candidates = ports || DEFAULT_PORTS;
  log.debug(`Scanning ports for Chrome: ${candidates.join(", ")}`);

  const probes = await Promise.all(candidates.map(probePort));
  const hit = probes.find(Boolean) || null;

  if (!hit) {
    log.debug("No running Chrome instance found on debug ports.");
    return hit;
  }
  log.info(`Found Chrome on port ${hit.port}: ${hit.browser}`);
  return hit;
}
286
/**
 * Resolves the WebSocket debugger URL exposed on a port.
 *
 * @param {number} port - Port to probe.
 * @returns {Promise<string|null>} The endpoint URL, or null when the probe
 *   fails or reports no endpoint.
 */
async function getWebSocketDebuggerUrl(port) {
  const probe = await probePort(port);
  if (probe && probe.wsEndpoint) {
    return probe.wsEndpoint;
  }
  return null;
}
290
/**
 * Turns an attach target into a WebSocket debugger URL.
 *
 * Accepted targets:
 *  - `true` / `"true"`: auto-discover a running Chrome on the default ports;
 *  - a `ws://` or `wss://` URL: returned unchanged;
 *  - a port, as a number or a digits-only string (surrounding whitespace
 *    allowed) in the range 1-65535.
 *
 * Port strings are matched strictly. A lenient parseInt would accept
 * "9222abc" or "9222.7" as port 9222 and hide a typo.
 *
 * @param {boolean|string|number} target - Attach target as described above.
 * @returns {Promise<string>} The WebSocket debugger URL.
 * @throws {Error} When nothing is listening or the target is not understood.
 */
async function resolveAttachTarget(target) {
  if (target === true || target === "true") {
    const result = await discoverChrome();
    if (!result) {
      throw new Error(
        "No running Chrome found. Start Chrome with:\n google-chrome --remote-debugging-port=9222\n # or on Mac:\n /Applications/Google\\ Chrome.app/Contents/MacOS/Google\\ Chrome --remote-debugging-port=9222"
      );
    }
    return result.wsEndpoint;
  }

  // A numeric port is handled through the same path as its string form.
  const spec = typeof target === "number" ? String(target) : target;
  if (typeof spec === "string") {
    if (spec.startsWith("ws://") || spec.startsWith("wss://")) {
      return spec;
    }
    const digits = spec.trim();
    if (/^\d+$/.test(digits)) {
      const port = Number(digits);
      if (port > 0 && port < 65536) {
        const url = await getWebSocketDebuggerUrl(port);
        if (!url) {
          throw new Error(`No Chrome found on port ${port}. Make sure Chrome is running with --remote-debugging-port=${port}`);
        }
        return url;
      }
    }
    throw new Error(`Invalid attach target: "${target}". Use "true" for auto-discover, a port number, or a ws:// URL.`);
  }
  throw new Error("Invalid attach target.");
}
316
+
317
+ // src/browser/stealth.ts
318
/**
 * Page-side script, evaluated before any page script runs, that masks
 * common automation fingerprints.
 *
 * Differences from a naive implementation that matter for correctness:
 *  - WebGL and canvas patches are guarded with `typeof` checks, so a context
 *    lacking those globals does not abort the remaining sections;
 *  - canvas noise is applied only for the duration of the export and then
 *    reverted, so repeated toDataURL() calls return the same data and the
 *    page's canvas content is not permanently altered;
 *  - the permissions shim maps Notification.permission 'default' to the
 *    PermissionState 'prompt' ('default' is not a valid PermissionState).
 */
var STEALTH_SCRIPT = `
(() => {
  // -- 1. navigator.webdriver removal --
  Object.defineProperty(navigator, 'webdriver', {
    get: () => undefined,
    configurable: true,
  });
  // Also delete from prototype
  delete Object.getPrototypeOf(navigator).webdriver;

  // -- 2. Automation marker removal (cdc_*, __webdriver*, __selenium*, __driver*) --
  for (const key of Object.keys(window)) {
    if (/^cdc_|^__webdriver|^__selenium|^__driver/.test(key)) {
      try { delete window[key]; } catch {}
    }
  }

  // -- 3. Chrome runtime spoofing --
  if (!window.chrome) {
    window.chrome = {};
  }
  if (!window.chrome.runtime) {
    window.chrome.runtime = {
      connect: function() {},
      sendMessage: function() {},
      onMessage: { addListener: function() {} },
      id: undefined,
    };
  }
  if (!window.chrome.loadTimes) {
    window.chrome.loadTimes = function() {
      return {
        commitLoadTime: Date.now() / 1000 - 0.5,
        connectionInfo: 'h2',
        finishDocumentLoadTime: Date.now() / 1000 - 0.1,
        finishLoadTime: Date.now() / 1000 - 0.05,
        firstPaintAfterLoadTime: 0,
        firstPaintTime: Date.now() / 1000 - 0.3,
        navigationType: 'Other',
        npnNegotiatedProtocol: 'h2',
        requestTime: Date.now() / 1000 - 1,
        startLoadTime: Date.now() / 1000 - 0.8,
        wasAlternateProtocolAvailable: false,
        wasFetchedViaSpdy: true,
        wasNpnNegotiated: true,
      };
    };
  }
  if (!window.chrome.csi) {
    window.chrome.csi = function() {
      return {
        onloadT: Date.now(),
        startE: Date.now() - 500,
        pageT: 500,
        tran: 15,
      };
    };
  }

  // -- 4. Plugin array spoofing --
  const fakePlugins = [
    { name: 'Chrome PDF Plugin', filename: 'internal-pdf-viewer', description: 'Portable Document Format', length: 1 },
    { name: 'Chrome PDF Viewer', filename: 'mhjfbmdgcfjbbpaeojofohoefgiehjai', description: '', length: 1 },
    { name: 'Native Client', filename: 'internal-nacl-plugin', description: '', length: 2 },
  ];
  Object.defineProperty(navigator, 'plugins', {
    get: () => {
      const arr = fakePlugins.map(p => {
        const plugin = { ...p, item: (i) => plugin, namedItem: (n) => plugin };
        return plugin;
      });
      arr.item = (i) => arr[i];
      arr.namedItem = (n) => arr.find(p => p.name === n);
      arr.refresh = () => {};
      return arr;
    },
  });

  // -- 5. Languages --
  Object.defineProperty(navigator, 'languages', {
    get: () => ['en-US', 'en'],
  });
  Object.defineProperty(navigator, 'language', {
    get: () => 'en-US',
  });

  // -- 6. Platform consistency with the user agent --
  const platform = navigator.userAgent.includes('Mac') ? 'MacIntel' :
    navigator.userAgent.includes('Win') ? 'Win32' :
    navigator.userAgent.includes('Linux') ? 'Linux x86_64' : navigator.platform;
  Object.defineProperty(navigator, 'platform', { get: () => platform });

  // -- 7. Hardware concurrency & device memory --
  if (navigator.hardwareConcurrency < 2) {
    Object.defineProperty(navigator, 'hardwareConcurrency', { get: () => 8 });
  }
  if (!navigator.deviceMemory || navigator.deviceMemory < 2) {
    Object.defineProperty(navigator, 'deviceMemory', { get: () => 8 });
  }

  // -- 8. WebGL vendor/renderer spoofing --
  // 0x9245 = UNMASKED_VENDOR_WEBGL, 0x9246 = UNMASKED_RENDERER_WEBGL
  const patchGetParameter = (proto) => {
    const original = proto.getParameter;
    proto.getParameter = function(param) {
      if (param === 0x9245) return 'Intel Inc.';
      if (param === 0x9246) return 'Intel Iris OpenGL Engine';
      return original.call(this, param);
    };
  };
  if (typeof WebGLRenderingContext !== 'undefined') {
    patchGetParameter(WebGLRenderingContext.prototype);
  }
  if (typeof WebGL2RenderingContext !== 'undefined') {
    patchGetParameter(WebGL2RenderingContext.prototype);
  }

  // -- 9. Canvas fingerprint noise --
  // Deterministic per hostname; applied only while exporting, then reverted.
  if (typeof HTMLCanvasElement !== 'undefined') {
    const seed = location.hostname.split('').reduce((a, c) => a + c.charCodeAt(0), 0);
    // 1..255, so the noise is never a no-op modulo 256
    const noise = (seed % 255) + 1;
    const origToDataURL = HTMLCanvasElement.prototype.toDataURL;
    HTMLCanvasElement.prototype.toDataURL = function(...args) {
      const ctx = this.getContext('2d');
      let pristine = null;
      if (ctx && this.width > 0 && this.height > 0) {
        try {
          pristine = ctx.getImageData(0, 0, 1, 1);
          const noisy = ctx.getImageData(0, 0, 1, 1);
          noisy.data[0] = (noisy.data[0] + noise) % 256;
          ctx.putImageData(noisy, 0, 0);
        } catch {
          pristine = null;
        }
      }
      try {
        return origToDataURL.apply(this, args);
      } finally {
        if (pristine) {
          try { ctx.putImageData(pristine, 0, 0); } catch {}
        }
      }
    };
  }

  // -- 10. Permissions API --
  const origQuery = navigator.permissions?.query?.bind(navigator.permissions);
  if (origQuery) {
    navigator.permissions.query = function(descriptor) {
      if (descriptor && descriptor.name === 'notifications') {
        const permission = typeof Notification !== 'undefined' ? Notification.permission : '';
        const state = permission === 'granted' || permission === 'denied' ? permission : 'prompt';
        return Promise.resolve({ state, onchange: null });
      }
      return origQuery(descriptor);
    };
  }

  // -- 11. Notification constructor --
  if (!window.Notification) {
    window.Notification = function() {};
    window.Notification.permission = 'default';
    window.Notification.requestPermission = () => Promise.resolve('default');
  }

  // -- 12. Connection type --
  if (navigator.connection) {
    Object.defineProperty(navigator.connection, 'rtt', { get: () => 50 });
  }
})()
`;

/**
 * Registers STEALTH_SCRIPT to run on every new document of a page.
 *
 * @param {object} page - Object exposing `evaluateOnNewDocument(script)`.
 * @returns {Promise<void>}
 */
async function injectStealth(page) {
  await page.evaluateOnNewDocument(STEALTH_SCRIPT);
}

// Extra Chrome command-line switches for stealth mode.
// NOTE(review): the --disable-features entry names IsolateOrigins and
// site-per-process; confirm that relaxing those is acceptable for the
// sites this tool is pointed at.
var STEALTH_ARGS = [
  "--disable-blink-features=AutomationControlled",
  "--disable-features=IsolateOrigins,site-per-process",
  "--disable-infobars",
  "--window-size=1920,1080"
];
498
+
19
499
  // src/browser/manager.ts
20
500
  var BrowserManager = class {
21
501
  browser = null;
22
502
  config;
503
+ isAttached = false;
23
504
  constructor(config = {}) {
24
505
  this.config = config;
25
506
  }
26
507
  async connect() {
27
508
  if (this.browser?.connected) return this.browser;
509
+ if (this.config.attach) {
510
+ const wsEndpoint = await resolveAttachTarget(this.config.attach);
511
+ log.info(`Attaching to Chrome: ${wsEndpoint}`);
512
+ this.browser = await puppeteer.connect({ browserWSEndpoint: wsEndpoint });
513
+ this.isAttached = true;
514
+ return this.browser;
515
+ }
28
516
  if (this.config.cdpEndpoint) {
29
517
  log.debug(`Connecting to CDP endpoint: ${this.config.cdpEndpoint}`);
30
518
  this.browser = await puppeteer.connect({
31
519
  browserWSEndpoint: this.config.cdpEndpoint
32
520
  });
521
+ this.isAttached = true;
33
522
  return this.browser;
34
523
  }
35
524
  const executablePath = this.config.executablePath || findChrome();
@@ -38,27 +527,48 @@ var BrowserManager = class {
38
527
  "Chrome/Chromium not found. Set LOBSTER_BROWSER_PATH or config browser.executablePath"
39
528
  );
40
529
  }
530
+ const args = [
531
+ "--no-sandbox",
532
+ "--disable-setuid-sandbox",
533
+ "--disable-dev-shm-usage",
534
+ "--disable-gpu"
535
+ ];
536
+ if (this.config.stealth) {
537
+ args.push(...STEALTH_ARGS);
538
+ }
539
+ let userDataDir;
540
+ if (this.config.profile) {
541
+ userDataDir = getProfileDataDir(this.config.profile);
542
+ log.info(`Using profile "${this.config.profile}" \u2192 ${userDataDir}`);
543
+ }
41
544
  log.debug(`Launching Chrome: ${executablePath}`);
42
545
  this.browser = await puppeteer.launch({
43
546
  executablePath,
44
547
  headless: this.config.headless ?? true,
45
- args: [
46
- "--no-sandbox",
47
- "--disable-setuid-sandbox",
48
- "--disable-dev-shm-usage",
49
- "--disable-gpu"
50
- ]
548
+ userDataDir,
549
+ args
51
550
  });
551
+ this.isAttached = false;
52
552
  return this.browser;
53
553
  }
54
554
  async newPage() {
55
555
  const browser = await this.connect();
56
- return browser.newPage();
556
+ const page = await browser.newPage();
557
+ if (this.config.stealth) {
558
+ await injectStealth(page);
559
+ log.debug("Stealth mode enabled");
560
+ }
561
+ return page;
57
562
  }
58
563
  async close() {
59
564
  if (this.browser) {
60
- await this.browser.close().catch(() => {
61
- });
565
+ if (this.isAttached) {
566
+ this.browser.disconnect();
567
+ log.debug("Disconnected from Chrome (attached mode)");
568
+ } else {
569
+ await this.browser.close().catch(() => {
570
+ });
571
+ }
62
572
  this.browser = null;
63
573
  }
64
574
  }
@@ -78,7 +588,7 @@ function findChrome() {
78
588
  "/usr/bin/chromium",
79
589
  "/snap/bin/chromium"
80
590
  ];
81
- return paths.find((p) => existsSync(p));
591
+ return paths.find((p) => existsSync3(p));
82
592
  }
83
593
 
84
594
  // src/browser/dom/flat-tree.ts
@@ -584,6 +1094,164 @@ var SNAPSHOT_SCRIPT = `
584
1094
  })()
585
1095
  `;
586
1096
 
1097
+ // src/browser/dom/compact-snapshot.ts
1098
/**
 * Page-side script that returns a compact, newline-separated text snapshot
 * of the page: a header line (URL and scroll percentage), landmark section
 * markers, and one line per visible interactive element of the form
 * `[idx] role "name" val="value"`. Each emitted element is tagged with
 * `data-ref="<idx>"`. Output stops once TOKEN_BUDGET * CHARS_PER_TOKEN
 * characters are used.
 *
 * Robustness points handled inside the script:
 *  - `data-ref` attributes left by an earlier snapshot are cleared first, so
 *    a ref can never point at two elements;
 *  - label lookup compares `htmlFor` instead of building a selector from the
 *    raw id, which would throw for ids containing quotes;
 *  - the "more elements" count is clamped at zero;
 *  - a document without a body yields just the header line.
 *
 * The literal line `const TOKEN_BUDGET = 800;` is the substitution point
 * used by buildCompactSnapshotScript and must stay byte-identical.
 */
var COMPACT_SNAPSHOT_SCRIPT = `
(() => {
  const TOKEN_BUDGET = 800;
  const CHARS_PER_TOKEN = 4;
  const CHAR_BUDGET = TOKEN_BUDGET * CHARS_PER_TOKEN;
  const CANDIDATE_SELECTOR = 'a,button,input,select,textarea,[role]';

  const SKIPPED_TAGS = new Set([
    'script','style','noscript','svg','path','meta','link','head','template',
  ]);
  const INTERACTIVE_TAGS = new Set([
    'a','button','input','select','textarea','details','summary','label',
  ]);
  const INTERACTIVE_ROLES = new Set([
    'button','link','textbox','checkbox','radio','combobox','listbox',
    'menu','menuitem','tab','switch','slider','searchbox','spinbutton',
    'option','menuitemcheckbox','menuitemradio','treeitem',
  ]);
  const LANDMARK_TAGS = new Map([
    ['nav', 'Navigation'],
    ['main', 'Main Content'],
    ['header', 'Header'],
    ['footer', 'Footer'],
    ['aside', 'Sidebar'],
    ['form', 'Form'],
  ]);
  const LANDMARK_ROLES = new Map([
    ['navigation', 'Navigation'],
    ['main', 'Main Content'],
    ['banner', 'Header'],
    ['contentinfo', 'Footer'],
    ['complementary', 'Sidebar'],
    ['search', 'Search'],
    ['dialog', 'Dialog'],
  ]);

  function isVisible(el) {
    if (el.offsetWidth === 0 && el.offsetHeight === 0 && el.tagName !== 'INPUT') return false;
    const s = getComputedStyle(el);
    return s.display !== 'none' && s.visibility !== 'hidden' && s.opacity !== '0';
  }

  function isInteractive(el) {
    const tag = el.tagName.toLowerCase();
    if (INTERACTIVE_TAGS.has(tag)) {
      if (el.disabled) return false;
      if (tag === 'input' && el.type === 'hidden') return false;
      return true;
    }
    const role = el.getAttribute('role');
    if (role && INTERACTIVE_ROLES.has(role)) return true;
    if (el.contentEditable === 'true') return true;
    if (el.tabIndex >= 0 && el.getAttribute('tabindex') !== null) return true;
    return false;
  }

  function getRole(el) {
    const role = el.getAttribute('role');
    if (role) return role;
    const tag = el.tagName.toLowerCase();
    if (tag === 'a') return 'link';
    if (tag === 'button' || tag === 'summary') return 'button';
    if (tag === 'input') return el.type || 'text';
    if (tag === 'select') return 'select';
    if (tag === 'textarea') return 'textarea';
    if (tag === 'label') return 'label';
    return tag;
  }

  // Text of the first <label for> pointing at el.id; no selector is built
  // from the id, so unusual ids cannot cause a syntax error.
  function getLabelText(el) {
    if (!el.id) return '';
    for (const label of document.querySelectorAll('label[for]')) {
      if (label.htmlFor === el.id) return (label.textContent || '').trim();
    }
    return '';
  }

  function getName(el) {
    return (
      el.getAttribute('aria-label') ||
      el.getAttribute('alt') ||
      el.getAttribute('title') ||
      el.getAttribute('placeholder') ||
      (el.tagName === 'INPUT' && (el.type === 'submit' || el.type === 'button') ? el.value : '') ||
      getLabelText(el) ||
      (el.children.length <= 2 ? el.textContent?.trim() : '') ||
      ''
    ).slice(0, 60);
  }

  function getValue(el) {
    const tag = el.tagName.toLowerCase();
    if (tag === 'input') {
      const type = el.type || 'text';
      if (type === 'checkbox' || type === 'radio') return el.checked ? 'checked' : 'unchecked';
      if (type === 'password') return el.value ? '****' : '';
      return el.value ? el.value.slice(0, 30) : '';
    }
    if (tag === 'textarea') return el.value ? el.value.slice(0, 30) : '';
    if (tag === 'select' && el.selectedOptions?.length) return el.selectedOptions[0].text.slice(0, 30);
    return '';
  }

  let idx = 0;
  let charsUsed = 0;
  const lines = [];
  let lastLandmark = '';

  // Page header
  const scrollY = window.scrollY;
  const scrollMax = document.documentElement.scrollHeight - window.innerHeight;
  const scrollPct = scrollMax > 0 ? Math.round((scrollY / scrollMax) * 100) : 0;
  const header = 'url: ' + location.href + ' | scroll: ' + scrollPct + '%';
  lines.push(header);
  charsUsed += header.length;

  // Drop refs from any earlier snapshot so every ref is unique again
  for (const stale of document.querySelectorAll('[data-ref]')) {
    try { stale.removeAttribute('data-ref'); } catch {}
  }

  // Walk DOM (nothing to walk when the document has no body)
  if (document.body) {
    const walker = document.createTreeWalker(document.body, NodeFilter.SHOW_ELEMENT);
    let node;
    while ((node = walker.nextNode())) {
      if (!isVisible(node)) continue;

      const tag = node.tagName.toLowerCase();
      if (SKIPPED_TAGS.has(tag)) continue;

      // Emit a section marker when entering a different landmark
      const role = node.getAttribute('role');
      const landmark = LANDMARK_TAGS.get(tag) || (role ? LANDMARK_ROLES.get(role) : null);
      if (landmark && landmark !== lastLandmark) {
        const sectionLine = '--- ' + landmark + ' ---';
        if (charsUsed + sectionLine.length > CHAR_BUDGET) break;
        lines.push(sectionLine);
        charsUsed += sectionLine.length;
        lastLandmark = landmark;
      }

      // Only emit interactive elements
      if (!isInteractive(node)) continue;

      const elRole = getRole(node);
      const name = getName(node);
      const value = getValue(node);

      let line = '[' + idx + '] ' + elRole;
      if (name) line += ' "' + name.replace(/"/g, "'") + '"';
      if (value) line += ' val="' + value.replace(/"/g, "'") + '"';

      if (charsUsed + line.length > CHAR_BUDGET) {
        // Approximate count; clamped because the selector and the
        // interactivity test do not select exactly the same elements.
        const remaining = Math.max(0, document.querySelectorAll(CANDIDATE_SELECTOR).length - idx);
        lines.push('... (' + remaining + ' more elements)');
        break;
      }

      // Annotate element with ref for clicking
      try { node.dataset.ref = String(idx); } catch {}

      lines.push(line);
      charsUsed += line.length;
      idx++;
    }
  }

  return lines.join('\\n');
})()
`;

/**
 * Returns the compact snapshot script with a custom token budget.
 *
 * The budget is spliced into JavaScript source, so it is coerced to a
 * positive integer first; anything else (NaN, zero, negative values,
 * strings carrying code) falls back to the default of 800.
 *
 * @param {number} [tokenBudget=800] - Approximate token budget for the snapshot.
 * @returns {string} Script source to evaluate in the page.
 */
function buildCompactSnapshotScript(tokenBudget = 800) {
  const requested = Math.floor(Number(tokenBudget));
  const budget = Number.isFinite(requested) && requested > 0 ? requested : 800;
  return COMPACT_SNAPSHOT_SCRIPT.replace("const TOKEN_BUDGET = 800;", `const TOKEN_BUDGET = ${budget};`);
}
1254
+
587
1255
  // src/browser/dom/semantic-tree.ts
588
1256
  var SEMANTIC_TREE_SCRIPT = `
589
1257
  (() => {
@@ -1109,6 +1777,64 @@ var FORM_STATE_SCRIPT = `
1109
1777
  })()
1110
1778
  `;
1111
1779
 
1780
+ // src/browser/dom/interactive.ts
1781
/**
 * Page-side script that returns an array describing every interactive
 * element whose computed style is neither `display: none` nor
 * `visibility: hidden`. Each entry has: index, tag, role, text (first 100
 * characters of the trimmed textContent), types (why the element counts as
 * interactive), ariaLabel and rect (bounding client rect).
 *
 * A document without a body returns an empty array instead of throwing
 * from createTreeWalker.
 */
var INTERACTIVE_ELEMENTS_SCRIPT = `
(() => {
  const results = [];
  if (!document.body) return results;

  const NATIVE_TAGS = ['a', 'button', 'input', 'select', 'textarea', 'details', 'summary'];
  const ARIA_ROLES = ['button', 'link', 'textbox', 'checkbox', 'radio', 'combobox', 'tab', 'switch', 'menuitem', 'slider'];

  function classify(el) {
    const tag = el.tagName.toLowerCase();
    const role = el.getAttribute('role');
    const types = [];

    // Native interactive
    if (NATIVE_TAGS.includes(tag)) types.push('native');

    // ARIA role interactive
    if (role && ARIA_ROLES.includes(role)) types.push('aria');

    // Contenteditable
    if (el.contentEditable === 'true') types.push('contenteditable');

    // Focusable through an explicit tabindex
    if (el.tabIndex >= 0 && el.getAttribute('tabindex') !== null) types.push('focusable');

    // Has click listener (approximate: only the onclick property is visible)
    if (el.onclick) types.push('listener');

    return types;
  }

  let idx = 0;
  const walker = document.createTreeWalker(document.body, NodeFilter.SHOW_ELEMENT);
  let node;
  while ((node = walker.nextNode())) {
    const types = classify(node);
    if (types.length === 0) continue;

    const style = getComputedStyle(node);
    if (style.display === 'none' || style.visibility === 'hidden') continue;

    const rect = node.getBoundingClientRect();
    results.push({
      index: idx++,
      tag: node.tagName.toLowerCase(),
      role: node.getAttribute('role') || '',
      text: (node.textContent || '').trim().slice(0, 100),
      types,
      ariaLabel: node.getAttribute('aria-label') || '',
      rect: { x: rect.x, y: rect.y, width: rect.width, height: rect.height },
    });
  }

  return results;
})()
`;
1837
+
1112
1838
  // src/browser/interceptor.ts
1113
1839
  function buildInterceptorScript(pattern) {
1114
1840
  return `
@@ -1165,6 +1891,155 @@ var GET_INTERCEPTED_SCRIPT = `
1165
1891
  })()
1166
1892
  `;
1167
1893
 
1894
+ // src/browser/semantic-find.ts
1895
/**
 * Synonym table used to widen query and description tokens.
 *
 * The table has a null prototype. Tokens come from page text and user
 * queries, so a token such as "constructor" must not resolve to a member
 * inherited from Object.prototype.
 */
var SYNONYMS = Object.assign(/* @__PURE__ */ Object.create(null), {
  btn: ["button"],
  button: ["btn", "submit", "click"],
  submit: ["go", "send", "ok", "confirm", "done", "button"],
  search: ["find", "lookup", "query", "filter"],
  login: ["signin", "sign-in", "log-in", "authenticate"],
  signup: ["register", "create-account", "sign-up", "join"],
  logout: ["signout", "sign-out", "log-out"],
  close: ["dismiss", "x", "cancel", "exit"],
  menu: ["nav", "navigation", "hamburger", "sidebar"],
  nav: ["navigation", "menu", "navbar"],
  input: ["field", "textbox", "text", "entry"],
  email: ["mail", "e-mail"],
  password: ["pass", "pwd", "secret"],
  next: ["continue", "forward", "proceed"],
  back: ["previous", "return", "go-back"],
  save: ["store", "keep", "persist"],
  delete: ["remove", "trash", "discard", "destroy"],
  edit: ["modify", "change", "update"],
  add: ["create", "new", "plus", "insert"],
  settings: ["preferences", "config", "options", "gear"],
  profile: ["account", "user", "avatar"],
  home: ["main", "dashboard", "start"],
  link: ["anchor", "href", "url"],
  select: ["dropdown", "combo", "picker", "choose"],
  checkbox: ["check", "toggle", "tick"],
  upload: ["attach", "file", "browse"],
  download: ["save", "export"]
});

// Query tokens that name an element role; roleBoost rewards a query token
// from this set when it also occurs in the element's role.
var ROLE_KEYWORDS = /* @__PURE__ */ new Set([
  "button",
  "link",
  "input",
  "textbox",
  "checkbox",
  "radio",
  "select",
  "dropdown",
  "tab",
  "menu",
  "menuitem",
  "switch",
  "slider",
  "combobox",
  "searchbox",
  "option"
]);
1942
/**
 * Splits text into lower-case word tokens.
 *
 * Letters, combining marks and digits of any script are kept; every other
 * character becomes a separator, and hyphens and whitespace split tokens.
 * An ASCII-only character class would erase all non-Latin text, making
 * such queries and labels produce no tokens at all.
 *
 * @param {string} text - Text to tokenize.
 * @returns {string[]} Non-empty tokens in order of appearance.
 */
function tokenize(text) {
  return text
    .toLowerCase()
    .replace(/[^\p{L}\p{M}\p{N}\s-]/gu, " ")
    .split(/[\s-]+/)
    .filter((token) => token.length > 0);
}
1945
/**
 * Returns the given tokens plus the synonyms of each token.
 *
 * Only array-valued entries of SYNONYMS are treated as synonym lists. A
 * token such as "constructor" can resolve to a member inherited from
 * Object.prototype, which is truthy but not iterable and would otherwise
 * make the lookup throw.
 *
 * @param {string[]} tokens - Tokens to expand.
 * @returns {Set<string>} The original tokens followed by their synonyms.
 */
function expandSynonyms(tokens) {
  const expanded = new Set(tokens);
  for (const token of tokens) {
    const syns = SYNONYMS[token];
    if (!Array.isArray(syns)) continue;
    for (const syn of syns) expanded.add(syn);
  }
  return expanded;
}
1955
/**
 * Counts how often each token occurs.
 *
 * @param {Iterable<string>} tokens - Tokens to count.
 * @returns {Map<string, number>} Occurrence count per distinct token, in
 *   order of first appearance.
 */
function freqMap(tokens) {
  const counts = /* @__PURE__ */ new Map();
  for (const token of tokens) {
    const seen = counts.has(token) ? counts.get(token) || 0 : 0;
    counts.set(token, seen + 1);
  }
  return counts;
}
1962
/**
 * Weighted Jaccard similarity of two token lists: the sum of per-token
 * minimum counts divided by the sum of per-token maximum counts.
 *
 * @param {string[]} queryTokens - Tokens of the query.
 * @param {string[]} descTokens - Tokens of the element description.
 * @returns {number} Similarity in [0, 1]; 0 when both lists are empty.
 */
function jaccardScore(queryTokens, descTokens) {
  const queryCounts = freqMap(queryTokens);
  const descCounts = freqMap(descTokens);
  const vocabulary = /* @__PURE__ */ new Set([...queryCounts.keys(), ...descCounts.keys()]);

  let shared = 0;
  let total = 0;
  vocabulary.forEach((token) => {
    const inQuery = queryCounts.get(token) || 0;
    const inDesc = descCounts.get(token) || 0;
    shared += inQuery < inDesc ? inQuery : inDesc;
    total += inQuery > inDesc ? inQuery : inDesc;
  });

  if (total === 0) {
    return 0;
  }
  return shared / total;
}
1976
/**
 * Scores partial matches: every query token of at least three characters
 * earns 0.5 when some description token is a prefix of it or has it as a
 * prefix. The sum is divided by the number of query tokens and capped.
 *
 * @param {string[]} queryTokens - Tokens of the query.
 * @param {string[]} descTokens - Tokens of the element description.
 * @returns {number} Score in [0, 0.3]; 0 when either list is empty.
 */
function prefixScore(queryTokens, descTokens) {
  if (queryTokens.length === 0) return 0;
  if (descTokens.length === 0) return 0;

  const sharesPrefix = (queryToken) =>
    descTokens.some((descToken) => descToken.startsWith(queryToken) || queryToken.startsWith(descToken));

  let credit = 0;
  for (const queryToken of queryTokens) {
    const longEnough = queryToken.length >= 3;
    if (longEnough && sharesPrefix(queryToken)) {
      credit += 0.5;
    }
  }
  return Math.min(credit / queryTokens.length, 0.3);
}
1990
/**
 * Bonus for queries that name the element's role.
 *
 * @param {string[]} queryTokens - Tokens of the query.
 * @param {string} elementRole - Role (or tag) of the element.
 * @returns {number} 0.2 when some query token is a role keyword contained
 *   in the lower-cased role, otherwise 0.
 */
function roleBoost(queryTokens, elementRole) {
  const role = elementRole.toLowerCase();
  const namesRole = queryTokens.some((token) => ROLE_KEYWORDS.has(token) && role.includes(token));
  return namesRole ? 0.2 : 0;
}
1999
/**
 * Scores how well an element matches a query.
 *
 * The score is the sum of four parts, capped at 1: Jaccard similarity of
 * the synonym-expanded tokens, the prefix score and role boost of the raw
 * tokens, and a 0.3 bonus when the joined query tokens occur verbatim in
 * the joined description tokens.
 *
 * @param {string[]} queryTokens - Raw query tokens.
 * @param {Set<string>} queryExpanded - Query tokens plus their synonyms.
 * @param {{text?: string, role?: string, tag?: string, ariaLabel?: string}} element
 *   Element description.
 * @returns {number} Score in [0, 1]; 0 when the element has no tokens.
 */
function scoreElement(queryTokens, queryExpanded, element) {
  const { text, role: elementRole, tag, ariaLabel } = element;
  const description = [text, elementRole, tag, ariaLabel].filter(Boolean).join(" ");
  const descTokens = tokenize(description);
  if (descTokens.length === 0) {
    return 0;
  }

  const descExpanded = expandSynonyms(descTokens);
  const similarity = jaccardScore([...queryExpanded], [...descExpanded]);
  const partial = prefixScore(queryTokens, descTokens);
  const roleBonus = roleBoost(queryTokens, element.role || element.tag);

  const queryPhrase = queryTokens.join(" ");
  const descPhrase = descTokens.join(" ");
  const exactBonus = descPhrase.includes(queryPhrase) ? 0.3 : 0;

  return Math.min(similarity + partial + roleBonus + exactBonus, 1);
}
2020
/**
 * Ranks elements against a free-text query.
 *
 * @param {Array<object>} elements - Element descriptions; each needs
 *   `index` and `tag`, and may carry `text`, `ariaLabel` and `role`.
 * @param {string} query - Free-text description of the wanted element.
 * @param {{maxResults?: number, minScore?: number}} [options] - Result
 *   limit (default 5) and score threshold (default 0.3).
 * @returns {Array<{ref: number, score: number, text: string, role: string, tag: string}>}
 *   Matches at or above the threshold, best first; empty when the query
 *   has no tokens.
 */
function semanticFind(elements, query, options) {
  const limit = options?.maxResults ?? 5;
  const threshold = options?.minScore ?? 0.3;

  const queryTokens = tokenize(query);
  if (queryTokens.length === 0) {
    return [];
  }
  const queryExpanded = expandSynonyms(queryTokens);

  const matches = [...elements].flatMap((el) => {
    const score = scoreElement(queryTokens, queryExpanded, el);
    if (score < threshold) {
      return [];
    }
    return [
      {
        ref: el.index,
        score: Math.round(score * 100) / 100,
        text: (el.text || el.ariaLabel || "").slice(0, 60),
        role: el.role || el.tag,
        tag: el.tag
      }
    ];
  });

  matches.sort((left, right) => right.score - left.score);
  return matches.slice(0, limit);
}
2042
+
1168
2043
  // src/browser/page-adapter.ts
1169
2044
  var PuppeteerPage = class {
1170
2045
  page;
@@ -1192,7 +2067,10 @@ var PuppeteerPage = class {
1192
2067
  async evaluate(js) {
1193
2068
  return this.page.evaluate(js);
1194
2069
  }
1195
- async snapshot(_opts) {
2070
+ async snapshot(opts) {
2071
+ if (opts?.compact) {
2072
+ return this.page.evaluate(COMPACT_SNAPSHOT_SCRIPT);
2073
+ }
1196
2074
  return this.page.evaluate(SNAPSHOT_SCRIPT);
1197
2075
  }
1198
2076
  async semanticTree(_opts) {
@@ -1464,69 +2342,15 @@ var PuppeteerPage = class {
1464
2342
  active: p === this.page
1465
2343
  }));
1466
2344
  }
2345
+ async find(query, options) {
2346
+ const elements = await this.page.evaluate(INTERACTIVE_ELEMENTS_SCRIPT);
2347
+ return semanticFind(elements, query, options);
2348
+ }
1467
2349
  async close() {
1468
2350
  await this.page.close();
1469
2351
  }
1470
2352
  };
1471
2353
 
1472
- // src/browser/dom/interactive.ts
1473
- var INTERACTIVE_ELEMENTS_SCRIPT = `
1474
- (() => {
1475
- const results = [];
1476
-
1477
- function classify(el) {
1478
- const tag = el.tagName.toLowerCase();
1479
- const role = el.getAttribute('role');
1480
- const types = [];
1481
-
1482
- // Native interactive
1483
- if (['a', 'button', 'input', 'select', 'textarea', 'details', 'summary'].includes(tag)) {
1484
- types.push('native');
1485
- }
1486
-
1487
- // ARIA role interactive
1488
- if (role && ['button', 'link', 'textbox', 'checkbox', 'radio', 'combobox', 'tab', 'switch', 'menuitem', 'slider'].includes(role)) {
1489
- types.push('aria');
1490
- }
1491
-
1492
- // Contenteditable
1493
- if (el.contentEditable === 'true') types.push('contenteditable');
1494
-
1495
- // Focusable
1496
- if (el.tabIndex >= 0 && el.getAttribute('tabindex') !== null) types.push('focusable');
1497
-
1498
- // Has click listener (approximate)
1499
- if (el.onclick) types.push('listener');
1500
-
1501
- return types;
1502
- }
1503
-
1504
- let idx = 0;
1505
- const walker = document.createTreeWalker(document.body, NodeFilter.SHOW_ELEMENT);
1506
- let node;
1507
- while (node = walker.nextNode()) {
1508
- const types = classify(node);
1509
- if (types.length === 0) continue;
1510
-
1511
- const style = getComputedStyle(node);
1512
- if (style.display === 'none' || style.visibility === 'hidden') continue;
1513
-
1514
- const rect = node.getBoundingClientRect();
1515
- results.push({
1516
- index: idx++,
1517
- tag: node.tagName.toLowerCase(),
1518
- role: node.getAttribute('role') || '',
1519
- text: (node.textContent || '').trim().slice(0, 100),
1520
- types,
1521
- ariaLabel: node.getAttribute('aria-label') || '',
1522
- rect: { x: rect.x, y: rect.y, width: rect.width, height: rect.height },
1523
- });
1524
- }
1525
-
1526
- return results;
1527
- })()
1528
- `;
1529
-
1530
2354
  // src/browser/wait.ts
1531
2355
  async function waitForCondition(page, condition, timeout = 3e4) {
1532
2356
  switch (condition) {
@@ -2012,6 +2836,7 @@ async function lobsterFetch(url, options) {
2012
2836
  }
2013
2837
  export {
2014
2838
  BrowserManager,
2839
+ COMPACT_SNAPSHOT_SCRIPT,
2015
2840
  FLAT_TREE_SCRIPT,
2016
2841
  FORM_STATE_SCRIPT,
2017
2842
  GET_INTERCEPTED_SCRIPT,
@@ -2020,15 +2845,27 @@ export {
2020
2845
  PuppeteerPage,
2021
2846
  SEMANTIC_TREE_SCRIPT,
2022
2847
  SNAPSHOT_SCRIPT,
2848
+ STEALTH_ARGS,
2849
+ STEALTH_SCRIPT,
2850
+ buildCompactSnapshotScript,
2023
2851
  buildInterceptorScript,
2024
2852
  buildSnapshotScript,
2853
+ createProfile,
2854
+ discoverChrome,
2025
2855
  extractLinks,
2026
2856
  extractMarkdown,
2027
2857
  extractSnapshot,
2028
2858
  extractText,
2029
2859
  flatTreeToString,
2860
+ getProfileDataDir,
2861
+ injectStealth,
2862
+ listProfiles,
2030
2863
  lobsterFetch,
2031
2864
  parseHtml,
2865
+ removeProfile,
2866
+ resetProfileCache,
2867
+ resolveAttachTarget,
2868
+ semanticFind,
2032
2869
  waitForCondition
2033
2870
  };
2034
2871
  //# sourceMappingURL=index.js.map