lobster-cli 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +148 -268
  3. package/dist/agent/core.js +63 -0
  4. package/dist/agent/core.js.map +1 -1
  5. package/dist/agent/index.js +63 -0
  6. package/dist/agent/index.js.map +1 -1
  7. package/dist/browser/chrome-attach.js +102 -0
  8. package/dist/browser/chrome-attach.js.map +1 -0
  9. package/dist/browser/dom/compact-snapshot.js +162 -0
  10. package/dist/browser/dom/compact-snapshot.js.map +1 -0
  11. package/dist/browser/dom/index.js +160 -0
  12. package/dist/browser/dom/index.js.map +1 -1
  13. package/dist/browser/index.js +1201 -70
  14. package/dist/browser/index.js.map +1 -1
  15. package/dist/browser/manager.js +443 -11
  16. package/dist/browser/manager.js.map +1 -1
  17. package/dist/browser/page-adapter.js +370 -1
  18. package/dist/browser/page-adapter.js.map +1 -1
  19. package/dist/browser/profiles.js +238 -0
  20. package/dist/browser/profiles.js.map +1 -0
  21. package/dist/browser/semantic-find.js +152 -0
  22. package/dist/browser/semantic-find.js.map +1 -0
  23. package/dist/browser/stealth.js +187 -0
  24. package/dist/browser/stealth.js.map +1 -0
  25. package/dist/config/index.js +8 -1
  26. package/dist/config/index.js.map +1 -1
  27. package/dist/config/schema.js +8 -1
  28. package/dist/config/schema.js.map +1 -1
  29. package/dist/doc/index.js +31715 -0
  30. package/dist/doc/index.js.map +1 -0
  31. package/dist/domain-guard.js +103 -0
  32. package/dist/domain-guard.js.map +1 -0
  33. package/dist/index.js +32914 -262
  34. package/dist/index.js.map +1 -1
  35. package/dist/lib.js +1488 -241
  36. package/dist/lib.js.map +1 -1
  37. package/dist/llm/client.js +63 -0
  38. package/dist/llm/client.js.map +1 -1
  39. package/dist/llm/index.js +63 -0
  40. package/dist/llm/index.js.map +1 -1
  41. package/dist/llm/openai-client.js +63 -0
  42. package/dist/llm/openai-client.js.map +1 -1
  43. package/dist/router/index.js +925 -61
  44. package/dist/router/index.js.map +1 -1
  45. package/package.json +16 -2
@@ -1,6 +1,214 @@
1
// Bundler runtime helpers (emitted by the build tool).
var __defProp = Object.defineProperty;
var __getOwnPropNames = Object.getOwnPropertyNames;

// Turns a one-entry table `{ "path"() { ... } }` into a lazy initializer.
// The first call runs the entry and caches its return value; every later
// call returns the cached value without running the entry again.
var __esm = (fn, res) => function __init() {
  if (fn) {
    const moduleBody = fn[__getOwnPropNames(fn)[0]];
    // Clear the table before running the body so a re-entrant call is a no-op.
    fn = 0;
    res = moduleBody(0);
  }
  return res;
};

// Defines one enumerable getter on `target` per enumerable key of `all`.
var __export = (target, all) => {
  for (var name in all) {
    const descriptor = { get: all[name], enumerable: true };
    __defProp(target, name, descriptor);
  }
};
10
+
11
// src/browser/pdf.ts
// Export table for the PDF module. Each entry is registered as a getter, so
// the referenced function is only looked up when a consumer reads the key.
var pdf_exports = {};
{
  const pdfExportGetters = {
    extractPdf: () => extractPdf,
    isPdfResponse: () => isPdfResponse,
    isPdfUrl: () => isPdfUrl,
    tryExtractPdf: () => tryExtractPdf
  };
  __export(pdf_exports, pdfExportGetters);
}
19
+ import { readFileSync as readFileSync3 } from "fs";
20
/**
 * Lazily loads the `pdf-parse` package and returns a parse function of the
 * form `(buffer) => Promise<result>`. The function is cached in the
 * module-level `pdfParseFn`, so the dynamic import happens at most once per
 * successful load.
 *
 * @returns {Promise<Function>} The cached parse function.
 */
async function getPdfParser() {
  if (pdfParseFn) {
    return pdfParseFn;
  }
  const mod = await import("pdf-parse");
  const { PDFParse: PDFParseClass } = mod;
  if (typeof PDFParseClass === "function") {
    // Class-style export: build a parser per buffer.
    // NOTE(review): when the instance has no `parse` method the instance itself
    // is returned as the "result" — confirm this matches the installed
    // pdf-parse version's API.
    pdfParseFn = async (buffer) => {
      const parser = new PDFParseClass(buffer);
      if (parser.parse) {
        return await parser.parse();
      }
      return parser;
    };
  } else {
    // Function-style export (default export, or the module object itself).
    pdfParseFn = mod.default || mod;
  }
  return pdfParseFn;
}
35
/**
 * Heuristically decides whether a URL or file path points at a PDF.
 *
 * Matches (case-insensitively):
 *  - a path ending in `.pdf`, ignoring any `?query` or `#fragment` suffix;
 *  - any `/pdf/` segment (this also covers `arxiv.org/pdf/...`);
 *  - a `format=pdf` or `type=pdf` query parameter.
 *
 * @param {string} urlOrPath - URL or local path to inspect.
 * @returns {boolean} `true` when the value looks like a PDF location;
 *   `false` otherwise, including for empty or non-string input.
 */
function isPdfUrl(urlOrPath) {
  if (typeof urlOrPath !== "string" || urlOrPath === "") return false;
  const lower = urlOrPath.toLowerCase();
  // Drop the query string / fragment so "paper.pdf?download=1" is recognised.
  const pathPart = lower.split(/[?#]/, 1)[0];
  if (pathPart.endsWith(".pdf") || lower.endsWith(".pdf")) return true;
  if (/\/pdf\//.test(lower)) return true;
  return /[?&](?:format|type)=pdf/.test(lower);
}
44
/**
 * Tells whether a Content-Type header value denotes a PDF.
 * Media types are case-insensitive, so the comparison ignores case; a
 * missing (null/undefined) header is treated as "not a PDF" instead of throwing.
 *
 * @param {string | null | undefined} contentType - Raw Content-Type header value.
 * @returns {boolean} `true` when the value contains `application/pdf`.
 */
function isPdfResponse(contentType) {
  if (typeof contentType !== "string") return false;
  return contentType.toLowerCase().includes("application/pdf");
}
47
/**
 * Downloads a document over HTTP(S) and returns its bytes.
 *
 * The request uses a desktop-browser User-Agent and follows redirects. The
 * response's Content-Type is intentionally not checked here (the previous
 * check had an empty body and did nothing); this function returns whatever
 * bytes the server sent.
 *
 * @param {string} url - Absolute http(s) URL to fetch.
 * @returns {Promise<Buffer>} The response body.
 * @throws {Error} When the response status is not in the 2xx range.
 */
async function downloadPdf(url) {
  const response = await fetch(url, {
    headers: {
      "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
      "Accept": "application/pdf,*/*"
    },
    redirect: "follow"
  });
  if (!response.ok) {
    throw new Error(`Failed to download PDF: ${response.status} ${response.statusText}`);
  }
  return Buffer.from(await response.arrayBuffer());
}
64
/**
 * Synchronously reads a local file and returns its raw bytes.
 *
 * @param {string} filePath - Path of the file to read.
 * @returns {Buffer} The file contents.
 */
function readLocalPdf(filePath) {
  const contents = readFileSync3(filePath);
  return contents;
}
67
/**
 * Converts plain text extracted from a PDF into lightly structured Markdown.
 *
 * Heuristics, applied per trimmed line in this order:
 *  1. numbered headings ("2.1 Related Work") become `##`/`###`/`####` by depth;
 *  2. short ALL-CAPS lines become `##` headings in sentence case;
 *  3. lines starting with a well-known section name (Abstract, References, ...)
 *     become `##` headings; References/Bibliography also switch on
 *     reference-list mode;
 *  4. bullet glyphs and "(1)" / "a)" enumerations become list items;
 *  5. in reference-list mode, "[1] ..." / "1. ..." entries become list items;
 *  6. everything else is accumulated into a paragraph, flushed on blank lines.
 *
 * A paragraph line ending in "-" (and not the last line of the text) is
 * treated as a hyphenated line break: the hyphen is dropped and the next
 * paragraph line is glued on WITHOUT a separating space, so "exam-" + "ple"
 * yields "example" rather than "exam ple".
 *
 * @param {string} text - Raw extracted text, lines separated by "\n".
 * @param {{ title?: string, author?: string }} metadata - Document metadata; a
 *   title other than "untitled" produces a header block (title, authors, rule).
 * @returns {string} Markdown with runs of blank lines collapsed, trimmed.
 */
function textToMarkdown(text, metadata) {
  // Patterns are loop-invariant, so build them once per call.
  const numberedHeadingPattern = /^(\d+\.?\d*\.?\d*)\s+([A-Z][A-Za-z\s:&-]+)$/;
  const allCapsPattern = /^[A-Z\s:&-]+$/;
  const knownHeadings = /^(Abstract|Introduction|Conclusion|Discussion|Results|Methods|Methodology|Background|Related Work|Acknowledgments|Acknowledgements|References|Bibliography|Appendix)/i;
  const referencesHeading = /^(References|Bibliography)/i;
  const bulletStart = /^[-•∙◦▪]/;
  const bulletPrefix = /^[-•∙◦▪]\s*/;
  const enumeratedItem = /^\(\d+\)|^[a-z]\)/;
  const referenceEntry = /^\[?\d+\]?\.?\s/;

  const lines = text.split("\n");
  const mdLines = [];
  if (metadata.title && metadata.title !== "untitled") {
    mdLines.push(`# ${metadata.title}`);
    mdLines.push("");
    if (metadata.author) {
      mdLines.push(`**Authors:** ${metadata.author}`);
      mdLines.push("");
    }
    mdLines.push("---");
    mdLines.push("");
  }

  let inReferences = false;
  let prevWasBlank = false;
  let paragraphBuffer = [];
  // True when the previous paragraph line ended in a dropped hyphen, meaning
  // the next paragraph line continues the same word.
  let joinToPrevious = false;

  function flushParagraph() {
    if (paragraphBuffer.length > 0) {
      mdLines.push(paragraphBuffer.join(" "));
      mdLines.push("");
      paragraphBuffer = [];
    }
    joinToPrevious = false;
  }

  function appendToParagraph(fragment) {
    if (joinToPrevious && paragraphBuffer.length > 0) {
      paragraphBuffer[paragraphBuffer.length - 1] += fragment;
    } else {
      paragraphBuffer.push(fragment);
    }
  }

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i].trim();
    if (!line) {
      if (!prevWasBlank) {
        flushParagraph();
      }
      prevWasBlank = true;
      continue;
    }
    prevWasBlank = false;

    const numberedHeading = line.match(numberedHeadingPattern);
    if (numberedHeading && line.length < 80) {
      flushParagraph();
      const depth = numberedHeading[1].split(".").filter(Boolean).length;
      const prefix = depth <= 1 ? "##" : depth === 2 ? "###" : "####";
      mdLines.push(`${prefix} ${line}`);
      mdLines.push("");
      continue;
    }

    if (line === line.toUpperCase() && line.length > 3 && line.length < 60 && allCapsPattern.test(line)) {
      flushParagraph();
      mdLines.push(`## ${line.charAt(0) + line.slice(1).toLowerCase()}`);
      mdLines.push("");
      continue;
    }

    if (knownHeadings.test(line) && line.length < 40) {
      flushParagraph();
      if (referencesHeading.test(line)) {
        inReferences = true;
      }
      mdLines.push(`## ${line}`);
      mdLines.push("");
      continue;
    }

    if (bulletStart.test(line)) {
      flushParagraph();
      mdLines.push(`- ${line.replace(bulletPrefix, "")}`);
      continue;
    }

    if (enumeratedItem.test(line)) {
      flushParagraph();
      mdLines.push(`- ${line}`);
      continue;
    }

    if (inReferences && referenceEntry.test(line)) {
      flushParagraph();
      mdLines.push(`- ${line}`);
      continue;
    }

    // Ordinary paragraph text; undo end-of-line hyphenation.
    const hyphenated = line.endsWith("-") && i + 1 < lines.length;
    appendToParagraph(hyphenated ? line.slice(0, -1) : line);
    joinToPrevious = hyphenated;
  }
  flushParagraph();
  return mdLines.join("\n").replace(/\n{3,}/g, "\n\n").trim();
}
149
/**
 * Parses a PDF buffer and assembles the extraction result.
 *
 * @param {Buffer} buffer - Raw PDF bytes.
 * @returns {Promise<object>} `{ metadata, text, markdown, pages, wordCount, charCount }`.
 *   `pages` holds one trimmed string per page when splitting the text on form
 *   feeds yields exactly `numpages` non-empty chunks; otherwise it holds the
 *   whole text as a single entry.
 */
async function extractFromBuffer(buffer) {
  const parsePdf = await getPdfParser();
  const parsed = await parsePdf(buffer);

  const docInfo = parsed.info || {};
  const text = parsed.text || "";
  const metadata = {
    title: docInfo.Title || "untitled",
    author: docInfo.Author || "",
    pages: parsed.numpages,
    creator: docInfo.Creator || "",
    producer: docInfo.Producer || "",
    creationDate: docInfo.CreationDate || ""
  };

  // Only trust the form-feed split when it agrees with the reported page count.
  const formFeedChunks = text.split(/\f/).filter(Boolean);
  const pageList = formFeedChunks.length === parsed.numpages ? formFeedChunks : [text];

  return {
    metadata,
    text,
    markdown: textToMarkdown(text, metadata),
    pages: pageList.map((pageText) => pageText.trim()),
    wordCount: text.split(/\s+/).filter(Boolean).length,
    charCount: text.length
  };
}
175
/**
 * Loads a PDF from an http(s) URL or a local path, verifies that it starts
 * with the "%PDF" signature, and extracts its content.
 *
 * @param {string} urlOrPath - `http://` / `https://` URL, otherwise a file path.
 * @returns {Promise<object>} The result of `extractFromBuffer`.
 * @throws {Error} When the first four bytes are not "%PDF".
 */
async function extractPdf(urlOrPath) {
  const isRemote = urlOrPath.startsWith("http://") || urlOrPath.startsWith("https://");
  const buffer = isRemote ? await downloadPdf(urlOrPath) : readLocalPdf(urlOrPath);

  // ASCII codes for "%", "P", "D", "F".
  const signature = [37, 80, 68, 70];
  const hasSignature = signature.every((byte, offset) => buffer[offset] === byte);
  if (!hasSignature) {
    throw new Error("Not a valid PDF file (invalid magic bytes)");
  }
  return extractFromBuffer(buffer);
}
187
/**
 * Extracts a PDF from `url` if it appears to be one, otherwise returns null.
 *
 * URLs that look like PDFs are extracted directly. For anything else a HEAD
 * request is issued and the Content-Type decides. A failing HEAD request is
 * treated as "not a PDF" (best effort); extraction errors are not swallowed.
 *
 * @param {string} url - URL to inspect.
 * @returns {Promise<object | null>} Extraction result, or null when not a PDF.
 */
async function tryExtractPdf(url) {
  if (isPdfUrl(url)) {
    return extractPdf(url);
  }

  let servesPdf = false;
  try {
    const head = await fetch(url, { method: "HEAD", redirect: "follow" });
    servesPdf = isPdfResponse(head.headers.get("content-type") || "");
  } catch {
    // Probe failed; fall through and report "not a PDF".
  }
  return servesPdf ? extractPdf(url) : null;
}
201
// Cached parse function used by getPdfParser(); cleared by the initializer below.
var pdfParseFn;
// Lazy initializer for the PDF module: resets the parser cache on first call.
var init_pdf = __esm({
  "src/browser/pdf.ts": function() {
    "use strict";
    pdfParseFn = null;
  }
});
208
+
1
209
  // src/browser/manager.ts
2
210
  import puppeteer from "puppeteer-core";
3
- import { existsSync } from "fs";
211
+ import { existsSync as existsSync3 } from "fs";
4
212
 
5
213
  // src/utils/logger.ts
6
214
  import chalk from "chalk";
@@ -16,20 +224,509 @@ var log = {
16
224
  dim: (msg) => console.log(chalk.dim(msg))
17
225
  };
18
226
 
227
+ // src/browser/profiles.ts
228
+ import { existsSync as existsSync2, mkdirSync as mkdirSync2, readFileSync as readFileSync2, writeFileSync as writeFileSync2, readdirSync, rmSync, statSync } from "fs";
229
+ import { join as join2 } from "path";
230
+
231
+ // src/config/index.ts
232
+ import { readFileSync, writeFileSync, mkdirSync, existsSync } from "fs";
233
+ import { join } from "path";
234
+ import { homedir } from "os";
235
+ import yaml from "js-yaml";
236
+
237
+ // src/config/schema.ts
238
+ import { z } from "zod";
239
// Validation schema for the configuration file. Every section and every field
// carries a default.
// NOTE(review): relies on `.default({})` at section level still applying the
// inner field defaults — confirm against the zod version in use.
var configSchema = (() => {
  const llm = z.object({
    provider: z.enum(["openai", "anthropic", "gemini", "ollama"]).default("openai"),
    baseURL: z.string().default("https://api.openai.com/v1"),
    model: z.string().default("gpt-4o"),
    apiKey: z.string().default(""),
    temperature: z.number().min(0).max(2).default(0.1),
    maxRetries: z.number().int().min(0).default(3)
  }).default({});

  const browser = z.object({
    executablePath: z.string().default(""),
    headless: z.boolean().default(true),
    connectTimeout: z.number().default(30),
    commandTimeout: z.number().default(60),
    cdpEndpoint: z.string().default(""),
    profile: z.string().default(""),
    stealth: z.boolean().default(false)
  }).default({});

  const agent = z.object({
    maxSteps: z.number().int().default(40),
    stepDelay: z.number().default(0.4)
  }).default({});

  const domains = z.object({
    allow: z.array(z.string()).default([]),
    block: z.array(z.string()).default([]),
    blockMessage: z.string().default("")
  }).default({});

  const output = z.object({
    defaultFormat: z.enum(["table", "json", "yaml", "markdown", "csv"]).default("table"),
    color: z.boolean().default(true)
  }).default({});

  return z.object({ llm, browser, agent, domains, output });
})();
271
+
272
+ // src/config/index.ts
273
// Per-user configuration directory (`~/.lobster`) and the config file inside it.
var CONFIG_DIR = join(homedir(), ".lobster");
var CONFIG_FILE = join(CONFIG_DIR, "config.yaml");

/**
 * @returns {string} Absolute path of the per-user configuration directory.
 */
function getConfigDir() {
  const configDir = CONFIG_DIR;
  return configDir;
}
278
+
279
+ // src/browser/profiles.ts
280
// Directory holding all browser profiles; resolved on each call.
var PROFILES_DIR = () => join2(getConfigDir(), "profiles");
// Per-profile metadata file name.
var META_FILE = ".lobster-meta.json";
// 1-64 characters: a letter or digit, then letters, digits, "_" or "-".
var VALID_NAME = /^[a-zA-Z0-9][a-zA-Z0-9_-]{0,63}$/;
// Names that may not be used for a profile (compared in lower case):
// two tool-level names plus the Windows device names con/prn/aux/nul,
// com1-com9 and lpt1-lpt9.
var RESERVED_NAMES = new Set([
  "default",
  "system",
  "con",
  "prn",
  "aux",
  "nul",
  ...["com", "lpt"].flatMap(
    (device) => Array.from({ length: 9 }, (_, index) => `${device}${index + 1}`)
  )
]);
// Sub-directories removed by a profile cache reset.
var CACHE_DIRS = [
  "Cache", "Code Cache", "GPUCache", "GrShaderCache", "ShaderCache",
  "Service Worker", "Sessions", "Session Storage", "blob_storage"
];
320
/** Creates the profiles directory (and any missing parents) if it is absent. */
function ensureProfilesDir() {
  const profilesRoot = PROFILES_DIR();
  if (existsSync2(profilesRoot)) {
    return;
  }
  mkdirSync2(profilesRoot, { recursive: true });
}
324
/**
 * Validates a profile name.
 *
 * @param {string} name - Candidate profile name.
 * @throws {Error} When the name does not match `VALID_NAME`, or when its
 *   lower-cased form is listed in `RESERVED_NAMES`.
 */
function validateName(name) {
  const wellFormed = VALID_NAME.test(name);
  if (!wellFormed) {
    throw new Error(`Invalid profile name "${name}". Use only letters, numbers, hyphens, underscores (max 64 chars).`);
  }
  const reserved = RESERVED_NAMES.has(name.toLowerCase());
  if (reserved) {
    throw new Error(`"${name}" is a reserved name. Choose a different profile name.`);
  }
}
332
/**
 * Builds the directory path for a profile. The name is not validated here.
 *
 * @param {string} name - Profile name.
 * @returns {string} `<profiles dir>/<name>`.
 */
function getProfileDir(name) {
  const profilesRoot = PROFILES_DIR();
  return join2(profilesRoot, name);
}
335
/**
 * Reads a profile's metadata file.
 *
 * @param {string} profileDir - Profile directory.
 * @returns {*} The parsed JSON content, or null when the file is missing,
 *   unreadable, or not valid JSON.
 */
function readMeta(profileDir) {
  const metaPath = join2(profileDir, META_FILE);
  if (existsSync2(metaPath)) {
    try {
      return JSON.parse(readFileSync2(metaPath, "utf-8"));
    } catch {
      // Unreadable or corrupt metadata is reported as "no metadata".
    }
  }
  return null;
}
344
/**
 * Writes a profile's metadata file as JSON with 2-space indentation.
 *
 * @param {string} profileDir - Profile directory.
 * @param {object} meta - Metadata to persist.
 */
function writeMeta(profileDir, meta) {
  const metaPath = join2(profileDir, META_FILE);
  const serialized = JSON.stringify(meta, null, 2);
  writeFileSync2(metaPath, serialized);
}
347
/**
 * Computes the total size of the regular files under a directory, in
 * megabytes rounded to one decimal place.
 *
 * Bytes are summed across the whole tree and rounded ONCE at the end.
 * (Rounding each sub-directory to 0.1 MB before adding it up, as was done
 * previously, makes trees of many small directories report wrong totals.)
 *
 * Best effort: a directory that cannot be read contributes the bytes counted
 * before the failure; a missing directory yields 0.
 *
 * @param {string} dirPath - Directory to measure.
 * @returns {number} Size in MB, one decimal place.
 */
function getDirSizeMB(dirPath) {
  const BYTES_PER_MB = 1024 * 1024;

  // Recursively sums file sizes in bytes.
  function sumBytes(currentDir) {
    let total = 0;
    try {
      const entries = readdirSync(currentDir, { withFileTypes: true });
      for (const entry of entries) {
        const fullPath = join2(currentDir, entry.name);
        if (entry.isFile()) {
          total += statSync(fullPath).size;
        } else if (entry.isDirectory() && entry.name !== ".lobster-meta.json") {
          // A directory that happens to carry the metadata file's name is skipped.
          total += sumBytes(fullPath);
        }
      }
    } catch {
      // Unreadable directory or entry: keep whatever was counted so far.
    }
    return total;
  }

  return Math.round(sumBytes(dirPath) / BYTES_PER_MB * 10) / 10;
}
363
/**
 * Creates a new, empty profile directory together with its metadata file.
 *
 * @param {string} name - Profile name; must pass `validateName`.
 * @returns {{ name: string, createdAt: string, lastUsed: string }} The
 *   metadata that was written (ISO-8601 timestamps).
 * @throws {Error} When the name is invalid or the profile already exists.
 */
function createProfile(name) {
  validateName(name);
  ensureProfilesDir();

  const profilePath = getProfileDir(name);
  if (existsSync2(profilePath)) {
    throw new Error(`Profile "${name}" already exists.`);
  }
  mkdirSync2(profilePath, { recursive: true });

  const createdAt = new Date().toISOString();
  const lastUsed = new Date().toISOString();
  const meta = { name, createdAt, lastUsed };
  writeMeta(profilePath, meta);

  log.success(`Profile "${name}" created at ${profilePath}`);
  return meta;
}
380
/**
 * Lists every directory under the profiles directory as a profile.
 *
 * Each entry carries the stored metadata plus a freshly computed `sizeMB`.
 * Directories without usable metadata are listed with "unknown" timestamps.
 * Metadata only counts as usable when it is a plain JSON object; if it lacks
 * a non-empty string `name`, the directory name is used instead. This keeps a
 * single malformed metadata file from aborting the listing or breaking the sort.
 *
 * Best effort: if the profiles directory cannot be read, the entries gathered
 * so far (possibly none) are returned.
 *
 * @returns {Array<object>} Profiles sorted by name.
 */
function listProfiles() {
  ensureProfilesDir();
  const dir = PROFILES_DIR();
  const profiles = [];
  try {
    const entries = readdirSync(dir, { withFileTypes: true });
    for (const entry of entries) {
      if (!entry.isDirectory()) continue;
      const profileDir = join2(dir, entry.name);
      const meta = readMeta(profileDir);
      const usable = meta !== null && typeof meta === "object" && !Array.isArray(meta);
      if (usable) {
        if (typeof meta.name !== "string" || meta.name === "") {
          meta.name = entry.name;
        }
        meta.sizeMB = getDirSizeMB(profileDir);
        profiles.push(meta);
      } else {
        profiles.push({
          name: entry.name,
          createdAt: "unknown",
          lastUsed: "unknown",
          sizeMB: getDirSizeMB(profileDir)
        });
      }
    }
  } catch {
    // Unreadable profiles directory: return what was collected.
  }
  return profiles.sort((a, b) => a.name.localeCompare(b.name));
}
406
/**
 * Permanently deletes a profile directory and everything in it.
 *
 * The name is checked against `VALID_NAME` before any path is built. Without
 * that check a name such as ".." or "../.." would resolve outside the
 * profiles directory and the recursive delete would remove unrelated data.
 *
 * @param {string} name - Name of the profile to delete.
 * @throws {Error} When the name is malformed or the profile does not exist.
 */
function removeProfile(name) {
  if (typeof name !== "string" || !VALID_NAME.test(name)) {
    throw new Error(`Invalid profile name "${name}". Use only letters, numbers, hyphens, underscores (max 64 chars).`);
  }
  const dir = getProfileDir(name);
  if (!existsSync2(dir)) {
    throw new Error(`Profile "${name}" does not exist.`);
  }
  rmSync(dir, { recursive: true, force: true });
  log.success(`Profile "${name}" deleted.`);
}
414
/**
 * Returns the data directory for a profile, creating the profile when it
 * does not exist yet and otherwise refreshing its `lastUsed` timestamp.
 *
 * @param {string} name - Profile name; must pass `validateName`.
 * @returns {string} Path of the profile directory.
 * @throws {Error} When the name is invalid.
 */
function getProfileDataDir(name) {
  validateName(name);
  const profilePath = getProfileDir(name);

  if (existsSync2(profilePath)) {
    // Existing profile: keep its metadata (or a stub) and stamp the access time.
    const meta = readMeta(profilePath) || { name, createdAt: "unknown", lastUsed: "" };
    meta.lastUsed = new Date().toISOString();
    writeMeta(profilePath, meta);
    return profilePath;
  }

  createProfile(name);
  return profilePath;
}
426
/**
 * Deletes the cache-like sub-directories listed in `CACHE_DIRS` from a
 * profile, looking both at the profile root and at its "Default" sub-directory.
 *
 * The name is checked against `VALID_NAME` first, so a name containing path
 * separators or ".." cannot steer the recursive deletes outside the profiles
 * directory.
 *
 * @param {string} name - Name of the profile to clean.
 * @throws {Error} When the name is malformed or the profile does not exist.
 */
function resetProfileCache(name) {
  if (typeof name !== "string" || !VALID_NAME.test(name)) {
    throw new Error(`Invalid profile name "${name}". Use only letters, numbers, hyphens, underscores (max 64 chars).`);
  }
  const dir = getProfileDir(name);
  if (!existsSync2(dir)) {
    throw new Error(`Profile "${name}" does not exist.`);
  }
  const bases = [dir, join2(dir, "Default")];
  let cleaned = 0;
  for (const cacheDir of CACHE_DIRS) {
    for (const base of bases) {
      const target = join2(base, cacheDir);
      if (existsSync2(target)) {
        rmSync(target, { recursive: true, force: true });
        cleaned++;
      }
    }
  }
  log.success(`Profile "${name}" cache reset (${cleaned} directories cleaned).`);
}
443
+
444
+ // src/browser/chrome-attach.ts
445
+ import http from "http";
446
// Ports scanned when no explicit port list is given.
var DEFAULT_PORTS = [9222, 9229, 9333, 9515];
// Socket inactivity timeout for a single probe, in milliseconds.
var PROBE_TIMEOUT = 1500;

/**
 * Probes `http://127.0.0.1:<port>/json/version` for a DevTools endpoint.
 *
 * The returned promise never rejects and always settles: connection errors,
 * timeouts, malformed JSON, a response without `webSocketDebuggerUrl`, and a
 * response stream that errors or closes before completing all resolve to null.
 *
 * @param {number} port - Local TCP port to probe.
 * @returns {Promise<{ wsEndpoint: string, port: number, version: string, browser: string } | null>}
 */
function probePort(port) {
  return new Promise((resolve) => {
    const req = http.get(`http://127.0.0.1:${port}/json/version`, {
      timeout: PROBE_TIMEOUT
    }, (res) => {
      // Decode as text so multi-byte characters split across chunks survive.
      res.setEncoding("utf8");
      let data = "";
      res.on("data", (chunk) => {
        data += chunk;
      });
      res.on("end", () => {
        try {
          const info = JSON.parse(data);
          if (info.webSocketDebuggerUrl) {
            resolve({
              wsEndpoint: info.webSocketDebuggerUrl,
              port,
              version: info["Protocol-Version"] || "",
              browser: info.Browser || ""
            });
          } else {
            resolve(null);
          }
        } catch {
          resolve(null);
        }
      });
      // If the body is cut short, "end" never fires; settle here instead.
      // Resolving an already-settled promise is a no-op, so "close" after a
      // normal "end" changes nothing.
      res.on("error", () => resolve(null));
      res.on("close", () => resolve(null));
    });
    req.on("error", () => resolve(null));
    req.on("timeout", () => {
      req.destroy();
      resolve(null);
    });
  });
}
482
/**
 * Probes a list of ports concurrently and returns the first one, in list
 * order, that answered with a DevTools endpoint.
 *
 * @param {number[]} [ports] - Ports to probe; defaults to `DEFAULT_PORTS`.
 * @returns {Promise<object | null>} The probe result, or null when none answered.
 */
async function discoverChrome(ports) {
  const portsToCheck = ports || DEFAULT_PORTS;
  log.debug(`Scanning ports for Chrome: ${portsToCheck.join(", ")}`);

  const probes = portsToCheck.map(probePort);
  const results = await Promise.all(probes);
  const found = results.find(Boolean) || null;

  if (!found) {
    log.debug("No running Chrome instance found on debug ports.");
    return found;
  }
  log.info(`Found Chrome on port ${found.port}: ${found.browser}`);
  return found;
}
494
/**
 * Looks up the DevTools WebSocket URL exposed on a local port.
 *
 * @param {number} port - Local port to probe.
 * @returns {Promise<string | null>} The WebSocket URL, or null when unavailable.
 */
async function getWebSocketDebuggerUrl(port) {
  const probe = await probePort(port);
  if (!probe) {
    return null;
  }
  return probe.wsEndpoint || null;
}
498
/**
 * Resolves an "attach" setting to a DevTools WebSocket URL.
 *
 * Accepted targets:
 *  - `true` or `"true"`: auto-discover a running Chrome on the default ports;
 *  - a `ws://` or `wss://` URL: returned as given (surrounding whitespace trimmed);
 *  - a port, either as a number or as an all-digits string, in 1-65535.
 *
 * Port strings must consist of digits only, so "9222abc" is rejected rather
 * than silently read as 9222.
 *
 * @param {boolean | string | number} target - Attach setting.
 * @returns {Promise<string>} WebSocket debugger URL.
 * @throws {Error} When the target is invalid or no Chrome instance is found.
 */
async function resolveAttachTarget(target) {
  if (target === true || target === "true") {
    const result = await discoverChrome();
    if (!result) {
      throw new Error(
        "No running Chrome found. Start Chrome with:\n google-chrome --remote-debugging-port=9222\n # or on Mac:\n /Applications/Google\\ Chrome.app/Contents/MacOS/Google\\ Chrome --remote-debugging-port=9222"
      );
    }
    return result.wsEndpoint;
  }

  // Treat a numeric port (e.g. from a YAML config) like its string form.
  const spec = typeof target === "number" ? String(target) : target;
  if (typeof spec === "string") {
    const trimmed = spec.trim();
    if (trimmed.startsWith("ws://") || trimmed.startsWith("wss://")) {
      return trimmed;
    }
    if (/^\d+$/.test(trimmed)) {
      const port = Number.parseInt(trimmed, 10);
      if (port > 0 && port < 65536) {
        const url = await getWebSocketDebuggerUrl(port);
        if (!url) {
          throw new Error(`No Chrome found on port ${port}. Make sure Chrome is running with --remote-debugging-port=${port}`);
        }
        return url;
      }
    }
    throw new Error(`Invalid attach target: "${target}". Use "true" for auto-discover, a port number, or a ws:// URL.`);
  }
  throw new Error("Invalid attach target.");
}
524
+
525
+ // src/browser/stealth.ts
526
// Source text of a script meant to run in the page before any page script.
// It masks common automation fingerprints. Differences from a naive version:
//  - the canvas noise is applied only for the duration of a toDataURL() call
//    and then reverted, so the canvas is not changed further on every read
//    and repeated reads of the same canvas return the same data;
//  - the permissions shim maps Notification.permission "default" to the valid
//    PermissionState "prompt" instead of returning "default";
//  - the WebGL patch is skipped when WebGLRenderingContext is not defined,
//    so the remaining patches still run.
var STEALTH_SCRIPT = `
(() => {
  // \u2500\u2500 1. navigator.webdriver removal \u2500\u2500
  // Most important: this is the #1 detection vector
  Object.defineProperty(navigator, 'webdriver', {
    get: () => undefined,
    configurable: true,
  });

  // Also delete from prototype
  delete Object.getPrototypeOf(navigator).webdriver;

  // \u2500\u2500 2. CDP marker removal \u2500\u2500
  // Chrome DevTools Protocol injects cdc_* properties on window
  for (const key of Object.keys(window)) {
    if (/^cdc_|^__webdriver|^__selenium|^__driver/.test(key)) {
      try { delete window[key]; } catch {}
    }
  }

  // \u2500\u2500 3. Chrome runtime spoofing \u2500\u2500
  // Real Chrome has window.chrome with runtime, loadTimes, csi
  if (!window.chrome) {
    window.chrome = {};
  }
  if (!window.chrome.runtime) {
    window.chrome.runtime = {
      connect: function() {},
      sendMessage: function() {},
      onMessage: { addListener: function() {} },
      id: undefined,
    };
  }
  if (!window.chrome.loadTimes) {
    window.chrome.loadTimes = function() {
      return {
        commitLoadTime: Date.now() / 1000 - 0.5,
        connectionInfo: 'h2',
        finishDocumentLoadTime: Date.now() / 1000 - 0.1,
        finishLoadTime: Date.now() / 1000 - 0.05,
        firstPaintAfterLoadTime: 0,
        firstPaintTime: Date.now() / 1000 - 0.3,
        navigationType: 'Other',
        npnNegotiatedProtocol: 'h2',
        requestTime: Date.now() / 1000 - 1,
        startLoadTime: Date.now() / 1000 - 0.8,
        wasAlternateProtocolAvailable: false,
        wasFetchedViaSpdy: true,
        wasNpnNegotiated: true,
      };
    };
  }
  if (!window.chrome.csi) {
    window.chrome.csi = function() {
      return {
        onloadT: Date.now(),
        startE: Date.now() - 500,
        pageT: 500,
        tran: 15,
      };
    };
  }

  // \u2500\u2500 4. Plugin array spoofing \u2500\u2500
  // Headless Chrome reports empty plugins; real Chrome has at least 2
  const fakePlugins = [
    { name: 'Chrome PDF Plugin', filename: 'internal-pdf-viewer', description: 'Portable Document Format', length: 1 },
    { name: 'Chrome PDF Viewer', filename: 'mhjfbmdgcfjbbpaeojofohoefgiehjai', description: '', length: 1 },
    { name: 'Native Client', filename: 'internal-nacl-plugin', description: '', length: 2 },
  ];

  Object.defineProperty(navigator, 'plugins', {
    get: () => {
      const arr = fakePlugins.map(p => {
        const plugin = { ...p, item: (i) => plugin, namedItem: (n) => plugin };
        return plugin;
      });
      arr.item = (i) => arr[i];
      arr.namedItem = (n) => arr.find(p => p.name === n);
      arr.refresh = () => {};
      return arr;
    },
  });

  // \u2500\u2500 5. Languages \u2500\u2500
  Object.defineProperty(navigator, 'languages', {
    get: () => ['en-US', 'en'],
  });
  Object.defineProperty(navigator, 'language', {
    get: () => 'en-US',
  });

  // \u2500\u2500 6. Platform consistency \u2500\u2500
  // Ensure platform matches user agent
  const platform = navigator.userAgent.includes('Mac') ? 'MacIntel' :
    navigator.userAgent.includes('Win') ? 'Win32' :
    navigator.userAgent.includes('Linux') ? 'Linux x86_64' : navigator.platform;
  Object.defineProperty(navigator, 'platform', { get: () => platform });

  // \u2500\u2500 7. Hardware concurrency & device memory \u2500\u2500
  // Headless often reports unusual values
  if (navigator.hardwareConcurrency < 2) {
    Object.defineProperty(navigator, 'hardwareConcurrency', { get: () => 8 });
  }
  if (!navigator.deviceMemory || navigator.deviceMemory < 2) {
    Object.defineProperty(navigator, 'deviceMemory', { get: () => 8 });
  }

  // \u2500\u2500 8. WebGL vendor/renderer spoofing \u2500\u2500
  // Headless reports "Google SwiftShader" which is a dead giveaway
  if (typeof WebGLRenderingContext !== 'undefined') {
    const origGetParameter = WebGLRenderingContext.prototype.getParameter;
    WebGLRenderingContext.prototype.getParameter = function(param) {
      // UNMASKED_VENDOR_WEBGL
      if (param === 0x9245) return 'Intel Inc.';
      // UNMASKED_RENDERER_WEBGL
      if (param === 0x9246) return 'Intel Iris OpenGL Engine';
      return origGetParameter.call(this, param);
    };
  }

  // Also for WebGL2
  if (typeof WebGL2RenderingContext !== 'undefined') {
    const origGetParameter2 = WebGL2RenderingContext.prototype.getParameter;
    WebGL2RenderingContext.prototype.getParameter = function(param) {
      if (param === 0x9245) return 'Intel Inc.';
      if (param === 0x9246) return 'Intel Iris OpenGL Engine';
      return origGetParameter2.call(this, param);
    };
  }

  // \u2500\u2500 9. Canvas fingerprint noise \u2500\u2500
  // Adds subtle deterministic noise to canvas output based on domain
  const seed = location.hostname.split('').reduce((a, c) => a + c.charCodeAt(0), 0);
  const origToDataURL = HTMLCanvasElement.prototype.toDataURL;
  HTMLCanvasElement.prototype.toDataURL = function(type) {
    const ctx = this.getContext('2d');
    let restore = null;
    if (ctx && this.width > 0 && this.height > 0) {
      try {
        const imageData = ctx.getImageData(0, 0, 1, 1);
        const original = imageData.data[0];
        // Shift a single channel of one pixel by the seeded amount
        imageData.data[0] = (original + seed) % 256;
        ctx.putImageData(imageData, 0, 0);
        restore = () => {
          imageData.data[0] = original;
          ctx.putImageData(imageData, 0, 0);
        };
      } catch {}
    }
    try {
      return origToDataURL.apply(this, arguments);
    } finally {
      // Put the real pixel back so the canvas itself is left unchanged
      // and repeated reads stay identical
      if (restore) {
        try { restore(); } catch {}
      }
    }
  };

  // \u2500\u2500 10. Permissions API \u2500\u2500
  // Keep permissions.query() consistent with Notification.permission
  const origQuery = navigator.permissions?.query?.bind(navigator.permissions);
  if (origQuery) {
    navigator.permissions.query = function(descriptor) {
      if (descriptor && descriptor.name === 'notifications') {
        const permission = (typeof Notification !== 'undefined' && Notification.permission) || 'default';
        // PermissionState has no 'default'; its equivalent is 'prompt'
        const state = permission === 'default' ? 'prompt' : permission;
        return Promise.resolve({ state, onchange: null });
      }
      return origQuery(descriptor);
    };
  }

  // \u2500\u2500 11. Notification constructor \u2500\u2500
  if (!window.Notification) {
    window.Notification = function() {};
    window.Notification.permission = 'default';
    window.Notification.requestPermission = () => Promise.resolve('default');
  }

  // \u2500\u2500 12. Connection type \u2500\u2500
  if (navigator.connection) {
    Object.defineProperty(navigator.connection, 'rtt', { get: () => 50 });
  }
})()
`;
697
/**
 * Registers `STEALTH_SCRIPT` on a page through `page.evaluateOnNewDocument`.
 *
 * @param {object} page - Page object exposing `evaluateOnNewDocument`.
 * @returns {Promise<void>} Settles once the registration call has completed.
 */
async function injectStealth(page) {
  const registration = page.evaluateOnNewDocument(STEALTH_SCRIPT);
  await registration;
}
700
// Extra Chrome command-line switches appended when stealth mode is enabled.
// NOTE(review): the --disable-features switch names IsolateOrigins and
// site-per-process, which look like site-isolation features — confirm that
// turning them off is an acceptable trade-off.
var STEALTH_ARGS = [
  "--disable-blink-features=AutomationControlled",
  `--disable-features=${["IsolateOrigins", "site-per-process"].join(",")}`,
  "--disable-infobars",
  `--window-size=${[1920, 1080].join(",")}`
];
706
+
19
707
  // src/browser/manager.ts
20
708
  var BrowserManager = class {
21
709
  browser = null;
22
710
  config;
711
+ isAttached = false;
23
712
  constructor(config = {}) {
24
713
  this.config = config;
25
714
  }
26
715
  async connect() {
27
716
  if (this.browser?.connected) return this.browser;
717
+ if (this.config.attach) {
718
+ const wsEndpoint = await resolveAttachTarget(this.config.attach);
719
+ log.info(`Attaching to Chrome: ${wsEndpoint}`);
720
+ this.browser = await puppeteer.connect({ browserWSEndpoint: wsEndpoint });
721
+ this.isAttached = true;
722
+ return this.browser;
723
+ }
28
724
  if (this.config.cdpEndpoint) {
29
725
  log.debug(`Connecting to CDP endpoint: ${this.config.cdpEndpoint}`);
30
726
  this.browser = await puppeteer.connect({
31
727
  browserWSEndpoint: this.config.cdpEndpoint
32
728
  });
729
+ this.isAttached = true;
33
730
  return this.browser;
34
731
  }
35
732
  const executablePath = this.config.executablePath || findChrome();
@@ -38,27 +735,48 @@ var BrowserManager = class {
38
735
  "Chrome/Chromium not found. Set LOBSTER_BROWSER_PATH or config browser.executablePath"
39
736
  );
40
737
  }
738
+ const args = [
739
+ "--no-sandbox",
740
+ "--disable-setuid-sandbox",
741
+ "--disable-dev-shm-usage",
742
+ "--disable-gpu"
743
+ ];
744
+ if (this.config.stealth) {
745
+ args.push(...STEALTH_ARGS);
746
+ }
747
+ let userDataDir;
748
+ if (this.config.profile) {
749
+ userDataDir = getProfileDataDir(this.config.profile);
750
+ log.info(`Using profile "${this.config.profile}" \u2192 ${userDataDir}`);
751
+ }
41
752
  log.debug(`Launching Chrome: ${executablePath}`);
42
753
  this.browser = await puppeteer.launch({
43
754
  executablePath,
44
755
  headless: this.config.headless ?? true,
45
- args: [
46
- "--no-sandbox",
47
- "--disable-setuid-sandbox",
48
- "--disable-dev-shm-usage",
49
- "--disable-gpu"
50
- ]
756
+ userDataDir,
757
+ args
51
758
  });
759
+ this.isAttached = false;
52
760
  return this.browser;
53
761
  }
54
762
  async newPage() {
55
763
  const browser = await this.connect();
56
- return browser.newPage();
764
+ const page = await browser.newPage();
765
+ if (this.config.stealth) {
766
+ await injectStealth(page);
767
+ log.debug("Stealth mode enabled");
768
+ }
769
+ return page;
57
770
  }
58
771
  async close() {
59
772
  if (this.browser) {
60
- await this.browser.close().catch(() => {
61
- });
773
+ if (this.isAttached) {
774
+ this.browser.disconnect();
775
+ log.debug("Disconnected from Chrome (attached mode)");
776
+ } else {
777
+ await this.browser.close().catch(() => {
778
+ });
779
+ }
62
780
  this.browser = null;
63
781
  }
64
782
  }
@@ -78,7 +796,7 @@ function findChrome() {
78
796
  "/usr/bin/chromium",
79
797
  "/snap/bin/chromium"
80
798
  ];
81
- return paths.find((p) => existsSync(p));
799
+ return paths.find((p) => existsSync3(p));
82
800
  }
83
801
 
84
802
  // src/browser/dom/flat-tree.ts
@@ -584,6 +1302,164 @@ var SNAPSHOT_SCRIPT = `
584
1302
  })()
585
1303
  `;
586
1304
 
1305
+ // src/browser/dom/compact-snapshot.ts
1306
+ var COMPACT_SNAPSHOT_SCRIPT = `
1307
+ (() => {
1308
+ const TOKEN_BUDGET = 800;
1309
+ const CHARS_PER_TOKEN = 4;
1310
+
1311
+ const INTERACTIVE_TAGS = new Set([
1312
+ 'a','button','input','select','textarea','details','summary','label',
1313
+ ]);
1314
+ const INTERACTIVE_ROLES = new Set([
1315
+ 'button','link','textbox','checkbox','radio','combobox','listbox',
1316
+ 'menu','menuitem','tab','switch','slider','searchbox','spinbutton',
1317
+ 'option','menuitemcheckbox','menuitemradio','treeitem',
1318
+ ]);
1319
+ const LANDMARK_TAGS = new Map([
1320
+ ['nav', 'Navigation'],
1321
+ ['main', 'Main Content'],
1322
+ ['header', 'Header'],
1323
+ ['footer', 'Footer'],
1324
+ ['aside', 'Sidebar'],
1325
+ ['form', 'Form'],
1326
+ ]);
1327
+ const LANDMARK_ROLES = new Map([
1328
+ ['navigation', 'Navigation'],
1329
+ ['main', 'Main Content'],
1330
+ ['banner', 'Header'],
1331
+ ['contentinfo', 'Footer'],
1332
+ ['complementary', 'Sidebar'],
1333
+ ['search', 'Search'],
1334
+ ['dialog', 'Dialog'],
1335
+ ]);
1336
+
1337
+ function isVisible(el) {
1338
+ if (el.offsetWidth === 0 && el.offsetHeight === 0 && el.tagName !== 'INPUT') return false;
1339
+ const s = getComputedStyle(el);
1340
+ return s.display !== 'none' && s.visibility !== 'hidden' && s.opacity !== '0';
1341
+ }
1342
+
1343
+ function isInteractive(el) {
1344
+ const tag = el.tagName.toLowerCase();
1345
+ if (INTERACTIVE_TAGS.has(tag)) {
1346
+ if (el.disabled) return false;
1347
+ if (tag === 'input' && el.type === 'hidden') return false;
1348
+ return true;
1349
+ }
1350
+ const role = el.getAttribute('role');
1351
+ if (role && INTERACTIVE_ROLES.has(role)) return true;
1352
+ if (el.contentEditable === 'true') return true;
1353
+ if (el.tabIndex >= 0 && el.getAttribute('tabindex') !== null) return true;
1354
+ return false;
1355
+ }
1356
+
1357
+ function getRole(el) {
1358
+ const role = el.getAttribute('role');
1359
+ if (role) return role;
1360
+ const tag = el.tagName.toLowerCase();
1361
+ if (tag === 'a') return 'link';
1362
+ if (tag === 'button' || tag === 'summary') return 'button';
1363
+ if (tag === 'input') return el.type || 'text';
1364
+ if (tag === 'select') return 'select';
1365
+ if (tag === 'textarea') return 'textarea';
1366
+ if (tag === 'label') return 'label';
1367
+ return tag;
1368
+ }
1369
+
1370
+ function getName(el) {
1371
+ return (
1372
+ el.getAttribute('aria-label') ||
1373
+ el.getAttribute('alt') ||
1374
+ el.getAttribute('title') ||
1375
+ el.getAttribute('placeholder') ||
1376
+ (el.tagName === 'INPUT' && (el.type === 'submit' || el.type === 'button') ? el.value : '') ||
1377
+ (el.id ? document.querySelector('label[for="' + el.id + '"]')?.textContent?.trim() : '') ||
1378
+ (el.children.length <= 2 ? el.textContent?.trim() : '') ||
1379
+ ''
1380
+ ).slice(0, 60);
1381
+ }
1382
+
1383
+ function getValue(el) {
1384
+ const tag = el.tagName.toLowerCase();
1385
+ if (tag === 'input') {
1386
+ const type = el.type || 'text';
1387
+ if (type === 'checkbox' || type === 'radio') return el.checked ? 'checked' : 'unchecked';
1388
+ if (type === 'password') return el.value ? '****' : '';
1389
+ return el.value ? el.value.slice(0, 30) : '';
1390
+ }
1391
+ if (tag === 'textarea') return el.value ? el.value.slice(0, 30) : '';
1392
+ if (tag === 'select' && el.selectedOptions?.length) return el.selectedOptions[0].text.slice(0, 30);
1393
+ return '';
1394
+ }
1395
+
1396
+ // Collect elements
1397
+ let idx = 0;
1398
+ let charsUsed = 0;
1399
+ const lines = [];
1400
+ let lastLandmark = '';
1401
+
1402
+ // Page header
1403
+ const scrollY = window.scrollY;
1404
+ const scrollMax = document.documentElement.scrollHeight - window.innerHeight;
1405
+ const scrollPct = scrollMax > 0 ? Math.round((scrollY / scrollMax) * 100) : 0;
1406
+ const header = 'url: ' + location.href + ' | scroll: ' + scrollPct + '%';
1407
+ lines.push(header);
1408
+ charsUsed += header.length;
1409
+
1410
+ // Walk DOM
1411
+ const walker = document.createTreeWalker(document.body, NodeFilter.SHOW_ELEMENT);
1412
+ let node;
1413
+ while ((node = walker.nextNode())) {
1414
+ if (!isVisible(node)) continue;
1415
+
1416
+ const tag = node.tagName.toLowerCase();
1417
+ if (['script','style','noscript','svg','path','meta','link','head','template'].includes(tag)) continue;
1418
+
1419
+ // Check for landmark
1420
+ const role = node.getAttribute('role');
1421
+ const landmark = LANDMARK_TAGS.get(tag) || (role ? LANDMARK_ROLES.get(role) : null);
1422
+ if (landmark && landmark !== lastLandmark) {
1423
+ const sectionLine = '--- ' + landmark + ' ---';
1424
+ if (charsUsed + sectionLine.length > TOKEN_BUDGET * CHARS_PER_TOKEN) break;
1425
+ lines.push(sectionLine);
1426
+ charsUsed += sectionLine.length;
1427
+ lastLandmark = landmark;
1428
+ }
1429
+
1430
+ // Only emit interactive elements
1431
+ if (!isInteractive(node)) continue;
1432
+
1433
+ const elRole = getRole(node);
1434
+ const name = getName(node);
1435
+ const value = getValue(node);
1436
+
1437
+ // Build compact line
1438
+ let line = '[' + idx + '] ' + elRole;
1439
+ if (name) line += ' "' + name.replace(/"/g, "'") + '"';
1440
+ if (value) line += ' val="' + value.replace(/"/g, "'") + '"';
1441
+
1442
+ // Check token budget
1443
+ if (charsUsed + line.length > TOKEN_BUDGET * CHARS_PER_TOKEN) {
1444
+ lines.push('... (' + (document.querySelectorAll('a,button,input,select,textarea,[role]').length - idx) + ' more elements)');
1445
+ break;
1446
+ }
1447
+
1448
+ // Annotate element with ref for clicking
1449
+ try { node.dataset.ref = String(idx); } catch {}
1450
+
1451
+ lines.push(line);
1452
+ charsUsed += line.length;
1453
+ idx++;
1454
+ }
1455
+
1456
+ return lines.join('\\n');
1457
+ })()
1458
+ `;
1459
+ function buildCompactSnapshotScript(tokenBudget = 800) {
1460
+ return COMPACT_SNAPSHOT_SCRIPT.replace("const TOKEN_BUDGET = 800;", `const TOKEN_BUDGET = ${tokenBudget};`);
1461
+ }
1462
+
587
1463
  // src/browser/dom/semantic-tree.ts
588
1464
  var SEMANTIC_TREE_SCRIPT = `
589
1465
  (() => {
@@ -1109,6 +1985,64 @@ var FORM_STATE_SCRIPT = `
1109
1985
  })()
1110
1986
  `;
1111
1987
 
1988
+ // src/browser/dom/interactive.ts
1989
+ var INTERACTIVE_ELEMENTS_SCRIPT = `
1990
+ (() => {
1991
+ const results = [];
1992
+
1993
+ function classify(el) {
1994
+ const tag = el.tagName.toLowerCase();
1995
+ const role = el.getAttribute('role');
1996
+ const types = [];
1997
+
1998
+ // Native interactive
1999
+ if (['a', 'button', 'input', 'select', 'textarea', 'details', 'summary'].includes(tag)) {
2000
+ types.push('native');
2001
+ }
2002
+
2003
+ // ARIA role interactive
2004
+ if (role && ['button', 'link', 'textbox', 'checkbox', 'radio', 'combobox', 'tab', 'switch', 'menuitem', 'slider'].includes(role)) {
2005
+ types.push('aria');
2006
+ }
2007
+
2008
+ // Contenteditable
2009
+ if (el.contentEditable === 'true') types.push('contenteditable');
2010
+
2011
+ // Focusable
2012
+ if (el.tabIndex >= 0 && el.getAttribute('tabindex') !== null) types.push('focusable');
2013
+
2014
+ // Has click listener (approximate)
2015
+ if (el.onclick) types.push('listener');
2016
+
2017
+ return types;
2018
+ }
2019
+
2020
+ let idx = 0;
2021
+ const walker = document.createTreeWalker(document.body, NodeFilter.SHOW_ELEMENT);
2022
+ let node;
2023
+ while (node = walker.nextNode()) {
2024
+ const types = classify(node);
2025
+ if (types.length === 0) continue;
2026
+
2027
+ const style = getComputedStyle(node);
2028
+ if (style.display === 'none' || style.visibility === 'hidden') continue;
2029
+
2030
+ const rect = node.getBoundingClientRect();
2031
+ results.push({
2032
+ index: idx++,
2033
+ tag: node.tagName.toLowerCase(),
2034
+ role: node.getAttribute('role') || '',
2035
+ text: (node.textContent || '').trim().slice(0, 100),
2036
+ types,
2037
+ ariaLabel: node.getAttribute('aria-label') || '',
2038
+ rect: { x: rect.x, y: rect.y, width: rect.width, height: rect.height },
2039
+ });
2040
+ }
2041
+
2042
+ return results;
2043
+ })()
2044
+ `;
2045
+
1112
2046
  // src/browser/interceptor.ts
1113
2047
  function buildInterceptorScript(pattern) {
1114
2048
  return `
@@ -1165,6 +2099,155 @@ var GET_INTERCEPTED_SCRIPT = `
1165
2099
  })()
1166
2100
  `;
1167
2101
 
2102
+ // src/browser/semantic-find.ts
2103
+ var SYNONYMS = {
2104
+ btn: ["button"],
2105
+ button: ["btn", "submit", "click"],
2106
+ submit: ["go", "send", "ok", "confirm", "done", "button"],
2107
+ search: ["find", "lookup", "query", "filter"],
2108
+ login: ["signin", "sign-in", "log-in", "authenticate"],
2109
+ signup: ["register", "create-account", "sign-up", "join"],
2110
+ logout: ["signout", "sign-out", "log-out"],
2111
+ close: ["dismiss", "x", "cancel", "exit"],
2112
+ menu: ["nav", "navigation", "hamburger", "sidebar"],
2113
+ nav: ["navigation", "menu", "navbar"],
2114
+ input: ["field", "textbox", "text", "entry"],
2115
+ email: ["mail", "e-mail"],
2116
+ password: ["pass", "pwd", "secret"],
2117
+ next: ["continue", "forward", "proceed"],
2118
+ back: ["previous", "return", "go-back"],
2119
+ save: ["store", "keep", "persist"],
2120
+ delete: ["remove", "trash", "discard", "destroy"],
2121
+ edit: ["modify", "change", "update"],
2122
+ add: ["create", "new", "plus", "insert"],
2123
+ settings: ["preferences", "config", "options", "gear"],
2124
+ profile: ["account", "user", "avatar"],
2125
+ home: ["main", "dashboard", "start"],
2126
+ link: ["anchor", "href", "url"],
2127
+ select: ["dropdown", "combo", "picker", "choose"],
2128
+ checkbox: ["check", "toggle", "tick"],
2129
+ upload: ["attach", "file", "browse"],
2130
+ download: ["save", "export"]
2131
+ };
2132
+ var ROLE_KEYWORDS = /* @__PURE__ */ new Set([
2133
+ "button",
2134
+ "link",
2135
+ "input",
2136
+ "textbox",
2137
+ "checkbox",
2138
+ "radio",
2139
+ "select",
2140
+ "dropdown",
2141
+ "tab",
2142
+ "menu",
2143
+ "menuitem",
2144
+ "switch",
2145
+ "slider",
2146
+ "combobox",
2147
+ "searchbox",
2148
+ "option"
2149
+ ]);
2150
+ function tokenize(text) {
2151
+ return text.toLowerCase().replace(/[^a-z0-9\s-]/g, " ").split(/[\s-]+/).filter((t) => t.length > 0);
2152
+ }
2153
+ function expandSynonyms(tokens) {
2154
+ const expanded = new Set(tokens);
2155
+ for (const token of tokens) {
2156
+ const syns = SYNONYMS[token];
2157
+ if (syns) {
2158
+ for (const syn of syns) expanded.add(syn);
2159
+ }
2160
+ }
2161
+ return expanded;
2162
+ }
2163
+ function freqMap(tokens) {
2164
+ const map = /* @__PURE__ */ new Map();
2165
+ for (const t of tokens) {
2166
+ map.set(t, (map.get(t) || 0) + 1);
2167
+ }
2168
+ return map;
2169
+ }
2170
+ function jaccardScore(queryTokens, descTokens) {
2171
+ const qFreq = freqMap(queryTokens);
2172
+ const dFreq = freqMap(descTokens);
2173
+ let intersection = 0;
2174
+ let union = 0;
2175
+ const allTokens = /* @__PURE__ */ new Set([...qFreq.keys(), ...dFreq.keys()]);
2176
+ for (const token of allTokens) {
2177
+ const qCount = qFreq.get(token) || 0;
2178
+ const dCount = dFreq.get(token) || 0;
2179
+ intersection += Math.min(qCount, dCount);
2180
+ union += Math.max(qCount, dCount);
2181
+ }
2182
+ return union === 0 ? 0 : intersection / union;
2183
+ }
2184
+ function prefixScore(queryTokens, descTokens) {
2185
+ if (queryTokens.length === 0 || descTokens.length === 0) return 0;
2186
+ let matches = 0;
2187
+ for (const qt of queryTokens) {
2188
+ if (qt.length < 3) continue;
2189
+ for (const dt of descTokens) {
2190
+ if (dt.startsWith(qt) || qt.startsWith(dt)) {
2191
+ matches += 0.5;
2192
+ break;
2193
+ }
2194
+ }
2195
+ }
2196
+ return Math.min(matches / queryTokens.length, 0.3);
2197
+ }
2198
+ function roleBoost(queryTokens, elementRole) {
2199
+ const roleLower = elementRole.toLowerCase();
2200
+ for (const qt of queryTokens) {
2201
+ if (ROLE_KEYWORDS.has(qt) && roleLower.includes(qt)) {
2202
+ return 0.2;
2203
+ }
2204
+ }
2205
+ return 0;
2206
+ }
2207
+ function scoreElement(queryTokens, queryExpanded, element) {
2208
+ const descParts = [
2209
+ element.text,
2210
+ element.role,
2211
+ element.tag,
2212
+ element.ariaLabel
2213
+ ].filter(Boolean);
2214
+ const descText = descParts.join(" ");
2215
+ const descTokens = tokenize(descText);
2216
+ if (descTokens.length === 0) return 0;
2217
+ const descExpanded = expandSynonyms(descTokens);
2218
+ const expandedQueryTokens = [...queryExpanded];
2219
+ const expandedDescTokens = [...descExpanded];
2220
+ const jaccard = jaccardScore(expandedQueryTokens, expandedDescTokens);
2221
+ const prefix = prefixScore(queryTokens, descTokens);
2222
+ const role = roleBoost(queryTokens, element.role || element.tag);
2223
+ const queryStr = queryTokens.join(" ");
2224
+ const descStr = descTokens.join(" ");
2225
+ const exactBonus = descStr.includes(queryStr) ? 0.3 : 0;
2226
+ return Math.min(jaccard + prefix + role + exactBonus, 1);
2227
+ }
2228
+ function semanticFind(elements, query, options) {
2229
+ const maxResults = options?.maxResults ?? 5;
2230
+ const minScore = options?.minScore ?? 0.3;
2231
+ const queryTokens = tokenize(query);
2232
+ if (queryTokens.length === 0) return [];
2233
+ const queryExpanded = expandSynonyms(queryTokens);
2234
+ const scored = [];
2235
+ for (const el of elements) {
2236
+ const score = scoreElement(queryTokens, queryExpanded, el);
2237
+ if (score >= minScore) {
2238
+ scored.push({
2239
+ ref: el.index,
2240
+ score: Math.round(score * 100) / 100,
2241
+ text: (el.text || el.ariaLabel || "").slice(0, 60),
2242
+ role: el.role || el.tag,
2243
+ tag: el.tag
2244
+ });
2245
+ }
2246
+ }
2247
+ scored.sort((a, b) => b.score - a.score);
2248
+ return scored.slice(0, maxResults);
2249
+ }
2250
+
1168
2251
  // src/browser/page-adapter.ts
1169
2252
  var PuppeteerPage = class {
1170
2253
  page;
@@ -1192,7 +2275,10 @@ var PuppeteerPage = class {
1192
2275
  async evaluate(js) {
1193
2276
  return this.page.evaluate(js);
1194
2277
  }
1195
- async snapshot(_opts) {
2278
+ async snapshot(opts) {
2279
+ if (opts?.compact) {
2280
+ return this.page.evaluate(COMPACT_SNAPSHOT_SCRIPT);
2281
+ }
1196
2282
  return this.page.evaluate(SNAPSHOT_SCRIPT);
1197
2283
  }
1198
2284
  async semanticTree(_opts) {
@@ -1464,69 +2550,15 @@ var PuppeteerPage = class {
1464
2550
  active: p === this.page
1465
2551
  }));
1466
2552
  }
2553
+ async find(query, options) {
2554
+ const elements = await this.page.evaluate(INTERACTIVE_ELEMENTS_SCRIPT);
2555
+ return semanticFind(elements, query, options);
2556
+ }
1467
2557
  async close() {
1468
2558
  await this.page.close();
1469
2559
  }
1470
2560
  };
1471
2561
 
1472
- // src/browser/dom/interactive.ts
1473
- var INTERACTIVE_ELEMENTS_SCRIPT = `
1474
- (() => {
1475
- const results = [];
1476
-
1477
- function classify(el) {
1478
- const tag = el.tagName.toLowerCase();
1479
- const role = el.getAttribute('role');
1480
- const types = [];
1481
-
1482
- // Native interactive
1483
- if (['a', 'button', 'input', 'select', 'textarea', 'details', 'summary'].includes(tag)) {
1484
- types.push('native');
1485
- }
1486
-
1487
- // ARIA role interactive
1488
- if (role && ['button', 'link', 'textbox', 'checkbox', 'radio', 'combobox', 'tab', 'switch', 'menuitem', 'slider'].includes(role)) {
1489
- types.push('aria');
1490
- }
1491
-
1492
- // Contenteditable
1493
- if (el.contentEditable === 'true') types.push('contenteditable');
1494
-
1495
- // Focusable
1496
- if (el.tabIndex >= 0 && el.getAttribute('tabindex') !== null) types.push('focusable');
1497
-
1498
- // Has click listener (approximate)
1499
- if (el.onclick) types.push('listener');
1500
-
1501
- return types;
1502
- }
1503
-
1504
- let idx = 0;
1505
- const walker = document.createTreeWalker(document.body, NodeFilter.SHOW_ELEMENT);
1506
- let node;
1507
- while (node = walker.nextNode()) {
1508
- const types = classify(node);
1509
- if (types.length === 0) continue;
1510
-
1511
- const style = getComputedStyle(node);
1512
- if (style.display === 'none' || style.visibility === 'hidden') continue;
1513
-
1514
- const rect = node.getBoundingClientRect();
1515
- results.push({
1516
- index: idx++,
1517
- tag: node.tagName.toLowerCase(),
1518
- role: node.getAttribute('role') || '',
1519
- text: (node.textContent || '').trim().slice(0, 100),
1520
- types,
1521
- ariaLabel: node.getAttribute('aria-label') || '',
1522
- rect: { x: rect.x, y: rect.y, width: rect.width, height: rect.height },
1523
- });
1524
- }
1525
-
1526
- return results;
1527
- })()
1528
- `;
1529
-
1530
2562
  // src/browser/wait.ts
1531
2563
  async function waitForCondition(page, condition, timeout = 3e4) {
1532
2564
  switch (condition) {
@@ -1958,6 +2990,43 @@ async function lobsterFetch(url, options) {
1958
2990
  const timeout = options?.timeout || 3e4;
1959
2991
  const dump = options?.dump || "markdown";
1960
2992
  const start = Date.now();
2993
+ const { isPdfUrl: isPdfUrl2, isPdfResponse: isPdfResponse2, extractPdf: extractPdf2 } = await Promise.resolve().then(() => (init_pdf(), pdf_exports));
2994
+ if (isPdfUrl2(url)) {
2995
+ const pdfResult = await extractPdf2(url);
2996
+ const duration2 = Date.now() - start;
2997
+ let content2;
2998
+ switch (dump) {
2999
+ case "markdown":
3000
+ content2 = pdfResult.markdown;
3001
+ break;
3002
+ case "text":
3003
+ content2 = pdfResult.text;
3004
+ break;
3005
+ case "html":
3006
+ content2 = `<pre>${pdfResult.text}</pre>`;
3007
+ break;
3008
+ case "snapshot":
3009
+ content2 = `[PDF] ${pdfResult.metadata.title} (${pdfResult.metadata.pages} pages, ${pdfResult.wordCount} words)
3010
+
3011
+ ${pdfResult.text.slice(0, 5e3)}`;
3012
+ break;
3013
+ case "links":
3014
+ content2 = "";
3015
+ break;
3016
+ default:
3017
+ content2 = pdfResult.markdown;
3018
+ }
3019
+ return {
3020
+ url,
3021
+ finalUrl: url,
3022
+ title: pdfResult.metadata.title,
3023
+ content: content2,
3024
+ links: [],
3025
+ engine: "pdf",
3026
+ duration: duration2,
3027
+ statusCode: 200
3028
+ };
3029
+ }
1961
3030
  const resp = await fetch(url, {
1962
3031
  headers: {
1963
3032
  "User-Agent": "LobsterCLI/0.1 (+https://github.com/iexcalibur/lobster-cli)",
@@ -1971,6 +3040,48 @@ async function lobsterFetch(url, options) {
1971
3040
  if (!resp.ok) {
1972
3041
  throw new Error(`HTTP ${resp.status} ${resp.statusText}`);
1973
3042
  }
3043
+ const contentType = resp.headers.get("content-type") || "";
3044
+ if (isPdfResponse2(contentType)) {
3045
+ const arrayBuffer = await resp.arrayBuffer();
3046
+ const buffer = Buffer.from(arrayBuffer);
3047
+ const pdfMod = await import("pdf-parse");
3048
+ const pdfParseFn2 = pdfMod.PDFParse || pdfMod.default || pdfMod;
3049
+ const pdfResult = await pdfParseFn2(buffer);
3050
+ const info = pdfResult.info || {};
3051
+ const metadata = {
3052
+ title: info.Title || "untitled",
3053
+ author: info.Author || "",
3054
+ pages: pdfResult.numpages
3055
+ };
3056
+ const duration2 = Date.now() - start;
3057
+ const text = pdfResult.text || "";
3058
+ let content2;
3059
+ switch (dump) {
3060
+ case "text":
3061
+ content2 = text;
3062
+ break;
3063
+ case "html":
3064
+ content2 = `<pre>${text}</pre>`;
3065
+ break;
3066
+ case "snapshot":
3067
+ content2 = `[PDF] ${metadata.title} (${metadata.pages} pages)
3068
+
3069
+ ${text.slice(0, 5e3)}`;
3070
+ break;
3071
+ default:
3072
+ content2 = text;
3073
+ }
3074
+ return {
3075
+ url,
3076
+ finalUrl: resp.url || url,
3077
+ title: metadata.title,
3078
+ content: content2,
3079
+ links: [],
3080
+ engine: "pdf",
3081
+ duration: duration2,
3082
+ statusCode: 200
3083
+ };
3084
+ }
1974
3085
  const html = await resp.text();
1975
3086
  const duration = Date.now() - start;
1976
3087
  const finalUrl = resp.url || url;
@@ -2010,8 +3121,12 @@ async function lobsterFetch(url, options) {
2010
3121
  }
2011
3122
  return { url, finalUrl, status: resp.status, title, content, links, duration };
2012
3123
  }
3124
+
3125
+ // src/browser/index.ts
3126
+ init_pdf();
2013
3127
  export {
2014
3128
  BrowserManager,
3129
+ COMPACT_SNAPSHOT_SCRIPT,
2015
3130
  FLAT_TREE_SCRIPT,
2016
3131
  FORM_STATE_SCRIPT,
2017
3132
  GET_INTERCEPTED_SCRIPT,
@@ -2020,15 +3135,31 @@ export {
2020
3135
  PuppeteerPage,
2021
3136
  SEMANTIC_TREE_SCRIPT,
2022
3137
  SNAPSHOT_SCRIPT,
3138
+ STEALTH_ARGS,
3139
+ STEALTH_SCRIPT,
3140
+ buildCompactSnapshotScript,
2023
3141
  buildInterceptorScript,
2024
3142
  buildSnapshotScript,
3143
+ createProfile,
3144
+ discoverChrome,
2025
3145
  extractLinks,
2026
3146
  extractMarkdown,
3147
+ extractPdf,
2027
3148
  extractSnapshot,
2028
3149
  extractText,
2029
3150
  flatTreeToString,
3151
+ getProfileDataDir,
3152
+ injectStealth,
3153
+ isPdfResponse,
3154
+ isPdfUrl,
3155
+ listProfiles,
2030
3156
  lobsterFetch,
2031
3157
  parseHtml,
3158
+ removeProfile,
3159
+ resetProfileCache,
3160
+ resolveAttachTarget,
3161
+ semanticFind,
3162
+ tryExtractPdf,
2032
3163
  waitForCondition
2033
3164
  };
2034
3165
  //# sourceMappingURL=index.js.map