lobster-cli 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +147 -269
- package/dist/browser/chrome-attach.js +102 -0
- package/dist/browser/chrome-attach.js.map +1 -0
- package/dist/browser/dom/compact-snapshot.js +162 -0
- package/dist/browser/dom/compact-snapshot.js.map +1 -0
- package/dist/browser/dom/index.js +160 -0
- package/dist/browser/dom/index.js.map +1 -1
- package/dist/browser/index.js +907 -70
- package/dist/browser/index.js.map +1 -1
- package/dist/browser/manager.js +443 -11
- package/dist/browser/manager.js.map +1 -1
- package/dist/browser/page-adapter.js +370 -1
- package/dist/browser/page-adapter.js.map +1 -1
- package/dist/browser/profiles.js +238 -0
- package/dist/browser/profiles.js.map +1 -0
- package/dist/browser/semantic-find.js +152 -0
- package/dist/browser/semantic-find.js.map +1 -0
- package/dist/browser/stealth.js +187 -0
- package/dist/browser/stealth.js.map +1 -0
- package/dist/config/index.js +8 -1
- package/dist/config/index.js.map +1 -1
- package/dist/config/schema.js +8 -1
- package/dist/config/schema.js.map +1 -1
- package/dist/domain-guard.js +103 -0
- package/dist/domain-guard.js.map +1 -0
- package/dist/index.js +851 -48
- package/dist/index.js.map +1 -1
- package/dist/lib.js +1141 -244
- package/dist/lib.js.map +1 -1
- package/dist/router/index.js +862 -61
- package/dist/router/index.js.map +1 -1
- package/package.json +2 -1
package/dist/browser/index.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
// src/browser/manager.ts
|
|
2
2
|
import puppeteer from "puppeteer-core";
|
|
3
|
-
import { existsSync } from "fs";
|
|
3
|
+
import { existsSync as existsSync3 } from "fs";
|
|
4
4
|
|
|
5
5
|
// src/utils/logger.ts
|
|
6
6
|
import chalk from "chalk";
|
|
@@ -16,20 +16,509 @@ var log = {
|
|
|
16
16
|
dim: (msg) => console.log(chalk.dim(msg))
|
|
17
17
|
};
|
|
18
18
|
|
|
19
|
+
// src/browser/profiles.ts
|
|
20
|
+
import { existsSync as existsSync2, mkdirSync as mkdirSync2, readFileSync as readFileSync2, writeFileSync as writeFileSync2, readdirSync, rmSync, statSync } from "fs";
|
|
21
|
+
import { join as join2 } from "path";
|
|
22
|
+
|
|
23
|
+
// src/config/index.ts
|
|
24
|
+
import { readFileSync, writeFileSync, mkdirSync, existsSync } from "fs";
|
|
25
|
+
import { join } from "path";
|
|
26
|
+
import { homedir } from "os";
|
|
27
|
+
import yaml from "js-yaml";
|
|
28
|
+
|
|
29
|
+
// src/config/schema.ts
|
|
30
|
+
import { z } from "zod";
|
|
31
|
+
// Zod schema describing the lobster configuration file.
// Each section is declared on its own and then assembled; every field and
// every section declares a default.
var configSchema = (() => {
  // LLM provider connection and sampling settings.
  const llm = z.object({
    provider: z.enum(["openai", "anthropic", "gemini", "ollama"]).default("openai"),
    baseURL: z.string().default("https://api.openai.com/v1"),
    model: z.string().default("gpt-4o"),
    apiKey: z.string().default(""),
    temperature: z.number().min(0).max(2).default(0.1),
    maxRetries: z.number().int().min(0).default(3)
  }).default({});

  // Browser launch / attach settings.
  const browser = z.object({
    executablePath: z.string().default(""),
    headless: z.boolean().default(true),
    connectTimeout: z.number().default(30),
    commandTimeout: z.number().default(60),
    cdpEndpoint: z.string().default(""),
    profile: z.string().default(""),
    stealth: z.boolean().default(false)
  }).default({});

  // Agent loop limits.
  const agent = z.object({
    maxSteps: z.number().int().default(40),
    stepDelay: z.number().default(0.4)
  }).default({});

  // Domain allow/block lists.
  const domains = z.object({
    allow: z.array(z.string()).default([]),
    block: z.array(z.string()).default([]),
    blockMessage: z.string().default("")
  }).default({});

  // Output formatting preferences.
  const output = z.object({
    defaultFormat: z.enum(["table", "json", "yaml", "markdown", "csv"]).default("table"),
    color: z.boolean().default(true)
  }).default({});

  return z.object({ llm, browser, agent, domains, output });
})();
|
|
63
|
+
|
|
64
|
+
// src/config/index.ts
|
|
65
|
+
var CONFIG_DIR = join(homedir(), ".lobster");
|
|
66
|
+
var CONFIG_FILE = join(CONFIG_DIR, "config.yaml");
|
|
67
|
+
/**
 * Return the directory that holds the lobster configuration.
 *
 * @returns {string} The value of the module-level CONFIG_DIR constant.
 */
function getConfigDir() {
  const configDir = CONFIG_DIR;
  return configDir;
}
|
|
70
|
+
|
|
71
|
+
// src/browser/profiles.ts
|
|
72
|
+
var PROFILES_DIR = () => join2(getConfigDir(), "profiles");
|
|
73
|
+
var META_FILE = ".lobster-meta.json";
|
|
74
|
+
var VALID_NAME = /^[a-zA-Z0-9][a-zA-Z0-9_-]{0,63}$/;
|
|
75
|
+
// Profile names that may not be used. Entries are lower-case.
// Besides "default" and "system", the list matches the Windows reserved
// device names (con, prn, aux, nul, com1-com9, lpt1-lpt9).
var RESERVED_NAMES = /* @__PURE__ */ new Set([
  "default",
  "system",
  "con",
  "prn",
  "aux",
  "nul",
  ...Array.from({ length: 9 }, (_, i) => `com${i + 1}`),
  ...Array.from({ length: 9 }, (_, i) => `lpt${i + 1}`)
]);
|
|
101
|
+
var CACHE_DIRS = [
|
|
102
|
+
"Cache",
|
|
103
|
+
"Code Cache",
|
|
104
|
+
"GPUCache",
|
|
105
|
+
"GrShaderCache",
|
|
106
|
+
"ShaderCache",
|
|
107
|
+
"Service Worker",
|
|
108
|
+
"Sessions",
|
|
109
|
+
"Session Storage",
|
|
110
|
+
"blob_storage"
|
|
111
|
+
];
|
|
112
|
+
/**
 * Make sure the profiles root directory exists, creating it (and any missing
 * parents) when it does not.
 */
function ensureProfilesDir() {
  const profilesRoot = PROFILES_DIR();
  if (existsSync2(profilesRoot)) {
    return;
  }
  mkdirSync2(profilesRoot, { recursive: true });
}
|
|
116
|
+
/**
 * Validate a profile name.
 *
 * @param {string} name - Candidate profile name.
 * @throws {Error} When the name does not match VALID_NAME, or when its
 *   lower-cased form is listed in RESERVED_NAMES.
 */
function validateName(name) {
  const wellFormed = VALID_NAME.test(name);
  if (!wellFormed) {
    throw new Error(`Invalid profile name "${name}". Use only letters, numbers, hyphens, underscores (max 64 chars).`);
  }

  // Reserved names are compared case-insensitively.
  const lowered = name.toLowerCase();
  if (RESERVED_NAMES.has(lowered)) {
    throw new Error(`"${name}" is a reserved name. Choose a different profile name.`);
  }
}
|
|
124
|
+
/**
 * Build the directory path for a profile. The name is not validated here.
 *
 * @param {string} name - Profile name.
 * @returns {string} PROFILES_DIR() joined with `name`.
 */
function getProfileDir(name) {
  const profilesRoot = PROFILES_DIR();
  return join2(profilesRoot, name);
}
|
|
127
|
+
/**
 * Read and parse the metadata file stored inside a profile directory.
 *
 * @param {string} profileDir - Path of the profile directory.
 * @returns {*} The parsed JSON value, or null when the file is missing,
 *   unreadable, or not valid JSON.
 */
function readMeta(profileDir) {
  const metaPath = join2(profileDir, META_FILE);
  if (existsSync2(metaPath)) {
    try {
      const raw = readFileSync2(metaPath, "utf-8");
      return JSON.parse(raw);
    } catch {
      // Read or parse failure is treated the same as "no metadata".
      return null;
    }
  }
  return null;
}
|
|
136
|
+
/**
 * Write profile metadata as 2-space-indented JSON into the profile directory.
 *
 * @param {string} profileDir - Path of the profile directory.
 * @param {object} meta - Metadata to serialise.
 */
function writeMeta(profileDir, meta) {
  const metaPath = join2(profileDir, META_FILE);
  const serialised = JSON.stringify(meta, null, 2);
  writeFileSync2(metaPath, serialised);
}
|
|
139
|
+
/**
 * Compute the total size of the regular files under a directory, in megabytes.
 *
 * Sizes are accumulated in bytes across the whole tree and converted to
 * megabytes exactly once, so small subdirectories are not rounded away and
 * rounding error does not compound with nesting depth.
 *
 * The walk is best-effort: a directory that cannot be listed counts as 0 and
 * an entry that cannot be stat'ed is skipped; neither aborts the rest.
 *
 * @param {string} dirPath - Directory to measure.
 * @returns {number} Size in MB (1 MB = 1024 * 1024 bytes), rounded to one decimal.
 */
function getDirSizeMB(dirPath) {
  const BYTES_PER_MB = 1024 * 1024;

  // Recursively sum file sizes in bytes.
  const sizeInBytes = (dir) => {
    let entries;
    try {
      entries = readdirSync(dir, { withFileTypes: true });
    } catch {
      // Missing or unreadable directory: contributes nothing.
      return 0;
    }

    let bytes = 0;
    for (const entry of entries) {
      const fullPath = join2(dir, entry.name);
      try {
        if (entry.isFile()) {
          bytes += statSync(fullPath).size;
        } else if (entry.isDirectory() && entry.name !== ".lobster-meta.json") {
          // The name check is kept from the previous implementation; it only
          // matters if a *directory* carries the metadata file's name.
          bytes += sizeInBytes(fullPath);
        }
      } catch {
        // Entry disappeared or is unreadable; keep measuring the others.
      }
    }
    return bytes;
  };

  return Math.round(sizeInBytes(dirPath) / BYTES_PER_MB * 10) / 10;
}
|
|
155
|
+
/**
 * Create a new, empty profile directory and write its metadata file.
 *
 * @param {string} name - Profile name; checked with validateName.
 * @returns {{name: string, createdAt: string, lastUsed: string}} The metadata
 *   that was written (timestamps are ISO-8601 strings).
 * @throws {Error} When the name is invalid or the profile already exists.
 */
function createProfile(name) {
  validateName(name);
  ensureProfilesDir();

  const profileDir = getProfileDir(name);
  if (existsSync2(profileDir)) {
    throw new Error(`Profile "${name}" already exists.`);
  }
  mkdirSync2(profileDir, { recursive: true });

  const createdAt = (/* @__PURE__ */ new Date()).toISOString();
  const lastUsed = (/* @__PURE__ */ new Date()).toISOString();
  const meta = { name, createdAt, lastUsed };

  writeMeta(profileDir, meta);
  log.success(`Profile "${name}" created at ${profileDir}`);
  return meta;
}
|
|
172
|
+
/**
 * List every profile directory together with its metadata and size on disk.
 *
 * Metadata comes from a JSON file on disk and is therefore not trusted: when
 * it is missing, not an object, or lacks a usable `name`, the directory name
 * and "unknown" timestamps are used instead. This keeps the final sort from
 * throwing on a damaged or hand-edited metadata file.
 *
 * @returns {Array<object>} One entry per profile directory with at least
 *   `name`, `createdAt`, `lastUsed` and `sizeMB`, sorted by name.
 */
function listProfiles() {
  ensureProfilesDir();
  const profilesRoot = PROFILES_DIR();

  let entries = [];
  try {
    entries = readdirSync(profilesRoot, { withFileTypes: true });
  } catch {
    // Best-effort listing: an unreadable profiles directory yields no profiles.
  }

  const profiles = [];
  for (const entry of entries) {
    if (!entry.isDirectory()) continue;

    const profileDir = join2(profilesRoot, entry.name);
    const stored = readMeta(profileDir);
    const meta = stored !== null && typeof stored === "object" && !Array.isArray(stored) ? stored : {};
    const hasName = typeof meta.name === "string" && meta.name !== "";

    profiles.push({
      createdAt: "unknown",
      lastUsed: "unknown",
      ...meta,
      // Fall back to the directory name so sorting always has a string.
      name: hasName ? meta.name : entry.name,
      sizeMB: getDirSizeMB(profileDir)
    });
  }

  return profiles.sort((a, b) => a.name.localeCompare(b.name));
}
|
|
198
|
+
/**
 * Delete a profile directory and everything inside it.
 *
 * The name is checked against VALID_NAME before any path is built: the
 * directory is removed recursively with `force`, so a name such as "../.."
 * must never reach the filesystem call. Reserved names are deliberately not
 * rejected here, so a directory that already exists under such a name can
 * still be removed.
 *
 * @param {string} name - Name of the profile to delete.
 * @throws {Error} When the name is malformed or the profile does not exist.
 */
function removeProfile(name) {
  if (typeof name !== "string" || !VALID_NAME.test(name)) {
    throw new Error(`Invalid profile name "${name}". Use only letters, numbers, hyphens, underscores (max 64 chars).`);
  }

  const dir = getProfileDir(name);
  if (!existsSync2(dir)) {
    throw new Error(`Profile "${name}" does not exist.`);
  }

  rmSync(dir, { recursive: true, force: true });
  log.success(`Profile "${name}" deleted.`);
}
|
|
206
|
+
/**
 * Resolve the data directory for a profile.
 *
 * An existing profile gets its `lastUsed` timestamp refreshed (metadata is
 * rebuilt with placeholder values when it cannot be read); a missing profile
 * is created.
 *
 * @param {string} name - Profile name; checked with validateName.
 * @returns {string} Path of the profile directory.
 * @throws {Error} When the name is invalid.
 */
function getProfileDataDir(name) {
  validateName(name);
  const profileDir = getProfileDir(name);

  if (existsSync2(profileDir)) {
    const meta = readMeta(profileDir) || { name, createdAt: "unknown", lastUsed: "" };
    meta.lastUsed = (/* @__PURE__ */ new Date()).toISOString();
    writeMeta(profileDir, meta);
  } else {
    createProfile(name);
  }

  return profileDir;
}
|
|
218
|
+
/**
 * Delete the cache directories of a profile while keeping the profile itself.
 *
 * Each name in CACHE_DIRS is looked up both directly in the profile directory
 * and in its "Default" subdirectory; every match is removed recursively.
 *
 * The name is checked against VALID_NAME first, because the resulting paths
 * are passed to a recursive, forced delete and must stay inside the profiles
 * directory.
 *
 * @param {string} name - Name of the profile whose caches are cleared.
 * @throws {Error} When the name is malformed or the profile does not exist.
 */
function resetProfileCache(name) {
  if (typeof name !== "string" || !VALID_NAME.test(name)) {
    throw new Error(`Invalid profile name "${name}". Use only letters, numbers, hyphens, underscores (max 64 chars).`);
  }

  const dir = getProfileDir(name);
  if (!existsSync2(dir)) {
    throw new Error(`Profile "${name}" does not exist.`);
  }

  const bases = [dir, join2(dir, "Default")];
  let cleaned = 0;
  for (const cacheDir of CACHE_DIRS) {
    for (const base of bases) {
      const target = join2(base, cacheDir);
      if (existsSync2(target)) {
        rmSync(target, { recursive: true, force: true });
        cleaned++;
      }
    }
  }

  log.success(`Profile "${name}" cache reset (${cleaned} directories cleaned).`);
}
|
|
235
|
+
|
|
236
|
+
// src/browser/chrome-attach.ts
|
|
237
|
+
import http from "http";
|
|
238
|
+
var DEFAULT_PORTS = [9222, 9229, 9333, 9515];
|
|
239
|
+
var PROBE_TIMEOUT = 1500;
|
|
240
|
+
/**
 * Probe a local port for a Chrome DevTools endpoint by requesting
 * http://127.0.0.1:<port>/json/version.
 *
 * The returned promise never rejects and always settles: any failure (request
 * error, timeout, invalid JSON, missing `webSocketDebuggerUrl`, or a response
 * that is cut off before it ends) resolves to null.
 *
 * @param {number} port - TCP port on 127.0.0.1 to probe.
 * @returns {Promise<{wsEndpoint: string, port: number, version: string, browser: string} | null>}
 */
function probePort(port) {
  return new Promise((resolve) => {
    const req = http.get(`http://127.0.0.1:${port}/json/version`, {
      timeout: PROBE_TIMEOUT
    }, (res) => {
      let data = "";
      // Decode as text so multi-byte characters split across chunks stay intact.
      res.setEncoding("utf8");
      res.on("data", (chunk) => {
        data += chunk;
      });
      res.on("end", () => {
        try {
          const info = JSON.parse(data);
          if (info.webSocketDebuggerUrl) {
            resolve({
              wsEndpoint: info.webSocketDebuggerUrl,
              port,
              version: info["Protocol-Version"] || "",
              browser: info.Browser || ""
            });
          } else {
            resolve(null);
          }
        } catch {
          resolve(null);
        }
      });
      // A response aborted mid-body emits no 'end'; without these handlers the
      // promise would stay pending forever. After a normal 'end' the promise is
      // already resolved, so the extra resolve(null) on 'close' is a no-op.
      res.on("error", () => resolve(null));
      res.on("close", () => resolve(null));
    });
    req.on("error", () => resolve(null));
    req.on("timeout", () => {
      req.destroy();
      resolve(null);
    });
  });
}
|
|
274
|
+
/**
 * Probe a list of ports in parallel and report the first one (in list order)
 * that answered as a Chrome debugging endpoint.
 *
 * @param {number[]} [ports] - Ports to probe; DEFAULT_PORTS when omitted.
 * @returns {Promise<object|null>} The probePort result for the first port that
 *   responded, or null when none did.
 */
async function discoverChrome(ports) {
  const candidates = ports || DEFAULT_PORTS;
  log.debug(`Scanning ports for Chrome: ${candidates.join(", ")}`);

  const probes = await Promise.all(candidates.map(probePort));

  let found = null;
  for (const probe of probes) {
    if (probe) {
      found = probe;
      break;
    }
  }

  if (!found) {
    log.debug("No running Chrome instance found on debug ports.");
    return found;
  }
  log.info(`Found Chrome on port ${found.port}: ${found.browser}`);
  return found;
}
|
|
286
|
+
/**
 * Look up the WebSocket debugger URL exposed on a port.
 *
 * @param {number} port - Port to probe via probePort.
 * @returns {Promise<string|null>} The endpoint URL, or null when the probe
 *   failed or reported no endpoint.
 */
async function getWebSocketDebuggerUrl(port) {
  const probe = await probePort(port);
  if (probe === null || probe === undefined) {
    return null;
  }
  return probe.wsEndpoint || null;
}
|
|
290
|
+
/**
 * Turn an "attach" setting into a browser WebSocket endpoint.
 *
 * Accepted targets:
 *  - `true` or `"true"`: auto-discover a running Chrome via discoverChrome().
 *  - a `ws://` / `wss://` URL: returned unchanged.
 *  - a port, either as a number or as a string of decimal digits (surrounding
 *    whitespace allowed): resolved via getWebSocketDebuggerUrl().
 *
 * The port must consist of digits only. Lenient parsing previously accepted
 * inputs such as "9222abc" and silently used port 9222.
 *
 * @param {boolean|string|number} target - Attach target to resolve.
 * @returns {Promise<string>} WebSocket endpoint to connect to.
 * @throws {Error} When no Chrome is found or the target is not understood.
 */
async function resolveAttachTarget(target) {
  if (target === true || target === "true") {
    const discovered = await discoverChrome();
    if (!discovered) {
      throw new Error(
        "No running Chrome found. Start Chrome with:\n google-chrome --remote-debugging-port=9222\n # or on Mac:\n /Applications/Google\\ Chrome.app/Contents/MacOS/Google\\ Chrome --remote-debugging-port=9222"
      );
    }
    return discovered.wsEndpoint;
  }

  // A numeric port is treated like its string form.
  const spec = typeof target === "number" ? String(target) : target;
  if (typeof spec !== "string") {
    throw new Error("Invalid attach target.");
  }

  if (spec.startsWith("ws://") || spec.startsWith("wss://")) {
    return spec;
  }

  const portText = spec.trim();
  if (/^\d+$/.test(portText)) {
    const port = Number(portText);
    if (port > 0 && port < 65536) {
      const url = await getWebSocketDebuggerUrl(port);
      if (!url) {
        throw new Error(`No Chrome found on port ${port}. Make sure Chrome is running with --remote-debugging-port=${port}`);
      }
      return url;
    }
  }

  throw new Error(`Invalid attach target: "${target}". Use "true" for auto-discover, a port number, or a ws:// URL.`);
}
|
|
316
|
+
|
|
317
|
+
// src/browser/stealth.ts
|
|
318
|
+
var STEALTH_SCRIPT = `
|
|
319
|
+
(() => {
|
|
320
|
+
// \u2500\u2500 1. navigator.webdriver removal \u2500\u2500
|
|
321
|
+
// Most important: this is the #1 detection vector
|
|
322
|
+
Object.defineProperty(navigator, 'webdriver', {
|
|
323
|
+
get: () => undefined,
|
|
324
|
+
configurable: true,
|
|
325
|
+
});
|
|
326
|
+
|
|
327
|
+
// Also delete from prototype
|
|
328
|
+
delete Object.getPrototypeOf(navigator).webdriver;
|
|
329
|
+
|
|
330
|
+
// \u2500\u2500 2. CDP marker removal \u2500\u2500
|
|
331
|
+
// Chrome DevTools Protocol injects cdc_* properties on window
|
|
332
|
+
for (const key of Object.keys(window)) {
|
|
333
|
+
if (/^cdc_|^__webdriver|^__selenium|^__driver/.test(key)) {
|
|
334
|
+
try { delete window[key]; } catch {}
|
|
335
|
+
}
|
|
336
|
+
}
|
|
337
|
+
|
|
338
|
+
// \u2500\u2500 3. Chrome runtime spoofing \u2500\u2500
|
|
339
|
+
// Real Chrome has window.chrome with runtime, loadTimes, csi
|
|
340
|
+
if (!window.chrome) {
|
|
341
|
+
window.chrome = {};
|
|
342
|
+
}
|
|
343
|
+
if (!window.chrome.runtime) {
|
|
344
|
+
window.chrome.runtime = {
|
|
345
|
+
connect: function() {},
|
|
346
|
+
sendMessage: function() {},
|
|
347
|
+
onMessage: { addListener: function() {} },
|
|
348
|
+
id: undefined,
|
|
349
|
+
};
|
|
350
|
+
}
|
|
351
|
+
if (!window.chrome.loadTimes) {
|
|
352
|
+
window.chrome.loadTimes = function() {
|
|
353
|
+
return {
|
|
354
|
+
commitLoadTime: Date.now() / 1000 - 0.5,
|
|
355
|
+
connectionInfo: 'h2',
|
|
356
|
+
finishDocumentLoadTime: Date.now() / 1000 - 0.1,
|
|
357
|
+
finishLoadTime: Date.now() / 1000 - 0.05,
|
|
358
|
+
firstPaintAfterLoadTime: 0,
|
|
359
|
+
firstPaintTime: Date.now() / 1000 - 0.3,
|
|
360
|
+
navigationType: 'Other',
|
|
361
|
+
npnNegotiatedProtocol: 'h2',
|
|
362
|
+
requestTime: Date.now() / 1000 - 1,
|
|
363
|
+
startLoadTime: Date.now() / 1000 - 0.8,
|
|
364
|
+
wasAlternateProtocolAvailable: false,
|
|
365
|
+
wasFetchedViaSpdy: true,
|
|
366
|
+
wasNpnNegotiated: true,
|
|
367
|
+
};
|
|
368
|
+
};
|
|
369
|
+
}
|
|
370
|
+
if (!window.chrome.csi) {
|
|
371
|
+
window.chrome.csi = function() {
|
|
372
|
+
return {
|
|
373
|
+
onloadT: Date.now(),
|
|
374
|
+
startE: Date.now() - 500,
|
|
375
|
+
pageT: 500,
|
|
376
|
+
tran: 15,
|
|
377
|
+
};
|
|
378
|
+
};
|
|
379
|
+
}
|
|
380
|
+
|
|
381
|
+
// \u2500\u2500 4. Plugin array spoofing \u2500\u2500
|
|
382
|
+
// Headless Chrome reports empty plugins; real Chrome has at least 2
|
|
383
|
+
const fakePlugins = [
|
|
384
|
+
{ name: 'Chrome PDF Plugin', filename: 'internal-pdf-viewer', description: 'Portable Document Format', length: 1 },
|
|
385
|
+
{ name: 'Chrome PDF Viewer', filename: 'mhjfbmdgcfjbbpaeojofohoefgiehjai', description: '', length: 1 },
|
|
386
|
+
{ name: 'Native Client', filename: 'internal-nacl-plugin', description: '', length: 2 },
|
|
387
|
+
];
|
|
388
|
+
|
|
389
|
+
Object.defineProperty(navigator, 'plugins', {
|
|
390
|
+
get: () => {
|
|
391
|
+
const arr = fakePlugins.map(p => {
|
|
392
|
+
const plugin = { ...p, item: (i) => plugin, namedItem: (n) => plugin };
|
|
393
|
+
return plugin;
|
|
394
|
+
});
|
|
395
|
+
arr.item = (i) => arr[i];
|
|
396
|
+
arr.namedItem = (n) => arr.find(p => p.name === n);
|
|
397
|
+
arr.refresh = () => {};
|
|
398
|
+
return arr;
|
|
399
|
+
},
|
|
400
|
+
});
|
|
401
|
+
|
|
402
|
+
// \u2500\u2500 5. Languages \u2500\u2500
|
|
403
|
+
Object.defineProperty(navigator, 'languages', {
|
|
404
|
+
get: () => ['en-US', 'en'],
|
|
405
|
+
});
|
|
406
|
+
Object.defineProperty(navigator, 'language', {
|
|
407
|
+
get: () => 'en-US',
|
|
408
|
+
});
|
|
409
|
+
|
|
410
|
+
// \u2500\u2500 6. Platform consistency \u2500\u2500
|
|
411
|
+
// Ensure platform matches user agent
|
|
412
|
+
const platform = navigator.userAgent.includes('Mac') ? 'MacIntel' :
|
|
413
|
+
navigator.userAgent.includes('Win') ? 'Win32' :
|
|
414
|
+
navigator.userAgent.includes('Linux') ? 'Linux x86_64' : navigator.platform;
|
|
415
|
+
Object.defineProperty(navigator, 'platform', { get: () => platform });
|
|
416
|
+
|
|
417
|
+
// \u2500\u2500 7. Hardware concurrency & device memory \u2500\u2500
|
|
418
|
+
// Headless often reports unusual values
|
|
419
|
+
if (navigator.hardwareConcurrency < 2) {
|
|
420
|
+
Object.defineProperty(navigator, 'hardwareConcurrency', { get: () => 8 });
|
|
421
|
+
}
|
|
422
|
+
if (!navigator.deviceMemory || navigator.deviceMemory < 2) {
|
|
423
|
+
Object.defineProperty(navigator, 'deviceMemory', { get: () => 8 });
|
|
424
|
+
}
|
|
425
|
+
|
|
426
|
+
// \u2500\u2500 8. WebGL vendor/renderer spoofing \u2500\u2500
|
|
427
|
+
// Headless reports "Google SwiftShader" which is a dead giveaway
|
|
428
|
+
const origGetParameter = WebGLRenderingContext.prototype.getParameter;
|
|
429
|
+
WebGLRenderingContext.prototype.getParameter = function(param) {
|
|
430
|
+
// UNMASKED_VENDOR_WEBGL
|
|
431
|
+
if (param === 0x9245) return 'Intel Inc.';
|
|
432
|
+
// UNMASKED_RENDERER_WEBGL
|
|
433
|
+
if (param === 0x9246) return 'Intel Iris OpenGL Engine';
|
|
434
|
+
return origGetParameter.call(this, param);
|
|
435
|
+
};
|
|
436
|
+
|
|
437
|
+
// Also for WebGL2
|
|
438
|
+
if (typeof WebGL2RenderingContext !== 'undefined') {
|
|
439
|
+
const origGetParameter2 = WebGL2RenderingContext.prototype.getParameter;
|
|
440
|
+
WebGL2RenderingContext.prototype.getParameter = function(param) {
|
|
441
|
+
if (param === 0x9245) return 'Intel Inc.';
|
|
442
|
+
if (param === 0x9246) return 'Intel Iris OpenGL Engine';
|
|
443
|
+
return origGetParameter2.call(this, param);
|
|
444
|
+
};
|
|
445
|
+
}
|
|
446
|
+
|
|
447
|
+
// \u2500\u2500 9. Canvas fingerprint noise \u2500\u2500
|
|
448
|
+
// Adds subtle deterministic noise to canvas output based on domain
|
|
449
|
+
const seed = location.hostname.split('').reduce((a, c) => a + c.charCodeAt(0), 0);
|
|
450
|
+
const origToDataURL = HTMLCanvasElement.prototype.toDataURL;
|
|
451
|
+
HTMLCanvasElement.prototype.toDataURL = function(type) {
|
|
452
|
+
const ctx = this.getContext('2d');
|
|
453
|
+
if (ctx && this.width > 0 && this.height > 0) {
|
|
454
|
+
try {
|
|
455
|
+
const imageData = ctx.getImageData(0, 0, 1, 1);
|
|
456
|
+
// Flip a single pixel with seeded noise
|
|
457
|
+
imageData.data[0] = (imageData.data[0] + seed) % 256;
|
|
458
|
+
ctx.putImageData(imageData, 0, 0);
|
|
459
|
+
} catch {}
|
|
460
|
+
}
|
|
461
|
+
return origToDataURL.apply(this, arguments);
|
|
462
|
+
};
|
|
463
|
+
|
|
464
|
+
// \u2500\u2500 10. Permissions API \u2500\u2500
|
|
465
|
+
// Headless returns 'denied' for notifications; real Chrome returns 'prompt'
|
|
466
|
+
const origQuery = navigator.permissions?.query?.bind(navigator.permissions);
|
|
467
|
+
if (origQuery) {
|
|
468
|
+
navigator.permissions.query = function(descriptor) {
|
|
469
|
+
if (descriptor.name === 'notifications') {
|
|
470
|
+
return Promise.resolve({ state: Notification.permission || 'prompt', onchange: null });
|
|
471
|
+
}
|
|
472
|
+
return origQuery(descriptor);
|
|
473
|
+
};
|
|
474
|
+
}
|
|
475
|
+
|
|
476
|
+
// \u2500\u2500 11. Notification constructor \u2500\u2500
|
|
477
|
+
if (!window.Notification) {
|
|
478
|
+
window.Notification = function() {};
|
|
479
|
+
window.Notification.permission = 'default';
|
|
480
|
+
window.Notification.requestPermission = () => Promise.resolve('default');
|
|
481
|
+
}
|
|
482
|
+
|
|
483
|
+
// \u2500\u2500 12. Connection type \u2500\u2500
|
|
484
|
+
if (navigator.connection) {
|
|
485
|
+
Object.defineProperty(navigator.connection, 'rtt', { get: () => 50 });
|
|
486
|
+
}
|
|
487
|
+
})()
|
|
488
|
+
`;
|
|
489
|
+
/**
 * Register STEALTH_SCRIPT on a page via `page.evaluateOnNewDocument`.
 *
 * @param {object} page - Page object exposing `evaluateOnNewDocument`.
 * @returns {Promise<void>} Settles once the registration call has completed.
 */
async function injectStealth(page) {
  const script = STEALTH_SCRIPT;
  await page.evaluateOnNewDocument(script);
}
|
|
492
|
+
var STEALTH_ARGS = [
|
|
493
|
+
"--disable-blink-features=AutomationControlled",
|
|
494
|
+
"--disable-features=IsolateOrigins,site-per-process",
|
|
495
|
+
"--disable-infobars",
|
|
496
|
+
"--window-size=1920,1080"
|
|
497
|
+
];
|
|
498
|
+
|
|
19
499
|
// src/browser/manager.ts
|
|
20
500
|
var BrowserManager = class {
|
|
21
501
|
browser = null;
|
|
22
502
|
config;
|
|
503
|
+
isAttached = false;
|
|
23
504
|
constructor(config = {}) {
|
|
24
505
|
this.config = config;
|
|
25
506
|
}
|
|
26
507
|
async connect() {
|
|
27
508
|
if (this.browser?.connected) return this.browser;
|
|
509
|
+
if (this.config.attach) {
|
|
510
|
+
const wsEndpoint = await resolveAttachTarget(this.config.attach);
|
|
511
|
+
log.info(`Attaching to Chrome: ${wsEndpoint}`);
|
|
512
|
+
this.browser = await puppeteer.connect({ browserWSEndpoint: wsEndpoint });
|
|
513
|
+
this.isAttached = true;
|
|
514
|
+
return this.browser;
|
|
515
|
+
}
|
|
28
516
|
if (this.config.cdpEndpoint) {
|
|
29
517
|
log.debug(`Connecting to CDP endpoint: ${this.config.cdpEndpoint}`);
|
|
30
518
|
this.browser = await puppeteer.connect({
|
|
31
519
|
browserWSEndpoint: this.config.cdpEndpoint
|
|
32
520
|
});
|
|
521
|
+
this.isAttached = true;
|
|
33
522
|
return this.browser;
|
|
34
523
|
}
|
|
35
524
|
const executablePath = this.config.executablePath || findChrome();
|
|
@@ -38,27 +527,48 @@ var BrowserManager = class {
|
|
|
38
527
|
"Chrome/Chromium not found. Set LOBSTER_BROWSER_PATH or config browser.executablePath"
|
|
39
528
|
);
|
|
40
529
|
}
|
|
530
|
+
const args = [
|
|
531
|
+
"--no-sandbox",
|
|
532
|
+
"--disable-setuid-sandbox",
|
|
533
|
+
"--disable-dev-shm-usage",
|
|
534
|
+
"--disable-gpu"
|
|
535
|
+
];
|
|
536
|
+
if (this.config.stealth) {
|
|
537
|
+
args.push(...STEALTH_ARGS);
|
|
538
|
+
}
|
|
539
|
+
let userDataDir;
|
|
540
|
+
if (this.config.profile) {
|
|
541
|
+
userDataDir = getProfileDataDir(this.config.profile);
|
|
542
|
+
log.info(`Using profile "${this.config.profile}" \u2192 ${userDataDir}`);
|
|
543
|
+
}
|
|
41
544
|
log.debug(`Launching Chrome: ${executablePath}`);
|
|
42
545
|
this.browser = await puppeteer.launch({
|
|
43
546
|
executablePath,
|
|
44
547
|
headless: this.config.headless ?? true,
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
"--disable-setuid-sandbox",
|
|
48
|
-
"--disable-dev-shm-usage",
|
|
49
|
-
"--disable-gpu"
|
|
50
|
-
]
|
|
548
|
+
userDataDir,
|
|
549
|
+
args
|
|
51
550
|
});
|
|
551
|
+
this.isAttached = false;
|
|
52
552
|
return this.browser;
|
|
53
553
|
}
|
|
54
554
|
async newPage() {
|
|
55
555
|
const browser = await this.connect();
|
|
56
|
-
|
|
556
|
+
const page = await browser.newPage();
|
|
557
|
+
if (this.config.stealth) {
|
|
558
|
+
await injectStealth(page);
|
|
559
|
+
log.debug("Stealth mode enabled");
|
|
560
|
+
}
|
|
561
|
+
return page;
|
|
57
562
|
}
|
|
58
563
|
async close() {
|
|
59
564
|
if (this.browser) {
|
|
60
|
-
|
|
61
|
-
|
|
565
|
+
if (this.isAttached) {
|
|
566
|
+
this.browser.disconnect();
|
|
567
|
+
log.debug("Disconnected from Chrome (attached mode)");
|
|
568
|
+
} else {
|
|
569
|
+
await this.browser.close().catch(() => {
|
|
570
|
+
});
|
|
571
|
+
}
|
|
62
572
|
this.browser = null;
|
|
63
573
|
}
|
|
64
574
|
}
|
|
@@ -78,7 +588,7 @@ function findChrome() {
|
|
|
78
588
|
"/usr/bin/chromium",
|
|
79
589
|
"/snap/bin/chromium"
|
|
80
590
|
];
|
|
81
|
-
return paths.find((p) =>
|
|
591
|
+
return paths.find((p) => existsSync3(p));
|
|
82
592
|
}
|
|
83
593
|
|
|
84
594
|
// src/browser/dom/flat-tree.ts
|
|
@@ -584,6 +1094,164 @@ var SNAPSHOT_SCRIPT = `
|
|
|
584
1094
|
})()
|
|
585
1095
|
`;
|
|
586
1096
|
|
|
1097
|
+
// src/browser/dom/compact-snapshot.ts
|
|
1098
|
+
var COMPACT_SNAPSHOT_SCRIPT = `
|
|
1099
|
+
(() => {
|
|
1100
|
+
const TOKEN_BUDGET = 800;
|
|
1101
|
+
const CHARS_PER_TOKEN = 4;
|
|
1102
|
+
|
|
1103
|
+
const INTERACTIVE_TAGS = new Set([
|
|
1104
|
+
'a','button','input','select','textarea','details','summary','label',
|
|
1105
|
+
]);
|
|
1106
|
+
const INTERACTIVE_ROLES = new Set([
|
|
1107
|
+
'button','link','textbox','checkbox','radio','combobox','listbox',
|
|
1108
|
+
'menu','menuitem','tab','switch','slider','searchbox','spinbutton',
|
|
1109
|
+
'option','menuitemcheckbox','menuitemradio','treeitem',
|
|
1110
|
+
]);
|
|
1111
|
+
const LANDMARK_TAGS = new Map([
|
|
1112
|
+
['nav', 'Navigation'],
|
|
1113
|
+
['main', 'Main Content'],
|
|
1114
|
+
['header', 'Header'],
|
|
1115
|
+
['footer', 'Footer'],
|
|
1116
|
+
['aside', 'Sidebar'],
|
|
1117
|
+
['form', 'Form'],
|
|
1118
|
+
]);
|
|
1119
|
+
const LANDMARK_ROLES = new Map([
|
|
1120
|
+
['navigation', 'Navigation'],
|
|
1121
|
+
['main', 'Main Content'],
|
|
1122
|
+
['banner', 'Header'],
|
|
1123
|
+
['contentinfo', 'Footer'],
|
|
1124
|
+
['complementary', 'Sidebar'],
|
|
1125
|
+
['search', 'Search'],
|
|
1126
|
+
['dialog', 'Dialog'],
|
|
1127
|
+
]);
|
|
1128
|
+
|
|
1129
|
+
function isVisible(el) {
|
|
1130
|
+
if (el.offsetWidth === 0 && el.offsetHeight === 0 && el.tagName !== 'INPUT') return false;
|
|
1131
|
+
const s = getComputedStyle(el);
|
|
1132
|
+
return s.display !== 'none' && s.visibility !== 'hidden' && s.opacity !== '0';
|
|
1133
|
+
}
|
|
1134
|
+
|
|
1135
|
+
function isInteractive(el) {
|
|
1136
|
+
const tag = el.tagName.toLowerCase();
|
|
1137
|
+
if (INTERACTIVE_TAGS.has(tag)) {
|
|
1138
|
+
if (el.disabled) return false;
|
|
1139
|
+
if (tag === 'input' && el.type === 'hidden') return false;
|
|
1140
|
+
return true;
|
|
1141
|
+
}
|
|
1142
|
+
const role = el.getAttribute('role');
|
|
1143
|
+
if (role && INTERACTIVE_ROLES.has(role)) return true;
|
|
1144
|
+
if (el.contentEditable === 'true') return true;
|
|
1145
|
+
if (el.tabIndex >= 0 && el.getAttribute('tabindex') !== null) return true;
|
|
1146
|
+
return false;
|
|
1147
|
+
}
|
|
1148
|
+
|
|
1149
|
+
function getRole(el) {
|
|
1150
|
+
const role = el.getAttribute('role');
|
|
1151
|
+
if (role) return role;
|
|
1152
|
+
const tag = el.tagName.toLowerCase();
|
|
1153
|
+
if (tag === 'a') return 'link';
|
|
1154
|
+
if (tag === 'button' || tag === 'summary') return 'button';
|
|
1155
|
+
if (tag === 'input') return el.type || 'text';
|
|
1156
|
+
if (tag === 'select') return 'select';
|
|
1157
|
+
if (tag === 'textarea') return 'textarea';
|
|
1158
|
+
if (tag === 'label') return 'label';
|
|
1159
|
+
return tag;
|
|
1160
|
+
}
|
|
1161
|
+
|
|
1162
|
+
function getName(el) {
|
|
1163
|
+
return (
|
|
1164
|
+
el.getAttribute('aria-label') ||
|
|
1165
|
+
el.getAttribute('alt') ||
|
|
1166
|
+
el.getAttribute('title') ||
|
|
1167
|
+
el.getAttribute('placeholder') ||
|
|
1168
|
+
(el.tagName === 'INPUT' && (el.type === 'submit' || el.type === 'button') ? el.value : '') ||
|
|
1169
|
+
(el.id ? document.querySelector('label[for="' + el.id + '"]')?.textContent?.trim() : '') ||
|
|
1170
|
+
(el.children.length <= 2 ? el.textContent?.trim() : '') ||
|
|
1171
|
+
''
|
|
1172
|
+
).slice(0, 60);
|
|
1173
|
+
}
|
|
1174
|
+
|
|
1175
|
+
function getValue(el) {
|
|
1176
|
+
const tag = el.tagName.toLowerCase();
|
|
1177
|
+
if (tag === 'input') {
|
|
1178
|
+
const type = el.type || 'text';
|
|
1179
|
+
if (type === 'checkbox' || type === 'radio') return el.checked ? 'checked' : 'unchecked';
|
|
1180
|
+
if (type === 'password') return el.value ? '****' : '';
|
|
1181
|
+
return el.value ? el.value.slice(0, 30) : '';
|
|
1182
|
+
}
|
|
1183
|
+
if (tag === 'textarea') return el.value ? el.value.slice(0, 30) : '';
|
|
1184
|
+
if (tag === 'select' && el.selectedOptions?.length) return el.selectedOptions[0].text.slice(0, 30);
|
|
1185
|
+
return '';
|
|
1186
|
+
}
|
|
1187
|
+
|
|
1188
|
+
// Collect elements
|
|
1189
|
+
let idx = 0;
|
|
1190
|
+
let charsUsed = 0;
|
|
1191
|
+
const lines = [];
|
|
1192
|
+
let lastLandmark = '';
|
|
1193
|
+
|
|
1194
|
+
// Page header
|
|
1195
|
+
const scrollY = window.scrollY;
|
|
1196
|
+
const scrollMax = document.documentElement.scrollHeight - window.innerHeight;
|
|
1197
|
+
const scrollPct = scrollMax > 0 ? Math.round((scrollY / scrollMax) * 100) : 0;
|
|
1198
|
+
const header = 'url: ' + location.href + ' | scroll: ' + scrollPct + '%';
|
|
1199
|
+
lines.push(header);
|
|
1200
|
+
charsUsed += header.length;
|
|
1201
|
+
|
|
1202
|
+
// Walk DOM
|
|
1203
|
+
const walker = document.createTreeWalker(document.body, NodeFilter.SHOW_ELEMENT);
|
|
1204
|
+
let node;
|
|
1205
|
+
while ((node = walker.nextNode())) {
|
|
1206
|
+
if (!isVisible(node)) continue;
|
|
1207
|
+
|
|
1208
|
+
const tag = node.tagName.toLowerCase();
|
|
1209
|
+
if (['script','style','noscript','svg','path','meta','link','head','template'].includes(tag)) continue;
|
|
1210
|
+
|
|
1211
|
+
// Check for landmark
|
|
1212
|
+
const role = node.getAttribute('role');
|
|
1213
|
+
const landmark = LANDMARK_TAGS.get(tag) || (role ? LANDMARK_ROLES.get(role) : null);
|
|
1214
|
+
if (landmark && landmark !== lastLandmark) {
|
|
1215
|
+
const sectionLine = '--- ' + landmark + ' ---';
|
|
1216
|
+
if (charsUsed + sectionLine.length > TOKEN_BUDGET * CHARS_PER_TOKEN) break;
|
|
1217
|
+
lines.push(sectionLine);
|
|
1218
|
+
charsUsed += sectionLine.length;
|
|
1219
|
+
lastLandmark = landmark;
|
|
1220
|
+
}
|
|
1221
|
+
|
|
1222
|
+
// Only emit interactive elements
|
|
1223
|
+
if (!isInteractive(node)) continue;
|
|
1224
|
+
|
|
1225
|
+
const elRole = getRole(node);
|
|
1226
|
+
const name = getName(node);
|
|
1227
|
+
const value = getValue(node);
|
|
1228
|
+
|
|
1229
|
+
// Build compact line
|
|
1230
|
+
let line = '[' + idx + '] ' + elRole;
|
|
1231
|
+
if (name) line += ' "' + name.replace(/"/g, "'") + '"';
|
|
1232
|
+
if (value) line += ' val="' + value.replace(/"/g, "'") + '"';
|
|
1233
|
+
|
|
1234
|
+
// Check token budget
|
|
1235
|
+
if (charsUsed + line.length > TOKEN_BUDGET * CHARS_PER_TOKEN) {
|
|
1236
|
+
lines.push('... (' + (document.querySelectorAll('a,button,input,select,textarea,[role]').length - idx) + ' more elements)');
|
|
1237
|
+
break;
|
|
1238
|
+
}
|
|
1239
|
+
|
|
1240
|
+
// Annotate element with ref for clicking
|
|
1241
|
+
try { node.dataset.ref = String(idx); } catch {}
|
|
1242
|
+
|
|
1243
|
+
lines.push(line);
|
|
1244
|
+
charsUsed += line.length;
|
|
1245
|
+
idx++;
|
|
1246
|
+
}
|
|
1247
|
+
|
|
1248
|
+
return lines.join('\\n');
|
|
1249
|
+
})()
|
|
1250
|
+
`;
|
|
1251
|
+
/**
 * Produce the compact-snapshot page script with a custom token budget.
 *
 * The budget is spliced into JavaScript source that later runs in the page,
 * so only a number is ever interpolated. A numeric argument is used as given.
 * Any other value is converted with Number(), and falls back to 800 when that
 * yields NaN, so arbitrary text can never end up in the generated script.
 *
 * @param {number|string} [tokenBudget=800] - Token budget for the snapshot.
 * @returns {string} COMPACT_SNAPSHOT_SCRIPT with its TOKEN_BUDGET constant replaced.
 */
function buildCompactSnapshotScript(tokenBudget = 800) {
  let budget = tokenBudget;
  if (typeof budget !== "number") {
    budget = Number(budget);
    if (Number.isNaN(budget)) {
      budget = 800;
    }
  }
  // A replacer function avoids special handling of "$" in the replacement.
  return COMPACT_SNAPSHOT_SCRIPT.replace(
    "const TOKEN_BUDGET = 800;",
    () => `const TOKEN_BUDGET = ${budget};`
  );
}
|
|
1254
|
+
|
|
587
1255
|
// src/browser/dom/semantic-tree.ts
|
|
588
1256
|
var SEMANTIC_TREE_SCRIPT = `
|
|
589
1257
|
(() => {
|
|
@@ -1109,6 +1777,64 @@ var FORM_STATE_SCRIPT = `
|
|
|
1109
1777
|
})()
|
|
1110
1778
|
`;
|
|
1111
1779
|
|
|
1780
|
+
// src/browser/dom/interactive.ts
|
|
1781
|
+
// Page-side script (evaluated as a string in the browser) that walks
// document.body and returns one record per visible interactive element:
// { index, tag, role, text, types, ariaLabel, rect }.
// `text` has runs of whitespace collapsed to single spaces before it is
// truncated, so the 100-character budget is not spent on layout
// indentation and newlines from nested markup.
var INTERACTIVE_ELEMENTS_SCRIPT = `
(() => {
  const results = [];

  function classify(el) {
    const tag = el.tagName.toLowerCase();
    const role = el.getAttribute('role');
    const types = [];

    // Native interactive
    if (['a', 'button', 'input', 'select', 'textarea', 'details', 'summary'].includes(tag)) {
      types.push('native');
    }

    // ARIA role interactive
    if (role && ['button', 'link', 'textbox', 'checkbox', 'radio', 'combobox', 'tab', 'switch', 'menuitem', 'slider'].includes(role)) {
      types.push('aria');
    }

    // Contenteditable
    if (el.contentEditable === 'true') types.push('contenteditable');

    // Focusable
    if (el.tabIndex >= 0 && el.getAttribute('tabindex') !== null) types.push('focusable');

    // Has click listener (approximate)
    if (el.onclick) types.push('listener');

    return types;
  }

  let idx = 0;
  const walker = document.createTreeWalker(document.body, NodeFilter.SHOW_ELEMENT);
  let node;
  while ((node = walker.nextNode())) {
    const types = classify(node);
    if (types.length === 0) continue;

    const style = getComputedStyle(node);
    if (style.display === 'none' || style.visibility === 'hidden') continue;

    const rect = node.getBoundingClientRect();
    results.push({
      index: idx++,
      tag: node.tagName.toLowerCase(),
      role: node.getAttribute('role') || '',
      text: (node.textContent || '').replace(/\\s+/g, ' ').trim().slice(0, 100),
      types,
      ariaLabel: node.getAttribute('aria-label') || '',
      rect: { x: rect.x, y: rect.y, width: rect.width, height: rect.height },
    });
  }

  return results;
})()
`;
|
|
1837
|
+
|
|
1112
1838
|
// src/browser/interceptor.ts
|
|
1113
1839
|
function buildInterceptorScript(pattern) {
|
|
1114
1840
|
return `
|
|
@@ -1165,6 +1891,155 @@ var GET_INTERCEPTED_SCRIPT = `
|
|
|
1165
1891
|
})()
|
|
1166
1892
|
`;
|
|
1167
1893
|
|
|
1894
|
+
// src/browser/semantic-find.ts
|
|
1895
|
+
// Synonym table for semantic element lookup: maps a lowercase token to the
// alternative tokens that should be treated as equivalent when matching.
// Built on a null-prototype object so that tokens which happen to be
// Object.prototype member names (e.g. "constructor") are not mistaken for
// table entries.
var SYNONYMS = Object.assign(/* @__PURE__ */ Object.create(null), {
  btn: ["button"],
  button: ["btn", "submit", "click"],
  submit: ["go", "send", "ok", "confirm", "done", "button"],
  search: ["find", "lookup", "query", "filter"],
  login: ["signin", "sign-in", "log-in", "authenticate"],
  signup: ["register", "create-account", "sign-up", "join"],
  logout: ["signout", "sign-out", "log-out"],
  close: ["dismiss", "x", "cancel", "exit"],
  menu: ["nav", "navigation", "hamburger", "sidebar"],
  nav: ["navigation", "menu", "navbar"],
  input: ["field", "textbox", "text", "entry"],
  email: ["mail", "e-mail"],
  password: ["pass", "pwd", "secret"],
  next: ["continue", "forward", "proceed"],
  back: ["previous", "return", "go-back"],
  save: ["store", "keep", "persist"],
  delete: ["remove", "trash", "discard", "destroy"],
  edit: ["modify", "change", "update"],
  add: ["create", "new", "plus", "insert"],
  settings: ["preferences", "config", "options", "gear"],
  profile: ["account", "user", "avatar"],
  home: ["main", "dashboard", "start"],
  link: ["anchor", "href", "url"],
  select: ["dropdown", "combo", "picker", "choose"],
  checkbox: ["check", "toggle", "tick"],
  upload: ["attach", "file", "browse"],
  download: ["save", "export"]
});
|
|
1924
|
+
// Query tokens that name a kind of control. When one of these appears in a
// query and also in an element's role/tag, the element's score is boosted.
var ROLE_KEYWORDS = /* @__PURE__ */ new Set([
  "button", "link",
  "input", "textbox", "checkbox", "radio",
  "select", "dropdown",
  "tab", "menu", "menuitem",
  "switch", "slider",
  "combobox", "searchbox", "option"
]);
|
|
1942
|
+
/**
 * Split free text into lowercase word tokens.
 *
 * Letters, combining marks and digits of any script are kept; every other
 * character is treated as a separator, as are whitespace and hyphens.
 * The input is NFC-normalized first so that decomposed accents stay attached
 * to their base letter. `null`/`undefined` yield an empty token list.
 *
 * @param {string} text - Text to tokenize.
 * @returns {string[]} Non-empty lowercase tokens, in order of appearance.
 */
function tokenize(text) {
  return String(text ?? "")
    .normalize("NFC")
    .toLowerCase()
    .replace(/[^\p{L}\p{M}\p{N}\s-]/gu, " ")
    .split(/[\s-]+/)
    .filter((t) => t.length > 0);
}
|
|
1945
|
+
/**
 * Return the given tokens plus every synonym listed for them in SYNONYMS.
 *
 * Only own entries of the table are consulted. Without that guard a token
 * such as "constructor" would resolve to an inherited Object.prototype
 * member, which is truthy but not iterable, and the loop would throw.
 *
 * @param {string[]} tokens - Lowercase tokens to expand.
 * @returns {Set<string>} The original tokens and their direct synonyms.
 */
function expandSynonyms(tokens) {
  const expanded = new Set(tokens);
  for (const token of tokens) {
    if (!Object.prototype.hasOwnProperty.call(SYNONYMS, token)) continue;
    for (const syn of SYNONYMS[token]) expanded.add(syn);
  }
  return expanded;
}
|
|
1955
|
+
/**
 * Count how many times each token occurs.
 *
 * @param {Iterable<string>} tokens - Tokens to tally.
 * @returns {Map<string, number>} Token -> occurrence count, in first-seen order.
 */
function freqMap(tokens) {
  return [...tokens].reduce(
    (counts, token) => counts.set(token, (counts.get(token) ?? 0) + 1),
    /* @__PURE__ */ new Map()
  );
}
|
|
1962
|
+
/**
 * Weighted (multiset) Jaccard similarity between two token lists.
 *
 * For every distinct token the smaller of its two counts contributes to the
 * intersection and the larger to the union.
 *
 * @param {string[]} queryTokens - Tokens from the query.
 * @param {string[]} descTokens - Tokens from the element description.
 * @returns {number} Intersection / union, or 0 when both lists are empty.
 */
function jaccardScore(queryTokens, descTokens) {
  const queryCounts = freqMap(queryTokens);
  const descCounts = freqMap(descTokens);
  const vocabulary = /* @__PURE__ */ new Set([...queryCounts.keys(), ...descCounts.keys()]);

  let shared = 0;
  let total = 0;
  vocabulary.forEach((term) => {
    const inQuery = queryCounts.get(term) || 0;
    const inDesc = descCounts.get(term) || 0;
    shared += Math.min(inQuery, inDesc);
    total += Math.max(inQuery, inDesc);
  });

  if (total === 0) return 0;
  return shared / total;
}
|
|
1976
|
+
/**
 * Partial-credit score for query tokens that share a prefix with a
 * description token (e.g. "sub" vs "submit").
 *
 * Each query token of at least 3 characters that prefix-matches any
 * description token earns 0.5; the total is averaged over all query tokens
 * and capped at 0.3. The same 3-character minimum applies to the token acting
 * as the prefix in either direction, so a stray one- or two-letter
 * description token such as "a" cannot match every query word that starts
 * with that letter.
 *
 * @param {string[]} queryTokens - Tokens from the query.
 * @param {string[]} descTokens - Tokens from the element description.
 * @returns {number} A value in the range [0, 0.3].
 */
function prefixScore(queryTokens, descTokens) {
  if (queryTokens.length === 0 || descTokens.length === 0) return 0;
  const MIN_PREFIX_LENGTH = 3;
  let matches = 0;
  for (const qt of queryTokens) {
    if (qt.length < MIN_PREFIX_LENGTH) continue;
    const hit = descTokens.some(
      (dt) => dt.startsWith(qt) || (dt.length >= MIN_PREFIX_LENGTH && qt.startsWith(dt))
    );
    if (hit) matches += 0.5;
  }
  return Math.min(matches / queryTokens.length, 0.3);
}
|
|
1990
|
+
/**
 * Bonus applied when the query names a control type that the element's
 * role (or tag) contains.
 *
 * @param {string[]} queryTokens - Tokens from the query.
 * @param {string} elementRole - The element's role, or its tag name.
 * @returns {number} 0.2 if any query token is a role keyword found in the
 *   lowercased role string, otherwise 0.
 */
function roleBoost(queryTokens, elementRole) {
  const normalizedRole = elementRole.toLowerCase();
  const mentionsRole = queryTokens.some(
    (token) => ROLE_KEYWORDS.has(token) && normalizedRole.includes(token)
  );
  return mentionsRole ? 0.2 : 0;
}
|
|
1999
|
+
/**
 * Score how well one element matches a query, in the range [0, 1].
 *
 * The element is described by its text, role, tag and aria-label. The score
 * is the sum of four parts, capped at 1:
 *   - Jaccard similarity of the synonym-expanded query and description,
 *   - a prefix-match score on the raw tokens,
 *   - a role boost,
 *   - 0.3 if the description text contains the whole query string.
 *
 * @param {string[]} queryTokens - Raw query tokens.
 * @param {Set<string>} queryExpanded - Query tokens plus their synonyms.
 * @param {{text?: string, role?: string, tag?: string, ariaLabel?: string}} element
 * @returns {number} Match score; 0 when the element has no describable text.
 */
function scoreElement(queryTokens, queryExpanded, element) {
  const description = [element.text, element.role, element.tag, element.ariaLabel]
    .filter(Boolean)
    .join(" ");
  const descTokens = tokenize(description);
  if (descTokens.length === 0) {
    return 0;
  }

  const descExpanded = expandSynonyms(descTokens);
  const jaccard = jaccardScore([...queryExpanded], [...descExpanded]);
  const prefix = prefixScore(queryTokens, descTokens);
  const role = roleBoost(queryTokens, element.role || element.tag);

  // Whole-query containment is checked on the normalized, space-joined tokens.
  const containsQuery = descTokens.join(" ").includes(queryTokens.join(" "));
  const exactBonus = containsQuery ? 0.3 : 0;

  const total = jaccard + prefix + role + exactBonus;
  return Math.min(total, 1);
}
|
|
2020
|
+
/**
 * Rank interactive elements against a free-text query.
 *
 * @param {Iterable<object>} elements - Element records with `index`, `text`,
 *   `role`, `tag` and `ariaLabel` fields.
 * @param {string} query - What the caller is looking for.
 * @param {{maxResults?: number, minScore?: number}} [options] - `maxResults`
 *   defaults to 5, `minScore` to 0.3.
 * @returns {Array<{ref: number, score: number, text: string, role: string, tag: string}>}
 *   Matches scoring at least `minScore`, best first, scores rounded to two
 *   decimals; empty when the query has no tokens.
 */
function semanticFind(elements, query, options) {
  const limit = options?.maxResults ?? 5;
  const threshold = options?.minScore ?? 0.3;

  const queryTokens = tokenize(query);
  if (queryTokens.length === 0) {
    return [];
  }
  const queryExpanded = expandSynonyms(queryTokens);

  return [...elements]
    .map((el) => ({ el, raw: scoreElement(queryTokens, queryExpanded, el) }))
    .filter(({ raw }) => raw >= threshold)
    .map(({ el, raw }) => ({
      ref: el.index,
      score: Math.round(raw * 100) / 100,
      text: (el.text || el.ariaLabel || "").slice(0, 60),
      role: el.role || el.tag,
      tag: el.tag
    }))
    .sort((a, b) => b.score - a.score)
    .slice(0, limit);
}
|
|
2042
|
+
|
|
1168
2043
|
// src/browser/page-adapter.ts
|
|
1169
2044
|
var PuppeteerPage = class {
|
|
1170
2045
|
page;
|
|
@@ -1192,7 +2067,10 @@ var PuppeteerPage = class {
|
|
|
1192
2067
|
async evaluate(js) {
|
|
1193
2068
|
return this.page.evaluate(js);
|
|
1194
2069
|
}
|
|
1195
|
-
async snapshot(
|
|
2070
|
+
async snapshot(opts) {
|
|
2071
|
+
if (opts?.compact) {
|
|
2072
|
+
return this.page.evaluate(COMPACT_SNAPSHOT_SCRIPT);
|
|
2073
|
+
}
|
|
1196
2074
|
return this.page.evaluate(SNAPSHOT_SCRIPT);
|
|
1197
2075
|
}
|
|
1198
2076
|
async semanticTree(_opts) {
|
|
@@ -1464,69 +2342,15 @@ var PuppeteerPage = class {
|
|
|
1464
2342
|
active: p === this.page
|
|
1465
2343
|
}));
|
|
1466
2344
|
}
|
|
2345
|
+
async find(query, options) {
|
|
2346
|
+
const elements = await this.page.evaluate(INTERACTIVE_ELEMENTS_SCRIPT);
|
|
2347
|
+
return semanticFind(elements, query, options);
|
|
2348
|
+
}
|
|
1467
2349
|
async close() {
|
|
1468
2350
|
await this.page.close();
|
|
1469
2351
|
}
|
|
1470
2352
|
};
|
|
1471
2353
|
|
|
1472
|
-
// src/browser/dom/interactive.ts
|
|
1473
|
-
var INTERACTIVE_ELEMENTS_SCRIPT = `
|
|
1474
|
-
(() => {
|
|
1475
|
-
const results = [];
|
|
1476
|
-
|
|
1477
|
-
function classify(el) {
|
|
1478
|
-
const tag = el.tagName.toLowerCase();
|
|
1479
|
-
const role = el.getAttribute('role');
|
|
1480
|
-
const types = [];
|
|
1481
|
-
|
|
1482
|
-
// Native interactive
|
|
1483
|
-
if (['a', 'button', 'input', 'select', 'textarea', 'details', 'summary'].includes(tag)) {
|
|
1484
|
-
types.push('native');
|
|
1485
|
-
}
|
|
1486
|
-
|
|
1487
|
-
// ARIA role interactive
|
|
1488
|
-
if (role && ['button', 'link', 'textbox', 'checkbox', 'radio', 'combobox', 'tab', 'switch', 'menuitem', 'slider'].includes(role)) {
|
|
1489
|
-
types.push('aria');
|
|
1490
|
-
}
|
|
1491
|
-
|
|
1492
|
-
// Contenteditable
|
|
1493
|
-
if (el.contentEditable === 'true') types.push('contenteditable');
|
|
1494
|
-
|
|
1495
|
-
// Focusable
|
|
1496
|
-
if (el.tabIndex >= 0 && el.getAttribute('tabindex') !== null) types.push('focusable');
|
|
1497
|
-
|
|
1498
|
-
// Has click listener (approximate)
|
|
1499
|
-
if (el.onclick) types.push('listener');
|
|
1500
|
-
|
|
1501
|
-
return types;
|
|
1502
|
-
}
|
|
1503
|
-
|
|
1504
|
-
let idx = 0;
|
|
1505
|
-
const walker = document.createTreeWalker(document.body, NodeFilter.SHOW_ELEMENT);
|
|
1506
|
-
let node;
|
|
1507
|
-
while (node = walker.nextNode()) {
|
|
1508
|
-
const types = classify(node);
|
|
1509
|
-
if (types.length === 0) continue;
|
|
1510
|
-
|
|
1511
|
-
const style = getComputedStyle(node);
|
|
1512
|
-
if (style.display === 'none' || style.visibility === 'hidden') continue;
|
|
1513
|
-
|
|
1514
|
-
const rect = node.getBoundingClientRect();
|
|
1515
|
-
results.push({
|
|
1516
|
-
index: idx++,
|
|
1517
|
-
tag: node.tagName.toLowerCase(),
|
|
1518
|
-
role: node.getAttribute('role') || '',
|
|
1519
|
-
text: (node.textContent || '').trim().slice(0, 100),
|
|
1520
|
-
types,
|
|
1521
|
-
ariaLabel: node.getAttribute('aria-label') || '',
|
|
1522
|
-
rect: { x: rect.x, y: rect.y, width: rect.width, height: rect.height },
|
|
1523
|
-
});
|
|
1524
|
-
}
|
|
1525
|
-
|
|
1526
|
-
return results;
|
|
1527
|
-
})()
|
|
1528
|
-
`;
|
|
1529
|
-
|
|
1530
2354
|
// src/browser/wait.ts
|
|
1531
2355
|
async function waitForCondition(page, condition, timeout = 3e4) {
|
|
1532
2356
|
switch (condition) {
|
|
@@ -2012,6 +2836,7 @@ async function lobsterFetch(url, options) {
|
|
|
2012
2836
|
}
|
|
2013
2837
|
export {
|
|
2014
2838
|
BrowserManager,
|
|
2839
|
+
COMPACT_SNAPSHOT_SCRIPT,
|
|
2015
2840
|
FLAT_TREE_SCRIPT,
|
|
2016
2841
|
FORM_STATE_SCRIPT,
|
|
2017
2842
|
GET_INTERCEPTED_SCRIPT,
|
|
@@ -2020,15 +2845,27 @@ export {
|
|
|
2020
2845
|
PuppeteerPage,
|
|
2021
2846
|
SEMANTIC_TREE_SCRIPT,
|
|
2022
2847
|
SNAPSHOT_SCRIPT,
|
|
2848
|
+
STEALTH_ARGS,
|
|
2849
|
+
STEALTH_SCRIPT,
|
|
2850
|
+
buildCompactSnapshotScript,
|
|
2023
2851
|
buildInterceptorScript,
|
|
2024
2852
|
buildSnapshotScript,
|
|
2853
|
+
createProfile,
|
|
2854
|
+
discoverChrome,
|
|
2025
2855
|
extractLinks,
|
|
2026
2856
|
extractMarkdown,
|
|
2027
2857
|
extractSnapshot,
|
|
2028
2858
|
extractText,
|
|
2029
2859
|
flatTreeToString,
|
|
2860
|
+
getProfileDataDir,
|
|
2861
|
+
injectStealth,
|
|
2862
|
+
listProfiles,
|
|
2030
2863
|
lobsterFetch,
|
|
2031
2864
|
parseHtml,
|
|
2865
|
+
removeProfile,
|
|
2866
|
+
resetProfileCache,
|
|
2867
|
+
resolveAttachTarget,
|
|
2868
|
+
semanticFind,
|
|
2032
2869
|
waitForCondition
|
|
2033
2870
|
};
|
|
2034
2871
|
//# sourceMappingURL=index.js.map
|