otherwise-cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. package/README.md +193 -0
  2. package/bin/otherwise.js +5 -0
  3. package/frontend/404.html +84 -0
  4. package/frontend/assets/OpenDyslexic3-Bold-CDyRs55Y.ttf +0 -0
  5. package/frontend/assets/OpenDyslexic3-Regular-CIBXa4WE.ttf +0 -0
  6. package/frontend/assets/__vite-browser-external-BIHI7g3E.js +1 -0
  7. package/frontend/assets/conversational-worker-CeKiciGk.js +2929 -0
  8. package/frontend/assets/dictation-worker-D0aYfq8b.js +29 -0
  9. package/frontend/assets/gemini-color-CgSQmmva.png +0 -0
  10. package/frontend/assets/index-BLux5ps4.js +21 -0
  11. package/frontend/assets/index-Blh8_TEM.js +5272 -0
  12. package/frontend/assets/index-BpQ1PuKu.js +18 -0
  13. package/frontend/assets/index-Df737c8w.css +1 -0
  14. package/frontend/assets/index-xaYHL6wb.js +113 -0
  15. package/frontend/assets/ort-wasm-simd-threaded.asyncify-BynIiDiv.wasm +0 -0
  16. package/frontend/assets/ort-wasm-simd-threaded.jsep-B0T3yYHD.wasm +0 -0
  17. package/frontend/assets/transformers-tULNc5V3.js +31 -0
  18. package/frontend/assets/tts-worker-DPJWqT7N.js +2899 -0
  19. package/frontend/assets/voice-mode-worker-GzvIE_uh.js +2927 -0
  20. package/frontend/assets/worker-2d5ABSLU.js +31 -0
  21. package/frontend/banner.png +0 -0
  22. package/frontend/favicon.svg +3 -0
  23. package/frontend/google55e5ec47ee14a5f8.html +1 -0
  24. package/frontend/index.html +234 -0
  25. package/frontend/manifest.json +17 -0
  26. package/frontend/pdf.worker.min.mjs +21 -0
  27. package/frontend/robots.txt +5 -0
  28. package/frontend/sitemap.xml +27 -0
  29. package/package.json +81 -0
  30. package/src/agent/index.js +1066 -0
  31. package/src/agent/location.js +51 -0
  32. package/src/agent/prompt.js +548 -0
  33. package/src/agent/tools.js +4372 -0
  34. package/src/browser/detect.js +68 -0
  35. package/src/browser/session.js +1109 -0
  36. package/src/config.js +137 -0
  37. package/src/email/client.js +503 -0
  38. package/src/index.js +557 -0
  39. package/src/inference/anthropic.js +113 -0
  40. package/src/inference/google.js +373 -0
  41. package/src/inference/index.js +81 -0
  42. package/src/inference/ollama.js +383 -0
  43. package/src/inference/openai.js +140 -0
  44. package/src/inference/openrouter.js +378 -0
  45. package/src/inference/xai.js +200 -0
  46. package/src/logBridge.js +9 -0
  47. package/src/models.js +146 -0
  48. package/src/remote/client.js +225 -0
  49. package/src/scheduler/cron.js +243 -0
  50. package/src/server.js +3876 -0
  51. package/src/storage/db.js +1135 -0
  52. package/src/storage/supabase.js +364 -0
  53. package/src/tunnel/cloudflare.js +241 -0
  54. package/src/ui/components/App.jsx +687 -0
  55. package/src/ui/components/BrowserSelect.jsx +111 -0
  56. package/src/ui/components/FilePicker.jsx +472 -0
  57. package/src/ui/components/Header.jsx +444 -0
  58. package/src/ui/components/HelpPanel.jsx +173 -0
  59. package/src/ui/components/HistoryPanel.jsx +158 -0
  60. package/src/ui/components/MessageList.jsx +235 -0
  61. package/src/ui/components/ModelSelector.jsx +304 -0
  62. package/src/ui/components/PromptInput.jsx +515 -0
  63. package/src/ui/components/StreamingResponse.jsx +134 -0
  64. package/src/ui/components/ThinkingIndicator.jsx +365 -0
  65. package/src/ui/components/ToolExecution.jsx +714 -0
  66. package/src/ui/components/index.js +82 -0
  67. package/src/ui/context/TerminalContext.jsx +150 -0
  68. package/src/ui/context/index.js +13 -0
  69. package/src/ui/hooks/index.js +16 -0
  70. package/src/ui/hooks/useChatState.js +675 -0
  71. package/src/ui/hooks/useCommands.js +280 -0
  72. package/src/ui/hooks/useFileAttachments.js +216 -0
  73. package/src/ui/hooks/useKeyboardShortcuts.js +173 -0
  74. package/src/ui/hooks/useNotifications.js +185 -0
  75. package/src/ui/hooks/useTerminalSize.js +151 -0
  76. package/src/ui/hooks/useWebSocket.js +273 -0
  77. package/src/ui/index.js +94 -0
  78. package/src/ui/ink-runner.js +22 -0
  79. package/src/ui/utils/formatters.js +424 -0
  80. package/src/ui/utils/index.js +6 -0
  81. package/src/ui/utils/markdown.js +166 -0
@@ -0,0 +1,1109 @@
1
+ /**
2
+ * Browser Session Manager
3
+ *
4
+ * Manages a persistent Playwright browser session across tool calls.
5
+ * Uses singleton pattern - one browser instance per agent session.
6
+ * Supports configurable headless/visible mode and auto-cleanup on idle.
7
+ * Uses user's default/selected browser (browserChannel in config) when set.
8
+ */
9
+
10
+ import { config as cliConfig } from "../config.js";
11
+
12
/**
 * Lazily load Playwright and return its Chromium browser type.
 *
 * The module is imported on demand so that loading this file does not
 * require Playwright to be present.
 *
 * @returns {Promise<object>} Playwright's `chromium` browser type.
 * @throws {Error} When the `playwright` module cannot be imported. The
 *   underlying import failure is attached as `cause` so the real reason
 *   (missing package, broken install, ...) is not lost.
 */
async function getChromium() {
  let pw;
  try {
    pw = await import("playwright");
  } catch (err) {
    throw new Error(
      "Playwright is not installed. Run: npx playwright install chromium",
      { cause: err },
    );
  }
  return pw.chromium;
}
22
+
23
// ---------------------------------------------------------------------------
// Module-level singleton state, shared by every function in this file.
// ---------------------------------------------------------------------------
let browserInstance = null; // launched browser, or null when none is running
let currentPage = null; // page handed out by getBrowserPage(), or null
let idleTimeout = null; // handle of the pending idle-cleanup timer, or null

// Idle period after which the browser is closed automatically: 5 minutes.
const IDLE_TIMEOUT_MS = 1000 * 60 * 5;

// Default timeout applied to page operations and navigations: 30 seconds.
const OPERATION_TIMEOUT_MS = 1000 * 30;
33
+
34
// Pool of desktop User-Agent strings; one is picked at random per session.
const USER_AGENTS = [
  // Chrome 121 on macOS
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
  // Chrome 121 on Windows
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
  // Safari 17.2 on macOS
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15",
  // Firefox 122 on Windows
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:122.0) Gecko/20100101 Firefox/122.0",
];

// Extra HTTP headers sent with requests so traffic resembles a regular browser.
// NOTE(review): the Sec-Ch-Ua* values are fixed to Chrome 121 on macOS, while
// USER_AGENTS also contains Windows, Safari and Firefox entries - confirm
// whether that mismatch is acceptable.
const BROWSER_HEADERS = {
  "Accept":
    "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
  "Accept-Language": "en-US,en;q=0.9",
  "Accept-Encoding": "gzip, deflate, br",
  "Cache-Control": "no-cache",
  "Pragma": "no-cache",
  "Sec-Ch-Ua":
    '"Not A(Brand";v="99", "Google Chrome";v="121", "Chromium";v="121"',
  "Sec-Ch-Ua-Mobile": "?0",
  "Sec-Ch-Ua-Platform": '"macOS"',
  "Sec-Fetch-Dest": "document",
  "Sec-Fetch-Mode": "navigate",
  "Sec-Fetch-Site": "none",
  "Sec-Fetch-User": "?1",
  "Upgrade-Insecure-Requests": "1",
};
60
+
61
/**
 * Pick one entry of USER_AGENTS uniformly at random.
 * @returns {string} A User-Agent string from the pool.
 */
function getRandomUserAgent() {
  const pick = Math.floor(Math.random() * USER_AGENTS.length);
  return USER_AGENTS[pick];
}
67
+
68
/**
 * Build a Playwright proxy configuration from the environment.
 *
 * The first non-empty value of WEBSEARCH_PROXY, HTTPS_PROXY and HTTP_PROXY is
 * used. Only values starting with "http" are accepted.
 *
 * Credentials embedded in the URL are percent-decoded before being returned:
 * the URL parser exposes `username` / `password` in percent-encoded form, but
 * the proxy must receive the literal credentials (e.g. "p%40ss" -> "p@ss").
 *
 * @returns {{ server: string, username?: string, password?: string } | null}
 *   Proxy settings, or null when no usable proxy URL is configured.
 */
function getWebSearchProxyConfig() {
  const raw =
    process.env.WEBSEARCH_PROXY ||
    process.env.HTTPS_PROXY ||
    process.env.HTTP_PROXY ||
    null;
  if (!raw || !raw.startsWith("http")) return null;

  // Malformed escape sequences (e.g. a lone "%") are passed through unchanged
  // instead of discarding the whole proxy configuration.
  const decodeCredential = (value) => {
    try {
      return decodeURIComponent(value);
    } catch {
      return value;
    }
  };

  let parsed;
  try {
    parsed = new URL(raw);
  } catch {
    return null;
  }

  const portSuffix = parsed.port ? `:${parsed.port}` : "";
  const proxy = { server: `${parsed.protocol}//${parsed.hostname}${portSuffix}` };
  if (parsed.username) proxy.username = decodeCredential(parsed.username);
  if (parsed.password) proxy.password = decodeCredential(parsed.password);
  return proxy;
}
93
+
94
/**
 * Restart the idle countdown.
 *
 * Cancels any pending idle timer and schedules a new one that closes the
 * browser once IDLE_TIMEOUT_MS elapses.
 */
function resetIdleTimeout() {
  if (idleTimeout) clearTimeout(idleTimeout);

  const onIdle = async () => {
    console.log("[Browser] Idle timeout reached, closing browser...");
    await closeBrowser();
  };

  idleTimeout = setTimeout(onIdle, IDLE_TIMEOUT_MS);
}
106
+
107
/**
 * Derive Playwright launch options from the caller's config.
 *
 * Channel resolution: a non-blank `config.browserChannel` wins; otherwise the
 * CLI config store's "browserChannel" value is consulted. Anything other than
 * "chrome", "msedge" or "chromium" resolves to "chromium".
 *
 * @param {object} config - { browserChannel?: string | null, browserHeadless?: boolean }
 * @param {object} options - { forceVisible?: boolean }; when `forceVisible` is
 *   exactly true the result is never headless.
 * @returns {{ headless: boolean, channel: string, args: string[] }}
 */
function getLaunchOptions(config = {}, options = {}) {
  const supportedChannels = ["chrome", "msedge", "chromium"];

  // Headless unless explicitly disabled in config or a visible window is forced.
  const headless =
    options.forceVisible !== true && config.browserHeadless !== false;

  let requested = null;
  if (config.browserChannel) {
    requested = String(config.browserChannel).trim() || null;
  }
  if (requested == null && typeof cliConfig?.get === "function") {
    requested = cliConfig.get("browserChannel") || null;
  }

  const channel = supportedChannels.includes(requested)
    ? requested
    : "chromium";

  return {
    headless,
    channel,
    args: [
      "--no-sandbox",
      "--disable-setuid-sandbox",
      "--disable-dev-shm-usage",
      "--disable-accelerated-2d-canvas",
      "--disable-gpu",
      "--disable-blink-features=AutomationControlled",
      "--disable-features=IsolateOrigins,site-per-process",
      "--disable-infobars",
      "--window-size=1280,800",
    ],
  };
}
144
+
145
/**
 * Get or create the shared browser page.
 *
 * Resolution order:
 *   1. the current page, if it is still open;
 *   2. a fresh page in the running browser (reusing the previous page's
 *      context when there is one), prepared with the same timeouts and
 *      stealth init script as a freshly launched session;
 *   3. a newly launched browser with a new stealth context and page.
 *
 * Every call restarts the idle-cleanup timer.
 *
 * @param {object} config - Configuration object with browserHeadless, browserChannel
 * @returns {Promise<Page>} - Playwright page instance
 * @throws {Error} When the browser cannot be launched; the underlying error
 *   is attached as `cause`.
 */
export async function getBrowserPage(config = {}) {
  resetIdleTimeout();

  // Fast path: the existing page is still usable.
  if (currentPage && !currentPage.isClosed()) {
    return currentPage;
  }

  // Runs inside the page before any site script. It is serialized by
  // Playwright, so it must not reference anything from the enclosing scope.
  const stealthInitScript = () => {
    // Remove webdriver flag
    Object.defineProperty(navigator, "webdriver", {
      get: () => undefined,
    });

    // Spoof plugins
    Object.defineProperty(navigator, "plugins", {
      get: () => [1, 2, 3, 4, 5],
    });

    // Spoof languages
    Object.defineProperty(navigator, "languages", {
      get: () => ["en-US", "en"],
    });

    // Hide automation
    window.chrome = { runtime: {} };

    // Spoof permissions. The native method must stay bound to the Permissions
    // object; calling it detached fails for every non-notification query.
    const permissions = window.navigator.permissions;
    if (permissions && typeof permissions.query === "function") {
      const originalQuery = permissions.query.bind(permissions);
      permissions.query = (parameters) =>
        parameters.name === "notifications"
          ? Promise.resolve({ state: Notification.permission })
          : originalQuery(parameters);
    }
  };

  // Creates a browser context carrying the spoofed identity.
  const createStealthContext = async (browser) => {
    const userAgent = getRandomUserAgent();
    const context = await browser.newContext({
      userAgent,
      viewport: { width: 1280, height: 800 },
      extraHTTPHeaders: BROWSER_HEADERS,
      // Enable JavaScript and cookies like a real browser
      javaScriptEnabled: true,
      ignoreHTTPSErrors: true,
      // Emulate real browser behavior
      locale: "en-US",
      timezoneId: "America/New_York",
    });
    return { context, userAgent };
  };

  // Opens a page in the given context and applies timeouts + stealth script.
  const openPreparedPage = async (context) => {
    const page = await context.newPage();
    page.setDefaultTimeout(OPERATION_TIMEOUT_MS);
    page.setDefaultNavigationTimeout(OPERATION_TIMEOUT_MS);
    await page.addInitScript(stealthInitScript);
    return page;
  };

  // Browser is running but the page is gone: open a replacement page that
  // keeps the session's context instead of a bare default one.
  if (browserInstance) {
    try {
      const context = currentPage
        ? currentPage.context()
        : (await createStealthContext(browserInstance)).context;
      currentPage = await openPreparedPage(context);
      return currentPage;
    } catch (err) {
      // Browser might be in a bad state, close and recreate
      console.log("[Browser] Browser in bad state, recreating...");
      await closeBrowser();
    }
  }

  const { headless, channel, args } = getLaunchOptions(config, {
    forceVisible: true,
  });
  console.log(
    `[Browser] Launching browser (channel: ${channel}, headless: ${headless})...`,
  );

  try {
    const chromium = await getChromium();
    browserInstance = await chromium.launch({
      headless,
      channel,
      args,
    });

    const { context, userAgent } = await createStealthContext(browserInstance);
    currentPage = await openPreparedPage(context);

    console.log("[Browser] Browser launched successfully with stealth mode");
    console.log(
      "[Browser] Using User-Agent:",
      userAgent.substring(0, 50) + "...",
    );
    return currentPage;
  } catch (err) {
    console.error("[Browser] Failed to launch browser:", err);
    // Do not leave a half-initialized browser process behind.
    if (browserInstance) {
      await closeBrowser();
    }
    throw new Error(`Failed to launch browser: ${err.message}`, { cause: err });
  }
}
245
+
246
/**
 * Tear down the shared browser session.
 *
 * Cancels the idle timer, closes the current page and then the browser, and
 * resets the module state to null. Failures while closing are not propagated:
 * a page error is ignored and a browser error is only logged.
 */
export async function closeBrowser() {
  if (idleTimeout) {
    clearTimeout(idleTimeout);
    idleTimeout = null;
  }

  const page = currentPage;
  if (page) {
    try {
      await page.close();
    } catch (err) {
      // The page may already be closed; nothing to do.
    }
    currentPage = null;
  }

  const browser = browserInstance;
  if (browser) {
    try {
      await browser.close();
      console.log("[Browser] Browser closed");
    } catch (err) {
      console.error("[Browser] Error closing browser:", err);
    }
    browserInstance = null;
  }
}
274
+
275
/**
 * Describe the current page, e.g. to give the LLM context.
 *
 * NOTE(review): `title` holds whatever `currentPage.title()` returns. In
 * Playwright that call is asynchronous, so `title` is presumably a Promise
 * that callers have to await - confirm against the callers.
 *
 * @returns {object|null} `{ url, title }`, or null when there is no open page.
 */
export function getPageSnapshot() {
  const page = currentPage;
  const usable = Boolean(page) && !page.isClosed();
  if (!usable) {
    return null;
  }

  const url = page.url();
  const title = page.title();
  return { url, title };
}
290
+
291
/**
 * Report whether a browser is running with an open page.
 * @returns {boolean} True only when both the browser and a non-closed page exist.
 */
export function isBrowserActive() {
  if (browserInstance === null) return false;
  if (currentPage === null) return false;
  return !currentPage.isClosed();
}
300
+
301
/**
 * Probe whether a locator matches at least one attached element.
 *
 * Waits for the first match with a short timeout so that trying several
 * selector strategies in a row does not add up to the full default timeout.
 * Any failure (timeout, invalid selector, ...) counts as "not found".
 *
 * @param {Locator} locator - Playwright locator
 * @param {number} timeout - Timeout in ms (default: 2000)
 * @returns {Promise<boolean>} - True if element exists
 */
async function hasElement(locator, timeout = 2000) {
  // Wrapped in an async function so synchronous throws also become rejections.
  const probe = async () =>
    locator.first().waitFor({ state: "attached", timeout });

  return probe().then(
    () => true,
    () => false,
  );
}
317
+
318
/**
 * Pick the most plausible target when a locator matches several elements.
 *
 * Up to the first 10 matches are scored; invisible elements and elements
 * without a bounding box are skipped. Scoring:
 *   +1  visible
 *   +10 fully inside the viewport, or +5 when only vertically inside
 *   +3  has an id, +3 has data-testid, +1 has a class name longer than 5 chars
 *   +1  area > 100 px^2, +1 more when area > 500 px^2
 * The highest score wins (first one on ties). If nothing scores, the first
 * match is returned.
 *
 * @param {Locator} locator - Playwright locator with potential multiple matches
 * @param {Page} page - Playwright page
 * @returns {Promise<Locator>} - Best matching element
 */
async function findBestVisibleElement(locator, page) {
  const MAX_CANDIDATES = 10;
  const count = await locator.count();

  // If only one match, return it
  if (count <= 1) {
    return locator.first();
  }

  console.log(
    `[Browser] Found ${count} matching elements, prioritizing visible ones`,
  );

  const viewport = page.viewportSize() || { width: 1280, height: 720 };

  let bestElement = null;
  let bestScore = -1;

  for (let i = 0; i < Math.min(count, MAX_CANDIDATES); i++) {
    const element = locator.nth(i);

    try {
      if (!(await element.isVisible())) continue;

      const box = await element.boundingBox();
      if (!box) continue;

      let score = 1; // Base score for visible element

      const inViewportX = box.x >= 0 && box.x + box.width <= viewport.width;
      const inViewportY = box.y >= 0 && box.y + box.height <= viewport.height;
      if (inViewportX && inViewportY) {
        score += 10;
      } else if (inViewportY) {
        score += 5;
      }

      // One round-trip for all DOM traits. The class list is read through the
      // attribute because `className` is not a string on SVG elements.
      const traits = await element.evaluate((el) => {
        const classAttr = el.getAttribute("class") || "";
        return {
          hasId: !!el.id,
          hasDataTestId: !!(el.dataset && el.dataset.testid),
          hasUniqueClass: classAttr.split(" ").some((c) => c.length > 5),
        };
      });

      if (traits.hasId) score += 3;
      if (traits.hasDataTestId) score += 3;
      if (traits.hasUniqueClass) score += 1;

      // Prefer larger clickable area
      const area = box.width * box.height;
      if (area > 100) score += 1;
      if (area > 500) score += 1;

      if (score > bestScore) {
        bestScore = score;
        bestElement = element;
      }
    } catch {
      // Element may have been removed, continue
    }
  }

  if (bestElement) {
    console.log(
      `[Browser] Selected element with visibility score ${bestScore}`,
    );
    return bestElement;
  }

  // No candidate could be scored: fall back to the first match.
  console.log("[Browser] Falling back to first visible element");
  return locator.first();
}
405
+
406
/**
 * Smart element finder - resolves a loosely specified selector to a locator.
 *
 * Strategies are tried in this order, each with a short detection timeout so
 * the total wait stays bounded:
 *   1. Playwright pseudo-selectors (:has-text(), :text(), :visible, ...)
 *   2. CSS-looking selectors; for a[href="..."] / [href="..."] also a partial
 *      href match and a link lookup by accessible name derived from the href
 *   3. Quoted text ("Some text") and :text("...") patterns
 *   4. Role selectors such as "button:Submit" ("input" maps to "textbox")
 *   5. Common search-input selectors when the selector mentions "search"
 *   6. Buttons / links containing the text, then any element with the text
 *   7. Placeholder, label, test id
 *   8. The raw selector as a generic locator
 *
 * @param {Page} page - Playwright page
 * @param {string} selector - Selector string (CSS, text, Playwright pseudo-selectors, or aria)
 * @returns {Promise<Locator>} - Element locator
 * @throws {Error} When the selector is not a non-empty string or no strategy
 *   finds a matching element.
 */
export async function findElement(page, selector) {
  if (typeof selector !== "string" || selector.trim() === "") {
    throw new Error(
      `Could not find element: selector must be a non-empty string (received ${JSON.stringify(selector)}).`,
    );
  }

  // Short timeout per strategy so we resolve or fail fast
  const DETECT_TIMEOUT = 800;

  // Escape text for use inside a RegExp.
  const escapeRegExp = (text) => text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  // Escape text for use inside a double-quoted selector string.
  const escapeQuoted = (text) =>
    text.replace(/\\/g, "\\\\").replace(/"/g, '\\"');

  console.log(`[Browser] Finding element: "${selector}"`);

  // Check if selector uses Playwright-specific pseudo-selectors
  // These need to go through page.locator() directly, not querySelector
  const playwrightPseudoSelectors = [
    ":has-text(",
    ":text(",
    ":has(",
    ":is(",
    ":nth-match(",
    ":visible",
    ":above(",
    ":below(",
    ":left-of(",
    ":right-of(",
    ":near(",
  ];
  const isPlaywrightSelector = playwrightPseudoSelectors.some((ps) =>
    selector.includes(ps),
  );

  if (isPlaywrightSelector) {
    try {
      const locator = page.locator(selector);
      if (await hasElement(locator, DETECT_TIMEOUT)) {
        console.log(`[Browser] Found via Playwright selector: "${selector}"`);
        return locator.first();
      }
    } catch (err) {
      console.log(`[Browser] Playwright selector failed: ${err.message}`);
      // Fall through to other strategies
    }
  }

  // If it looks like a standard CSS selector (including a[href="..."]), try it directly
  const looksLikeCss =
    selector.startsWith("#") ||
    selector.startsWith(".") ||
    selector.startsWith("[") ||
    selector.includes(">") ||
    /^[a-zA-Z][a-zA-Z0-9]*\[/.test(selector.trim());
  if (looksLikeCss) {
    try {
      const locator = page.locator(selector);
      if (await hasElement(locator, DETECT_TIMEOUT)) {
        console.log(`[Browser] Found via CSS selector: "${selector}"`);
        return locator.first();
      }
    } catch (err) {
      console.log(`[Browser] CSS selector failed: ${err.message}`);
    }

    // For a[href="value"] or [href="value"], try partial match a[href*="value"]
    // (sites often use full URLs or extra params)
    const hrefExactMatch =
      selector.match(/^a\[href\s*=\s*["']([^"']+)["']\]$/i) ||
      selector.match(/^\[href\s*=\s*["']([^"']+)["']\]$/i);
    if (hrefExactMatch) {
      // The captured value cannot contain quotes (excluded by the pattern) and
      // is already written in selector syntax, so it is embedded unchanged.
      const hrefValue = hrefExactMatch[1];
      const partialSelector = `a[href*="${hrefValue}"]`;
      try {
        const locator = page.locator(partialSelector);
        if (await hasElement(locator, DETECT_TIMEOUT)) {
          console.log(`[Browser] Found via partial href: "${partialSelector}"`);
          return await findBestVisibleElement(locator, page);
        }
      } catch (err) {
        console.log(`[Browser] Partial href lookup failed: ${err.message}`);
      }

      // Also try link by accessible name (e.g. "Syrio_Forel" -> link text "Syrio Forel")
      const slug = hrefValue.replace(/^.*\/wiki\//i, "").replace(/#.*$/, "");
      const linkText = slug ? slug.replace(/_/g, " ").trim() : null;
      if (linkText) {
        try {
          const byRole = page.getByRole("link", {
            name: new RegExp(escapeRegExp(linkText), "i"),
          });
          if (await hasElement(byRole, DETECT_TIMEOUT)) {
            console.log(`[Browser] Found via link role (name): "${linkText}"`);
            return await findBestVisibleElement(byRole, page);
          }
        } catch (err) {
          console.log(`[Browser] Link role lookup failed: ${err.message}`);
        }
      }
    }
  }

  // Try as text content (quoted strings)
  if (selector.startsWith('"') && selector.endsWith('"')) {
    const text = selector.slice(1, -1);
    const locator = page.getByText(text, { exact: false });
    if (await hasElement(locator, DETECT_TIMEOUT)) {
      console.log(`[Browser] Found via quoted text: "${text}"`);
      return locator.first();
    }
  }

  // Try Playwright's getByText for :text() style selectors
  const textMatch = selector.match(/^:text\(["'](.+)["']\)$/i);
  if (textMatch) {
    const text = textMatch[1];
    const locator = page.getByText(text, { exact: false });
    if (await hasElement(locator, DETECT_TIMEOUT)) {
      console.log(`[Browser] Found via :text() pattern: "${text}"`);
      return locator.first();
    }
  }

  // Try role-based selectors (e.g., "button:Submit", "Input:Search Wikipedia", "link:Click here")
  // Uses findBestVisibleElement to handle multiple matching roles. "Input" maps to "textbox".
  const roleMatch = selector.match(
    /^(button|link|textbox|input|checkbox|radio|combobox|menuitem|tab|option):(.+)$/i,
  );
  if (roleMatch) {
    const [, role, name] = roleMatch;
    const roleName =
      role.toLowerCase() === "input" ? "textbox" : role.toLowerCase();
    // Strip one pair of surrounding quotes; keep the raw name if that empties it.
    const nameStr =
      name
        .trim()
        .replace(/^["']|["']$/g, "")
        .trim() || name.trim();
    const locator = page.getByRole(roleName, {
      name: new RegExp(escapeRegExp(nameStr), "i"),
    });
    if (await hasElement(locator, DETECT_TIMEOUT)) {
      console.log(`[Browser] Found via role selector: ${role}:${name}`);
      return await findBestVisibleElement(locator, page);
    }
  }

  // Try common search input selectors when selector looks like a search prompt
  // (e.g. "Search", "Search Wikipedia")
  if (/\bsearch\b/i.test(selector) && selector.length < 80) {
    for (const sel of [
      'input[name="search"]',
      'input[type="search"]',
      "input[name='search']",
      "[role='search'] input",
      "form[role='search'] input",
    ]) {
      try {
        const loc = page.locator(sel).first();
        if (await hasElement(loc, DETECT_TIMEOUT)) {
          console.log(`[Browser] Found via search input selector: ${sel}`);
          return loc;
        }
      } catch (err) {
        console.log(`[Browser] Search input selector failed: ${err.message}`);
      }
    }
  }

  // Try finding button/link by text content (most common use case).
  // The text is escaped up front: an unescaped quote only fails inside
  // hasElement(), which reports "not found", so an escape-on-error retry
  // would never run.
  const quotedText = escapeQuoted(selector);
  try {
    const buttonByText = page.locator(
      `button:has-text("${quotedText}"), a:has-text("${quotedText}"), [role="button"]:has-text("${quotedText}")`,
    );
    if (await hasElement(buttonByText, DETECT_TIMEOUT)) {
      console.log(`[Browser] Found button/link via text: "${selector}"`);
      return await findBestVisibleElement(buttonByText, page);
    }
  } catch (err) {
    console.log(`[Browser] Button/link text lookup failed: ${err.message}`);
  }

  // Try as plain text (for any element with this text)
  // Uses findBestVisibleElement to prioritize visible elements in viewport
  const textLocator = page.getByText(selector, { exact: false });
  if (await hasElement(textLocator, DETECT_TIMEOUT)) {
    console.log(`[Browser] Found via text content: "${selector}"`);
    return await findBestVisibleElement(textLocator, page);
  }

  // Try as placeholder text (for inputs)
  const placeholderLocator = page.getByPlaceholder(selector);
  if (await hasElement(placeholderLocator, DETECT_TIMEOUT)) {
    console.log(`[Browser] Found via placeholder: "${selector}"`);
    return placeholderLocator.first();
  }

  // Try as label text (for form fields)
  const labelLocator = page.getByLabel(selector);
  if (await hasElement(labelLocator, DETECT_TIMEOUT)) {
    console.log(`[Browser] Found via label: "${selector}"`);
    return labelLocator.first();
  }

  // Try test ID
  const testIdLocator = page.getByTestId(selector);
  if (await hasElement(testIdLocator, DETECT_TIMEOUT)) {
    console.log(`[Browser] Found via testId: "${selector}"`);
    return testIdLocator.first();
  }

  // Final fallback: try as generic locator (which supports more than querySelector)
  try {
    const cssLocator = page.locator(selector);
    if (await hasElement(cssLocator, DETECT_TIMEOUT)) {
      console.log(`[Browser] Found via fallback locator: "${selector}"`);
      return cssLocator.first();
    }
  } catch (err) {
    console.log(`[Browser] Fallback locator failed: ${err.message}`);
  }

  throw new Error(
    `Could not find element: "${selector}". Tried Playwright selectors, CSS selectors, text content, roles, buttons/links, placeholders, labels, and testIds.`,
  );
}
646
+
647
/**
 * Extract readable text content from a page or from one element of it.
 *
 * The result contains the page title and URL, a list of interactive elements
 * (the first 25 clickable elements and the first 10 form fields of the whole
 * document, skipping unlabeled ones) and the whitespace-normalized text of
 * the target, truncated to 10000 characters.
 *
 * When `selector` is given it only SCOPES the extraction via
 * document.querySelector (first match). findElement() is deliberately not
 * used: scoping selectors such as "#section ~ p" are not clickable-element
 * selectors and would fail there.
 *
 * @param {Page} page - Playwright page
 * @param {string} selector - Optional selector to scope extraction
 * @returns {Promise<string>} - Extracted text content, or an "Error: ..."
 *   message when the selector matches nothing or is not valid CSS.
 */
export async function extractPageContent(page, selector = null) {
  const title = await page.title();
  const url = page.url();

  // Extract main content (selector scopes the root; querySelector returns first match)
  const content = await page.evaluate((sel) => {
    let target;
    try {
      target = sel ? document.querySelector(sel) : document.body;
    } catch {
      // querySelector throws on invalid CSS; report it like "no match" so the
      // caller gets the explanatory message instead of an exception.
      return null;
    }
    if (!target) return null;

    // Work on a clone so the live page is not modified.
    const clone = target.cloneNode(true);
    clone
      .querySelectorAll(
        'script, style, noscript, [hidden], [aria-hidden="true"]',
      )
      .forEach((el) => el.remove());

    let text = (clone.textContent || "").replace(/\s+/g, " ").trim();
    if (text.length > 10000) {
      text = text.substring(0, 10000) + "... (truncated)";
    }
    return text;
  }, selector);

  if (content === null) {
    return `Error: No element found for selector "${selector}". Use a valid CSS selector (e.g. "#section_id" or ".class"), or omit selector for full page content.`;
  }

  // Also get interactive elements for context (include href for links so agent can click reliably)
  const interactiveElements = await page.evaluate(() => {
    const elements = [];

    // Clickable elements; only the first 25 matches are considered.
    const clickables = Array.from(
      document.querySelectorAll(
        'a, button, [role="button"], input[type="submit"]',
      ),
    ).slice(0, 25);
    for (const el of clickables) {
      const text =
        el.textContent?.trim()?.substring(0, 50) ||
        el.getAttribute("aria-label") ||
        el.getAttribute("title") ||
        "";
      if (!text) continue;
      const href =
        el.tagName === "A" && el.getAttribute("href")
          ? el.getAttribute("href")
          : null;
      elements.push({ type: el.tagName.toLowerCase(), text, href });
    }

    // Form fields; only the first 10 matches are considered.
    const fields = Array.from(
      document.querySelectorAll('input:not([type="hidden"]), textarea, select'),
    ).slice(0, 10);
    for (const el of fields) {
      const label =
        el.getAttribute("placeholder") ||
        el.getAttribute("aria-label") ||
        el.getAttribute("name") ||
        "";
      if (label) {
        elements.push({ type: "input", label });
      }
    }

    return elements;
  });

  // Format the output (include href for links so agent can click with a[href="..."] or :has-text())
  let output = `Page: ${title}\nURL: ${url}\n\n`;

  if (interactiveElements.length > 0) {
    output += `Interactive elements (use selector a[href="..."] or text to click):\n`;
    for (const el of interactiveElements) {
      if (el.type === "input") {
        output += ` - Input: "${el.label}"\n`;
      } else if (el.href) {
        output += ` - a: "${el.text}" → a[href="${el.href}"]\n`;
      } else {
        output += ` - ${el.type}: "${el.text}"\n`;
      }
    }
    output += "\n";
  }

  output += `Content:\n${content}`;

  return output;
}
755
+
756
/**
 * Execute a sequence of browser actions in a single call.
 * Reduces latency by batching multiple operations.
 *
 * Actions run in order. When an action throws, its error message is recorded
 * and the sequence stops, unless that action sets `continueOnError`. An action
 * may set `delayAfter` (ms) to pause before the next action starts.
 *
 * Supported `type` values: click, type, fill, wait, scroll, hover, select,
 * check, press, navigate, evaluate, screenshot, read.
 *
 * @param {Page} page - Playwright page
 * @param {Array} actions - Array of action objects (each with a `type`)
 * @returns {Promise<object>} - `{ results, completed, failed, total, finalState, lastError }`
 */
export async function executeActionSequence(page, actions) {
  const results = [];
  let lastError = null;

  // Keep result messages compact when long text was typed/filled.
  const preview = (text) =>
    `${text.substring(0, 20)}${text.length > 20 ? "..." : ""}`;

  for (let i = 0; i < actions.length; i++) {
    const action = actions[i];
    const actionResult = { action: action.type, index: i, success: false };

    try {
      switch (action.type) {
        case "click": {
          const element = await findElement(page, action.selector);
          await element.scrollIntoViewIfNeeded();
          const clickOptions = { timeout: 5000 };
          if (action.force) clickOptions.force = true;

          const performClick = async () => {
            try {
              await element.click(clickOptions);
              return { forced: false };
            } catch (clickErr) {
              // Auto-retry with force for overlay, not visible, or outside viewport
              if (!action.force) {
                const retryWithForce =
                  clickErr.message.includes("intercepts pointer events") ||
                  clickErr.message.includes("element is not visible") ||
                  clickErr.message.includes("element is outside the viewport");
                if (retryWithForce) {
                  await element.click({ ...clickOptions, force: true });
                  return { forced: true };
                }
              }
              throw clickErr;
            }
          };

          let clickResult;
          if (action.waitForNavigation) {
            const navigationTimeout = action.navigationTimeout || 30000;
            // Start listening before clicking so the navigation is not missed.
            // The wait resolves with `null` for anchor / History API
            // navigations, so map settle state to a boolean instead of relying
            // on the response object. A timeout (no navigation) is not an error.
            // A failed click rejects here and is handled by the outer catch;
            // it is intentionally not retried, to avoid clicking twice.
            const [didNavigate, result] = await Promise.all([
              page
                .waitForNavigation({
                  waitUntil: action.waitUntil || "domcontentloaded",
                  timeout: navigationTimeout,
                })
                .then(
                  () => true,
                  () => false,
                ),
              performClick(),
            ]);
            clickResult = result;
            if (didNavigate) {
              actionResult.navigated = true;
              actionResult.newUrl = page.url();
            }
          } else {
            clickResult = await performClick();
          }

          actionResult.success = true;
          actionResult.message = `Clicked "${action.selector}"${clickResult?.forced ? " (auto-forced)" : ""}${actionResult.navigated ? ` → navigated to ${actionResult.newUrl}` : ""}`;
          break;
        }

        case "type": {
          const element = await findElement(page, action.selector);
          // Existing content is cleared unless the caller passes clear: false.
          if (action.clear !== false) {
            await element.clear();
          }
          await element.type(action.text, { delay: action.delay || 0 });
          if (action.submit) {
            await element.press("Enter");
          }
          actionResult.success = true;
          actionResult.message = `Typed "${preview(action.text)}" into "${action.selector}"`;
          break;
        }

        case "fill": {
          const element = await findElement(page, action.selector);
          await element.fill(action.text);
          if (action.submit) {
            await element.press("Enter");
          }
          actionResult.success = true;
          actionResult.message = `Filled "${preview(action.text)}" into "${action.selector}"`;
          break;
        }

        case "wait": {
          // Condition-based waits are checked before the plain sleep because
          // they all accept `timeout` as an option; checking the sleep first
          // would turn `{ url, timeout }` or `{ load, timeout }` into a sleep.
          if (action.selector) {
            // Wait for element
            const element = await findElement(page, action.selector);
            await element.waitFor({
              state: action.state || "visible",
              timeout: action.timeout || 10000,
            });
            actionResult.message = `Waited for "${action.selector}"`;
          } else if (action.url) {
            // Wait for URL
            await page.waitForURL(action.url, {
              timeout: action.timeout || 30000,
            });
            actionResult.message = `Waited for URL: ${action.url}`;
          } else if (action.load) {
            // Wait for load state
            await page.waitForLoadState(action.load, {
              timeout: action.timeout || 30000,
            });
            actionResult.message = `Waited for load state: ${action.load}`;
          } else if (action.timeout || action.ms) {
            // Wait for time
            await page.waitForTimeout(action.timeout || action.ms);
            actionResult.message = `Waited ${action.timeout || action.ms}ms`;
          }
          actionResult.success = true;
          break;
        }

        case "scroll": {
          if (action.selector) {
            const element = await findElement(page, action.selector);
            await element.scrollIntoViewIfNeeded();
            actionResult.message = `Scrolled to "${action.selector}"`;
          } else {
            const direction = action.direction || "down";
            const amount = action.amount || 500;
            await page.evaluate(
              ({ dir, amt }) => {
                switch (dir) {
                  case "down":
                    window.scrollBy(0, amt);
                    break;
                  case "up":
                    window.scrollBy(0, -amt);
                    break;
                  case "bottom":
                    window.scrollTo(0, document.body.scrollHeight);
                    break;
                  case "top":
                    window.scrollTo(0, 0);
                    break;
                }
              },
              { dir: direction, amt: amount },
            );
            actionResult.message = `Scrolled ${direction} ${amount}px`;
          }
          actionResult.success = true;
          break;
        }

        case "hover": {
          const element = await findElement(page, action.selector);
          await element.hover();
          actionResult.success = true;
          actionResult.message = `Hovered over "${action.selector}"`;
          break;
        }

        case "select": {
          const element = await findElement(page, action.selector);
          await element.selectOption(action.value);
          actionResult.success = true;
          actionResult.message = `Selected "${action.value}" in "${action.selector}"`;
          break;
        }

        case "check": {
          const element = await findElement(page, action.selector);
          if (action.uncheck) {
            await element.uncheck();
            actionResult.message = `Unchecked "${action.selector}"`;
          } else {
            await element.check();
            actionResult.message = `Checked "${action.selector}"`;
          }
          actionResult.success = true;
          break;
        }

        case "press": {
          // With a selector the key goes to that element; otherwise to the page.
          if (action.selector) {
            const element = await findElement(page, action.selector);
            await element.press(action.key);
          } else {
            await page.keyboard.press(action.key);
          }
          actionResult.success = true;
          actionResult.message = `Pressed "${action.key}"`;
          break;
        }

        case "navigate": {
          await page.goto(action.url, {
            waitUntil: action.waitUntil || "domcontentloaded",
            timeout: action.timeout || 30000,
          });
          actionResult.success = true;
          actionResult.message = `Navigated to "${action.url}"`;
          break;
        }

        case "evaluate": {
          // NOTE(review): runs caller-supplied script in the page context;
          // callers must only pass trusted scripts.
          const result = await page.evaluate(action.script);
          actionResult.success = true;
          actionResult.message = "Executed script";
          actionResult.result = result;
          break;
        }

        case "screenshot": {
          const path = action.path || `screenshot-${Date.now()}.png`;
          await page.screenshot({ path, fullPage: action.fullPage || false });
          actionResult.success = true;
          actionResult.message = `Screenshot saved to ${path}`;
          break;
        }

        case "read": {
          const content = await extractPageContent(
            page,
            action.selector || null,
          );
          const maxLen = action.maxLength || 8000;
          actionResult.success = true;
          actionResult.message = "Extracted page content";
          actionResult.result =
            content.length > maxLen
              ? content.substring(0, maxLen) + "\n\n... (truncated)"
              : content;
          break;
        }

        default:
          actionResult.error = `Unknown action type: ${action.type}`;
      }
    } catch (err) {
      actionResult.error = err.message;
      lastError = err;

      // If continueOnError is not set, stop the sequence
      if (!action.continueOnError) {
        results.push(actionResult);
        break;
      }
    }

    results.push(actionResult);

    // Optional delay between actions
    if (action.delayAfter) {
      await page.waitForTimeout(action.delayAfter);
    }
  }

  // Get final page state. Reading the title can fail when the page was closed
  // or is mid-navigation; that must not discard the results gathered so far.
  let finalTitle = "";
  try {
    finalTitle = await page.title();
  } catch {
    // Title unavailable; report an empty title instead of rejecting.
  }
  const finalState = {
    url: page.url(),
    title: finalTitle,
  };

  return {
    results,
    completed: results.filter((r) => r.success).length,
    failed: results.filter((r) => r.error).length,
    total: actions.length,
    finalState,
    lastError: lastError?.message,
  };
}
1039
+
1040
/**
 * Load a URL in a throwaway headless browser and return the resulting HTML.
 * A fresh browser is launched for each call and closed afterwards; the shared
 * singleton session is not used. Intended for web search SERP pages that
 * require JavaScript. Defaults to domcontentloaded plus a short pause for
 * speed, since SERP content is usually in the initial DOM.
 * @param {string} url - Full URL to load
 * @param {object} options - { waitUntil?, timeout?, waitAfterLoad?, browserConfig? }
 * @param {object} options.browserConfig - Optional { browserChannel } to use same browser as session
 * @returns {Promise<string>} - Full page HTML after load
 */
export async function fetchHtmlWithBrowser(url, options = {}) {
  const {
    waitUntil = "domcontentloaded",
    timeout = 12000,
    waitAfterLoad = 1000,
    browserConfig = {},
  } = options;

  // Only channel/args are taken from the resolved launch options; headless is
  // always forced on for this one-off browser.
  const launchOptions = getLaunchOptions({
    ...browserConfig,
    browserHeadless: true,
  });
  const { channel, args } = launchOptions;

  let browser = null;
  try {
    const chromium = await getChromium();
    browser = await chromium.launch({ headless: true, channel, args });

    const contextOptions = {
      userAgent: getRandomUserAgent(),
      viewport: { width: 1280, height: 800 },
      extraHTTPHeaders: BROWSER_HEADERS,
      javaScriptEnabled: true,
      ignoreHTTPSErrors: true,
      locale: "en-US",
    };
    const proxyConfig = getWebSearchProxyConfig();
    if (proxyConfig) {
      contextOptions.proxy = proxyConfig;
    }

    const context = await browser.newContext(contextOptions);
    const page = await context.newPage();
    page.setDefaultTimeout(timeout);
    page.setDefaultNavigationTimeout(timeout);

    // Runs in the page before any site script executes.
    await page.addInitScript(() => {
      Object.defineProperty(navigator, "webdriver", { get: () => undefined });
      window.chrome = { runtime: {} };
    });

    await page.goto(url, { waitUntil, timeout });
    if (waitAfterLoad > 0) {
      await page.waitForTimeout(waitAfterLoad);
    }
    return await page.content();
  } finally {
    if (browser) {
      try {
        await browser.close();
      } catch {
        // Best-effort cleanup: a failed close must not mask the real outcome.
      }
    }
  }
}
1099
+
1100
// Default export: the module's public browser helpers bundled into one object.
const browserToolkit = {
  getBrowserPage,
  closeBrowser,
  getPageSnapshot,
  isBrowserActive,
  findElement,
  extractPageContent,
  executeActionSequence,
  fetchHtmlWithBrowser,
};

export default browserToolkit;