agent-browser-loop 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/browser.ts ADDED
@@ -0,0 +1,564 @@
1
+ import type { Browser, BrowserContext, Page } from "playwright";
2
+ import { chromium } from "playwright";
3
+ import * as actions from "./actions";
4
+ import { findChromeExecutable } from "./chrome";
5
+ import { log } from "./log";
6
+ import { formatStateText, getState } from "./state";
7
+ import type {
8
+ BrowserConfig,
9
+ BrowserState,
10
+ ClickOptions,
11
+ DumpNetworkOptions,
12
+ DumpStateOptions,
13
+ DumpStateTextOptions,
14
+ GetStateOptions,
15
+ NavigateOptions,
16
+ NetworkEvent,
17
+ TypeOptions,
18
+ } from "./types";
19
+
20
/**
 * Options accepted by the {@link AgentBrowser} constructor (alias of `BrowserConfig`).
 */
export type AgentBrowserOptions = BrowserConfig;

/**
 * Main browser automation class.
 *
 * Owns one Playwright Chromium context and one page, records console output
 * and (optionally) network events for that page, and exposes thin wrappers
 * around the helpers in `./actions` and `./state`.
 */
export class AgentBrowser {
  /** Timeout in milliseconds used whenever neither the call nor the config supplies one. */
  private static readonly DEFAULT_TIMEOUT_MS = 30000;
  /** Viewport width in pixels used when the config does not supply one. */
  private static readonly DEFAULT_VIEWPORT_WIDTH = 1280;
  /** Viewport height in pixels used when the config does not supply one. */
  private static readonly DEFAULT_VIEWPORT_HEIGHT = 720;
  /** Network log limit handed to the capture helper when none is configured. */
  private static readonly DEFAULT_NETWORK_LOG_LIMIT = 500;
  /** Matches console entries that start with an `[error]` or `[warning]` tag. */
  private static readonly CONSOLE_ERROR_PATTERN = /^\[(error|warning)\]/i;

  private browser: Browser | null = null;
  private context: BrowserContext | null = null;
  private page: Page | null = null;
  private config: BrowserConfig;
  private consoleLogs: string[] = [];
  private networkLogs: NetworkEvent[] = [];
  private networkCaptureEnabled = false;
  private networkLogLimit: number;
  private usePersistentContext = false;
  private lastState: BrowserState | null = null;

  /**
   * Store the configuration, filling in defaults for headless mode, system
   * Chrome usage, viewport size, timeout and network capture.
   *
   * @param options - Partial configuration; omitted fields fall back to defaults.
   */
  constructor(options: AgentBrowserOptions = {}) {
    this.config = {
      headless: options.headless ?? true,
      executablePath: options.executablePath,
      useSystemChrome: options.useSystemChrome ?? true,
      viewportWidth:
        options.viewportWidth ?? AgentBrowser.DEFAULT_VIEWPORT_WIDTH,
      viewportHeight:
        options.viewportHeight ?? AgentBrowser.DEFAULT_VIEWPORT_HEIGHT,
      userDataDir: options.userDataDir,
      timeout: options.timeout ?? AgentBrowser.DEFAULT_TIMEOUT_MS,
      captureNetwork: options.captureNetwork ?? true,
      networkLogLimit: options.networkLogLimit,
      storageState: options.storageState,
      storageStatePath: options.storageStatePath,
    };
    this.networkLogLimit =
      options.networkLogLimit ?? AgentBrowser.DEFAULT_NETWORK_LOG_LIMIT;
  }

  /** Effective default timeout in milliseconds. */
  private get defaultTimeout(): number {
    return this.config.timeout ?? AgentBrowser.DEFAULT_TIMEOUT_MS;
  }

  /** Effective viewport size passed to Playwright. */
  private get viewport(): { width: number; height: number } {
    return {
      width: this.config.viewportWidth ?? AgentBrowser.DEFAULT_VIEWPORT_WIDTH,
      height:
        this.config.viewportHeight ?? AgentBrowser.DEFAULT_VIEWPORT_HEIGHT,
    };
  }

  /**
   * Start the browser.
   *
   * With `userDataDir` set a persistent context is launched; otherwise a
   * browser is launched and a fresh context (optionally seeded with storage
   * state) is created. When `useSystemChrome` is enabled the executable is
   * `executablePath` or whatever `findChromeExecutable()` returns; when it is
   * disabled no executable path is passed at all. If launching with an
   * executable path fails, the launch is retried once without it.
   *
   * @throws Error if the instance is already started, or if launching fails.
   *   On failure any partially created browser/context is closed first so
   *   that `start()` can be called again.
   */
  async start(): Promise<void> {
    // A persistent context may not expose a Browser handle, so checking
    // `browser` alone would let a second start() leak the first context.
    if (this.browser || this.context) {
      throw new Error("Browser already started");
    }

    const resolvedExecutablePath = this.config.useSystemChrome
      ? this.config.executablePath || findChromeExecutable()
      : undefined;

    log
      .withMetadata({
        headless: this.config.headless,
        useSystemChrome: this.config.useSystemChrome,
        executablePath: resolvedExecutablePath,
        userDataDir: this.config.userDataDir,
      })
      .debug("Launching browser");

    try {
      if (this.config.userDataDir) {
        await this.launchPersistent(
          this.config.userDataDir,
          resolvedExecutablePath,
        );
      } else {
        await this.launchEphemeral(resolvedExecutablePath);
      }

      const page = this.getPage();
      page.setDefaultTimeout(this.defaultTimeout);

      // Set up console capture
      this.consoleLogs = actions.setupConsoleCapture(page);

      if (this.config.captureNetwork) {
        this.enableNetworkCapture();
      }
    } catch (error) {
      // Do not leave a half-started instance behind: release whatever was
      // created, then surface the original failure.
      try {
        await this.stop();
      } catch (cleanupError) {
        log.withError(cleanupError).warn("Cleanup after failed start failed");
      }
      throw error;
    }
  }

  /**
   * Run `launch` with the given executable path and, if that fails while a
   * path was supplied, log a warning and retry once without a path.
   */
  private async launchWithFallback<T>(
    executablePath: string | undefined,
    retryMessage: string,
    launch: (executablePath: string | undefined) => Promise<T>,
  ): Promise<T> {
    try {
      return await launch(executablePath);
    } catch (error) {
      if (!executablePath) {
        // Nothing different to retry with.
        throw error;
      }
      log.withError(error).warn(retryMessage);
      return launch(undefined);
    }
  }

  /** Launch a persistent context rooted at `userDataDir` and adopt its first page. */
  private async launchPersistent(
    userDataDir: string,
    executablePath: string | undefined,
  ): Promise<void> {
    this.usePersistentContext = true;
    const launchOptions = {
      headless: this.config.headless,
      viewport: this.viewport,
      timeout: this.config.timeout,
    };
    const context = await this.launchWithFallback(
      executablePath,
      "Persistent context launch failed, retrying without path",
      (path) =>
        chromium.launchPersistentContext(userDataDir, {
          ...launchOptions,
          executablePath: path,
        }),
    );
    // Record the context before any further awaits so a later failure can
    // still be cleaned up by stop().
    this.context = context;
    this.browser = context.browser();
    this.page = context.pages()[0] ?? (await context.newPage());
  }

  /** Launch a browser, create a new context and open a page in it. */
  private async launchEphemeral(
    executablePath: string | undefined,
  ): Promise<void> {
    const browser = await this.launchWithFallback(
      executablePath,
      "Browser launch failed, retrying without path",
      (path) =>
        chromium.launch({
          headless: this.config.headless,
          executablePath: path,
        }),
    );
    this.browser = browser;

    const context = await browser.newContext({
      viewport: this.viewport,
      storageState: this.config.storageStatePath ?? this.config.storageState,
    });
    this.context = context;

    this.page = await context.newPage();
  }

  /**
   * Stop the browser.
   *
   * Closes the context and, unless a persistent context is in use, the
   * browser. Captured console and network logs are dropped; the last cached
   * state is left untouched. Internal state is reset before closing, so the
   * instance can be started again even if closing throws.
   */
  async stop(): Promise<void> {
    const context = this.context;
    const browser = this.usePersistentContext ? null : this.browser;

    this.browser = null;
    this.context = null;
    this.page = null;
    this.consoleLogs = [];
    this.networkLogs = [];
    this.networkCaptureEnabled = false;
    this.usePersistentContext = false;

    try {
      if (context) {
        await context.close();
      }
    } finally {
      // Still close the browser when closing the context failed.
      if (browser) {
        await browser.close();
      }
    }
  }

  /**
   * Get current page (throws if not started)
   */
  private getPage(): Page {
    if (!this.page) {
      throw new Error("Browser not started. Call start() first.");
    }
    return this.page;
  }

  /**
   * Get current context (throws if not started)
   */
  private getContext(): BrowserContext {
    if (!this.context) {
      throw new Error("Browser not started. Call start() first.");
    }
    return this.context;
  }

  /**
   * Navigate to a URL.
   *
   * @param url - Target URL.
   * @param options - Remaining navigate options, forwarded to `actions.navigate`.
   * @throws Error if the browser has not been started.
   */
  async navigate(
    url: string,
    options?: Omit<NavigateOptions, "url">,
  ): Promise<void> {
    await actions.navigate(this.getPage(), { url, ...options });
  }

  /**
   * Get rich state of the current page, augmented with the console and
   * network errors captured so far. The result is also cached for
   * {@link getLastState}.
   *
   * @throws Error if the browser has not been started.
   */
  async getState(options?: GetStateOptions): Promise<BrowserState> {
    // getState now handles ref injection internally
    const state = await getState(this.getPage(), this.getContext(), options);
    const result = {
      ...state,
      errors: {
        console: this.getConsoleErrors(),
        network: this.getNetworkErrors(),
      },
    };
    this.lastState = result;
    return result;
  }

  /**
   * Get the last cached state (non-blocking)
   * Returns null if getState() hasn't been called yet
   */
  getLastState(): BrowserState | null {
    return this.lastState;
  }

  /**
   * Dump current state to a JSON file (written with `Bun.write`).
   *
   * @param options - `path` to write to, optional `state` options for
   *   {@link getState}, and `pretty` (default `true`) for 2-space indentation.
   */
  async dumpState(options: DumpStateOptions): Promise<void> {
    const state = await this.getState(options.state);
    const pretty = options.pretty ?? true;
    const json = JSON.stringify(state, null, pretty ? 2 : undefined);
    await Bun.write(options.path, json);
  }

  /**
   * Dump current state, rendered by `formatStateText`, to a file
   * (written with `Bun.write`).
   */
  async dumpStateText(options: DumpStateTextOptions): Promise<void> {
    const state = await this.getState(options.state);
    const text = formatStateText(state);
    await Bun.write(options.path, text);
  }

  /**
   * Dump the captured network logs to a JSON file (written with `Bun.write`).
   *
   * @param options - `path` to write to and `pretty` (default `true`).
   */
  async dumpNetworkLogs(options: DumpNetworkOptions): Promise<void> {
    const pretty = options.pretty ?? true;
    const json = JSON.stringify(
      this.getNetworkLogs(),
      null,
      pretty ? 2 : undefined,
    );
    await Bun.write(options.path, json);
  }

  /**
   * Click an element
   */
  async click(options: ClickOptions): Promise<void> {
    await actions.click(this.getPage(), options);
  }

  /**
   * Type text into an element
   */
  async type(options: TypeOptions): Promise<void> {
    await actions.type(this.getPage(), options);
  }

  /**
   * Press a keyboard key
   */
  async press(key: string): Promise<void> {
    await actions.press(this.getPage(), key);
  }

  /**
   * Scroll the page
   */
  async scroll(direction: "up" | "down", amount?: number): Promise<void> {
    await actions.scroll(this.getPage(), direction, amount);
  }

  /**
   * Wait for navigation to complete
   */
  async waitForNavigation(options?: { timeoutMs?: number }): Promise<void> {
    await actions.waitForNavigation(this.getPage(), options);
  }

  /**
   * Wait for an element
   */
  async waitForElement(
    selector: string,
    options?: { timeoutMs?: number; state?: "attached" | "visible" },
  ): Promise<void> {
    await actions.waitForElement(this.getPage(), selector, options);
  }

  /**
   * Wait for simple conditions (selector/text/url) with optional abort support.
   *
   * The page is polled every `intervalMs` (default 200) until every supplied
   * condition holds at the same time:
   * - `selector`: the first matching element is displayed, not hidden, and
   *   has a non-zero width or height;
   * - `notSelector`: the same check fails (including when nothing matches);
   * - `text` / `notText`: `document.body.innerText` does / does not contain
   *   the string;
   * - `url`: `window.location.href` contains the string.
   *
   * @throws Error "Wait condition required" when no condition is given,
   *   "Request aborted" when `signal` is or becomes aborted, and
   *   "Wait timed out after …ms" once `timeoutMs` (default: configured
   *   timeout) has elapsed.
   */
  async waitFor(params: {
    selector?: string;
    text?: string;
    url?: string;
    notSelector?: string;
    notText?: string;
    timeoutMs?: number;
    intervalMs?: number;
    signal?: AbortSignal;
  }): Promise<void> {
    const {
      selector,
      text,
      url,
      notSelector,
      notText,
      timeoutMs,
      intervalMs,
      signal,
    } = params;
    if (!selector && !text && !url && !notSelector && !notText) {
      throw new Error("Wait condition required");
    }

    const timeout = timeoutMs ?? this.defaultTimeout;
    const interval = intervalMs ?? 200;
    const page = this.getPage();
    const start = Date.now();

    while (true) {
      // Polling the flag is enough here; the loop wakes up every `interval` ms.
      if (signal?.aborted) {
        throw new Error("Request aborted");
      }
      if (Date.now() - start > timeout) {
        throw new Error(`Wait timed out after ${timeout}ms`);
      }

      let matched = false;
      try {
        // The callback runs inside the page, so it must not close over
        // anything from this scope; inputs are passed as the argument.
        matched = await page.evaluate(
          ({ selector, text, url, notSelector, notText }) => {
            const isVisible = (target: string) => {
              try {
                const el = document.querySelector(target);
                if (!el) {
                  return false;
                }
                const style = window.getComputedStyle(el);
                const rect = el.getBoundingClientRect();
                return (
                  style.display !== "none" &&
                  style.visibility !== "hidden" &&
                  (rect.width > 0 || rect.height > 0)
                );
              } catch {
                // Invalid selectors count as "not visible".
                return false;
              }
            };

            const selectorMatches = selector ? isVisible(selector) : true;
            const notSelectorMatches = notSelector
              ? !isVisible(notSelector)
              : true;
            const bodyText = document.body?.innerText ?? "";
            const textMatches = text ? bodyText.includes(text) : true;
            const notTextMatches = notText ? !bodyText.includes(notText) : true;
            const urlMatches = url ? window.location.href.includes(url) : true;
            return (
              selectorMatches &&
              notSelectorMatches &&
              textMatches &&
              notTextMatches &&
              urlMatches
            );
          },
          { selector, text, url, notSelector, notText },
        );
      } catch (error) {
        // A navigation in the middle of evaluate() tears down the execution
        // context. That is expected while waiting (e.g. for a URL change),
        // so treat it as "not matched yet" and poll again.
        if (!AgentBrowser.isContextDestroyedError(error)) {
          throw error;
        }
      }

      if (matched) {
        return;
      }

      await page.waitForTimeout(interval);
    }
  }

  /** True when `error` reports that the page's execution context was destroyed. */
  private static isContextDestroyedError(error: unknown): boolean {
    return (
      error instanceof Error &&
      /Execution context was destroyed/i.test(error.message)
    );
  }

  /**
   * Wait for text to appear in the document body.
   *
   * @param text - Substring to look for in `document.body.innerText`.
   * @param options - `timeoutMs` overrides the configured default timeout.
   */
  async waitForText(
    text: string,
    options?: { timeoutMs?: number },
  ): Promise<void> {
    await this.getPage().waitForFunction(
      (value) => document.body?.innerText?.includes(value) ?? false,
      text,
      { timeout: options?.timeoutMs ?? this.defaultTimeout },
    );
  }

  /**
   * Wait for the URL to match.
   *
   * @param url - Passed unchanged to Playwright's `page.waitForURL`.
   * @param options - `timeoutMs` overrides the configured default timeout.
   */
  async waitForUrl(
    url: string,
    options?: { timeoutMs?: number },
  ): Promise<void> {
    await this.getPage().waitForURL(url, {
      timeout: options?.timeoutMs ?? this.defaultTimeout,
    });
  }

  /**
   * Hover over an element
   */
  async hover(options: { ref?: string; index?: number }): Promise<void> {
    await actions.hover(this.getPage(), options);
  }

  /**
   * Select from a dropdown
   */
  async select(options: {
    ref?: string;
    index?: number;
    value: string | string[];
  }): Promise<void> {
    await actions.select(this.getPage(), options);
  }

  /**
   * Take a screenshot.
   *
   * @returns Whatever string `actions.screenshot` resolves to.
   */
  async screenshot(options?: {
    fullPage?: boolean;
    path?: string;
  }): Promise<string> {
    return actions.screenshot(this.getPage(), options);
  }

  /**
   * Get a copy of the captured console logs
   */
  getConsoleLogs(): string[] {
    return [...this.consoleLogs];
  }

  /**
   * Clear captured console logs.
   *
   * The array is emptied in place rather than replaced because it is the
   * same array that was handed out by the console-capture helper.
   */
  clearConsoleLogs(): void {
    this.consoleLogs.length = 0;
  }

  /**
   * Get console entries tagged `[error]` or `[warning]`
   */
  getConsoleErrors(): string[] {
    return this.consoleLogs.filter((entry) =>
      AgentBrowser.CONSOLE_ERROR_PATTERN.test(entry),
    );
  }

  /**
   * Get a copy of the captured network logs
   */
  getNetworkLogs(): NetworkEvent[] {
    return [...this.networkLogs];
  }

  /**
   * Clear captured network logs.
   *
   * The array is emptied in place rather than replaced because it is the
   * same array that was handed to the network-capture helper.
   */
  clearNetworkLogs(): void {
    this.networkLogs.length = 0;
  }

  /**
   * Get network errors: events of type "failed", events whose status is
   * 400 or above, and events explicitly marked `ok === false`.
   */
  getNetworkErrors(): NetworkEvent[] {
    return this.networkLogs.filter(
      (event) =>
        event.type === "failed" ||
        Boolean(event.status && event.status >= 400) ||
        event.ok === false,
    );
  }

  /**
   * Enable network capture on the current page. Does nothing if capture is
   * already enabled.
   *
   * @throws Error if the browser has not been started.
   */
  enableNetworkCapture(): void {
    if (this.networkCaptureEnabled) {
      return;
    }
    const page = this.getPage();
    this.networkCaptureEnabled = true;
    actions.setupNetworkCapture(page, this.networkLogs, this.networkLogLimit);
  }

  /**
   * Get the underlying Playwright page for advanced usage
   */
  get rawPage(): Page {
    return this.getPage();
  }

  /**
   * Get the underlying Playwright context for advanced usage
   */
  get rawContext(): BrowserContext {
    return this.getContext();
  }

  /**
   * Get the underlying Playwright browser for advanced usage.
   *
   * @throws Error if the browser has not been started, or if a context is
   *   running but exposes no browser handle (possible for persistent
   *   contexts).
   */
  get rawBrowser(): Browser {
    if (this.browser) {
      return this.browser;
    }
    if (this.context) {
      throw new Error(
        "No browser handle is available for this context. Use rawContext instead.",
      );
    }
    throw new Error("Browser not started. Call start() first.");
  }

  /**
   * Read the context's storage state and optionally save it as
   * pretty-printed JSON (written with `Bun.write`).
   *
   * @param path - File to write to; when omitted nothing is written.
   * @returns The storage state object.
   */
  async saveStorageState(path?: string): Promise<unknown> {
    const state = await this.getContext().storageState();
    if (path) {
      await Bun.write(path, JSON.stringify(state, null, 2));
    }
    return state;
  }
}
558
+
559
/**
 * Factory for {@link AgentBrowser}.
 *
 * @param options - Optional configuration forwarded to the constructor.
 * @returns A newly constructed AgentBrowser instance.
 */
export function createBrowser(options?: AgentBrowserOptions): AgentBrowser {
  const instance = new AgentBrowser(options);
  return instance;
}
package/src/chrome.ts ADDED
@@ -0,0 +1,45 @@
1
+ import { existsSync } from "node:fs";
2
+ import { platform } from "node:os";
3
+
4
/**
 * Find a Chrome/Chromium executable path based on platform.
 *
 * A fixed list of well-known install locations is probed in order and the
 * first one that exists is returned. Locations that depend on an environment
 * variable (`HOME`, `LOCALAPPDATA`, `PROGRAMFILES`, `PROGRAMFILES(X86)`) are
 * skipped when that variable is unset or empty, instead of probing a path
 * that starts with the literal text "undefined".
 *
 * @param overrides - Optional substitutes for the platform name, the
 *   environment and the existence check; each defaults to the real
 *   `os.platform()`, `process.env` and `fs.existsSync`.
 * @returns The first existing candidate, or `undefined` when none exists or
 *   the platform is not darwin, linux or win32.
 */
export function findChromeExecutable(
  overrides: {
    os?: string;
    env?: Record<string, string | undefined>;
    exists?: (path: string) => boolean;
  } = {},
): string | undefined {
  const os = overrides.os ?? platform();
  const env = overrides.env ?? process.env;
  const exists = overrides.exists ?? existsSync;

  // Join `base` and `tail`, or yield undefined when the base directory is unknown.
  const under = (
    base: string | undefined,
    separator: string,
    tail: string,
  ): string | undefined => (base ? `${base}${separator}${tail}` : undefined);

  let candidates: (string | undefined)[];
  switch (os) {
    case "darwin":
      // macOS - prefer Chromium/Canary over Chrome (easier to distinguish)
      candidates = [
        "/Applications/Chromium.app/Contents/MacOS/Chromium",
        "/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary",
        "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
        under(env.HOME, "/", "Applications/Chromium.app/Contents/MacOS/Chromium"),
        under(
          env.HOME,
          "/",
          "Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
        ),
      ];
      break;
    case "linux":
      candidates = [
        "/usr/bin/chromium",
        "/usr/bin/chromium-browser",
        "/usr/bin/google-chrome",
        "/usr/bin/google-chrome-stable",
        "/snap/bin/chromium",
      ];
      break;
    case "win32": {
      const chromeExe = "Google\\Chrome\\Application\\chrome.exe";
      candidates = [
        under(env.LOCALAPPDATA, "\\", chromeExe),
        under(env.PROGRAMFILES, "\\", chromeExe),
        under(env["PROGRAMFILES(X86)"], "\\", chromeExe),
      ];
      break;
    }
    default:
      candidates = [];
  }

  return candidates.find(
    (candidate): candidate is string =>
      candidate !== undefined && exists(candidate),
  );
}