ai-agent-browser 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,849 @@
1
+ """
2
+ Browser driver logic for agent-browser.
3
+
4
+ Encapsulates browser automation and IPC handling in a class-based design.
5
+ """
6
+
7
# Usage text returned verbatim by the "help" command in
# BrowserDriver.process_command. The section headings mirror the comment
# headings used inside process_command (browser control, screenshots,
# interactions, assertions, data extraction, debugging).
HELP_TEXT = """
agent-browser - Browser automation for AI agents

BROWSER CONTROL
start <url> [--visible] Start browser (blocks - run in separate terminal)
stop Close browser
status Check if browser is running
reload Reload page
goto <url> Navigate to URL
back Navigate back
forward Navigate forward
url Print current URL
viewport <w> <h> Set viewport size

SCREENSHOTS
screenshot [name] Full-page screenshot
screenshot viewport [name] Viewport only (faster)
ss [name] Alias for screenshot

INTERACTIONS
click <selector> Click element
click_nth <selector> <n> Click nth element (0-indexed)
fill <selector> <text> Fill input field
type <selector> <text> Type with key events
select <selector> <value> Select dropdown option
press <key> Press keyboard key (Enter, Tab, etc.)
scroll <direction> Scroll: up/down/top/bottom/left/right
hover <selector> Hover over element
focus <selector> Focus element
upload <selector> <path> Upload file to input
dialog <action> [text] Handle dialog: accept, dismiss
clear Clear localStorage/sessionStorage

ASSERTIONS (return [PASS]/[FAIL])
assert_visible <selector> Element is visible
assert_hidden <selector> Element is hidden
assert_text <sel> <text> Element contains text
assert_text_exact <s> <t> Text matches exactly
assert_value <sel> <val> Input value matches
assert_checked <selector> Checkbox is checked
assert_url <pattern> URL contains pattern

DATA EXTRACTION
text <selector> Get text content
value <selector> Get input value
attr <selector> <attr> Get attribute value
count <selector> Count matching elements
eval <javascript> Execute JS, return result
cookies Get all cookies (JSON)
storage Get localStorage (JSON)

DEBUGGING
console View JS console logs
network View network requests (with timing)
network_failed View failed requests only
clear_logs Clear console/network logs
wait <ms> Wait milliseconds
wait_for <selector> [ms] Wait for element (default 10s)
wait_for_text <text> Wait for text to appear
help Show this help
"""
68
+
69
+ import json
70
+ import os
71
+ import sys
72
+ import time
73
+ from datetime import datetime
74
+ from pathlib import Path
75
+ from typing import Any, List, Optional, Union
76
+
77
+ from .utils import (
78
+ IPC_TIMEOUT,
79
+ DEFAULT_TIMEOUT,
80
+ WAIT_FOR_TIMEOUT,
81
+ PathTraversalError,
82
+ add_network_request,
83
+ clear_logs,
84
+ clear_state,
85
+ configure_windows_console,
86
+ format_assertion_result,
87
+ get_browser_pid,
88
+ get_command_file,
89
+ get_console_log_file,
90
+ get_console_logs,
91
+ get_network_log_file,
92
+ get_network_logs,
93
+ get_pid_file,
94
+ get_result_file,
95
+ sanitize_filename,
96
+ get_state,
97
+ get_state_file,
98
+ is_process_running,
99
+ resize_screenshot_if_needed,
100
+ save_browser_pid,
101
+ save_console_log,
102
+ save_network_logs,
103
+ save_state,
104
+ validate_path,
105
+ )
106
+
107
+
108
class BrowserDriver:
    """Encapsulates Playwright browser automation with IPC command handling.

    One process calls :meth:`start`, which owns the browser and polls a
    per-session command file. Other processes call :meth:`send_command`,
    which writes that command file and then polls the per-session result
    file for the reply carrying the same sequence number.
    """

    def __init__(self, session_id: str = "default", output_dir: Optional[Union[str, Path]] = None) -> None:
        """Resolve the session's file paths and create the screenshot directory.

        Args:
            session_id: Name used to scope the command/result/state/log/pid
                files. It is passed through ``sanitize_filename``; a falsy
                value falls back to ``"default"``.
            output_dir: Directory screenshots are written to. Defaults to
                ``./screenshots``. The directory is created if missing.
        """
        self.session_id = sanitize_filename(session_id or "default")
        # For output_dir, we trust explicitly provided paths (absolute or relative).
        # Sandbox validation is only for runtime file operations, not initial config.
        self.output_dir = (Path(output_dir) if output_dir else Path("./screenshots")).resolve()
        self.output_dir.mkdir(parents=True, exist_ok=True)

        self.command_file = get_command_file(self.session_id)
        self.result_file = get_result_file(self.session_id)
        self.state_file = get_state_file(self.session_id)
        self.console_log_file = get_console_log_file(self.session_id)
        self.network_log_file = get_network_log_file(self.session_id)
        self.pid_file = get_pid_file(self.session_id)

        self._command_seq = 0

    def _update_state_url(self, url: str) -> None:
        """Record ``url`` and the current time in the persisted session state."""
        state = get_state(self.session_id)
        state["url"] = url
        state["last_update"] = datetime.now().isoformat()
        save_state(self.session_id, state)

    def _write_result(self, result: str, seq: int) -> None:
        """Write a command result, tagged with its sequence number, to the result file."""
        payload = {"result": result, "seq": seq, "timestamp": datetime.now().isoformat()}
        self.result_file.write_text(json.dumps(payload))

    # ------------------------------------------------------------------
    # Command processing
    # ------------------------------------------------------------------

    def process_command(self, page, cmd_text: str, step: int, pending_dialog: Optional[List[Any]] = None) -> str:
        """
        Process a single command and return the result.

        The first whitespace-delimited word (case-insensitive) selects the
        command. Any exception raised while executing it is converted into
        an ``"Error: ..."`` string rather than propagated.

        Args:
            page: Playwright page object
            cmd_text: Full command text
            step: Current step number for screenshot naming
            pending_dialog: List containing pending dialog (mutable for closure)

        Returns:
            Result string (or "__STOP__" to signal shutdown)
        """
        parts = cmd_text.split(maxsplit=2)
        if not parts:
            # Previously parts[0] raised IndexError here, outside the try block.
            return "Error: Empty command"
        cmd = parts[0].lower()
        # Everything after the command word. Computed from the actual word
        # length so leading whitespace in cmd_text cannot shift the slice.
        rest = cmd_text.lstrip()[len(parts[0]):].strip()

        try:
            if cmd == "stop":
                return "__STOP__"
            result = self._run_control(page, cmd, parts, rest)
            if result is None:
                result = self._run_screenshot(page, cmd, parts, step)
            if result is None:
                result = self._run_interaction(page, cmd, parts, rest, pending_dialog)
            if result is None:
                result = self._run_assertion(page, cmd, rest)
            if result is None:
                result = self._run_extraction(page, cmd, parts, rest)
            if result is None:
                result = self._run_debugging(page, cmd, parts, rest)
            if result is None:
                result = f"Unknown command: '{cmd}'. Use 'help' for available commands."
            return result
        except Exception as exc:
            return f"Error: {exc}"

    def _run_control(self, page, cmd: str, parts: List[str], rest: str) -> Optional[str]:
        """Handle navigation and viewport commands; return None for any other command."""
        if cmd == "ping":
            return f"PONG:{page.url}"
        if cmd == "reload":
            page.reload(wait_until="networkidle")
            return f"Reloaded. URL: {page.url}"
        if cmd == "goto":
            if not rest:
                return "Error: URL required"
            page.goto(rest, wait_until="networkidle")
            return f"Navigated to {page.url}"
        if cmd == "back":
            page.go_back(wait_until="networkidle")
            return f"Navigated back. URL: {page.url}"
        if cmd == "forward":
            page.go_forward(wait_until="networkidle")
            return f"Navigated forward. URL: {page.url}"
        if cmd == "viewport":
            if len(parts) < 3:
                return "Error: Usage: viewport <width> <height>"
            try:
                width, height = int(parts[1]), int(parts[2])
            except ValueError:
                return "Error: Width and height must be integers"
            page.set_viewport_size({"width": width, "height": height})
            return f"Viewport set to {width}x{height}"
        if cmd == "url":
            return page.url
        return None

    def _run_screenshot(self, page, cmd: str, parts: List[str], step: int) -> Optional[str]:
        """Handle ``screenshot``/``ss``; return None for any other command."""
        if cmd not in ("screenshot", "ss"):
            return None
        viewport_only = len(parts) > 1 and parts[1].lower() == "viewport"
        # With the "viewport" keyword present the optional name moves one slot right.
        name_slot = 2 if viewport_only else 1
        name = parts[name_slot] if len(parts) > name_slot else f"step_{step:02d}"
        filepath = self.output_dir / f"{sanitize_filename(name)}.png"
        page.screenshot(path=str(filepath), full_page=not viewport_only)
        resize_status = resize_screenshot_if_needed(filepath)
        return f"Screenshot: {filepath} [{resize_status}]"

    def _run_interaction(
        self, page, cmd: str, parts: List[str], rest: str, pending_dialog: Optional[List[Any]]
    ) -> Optional[str]:
        """Handle click/fill/type/scroll/upload/dialog style commands; return None otherwise."""
        if cmd == "click":
            if not rest:
                return "Error: Selector required"
            page.click(rest, timeout=DEFAULT_TIMEOUT)
            return f"Clicked: {rest}"

        if cmd == "click_nth":
            if not rest:
                return "Error: Usage: click_nth <selector> <index>"
            try:
                # The index is the last token so the selector may contain spaces.
                selector, index_str = rest.rsplit(maxsplit=1)
            except ValueError:
                return "Error: Usage: click_nth <selector> <index>"
            try:
                index = int(index_str)
            except ValueError:
                return f"Error: Invalid index '{index_str}'"
            elements = page.locator(selector)
            count = elements.count()
            if index >= count:
                return f"Error: Index {index} out of range (found {count} elements)"
            elements.nth(index).click(timeout=DEFAULT_TIMEOUT)
            return f"Clicked: {selector} [index={index}]"

        if cmd == "fill":
            if len(parts) < 2:
                return "Error: Usage: fill <selector> <text>"
            selector = parts[1]
            text = parts[2] if len(parts) > 2 else ""
            page.fill(selector, text, timeout=DEFAULT_TIMEOUT)
            return f"Filled: {selector} with '{text}'"

        if cmd == "type":
            if len(parts) < 2:
                return "Error: Usage: type <selector> <text>"
            selector = parts[1]
            text = parts[2] if len(parts) > 2 else ""
            page.type(selector, text, timeout=DEFAULT_TIMEOUT)
            return f"Typed: '{text}' into {selector}"

        if cmd == "select":
            if len(parts) < 3:
                return "Error: Usage: select <selector> <value>"
            selector, value = parts[1], parts[2]
            page.select_option(selector, value, timeout=DEFAULT_TIMEOUT)
            return f"Selected: '{value}' in {selector}"

        if cmd == "press":
            key = parts[1] if len(parts) > 1 else "Enter"
            page.keyboard.press(key)
            return f"Pressed: {key}"

        if cmd == "scroll":
            direction = parts[1].lower() if len(parts) > 1 else "down"
            scroll_map = {
                "top": "window.scrollTo(0, 0)",
                "bottom": "window.scrollTo(0, document.body.scrollHeight)",
                "up": "window.scrollBy(0, -500)",
                "down": "window.scrollBy(0, 500)",
                "left": "window.scrollBy(-500, 0)",
                "right": "window.scrollBy(500, 0)",
            }
            if direction not in scroll_map:
                return (
                    f"Error: Invalid direction '{direction}'. "
                    "Use: top/bottom/up/down/left/right"
                )
            page.evaluate(scroll_map[direction])
            return f"Scrolled: {direction}"

        if cmd == "hover":
            if not rest:
                return "Error: Selector required"
            page.hover(rest, timeout=DEFAULT_TIMEOUT)
            return f"Hovering: {rest}"

        if cmd == "focus":
            if not rest:
                return "Error: Selector required"
            page.focus(rest, timeout=DEFAULT_TIMEOUT)
            return f"Focused: {rest}"

        if cmd == "upload":
            if len(parts) < 3:
                return "Error: Usage: upload <selector> <file_path>"
            selector, file_path = parts[1], parts[2]
            # Validate file path is within CWD to prevent path traversal
            try:
                validated_path = validate_path(file_path)
            except PathTraversalError:
                return f"Error: Path '{file_path}' escapes current working directory"
            if not validated_path.exists():
                return f"Error: File not found: {file_path}"
            page.set_input_files(selector, str(validated_path), timeout=DEFAULT_TIMEOUT)
            return f"Uploaded: {validated_path} to {selector}"

        if cmd == "dialog":
            if pending_dialog is None or pending_dialog[0] is None:
                return "No pending dialog"
            dialog = pending_dialog[0]
            action = parts[1].lower() if len(parts) > 1 else "accept"
            if action == "accept":
                prompt_text = parts[2] if len(parts) > 2 else None
                if prompt_text:
                    dialog.accept(prompt_text)
                else:
                    dialog.accept()
                pending_dialog[0] = None
                return "Dialog accepted"
            if action == "dismiss":
                dialog.dismiss()
                pending_dialog[0] = None
                return "Dialog dismissed"
            return f"Error: Unknown action '{action}'. Use: accept, dismiss"

        if cmd == "clear":
            page.evaluate("localStorage.clear(); sessionStorage.clear();")
            return "Cleared localStorage and sessionStorage"

        return None

    def _run_assertion(self, page, cmd: str, rest: str) -> Optional[str]:
        """Handle ``assert_*`` commands; return None for any other command.

        Results are produced by ``format_assertion_result(passed, message)``.
        """
        if cmd == "assert_visible":
            if not rest:
                return format_assertion_result(False, "Selector required")
            try:
                page.wait_for_selector(rest, state="visible", timeout=DEFAULT_TIMEOUT)
                return format_assertion_result(True, f"Element visible: {rest}")
            except Exception:
                return format_assertion_result(False, f"Element NOT visible: {rest}")

        if cmd == "assert_hidden":
            if not rest:
                return format_assertion_result(False, "Selector required")
            try:
                page.wait_for_selector(rest, state="hidden", timeout=DEFAULT_TIMEOUT)
                return format_assertion_result(True, f"Element hidden: {rest}")
            except Exception:
                return format_assertion_result(False, f"Element NOT hidden (still visible): {rest}")

        if cmd == "assert_text":
            # Selector is the first space-delimited token; the remainder is the text.
            selector, sep, expected = rest.partition(" ")
            if not sep:
                return format_assertion_result(False, "Usage: assert_text <selector> <text>")
            try:
                actual = page.text_content(selector, timeout=DEFAULT_TIMEOUT) or ""
                if expected in actual:
                    return format_assertion_result(True, f"Text found: '{expected}' in {selector}")
                return format_assertion_result(
                    False, f"Text NOT found: '{expected}' in {selector}. Actual: '{actual[:100]}'"
                )
            except Exception as exc:
                return format_assertion_result(False, f"Error getting text: {exc}")

        if cmd == "assert_text_exact":
            selector, sep, expected = rest.partition(" ")
            if not sep:
                return format_assertion_result(False, "Usage: assert_text_exact <selector> <text>")
            try:
                actual = page.text_content(selector, timeout=DEFAULT_TIMEOUT) or ""
                # Surrounding whitespace is ignored on both sides of the comparison.
                if actual.strip() == expected.strip():
                    return format_assertion_result(True, f"Text matches exactly: {selector}")
                return format_assertion_result(
                    False, f"Text mismatch. Expected: '{expected}', Actual: '{actual[:100]}'"
                )
            except Exception as exc:
                return format_assertion_result(False, f"Error getting text: {exc}")

        if cmd == "assert_value":
            selector, sep, expected = rest.partition(" ")
            if not sep:
                return format_assertion_result(False, "Usage: assert_value <selector> <value>")
            try:
                actual = page.input_value(selector, timeout=DEFAULT_TIMEOUT)
                if actual == expected:
                    return format_assertion_result(True, f"Value matches: {selector}")
                return format_assertion_result(
                    False, f"Value mismatch. Expected: '{expected}', Actual: '{actual}'"
                )
            except Exception as exc:
                return format_assertion_result(False, f"Error getting value: {exc}")

        if cmd == "assert_checked":
            if not rest:
                return format_assertion_result(False, "Selector required")
            try:
                if page.is_checked(rest, timeout=DEFAULT_TIMEOUT):
                    return format_assertion_result(True, f"Element is checked: {rest}")
                return format_assertion_result(False, f"Element NOT checked: {rest}")
            except Exception as exc:
                return format_assertion_result(False, f"Error checking state: {exc}")

        if cmd == "assert_url":
            if not rest:
                return format_assertion_result(False, "URL pattern required")
            current_url = page.url
            # Plain substring match, not a regex or glob.
            if rest in current_url:
                return format_assertion_result(True, f"URL contains '{rest}': {current_url}")
            return format_assertion_result(False, f"URL does NOT contain '{rest}': {current_url}")

        return None

    def _run_extraction(self, page, cmd: str, parts: List[str], rest: str) -> Optional[str]:
        """Handle data-extraction commands; return None for any other command."""
        if cmd == "text":
            if not rest:
                return "Error: Selector required"
            text = page.text_content(rest, timeout=DEFAULT_TIMEOUT)
            return text if text else "(empty)"

        if cmd == "value":
            if not rest:
                return "Error: Selector required"
            value = page.input_value(rest, timeout=DEFAULT_TIMEOUT)
            return value if value else "(empty)"

        if cmd == "attr":
            if len(parts) < 3:
                return "Error: Usage: attr <selector> <attribute>"
            value = page.get_attribute(parts[1], parts[2], timeout=DEFAULT_TIMEOUT)
            if value is None:
                return "(null)"
            # A present-but-empty attribute (e.g. a boolean attribute) is not
            # null; report it the same way empty text/value results are reported.
            return value if value else "(empty)"

        if cmd == "count":
            if not rest:
                return "Error: Selector required"
            return str(page.locator(rest).count())

        if cmd == "eval":
            if not rest:
                return "Error: JavaScript code required"
            # NOTE(review): executes caller-supplied JavaScript in the page by
            # design; callers must not forward untrusted text to this command.
            return str(page.evaluate(rest))

        if cmd == "cookies":
            return json.dumps(page.context.cookies(), indent=2)

        if cmd == "storage":
            storage = page.evaluate("JSON.stringify(localStorage)")
            return storage if storage else "{}"

        return None

    def _run_debugging(self, page, cmd: str, parts: List[str], rest: str) -> Optional[str]:
        """Handle log-inspection, wait and help commands; return None for any other command."""
        if cmd == "console":
            logs = get_console_logs(self.session_id)
            if not logs:
                return "No console logs"
            # Only the 20 most recent entries, each truncated to 200 characters.
            return "\n".join(
                f"[{entry.get('type', 'log').upper()}] {entry.get('text', '')[:200]}"
                for entry in logs[-20:]
            )

        if cmd == "network":
            logs = get_network_logs(self.session_id)
            if not logs:
                return "No network requests logged"
            output = []
            # Take the 20 newest by start_time, then print them oldest-first.
            newest = sorted(logs.values(), key=lambda x: x.get("start_time", ""), reverse=True)[:20]
            for entry in reversed(newest):
                method = entry.get("method", "?")
                url = entry.get("url", "")
                short_url = url[:70] + "..." if len(url) > 70 else url
                status = entry.get("status", "pending")
                duration = entry.get("duration_ms", "")
                if duration:
                    output.append(f"{method} {status} {duration}ms {short_url}")
                else:
                    output.append(f"{method} {status} {short_url}")
            return "\n".join(output)

        if cmd == "network_failed":
            logs = get_network_logs(self.session_id)
            failed = [entry for entry in logs.values() if entry.get("status") == "failed"]
            if not failed:
                return "No failed requests"
            return "\n".join(
                f"{entry.get('method', '?')} {entry.get('url', '')[:80]}\n"
                f" Failure: {entry.get('failure', 'unknown')}"
                for entry in failed
            )

        if cmd == "clear_logs":
            clear_logs(self.session_id)
            return "Console and network logs cleared"

        if cmd == "wait":
            ms = int(parts[1]) if len(parts) > 1 else 1000
            page.wait_for_timeout(ms)
            return f"Waited {ms}ms"

        if cmd == "wait_for":
            if not rest:
                return "Error: Selector required"
            selector = rest
            timeout_ms = WAIT_FOR_TIMEOUT
            tokens = rest.rsplit(maxsplit=1)
            if len(tokens) == 2:
                try:
                    timeout_ms = int(tokens[1])
                    selector = tokens[0]
                except ValueError:
                    # The trailing token is not a timeout, so it belongs to the
                    # selector (e.g. "div .item"); keep the whole text.
                    pass
            try:
                page.wait_for_selector(selector, timeout=timeout_ms)
                return f"Element appeared: {selector}"
            except Exception:
                return f"Timeout waiting for: {selector} (waited {timeout_ms}ms)"

        if cmd == "wait_for_text":
            if not rest:
                return "Error: Text required"
            try:
                page.wait_for_selector(f"text={rest}", timeout=WAIT_FOR_TIMEOUT)
                return f"Text appeared: '{rest}'"
            except Exception:
                return f"Timeout waiting for text: '{rest}'"

        if cmd == "help":
            return HELP_TEXT

        return None

    # ------------------------------------------------------------------
    # Browser lifecycle (server side)
    # ------------------------------------------------------------------

    def _register_listeners(self, page, pending_dialog: List[Optional[Any]]) -> None:
        """Attach dialog, console and network event handlers to ``page``.

        Dialogs are parked in ``pending_dialog[0]`` until a ``dialog`` command
        resolves them; console messages and network activity are persisted
        through the session's log helpers.
        """

        def handle_dialog(dialog: Any) -> None:
            pending_dialog[0] = dialog
            print(f"[DIALOG] {dialog.type}: {dialog.message}")

        def handle_console(msg: Any) -> None:
            save_console_log(
                self.session_id,
                {
                    "type": msg.type,
                    "text": msg.text,
                    "timestamp": datetime.now().isoformat(),
                    "location": str(msg.location) if msg.location else None,
                },
            )

        def handle_request(request: Any) -> None:
            request_id = str(id(request))
            add_network_request(
                self.session_id,
                request_id,
                {
                    "method": request.method,
                    "url": request.url,
                    "start_time": datetime.now().isoformat(),
                    "status": "pending",
                },
            )
            # Stash the id on the request object so the response/failure
            # handlers can find the matching log entry.
            request._tracking_id = request_id

        def handle_response(response: Any) -> None:
            request_id = getattr(response.request, "_tracking_id", None)
            if not request_id:
                return
            end_time = datetime.now()
            logs = get_network_logs(self.session_id)
            if request_id not in logs:
                return
            try:
                started = datetime.fromisoformat(logs[request_id].get("start_time", ""))
                duration_ms = int((end_time - started).total_seconds() * 1000)
            except Exception:
                duration_ms = 0
            logs[request_id].update(
                {
                    "status": response.status,
                    "status_text": response.status_text,
                    "end_time": end_time.isoformat(),
                    "duration_ms": duration_ms,
                }
            )
            save_network_logs(self.session_id, logs)

        def handle_request_failed(request: Any) -> None:
            request_id = getattr(request, "_tracking_id", str(id(request)))
            add_network_request(
                self.session_id,
                request_id,
                {
                    "method": request.method,
                    "url": request.url,
                    "status": "failed",
                    "failure": request.failure if request.failure else "unknown",
                    "end_time": datetime.now().isoformat(),
                },
            )

        page.on("dialog", handle_dialog)
        page.on("console", handle_console)
        page.on("request", handle_request)
        page.on("response", handle_response)
        page.on("requestfailed", handle_request_failed)

    def _read_command(self, last_seq: int):
        """Read the command file and return ``(cmd_text, cmd_seq)``.

        The file normally holds JSON ``{"cmd": ..., "seq": ...}``. Anything
        that does not parse to that shape is treated as a plain-text command
        with sequence ``last_seq + 1``.
        """
        raw = self.command_file.read_text()
        try:
            data = json.loads(raw)
            return data.get("cmd", "").strip(), data.get("seq", last_seq + 1)
        except (json.JSONDecodeError, KeyError, AttributeError, TypeError):
            # AttributeError/TypeError cover valid JSON that is not an object
            # (or whose "cmd" is not a string). Letting those escape would
            # leave the file in place and make the loop fail on it forever.
            return raw.strip(), last_seq + 1

    def _command_loop(self, page, browser, pending_dialog: List[Optional[Any]]) -> None:
        """Poll the command file and execute commands until stopped.

        The loop ends on a ``stop`` command, on Ctrl-C, or when the page or
        browser is no longer usable.
        """
        step = 1
        last_seq = 0
        while True:
            try:
                if page.is_closed() or not browser.is_connected():
                    # Without this check every iteration would raise in
                    # wait_for_timeout and the loop would spin without pausing.
                    print("\nBrowser window closed, shutting down...")
                    break

                if self.command_file.exists():
                    cmd_text, cmd_seq = self._read_command(last_seq)

                    try:
                        self.command_file.unlink()
                    except OSError:
                        pass

                    if not cmd_text:
                        continue

                    try:
                        result = self.process_command(page, cmd_text, step, pending_dialog)

                        if result == "__STOP__":
                            self._write_result("Browser stopped", cmd_seq)
                            break

                        if cmd_text.lower().startswith(("goto", "reload", "back", "forward")):
                            self._update_state_url(page.url)

                        # Only successful screenshots advance the auto-naming counter.
                        if result.startswith("Screenshot:"):
                            step += 1

                        self._write_result(result, cmd_seq)
                        last_seq = cmd_seq
                        print(f"[CMD] {cmd_text[:50]}{'...' if len(cmd_text) > 50 else ''}")
                        print(f"[OUT] {result[:100]}{'...' if len(result) > 100 else ''}")
                    except Exception as exc:
                        error_msg = f"Error: {exc}"
                        self._write_result(error_msg, cmd_seq)
                        print(f"[ERR] {error_msg}")

                # Doubles as the poll interval and lets Playwright dispatch events.
                page.wait_for_timeout(100)

            except KeyboardInterrupt:
                print("\nInterrupted, closing browser...")
                break
            except Exception as exc:
                print(f"Error in main loop: {exc}")
                # Back off so a persistent failure cannot become a busy loop.
                time.sleep(0.1)

    def start(self, url: str, headless: bool = True) -> None:
        """
        Start browser session and enter command processing loop.

        The call blocks while the browser is running; commands are processed
        via IPC files scoped by session_id. The browser, context and
        Playwright driver are torn down and the session state cleared on any
        exit path, including a failure during launch or initial navigation.

        Args:
            url: Page to open before listening for commands.
            headless: Run without a visible window. In visible mode actions
                are slowed by 50 ms (``slow_mo``).
        """
        from playwright.sync_api import sync_playwright

        self._command_seq = 0
        clear_state(self.session_id)
        clear_logs(self.session_id)
        save_browser_pid(self.session_id)

        pw = None
        browser = None
        context = None
        try:
            pw = sync_playwright().start()
            browser = pw.chromium.launch(headless=headless, slow_mo=0 if headless else 50)
            context = browser.new_context(viewport={"width": 1280, "height": 900}, storage_state=None)
            page = context.new_page()

            # Single-slot list so the dialog handler closure can hand the
            # dialog over to process_command.
            pending_dialog: List[Optional[Any]] = [None]
            self._register_listeners(page, pending_dialog)

            page.goto(url, wait_until="networkidle")

            mode = "headless" if headless else "visible"
            print(f"Browser started ({mode}) at {url}")
            print(f"Current URL: {page.url}")
            print(f"PID: {os.getpid()}")

            now = datetime.now().isoformat()
            save_state(
                self.session_id,
                {
                    "running": True,
                    "url": page.url,
                    "start_time": now,
                    "last_update": now,
                    "mode": mode,
                    "pid": os.getpid(),
                },
            )

            screenshot_path = self.output_dir / f"{sanitize_filename('step_00_start')}.png"
            page.screenshot(path=str(screenshot_path), full_page=False)
            resize_status = resize_screenshot_if_needed(screenshot_path)
            print(f"Screenshot: {screenshot_path} [{resize_status}]")

            print("\nBrowser ready. Listening for commands...")
            self._command_loop(page, browser, pending_dialog)
        finally:
            # Each step is isolated so one failing close (for example the
            # browser is already gone) cannot skip the remaining cleanup.
            for label, closer in (
                ("context", context.close if context is not None else None),
                ("browser", browser.close if browser is not None else None),
                ("playwright", pw.stop if pw is not None else None),
            ):
                if closer is None:
                    continue
                try:
                    closer()
                except Exception as exc:
                    print(f"Error closing {label}: {exc}")
            clear_state(self.session_id)
            print("Browser closed.")

    # ------------------------------------------------------------------
    # Client side
    # ------------------------------------------------------------------

    def _write_command(self, payload: str) -> None:
        """Publish ``payload`` as the command file without exposing a partial write."""
        staging = Path(f"{self.command_file}.{os.getpid()}.tmp")
        try:
            staging.write_text(payload)
            # Rename into place so the polling process sees either no file or
            # the complete payload, never a truncated one.
            os.replace(staging, self.command_file)
        except OSError:
            try:
                staging.unlink()
            except OSError:
                pass
            # Fall back to the direct (non-atomic) write.
            self.command_file.write_text(payload)

    def send_command(self, cmd: str, timeout: Optional[int] = None) -> str:
        """
        Send a command to the running browser and wait for result.

        Args:
            cmd: Command string to send
            timeout: Maximum seconds to wait for result (default: IPC_TIMEOUT)

        Returns:
            The result string written by the browser process, or an error /
            timeout message if the browser is not running, has died, or did
            not answer in time.
        """
        if timeout is None:
            timeout = IPC_TIMEOUT

        state = get_state(self.session_id)
        if not state.get("running"):
            return "Error: Browser not running. Use 'start <url>' first."

        pid = state.get("pid") or get_browser_pid(self.session_id)
        if pid and not is_process_running(pid):
            clear_state(self.session_id)
            return "Error: Browser process has died. Use 'start <url>' to restart."

        # A new driver starts counting from zero, so a plain counter would
        # reuse the same number in every process and a late reply to an
        # earlier, timed-out command could be mistaken for this one. Seeding
        # from the clock keeps numbers distinct across processes; max() keeps
        # them strictly increasing within this instance.
        self._command_seq = max(self._command_seq + 1, int(time.time() * 1000))
        seq = self._command_seq

        # Drop any stale result before publishing the new command.
        try:
            self.result_file.unlink()
        except OSError:
            pass

        self._write_command(json.dumps({"cmd": cmd, "seq": seq}))

        # A monotonic deadline bounds the real elapsed time and also accepts
        # fractional timeouts.
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            if self.result_file.exists():
                try:
                    result_data = json.loads(self.result_file.read_text())
                    if result_data.get("seq", 0) == seq:
                        try:
                            self.result_file.unlink()
                        except OSError:
                            pass
                        return result_data.get("result", "")
                except (json.JSONDecodeError, OSError, AttributeError):
                    # Partially written or malformed result; poll again.
                    pass
            time.sleep(0.1)

        return "Timeout waiting for result. Browser may have crashed - check 'status'."

    def status(self) -> bool:
        """
        Check if browser is running and print status.

        Consults the saved state, then the recorded process id, and finally
        sends a ``ping`` command with a 3 second timeout.

        Returns:
            True if running, False otherwise.
        """
        state = get_state(self.session_id)
        if not state.get("running"):
            print("Browser: NOT RUNNING")
            print("\nTo start: agent-browser start <url>")
            return False

        pid = state.get("pid") or get_browser_pid(self.session_id)
        if pid and not is_process_running(pid):
            print("Browser: CRASHED (process not found)")
            print(f"Last known PID: {pid}")
            clear_state(self.session_id)
            print("\nTo restart: agent-browser start <url>")
            return False

        print(f"Browser: CHECKING (PID: {pid})...")
        result = self.send_command("ping", timeout=3)

        if not result.startswith("PONG:"):
            print("Browser: NOT RESPONDING")
            print(f"PID {pid} exists but browser is not accepting commands")
            print(f"Response: {result}")
            print("\nTry 'stop' then 'start <url>' to restart")
            return False

        current_url = result[5:]  # text after the "PONG:" prefix
        print(f"Browser: RUNNING ({state.get('mode', 'unknown')} mode)")
        print(f"Since: {state.get('start_time', 'unknown')}")
        print(f"Current URL: {current_url}")
        print(f"Last state update: {state.get('last_update', 'unknown')}")
        print("\nLog files:")
        print(f" Console: {self.console_log_file}")
        print(f" Network: {self.network_log_file}")
        print(f" Screenshots: {self.output_dir}")
        return True

    def stop(self) -> str:
        """Send the ``stop`` command to the running browser and return its reply."""
        return self.send_command("stop")
826
+
827
+
828
def main() -> None:
    """Backward-compatible entry point for direct execution.

    With no arguments the usage text is printed. ``status`` and ``start``
    are handled locally; any other argument list is joined with spaces and
    forwarded to the running browser via ``BrowserDriver.send_command``.
    """
    configure_windows_console()
    if len(sys.argv) < 2:
        # The module docstring is only a short description; the usage text
        # lives in HELP_TEXT.
        print(HELP_TEXT)
        return

    # Built only once a command is known, so that printing usage does not
    # create the screenshot directory as a side effect.
    driver = BrowserDriver()
    cmd = sys.argv[1].lower()
    if cmd == "status":
        driver.status()
    elif cmd == "start":
        # First non-flag argument is the URL; "--visible" may appear anywhere.
        positional = [arg for arg in sys.argv[2:] if not arg.startswith("--")]
        headless = "--visible" not in sys.argv
        url = positional[0] if positional else "http://localhost:8080"
        driver.start(url, headless=headless)
    else:
        print(driver.send_command(" ".join(sys.argv[1:])))


if __name__ == "__main__":
    main()