ai-agent-browser 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,229 @@
1
+ """
2
+ Interactive Test Runner - Claude-in-the-Loop
3
+
4
+ This runner takes screenshots and saves them for Claude Code to analyze directly.
5
+ Claude then suggests actions, and you execute them through this script.
6
+
7
+ Usage:
8
+ agent-browser interact http://localhost:5000/financial-journey/quick-start
9
+
10
+ Commands (type in console):
11
+ screenshot / ss - Take a screenshot for Claude to analyze
12
+ click <selector> - Click an element (e.g., click #submitBtn)
13
+ type <sel> <text> - Type text into element (e.g., type #userAge 30)
14
+ fill <field> <val> - Fill a form field by name
15
+ select <sel> <val> - Select dropdown option
16
+ scroll <dir> - Scroll up/down/top/bottom
17
+ wait <ms> - Wait milliseconds
18
+ eval <js> - Execute JavaScript
19
+ url - Print current URL
20
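+ clear - Clear localStorage
+ reload - Reload the current page
+ back - Navigate back in history
+ goto <url> - Navigate to a URL (defaults to the start URL)
+ help - Show this help text
+ quit / q / exit - Exit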
21
+
22
+ The workflow:
23
+ 1. Run this script
24
+ 2. Type 'ss' to take a screenshot
25
+ 3. Ask Claude Code to read the screenshot and suggest actions
26
+ 4. Execute the suggested actions
27
+ 5. Repeat until test is complete
28
+ """
29
+
30
+ from datetime import datetime
31
+ from pathlib import Path
32
+ from typing import TYPE_CHECKING, Any, Optional, Union
33
+
34
+ if TYPE_CHECKING:
35
+ from playwright.sync_api import Browser, Page
36
+
37
+ from .utils import PathTraversalError, sanitize_filename, validate_path
38
+
39
+
40
class InteractiveRunner:
    """Console-driven Playwright session that saves screenshots for review.

    The runner launches Chromium, opens ``start_url`` and then reads
    one-line commands from stdin (see :meth:`execute_command`). Screenshots
    are written to ``output_dir`` as ``step_NN_<label>.png``.
    """

    def __init__(
        self,
        start_url: str,
        headless: bool = False,
        session_id: str = "default",
        output_dir: Optional[Union[str, Path]] = None,
    ):
        """Store the configuration and create the screenshot directory.

        Args:
            start_url: URL opened by :meth:`start` and used as the ``goto``
                default.
            headless: Launch the browser without a visible window.
            session_id: Session label; passed through ``sanitize_filename``.
            output_dir: Screenshot directory; defaults to
                ``./screenshots/interactive``.

        Raises:
            ValueError: If ``validate_path`` rejects ``output_dir`` with a
                ``PathTraversalError``.
        """
        self.start_url = start_url
        self.headless = headless
        self.session_id = sanitize_filename(session_id or "default")
        output_dir_path = Path(output_dir) if output_dir else Path("./screenshots/interactive")
        # validate_path guards against path traversal; surface its failure as
        # a ValueError while keeping the original exception as the cause.
        try:
            self.output_dir = validate_path(output_dir_path)
        except PathTraversalError as e:
            raise ValueError(f"Invalid output directory: {e}") from e
        self.output_dir.mkdir(parents=True, exist_ok=True)

        self._playwright: Any = None
        self._browser: Optional["Browser"] = None
        self._page: Optional["Page"] = None
        self.screenshot_count = 0

    def start(self) -> None:
        """Start browser and navigate to URL."""
        # Imported lazily so the module can be imported without Playwright.
        from playwright.sync_api import sync_playwright

        print("Starting browser...")
        self._playwright = sync_playwright().start()
        self._browser = self._playwright.chromium.launch(
            headless=self.headless,
            slow_mo=100,
        )
        context = self._browser.new_context(viewport={"width": 1280, "height": 900})
        self._page = context.new_page()

        print(f"Navigating to {self.start_url}")
        self._page.goto(self.start_url, wait_until="networkidle")
        print("Ready! Type 'ss' to take a screenshot, 'help' for commands.\n")

    def stop(self) -> None:
        """Close the browser and stop Playwright, then drop the handles.

        Playwright is stopped even when closing the browser raises, so a
        failed close no longer leaks the Playwright driver. The original
        exception still propagates to the caller.
        """
        try:
            if self._browser:
                self._browser.close()
        finally:
            try:
                if self._playwright:
                    self._playwright.stop()
            finally:
                # Clear the handles so `page` reports "not initialized"
                # instead of handing out a closed page.
                self._browser = None
                self._page = None
                self._playwright = None
        print("Browser closed.")

    @property
    def page(self) -> "Page":
        """Return the active page.

        Raises:
            RuntimeError: If no page has been created yet.
        """
        if not self._page:
            raise RuntimeError("Browser page not initialized. Call start() first.")
        return self._page

    def screenshot(self, name: Optional[str] = None) -> str:
        """Take a full-page screenshot and return its path as a string.

        Args:
            name: Optional label for the file name; when omitted the current
                time (``HHMMSS``) is used. Either value goes through
                ``sanitize_filename``.
        """
        step = self.screenshot_count + 1
        timestamp = datetime.now().strftime("%H%M%S")
        label = sanitize_filename(name) if name else sanitize_filename(timestamp)
        filename = f"step_{step:02d}_{label}.png"
        filepath = self.output_dir / filename

        self.page.screenshot(path=str(filepath), full_page=True)
        # Advance the counter only once the file was actually written, so a
        # failed capture does not leave a gap in the step numbering.
        self.screenshot_count = step
        print(f"\nScreenshot saved: {filepath}")
        print(f"Ask Claude to: Read {filepath}")
        return str(filepath)

    def execute_command(self, cmd: str) -> bool:
        """Execute one console command.

        Supported actions: ``screenshot``/``ss``, ``click``, ``type``,
        ``fill``, ``select``, ``scroll``, ``wait``, ``eval``, ``url``,
        ``clear``, ``reload``, ``back``, ``goto``, ``help`` and
        ``quit``/``q``/``exit``.

        Args:
            cmd: Raw command line; surrounding whitespace is ignored.

        Returns:
            ``False`` when the user asked to quit, ``True`` otherwise
            (including when the command failed and an error was printed).
        """
        stripped = cmd.strip()
        parts = stripped.split(maxsplit=2)
        if not parts:
            return True

        action = parts[0].lower()
        # Everything after the action word, untouched by the 3-way split.
        # Used by commands whose single argument may contain spaces.
        rest = stripped[len(parts[0]):].strip()

        if action in ("click", "type", "fill", "select") and not rest:
            print(f"Error: '{action}' requires a target. Type 'help' for available commands.")
            return True

        try:
            if action in ("screenshot", "ss"):
                name = parts[1] if len(parts) > 1 else None
                self.screenshot(name)

            elif action == "click":
                # CSS selectors may contain spaces ("div.menu > a"), so the
                # whole remainder is the selector.
                selector = rest
                self.page.click(selector)
                print(f"Clicked: {selector}")

            elif action == "type":
                selector = parts[1]
                text = parts[2] if len(parts) > 2 else ""
                self.page.fill(selector, text)
                print(f"Typed '{text}' into {selector}")

            elif action == "fill":
                field = parts[1]
                value = parts[2] if len(parts) > 2 else ""
                filled = False
                # Try the field as an id, then a name, then a data-testid.
                for sel in [f"#{field}", f"[name='{field}']", f"[data-testid='{field}']"]:
                    try:
                        self.page.fill(sel, value, timeout=1000)
                        print(f"Filled {sel} with '{value}'")
                        filled = True
                        break
                    except Exception:
                        # Best effort: a miss on one selector just means the
                        # next candidate is tried.
                        continue
                if not filled:
                    print(f"No matching selector found for field '{field}'")

            elif action == "select":
                selector = parts[1]
                value = parts[2] if len(parts) > 2 else ""
                self.page.select_option(selector, value)
                print(f"Selected '{value}' in {selector}")

            elif action == "scroll":
                direction = parts[1] if len(parts) > 1 else "down"
                if direction == "top":
                    self.page.evaluate("window.scrollTo(0, 0)")
                elif direction == "bottom":
                    self.page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
                elif direction == "up":
                    self.page.evaluate("window.scrollBy(0, -500)")
                else:
                    self.page.evaluate("window.scrollBy(0, 500)")
                print(f"Scrolled {direction}")

            elif action == "wait":
                ms = int(parts[1]) if len(parts) > 1 else 1000
                self.page.wait_for_timeout(ms)
                print(f"Waited {ms}ms")

            elif action == "eval":
                # NOTE: runs arbitrary JavaScript typed by the operator in
                # the page context; this is intentional for an interactive
                # tool but must not be fed untrusted input.
                js = rest
                if not js:
                    print("Error: JavaScript code required")
                    return True
                result = self.page.evaluate(js)
                print(f"Result: {result}")

            elif action == "url":
                print(f"Current URL: {self.page.url}")

            elif action == "clear":
                self.page.evaluate("localStorage.clear()")
                print("Cleared localStorage")

            elif action == "reload":
                self.page.reload(wait_until="networkidle")
                print("Page reloaded")

            elif action == "back":
                self.page.go_back()
                print("Navigated back")

            elif action == "goto":
                url = parts[1] if len(parts) > 1 else self.start_url
                self.page.goto(url, wait_until="networkidle")
                print(f"Navigated to {url}")

            elif action in ("quit", "q", "exit"):
                return False

            elif action == "help":
                print(__doc__)

            else:
                print(f"Unknown command: {action}. Type 'help' for available commands.")

        except Exception as exc:
            # REPL boundary: report the failure and keep the session alive.
            print(f"Error: {exc}")

        return True

    def run(self) -> None:
        """Start interactive loop, taking an initial screenshot."""
        try:
            self.start()
            self.screenshot("initial")

            while True:
                try:
                    cmd = input("\n> ").strip()
                    if not self.execute_command(cmd):
                        break
                except KeyboardInterrupt:
                    print("\nInterrupted")
                    break
                except EOFError:
                    break
        finally:
            # Always release the browser, even if startup failed part-way.
            self.stop()
agent_browser/main.py ADDED
@@ -0,0 +1,191 @@
1
+ """Command-line interface for the agent-browser package."""
2
+
3
+ import argparse
4
+ import io
5
+ import json
6
+ from contextlib import redirect_stdout
7
+ from typing import Optional, Sequence
8
+
9
+ from .driver import BrowserDriver
10
+ from .interactive import InteractiveRunner
11
+ from .utils import configure_windows_console
12
+
13
+ DEFAULT_URL = "http://localhost:8080"  # Fallback for the optional `url` argument of `start` and `interact`.
14
+
15
+
16
+ def _derive_status_label(result: str) -> str:
17
+ """Map textual results to a coarse status label for JSON output."""
18
+ normalized = result.strip().lower()
19
+ if normalized.startswith("error"):
20
+ return "ERROR"
21
+ if normalized.startswith("[fail]"):
22
+ return "FAIL"
23
+ if "timeout" in normalized:
24
+ return "TIMEOUT"
25
+ if normalized.startswith("[pass]"):
26
+ return "PASS"
27
+ return "PASS"
28
+
29
+
30
def build_parser() -> argparse.ArgumentParser:
    """Create the ``agent-browser`` argument parser.

    Global options (``--session``, ``--output-dir``, ``--json``) live on the
    root parser; the required sub-command is stored in ``command`` and is one
    of ``start``, ``interact``, ``status``, ``stop`` or ``cmd``.
    """
    root = argparse.ArgumentParser(
        prog="agent-browser",
        description="Control a Playwright browser via CLI or interactive runner.",
    )
    root.add_argument(
        "--session",
        default="default",
        help="Session identifier used for IPC files (default: default).",
    )
    root.add_argument(
        "--output-dir",
        default=None,
        help="Directory to store screenshots (used when starting a session).",
    )
    root.add_argument(
        "--json",
        action="store_true",
        help="Return machine-readable JSON for command output.",
    )

    commands = root.add_subparsers(dest="command", required=True)

    # `start` and `interact` share the same optional URL positional.
    url_positional = {"nargs": "?", "default": DEFAULT_URL, "help": "Initial URL to open."}

    start_cmd = commands.add_parser("start", help="Start the headless driver (blocks).")
    start_cmd.add_argument("url", **url_positional)
    start_cmd.add_argument(
        "--visible",
        action="store_true",
        help="Launch browser in headed mode instead of headless.",
    )

    interact_cmd = commands.add_parser("interact", help="Start the interactive runner.")
    interact_cmd.add_argument("url", **url_positional)
    interact_cmd.add_argument(
        "--headless",
        action="store_true",
        help="Run the interactive runner in headless mode.",
    )

    commands.add_parser("status", help="Check if the driver is running.")
    commands.add_parser("stop", help="Stop the running driver.")

    forward_cmd = commands.add_parser("cmd", help="Send a command to the running driver.")
    forward_cmd.add_argument(
        "cmd_args",
        nargs=argparse.REMAINDER,
        help="Command string to forward (e.g., screenshot home).",
    )
    forward_cmd.add_argument(
        "--timeout",
        type=int,
        default=None,
        help="Seconds to wait for a response (defaults to IPC timeout).",
    )

    return root
96
+
97
+
98
def run_start(args: argparse.Namespace) -> None:
    """Handle ``start``: create a driver for the session and open ``args.url``.

    The browser runs headless unless ``--visible`` was given.
    """
    launch_headless = not args.visible
    BrowserDriver(session_id=args.session, output_dir=args.output_dir).start(
        args.url, headless=launch_headless
    )
101
+
102
+
103
def run_interact(args: argparse.Namespace) -> None:
    """Handle ``interact``: build an ``InteractiveRunner`` and run it.

    Args:
        args: Parsed CLI namespace providing ``url``, ``session``,
            ``output_dir`` and ``headless``.

    Raises:
        SystemExit: With the error text as its message when the runner
            cannot be constructed (``NotImplementedError``, or the
            ``ValueError`` it raises for an invalid output directory) or
            when running it raises ``NotImplementedError``.
        RuntimeError: If the runner exposes neither ``run()`` nor
            ``start()``.
    """
    runner_kwargs = {
        "session_id": args.session,
        "output_dir": args.output_dir,
        "headless": args.headless,
    }
    try:
        runner = InteractiveRunner(args.url, **runner_kwargs)
    except (NotImplementedError, ValueError) as exc:
        # A bad --output-dir is a user error: exit with the message rather
        # than a traceback.
        raise SystemExit(str(exc)) from exc

    try:
        if hasattr(runner, "run"):
            runner.run()
        elif hasattr(runner, "start"):
            runner.start()
        else:
            raise RuntimeError("InteractiveRunner must expose a run() or start() method")
    except NotImplementedError as exc:
        raise SystemExit(str(exc)) from exc
123
+
124
+
125
def run_status(args: argparse.Namespace) -> int:
    """Handle ``status``: report whether the session's driver is running.

    With ``--json`` anything ``driver.status()`` prints is captured and
    returned in the ``details`` field of a single JSON object; otherwise the
    driver's own output goes straight to stdout.

    Returns:
        ``0`` when the driver is running, ``1`` otherwise.
    """
    driver = BrowserDriver(session_id=args.session, output_dir=args.output_dir)

    if not args.json:
        return 0 if driver.status() else 1

    captured = io.StringIO()
    with redirect_stdout(captured):
        running = driver.status()

    state = "running" if running else "not_running"
    report = {
        "status": state.upper(),
        "result": state,
        "details": captured.getvalue().strip(),
    }
    print(json.dumps(report))
    return 0 if running else 1
140
+
141
+
142
def run_stop(args: argparse.Namespace) -> None:
    """Handle ``stop``: stop the session's driver and print the outcome.

    Plain mode prints the driver's result text as-is; ``--json`` wraps it in
    an object with a derived ``status`` label.
    """
    outcome = BrowserDriver(session_id=args.session, output_dir=args.output_dir).stop()
    if not args.json:
        print(outcome)
        return
    print(json.dumps({"status": _derive_status_label(outcome), "result": outcome}))
150
+
151
+
152
def run_cmd(args: argparse.Namespace, parser: argparse.ArgumentParser) -> None:
    """Handle ``cmd``: forward a command string to the running driver.

    The remaining CLI words are joined with single spaces. An empty command
    is reported through ``parser.error`` (which exits). The driver's reply is
    printed as-is, or as a JSON object with a derived ``status`` label when
    ``--json`` is set.
    """
    command_line = " ".join(args.cmd_args).strip()
    if command_line == "":
        parser.error("cmd requires a command string (e.g., agent-browser cmd screenshot home)")

    reply = BrowserDriver(session_id=args.session, output_dir=args.output_dir).send_command(
        command_line, timeout=args.timeout
    )
    if not args.json:
        print(reply)
        return
    print(json.dumps({"status": _derive_status_label(reply), "result": reply}))
164
+
165
+
166
def main(argv: Optional[Sequence[str]] = None) -> int:
    """CLI entry point: parse ``argv`` and dispatch to the sub-command handler.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read
            ``sys.argv``.

    Returns:
        The process exit code: the result of ``run_status`` for ``status``,
        ``0`` after any other recognised command, and ``1`` (after printing
        help) if the command is not recognised.
    """
    configure_windows_console()
    parser = build_parser()
    args = parser.parse_args(argv)
    command = args.command

    # `status` is the only handler that supplies its own exit code.
    if command == "status":
        return run_status(args)

    if command == "start":
        run_start(args)
    elif command == "interact":
        run_interact(args)
    elif command == "stop":
        run_stop(args)
    elif command == "cmd":
        run_cmd(args, parser)
    else:
        parser.print_help()
        return 1
    return 0
188
+
189
+
190
+ if __name__ == "__main__":  # Run the CLI only when executed as a script, not on import.
191
+ raise SystemExit(main())  # Use main()'s return value as the process exit code.