PyPI - ai-agent-browser - Versions diffs - 0.1.3__py3-none-any.whl - Mend

ai-agent-browser 0.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

agent_browser/__init__.py +13 -0
agent_browser/driver.py +849 -0
agent_browser/interactive.py +229 -0
agent_browser/main.py +191 -0
agent_browser/utils.py +365 -0
ai_agent_browser-0.1.3.dist-info/METADATA +389 -0
ai_agent_browser-0.1.3.dist-info/RECORD +11 -0
ai_agent_browser-0.1.3.dist-info/WHEEL +5 -0
ai_agent_browser-0.1.3.dist-info/entry_points.txt +2 -0
ai_agent_browser-0.1.3.dist-info/licenses/LICENSE +21 -0
ai_agent_browser-0.1.3.dist-info/top_level.txt +1 -0

agent_browser/interactive.py ADDED Viewed

@@ -0,0 +1,229 @@
+"""
+Interactive Test Runner - Claude-in-the-Loop
+This runner takes screenshots and saves them for Claude Code to analyze directly.
+Claude then suggests actions, and you execute them through this script.
+Usage:
+    agent-browser interact http://localhost:5000/financial-journey/quick-start
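+Commands (type in console):
+    screenshot / ss [name] - Take a screenshot for Claude to analyze
+    click <selector>       - Click an element (e.g., click #submitBtn)
+    type <sel> <text>      - Type text into element (e.g., type #userAge 30)
+    fill <field> <val>     - Fill a form field matched by id, name, or data-testid
+    select <sel> <val>     - Select dropdown option
+    scroll <dir>           - Scroll up/down/top/bottom (default: down)
+    wait <ms>              - Wait milliseconds (default: 1000)
+    eval <js>              - Execute JavaScript and print the result
+    url                    - Print current URL
+    clear                  - Clear localStorage
+    reload                 - Reload the current page
+    back                   - Navigate back in history
+    goto [url]             - Navigate to a URL (default: the start URL)
+    help                   - Show this help text
+    quit / q / exit        - Exit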
+The workflow:
+1. Run this script
+2. Type 'ss' to take a screenshot
+3. Ask Claude Code to read the screenshot and suggest actions
+4. Execute the suggested actions
+5. Repeat until test is complete
+"""
+from datetime import datetime
+from pathlib import Path
+from typing import TYPE_CHECKING, Any, Optional, Union
+if TYPE_CHECKING:
+    from playwright.sync_api import Browser, Page
+from .utils import PathTraversalError, sanitize_filename, validate_path
+class InteractiveRunner:
+    """Console-driven Playwright session that saves screenshots for review.
+    Commands are parsed by ``execute_command`` and applied to a single Chromium
+    page; screenshots are written to ``output_dir`` as ``step_<NN>_<label>.png``.
+    """
+    # Usage hints for the commands that cannot run without a first argument.
+    _USAGE = {
+        "click": "click <selector>",
+        "type": "type <selector> <text>",
+        "fill": "fill <field> <value>",
+        "select": "select <selector> <value>",
+    }
+    def __init__(
+        self,
+        start_url: str,
+        headless: bool = False,
+        session_id: str = "default",
+        output_dir: Optional[Union[str, Path]] = None,
+    ):
+        """Store settings and create the screenshot directory; no browser is launched yet.
+        Args:
+            start_url: URL opened by ``start()`` and used as the ``goto`` default.
+            headless: Forwarded to the Chromium launch call.
+            session_id: Session label, passed through ``sanitize_filename``.
+            output_dir: Screenshot directory; ``./screenshots/interactive`` when omitted.
+        Raises:
+            ValueError: If ``validate_path`` raises ``PathTraversalError`` for the directory.
+        """
+        self.start_url = start_url
+        self.headless = headless
+        self.session_id = sanitize_filename(session_id or "default")
+        output_dir_path = Path(output_dir) if output_dir else Path("./screenshots/interactive")
+        # Validate output_dir is within CWD to prevent path traversal
+        try:
+            self.output_dir = validate_path(output_dir_path)
+        except PathTraversalError as e:
+            # Chain the cause so the original rejection stays visible in tracebacks.
+            raise ValueError(f"Invalid output directory: {e}") from e
+        self.output_dir.mkdir(parents=True, exist_ok=True)
+        self._playwright: Any = None
+        self._browser: Optional["Browser"] = None
+        self._page: Optional["Page"] = None
+        self.screenshot_count = 0
+    def start(self) -> None:
+        """Launch Chromium, open a 1280x900 page and navigate to ``start_url``."""
+        # Imported lazily so the module can be imported without Playwright loaded.
+        from playwright.sync_api import sync_playwright
+        print("Starting browser...")
+        self._playwright = sync_playwright().start()
+        self._browser = self._playwright.chromium.launch(
+            headless=self.headless,
+            slow_mo=100,
+        )
+        context = self._browser.new_context(viewport={"width": 1280, "height": 900})
+        self._page = context.new_page()
+        print(f"Navigating to {self.start_url}")
+        self._page.goto(self.start_url, wait_until="networkidle")
+        print("Ready! Type 'ss' to take a screenshot, 'help' for commands.\n")
+    def stop(self) -> None:
+        """Close the browser and stop Playwright; calling it again is a no-op apart from the message."""
+        browser, playwright = self._browser, self._playwright
+        # Drop the references first so a repeated stop() does not close twice.
+        self._browser = None
+        self._playwright = None
+        self._page = None
+        try:
+            if browser:
+                browser.close()
+        finally:
+            # Stop the Playwright driver even when closing the browser failed.
+            if playwright:
+                playwright.stop()
+        print("Browser closed.")
+    @property
+    def page(self) -> "Page":
+        """Return the active page, raising ``RuntimeError`` if none is open."""
+        if not self._page:
+            raise RuntimeError("Browser page not initialized. Call start() first.")
+        return self._page
+    def screenshot(self, name: Optional[str] = None) -> str:
+        """Capture a full-page screenshot and return its path as a string.
+        Args:
+            name: Optional label for the file name; the current ``HHMMSS`` time
+                is used when omitted. Either value goes through ``sanitize_filename``.
+        """
+        self.screenshot_count += 1
+        timestamp = datetime.now().strftime("%H%M%S")
+        label = sanitize_filename(name) if name else sanitize_filename(timestamp)
+        filename = f"step_{self.screenshot_count:02d}_{label}.png"
+        filepath = self.output_dir / filename
+        self.page.screenshot(path=str(filepath), full_page=True)
+        print(f"\nScreenshot saved: {filepath}")
+        print(f"Ask Claude to: Read {filepath}")
+        return str(filepath)
+    def execute_command(self, cmd: str) -> bool:
+        """Parse and run one console command.
+        The line is split into at most three parts (action, first argument,
+        remainder), so the last argument may contain spaces. Errors raised
+        while running a command are printed rather than propagated.
+        Args:
+            cmd: Raw command line as typed by the user.
+        Returns:
+            ``False`` for ``quit``/``q``/``exit``; ``True`` otherwise, including
+            for blank lines, unknown commands and commands that failed.
+        """
+        line = cmd.strip()
+        parts = line.split(maxsplit=2)
+        if not parts:
+            return True
+        action = parts[0].lower()
+        usage = self._USAGE.get(action)
+        if usage is not None and len(parts) < 2:
+            # Name the missing argument instead of surfacing an IndexError message.
+            print(f"Error: usage: {usage}")
+            return True
+        try:
+            if action in ("screenshot", "ss"):
+                name = parts[1] if len(parts) > 1 else None
+                self.screenshot(name)
+            elif action == "click":
+                selector = parts[1]
+                self.page.click(selector)
+                print(f"Clicked: {selector}")
+            elif action == "type":
+                selector = parts[1]
+                text = parts[2] if len(parts) > 2 else ""
+                self.page.fill(selector, text)
+                print(f"Typed '{text}' into {selector}")
+            elif action == "fill":
+                field = parts[1]
+                value = parts[2] if len(parts) > 2 else ""
+                filled = False
+                # Try the field as an id, then a name attribute, then a data-testid.
+                for sel in [f"#{field}", f"[name='{field}']", f"[data-testid='{field}']"]:
+                    try:
+                        self.page.fill(sel, value, timeout=1000)
+                        print(f"Filled {sel} with '{value}'")
+                        filled = True
+                        break
+                    except Exception:
+                        # Best effort: a failing candidate just means "try the next one".
+                        continue
+                if not filled:
+                    print(f"No matching selector found for field '{field}'")
+            elif action == "select":
+                selector = parts[1]
+                value = parts[2] if len(parts) > 2 else ""
+                self.page.select_option(selector, value)
+                print(f"Selected '{value}' in {selector}")
+            elif action == "scroll":
+                direction = parts[1] if len(parts) > 1 else "down"
+                if direction == "top":
+                    self.page.evaluate("window.scrollTo(0, 0)")
+                elif direction == "bottom":
+                    self.page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
+                elif direction == "up":
+                    self.page.evaluate("window.scrollBy(0, -500)")
+                else:
+                    # Any other value (including "down") scrolls down.
+                    self.page.evaluate("window.scrollBy(0, 500)")
+                print(f"Scrolled {direction}")
+            elif action == "wait":
+                ms = int(parts[1]) if len(parts) > 1 else 1000
+                self.page.wait_for_timeout(ms)
+                print(f"Waited {ms}ms")
+            elif action == "eval":
+                # Re-split the stripped line: the script keeps its internal spacing and
+                # is not corrupted by leading whitespace in ``cmd``.
+                pieces = line.split(maxsplit=1)
+                js = pieces[1] if len(pieces) > 1 else ""
+                if not js:
+                    print("Error: JavaScript code required")
+                    return True
+                result = self.page.evaluate(js)
+                print(f"Result: {result}")
+            elif action == "url":
+                print(f"Current URL: {self.page.url}")
+            elif action == "clear":
+                self.page.evaluate("localStorage.clear()")
+                print("Cleared localStorage")
+            elif action == "reload":
+                self.page.reload(wait_until="networkidle")
+                print("Page reloaded")
+            elif action == "back":
+                self.page.go_back()
+                print("Navigated back")
+            elif action == "goto":
+                url = parts[1] if len(parts) > 1 else self.start_url
+                self.page.goto(url, wait_until="networkidle")
+                print(f"Navigated to {url}")
+            elif action in ("quit", "q", "exit"):
+                return False
+            elif action == "help":
+                print(__doc__)
+            else:
+                print(f"Unknown command: {action}. Type 'help' for available commands.")
+        except Exception as exc:
+            # Keep the console session alive: report the failure and await the next command.
+            print(f"Error: {exc}")
+        return True
+    def run(self) -> None:
+        """Start the browser, take an initial screenshot, then loop on console input.
+        The loop ends on a quit command, Ctrl+C or end of input; the browser is
+        stopped in every case.
+        """
+        try:
+            self.start()
+            self.screenshot("initial")
+            while True:
+                try:
+                    cmd = input("\n> ").strip()
+                    if not self.execute_command(cmd):
+                        break
+                except KeyboardInterrupt:
+                    print("\nInterrupted")
+                    break
+                except EOFError:
+                    break
+        finally:
+            self.stop()

agent_browser/main.py ADDED Viewed

@@ -0,0 +1,191 @@
+"""Command-line interface for the agent-browser package."""
+import argparse
+import io
+import json
+from contextlib import redirect_stdout
+from typing import Optional, Sequence
+from .driver import BrowserDriver
+from .interactive import InteractiveRunner
+from .utils import configure_windows_console
+# Fallback URL used when the start/interact subcommands are given no URL argument.
+DEFAULT_URL = "http://localhost:8080"
+def _derive_status_label(result: str) -> str:
+    """Map a textual result to a coarse status label for JSON output.
+    Explicit prefixes are checked before the ``timeout`` substring heuristic, so a
+    result already marked ``[PASS]`` is not relabelled just because its message
+    mentions a timeout.
+    Args:
+        result: Result text; surrounding whitespace and letter case are ignored.
+    Returns:
+        ``"ERROR"``, ``"FAIL"``, ``"TIMEOUT"`` or ``"PASS"``. Text that matches no
+        rule is reported as ``"PASS"``.
+    """
+    normalized = result.strip().lower()
+    if normalized.startswith("error"):
+        return "ERROR"
+    if normalized.startswith("[fail]"):
+        return "FAIL"
+    if normalized.startswith("[pass]"):
+        return "PASS"
+    if "timeout" in normalized:
+        return "TIMEOUT"
+    return "PASS"
+def build_parser() -> argparse.ArgumentParser:
+    """Create the ``agent-browser`` argument parser.
+    Global options (``--session``, ``--output-dir``, ``--json``) live on the root
+    parser; the subcommands are ``start``, ``interact``, ``status``, ``stop`` and
+    ``cmd``, and the chosen one is stored as ``command``.
+    """
+    def add_url_argument(sub: argparse.ArgumentParser) -> None:
+        # Optional positional shared by the subcommands that open a page.
+        sub.add_argument("url", nargs="?", default=DEFAULT_URL, help="Initial URL to open.")
+    root = argparse.ArgumentParser(
+        prog="agent-browser",
+        description="Control a Playwright browser via CLI or interactive runner.",
+    )
+    root.add_argument(
+        "--session",
+        default="default",
+        help="Session identifier used for IPC files (default: default).",
+    )
+    root.add_argument(
+        "--output-dir",
+        default=None,
+        help="Directory to store screenshots (used when starting a session).",
+    )
+    root.add_argument(
+        "--json",
+        action="store_true",
+        help="Return machine-readable JSON for command output.",
+    )
+    commands = root.add_subparsers(dest="command", required=True)
+    start_cmd = commands.add_parser("start", help="Start the headless driver (blocks).")
+    add_url_argument(start_cmd)
+    start_cmd.add_argument(
+        "--visible",
+        action="store_true",
+        help="Launch browser in headed mode instead of headless.",
+    )
+    interact_cmd = commands.add_parser("interact", help="Start the interactive runner.")
+    add_url_argument(interact_cmd)
+    interact_cmd.add_argument(
+        "--headless",
+        action="store_true",
+        help="Run the interactive runner in headless mode.",
+    )
+    # Subcommands that take no arguments of their own.
+    for name, summary in (
+        ("status", "Check if the driver is running."),
+        ("stop", "Stop the running driver."),
+    ):
+        commands.add_parser(name, help=summary)
+    forward_cmd = commands.add_parser("cmd", help="Send a command to the running driver.")
+    # NOTE(review): cmd_args uses REMAINDER, which may capture a --timeout flag into
+    # the forwarded command text -- confirm the intended flag ordering.
+    forward_cmd.add_argument(
+        "cmd_args",
+        nargs=argparse.REMAINDER,
+        help="Command string to forward (e.g., screenshot home).",
+    )
+    forward_cmd.add_argument(
+        "--timeout",
+        type=int,
+        default=None,
+        help="Seconds to wait for a response (defaults to IPC timeout).",
+    )
+    return root
+def run_start(args: argparse.Namespace) -> None:
+    """Create a ``BrowserDriver`` for the session and start it at ``args.url``.
+    The browser runs headless unless ``--visible`` was given.
+    """
+    driver = BrowserDriver(session_id=args.session, output_dir=args.output_dir)
+    launch_headless = not args.visible
+    driver.start(args.url, headless=launch_headless)
+def run_interact(args: argparse.Namespace) -> None:
+    """Build an ``InteractiveRunner`` from the parsed CLI arguments and run it.
+    Raises:
+        SystemExit: Carrying the error text when the runner rejects its arguments
+            (``ValueError``) or raises ``NotImplementedError``.
+        RuntimeError: If the runner exposes neither ``run()`` nor ``start()``.
+    """
+    runner_kwargs = {
+        "session_id": args.session,
+        "output_dir": args.output_dir,
+        "headless": args.headless,
+    }
+    try:
+        runner = InteractiveRunner(args.url, **runner_kwargs)
+    except (NotImplementedError, ValueError) as exc:
+        # The runner raises ValueError for a rejected output directory; exit with
+        # the message instead of showing a traceback for a bad CLI argument.
+        raise SystemExit(str(exc)) from exc
+    try:
+        if hasattr(runner, "run"):
+            runner.run()
+        elif hasattr(runner, "start"):
+            runner.start()
+        else:
+            raise RuntimeError("InteractiveRunner must expose a run() or start() method")
+    except NotImplementedError as exc:
+        raise SystemExit(str(exc)) from exc
+def run_status(args: argparse.Namespace) -> int:
+    """Report whether the driver is running and return a matching exit code.
+    With ``--json``, whatever ``driver.status()`` prints is captured and emitted
+    as the ``details`` field of a single JSON object.
+    Returns:
+        ``0`` when ``driver.status()`` is truthy, ``1`` otherwise.
+    """
+    driver = BrowserDriver(session_id=args.session, output_dir=args.output_dir)
+    if not args.json:
+        return 0 if driver.status() else 1
+    captured = io.StringIO()
+    with redirect_stdout(captured):
+        running = driver.status()
+    if running:
+        status, result, exit_code = "RUNNING", "running", 0
+    else:
+        status, result, exit_code = "NOT_RUNNING", "not_running", 1
+    report = {
+        "status": status,
+        "result": result,
+        "details": captured.getvalue().strip(),
+    }
+    print(json.dumps(report))
+    return exit_code
+def run_stop(args: argparse.Namespace) -> None:
+    """Ask the session's driver to stop and print the result.
+    The result is printed as-is, or as a JSON object with ``status`` and
+    ``result`` keys when ``--json`` is set.
+    """
+    outcome = BrowserDriver(session_id=args.session, output_dir=args.output_dir).stop()
+    if not args.json:
+        print(outcome)
+        return
+    print(json.dumps({"status": _derive_status_label(outcome), "result": outcome}))
+def run_cmd(args: argparse.Namespace, parser: argparse.ArgumentParser) -> None:
+    """Forward the joined ``cmd_args`` to the driver and print its reply.
+    An empty command string is reported through ``parser.error``. The reply is
+    printed as-is, or as a JSON object with ``status`` and ``result`` keys when
+    ``--json`` is set.
+    """
+    command_line = " ".join(args.cmd_args).strip()
+    if command_line == "":
+        parser.error("cmd requires a command string (e.g., agent-browser cmd screenshot home)")
+    driver = BrowserDriver(session_id=args.session, output_dir=args.output_dir)
+    reply = driver.send_command(command_line, timeout=args.timeout)
+    if not args.json:
+        print(reply)
+        return
+    print(json.dumps({"status": _derive_status_label(reply), "result": reply}))
+def main(argv: Optional[Sequence[str]] = None) -> int:
+    """Parse ``argv`` and dispatch to the matching subcommand handler.
+    Args:
+        argv: Argument list handed to ``parse_args``; ``None`` lets argparse use
+            the process arguments.
+    Returns:
+        The code from ``run_status`` for ``status``, ``0`` for every other
+        handled subcommand, and ``1`` (after printing help) for an unrecognised one.
+    """
+    configure_windows_console()
+    parser = build_parser()
+    args = parser.parse_args(argv)
+    command = args.command
+    # These two do not fit the "handler(args) then return 0" shape used below.
+    if command == "status":
+        return run_status(args)
+    if command == "cmd":
+        run_cmd(args, parser)
+        return 0
+    handlers = {"start": run_start, "interact": run_interact, "stop": run_stop}
+    handler = handlers.get(command)
+    if handler is None:
+        parser.print_help()
+        return 1
+    handler(args)
+    return 0
+# When executed as a script, exit the process with main()'s return value.
+if __name__ == "__main__":
+    raise SystemExit(main())