PyPI - browserwright - Versions diffs - 0.6.2__py3-none-any.whl - Mend

browserwright 0.6.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (98)

browserwright/__init__.py +33 -0
browserwright/__main__.py +6 -0
browserwright/_executor/__init__.py +47 -0
browserwright/_executor/__main__.py +9 -0
browserwright/_executor/client.py +127 -0
browserwright/_executor/process.py +652 -0
browserwright/_executor/protocol.py +152 -0
browserwright/api.py +66 -0
browserwright/cdp.py +285 -0
browserwright/cli.py +741 -0
browserwright/daemon/__init__.py +8 -0
browserwright/daemon/_ipc.py +444 -0
browserwright/daemon/active_tab.py +183 -0
browserwright/daemon/auth.py +395 -0
browserwright/daemon/backends/__init__.py +59 -0
browserwright/daemon/backends/base.py +120 -0
browserwright/daemon/backends/cloud.py +222 -0
browserwright/daemon/backends/env.py +119 -0
browserwright/daemon/backends/extension.py +185 -0
browserwright/daemon/backends/rdp.py +214 -0
browserwright/daemon/cli.py +1437 -0
browserwright/daemon/config.py +380 -0
browserwright/daemon/doctor.py +179 -0
browserwright/daemon/errors.py +34 -0
browserwright/daemon/launch_chrome.py +353 -0
browserwright/daemon/observability.py +181 -0
browserwright/daemon/platforms.py +234 -0
browserwright/daemon/resolver.py +72 -0
browserwright/daemon/server/__init__.py +6 -0
browserwright/daemon/server/daemon.py +229 -0
browserwright/daemon/server/executor_registry.py +434 -0
browserwright/daemon/server/extension_upstream.py +677 -0
browserwright/daemon/server/facade.py +375 -0
browserwright/daemon/server/facade_extension.py +969 -0
browserwright/daemon/server/listener.py +1058 -0
browserwright/daemon/server/proxy.py +1991 -0
browserwright/daemon/server/relay.py +783 -0
browserwright/daemon/server/state.py +432 -0
browserwright/daemon/server/upstream.py +266 -0
browserwright/daemon/userscripts.py +150 -0
browserwright/discovery.py +213 -0
browserwright/errors.py +177 -0
browserwright/health.py +169 -0
browserwright/install.py +628 -0
browserwright/memory/__init__.py +15 -0
browserwright/memory/_md.py +120 -0
browserwright/memory/_yaml.py +217 -0
browserwright/memory/global_mem.py +201 -0
browserwright/memory/repl_mem.py +28 -0
browserwright/memory/session_decisions.py +53 -0
browserwright/memory/site_mem.py +381 -0
browserwright/mode_b_client.py +590 -0
browserwright/multitask.py +131 -0
browserwright/output_schema.py +99 -0
browserwright/primitives/__init__.py +67 -0
browserwright/primitives/discovery_api.py +79 -0
browserwright/primitives/http.py +42 -0
browserwright/primitives/inspect.py +876 -0
browserwright/primitives/interact.py +518 -0
browserwright/primitives/page.py +556 -0
browserwright/primitives/site.py +143 -0
browserwright/release_install.py +466 -0
browserwright/repl/__init__.py +6 -0
browserwright/repl/_namespace.py +106 -0
browserwright/repl/_smart_goto.py +236 -0
browserwright/repl/inline.py +180 -0
browserwright/repl/playwright_handle.py +449 -0
browserwright/repl/snapshot.py +150 -0
browserwright/session.py +229 -0
browserwright/session_create.py +252 -0
browserwright/session_ctx.py +24 -0
browserwright/session_registry.py +133 -0
browserwright/session_runtime.py +133 -0
browserwright/site_skills_starter/github.com/SKILL.md +14 -0
browserwright/site_skills_starter/github.com/memory.md +29 -0
browserwright/site_skills_starter/github.com/tasks/list_issues.py +55 -0
browserwright/site_skills_starter/google.com/SKILL.md +16 -0
browserwright/site_skills_starter/google.com/memory.md +27 -0
browserwright/site_skills_starter/google.com/tasks/search.py +53 -0
browserwright/site_skills_starter/producthunt.com/SKILL.md +7 -0
browserwright/site_skills_starter/producthunt.com/memory.md +26 -0
browserwright/site_skills_starter/producthunt.com/tasks/today.py +64 -0
browserwright/site_skills_starter/wikipedia.org/SKILL.md +7 -0
browserwright/site_skills_starter/wikipedia.org/memory.md +22 -0
browserwright/site_skills_starter/wikipedia.org/tasks/lookup.py +55 -0
browserwright/site_skills_starter/ycombinator.com/SKILL.md +8 -0
browserwright/site_skills_starter/ycombinator.com/memory.md +25 -0
browserwright/site_skills_starter/ycombinator.com/tasks/front_page.py +63 -0
browserwright/skill_doc.py +140 -0
browserwright/skill_runtime.md +194 -0
browserwright/subscriptions.py +213 -0
browserwright/task_runner.py +125 -0
browserwright/version.py +117 -0
browserwright-0.6.2.dist-info/METADATA +12 -0
browserwright-0.6.2.dist-info/RECORD +98 -0
browserwright-0.6.2.dist-info/WHEEL +5 -0
browserwright-0.6.2.dist-info/entry_points.txt +3 -0
browserwright-0.6.2.dist-info/top_level.txt +1 -0

browserwright/daemon/userscripts.py ADDED Viewed

@@ -0,0 +1,150 @@
+"""Parse Tampermonkey-style ``==UserScript==`` headers into structured records.
+v1 capability surface is plain page JS (no GM_* APIs); unsupported metadata
+directives are collected as warnings rather than rejected, so existing
+userscripts paste in without crashing.
+"""
+from __future__ import annotations
+import hashlib
+import re
+from dataclasses import dataclass, field
+# Namespace used by ``parse_userscript`` when the header has no ``@namespace``.
+DEFAULT_NAMESPACE = "bd.userscripts"
+# Accepted ``@run-at`` spellings (hyphenated and underscore forms), each mapped
+# to the underscore form that is stored on ``Userscript.run_at``.
+_RUN_AT = {
+    "document-start": "document_start",
+    "document-end": "document_end",
+    "document-idle": "document_idle",
+    "document_start": "document_start",
+    "document_end": "document_end",
+    "document_idle": "document_idle",
+}
+# Header directives the parser understands; ``parse_userscript`` reports any
+# other directive as an "unsupported" warning.
+_SUPPORTED = {
+    "name",
+    "namespace",
+    "match",
+    "include",
+    "exclude",
+    "run-at",
+    "version",
+    "description",
+}
+# Captures everything between the ``// ==UserScript==`` line and the
+# ``// ==/UserScript==`` marker; DOTALL lets ``.`` span the header's newlines.
+_HEADER_RE = re.compile(
+    r"//\s*==UserScript==\s*\n(.*?)//\s*==/UserScript==", re.DOTALL)
+# One metadata line of the form ``// @<key> <value>``: group 1 is the key,
+# group 2 the value with trailing whitespace excluded. A key with no value
+# does not match because at least one whitespace character must follow it.
+_LINE_RE = re.compile(r"//\s*@(\S+)\s+(.*?)\s*$")
+# Chrome match-pattern grammar: ``<scheme>://<host><path>`` (or ``<all_urls>``).
+# Validating here lets the daemon reject typos loudly instead of shipping a
+# pattern that makes ``chrome.userScripts.register`` reject the whole batch on
+# the extension side (which would silently disable every resident script).
+# The host group is optional only so that ``file:///path`` parses; the
+# "every other scheme needs a host" rule is enforced in
+# ``_is_valid_match_pattern`` below.
+_MATCH_PATTERN_RE = re.compile(
+    r"^(\*|https?|file|ftp|wss?)://(\*|(\*\.)?[^/*]+)?(/.*)$"
+)
+def _is_valid_match_pattern(pattern: str) -> bool:
+    """Return True when *pattern* is ``<all_urls>`` or a well-formed match pattern.
+
+    A pattern must have a recognised scheme, a path starting with ``/`` and,
+    unless the scheme is ``file``, a non-empty host. Without the host check
+    a typo such as ``https:///foo`` would be accepted here and only fail
+    later on the extension side.
+    """
+    if pattern == "<all_urls>":
+        return True
+    parsed = _MATCH_PATTERN_RE.match(pattern)
+    if parsed is None:
+        return False
+    scheme, host = parsed.group(1), parsed.group(2)
+    # Only ``file`` URLs may legitimately omit the host.
+    return scheme == "file" or bool(host)
+class UserscriptParseError(ValueError):
+    """Raised when a userscript has no metadata header, no ``@name``, or no valid match pattern."""
+@dataclass
+class Userscript:
+    """Structured record for one parsed userscript.
+
+    ``identity`` is ``<namespace>/<name>`` and ``id`` is a short hash of that
+    identity; ``to_payload`` flattens the record into a plain dict.
+    """
+    name: str
+    namespace: str
+    matches: list[str]
+    exclude_matches: list[str]
+    run_at: str
+    version: str
+    description: str
+    code: str
+    warnings: list[str] = field(default_factory=list)
+    @property
+    def identity(self) -> str:
+        """Namespace and name joined by ``/``."""
+        return "{}/{}".format(self.namespace, self.name)
+    @property
+    def id(self) -> str:
+        """First 12 hex characters of the SHA-1 digest of ``identity``."""
+        digest = hashlib.sha1(self.identity.encode()).hexdigest()
+        return digest[:12]
+    def to_payload(self) -> dict:
+        """Return the record as a dict, using camelCase ``excludeMatches`` / ``runAt`` keys."""
+        return dict(
+            id=self.id,
+            identity=self.identity,
+            name=self.name,
+            namespace=self.namespace,
+            matches=self.matches,
+            excludeMatches=self.exclude_matches,
+            runAt=self.run_at,
+            version=self.version,
+            description=self.description,
+            code=self.code,
+            warnings=self.warnings,
+        )
+def parse_userscript(text: str) -> Userscript:
+    """Parse the ``==UserScript==`` header of *text* into a ``Userscript``.
+
+    Everything after the closing ``==/UserScript==`` marker (minus leading
+    newlines) becomes ``code``. Problems that do not prevent the script from
+    being used are collected in ``warnings`` rather than raised: unsupported
+    directives, directives without a value, invalid match patterns, and
+    unrecognised ``@run-at`` values (which fall back to ``document_idle``).
+
+    Raises:
+        UserscriptParseError: if the metadata block is missing, ``@name`` is
+            absent, or no valid ``@match``/``@include`` pattern was found.
+    """
+    match = _HEADER_RE.search(text)
+    if not match:
+        raise UserscriptParseError("missing ==UserScript== metadata block")
+    block = match.group(1)
+    code = text[match.end():].lstrip("\n")
+    name = ""
+    namespace = ""
+    version = ""
+    description = ""
+    run_at = "document_idle"
+    matches: list[str] = []
+    excludes: list[str] = []
+    warnings: list[str] = []
+    for line in block.splitlines():
+        stripped = line.strip()
+        line_match = _LINE_RE.match(stripped)
+        if not line_match:
+            # ``_LINE_RE`` requires a value after the key, so bare directives
+            # such as ``// @noframes`` end up here. Report them instead of
+            # dropping them silently; plain comment lines are still skipped.
+            bare = stripped[2:].lstrip() if stripped.startswith("//") else ""
+            if len(bare) > 1 and bare[0] == "@" and not bare[1].isspace():
+                bare_key = bare[1:].lower()
+                if bare_key in _SUPPORTED:
+                    warnings.append(f"@{bare_key} has no value (ignored)")
+                else:
+                    warnings.append(f"@{bare_key} not supported in v1 (ignored)")
+            continue
+        key, value = line_match.group(1).lower(), line_match.group(2).strip()
+        if key == "name":
+            name = value
+        elif key == "namespace":
+            namespace = value
+        elif key in ("match", "include"):
+            if _is_valid_match_pattern(value):
+                matches.append(value)
+            else:
+                warnings.append(
+                    f"@{key} {value!r} is not a valid match pattern (ignored)")
+        elif key == "exclude":
+            if _is_valid_match_pattern(value):
+                excludes.append(value)
+            else:
+                warnings.append(
+                    f"@exclude {value!r} is not a valid match pattern (ignored)")
+        elif key == "run-at":
+            if value in _RUN_AT:
+                run_at = _RUN_AT[value]
+            else:
+                # Keep the historical fallback, but tell the caller about it.
+                run_at = "document_idle"
+                warnings.append(
+                    f"@run-at {value!r} is not recognised (using document-idle)")
+        elif key == "version":
+            version = value
+        elif key == "description":
+            description = value
+        else:
+            # Every directive in ``_SUPPORTED`` is handled above, so anything
+            # reaching this branch is unsupported.
+            warnings.append(f"@{key} not supported in v1 (ignored)")
+    if not name:
+        raise UserscriptParseError("@name is required")
+    if not matches:
+        raise UserscriptParseError("at least one @match/@include is required")
+    return Userscript(
+        name=name,
+        namespace=namespace or DEFAULT_NAMESPACE,
+        matches=matches,
+        exclude_matches=excludes,
+        run_at=run_at,
+        version=version,
+        description=description,
+        code=code,
+        warnings=warnings,
+    )

browserwright/discovery.py ADDED Viewed

@@ -0,0 +1,213 @@
+"""site-skills index + simple ranking (spec §E)."""
+from __future__ import annotations
+import datetime as _dt
+import importlib.util
+import json
+import re
+from pathlib import Path
+from typing import Any, Optional
+from .memory import _md
+from .memory.site_mem import site_skills_root, site_skills_roots
+def _bundled_root() -> Path:
+    """Return the ``site_skills_starter`` directory located next to this module."""
+    package_dir = Path(__file__).resolve().parent
+    return package_dir.joinpath("site_skills_starter")
+def _iter_site_dirs() -> list[Path]:
+    """Project → ``$BS_HOME`` → subscriptions → bundled, dedup by site name.
+
+    The order encodes the precedence promise: project workspace is always
+    king, then per-user, then explicit subscriptions (the user opted in to
+    them by ``browserwright sub add``), then the bundled starter set as
+    fallback.
+
+    A directory only counts as a site when it contains a ``tasks`` entry or
+    a ``memory.md`` file. Roots that are missing, or that are not
+    directories, are skipped.
+    """
+    from .subscriptions import iter_subscription_site_roots
+    roots = [*site_skills_roots(), *iter_subscription_site_roots(), _bundled_root()]
+    seen: set[str] = set()
+    out: list[Path] = []
+    for root in roots:
+        # is_dir() rather than exists(): a stray file at a root path would
+        # otherwise make iterdir() raise and abort discovery for every root.
+        if not root.is_dir():
+            continue
+        for child in sorted(root.iterdir()):
+            if not child.is_dir() or child.name in seen:
+                continue
+            if not ((child / "tasks").exists() or (child / "memory.md").exists()):
+                continue
+            # First root to provide a site name wins; later roots are shadowed.
+            seen.add(child.name)
+            out.append(child)
+    return out
+def _load_task_meta(task_py: Path) -> dict[str, Any]:
+    """Import the task module at *task_py* and return its index metadata.
+
+    The entry always has ``name`` (the file stem). When the module cannot be
+    loaded the entry is ``{"name": ..., "load_error": True}``; when no import
+    spec can be built it is just ``{"name": ...}``. Otherwise the entry
+    carries the first docstring line as ``desc`` plus the module-level
+    ``ARGS``, ``OUTPUT``, ``TAGS``, ``REQUIRES_LOGIN``, ``LAST_VERIFIED`` and
+    ``BROKEN_SINCE`` values (with defaults when absent) and the resolved path.
+    """
+    spec = importlib.util.spec_from_file_location(task_py.stem, task_py)
+    if not spec or not spec.loader:
+        return {"name": task_py.stem}
+    mod = importlib.util.module_from_spec(spec)
+    try:
+        # NOTE(security): this executes the task file's top-level code, so
+        # indexing a site directory runs whatever its task modules do on import.
+        spec.loader.exec_module(mod)
+    except Exception:
+        return {"name": task_py.stem, "load_error": True}
+    # A missing, empty or whitespace-only docstring yields no lines; indexing
+    # the first line unconditionally used to raise IndexError for "   ".
+    doc_lines = (getattr(mod, "__doc__", "") or "").strip().splitlines()
+    return {
+        "name": task_py.stem,
+        "desc": doc_lines[0] if doc_lines else "",
+        "args": getattr(mod, "ARGS", {}),
+        "output": getattr(mod, "OUTPUT", "Any"),
+        "tags": getattr(mod, "TAGS", []),
+        "requires_login": bool(getattr(mod, "REQUIRES_LOGIN", False)),
+        "last_verified": getattr(mod, "LAST_VERIFIED", None),
+        "broken_since": getattr(mod, "BROKEN_SINCE", None),
+        "path": str(task_py.resolve()),
+    }
+def _load_site_entry(site_dir: Path) -> dict[str, Any]:
+    """Build the index entry for one site directory.
+
+    Reads front matter from ``memory.md`` (via ``_md.parse_doc``), takes the
+    first line of ``SKILL.md`` with leading ``#``/space characters removed as
+    the description, and collects metadata for every ``tasks/*.py`` file in
+    sorted order. Each of the three inputs is optional.
+    """
+    memory_md = site_dir / "memory.md"
+    skill_md = site_dir / "SKILL.md"
+    tasks_dir = site_dir / "tasks"
+    front_matter = {}
+    if memory_md.exists():
+        front_matter, _ = _md.parse_doc(memory_md.read_text(encoding="utf-8"))
+    headline = ""
+    if skill_md.exists():
+        skill_lines = skill_md.read_text(encoding="utf-8").strip().splitlines()
+        if skill_lines:
+            headline = skill_lines[0].lstrip("# ").strip()
+    if tasks_dir.exists():
+        task_entries = [_load_task_meta(path) for path in sorted(tasks_dir.glob("*.py"))]
+    else:
+        task_entries = []
+    return {
+        "site": site_dir.name,
+        "host_patterns": front_matter.get("host_patterns", []),
+        "aliases": front_matter.get("aliases", []),
+        "description_first_line": headline,
+        "tasks": task_entries,
+        "path": str(site_dir.resolve()),
+    }
+def rebuild_index() -> dict[str, Any]:
+    """Rescan all site directories, write ``index.json`` and return the index.
+
+    The file is written under ``site_skills_root()`` (created if needed) as
+    indented JSON; values JSON cannot encode natively are stringified.
+    """
+    generated_at = _dt.datetime.now(_dt.timezone.utc).isoformat()
+    site_entries = [_load_site_entry(site_dir) for site_dir in _iter_site_dirs()]
+    index = {
+        "version": 1,
+        "generated_at": generated_at,
+        "sites": site_entries,
+    }
+    index_path = site_skills_root() / "index.json"
+    index_path.parent.mkdir(parents=True, exist_ok=True)
+    serialized = json.dumps(index, indent=2, default=str)
+    index_path.write_text(serialized, encoding="utf-8")
+    return index
+def _load_index() -> dict[str, Any]:
+    """Return the cached ``index.json``, rebuilding it when unusable.
+
+    The index is rebuilt when the file is missing, is not valid UTF-8, is
+    not valid JSON, or does not contain a JSON object. Other I/O errors
+    (for example a permission problem) propagate to the caller.
+    """
+    target = site_skills_root() / "index.json"
+    try:
+        # Read directly instead of checking exists() first, so a file that
+        # disappears between the check and the read cannot raise.
+        index = json.loads(target.read_text(encoding="utf-8"))
+    except (FileNotFoundError, ValueError):
+        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
+        return rebuild_index()
+    if not isinstance(index, dict):
+        # Callers use ``index.get(...)``; anything but an object is corrupt.
+        return rebuild_index()
+    return index
+# ---- ranking ----------------------------------------------------------
+def _tokens(s: str) -> set[str]:
+    """Lower-case *s* and split it on whitespace, ``/``, ``_`` and ``-`` into a set of non-empty tokens."""
+    lowered = (s or "").lower()
+    pieces = re.split(r"[\s/_\-]+", lowered)
+    return set(filter(None, pieces))
+def _jaccard(a: set[str], b: set[str]) -> float:
+    """Return the Jaccard similarity of *a* and *b*, or 0.0 when either set is empty."""
+    if a and b:
+        shared = a & b
+        combined = a | b
+        return len(shared) / len(combined)
+    return 0.0
+def _days_since(date_str: Optional[str]) -> int:
+    """Return the number of days between *date_str* and today.
+
+    *date_str* is normally an ISO-8601 string; only its first ten characters
+    (``YYYY-MM-DD``) are used. ``date``/``datetime`` objects are accepted too,
+    because a freshly rebuilt index that has not been through JSON holds the
+    raw ``LAST_VERIFIED`` value from the task module. Missing or unparseable
+    values yield the sentinel 9999.
+    """
+    if not date_str:
+        return 9999
+    if isinstance(date_str, _dt.datetime):
+        # datetime is a date subclass, so it must be checked first.
+        d = date_str.date()
+    elif isinstance(date_str, _dt.date):
+        d = date_str
+    else:
+        try:
+            # str() keeps non-string values from raising TypeError on slicing.
+            d = _dt.date.fromisoformat(str(date_str)[:10])
+        except ValueError:
+            return 9999
+    return (_dt.date.today() - d).days
+def score(query: str, site_entry: dict, task: dict) -> float:
+    """Return a heuristic relevance score of *task* (on *site_entry*) for *query*.
+
+    Contributions, all matched against the lower-cased query:
+      * +1.0 if any site alias occurs in the query;
+      * +0.5 if the first meaningful label of any host pattern occurs in it;
+      * +0.3 × Jaccard overlap between description tokens and query tokens;
+      * +0.2 for every task tag that occurs in the query;
+      * -0.5 if the task is marked ``broken_since``;
+      * +0.1 if the task was verified less than 30 days ago.
+    """
+    q = (query or "").lower()
+    s = 0.0
+    if any(alias and alias.lower() in q for alias in site_entry.get("aliases", [])):
+        s += 1.0
+    for h in site_entry.get("host_patterns", []):
+        # Lower-case the pattern (the query already is) and skip empty or
+        # wildcard labels: "" is a substring of every query and "*" is never
+        # a meaningful stem, so "*.github.com" should contribute "github".
+        labels = [part for part in (h or "").lower().split(".") if part and part != "*"]
+        if labels and labels[0] in q:
+            s += 0.5
+            break
+    s += 0.3 * _jaccard(_tokens(task.get("desc", "")), _tokens(query or ""))
+    for t in task.get("tags", []):
+        if t and t.lower() in q:
+            s += 0.2
+    if task.get("broken_since"):
+        s -= 0.5
+    if _days_since(task.get("last_verified")) < 30:
+        s += 0.1
+    return s
+def list_tasks(*, site: Optional[str] = None, query: Optional[str] = None,
+               limit: int = 20) -> list[dict[str, Any]]:
+    """Return up to *limit* task rows from the index.
+
+    When *site* is given, only tasks of that site are returned. When *query*
+    is given, every row gets a ``match_score`` (rounded to three decimals)
+    and rows are ordered by that score, highest first.
+    """
+    rows: list[dict[str, Any]] = []
+    for entry in _load_index().get("sites", []):
+        if site and entry["site"] != site:
+            continue
+        for task in entry.get("tasks", []):
+            row = {
+                "site": entry["site"],
+                "name": task["name"],
+                "desc": task.get("desc", ""),
+                "args": task.get("args", {}),
+                "output": task.get("output", "Any"),
+                "tags": task.get("tags", []),
+                "requires_login": task.get("requires_login", False),
+                "last_verified": task.get("last_verified"),
+                "broken_since": task.get("broken_since"),
+                "path": task.get("path"),
+            }
+            if query:
+                row["match_score"] = round(score(query, entry, task), 3)
+            rows.append(row)
+    if query:
+        rows = sorted(rows, key=lambda r: r.get("match_score", 0), reverse=True)
+    return rows[:limit]
+def find_task_path(site: str, name: str) -> Path:
+    """Locate ``site-skills/<site>/tasks/<name>.py`` and return its path.
+
+    Roots are consulted in the order project → $BS_HOME → subscriptions →
+    bundled; the first existing file wins.
+
+    Site-name normalisation (Bug 1, v0.3.1): when the literal ``site``
+    argument does not resolve, the lookup is retried with
+    ``host_stem(site)`` so a caller may pass a raw URL or hostname (e.g.
+    ``news.ycombinator.com`` from a CLI invocation) and still hit the
+    eTLD+1-named bundled directory (``ycombinator.com``).
+
+    Raises:
+        FileNotFoundError: if no root contains the task file.
+    """
+    from .subscriptions import iter_subscription_site_roots
+    from .memory.site_mem import host_stem
+    roots = (*site_skills_roots(),
+             *iter_subscription_site_roots(),
+             _bundled_root())
+    stem = host_stem(site)
+    # Try the name exactly as given first, then the normalised stem if it differs.
+    site_names = [site, stem] if stem and stem != site else [site]
+    filename = f"{name}.py"
+    for site_name in site_names:
+        for root in roots:
+            task_file = root / site_name / "tasks" / filename
+            if task_file.exists():
+                return task_file
+    raise FileNotFoundError(f"{site}/{name}")

browserwright/errors.py ADDED Viewed

@@ -0,0 +1,177 @@
+"""Skill exception hierarchy (spec §A.4)."""
+class BrowserwrightError(Exception):
+    """Root of every exception Skill itself raises.
+
+    Every error can carry a ``fix`` — a short, concrete next-action string
+    ("call X" / "run Y") so an agent reading the error has a recovery step,
+    not just a raw transport/protocol message. This generalizes the pattern
+    that ``NeedsUserConfirm.proposal`` established: errors are actionable.
+
+    Subclasses MAY set a class-level ``default_fix`` so a bare ``raise`` is
+    still actionable; an explicit ``fix=`` at the raise site overrides it.
+
+    The fix is also folded into the message: ``"<msg>  [fix: <fix>]"`` when
+    a message was given, or just ``"[fix: <fix>]"`` when the error was raised
+    without positional arguments.
+    """
+    exit_code = 3  # default: script raised
+    default_fix = ""
+    def __init__(self, *args, fix: str = ""):
+        super().__init__(*args)
+        # Explicit fix wins; otherwise fall back to the class default.
+        self.fix = fix or type(self).default_fix
+        if not self.fix:
+            return
+        # Surface the next-action in __str__ so an agent that only logs
+        # the message still sees the recovery step.
+        hint = f"[fix: {self.fix}]"
+        if args:
+            self.args = (f"{args[0]}  {hint}",) + tuple(args[1:])
+        else:
+            # A bare raise used to stringify to "" and lose the hint.
+            self.args = (hint,)
+class PageLoadFailed(BrowserwrightError):
+    """Page-load failure; keeps the ``url`` and the ``reason`` text on the instance."""
+    exit_code = 3
+    default_fix = (
+        "retry with new_tab(url) then wait_for_load(); if it persists, "
+        "check the URL and network with http_get(url)"
+    )
+    def __init__(self, url: str = "", reason: str = "", fix: str = ""):
+        self.url = url
+        self.reason = reason
+        message = f"page load failed: {url} ({reason})"
+        super().__init__(message, fix=fix)
+class ElementNotFound(BrowserwrightError):
+    """Element lookup failure; keeps the ``selector`` and the ``timeout`` (seconds) on the instance."""
+    exit_code = 3
+    default_fix = (
+        "capture_screenshot() to confirm the element is visible, then "
+        "click_at_xy(x, y); use snapshot() to list interactive elements"
+    )
+    def __init__(self, selector: str = "", timeout: float = 0.0, fix: str = ""):
+        self.selector = selector
+        self.timeout = timeout
+        message = f"element not found: {selector!r} after {timeout}s"
+        super().__init__(message, fix=fix)
+class AuthWall(BrowserwrightError):
+    """Login wall hit at ``url``; ``signals`` is stored as a list copy of what was passed."""
+    exit_code = 4
+    default_fix = "stop and ask the user to log in; do not type credentials from a screenshot"
+    def __init__(self, url: str = "", signals=None, fix: str = ""):
+        # Copy first so the instance never aliases the caller's sequence.
+        signal_list = list(signals or [])
+        self.url = url
+        self.signals = signal_list
+        message = f"auth wall at {url}: {self.signals}"
+        super().__init__(message, fix=fix)
+class Captcha(BrowserwrightError):
+    """Captcha encountered; keeps the captcha ``kind`` and the ``url`` on the instance."""
+    exit_code = 5
+    default_fix = "stop and ask the user to solve the captcha; do not attempt to bypass it"
+    def __init__(self, kind: str = "unknown", url: str = "", fix: str = ""):
+        self.kind = kind
+        self.url = url
+        message = f"captcha ({kind}) at {url}"
+        super().__init__(message, fix=fix)
+class NetworkError(BrowserwrightError):
+    """Network failure for ``url``; ``status`` is stored as given (``None`` when not supplied)."""
+    exit_code = 3
+    default_fix = "verify the URL and connectivity, then retry; check http_get(url) for static pages"
+    def __init__(self, url: str = "", status=None, fix: str = ""):
+        self.url = url
+        self.status = status
+        message = f"network error: {url} (status={status})"
+        super().__init__(message, fix=fix)
+class DaemonUnavailable(BrowserwrightError):
+    """The daemon could not be used; ``detail`` carries the caller-supplied explanation."""
+    exit_code = 2
+    default_fix = (
+        "start the single global daemon: `browserwright-daemon serve` "
+        "(or run `browserwright doctor` to see what is missing)"
+    )
+    def __init__(self, detail: str = "", fix: str = ""):
+        self.detail = detail
+        message = f"daemon unavailable: {detail}"
+        super().__init__(message, fix=fix)
+class NoSession(BrowserwrightError):
+    """No explicit session provided. Refuse rather than silently sharing a browser.
+
+    The message always spells out how to create a session; ``detail`` is
+    appended to it and also kept on the instance.
+    """
+    exit_code = 2
+    default_fix = (
+        "run `browserwright session new --backend=<extension|rdp> --name=SESSION_LABEL` "
+        "then run `browserwright -s <id> -e 'print(snapshot())'`"
+    )
+    def __init__(self, detail: str = "", fix: str = ""):
+        self.detail = detail
+        message = (
+            "no session: run `browserwright session new --backend=<extension|rdp> "
+            "--name=SESSION_LABEL` first (use the `=` form; --name is a short "
+            "session label), then pass -s <id> on every execute call. "
+            + detail
+        )
+        super().__init__(message, fix=fix)
+class CDPError(BrowserwrightError):
+    """A CDP call failed; keeps ``method``, a dict copy of ``params`` and ``cdp_message``."""
+    exit_code = 3
+    default_fix = (
+        "if the message mentions an unknown method (-32601) the daemon is "
+        "likely stale — `browserwright-daemon stop` then re-run; otherwise check "
+        "the method name and params"
+    )
+    def __init__(self, method: str = "", params=None, cdp_message: str = "", fix: str = ""):
+        # Copy first so the instance never aliases the caller's mapping.
+        params_copy = dict(params or {})
+        self.method = method
+        self.params = params_copy
+        self.cdp_message = cdp_message
+        message = f"CDP {method} failed: {cdp_message}"
+        super().__init__(message, fix=fix)
+class NeedsUserConfirm(BrowserwrightError):
+    """Raised by remember_preference (and similar) when the agent must surface
+    a confirm prompt to the user before re-calling with confirm=False.
+
+    ``what`` describes the pending action and ``proposal`` is kept on the
+    instance unchanged.
+    """
+    # NOTE(review): the docstring says "re-calling with confirm=False" while the
+    # default fix text below says "confirm=True" — confirm which one is intended.
+    exit_code = 1
+    def __init__(self, what: str = "", proposal=None, fix: str = ""):
+        self.what = what
+        self.proposal = proposal
+        # The proposal IS the next-action; mirror it into fix so the
+        # generic envelope is uniform across every error type.
+        next_action = fix or "surface the proposal to the user, then re-call with confirm=True"
+        super().__init__(f"needs user confirm: {what}", fix=next_action)
+def serialize(exc: BaseException) -> dict:
+    """Compact JSON-friendly representation for stderr / repl socket.
+
+    Always contains ``type`` (the exception class name) and ``msg``
+    (``str(exc)``). Each known attribute that is present and not ``None`` is
+    copied as-is when it is JSON-serializable, or as its ``repr`` otherwise.
+    """
+    import json as _json
+    known_fields = ("url", "selector", "timeout", "reason", "signals", "kind",
+                    "status", "detail", "site", "task", "failed_check",
+                    "method", "cdp_message", "what", "proposal", "fix")
+    out = {"type": type(exc).__name__, "msg": str(exc)}
+    for attr in known_fields:
+        value = getattr(exc, attr, None)
+        if value is None:
+            continue
+        try:
+            _json.dumps(value)  # ensure serializable
+        except (TypeError, ValueError):
+            out[attr] = repr(value)
+        else:
+            out[attr] = value
+    return out
+def playwright_error_fix(exc: BaseException) -> str:
+    """Best-effort recovery hint for raw Playwright exceptions.
+
+    This intentionally does not wrap or re-raise the original exception. It is
+    used at serialization boundaries so agent-authored ``try/except`` behavior
+    inside the executor stays native Playwright, while the surfaced error gains
+    a concrete next step.
+
+    Returns an empty string when the error is neither a detached/closed
+    target nor a timeout.
+    """
+    lower = str(exc).lower()
+    def mentions(*needles: str) -> bool:
+        return any(needle in lower for needle in needles)
+    # Connection-level breakage takes priority over the timeout hints.
+    if mentions("frame detached", "target closed", "page closed"):
+        return "call reset() to rebuild the browser connection, then re-snapshot and retry"
+    timed_out = "timeout" in lower or type(exc).__name__ == "TimeoutError"
+    if not timed_out:
+        return ""
+    if mentions("locator", "click", "fill"):
+        return "call snapshot() to confirm the target still exists, then re-snapshot and retry with the current ref"
+    if mentions("goto", "navigation"):
+        return "retry page.goto(url); Browserwright will use smart waiting, or verify the site with http_get(url)"
+    return "call snapshot() to inspect the current page state, then retry the action with the current ref"