browserwright 0.6.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. browserwright/__init__.py +33 -0
  2. browserwright/__main__.py +6 -0
  3. browserwright/_executor/__init__.py +47 -0
  4. browserwright/_executor/__main__.py +9 -0
  5. browserwright/_executor/client.py +127 -0
  6. browserwright/_executor/process.py +652 -0
  7. browserwright/_executor/protocol.py +152 -0
  8. browserwright/api.py +66 -0
  9. browserwright/cdp.py +285 -0
  10. browserwright/cli.py +741 -0
  11. browserwright/daemon/__init__.py +8 -0
  12. browserwright/daemon/_ipc.py +444 -0
  13. browserwright/daemon/active_tab.py +183 -0
  14. browserwright/daemon/auth.py +395 -0
  15. browserwright/daemon/backends/__init__.py +59 -0
  16. browserwright/daemon/backends/base.py +120 -0
  17. browserwright/daemon/backends/cloud.py +222 -0
  18. browserwright/daemon/backends/env.py +119 -0
  19. browserwright/daemon/backends/extension.py +185 -0
  20. browserwright/daemon/backends/rdp.py +214 -0
  21. browserwright/daemon/cli.py +1437 -0
  22. browserwright/daemon/config.py +380 -0
  23. browserwright/daemon/doctor.py +179 -0
  24. browserwright/daemon/errors.py +34 -0
  25. browserwright/daemon/launch_chrome.py +353 -0
  26. browserwright/daemon/observability.py +181 -0
  27. browserwright/daemon/platforms.py +234 -0
  28. browserwright/daemon/resolver.py +72 -0
  29. browserwright/daemon/server/__init__.py +6 -0
  30. browserwright/daemon/server/daemon.py +229 -0
  31. browserwright/daemon/server/executor_registry.py +434 -0
  32. browserwright/daemon/server/extension_upstream.py +677 -0
  33. browserwright/daemon/server/facade.py +375 -0
  34. browserwright/daemon/server/facade_extension.py +969 -0
  35. browserwright/daemon/server/listener.py +1058 -0
  36. browserwright/daemon/server/proxy.py +1991 -0
  37. browserwright/daemon/server/relay.py +783 -0
  38. browserwright/daemon/server/state.py +432 -0
  39. browserwright/daemon/server/upstream.py +266 -0
  40. browserwright/daemon/userscripts.py +150 -0
  41. browserwright/discovery.py +213 -0
  42. browserwright/errors.py +177 -0
  43. browserwright/health.py +169 -0
  44. browserwright/install.py +628 -0
  45. browserwright/memory/__init__.py +15 -0
  46. browserwright/memory/_md.py +120 -0
  47. browserwright/memory/_yaml.py +217 -0
  48. browserwright/memory/global_mem.py +201 -0
  49. browserwright/memory/repl_mem.py +28 -0
  50. browserwright/memory/session_decisions.py +53 -0
  51. browserwright/memory/site_mem.py +381 -0
  52. browserwright/mode_b_client.py +590 -0
  53. browserwright/multitask.py +131 -0
  54. browserwright/output_schema.py +99 -0
  55. browserwright/primitives/__init__.py +67 -0
  56. browserwright/primitives/discovery_api.py +79 -0
  57. browserwright/primitives/http.py +42 -0
  58. browserwright/primitives/inspect.py +876 -0
  59. browserwright/primitives/interact.py +518 -0
  60. browserwright/primitives/page.py +556 -0
  61. browserwright/primitives/site.py +143 -0
  62. browserwright/release_install.py +466 -0
  63. browserwright/repl/__init__.py +6 -0
  64. browserwright/repl/_namespace.py +106 -0
  65. browserwright/repl/_smart_goto.py +236 -0
  66. browserwright/repl/inline.py +180 -0
  67. browserwright/repl/playwright_handle.py +449 -0
  68. browserwright/repl/snapshot.py +150 -0
  69. browserwright/session.py +229 -0
  70. browserwright/session_create.py +252 -0
  71. browserwright/session_ctx.py +24 -0
  72. browserwright/session_registry.py +133 -0
  73. browserwright/session_runtime.py +133 -0
  74. browserwright/site_skills_starter/github.com/SKILL.md +14 -0
  75. browserwright/site_skills_starter/github.com/memory.md +29 -0
  76. browserwright/site_skills_starter/github.com/tasks/list_issues.py +55 -0
  77. browserwright/site_skills_starter/google.com/SKILL.md +16 -0
  78. browserwright/site_skills_starter/google.com/memory.md +27 -0
  79. browserwright/site_skills_starter/google.com/tasks/search.py +53 -0
  80. browserwright/site_skills_starter/producthunt.com/SKILL.md +7 -0
  81. browserwright/site_skills_starter/producthunt.com/memory.md +26 -0
  82. browserwright/site_skills_starter/producthunt.com/tasks/today.py +64 -0
  83. browserwright/site_skills_starter/wikipedia.org/SKILL.md +7 -0
  84. browserwright/site_skills_starter/wikipedia.org/memory.md +22 -0
  85. browserwright/site_skills_starter/wikipedia.org/tasks/lookup.py +55 -0
  86. browserwright/site_skills_starter/ycombinator.com/SKILL.md +8 -0
  87. browserwright/site_skills_starter/ycombinator.com/memory.md +25 -0
  88. browserwright/site_skills_starter/ycombinator.com/tasks/front_page.py +63 -0
  89. browserwright/skill_doc.py +140 -0
  90. browserwright/skill_runtime.md +194 -0
  91. browserwright/subscriptions.py +213 -0
  92. browserwright/task_runner.py +125 -0
  93. browserwright/version.py +117 -0
  94. browserwright-0.6.2.dist-info/METADATA +12 -0
  95. browserwright-0.6.2.dist-info/RECORD +98 -0
  96. browserwright-0.6.2.dist-info/WHEEL +5 -0
  97. browserwright-0.6.2.dist-info/entry_points.txt +3 -0
  98. browserwright-0.6.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,131 @@
1
+ """Multi-task fan-out (v0.3).
2
+
3
+ Runs N tasks concurrently. Each one gets its own ``Session`` (and therefore
4
+ its own ws to the daemon, its own sessionId namespace, its own
5
+ ``current_target_id``). The daemon v0.3 multi-client mux serialises traffic
6
+ into the single upstream Chrome ws; from Skill's point of view the tasks
7
+ are truly independent — `new_tab()` in task A doesn't yank the tab task B
8
+ is operating on.
9
+
10
+ This module is intentionally small. The hard work was done in #55 (the
11
+ ``ContextVar``-backed ``with_session`` machinery). Here we just iterate.
12
+
13
+ Concurrency model
14
+ -----------------
15
+ Primitives are sync. The CDP transport is thread-safe (single ``send`` lock).
16
+ So we use a ``ThreadPoolExecutor`` rather than asyncio:
17
+
18
+ - Each worker thread enters ``with_session(isolated_session())`` and runs the task.
19
+ - Sessions are independent ``ContextVar`` slots (#55 covers thread isolation).
20
+ - Daemon assigns each ws its own client id, so per-thread sessionIds don't
21
+ collide on the wire either.
22
+
23
+ Layer 3 (cron / scheduler) shells out to either ``browserwright task ...``
24
+ one-at-a-time or — for bursty work — calls this helper from Python::
25
+
26
+ from browserwright.multitask import run_tasks_concurrent
27
+ rows = run_tasks_concurrent([
28
+ ("ycombinator.com", "front_page", {"limit": 10}),
29
+ ("wikipedia.org", "lookup", {"title": "Python"}),
30
+ ], max_workers=4)
31
+ """
32
+ from __future__ import annotations
33
+
34
+ import concurrent.futures
35
+ from typing import Any, Callable, Iterable, Optional
36
+
37
+ from .errors import BrowserwrightError
38
+ from .session import isolated_session, with_session
39
+ from .task_runner import run_task
40
+
41
+
42
# One unit of fan-out work: the site, the task name within that site, and
# the keyword arguments that are splatted into ``run_task``.
TaskSpec = tuple[str, str, dict]  # (site, name, kwargs)
43
+
44
+
45
class TaskResult(dict):
    """Outcome of one task in a fan-out run.

    A plain ``dict`` subclass, so a result can be handed straight to a
    JSON encoder. Expected keys::

        {"site": "...", "name": "...", "ok": True/False,
         "value": <return>,            # only when ok
         "error_type": "ClassName",    # only when not ok
         "error_msg": "...",           # only when not ok
         "elapsed_sec": float}
    """
54
+
55
+
56
def _run_one(spec: TaskSpec) -> TaskResult:
    """Worker: run one task inside its own freshly created ``Session``.

    The session is created, installed with ``with_session`` for the duration
    of ``run_task``, and closed afterwards so the daemon's client slot is
    released promptly.

    Args:
        spec: ``(site, name, kwargs)``; ``kwargs`` is passed to ``run_task``
            as keyword arguments.

    Returns:
        A ``TaskResult``. On success it carries ``ok=True`` and ``value``;
        on any failure it carries ``ok=False`` plus ``error_type`` (the
        exception class name) and ``error_msg``. ``elapsed_sec`` is always
        present, measured on the monotonic clock and rounded to 3 decimals.

    Failures while *creating* the session are reported as ``ok=False``
    results as well, and a failing ``close()`` never replaces the result,
    so one broken worker cannot abort the whole fan-out.
    """
    import time

    site, name, kwargs = spec
    started = time.monotonic()

    def _elapsed() -> float:
        return round(time.monotonic() - started, 3)

    sess = None
    try:
        # Created inside the ``try`` so a connection failure becomes a
        # per-task error instead of escaping the worker thread.
        sess = isolated_session()
        with with_session(sess):
            value = run_task(site, name, **kwargs)
    # BrowserwrightError is named explicitly to keep the intent visible;
    # both kinds of failure are reported the same way.
    except (BrowserwrightError, Exception) as e:  # noqa: BLE001 — agent-facing catch-all
        return TaskResult(
            site=site, name=name, ok=False,
            error_type=type(e).__name__, error_msg=str(e),
            elapsed_sec=_elapsed(),
        )
    finally:
        if sess is not None:
            try:
                sess.close()
            except Exception:  # noqa: BLE001
                # Best-effort release: the task outcome is already decided
                # and must not be masked by a teardown error.
                pass
    return TaskResult(
        site=site, name=name, ok=True, value=value,
        elapsed_sec=_elapsed(),
    )
89
+
90
+
91
def run_tasks_concurrent(specs: Iterable[TaskSpec], *,
                         max_workers: int = 4,
                         warm_upstream: bool = False) -> list[TaskResult]:
    """Fan out every ``(site, name, kwargs)`` spec onto a thread pool.

    Returns one ``TaskResult`` per spec, ordered like the input (not by
    completion time), so downstream pipelines stay predictable. Task
    failures are reported by the worker as ``ok=False`` results; the
    caller inspects them and decides what to retry or log.

    .. deprecated:: 0.3.0
        ``warm_upstream`` is a no-op. It dates from a Skill-side workaround
        (a sync probe on the main session before spawning workers) that the
        daemon's #76 pre-open buffer fix made unnecessary: frames received
        while ``upstream phase != CONNECTED`` are buffered per client and
        replayed once the upstream ws opens (``PRE_OPEN_BUFFER_LIMIT=100``;
        overflow surfaces as JSON-RPC error ``-32603``). The keyword is
        still accepted for source compatibility; drop it from call sites.

        **Removal target: v0.6** (REVIEW.md F-17). Once removed from the
        signature, callers still passing it will hit a ``TypeError``.

    Notes
    -----
    * ``max_workers`` defaults to 4 and is capped at the number of specs.
      Daemon v0.3 multi-client supports more, but Chrome itself gets
      stressed past ~8 concurrent navigations.
    * An empty ``specs`` returns ``[]`` without creating a pool.
    """
    del warm_upstream  # accepted-but-ignored; see deprecation note above.

    pending = list(specs)
    if len(pending) == 0:
        return []

    worker_count = min(max_workers, len(pending))
    pool = concurrent.futures.ThreadPoolExecutor(
        max_workers=worker_count,
        thread_name_prefix="bs-task",
    )
    with pool:
        # Submit everything first, then collect in submission order.
        handles = [pool.submit(_run_one, item) for item in pending]
        results = []
        for handle in handles:
            results.append(handle.result())
        return results
@@ -0,0 +1,99 @@
1
+ """Minimal JSON-Schema-subset validator for task ``OUTPUT_SCHEMA`` (v0.2).
2
+
3
+ We don't pull in ``jsonschema`` — task schemas in practice cover one of
4
+ five shapes: list-of-dicts, dict, scalar, optional/nullable, and unions.
5
+ The validator handles those plus enough
6
+ plumbing for nested ``items`` / ``properties``. If a task needs a richer
7
+ schema it can ``pip install jsonschema`` and write its own ``validate()``;
8
+ we don't paint into a corner.
9
+
10
+ Supported keywords:
11
+ - ``type``: ``"object" | "array" | "string" | "integer" | "number" |
12
+ "boolean" | "null"`` (or a list for union types)
13
+ - ``properties``: object → property-name → sub-schema
14
+ - ``required``: list of required property names
15
+ - ``additionalProperties``: bool (default True). When False, extra keys
16
+ cause a validation error.
17
+ - ``items``: array → sub-schema applied to each element
18
+ - ``enum``: list of allowed scalar values
19
+ - ``nullable``: bool — convenience, equivalent to ``type: [..., "null"]``
20
+
21
+ Failures raise ``BrowserwrightError`` with a path-qualified message so the
22
+ agent can tell the user *which* field failed.
23
+ """
24
+ from __future__ import annotations
25
+
26
+ from typing import Any
27
+
28
+ from .errors import BrowserwrightError
29
+
30
+
31
# JSON-Schema type name → Python type (or tuple of types) accepted by
# ``isinstance``. Note that ``bool`` is a subclass of ``int`` in Python, so
# callers must special-case booleans when "integer"/"number" is requested.
_TYPE_MAP = {
    "object": dict,
    "array": list,
    "string": str,
    "integer": int,
    "number": (int, float),
    "boolean": bool,
    "null": type(None),
}
40
+
41
+
42
class OutputSchemaError(BrowserwrightError):
    """Raised when a task's return value does not match its ``OUTPUT_SCHEMA``.

    The individual parts of the failure are kept as attributes (``site``,
    ``task``, ``path``, ``msg_short``) and also folded into the exception
    message.
    """

    # NOTE(review): presumably the process exit status the CLI uses for this
    # error class — confirm against ``BrowserwrightError``.
    exit_code = 3

    def __init__(self, site: str, task: str, path: str, msg: str):
        self.site = site
        self.task = task
        self.path = path
        self.msg_short = msg
        full_message = f"OUTPUT_SCHEMA mismatch in {site}/{task} at {path}: {msg}"
        super().__init__(full_message)
48
+
49
+
50
def validate(value: Any, schema: dict, *, site: str = "", task: str = "") -> None:
    """Check ``value`` against ``schema``, starting at the root path ``$``.

    ``site`` and ``task`` are only used to label the error. Returns ``None``
    when the value conforms; a mismatch surfaces as ``OutputSchemaError``
    raised by the recursive checker.
    """
    root_path = "$"
    _check(value, schema, path=root_path, site=site, task=task)
    return None
53
+
54
+
55
+ def _types_for(schema: dict) -> list:
56
+ t = schema.get("type")
57
+ if isinstance(t, list):
58
+ out = [_TYPE_MAP[k] for k in t if k in _TYPE_MAP]
59
+ elif isinstance(t, str):
60
+ out = [_TYPE_MAP[t]] if t in _TYPE_MAP else []
61
+ else:
62
+ out = []
63
+ if schema.get("nullable"):
64
+ out.append(type(None))
65
+ return out
66
+
67
+
68
+ def _check(value, schema, *, path, site, task):
69
+ if not isinstance(schema, dict):
70
+ return
71
+ types = _types_for(schema)
72
+ if types:
73
+ # bool is a subclass of int in Python; treat them as distinct.
74
+ if int in types and bool not in types and isinstance(value, bool):
75
+ raise OutputSchemaError(site, task, path, f"expected {types}, got bool")
76
+ if not isinstance(value, tuple(types)):
77
+ raise OutputSchemaError(
78
+ site, task, path,
79
+ f"expected one of {[t.__name__ if isinstance(t, type) else t for t in types]}, "
80
+ f"got {type(value).__name__}",
81
+ )
82
+ if "enum" in schema and value not in schema["enum"]:
83
+ raise OutputSchemaError(site, task, path,
84
+ f"value {value!r} not in enum {schema['enum']!r}")
85
+ if isinstance(value, dict) and "properties" in schema:
86
+ props = schema.get("properties", {})
87
+ for key in schema.get("required", []):
88
+ if key not in value:
89
+ raise OutputSchemaError(site, task, f"{path}.{key}", "missing required key")
90
+ for k, v in value.items():
91
+ sub = props.get(k)
92
+ if sub is not None:
93
+ _check(v, sub, path=f"{path}.{k}", site=site, task=task)
94
+ elif schema.get("additionalProperties") is False:
95
+ raise OutputSchemaError(site, task, f"{path}.{k}", "unexpected key")
96
+ if isinstance(value, list) and "items" in schema:
97
+ items_schema = schema["items"]
98
+ for i, item in enumerate(value):
99
+ _check(item, items_schema, path=f"{path}[{i}]", site=site, task=task)
@@ -0,0 +1,67 @@
1
+ """v0.5.1 primitive surface (spec §A.2).
2
+
3
+ This module is what the REPL / inline / task entry points pull into
4
+ their exec globals. Keep it boring — no decorators, no metaprogramming —
5
+ so the agent gets stable, greppable names.
6
+
7
+ v0.5.1 (F-4 catch-up) added 13 primitives previously documented but not
8
+ re-exported: type_text / press_key / scroll / fill_input / dispatch_key
9
+ / upload_file / wait_for_element / wait_for_network_idle / drain_events
10
+ / ensure_real_tab / iframe_target / http_get plus three Layer-3 helpers
11
+ (list_site_skills / load_site_skill / run_task). Three primitives remain
12
+ deferred to v0.6 with explicit footnotes in design.md §A.2:
13
+ handle_dialog, try_recover_from_drift, plus the broader Layer-3 drift
14
+ recovery scaffold.
15
+ """
16
+ from .discovery_api import ( # noqa: F401
17
+ list_site_skills,
18
+ load_site_skill,
19
+ run_task,
20
+ )
21
+ from .http import http_get # noqa: F401
22
+ from .inspect import ( # noqa: F401
23
+ capture_screenshot,
24
+ cdp,
25
+ describe_page,
26
+ diff_snapshot,
27
+ page_info,
28
+ snapshot,
29
+ )
30
+ from .interact import ( # noqa: F401
31
+ click_at_xy,
32
+ dispatch_key,
33
+ drain_events,
34
+ fill_input,
35
+ js,
36
+ press_key,
37
+ scroll,
38
+ type_text,
39
+ upload_file,
40
+ wait_for_element,
41
+ wait_for_network_idle,
42
+ )
43
+ from .page import ( # noqa: F401
44
+ attach_active,
45
+ attach_readonly,
46
+ close_tab,
47
+ current_page,
48
+ current_tab,
49
+ ensure_real_tab,
50
+ goto_url,
51
+ iframe_target,
52
+ list_tabs,
53
+ new_tab,
54
+ open,
55
+ open_background,
56
+ reload,
57
+ switch_tab,
58
+ wait,
59
+ wait_for_load,
60
+ )
61
+ from .site import ( # noqa: F401
62
+ bootstrap_site,
63
+ memory_read,
64
+ remember,
65
+ remember_global,
66
+ remember_preference,
67
+ )
@@ -0,0 +1,79 @@
1
+ """Discovery / Layer-3 task primitives surfaced into the REPL namespace.
2
+
3
+ These are thin re-exports + tiny convenience wrappers around
4
+ ``browserwright.discovery`` and ``browserwright.task_runner`` so an agent
5
+ typing ``list_site_skills()`` / ``run_task("github.com/list_issues")`` /
6
+ ``load_site_skill("github.com")`` from the REPL or inline execution
7
+ doesn't get a NameError. Spec §A.2 v0.5.1 (F-4 catch-up).
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import importlib.util
12
+ from typing import Any, Optional
13
+
14
+ from ..discovery import find_task_path, list_tasks
15
+ from ..memory.site_mem import host_stem
16
+ from ..task_runner import run_task as _run_task
17
+
18
+
19
def list_site_skills(*, site: Optional[str] = None,
                     query: Optional[str] = None) -> list[dict]:
    """Enumerate bundled and user-installed tasks (same data as CLI ``list-tasks``).

    Both filters are forwarded unchanged to ``list_tasks``:

    - ``site`` narrows by stem (eTLD+1 or legacy alias);
    - ``query`` does substring scoring against task metadata.

    Each returned dict carries ``site``, ``name``, ``desc``, ``path`` and
    the discovery scoring fields.
    """
    matches = list_tasks(site=site, query=query)
    return matches
29
+
30
+
31
def load_site_skill(site: str, name: Optional[str] = None) -> Any:
    """Import a site-skill task module and return the module object.

    The module's ``run()``, ``ARGS``, ``OUTPUT_SCHEMA`` etc. become
    reachable as attributes; ``run()`` itself is *not* invoked (use
    ``run_task()`` for that).

    Accepted call shapes:
      - ``load_site_skill("github.com/list_issues")`` — slash form;
      - ``load_site_skill("github.com", "list_issues")`` — split form.

    The file is located with ``find_task_path`` on the host stem, so the
    eTLD+1 stem fallback applies (Bug 1 v0.3.1).

    Raises:
        ValueError: no task name was given in either form.
        ImportError: no importlib spec/loader could be built for the path.
    """
    if name is None:
        head, slash, tail = site.partition("/")
        if not slash:
            raise ValueError(
                "load_site_skill: missing task name. Pass "
                "'<site>/<name>' or two positional args."
            )
        site, name = head, tail

    task_path = find_task_path(host_stem(site), name)
    stem_ident = host_stem(site).replace(".", "_")
    module_name = f"browserwright_task_{stem_ident}_{name}"

    spec = importlib.util.spec_from_file_location(module_name, task_path)
    loader = None if spec is None else spec.loader
    if loader is None:
        raise ImportError(f"could not build importlib spec for {task_path}")

    module = importlib.util.module_from_spec(spec)
    loader.exec_module(module)
    return module
59
+
60
+
61
def run_task(site: str, name: Optional[str] = None, **kwargs) -> Any:
    """Run a site-skill's ``run(args, ctx=None)`` and hand back its value.

    Accepted call shapes:

    - ``run_task("github.com/list_issues", state="open")`` — slash form;
    - ``run_task("github.com", "list_issues", state="open")`` — split form.

    This delegates to ``browserwright.task_runner.run_task``, so agents
    calling through the REPL namespace get the same isolation semantics as
    the CLI ``task`` subcommand.

    Raises:
        ValueError: no task name was given in either form.
    """
    if name is None:
        head, slash, tail = site.partition("/")
        if not slash:
            raise ValueError(
                "run_task: missing task name. Pass '<site>/<name>' or two "
                "positional args."
            )
        site, name = head, tail

    outcome = _run_task(site, name, **kwargs)
    return outcome
@@ -0,0 +1,42 @@
1
+ """Pure-HTTP helpers — no browser required.
2
+
3
+ ``http_get`` is the canonical "I just want the bytes of a page" escape
4
+ hatch. Spec §A.2 / browser-harness pattern: pair with
5
+ ``ThreadPoolExecutor`` for bulk static-page scraping (e.g. paginated
6
+ list pages) — opening a browser for every page is wasteful.
7
+ """
8
+ from __future__ import annotations
9
+
10
+ import gzip
11
+ import os
12
+ import urllib.request
13
+ from typing import Optional
14
+
15
+
16
def http_get(url: str, *, headers: Optional[dict] = None,
             timeout: float = 20.0) -> str:
    """Plain HTTP GET that returns the response body as text.

    When ``BROWSER_USE_API_KEY`` is set, prefers the optional ``fetch_use``
    proxy (handles bot detection / residential proxies / retries) if it is
    installed; otherwise falls back to stdlib ``urllib`` with a vanilla
    Mozilla UA and a gzip ``Accept-Encoding`` header.

    Args:
        url: Address to fetch.
        headers: Extra request headers; they override the defaults.
        timeout: Timeout in seconds (converted to milliseconds for
            ``fetch_use``).

    Returns:
        The body decoded with the charset announced in ``Content-Type``,
        or UTF-8 when none (or an unknown one) is announced. Undecodable
        bytes are replaced with U+FFFD rather than raising, so a page in a
        legacy encoding no longer fails the whole call.
    """
    if os.environ.get("BROWSER_USE_API_KEY"):
        try:
            from fetch_use import fetch_sync  # type: ignore[import-not-found]
            return fetch_sync(
                url, headers=headers, timeout_ms=int(timeout * 1000),
            ).text
        except ImportError:
            # Optional dependency not installed — use the stdlib path below.
            pass

    request_headers = {"User-Agent": "Mozilla/5.0", "Accept-Encoding": "gzip"}
    if headers:
        request_headers.update(headers)
    req = urllib.request.Request(url, headers=request_headers)
    # NOTE(review): urlopen also honours non-HTTP schemes such as file://;
    # restrict the scheme here if ``url`` can come from untrusted input.
    with urllib.request.urlopen(req, timeout=timeout) as resp:
        body = resp.read()
        # Header values are case-insensitive tokens; "x-gzip" is the legacy
        # alias for "gzip".
        encoding = (resp.headers.get("Content-Encoding") or "").strip().lower()
        charset = resp.headers.get_content_charset()

    if encoding in ("gzip", "x-gzip"):
        body = gzip.decompress(body)

    try:
        return body.decode(charset or "utf-8", errors="replace")
    except LookupError:
        # The server announced a charset Python does not know.
        return body.decode("utf-8", errors="replace")
+ return data.decode()