browserwright 0.6.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- browserwright/__init__.py +33 -0
- browserwright/__main__.py +6 -0
- browserwright/_executor/__init__.py +47 -0
- browserwright/_executor/__main__.py +9 -0
- browserwright/_executor/client.py +127 -0
- browserwright/_executor/process.py +652 -0
- browserwright/_executor/protocol.py +152 -0
- browserwright/api.py +66 -0
- browserwright/cdp.py +285 -0
- browserwright/cli.py +741 -0
- browserwright/daemon/__init__.py +8 -0
- browserwright/daemon/_ipc.py +444 -0
- browserwright/daemon/active_tab.py +183 -0
- browserwright/daemon/auth.py +395 -0
- browserwright/daemon/backends/__init__.py +59 -0
- browserwright/daemon/backends/base.py +120 -0
- browserwright/daemon/backends/cloud.py +222 -0
- browserwright/daemon/backends/env.py +119 -0
- browserwright/daemon/backends/extension.py +185 -0
- browserwright/daemon/backends/rdp.py +214 -0
- browserwright/daemon/cli.py +1437 -0
- browserwright/daemon/config.py +380 -0
- browserwright/daemon/doctor.py +179 -0
- browserwright/daemon/errors.py +34 -0
- browserwright/daemon/launch_chrome.py +353 -0
- browserwright/daemon/observability.py +181 -0
- browserwright/daemon/platforms.py +234 -0
- browserwright/daemon/resolver.py +72 -0
- browserwright/daemon/server/__init__.py +6 -0
- browserwright/daemon/server/daemon.py +229 -0
- browserwright/daemon/server/executor_registry.py +434 -0
- browserwright/daemon/server/extension_upstream.py +677 -0
- browserwright/daemon/server/facade.py +375 -0
- browserwright/daemon/server/facade_extension.py +969 -0
- browserwright/daemon/server/listener.py +1058 -0
- browserwright/daemon/server/proxy.py +1991 -0
- browserwright/daemon/server/relay.py +783 -0
- browserwright/daemon/server/state.py +432 -0
- browserwright/daemon/server/upstream.py +266 -0
- browserwright/daemon/userscripts.py +150 -0
- browserwright/discovery.py +213 -0
- browserwright/errors.py +177 -0
- browserwright/health.py +169 -0
- browserwright/install.py +628 -0
- browserwright/memory/__init__.py +15 -0
- browserwright/memory/_md.py +120 -0
- browserwright/memory/_yaml.py +217 -0
- browserwright/memory/global_mem.py +201 -0
- browserwright/memory/repl_mem.py +28 -0
- browserwright/memory/session_decisions.py +53 -0
- browserwright/memory/site_mem.py +381 -0
- browserwright/mode_b_client.py +590 -0
- browserwright/multitask.py +131 -0
- browserwright/output_schema.py +99 -0
- browserwright/primitives/__init__.py +67 -0
- browserwright/primitives/discovery_api.py +79 -0
- browserwright/primitives/http.py +42 -0
- browserwright/primitives/inspect.py +876 -0
- browserwright/primitives/interact.py +518 -0
- browserwright/primitives/page.py +556 -0
- browserwright/primitives/site.py +143 -0
- browserwright/release_install.py +466 -0
- browserwright/repl/__init__.py +6 -0
- browserwright/repl/_namespace.py +106 -0
- browserwright/repl/_smart_goto.py +236 -0
- browserwright/repl/inline.py +180 -0
- browserwright/repl/playwright_handle.py +449 -0
- browserwright/repl/snapshot.py +150 -0
- browserwright/session.py +229 -0
- browserwright/session_create.py +252 -0
- browserwright/session_ctx.py +24 -0
- browserwright/session_registry.py +133 -0
- browserwright/session_runtime.py +133 -0
- browserwright/site_skills_starter/github.com/SKILL.md +14 -0
- browserwright/site_skills_starter/github.com/memory.md +29 -0
- browserwright/site_skills_starter/github.com/tasks/list_issues.py +55 -0
- browserwright/site_skills_starter/google.com/SKILL.md +16 -0
- browserwright/site_skills_starter/google.com/memory.md +27 -0
- browserwright/site_skills_starter/google.com/tasks/search.py +53 -0
- browserwright/site_skills_starter/producthunt.com/SKILL.md +7 -0
- browserwright/site_skills_starter/producthunt.com/memory.md +26 -0
- browserwright/site_skills_starter/producthunt.com/tasks/today.py +64 -0
- browserwright/site_skills_starter/wikipedia.org/SKILL.md +7 -0
- browserwright/site_skills_starter/wikipedia.org/memory.md +22 -0
- browserwright/site_skills_starter/wikipedia.org/tasks/lookup.py +55 -0
- browserwright/site_skills_starter/ycombinator.com/SKILL.md +8 -0
- browserwright/site_skills_starter/ycombinator.com/memory.md +25 -0
- browserwright/site_skills_starter/ycombinator.com/tasks/front_page.py +63 -0
- browserwright/skill_doc.py +140 -0
- browserwright/skill_runtime.md +194 -0
- browserwright/subscriptions.py +213 -0
- browserwright/task_runner.py +125 -0
- browserwright/version.py +117 -0
- browserwright-0.6.2.dist-info/METADATA +12 -0
- browserwright-0.6.2.dist-info/RECORD +98 -0
- browserwright-0.6.2.dist-info/WHEEL +5 -0
- browserwright-0.6.2.dist-info/entry_points.txt +3 -0
- browserwright-0.6.2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
"""Multi-task fan-out (v0.3).
|
|
2
|
+
|
|
3
|
+
Runs N tasks concurrently. Each one gets its own ``Session`` (and therefore
|
|
4
|
+
its own ws to the daemon, its own sessionId namespace, its own
|
|
5
|
+
``current_target_id``). The daemon v0.3 multi-client mux serialises traffic
|
|
6
|
+
into the single upstream Chrome ws; from Skill's point of view the tasks
|
|
7
|
+
are truly independent — `new_tab()` in task A doesn't yank the tab task B
|
|
8
|
+
is operating on.
|
|
9
|
+
|
|
10
|
+
This module is intentionally small. The hard work was done in #55 (the
|
|
11
|
+
``ContextVar``-backed ``with_session`` machinery). Here we just iterate.
|
|
12
|
+
|
|
13
|
+
Concurrency model
|
|
14
|
+
-----------------
|
|
15
|
+
Primitives are sync. The CDP transport is thread-safe (single ``send`` lock).
|
|
16
|
+
So we use a ``ThreadPoolExecutor`` rather than asyncio:
|
|
17
|
+
|
|
18
|
+
- Each worker thread enters ``with_session(isolated_session())`` and runs the task.
|
|
19
|
+
- Sessions are independent ``ContextVar`` slots (#55 covers thread isolation).
|
|
20
|
+
- Daemon assigns each ws its own client id, so per-thread sessionIds don't
|
|
21
|
+
collide on the wire either.
|
|
22
|
+
|
|
23
|
+
Layer 3 (cron / scheduler) shells out to either ``browserwright task ...``
|
|
24
|
+
one-at-a-time or — for bursty work — calls this helper from Python::
|
|
25
|
+
|
|
26
|
+
from browserwright.multitask import run_tasks_concurrent
|
|
27
|
+
rows = run_tasks_concurrent([
|
|
28
|
+
("ycombinator.com", "front_page", {"limit": 10}),
|
|
29
|
+
("wikipedia.org", "lookup", {"title": "Python"}),
|
|
30
|
+
], max_workers=4)
|
|
31
|
+
"""
|
|
32
|
+
from __future__ import annotations
|
|
33
|
+
|
|
34
|
+
import concurrent.futures
|
|
35
|
+
from typing import Any, Callable, Iterable, Optional
|
|
36
|
+
|
|
37
|
+
from .errors import BrowserwrightError
|
|
38
|
+
from .session import isolated_session, with_session
|
|
39
|
+
from .task_runner import run_task
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# One unit of fan-out work. ``_run_one`` unpacks it as (site, name, kwargs)
# and forwards ``kwargs`` to ``run_task`` as keyword arguments.
TaskSpec = tuple[str, str, dict]  # (site, name, kwargs)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class TaskResult(dict):
    """Outcome of one fan-out task.

    A plain ``dict`` subclass, so a result can be handed to ``json.dumps``
    as-is. Keys::

        "site", "name"   the task that was run
        "ok"             True on success, False on failure
        "value"          the task's return value   (only when ok)
        "error_type"     exception class name      (only when not ok)
        "error_msg"      ``str()`` of the exception (only when not ok)
        "elapsed_sec"    float, seconds spent on the task
    """
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _run_one(spec: TaskSpec) -> TaskResult:
    """Worker: run a single task inside its own isolated ``Session``.

    A fresh session is created, pushed onto the ContextVar via
    ``with_session`` for the duration of the task, and closed on exit so the
    daemon's client slot is freed promptly. Daemon v0.3 doesn't enforce a
    single-client cap, but releasing eagerly still helps the daemon's idle
    policy + uiState accounting stay accurate.

    Args:
        spec: ``(site, name, kwargs)``; ``kwargs`` is forwarded to
            ``run_task`` as keyword arguments.

    Returns:
        A ``TaskResult`` with ``ok=True`` and ``value`` on success, or
        ``ok=False`` with ``error_type`` / ``error_msg`` when opening the
        session or running the task raised. ``elapsed_sec`` is always set.
    """
    import time

    site, name, kwargs = spec
    t0 = time.monotonic()
    sess = None
    try:
        # Session creation lives inside the ``try`` so that a failure to
        # open it becomes an ``ok=False`` result instead of escaping the
        # worker and aborting the whole fan-out.
        sess = isolated_session()
        with with_session(sess):
            value = run_task(site, name, **kwargs)
    except (BrowserwrightError, Exception) as e:  # noqa: BLE001 — agent-facing catch-all
        # Domain errors and unexpected errors are reported identically.
        return TaskResult(
            site=site, name=name, ok=False,
            error_type=type(e).__name__, error_msg=str(e),
            elapsed_sec=round(time.monotonic() - t0, 3),
        )
    finally:
        # ``sess`` is still None when ``isolated_session()`` itself failed.
        if sess is not None:
            sess.close()
    return TaskResult(
        site=site, name=name, ok=True, value=value,
        elapsed_sec=round(time.monotonic() - t0, 3),
    )
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def run_tasks_concurrent(specs: Iterable[TaskSpec], *,
                         max_workers: int = 4,
                         warm_upstream: bool = False) -> list[TaskResult]:
    """Fan out every ``(site, name, kwargs)`` spec onto a thread pool.

    Returns one ``TaskResult`` per spec, in input order (not completion
    order), which keeps the caller's downstream pipeline predictable.

    Exceptions never propagate out — each failure becomes an ``ok=False``
    result. Layer 3 examines results and decides what to retry/log.

    .. deprecated:: 0.3.0
       ``warm_upstream`` is a no-op since the daemon shipped the #76
       pre-open buffer fix. The earlier Skill-side workaround (a sync probe
       on the main session before spawning workers) is no longer needed:
       the daemon now buffers frames per client while
       ``upstream phase != CONNECTED`` and replays them once the upstream
       ws opens (``PRE_OPEN_BUFFER_LIMIT=100``; overflow surfaces as
       JSON-RPC error ``-32603``). The keyword is still accepted for source
       compatibility but has no effect; remove it from your call site.

       **Removal target: v0.6** (REVIEW.md F-17). After v0.6 ships the
       keyword will be removed from the signature and any caller still
       passing it will hit a ``TypeError``.

    Notes
    -----
    * ``max_workers`` defaults to 4. Daemon v0.3 multi-client supports more
      but Chrome itself gets stressed past ~8 concurrent navigations.
    * The pool is never larger than the number of specs.
    """
    del warm_upstream  # accepted-but-ignored; see deprecation note above.

    pending = list(specs)
    if len(pending) == 0:
        return []

    worker_count = min(max_workers, len(pending))
    results = []
    with concurrent.futures.ThreadPoolExecutor(
        max_workers=worker_count,
        thread_name_prefix="bs-task",
    ) as pool:
        # Submit everything first, then collect in submission order.
        submitted = [pool.submit(_run_one, item) for item in pending]
        for fut in submitted:
            results.append(fut.result())
    return results
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
"""Minimal JSON-Schema-subset validator for task ``OUTPUT_SCHEMA`` (v0.2).
|
|
2
|
+
|
|
3
|
+
We don't pull in ``jsonschema`` — task schemas in practice cover one of
|
|
4
|
+
five shapes: list-of-dicts, dict, scalar, optional/nullable, and unions.
|
|
5
|
+
The validator handles those plus enough
|
|
6
|
+
plumbing for nested ``items`` / ``properties``. If a task needs a richer
|
|
7
|
+
schema it can ``pip install jsonschema`` and write its own ``validate()``;
|
|
8
|
+
we don't paint into a corner.
|
|
9
|
+
|
|
10
|
+
Supported keywords:
|
|
11
|
+
- ``type``: ``"object" | "array" | "string" | "integer" | "number" |
|
|
12
|
+
"boolean" | "null"`` (or a list for union types)
|
|
13
|
+
- ``properties``: object → property-name → sub-schema
|
|
14
|
+
- ``required``: list of required property names
|
|
15
|
+
- ``additionalProperties``: bool (default True). When False, extra keys
|
|
16
|
+
cause a validation error.
|
|
17
|
+
- ``items``: array → sub-schema applied to each element
|
|
18
|
+
- ``enum``: list of allowed scalar values
|
|
19
|
+
- ``nullable``: bool — convenience, equivalent to ``type: [..., "null"]``
|
|
20
|
+
|
|
21
|
+
Failures raise ``BrowserwrightError`` with a path-qualified message so the
|
|
22
|
+
agent can tell the user *which* field failed.
|
|
23
|
+
"""
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
from typing import Any
|
|
27
|
+
|
|
28
|
+
from .errors import BrowserwrightError
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# JSON-Schema ``type`` keyword -> Python type (or tuple of types) usable
# with ``isinstance``. ``bool`` is a subclass of ``int`` in Python, so the
# validator has to tell the two apart itself.
_TYPE_MAP = dict(
    object=dict,
    array=list,
    string=str,
    integer=int,
    number=(int, float),
    boolean=bool,
    null=type(None),
)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class OutputSchemaError(BrowserwrightError):
    """Raised when a task's return value does not match its ``OUTPUT_SCHEMA``.

    Instance attributes:
        site, task: which task produced the offending value.
        path: location of the mismatch inside the value (``$`` is the root).
        msg_short: the bare reason, without the site/task/path prefix.
    """

    # NOTE(review): presumably the process exit status used when this error
    # reaches the CLI — confirm against ``BrowserwrightError``.
    exit_code = 3

    def __init__(self, site: str, task: str, path: str, msg: str):
        self.site = site
        self.task = task
        self.path = path
        self.msg_short = msg
        super().__init__(f"OUTPUT_SCHEMA mismatch in {site}/{task} at {path}: {msg}")
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def validate(value: Any, schema: dict, *, site: str = "", task: str = "") -> None:
    """Check ``value`` against ``schema``, starting at the root path ``$``.

    Returns ``None`` when the shape matches; raises ``OutputSchemaError``
    (carrying ``site`` / ``task`` and the failing path) when it does not.
    """
    _check(value, schema, site=site, task=task, path="$")
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _types_for(schema: dict) -> list:
    """Return the Python types a schema's ``type`` keyword allows.

    ``type`` may be a single name or a list of names (a union). Names that
    are not in ``_TYPE_MAP`` are skipped. An empty result means "no type
    constraint".

    ``nullable: true`` widens an existing constraint with ``NoneType``. It
    does not create a constraint on its own: a schema with ``nullable`` but
    no recognised ``type`` stays unconstrained, rather than accepting only
    ``None``.
    """
    declared = schema.get("type")
    if isinstance(declared, str):
        declared = [declared]
    elif not isinstance(declared, list):
        declared = []
    # ``isinstance(k, str)`` keeps unhashable junk in a ``type`` list from
    # raising TypeError on the dict lookup.
    out = [_TYPE_MAP[k] for k in declared if isinstance(k, str) and k in _TYPE_MAP]
    none_type = type(None)
    if out and schema.get("nullable") and none_type not in out:
        out.append(none_type)
    return out
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _check(value, schema, *, path, site, task):
    """Recursively validate ``value`` against ``schema``.

    Args:
        value: the (sub-)value being checked.
        schema: the sub-schema for ``value``; anything that is not a dict is
            treated as "no constraint".
        path: location of ``value`` inside the root value, used in errors
            (``$``, ``$.key``, ``$[0]`` ...).
        site, task: passed through to ``OutputSchemaError``.

    Raises:
        OutputSchemaError: on the first mismatch found.
    """
    if not isinstance(schema, dict):
        return

    # ``_types_for`` may return tuples (``"number"`` -> ``(int, float)``).
    # Flatten them so membership tests and error messages see plain classes.
    flat = []
    for entry in _types_for(schema):
        if isinstance(entry, tuple):
            flat.extend(entry)
        else:
            flat.append(entry)
    allowed = list(dict.fromkeys(flat))  # de-duplicate, keep order

    if allowed:
        # bool is a subclass of int in Python; treat them as distinct, so
        # True/False only pass when "boolean" is explicitly allowed. This
        # also covers "number", not just "integer".
        bool_mismatch = isinstance(value, bool) and bool not in allowed
        if bool_mismatch or not isinstance(value, tuple(allowed)):
            raise OutputSchemaError(
                site, task, path,
                f"expected one of {[t.__name__ for t in allowed]}, "
                f"got {type(value).__name__}",
            )

    if "enum" in schema and value not in schema["enum"]:
        raise OutputSchemaError(site, task, path,
                                f"value {value!r} not in enum {schema['enum']!r}")

    if isinstance(value, dict):
        # ``required`` and ``additionalProperties`` apply to any object
        # value, whether or not ``properties`` is also given.
        for key in schema.get("required", []):
            if key not in value:
                raise OutputSchemaError(site, task, f"{path}.{key}", "missing required key")
        props = schema.get("properties") or {}
        closed = schema.get("additionalProperties") is False
        for k, v in value.items():
            sub = props.get(k)
            if sub is not None:
                _check(v, sub, path=f"{path}.{k}", site=site, task=task)
            elif closed:
                raise OutputSchemaError(site, task, f"{path}.{k}", "unexpected key")

    if isinstance(value, list) and "items" in schema:
        items_schema = schema["items"]
        for i, item in enumerate(value):
            _check(item, items_schema, path=f"{path}[{i}]", site=site, task=task)
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
"""v0.5.1 primitive surface (spec §A.2).
|
|
2
|
+
|
|
3
|
+
This module is what the REPL / inline / task entry points pull into
|
|
4
|
+
their exec globals. Keep it boring — no decorators, no metaprogramming —
|
|
5
|
+
so the agent gets stable, greppable names.
|
|
6
|
+
|
|
7
|
+
v0.5.1 (F-4 catch-up) added 13 primitives previously documented but not
|
|
8
|
+
re-exported: type_text / press_key / scroll / fill_input / dispatch_key
|
|
9
|
+
/ upload_file / wait_for_element / wait_for_network_idle / drain_events
|
|
10
|
+
/ ensure_real_tab / iframe_target / http_get plus three Layer-3 helpers
|
|
11
|
+
(list_site_skills / load_site_skill / run_task). Three primitives remain
|
|
12
|
+
deferred to v0.6 with explicit footnotes in design.md §A.2:
|
|
13
|
+
handle_dialog, try_recover_from_drift, plus the broader Layer-3 drift
|
|
14
|
+
recovery scaffold.
|
|
15
|
+
"""
|
|
16
|
+
from .discovery_api import ( # noqa: F401
|
|
17
|
+
list_site_skills,
|
|
18
|
+
load_site_skill,
|
|
19
|
+
run_task,
|
|
20
|
+
)
|
|
21
|
+
from .http import http_get # noqa: F401
|
|
22
|
+
from .inspect import ( # noqa: F401
|
|
23
|
+
capture_screenshot,
|
|
24
|
+
cdp,
|
|
25
|
+
describe_page,
|
|
26
|
+
diff_snapshot,
|
|
27
|
+
page_info,
|
|
28
|
+
snapshot,
|
|
29
|
+
)
|
|
30
|
+
from .interact import ( # noqa: F401
|
|
31
|
+
click_at_xy,
|
|
32
|
+
dispatch_key,
|
|
33
|
+
drain_events,
|
|
34
|
+
fill_input,
|
|
35
|
+
js,
|
|
36
|
+
press_key,
|
|
37
|
+
scroll,
|
|
38
|
+
type_text,
|
|
39
|
+
upload_file,
|
|
40
|
+
wait_for_element,
|
|
41
|
+
wait_for_network_idle,
|
|
42
|
+
)
|
|
43
|
+
from .page import ( # noqa: F401
|
|
44
|
+
attach_active,
|
|
45
|
+
attach_readonly,
|
|
46
|
+
close_tab,
|
|
47
|
+
current_page,
|
|
48
|
+
current_tab,
|
|
49
|
+
ensure_real_tab,
|
|
50
|
+
goto_url,
|
|
51
|
+
iframe_target,
|
|
52
|
+
list_tabs,
|
|
53
|
+
new_tab,
|
|
54
|
+
open,
|
|
55
|
+
open_background,
|
|
56
|
+
reload,
|
|
57
|
+
switch_tab,
|
|
58
|
+
wait,
|
|
59
|
+
wait_for_load,
|
|
60
|
+
)
|
|
61
|
+
from .site import ( # noqa: F401
|
|
62
|
+
bootstrap_site,
|
|
63
|
+
memory_read,
|
|
64
|
+
remember,
|
|
65
|
+
remember_global,
|
|
66
|
+
remember_preference,
|
|
67
|
+
)
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
"""Discovery / Layer-3 task primitives surfaced into the REPL namespace.
|
|
2
|
+
|
|
3
|
+
These are thin re-exports + tiny convenience wrappers around
|
|
4
|
+
``browserwright.discovery`` and ``browserwright.task_runner`` so an agent
|
|
5
|
+
typing ``list_site_skills()`` / ``run_task("github.com/list_issues")`` /
|
|
6
|
+
``load_site_skill("github.com")`` from the REPL or inline execution
|
|
7
|
+
doesn't get a NameError. Spec §A.2 v0.5.1 (F-4 catch-up).
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import importlib.util
|
|
12
|
+
from typing import Any, Optional
|
|
13
|
+
|
|
14
|
+
from ..discovery import find_task_path, list_tasks
|
|
15
|
+
from ..memory.site_mem import host_stem
|
|
16
|
+
from ..task_runner import run_task as _run_task
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def list_site_skills(*, site: Optional[str] = None,
                     query: Optional[str] = None) -> list[dict]:
    """Enumerate bundled + user-installed tasks (alias of CLI ``list-tasks``).

    Both filters are optional and are handed to ``discovery.list_tasks``
    unchanged: ``site`` narrows by stem (eTLD+1 or legacy alias), ``query``
    does substring scoring against task metadata.

    Each returned dict carries ``site``, ``name``, ``desc``, ``path`` and
    the discovery scoring fields.
    """
    matches = list_tasks(site=site, query=query)
    return matches
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def load_site_skill(site: str, name: Optional[str] = None) -> Any:
    """Import a site-skill task module so its ``run()``, ``ARGS``,
    ``OUTPUT_SCHEMA``, etc. are reachable as attributes.

    Two shapes:
      - ``load_site_skill("github.com/list_issues")`` (slash form) →
        load that specific task.
      - ``load_site_skill("github.com", "list_issues")`` → same, split.

    Pure module import; no ``run()`` invocation. Use ``run_task()`` to
    actually execute. Path is resolved via ``find_task_path`` so the
    eTLD+1 stem fallback applies (Bug 1 v0.3.1).

    Raises:
        ValueError: no task name was given, or it is empty (for example
            ``"github.com/"`` with a trailing slash).
        ImportError: no importlib spec/loader could be built for the path.
    """
    if name is None and "/" in site:
        site, name = site.split("/", 1)
    # ``not name`` also rejects the empty name left by a trailing slash,
    # which would otherwise be looked up as a task called "".
    if not name:
        raise ValueError(
            "load_site_skill: missing task name. Pass "
            "'<site>/<name>' or two positional args."
        )
    stem = host_stem(site)
    path = find_task_path(stem, name)
    mod_name = f"browserwright_task_{stem.replace('.', '_')}_{name}"
    spec = importlib.util.spec_from_file_location(mod_name, path)
    if spec is None or spec.loader is None:
        raise ImportError(f"could not build importlib spec for {path}")
    mod = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(mod)
    return mod
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def run_task(site: str, name: Optional[str] = None, **kwargs) -> Any:
    """Execute a site-skill's ``run(args, ctx=None)`` and return its
    value. Two argument shapes:

      - ``run_task("github.com/list_issues", state="open")`` (slash form)
      - ``run_task("github.com", "list_issues", state="open")`` (split)

    Delegates to ``browserwright.task_runner.run_task`` so agents calling
    this through the REPL namespace get the same isolation semantics as
    the CLI ``task`` subcommand.

    Raises:
        ValueError: no task name was given, or it is empty (for example
            ``"github.com/"`` with a trailing slash).
    """
    if name is None and "/" in site:
        site, name = site.split("/", 1)
    # ``not name`` also rejects the empty name left by a trailing slash.
    if not name:
        raise ValueError(
            "run_task: missing task name. Pass '<site>/<name>' or two "
            "positional args."
        )
    return _run_task(site, name, **kwargs)
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""Pure-HTTP helpers — no browser required.
|
|
2
|
+
|
|
3
|
+
``http_get`` is the canonical "I just want the bytes of a page" escape
|
|
4
|
+
hatch. Spec §A.2 / browser-harness pattern: pair with
|
|
5
|
+
``ThreadPoolExecutor`` for bulk static-page scraping (e.g. paginated
|
|
6
|
+
list pages) — opening a browser for every page is wasteful.
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import gzip
|
|
11
|
+
import os
|
|
12
|
+
import urllib.request
|
|
13
|
+
from typing import Optional
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def http_get(url: str, *, headers: Optional[dict] = None,
             timeout: float = 20.0) -> str:
    """Plain HTTP GET. Decodes gzip automatically; returns text.

    When ``BROWSER_USE_API_KEY`` is set, prefers the optional ``fetch_use``
    proxy (handles bot detection / residential proxies / retries) if
    installed; otherwise falls back to stdlib ``urllib`` with a vanilla
    Mozilla UA + gzip Accept-Encoding header.

    Args:
        url: address to fetch.
        headers: extra request headers; they override the defaults.
        timeout: seconds (converted to milliseconds for ``fetch_use``).

    Returns:
        The response body as text. On the ``urllib`` path the body is
        decoded with the charset from ``Content-Type``, defaulting to UTF-8;
        undecodable bytes become U+FFFD instead of raising.
    """
    if os.environ.get("BROWSER_USE_API_KEY"):
        try:
            from fetch_use import fetch_sync  # type: ignore[import-not-found]
            return fetch_sync(
                url, headers=headers, timeout_ms=int(timeout * 1000),
            ).text
        except ImportError:
            # Optional dependency missing: fall through to urllib.
            pass

    h = {"User-Agent": "Mozilla/5.0", "Accept-Encoding": "gzip"}
    if headers:
        h.update(headers)
    req = urllib.request.Request(url, headers=h)
    with urllib.request.urlopen(req, timeout=timeout) as r:
        data = r.read()
        # Header values are case-insensitive tokens; "x-gzip" is the legacy
        # alias for gzip.
        encoding = (r.headers.get("Content-Encoding") or "").strip().lower()
        if encoding in ("gzip", "x-gzip"):
            data = gzip.decompress(data)
        charset = r.headers.get_content_charset() or "utf-8"
    try:
        return data.decode(charset, errors="replace")
    except LookupError:
        # The server advertised a charset Python does not know.
        return data.decode("utf-8", errors="replace")
|