browserwright 0.6.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. browserwright/__init__.py +33 -0
  2. browserwright/__main__.py +6 -0
  3. browserwright/_executor/__init__.py +47 -0
  4. browserwright/_executor/__main__.py +9 -0
  5. browserwright/_executor/client.py +127 -0
  6. browserwright/_executor/process.py +652 -0
  7. browserwright/_executor/protocol.py +152 -0
  8. browserwright/api.py +66 -0
  9. browserwright/cdp.py +285 -0
  10. browserwright/cli.py +741 -0
  11. browserwright/daemon/__init__.py +8 -0
  12. browserwright/daemon/_ipc.py +444 -0
  13. browserwright/daemon/active_tab.py +183 -0
  14. browserwright/daemon/auth.py +395 -0
  15. browserwright/daemon/backends/__init__.py +59 -0
  16. browserwright/daemon/backends/base.py +120 -0
  17. browserwright/daemon/backends/cloud.py +222 -0
  18. browserwright/daemon/backends/env.py +119 -0
  19. browserwright/daemon/backends/extension.py +185 -0
  20. browserwright/daemon/backends/rdp.py +214 -0
  21. browserwright/daemon/cli.py +1437 -0
  22. browserwright/daemon/config.py +380 -0
  23. browserwright/daemon/doctor.py +179 -0
  24. browserwright/daemon/errors.py +34 -0
  25. browserwright/daemon/launch_chrome.py +353 -0
  26. browserwright/daemon/observability.py +181 -0
  27. browserwright/daemon/platforms.py +234 -0
  28. browserwright/daemon/resolver.py +72 -0
  29. browserwright/daemon/server/__init__.py +6 -0
  30. browserwright/daemon/server/daemon.py +229 -0
  31. browserwright/daemon/server/executor_registry.py +434 -0
  32. browserwright/daemon/server/extension_upstream.py +677 -0
  33. browserwright/daemon/server/facade.py +375 -0
  34. browserwright/daemon/server/facade_extension.py +969 -0
  35. browserwright/daemon/server/listener.py +1058 -0
  36. browserwright/daemon/server/proxy.py +1991 -0
  37. browserwright/daemon/server/relay.py +783 -0
  38. browserwright/daemon/server/state.py +432 -0
  39. browserwright/daemon/server/upstream.py +266 -0
  40. browserwright/daemon/userscripts.py +150 -0
  41. browserwright/discovery.py +213 -0
  42. browserwright/errors.py +177 -0
  43. browserwright/health.py +169 -0
  44. browserwright/install.py +628 -0
  45. browserwright/memory/__init__.py +15 -0
  46. browserwright/memory/_md.py +120 -0
  47. browserwright/memory/_yaml.py +217 -0
  48. browserwright/memory/global_mem.py +201 -0
  49. browserwright/memory/repl_mem.py +28 -0
  50. browserwright/memory/session_decisions.py +53 -0
  51. browserwright/memory/site_mem.py +381 -0
  52. browserwright/mode_b_client.py +590 -0
  53. browserwright/multitask.py +131 -0
  54. browserwright/output_schema.py +99 -0
  55. browserwright/primitives/__init__.py +67 -0
  56. browserwright/primitives/discovery_api.py +79 -0
  57. browserwright/primitives/http.py +42 -0
  58. browserwright/primitives/inspect.py +876 -0
  59. browserwright/primitives/interact.py +518 -0
  60. browserwright/primitives/page.py +556 -0
  61. browserwright/primitives/site.py +143 -0
  62. browserwright/release_install.py +466 -0
  63. browserwright/repl/__init__.py +6 -0
  64. browserwright/repl/_namespace.py +106 -0
  65. browserwright/repl/_smart_goto.py +236 -0
  66. browserwright/repl/inline.py +180 -0
  67. browserwright/repl/playwright_handle.py +449 -0
  68. browserwright/repl/snapshot.py +150 -0
  69. browserwright/session.py +229 -0
  70. browserwright/session_create.py +252 -0
  71. browserwright/session_ctx.py +24 -0
  72. browserwright/session_registry.py +133 -0
  73. browserwright/session_runtime.py +133 -0
  74. browserwright/site_skills_starter/github.com/SKILL.md +14 -0
  75. browserwright/site_skills_starter/github.com/memory.md +29 -0
  76. browserwright/site_skills_starter/github.com/tasks/list_issues.py +55 -0
  77. browserwright/site_skills_starter/google.com/SKILL.md +16 -0
  78. browserwright/site_skills_starter/google.com/memory.md +27 -0
  79. browserwright/site_skills_starter/google.com/tasks/search.py +53 -0
  80. browserwright/site_skills_starter/producthunt.com/SKILL.md +7 -0
  81. browserwright/site_skills_starter/producthunt.com/memory.md +26 -0
  82. browserwright/site_skills_starter/producthunt.com/tasks/today.py +64 -0
  83. browserwright/site_skills_starter/wikipedia.org/SKILL.md +7 -0
  84. browserwright/site_skills_starter/wikipedia.org/memory.md +22 -0
  85. browserwright/site_skills_starter/wikipedia.org/tasks/lookup.py +55 -0
  86. browserwright/site_skills_starter/ycombinator.com/SKILL.md +8 -0
  87. browserwright/site_skills_starter/ycombinator.com/memory.md +25 -0
  88. browserwright/site_skills_starter/ycombinator.com/tasks/front_page.py +63 -0
  89. browserwright/skill_doc.py +140 -0
  90. browserwright/skill_runtime.md +194 -0
  91. browserwright/subscriptions.py +213 -0
  92. browserwright/task_runner.py +125 -0
  93. browserwright/version.py +117 -0
  94. browserwright-0.6.2.dist-info/METADATA +12 -0
  95. browserwright-0.6.2.dist-info/RECORD +98 -0
  96. browserwright-0.6.2.dist-info/WHEEL +5 -0
  97. browserwright-0.6.2.dist-info/entry_points.txt +3 -0
  98. browserwright-0.6.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,133 @@
1
+ """Transparent reconnect-recovery for a session's tab binding.
2
+
3
+ A session's durable extension anchor is its Chrome tab-group id. The ledger
4
+ record carries a ``runtime`` cache (``current_target_id``, ``group_id``,
5
+ ``owned_tab_ids``, ``updated_at``) as a fast path — the source of truth is the
6
+ live tab group keyed by that numeric id, recoverable via the daemon verb
7
+ ``BrowserwrightDaemon.recoverSession``.
8
+
9
+ These helpers let primitives re-attach to a session's tab across daemon
10
+ restarts / extension reconnects / new ``bs run`` processes without the caller
11
+ doing anything: ``ensure_session_target`` runs a 3-step fallback (in-process →
12
+ ledger.runtime fast path → group-id recovery).
13
+ """
14
+ from __future__ import annotations
15
+
16
+ import time
17
+ from collections import deque
18
+ from typing import Optional
19
+
20
+ from . import session_registry as reg
21
+ from .errors import CDPError
22
+
23
+
24
+ def _resolve_sid(sess) -> Optional[str]:
25
+ """Best-effort current session id from the bound record."""
26
+ rec = getattr(sess, "session_record", None)
27
+ if isinstance(rec, dict) and rec.get("id"):
28
+ return rec["id"]
29
+ return None
30
+
31
+
32
def _resolve_record(sess) -> Optional[dict]:
    """Fetch the ledger record for the session bound to ``sess``.

    The record is looked up through ``reg.get`` on every call rather than taken
    from ``sess``. Returns ``None`` when no session id can be resolved.
    """
    session_id = _resolve_sid(sess)
    return reg.get(session_id) if session_id else None
38
+
39
+
40
def persist_target(target_id: str, *, group_id: Optional[int] = None,
                   sess=None) -> None:
    """Cache the current tab binding in the ledger record's ``runtime`` field.

    Intended to be called wherever a primitive sets ``current_target_id`` so a
    later process can fast-path re-attach without querying the tab group.

    Args:
        target_id: Target id to store as ``runtime.current_target_id``.
        group_id: Tab-group id to store. When omitted, the group id already
            cached in the record (if any) is kept, so a target-only update
            does not erase the anchor used for group recovery.
        sess: Session whose record is updated; defaults to
            ``current_session()``.

    The write is best-effort: nothing is written when no session id can be
    resolved, and any ledger error is swallowed.
    """
    if sess is None:
        from .session import current_session
        sess = current_session()
    sid = _resolve_sid(sess)
    if not sid:
        return
    try:
        if group_id is None:
            # Keep a previously cached group id instead of overwriting it with
            # None; group recovery depends on that value.
            previous = reg.get(sid)
            cached = previous.get("runtime") if isinstance(previous, dict) else None
            if isinstance(cached, dict):
                group_id = cached.get("group_id")
        runtime = {
            "current_target_id": target_id,
            "group_id": group_id,
            "owned_tab_ids": [],
            "updated_at": time.time(),
        }
        reg.update(sid, runtime=runtime)
    except Exception:
        # Caching is best-effort; never let a ledger read/write break a primitive.
        pass
63
+
64
+
65
def register_recovered(sess, payload: dict) -> Optional[str]:
    """Record a daemon-provided target/session pair on ``sess`` and its CDP client.

    Reads ``targetId`` and ``sessionId`` from ``payload``. When both are
    present it maps target -> session in ``sess.cdp._sessions``, makes sure an
    event queue (a ``deque`` capped at 1024 entries) exists for the session in
    ``sess.cdp._events``, and sets ``sess.current_target_id``.

    Returns the target id, or ``None`` (leaving ``sess`` untouched) when either
    field is missing or falsy.
    """
    tab_target = payload.get("targetId")
    cdp_session = payload.get("sessionId")
    if not (tab_target and cdp_session):
        return None

    client = sess.cdp
    client._sessions[tab_target] = cdp_session
    client._events.setdefault(cdp_session, deque(maxlen=1024))
    sess.current_target_id = tab_target
    return tab_target
79
+
80
+
81
def ensure_session_target(sess) -> Optional[str]:
    """Recover the session's attached tab through a three-step fallback.

    1. ``sess.current_target_id`` already set -> return it (in-process).
    2. Ledger ``runtime.current_target_id`` -> try ``sess.cdp.attach(tid)``
       (fast path, no group query). A ``CDPError`` (e.g. a stale or closed
       tab) falls through to step 3.
    3. Ledger ``runtime.group_id`` -> send
       ``BrowserwrightDaemon.recoverSession`` for that numeric group id, then
       register and persist the binding it returns.

    Returns:
        The target id, or ``None`` when nothing could be recovered: no usable
        group id is cached, the daemon call raised ``CDPError``, or the
        payload was empty or malformed.
    """
    if sess.current_target_id:
        return sess.current_target_id

    rec = _resolve_record(sess)
    runtime = rec.get("runtime") if isinstance(rec, dict) else None
    if not isinstance(runtime, dict):
        # Missing or malformed cache: treat it as "nothing cached".
        runtime = {}

    # Step 2: ledger.runtime fast path.
    tid = runtime.get("current_target_id")
    if tid:
        try:
            sess.cdp.attach(tid)
        except CDPError:
            pass  # stale/closed tab — fall through to group recovery
        else:
            sess.current_target_id = tid
            return tid

    # Step 3: durable group recovery by the persisted numeric groupId — NOT the
    # title (names aren't unique; the session is the tab group, keyed by id).
    # Without a cached group id there is nothing to recover.
    gid = runtime.get("group_id")
    # bool is a subclass of int; True/False are not valid group ids.
    if isinstance(gid, bool) or not isinstance(gid, int) or gid < 0:
        return None
    # A usable gid can only have come from a dict record, so rec is a dict here.
    sid = rec.get("id")
    try:
        payload = sess.cdp.send(
            "BrowserwrightDaemon.recoverSession", groupId=gid, bsSession=sid,
        )
    except CDPError:
        return None
    if not payload:
        return None
    target_id = register_recovered(sess, payload)
    if target_id is None:
        return None
    recovered_gid = payload.get("groupId")
    if recovered_gid is None:
        # The payload did not echo the group id; keep the one that just worked
        # rather than persisting None and losing the anchor.
        recovered_gid = gid
    persist_target(target_id, group_id=recovered_gid, sess=sess)
    return target_id
@@ -0,0 +1,14 @@
1
+ # github.com
2
+
3
+ Browse public GitHub issues + repos without login.
4
+
5
+ ## Conventions
6
+
7
+ - Public issue / PR listings render fully without login — no auth wall.
8
+ - Use `https://github.com/<owner>/<repo>/issues?q=...` and parse the
9
+ list view; the API would be more polite, but this skill is fundamentally
10
+ browser-first.
11
+
12
+ ## Tasks
13
+
14
+ - `list_issues` — owner/repo (+ optional state filter) → list of issues.
@@ -0,0 +1,29 @@
1
+ ---
2
+ site: github
3
+ host_patterns: ["github.com", "www.github.com"]
4
+ aliases: ["github", "pull request", "issue tracker", "代码仓库"]
5
+ last_updated: 2026-05-18
6
+ ---
7
+
8
+ # github.com site memory
9
+
10
+ ## 顶层 URL 结构
11
+
12
+ - repo home: https://github.com/<owner>/<repo>
13
+ - issue list: https://github.com/<owner>/<repo>/issues?state=open
14
+ - PR list: https://github.com/<owner>/<repo>/pulls?state=open
15
+
16
+ ## 稳定 selectors
17
+
18
+ - 列表项: `div[aria-label="Issues"] [data-testid="issue-pr-title-link"]` 或经典 `a.js-navigation-open`
19
+ - 标题文字: `[data-testid="issue-pr-title-link"]`
20
+ - 状态徽标: `[data-testid="issue-state"]`
21
+ - 数字编号: `a[id^=issue_]`
22
+
23
+ ## Known traps
24
+
25
+ - React-rendered "new issues UI" 在某些仓库上替代了 classic — 选择器要双路。
26
+ - 私有仓库会重定向到 login → AuthWall。
27
+ - `https://github.com/issues` 在未登录态下也会跳 login。
28
+
29
+ ## Notes
@@ -0,0 +1,55 @@
1
+ """List public GitHub issues for owner/repo without login.
2
+
3
+ Phase C surface: drives the injected Playwright ``page`` (reused, navigated in
4
+ place); reads the issue list with ``page.evaluate``.
5
+ """
6
+
7
# Task metadata for the ``list_issues`` task. The code that consumes these
# names is not visible in this file — presumably the task runner (confirm).

# Accepted arguments: declared type, whether the argument is required, an
# optional default, and a human-readable description.
ARGS = {
    "owner": {"type": "str", "required": True, "desc": "github owner / org name"},
    "repo": {"type": "str", "required": True, "desc": "repo name"},
    "state": {"type": "str", "required": False, "default": "open",
              "desc": "open / closed / all"},
    "limit": {"type": "int", "required": False, "default": 20},
}

# Informal description of the value ``run`` returns.
OUTPUT = "list[{number: int, title: str, url: str, state: str}]"
# Free-form labels for this task.
TAGS = ["github", "issues", "list"]
# The task only reads public pages.
REQUIRES_LOGIN = False
# Rough runtime estimate, in seconds (per the name).
ESTIMATED_DURATION_SEC = 10
# ISO date string; presumably the last time the task was checked against the live site.
LAST_VERIFIED = "2026-05-25"
20
+
21
+
22
def selftest():
    """Smoke check: open the GitHub home page on the injected ``page``.

    Returns True when the URL reported by ``page`` after navigation starts
    with ``https://github.com``.
    """
    page.goto("https://github.com/")
    landed_on = page.url
    return landed_on.startswith("https://github.com")
25
+
26
+
27
def run(args, ctx=None):
    """List issues of ``owner/repo`` by scraping the public issue-list page.

    Args:
        args: Mapping with ``owner`` and ``repo`` (required), plus optional
            ``state`` (``open`` / ``closed`` / ``all``, default ``open``) and
            ``limit`` (default 20; negative values are treated as 0).
        ctx: Optional object exposing a ``page``; when absent or without a
            truthy ``page``, the injected global ``page`` is used.

    Returns:
        A list of ``{number, title, url, state}`` dicts as produced by the
        in-page script, or ``[]`` when the script returns nothing.
    """
    from urllib.parse import quote

    pg = ctx.page if ctx is not None and getattr(ctx, "page", None) else page

    # Percent-encode every caller-supplied URL component so stray characters
    # cannot change the path or the query string.
    owner = quote(str(args["owner"]).strip(), safe="")
    repo = quote(str(args["repo"]).strip(), safe="")
    state = str(args.get("state") or "open").strip().lower()
    # A negative limit would make JS slice() drop items from the end.
    limit = max(0, int(args.get("limit", 20)))

    # "all" means "no state filter": GitHub documents state:open/state:closed
    # only, so the qualifier is omitted instead of sending state:all.
    query = "is%3Aissue"
    if state != "all":
        query += "+state%3A" + quote(state, safe="")
    url = f"https://github.com/{owner}/{repo}/issues?q={query}"
    pg.goto(url)
    results = pg.evaluate(
        """
        (limit) => {
            const cards = Array.from(document.querySelectorAll(
                '[data-testid="issue-pr-title-link"], a.js-navigation-open'
            )).filter(a => /\\/issues\\/\\d+$/.test(a.getAttribute('href') || ''));
            return cards.slice(0, limit).map(a => {
                const m = a.getAttribute('href').match(/\\/issues\\/(\\d+)$/);
                const stateBadge = a.closest('li, div')?.querySelector('[data-testid="issue-state"]');
                return {
                    number: m ? parseInt(m[1], 10) : null,
                    title: a.innerText.trim(),
                    url: new URL(a.getAttribute('href'), location.origin).toString(),
                    state: stateBadge ? stateBadge.innerText.trim().toLowerCase() : '',
                };
            });
        }
        """,
        limit,
    )
    return results or []
@@ -0,0 +1,16 @@
1
+ # google.com
2
+
3
+ Search Google and extract the top organic results.
4
+
5
+ ## Conventions
6
+
7
+ - Navigate the bound `page` in place with `page.goto(...)` — reuse the working
8
+ tab, don't open a new one per query.
9
+ - The result selector `div.g h3` is stable across the consumer SERP. The mobile
10
+ layout uses `div[data-hveid] h3` — `tasks/search.py` falls back if needed.
11
+ - Don't try to log in. Google's bot detection is aggressive; the skill stays
12
+ unauthenticated and reads public SERP only.
13
+
14
+ ## Tasks
15
+
16
+ - `search` — query → `list[{title, url, snippet}]`
@@ -0,0 +1,27 @@
1
+ ---
2
+ site: google
3
+ host_patterns: ["google.com", "www.google.com"]
4
+ aliases: ["谷歌", "google search", "search the web"]
5
+ last_updated: 2026-05-18
6
+ ---
7
+
8
+ # google.com site memory
9
+
10
+ ## 顶层 URL 结构
11
+
12
+ - 搜索: https://www.google.com/search?q=<query>&hl=en
13
+ - 图片搜索: https://www.google.com/search?tbm=isch&q=<query>
14
+
15
+ ## 稳定 selectors
16
+
17
+ - 结果块: `div.g`
18
+ - 结果标题: `div.g h3` / `div[data-hveid] h3`
19
+ - 结果链接: `div.g a[href^=http]`
20
+ - 结果摘要: `div.g div[data-content-feature="1"]`
21
+
22
+ ## Known traps
23
+
24
+ - 偶尔 SERP 被 "before you continue" cookie consent 拦截:先 dismiss `button#L2AGLb` 或 `form[action*="consent"]`。
25
+ - 中文 query 在某些区域返回 "did you mean…" 重定向,结果数为 0;要识别并提示 user。
26
+
27
+ ## Notes
@@ -0,0 +1,53 @@
1
+ """Search Google for `query` and return the top N organic results.
2
+
3
+ Phase C surface: drives the injected Playwright ``page`` (reused, navigated in
4
+ place) and reads results via ``page.evaluate``.
5
+ """
6
+
7
# Task metadata for the ``search`` task. The code that consumes these names is
# not visible in this file — presumably the task runner (confirm).

# Accepted arguments: declared type, whether the argument is required, an
# optional default, and a human-readable description.
ARGS = {
    "query": {"type": "str", "required": True, "desc": "search query"},
    "limit": {"type": "int", "required": False, "default": 10},
    "hl": {"type": "str", "required": False, "default": "en", "desc": "interface language"},
}

# Informal description of the value ``run`` returns.
OUTPUT = "list[{title: str, url: str, snippet: str}]"
# Free-form labels for this task.
TAGS = ["search", "general"]
# The task reads the public results page only.
REQUIRES_LOGIN = False
# Rough runtime estimate, in seconds (per the name).
ESTIMATED_DURATION_SEC = 8
# ISO date string; presumably the last time the task was checked against the live site.
LAST_VERIFIED = "2026-05-25"
18
+
19
+
20
def selftest():
    """Smoke check: open the Google home page on the injected ``page``.

    Returns True when ``google.com`` appears in the URL reported by ``page``
    after navigation.
    """
    page.goto("https://www.google.com/?hl=en")
    landed_on = page.url
    return "google.com" in landed_on
23
+
24
+
25
def run(args, ctx=None):
    """Search Google for ``args["query"]`` and return the top results.

    Args:
        args: Mapping with ``query`` (required), plus optional ``limit``
            (default 10; negative values are treated as 0) and ``hl``
            interface language (default ``en``).
        ctx: Optional object exposing a ``page``; when absent or without a
            truthy ``page``, the injected global ``page`` is used.

    Returns:
        A list of ``{title, url, snippet}`` dicts with at most ``limit``
        entries and no repeated URLs, or ``[]`` when the in-page script
        returns nothing.
    """
    from urllib.parse import quote_plus

    pg = ctx.page if ctx is not None and getattr(ctx, "page", None) else page

    q = args["query"]
    limit = max(0, int(args.get("limit", 10)))
    hl = args.get("hl") or "en"
    # Encode hl as well as the query so it cannot inject extra parameters.
    pg.goto(
        f"https://www.google.com/search?q={quote_plus(q)}&hl={quote_plus(str(hl))}"
    )
    results = pg.evaluate(
        """
        (limit) => {
            // 'div.g' and 'div[data-hveid]' nest inside each other, so the
            // same result can match more than once; keep the first hit per URL.
            const seen = new Set();
            const out = [];
            for (const div of document.querySelectorAll('div.g, div[data-hveid]')) {
                if (out.length >= limit) break;
                const a = div.querySelector('a[href^="http"]');
                const h = div.querySelector('h3');
                if (!a || !h || seen.has(a.href)) continue;
                seen.add(a.href);
                const sn = div.querySelector('div[data-content-feature="1"], div[role="link"] + div');
                out.push({
                    title: h.innerText.trim(),
                    url: a.href,
                    snippet: (sn && sn.innerText.trim()) || '',
                });
            }
            return out;
        }
        """,
        limit,
    )
    return results or []
@@ -0,0 +1,7 @@
1
+ # producthunt.com
2
+
3
+ Today's Product Hunt front page.
4
+
5
+ ## Tasks
6
+
7
+ - `today` — `limit?` → list of today's products with vote counts and tagline.
@@ -0,0 +1,26 @@
1
+ ---
2
+ site: producthunt
3
+ host_patterns: ["producthunt.com", "www.producthunt.com"]
4
+ aliases: ["product hunt", "ph", "产品猎手", "今日新品"]
5
+ last_updated: 2026-05-18
6
+ ---
7
+
8
+ # producthunt.com site memory
9
+
10
+ ## 顶层 URL 结构
11
+
12
+ - 今日榜: https://www.producthunt.com/
13
+
14
+ ## Known traps
15
+
16
+ - Cookie consent 横幅可能挡住第一屏;如果选择器返回空,先 dismiss 一次再重试。
17
+ - ProductHunt 偶尔 A/B 切 React tree,hard-coded selector 会变;当 selector
18
+ miss 时改用 anchor `a[href^="/posts/"]` 兜底。
19
+
20
+ ## 稳定 selectors
21
+
22
+ - 卡片: `[data-test^="post-item"]` 或 `a[href^="/posts/"]`
23
+ - 名称: 卡片内 `a[href^="/posts/"]` 的 innerText
24
+ - vote 数: 卡片内 `[data-test="vote-button"]` 的 innerText
25
+
26
+ ## Notes
@@ -0,0 +1,64 @@
1
+ """Today's top products on Product Hunt.
2
+
3
+ Phase C surface: drives the injected Playwright ``page`` (reused, navigated in
4
+ place); scrapes the feed with ``page.evaluate``.
5
+ """
6
+
7
# Task metadata for the ``today`` task. The code that consumes these names is
# not visible in this file — presumably the task runner (confirm).

# Accepted arguments: declared type, whether the argument is required, and an
# optional default.
ARGS = {
    "limit": {"type": "int", "required": False, "default": 20},
}

# Informal description of the value ``run`` returns.
OUTPUT = "list[{name: str, url: str, tagline: str, votes: int}]"
# Free-form labels for this task.
TAGS = ["producthunt", "feed", "launches"]
# The task reads the public front page only.
REQUIRES_LOGIN = False
# Rough runtime estimate, in seconds (per the name).
ESTIMATED_DURATION_SEC = 10
# ISO date string; presumably the last time the task was checked against the live site.
LAST_VERIFIED = "2026-05-25"
16
+
17
+
18
def selftest():
    """Smoke check: open the Product Hunt front page on the injected ``page``.

    Returns True when ``producthunt`` appears in the URL reported by ``page``
    after navigation.
    """
    page.goto("https://www.producthunt.com/")
    landed_on = page.url
    return "producthunt" in landed_on
21
+
22
+
23
def run(args, ctx=None):
    """Scrape the Product Hunt front page and return up to ``limit`` products.

    Args:
        args: Mapping with an optional ``limit`` (default 20).
        ctx: Optional object exposing a ``page``; when absent or without a
            truthy ``page``, the injected global ``page`` is used.

    Returns:
        The first ``limit`` ``{name, url, tagline, votes}`` dicts produced by
        the in-page script, or ``[]`` when the script returns nothing.
    """
    ctx_page = getattr(ctx, "page", None) if ctx is not None else None
    pg = ctx_page if ctx_page else page

    max_items = int(args.get("limit", 20))
    pg.goto("https://www.producthunt.com/")

    # The script collects every product on the page; truncation to
    # ``max_items`` happens on the Python side below.
    scrape_js = """
        () => {
            // Hand-rolled scrape: walk every /posts/<slug> anchor on the page
            // and grab a sibling vote count if we can find one.
            const seen = new Set();
            const out = [];
            const anchors = document.querySelectorAll('a[href^="/posts/"]');
            anchors.forEach(a => {
                const slug = a.getAttribute('href').replace(/^\\/posts\\//, '').split(/[?#]/)[0];
                if (!slug || seen.has(slug)) return;
                const card = a.closest('[data-test^="post-item"], li, article, div');
                if (!card) return;
                const name = a.innerText.trim();
                if (!name) return;
                let votes = 0;
                const numberEl = card.querySelector('[data-test="vote-button"], button');
                if (numberEl) {
                    const m = (numberEl.innerText || '').replace(/,/g, '').match(/\\d+/);
                    if (m) votes = parseInt(m[0], 10);
                }
                let tagline = '';
                const para = card.querySelector('p, [class*="tagline"], [class*="description"]');
                if (para && para.innerText) tagline = para.innerText.trim();
                out.push({
                    name,
                    url: 'https://www.producthunt.com/posts/' + slug,
                    tagline,
                    votes,
                });
                seen.add(slug);
            });
            return out;
        }
        """
    scraped = pg.evaluate(scrape_js)
    if not scraped:
        return []
    return scraped[:max_items]
@@ -0,0 +1,7 @@
1
+ # wikipedia.org
2
+
3
+ Quick lookup of Wikipedia articles by title.
4
+
5
+ ## Tasks
6
+
7
+ - `lookup` — title (+ optional `lang`) → `{title, summary, url, sections}`
@@ -0,0 +1,22 @@
1
+ ---
2
+ site: wikipedia
3
+ host_patterns: ["wikipedia.org", "en.wikipedia.org"]
4
+ aliases: ["wikipedia", "wiki", "维基百科"]
5
+ last_updated: 2026-05-18
6
+ ---
7
+
8
+ # wikipedia.org site memory
9
+
10
+ ## 顶层 URL 结构
11
+
12
+ - 英文: https://en.wikipedia.org/wiki/<Title_With_Underscores>
13
+ - 中文: https://zh.wikipedia.org/wiki/<Title>
14
+ - 检索: https://<lang>.wikipedia.org/w/index.php?search=<query>&fulltext=1
15
+
16
+ ## 稳定 selectors
17
+
18
+ - 摘要段: `div.mw-parser-output > p:not(.mw-empty-elt)`
19
+ - 章节标题: `h2 > span.mw-headline`
20
+ - 第一段: `div.mw-parser-output > p:not(.mw-empty-elt):first-of-type`
21
+
22
+ ## Notes
@@ -0,0 +1,55 @@
1
+ """Lookup a Wikipedia article and return its first-paragraph summary + section TOC.
2
+
3
+ Phase C surface: drives the injected Playwright ``page`` (bound to the session's
4
+ current tab — reused, navigated in place). No ``new_tab`` / ``js`` primitives.
5
+ """
6
+
7
# Task metadata for the ``lookup`` task. The code that consumes these names is
# not visible in this file — presumably the task runner (confirm).

# Accepted arguments: declared type, whether the argument is required, an
# optional default, and a human-readable description.
ARGS = {
    "title": {"type": "str", "required": True, "desc": "article title (free text)"},
    "lang": {"type": "str", "required": False, "default": "en",
             "desc": "wikipedia language subdomain (en/zh/ja/...)"},
}

# Informal description of the value ``run`` returns.
OUTPUT = "{title: str, url: str, summary: str, sections: list[str]}"
# Free-form labels for this task.
TAGS = ["wikipedia", "lookup", "reference"]
# The task reads public article pages only.
REQUIRES_LOGIN = False
# Rough runtime estimate, in seconds (per the name).
ESTIMATED_DURATION_SEC = 6
# ISO date string; presumably the last time the task was checked against the live site.
LAST_VERIFIED = "2026-05-25"
18
+
19
+
20
def selftest():
    """Smoke check: open the English "Wikipedia" article on the injected ``page``.

    Returns True when the page title reported by ``page.title()`` contains
    ``Wikipedia``.
    """
    page.goto("https://en.wikipedia.org/wiki/Wikipedia")
    shown_title = page.title()
    return "Wikipedia" in shown_title
23
+
24
+
25
def run(args, ctx=None):
    """Open a Wikipedia article and return its title, summary and section list.

    Args:
        args: Mapping with ``title`` (required, free text) and optional
            ``lang`` (language subdomain such as ``en`` / ``zh``; default
            ``en``).
        ctx: Optional object exposing a ``page``; when absent or without a
            truthy ``page``, the injected global ``page`` is used.

    Returns:
        ``{title, url, summary, sections}`` as produced by the in-page script.
        When the script returns nothing, a skeleton with the requested title
        and URL and empty ``summary`` / ``sections`` is returned instead.

    Raises:
        ValueError: if ``lang`` is not a plain subdomain label (lowercase
            letters, digits and hyphens).
    """
    import re
    from urllib.parse import quote

    # `page` / `context` / `snapshot` are injected by the task runner (Phase C),
    # mirroring the heredoc namespace. Prefer ctx if a caller passed one.
    pg = ctx.page if ctx is not None and getattr(ctx, "page", None) else page

    title = args["title"]
    lang = str(args.get("lang") or "en").strip().lower()
    # ``lang`` becomes part of the hostname; anything but a simple label
    # (e.g. "evil.com/") would send the browser to a different site.
    if not re.fullmatch(r"[a-z0-9]+(?:-[a-z0-9]+)*", lang):
        raise ValueError(f"invalid wikipedia language subdomain: {lang!r}")
    underscore_title = title.strip().replace(" ", "_")
    url = f"https://{lang}.wikipedia.org/wiki/{quote(underscore_title)}"
    pg.goto(url)
    info = pg.evaluate(
        """
        () => {
            const summary_p = document.querySelector(
                'div.mw-parser-output > p:not(.mw-empty-elt)'
            );
            // Older MediaWiki markup wraps the heading text in
            // span.mw-headline; newer markup puts it directly in the <h2>.
            // Handle both, and skip the table-of-contents heading.
            const sections = Array.from(
                document.querySelectorAll('div.mw-parser-output h2:not(#mw-toc-heading)')
            ).map(h2 => (h2.querySelector('.mw-headline') || h2).innerText.trim())
             .filter(Boolean);
            // #firstHeading carries the article title in every language; the
            // ' - Wikipedia' suffix only exists on English pages.
            const heading = document.querySelector('#firstHeading');
            return {
                title: heading
                    ? heading.innerText.trim()
                    : document.title.replace(' - Wikipedia', '').trim(),
                url: location.href,
                summary: summary_p ? summary_p.innerText.trim() : '',
                sections,
            };
        }
        """
    )
    return info or {"title": title, "url": url, "summary": "", "sections": []}
@@ -0,0 +1,8 @@
1
+ # news.ycombinator.com
2
+
3
+ Hacker News front page + newest/best variants.
4
+
5
+ ## Tasks
6
+
7
+ - `front_page` — `(limit=30)` → top stories from /news
8
+ - `front_page` works for `top`, `new`, `best` via a `kind` arg.
@@ -0,0 +1,25 @@
1
+ ---
2
+ site: ycombinator.com
3
+ host_patterns: ["news.ycombinator.com", "ycombinator.com"]
4
+ aliases: ["hacker news", "hn", "黑客新闻"]
5
+ last_updated: 2026-05-18
6
+ ---
7
+
8
+ # Hacker News site memory
9
+
10
+ ## 顶层 URL 结构
11
+
12
+ - 首页 / top: https://news.ycombinator.com/
13
+ - newest: https://news.ycombinator.com/newest
14
+ - best: https://news.ycombinator.com/best
15
+
16
+ ## 稳定 selectors
17
+
18
+ - 表格行: `tr.athing` — id 属性是 story id
19
+ - 标题链接: `tr.athing span.titleline > a`
20
+ - 分数: 紧邻 athing 之后的 `tr` 下的 `span.score`
21
+ - 评论数: 同上 `tr` 下面的最后一个 `a`
22
+
23
+ ## Notes
24
+
25
+ - HN 上很少改 DOM。这里的 selector 在过去 10 年基本没变。
@@ -0,0 +1,63 @@
1
+ """Pull top stories from Hacker News (top/new/best).
2
+
3
+ Phase C surface: drives the injected Playwright ``page`` (reused, navigated in
4
+ place); reads the story list with ``page.evaluate``.
5
+ """
6
+
7
# Task metadata for the ``front_page`` task. The code that consumes these
# names is not visible in this file — presumably the task runner (confirm).

# Accepted arguments: declared type, whether the argument is required, an
# optional default, and a human-readable description.
ARGS = {
    "kind": {"type": "str", "required": False, "default": "top",
             "desc": "top / new / best"},
    "limit": {"type": "int", "required": False, "default": 30},
}

# Informal description of the value ``run`` returns.
OUTPUT = "list[{rank: int, id: int, title: str, url: str, score: int, comments: int}]"
# Free-form labels for this task.
TAGS = ["hn", "news", "feed"]
# The task reads public listing pages only.
REQUIRES_LOGIN = False
# Rough runtime estimate, in seconds (per the name).
ESTIMATED_DURATION_SEC = 5
# ISO date string; presumably the last time the task was checked against the live site.
LAST_VERIFIED = "2026-05-25"

# Feed name -> listing URL. ``run`` uses the "top" URL for any unknown kind.
_KINDS = {
    "top": "https://news.ycombinator.com/",
    "new": "https://news.ycombinator.com/newest",
    "best": "https://news.ycombinator.com/best",
}
24
+
25
+
26
def selftest():
    """Smoke check: open the Hacker News front page on the injected ``page``.

    Returns True when ``ycombinator.com`` appears in the URL reported by
    ``page`` after navigation.
    """
    page.goto("https://news.ycombinator.com/")
    landed_on = page.url
    return "ycombinator.com" in landed_on
29
+
30
+
31
def run(args, ctx=None):
    """Read a Hacker News listing and return up to ``limit`` stories.

    Args:
        args: Mapping with optional ``kind`` (a key of ``_KINDS``; unknown
            values use the "top" listing; default ``top``) and ``limit``
            (default 30).
        ctx: Optional object exposing a ``page``; when absent or without a
            truthy ``page``, the injected global ``page`` is used.

    Returns:
        The first ``limit`` ``{rank, id, title, url, score, comments}`` dicts
        produced by the in-page script, or ``[]`` when it returns nothing.
    """
    ctx_page = getattr(ctx, "page", None) if ctx is not None else None
    pg = ctx_page if ctx_page else page

    feed = args.get("kind", "top")
    max_items = int(args.get("limit", 30))
    listing_url = _KINDS[feed] if feed in _KINDS else _KINDS["top"]
    pg.goto(listing_url)

    # The script returns every story row; truncation to ``max_items`` happens
    # on the Python side below.
    scrape_js = """
        () => {
            const out = [];
            const stories = document.querySelectorAll('tr.athing');
            stories.forEach((row, i) => {
                const a = row.querySelector('span.titleline > a');
                if (!a) return;
                const sub = row.nextElementSibling;
                const score = sub ? sub.querySelector('span.score') : null;
                const links = sub ? sub.querySelectorAll('a') : [];
                const commentLink = links.length ? links[links.length - 1] : null;
                out.push({
                    rank: i + 1,
                    id: parseInt(row.id, 10),
                    title: a.innerText.trim(),
                    url: a.href,
                    score: score ? parseInt(score.innerText, 10) : 0,
                    comments: commentLink ? parseInt(commentLink.innerText, 10) || 0 : 0,
                });
            });
            return out;
        }
        """
    scraped = pg.evaluate(scrape_js)
    if not scraped:
        return []
    return scraped[:max_items]