browserwright 0.6.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. browserwright/__init__.py +33 -0
  2. browserwright/__main__.py +6 -0
  3. browserwright/_executor/__init__.py +47 -0
  4. browserwright/_executor/__main__.py +9 -0
  5. browserwright/_executor/client.py +127 -0
  6. browserwright/_executor/process.py +652 -0
  7. browserwright/_executor/protocol.py +152 -0
  8. browserwright/api.py +66 -0
  9. browserwright/cdp.py +285 -0
  10. browserwright/cli.py +741 -0
  11. browserwright/daemon/__init__.py +8 -0
  12. browserwright/daemon/_ipc.py +444 -0
  13. browserwright/daemon/active_tab.py +183 -0
  14. browserwright/daemon/auth.py +395 -0
  15. browserwright/daemon/backends/__init__.py +59 -0
  16. browserwright/daemon/backends/base.py +120 -0
  17. browserwright/daemon/backends/cloud.py +222 -0
  18. browserwright/daemon/backends/env.py +119 -0
  19. browserwright/daemon/backends/extension.py +185 -0
  20. browserwright/daemon/backends/rdp.py +214 -0
  21. browserwright/daemon/cli.py +1437 -0
  22. browserwright/daemon/config.py +380 -0
  23. browserwright/daemon/doctor.py +179 -0
  24. browserwright/daemon/errors.py +34 -0
  25. browserwright/daemon/launch_chrome.py +353 -0
  26. browserwright/daemon/observability.py +181 -0
  27. browserwright/daemon/platforms.py +234 -0
  28. browserwright/daemon/resolver.py +72 -0
  29. browserwright/daemon/server/__init__.py +6 -0
  30. browserwright/daemon/server/daemon.py +229 -0
  31. browserwright/daemon/server/executor_registry.py +434 -0
  32. browserwright/daemon/server/extension_upstream.py +677 -0
  33. browserwright/daemon/server/facade.py +375 -0
  34. browserwright/daemon/server/facade_extension.py +969 -0
  35. browserwright/daemon/server/listener.py +1058 -0
  36. browserwright/daemon/server/proxy.py +1991 -0
  37. browserwright/daemon/server/relay.py +783 -0
  38. browserwright/daemon/server/state.py +432 -0
  39. browserwright/daemon/server/upstream.py +266 -0
  40. browserwright/daemon/userscripts.py +150 -0
  41. browserwright/discovery.py +213 -0
  42. browserwright/errors.py +177 -0
  43. browserwright/health.py +169 -0
  44. browserwright/install.py +628 -0
  45. browserwright/memory/__init__.py +15 -0
  46. browserwright/memory/_md.py +120 -0
  47. browserwright/memory/_yaml.py +217 -0
  48. browserwright/memory/global_mem.py +201 -0
  49. browserwright/memory/repl_mem.py +28 -0
  50. browserwright/memory/session_decisions.py +53 -0
  51. browserwright/memory/site_mem.py +381 -0
  52. browserwright/mode_b_client.py +590 -0
  53. browserwright/multitask.py +131 -0
  54. browserwright/output_schema.py +99 -0
  55. browserwright/primitives/__init__.py +67 -0
  56. browserwright/primitives/discovery_api.py +79 -0
  57. browserwright/primitives/http.py +42 -0
  58. browserwright/primitives/inspect.py +876 -0
  59. browserwright/primitives/interact.py +518 -0
  60. browserwright/primitives/page.py +556 -0
  61. browserwright/primitives/site.py +143 -0
  62. browserwright/release_install.py +466 -0
  63. browserwright/repl/__init__.py +6 -0
  64. browserwright/repl/_namespace.py +106 -0
  65. browserwright/repl/_smart_goto.py +236 -0
  66. browserwright/repl/inline.py +180 -0
  67. browserwright/repl/playwright_handle.py +449 -0
  68. browserwright/repl/snapshot.py +150 -0
  69. browserwright/session.py +229 -0
  70. browserwright/session_create.py +252 -0
  71. browserwright/session_ctx.py +24 -0
  72. browserwright/session_registry.py +133 -0
  73. browserwright/session_runtime.py +133 -0
  74. browserwright/site_skills_starter/github.com/SKILL.md +14 -0
  75. browserwright/site_skills_starter/github.com/memory.md +29 -0
  76. browserwright/site_skills_starter/github.com/tasks/list_issues.py +55 -0
  77. browserwright/site_skills_starter/google.com/SKILL.md +16 -0
  78. browserwright/site_skills_starter/google.com/memory.md +27 -0
  79. browserwright/site_skills_starter/google.com/tasks/search.py +53 -0
  80. browserwright/site_skills_starter/producthunt.com/SKILL.md +7 -0
  81. browserwright/site_skills_starter/producthunt.com/memory.md +26 -0
  82. browserwright/site_skills_starter/producthunt.com/tasks/today.py +64 -0
  83. browserwright/site_skills_starter/wikipedia.org/SKILL.md +7 -0
  84. browserwright/site_skills_starter/wikipedia.org/memory.md +22 -0
  85. browserwright/site_skills_starter/wikipedia.org/tasks/lookup.py +55 -0
  86. browserwright/site_skills_starter/ycombinator.com/SKILL.md +8 -0
  87. browserwright/site_skills_starter/ycombinator.com/memory.md +25 -0
  88. browserwright/site_skills_starter/ycombinator.com/tasks/front_page.py +63 -0
  89. browserwright/skill_doc.py +140 -0
  90. browserwright/skill_runtime.md +194 -0
  91. browserwright/subscriptions.py +213 -0
  92. browserwright/task_runner.py +125 -0
  93. browserwright/version.py +117 -0
  94. browserwright-0.6.2.dist-info/METADATA +12 -0
  95. browserwright-0.6.2.dist-info/RECORD +98 -0
  96. browserwright-0.6.2.dist-info/WHEEL +5 -0
  97. browserwright-0.6.2.dist-info/entry_points.txt +3 -0
  98. browserwright-0.6.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,381 @@
1
+ """Site-level memory — ``site-skills/<host-stem>/memory.md``."""
2
+ from __future__ import annotations
3
+
4
+ import datetime as _dt
5
+ import fcntl
6
+ import os
7
+ import re
8
+ import threading
9
+ from pathlib import Path
10
+ from typing import Optional
11
+ from urllib.parse import urlparse
12
+
13
+ from . import _md
14
+ from .global_mem import home_dir
15
+
16
+
17
+ # ---- redaction --------------------------------------------------------
18
+
19
# Each pattern below flags one class of text that must not be persisted to
# site memory. ``redact_check`` searches (not full-matches) every pattern.

# Any run of 32 or more letters, digits, underscores or hyphens.
_HIGH_ENTROPY_RE = re.compile(r"[A-Za-z0-9_\-]{32,}")
# The word "Bearer" (any case) followed by whitespace and a token-like run.
_BEARER_RE = re.compile(r"\bBearer\s+[A-Za-z0-9._\-]+", re.IGNORECASE)
# A ``/Users/<name>/`` path prefix.
_USER_PATH_RE = re.compile(r"/Users/[A-Za-z0-9_.\-]+/")
# A ``/home/<name>/`` path prefix.
_HOME_PATH_RE = re.compile(r"/home/[A-Za-z0-9_.\-]+/")
# A cookie/session/CSRF keyword (any case) directly followed by ``:`` or ``=``.
_COOKIE_RE = re.compile(r"\b(?:set-cookie|cookie|session_id|csrf[_-]token)\s*[:=]", re.IGNORECASE)
# 13 to 19 digits, each optionally followed by a single space or hyphen.
_CARD_RE = re.compile(r"\b(?:\d[ -]?){13,19}\b")


# Reason code -> compiled pattern. The keys are the codes returned by
# ``redact_check``; iteration order is the insertion order shown here.
REDACTION_REASONS = {
    "high_entropy": _HIGH_ENTROPY_RE,
    "bearer_token": _BEARER_RE,
    "user_path": _USER_PATH_RE,
    "home_path": _HOME_PATH_RE,
    "cookie_or_session": _COOKIE_RE,
    "card_number": _CARD_RE,
}
35
+
36
+
37
def redact_check(text: str) -> list[str]:
    """Report which redaction rules ``text`` trips.

    Every pattern in ``REDACTION_REASONS`` is searched against ``text``; the
    reason code of each pattern that matches anywhere is collected, in the
    mapping's iteration order.

    Returns:
        The triggered reason codes. An empty list means the text is safe to
        write. Callers are expected to reject on any hit (spec §C.3 rule 2).
    """
    return [
        code
        for code, pattern in REDACTION_REASONS.items()
        if pattern.search(text)
    ]
48
+
49
+
50
+ # ---- host → directory stem -------------------------------------------
51
+
52
+
53
# Literal lowercased hostname -> on-disk stem. Looked up before any
# algorithmic stem is computed, so an entry here always wins.
_STEM_OVERRIDES = {
    # spec §B.1: short aliases that beat the algorithmic eTLD+1 choice for a
    # handful of high-value sites with non-obvious natural names.
    "zhipin.com": "boss-zhipin",
    "boss.zhipin.com": "boss-zhipin",
    "www.zhipin.com": "boss-zhipin",
    "mail.google.com": "gmail",
}
61
+
62
+
63
# Minimum viable set of two-label public suffixes, so that eTLD+1 picks the
# right "registered name + TLD" for cc-suffix hosts like ``bbc.co.uk``. This
# is deliberately not a full Public Suffix List — only the buckets we expect
# to hit. Add more entries if a site is being mis-stemmed in practice.
_MULTI_LABEL_TLDS = {
    "co.uk", "ac.uk", "gov.uk", "org.uk", "net.uk", "me.uk",
    "co.jp", "ac.jp", "or.jp", "ne.jp", "go.jp",
    "co.kr", "or.kr", "ne.kr",
    "com.cn", "net.cn", "org.cn", "gov.cn", "edu.cn",
    "com.hk", "com.tw", "com.sg", "com.au", "net.au", "org.au",
    "com.br", "com.mx", "com.ar",
    "co.in", "co.nz", "co.za",
}
76
+
77
+
78
def _split_host(host_or_url: str) -> str:
    """Normalise a hostname or URL to a bare, lowercased hostname.

    Accepts either a full URL (anything containing ``://``) or a bare host.
    A bare host may carry a port, path, query or userinfo
    (``example.com:8080``, ``example.com/path``); those parts are dropped so
    they can never leak into an on-disk directory name.

    Args:
        host_or_url: URL or hostname. ``None`` and ``""`` are treated as
            empty.

    Returns:
        The lowercased hostname with surrounding whitespace and leading or
        trailing dots removed, or ``""`` when nothing usable was supplied.

    Raises:
        ValueError: only for a malformed URL that contains ``://`` (as raised
            by ``urlparse``); scheme-less input never raises.
    """
    raw = host_or_url or ""
    if "://" in raw:
        host = urlparse(raw).hostname or raw
    else:
        # Parse scheme-less input as a network-path reference ("//host…") so
        # urlparse strips port/path/userinfo exactly as it does for URLs.
        try:
            host = urlparse("//" + raw.strip()).hostname or raw
        except ValueError:
            # Malformed authority (e.g. unbalanced IPv6 brackets): fall back
            # to the raw text rather than start raising for bare hosts.
            host = raw
    # FQDN form sometimes carries a trailing dot (``github.com.``);
    # treat it as equivalent to the trimmed form. REVIEW.md F-9.
    return host.lower().strip().strip(".")
86
+
87
+
88
def host_stem(host_or_url: str) -> str:
    """Map a host or URL to the name of its on-disk site directory.

    Resolution order (changed in v0.3.1 — Bug 1 in AI E2E run):

    1. An exact match of the normalised hostname in ``_STEM_OVERRIDES``.
    2. Otherwise **eTLD+1**: the last two labels, or the last three when the
       last two form a known multi-label suffix (``co.uk``, ``com.cn`` …)
       and a third label exists.

    Examples: ``news.ycombinator.com → ycombinator.com``,
    ``en.wikipedia.org → wikipedia.org``, ``www.google.com → google.com``,
    ``shop.example.co.uk → example.co.uk``.

    A host with fewer than two labels is returned unchanged; an empty host
    becomes ``"unknown"``.

    Before v0.3.1 only the first label was used
    (``news.ycombinator.com → news``). Memory written under those short
    stems stays readable through the ``_read_candidates()`` fallback used by
    ``SiteMemory.read()``.

    NOTE(review): dotted IPv4 literals go through the same label logic, so
    ``192.168.0.1`` yields ``0.1`` — confirm whether IP hosts need their own
    rule.
    """
    hostname = _split_host(host_or_url)
    override = _STEM_OVERRIDES.get(hostname)
    if override is not None:
        return override

    labels = hostname.split(".") if hostname else []
    if len(labels) < 2:
        return hostname or "unknown"

    suffix = ".".join(labels[-2:])
    # Keep one extra label when the trailing pair is itself a public suffix.
    keep = 3 if (suffix in _MULTI_LABEL_TLDS and len(labels) >= 3) else 2
    return ".".join(labels[-keep:])
115
+
116
+
117
def _legacy_host_stem(host_or_url: str) -> str:
    """Compute the pre-v0.3.1 short-label stem for a host or URL.

    Used for *read* fallback only: memory written before Bug 1 was patched
    may live under this stem (e.g. ``site-skills/news/memory.md`` for
    ``news.ycombinator.com``). It is never used for new writes.

    An override in ``_STEM_OVERRIDES`` wins. Otherwise a leading ``www`` or
    ``m`` label is dropped and, if at least two labels remain, the first one
    is returned. Anything shorter returns the normalised host, or
    ``"unknown"`` when the host is empty.
    """
    hostname = _split_host(host_or_url)
    try:
        return _STEM_OVERRIDES[hostname]
    except KeyError:
        pass

    labels = hostname.split(".") if hostname else []
    if len(labels) >= 2 and labels[0] in ("www", "m"):
        labels = labels[1:]
    return labels[0] if len(labels) >= 2 else (hostname or "unknown")
132
+
133
+
134
+ # ---- locations -------------------------------------------------------
135
+
136
+
137
def site_skills_root() -> Path:
    """Return the root under which ``bootstrap_site`` / ``remember`` *write*.

    Precedence (v0.2): the project-local ``./site-skills/`` when that
    directory exists in the current working directory, otherwise
    ``site-skills`` under ``home_dir()`` (``$BS_HOME/site-skills/``).
    ``site_skills_roots()`` is the read-side counterpart.
    """
    local = Path.cwd() / "site-skills"
    return local if local.is_dir() else home_dir() / "site-skills"
148
+
149
+
150
def site_skills_roots() -> list[Path]:
    """Return the existing roots consulted for *reads*, highest priority first.

    1. ``./site-skills/`` — project-local, git-tracked.
    2. ``$BS_HOME/site-skills/`` — user-global, agent-written.

    A root is included only when it currently exists as a directory. The
    first hit per site name wins; discovery enforces that, and writes always
    target the writable ``site_skills_root()``.

    NOTE(review): earlier documentation also listed the bundled starter
    directory shipped with the package as a third root, but this function
    does not add it — confirm whether it is layered in elsewhere.
    """
    candidates = (Path.cwd() / "site-skills", home_dir() / "site-skills")
    return [root for root in candidates if root.is_dir()]
169
+
170
+
171
def site_dir(host: str) -> Path:
    """Return the writable directory for ``host``: ``<root>/<stem>``."""
    root = site_skills_root()
    return root / host_stem(host)
173
+
174
+
175
def memory_path(host: str) -> Path:
    """Return the path of ``memory.md`` inside the site directory of ``host``."""
    return site_dir(host).joinpath("memory.md")
177
+
178
+
179
+ # ---- file ops --------------------------------------------------------
180
+
181
+
182
class _FileLock:
    """Context manager taking an exclusive ``flock`` on ``path``.

    On entry the parent directory is created if needed, ``path`` is opened
    (and created with mode ``0o600`` when missing) and an exclusive advisory
    lock is requested. A failing ``flock`` is tolerated so that filesystems
    without lock support still work; in that case no cross-process exclusion
    is provided.

    The ``threading.Lock`` is created per instance, so it only serialises
    re-entry on the *same* ``_FileLock`` object.

    NOTE(review): the lock is held on ``path`` itself. If the writer replaces
    the file via rename, a later opener locks a different inode — confirm
    against ``_md.write_atomic``.
    """

    def __init__(self, path: Path):
        self.path = path
        self._fd: Optional[int] = None
        self._t = threading.Lock()

    def __enter__(self):
        self._t.acquire()
        try:
            self.path.parent.mkdir(parents=True, exist_ok=True)
            self._fd = os.open(self.path, os.O_RDWR | os.O_CREAT, 0o600)
            try:
                fcntl.flock(self._fd, fcntl.LOCK_EX)
            except OSError:
                # Best effort: carry on unlocked when flock is unsupported.
                pass
        except BaseException:
            # __exit__ is not called when __enter__ raises, so undo whatever
            # was acquired here; otherwise the thread lock stays held forever
            # and an already-opened descriptor leaks.
            try:
                if self._fd is not None:
                    os.close(self._fd)
            finally:
                self._fd = None
                self._t.release()
            raise
        return self

    def __exit__(self, *exc):
        try:
            if self._fd is not None:
                try:
                    fcntl.flock(self._fd, fcntl.LOCK_UN)
                except OSError:
                    # Closing the descriptor below drops the lock anyway.
                    pass
                os.close(self._fd)
        finally:
            self._fd = None
            self._t.release()
209
+
210
+
211
# Markdown body written into a freshly bootstrapped ``memory.md``. ``{stem}``
# is filled in via ``str.format``. The three non-English headings read
# "Top-level URL structure", "Private API" and "User preferences"; they are
# runtime text and must stay exactly as written.
_BOOT_BODY = """# {stem} site memory

This file is append-only (mostly). Each section is meant for a specific kind
of fact — see the headings below.

## Notes

## Known traps

## 顶层 URL 结构

## 私有 API

## 用户偏好

## Task history
"""
228
+
229
+
230
def bootstrap_site(host: str, aliases: Optional[list[str]] = None) -> Path:
    """Create ``site-skills/<stem>/`` with the canonical layout on demand.

    The layout is ``memory.md`` (front matter plus the ``_BOOT_BODY``
    template), a stub ``SKILL.md`` and an empty ``tasks/`` directory. When
    the directory and its ``memory.md`` both already exist the call returns
    immediately without touching anything; otherwise only the missing pieces
    are created.

    spec §A.2 / §C.3: ``remember(host, ...)`` calls this automatically when
    the directory does not exist yet (US2 in-flight write).

    Args:
        host: Hostname or URL identifying the site.
        aliases: Optional alternative names recorded in the front matter.

    Returns:
        The site directory path.
    """
    stem = host_stem(host)
    target = site_dir(host)
    memory_file = target / "memory.md"

    if target.exists() and memory_file.exists():
        return target

    target.mkdir(parents=True, exist_ok=True)

    if not memory_file.exists():
        front_matter = {
            "site": stem,
            "host_patterns": _candidate_patterns(host),
            "aliases": list(aliases or []),
            "last_updated": _dt.date.today().isoformat(),
        }
        document = _md.render_doc(front_matter, _BOOT_BODY.format(stem=stem))
        _md.write_atomic(memory_file, document)

    skill_file = target / "SKILL.md"
    if not skill_file.exists():
        stub = f"# {stem}\n\nStub site skill. Add a section per task here as they get solidified.\n"
        skill_file.write_text(stub, encoding="utf-8")

    (target / "tasks").mkdir(exist_ok=True)
    return target
260
+
261
+
262
def _candidate_patterns(host: str) -> list[str]:
    """Build the ``host_patterns`` list stored in a site's front matter.

    The host is reduced to its normalised hostname (URL parts dropped,
    lowercased, surrounding whitespace and leading/trailing dots removed —
    the same normalisation the stem helpers apply), then paired with its
    ``www.`` counterpart:

    * ``www.x`` / ``m.x`` also yields ``x``;
    * any other multi-label host ``x`` also yields ``www.x``;
    * a single-label host yields only itself.

    Args:
        host: Hostname or URL.

    Returns:
        The sorted, de-duplicated patterns; an empty list when no hostname
        can be extracted.
    """
    if "://" in host:
        host = urlparse(host).hostname or host
    # A trailing-dot FQDN ("github.com.") must produce the same patterns as
    # the trimmed form, otherwise the stored patterns never match a
    # normalised host.
    host = host.lower().strip().strip(".")
    if not host:
        return []
    parts = host.split(".")
    pats = {host}
    if len(parts) >= 2 and parts[0] in ("www", "m"):
        pats.add(".".join(parts[1:]))
    elif len(parts) >= 2:
        pats.add("www." + host)
    return sorted(pats)
273
+
274
+
275
+ # ---- SiteMemory class ------------------------------------------------
276
+
277
+
278
class _RedactionRejected(Exception):
    """Raised when a site-memory write is refused by the redaction check.

    Attributes are set in ``__init__``: ``reasons`` holds the reason codes
    that fired and ``text`` the rejected input. Only the reasons appear in
    the exception message.
    """

    def __init__(self, reasons: list[str], text: str):
        message = f"refused to write site memory (reasons: {reasons})"
        super().__init__(message)
        self.reasons = reasons
        self.text = text
284
+
285
+
286
class SiteMemory:
    """Read/write access to one site's ``memory.md``.

    ``host`` may be a hostname or URL. Writes always go to the new-style
    eTLD+1 path under the writable root; reads may fall back to the legacy
    short-stem location (Bug 1 back-compat).
    """

    def __init__(self, host: str):
        self.host = host
        self.stem = host_stem(host)
        self.dir = site_dir(host)
        # Writable target is always the new-style eTLD+1 path. Reads may
        # transparently fall back to the legacy short-stem path via
        # ``_read_candidates`` (Bug 1 back-compat).
        self.path = memory_path(host)

    def _read_candidates(self) -> list[Path]:
        """Return the ``memory.md`` paths to try when reading, in order.

        The new-style path comes first. When the legacy short stem differs
        from the current stem, its location under the project-local root and
        then the home root is appended, so data written before v0.3.1 (e.g.
        ``site-skills/news/memory.md`` for ``news.ycombinator.com``) stays
        readable. Paths are not checked for existence here.

        NOTE(review): the new-style stem is only tried under the single
        writable root, so new-style memory stored under the home root is not
        consulted while a project-local ``./site-skills/`` exists — confirm
        whether reads should layer across both roots.
        """
        out: list[Path] = [self.path]
        seen: set[Path] = {self.path}
        legacy_stem = _legacy_host_stem(self.host)
        if legacy_stem != self.stem:
            for root in (Path.cwd() / "site-skills",
                         home_dir() / "site-skills"):
                p = root / legacy_stem / "memory.md"
                if p not in seen:
                    out.append(p)
                    seen.add(p)
        return out

    def ensure(self) -> None:
        """Bootstrap the site directory when ``memory.md`` is missing."""
        if not self.path.exists():
            bootstrap_site(self.host)

    def append(self, text: str, *, section: str = "Notes") -> Path:
        """Append ``text`` as a bullet to ``section`` of the memory file.

        Args:
            text: The fact to record; surrounding whitespace is stripped and
                an empty result makes the call a no-op.
            section: Heading the bullet is appended to.

        Returns:
            The path of the memory file.

        Raises:
            _RedactionRejected: when ``redact_check`` flags the text. Nothing
                is written in that case.
        """
        text = text.strip()
        if not text:
            return self.path
        hits = redact_check(text)
        if hits:
            raise _RedactionRejected(hits, text)
        self.ensure()
        with _FileLock(self.path):
            fm, body = _md.parse_doc(self.path.read_text(encoding="utf-8"))
            fm = fm or {}
            fm["last_updated"] = _dt.date.today().isoformat()
            new_body = _md.append_to_section(body, section, f"- {text}")
            _md.write_atomic(self.path, _md.render_doc(fm, new_body))
        return self.path

    def read(self) -> dict:
        """Return the first existing candidate file, parsed.

        Returns:
            ``{"frontmatter": ..., "body": ...}`` from the first path in
            ``_read_candidates()`` that exists, or an empty front matter and
            body when none does.
        """
        for p in self._read_candidates():
            if p.exists():
                fm, body = _md.parse_doc(p.read_text(encoding="utf-8"))
                return {"frontmatter": fm, "body": body}
        return {"frontmatter": {}, "body": ""}

    def find(self, pattern: str) -> list[tuple[int, str]]:
        """Return bullet lines that match ``pattern``. Used by ``forget``.

        Only the writable path is searched; a missing file yields ``[]``.
        """
        if not self.path.exists():
            return []
        _fm, body = _md.parse_doc(self.path.read_text(encoding="utf-8"))
        return _md.find_matching_lines(body, pattern)

    def forget(self, pattern: str, *, confirm: bool = True) -> list[str]:
        """Remove every bullet whose text contains ``pattern``. Returns the
        removed lines (for audit). v0.2 — see spec §10.

        ``confirm=True`` makes the first call a dry-run that just returns the
        matches; pass ``confirm=False`` after the user assents to perform the
        actual delete. This mirrors ``remember_preference`` (US4) — destructive
        ops always ask first.

        A missing file, or no matching line, returns ``[]`` without writing.
        """
        if not self.path.exists():
            return []
        if confirm:
            return [ln for _i, ln in self.find(pattern)]
        with _FileLock(self.path):
            fm, body = _md.parse_doc(self.path.read_text(encoding="utf-8"))
            # Match against the body read under the lock. Indices taken from
            # an earlier, unlocked read could point at different lines if the
            # file was modified in between.
            matches = _md.find_matching_lines(body, pattern)
            if not matches:
                return []
            fm = fm or {}
            fm["last_updated"] = _dt.date.today().isoformat()
            new_body = _md.remove_lines(body, {i for i, _ln in matches})
            _md.write_atomic(self.path, _md.render_doc(fm, new_body))
        return [ln for _i, ln in matches]
374
+
375
+
376
def site_memory(host: str) -> SiteMemory:
    """Factory: build the ``SiteMemory`` handle for ``host``."""
    memory = SiteMemory(host)
    return memory
378
+
379
+
380
# Public alias of ``_RedactionRejected`` so callers can catch the redaction
# error specifically without importing a private name.
RedactionRejected = _RedactionRejected