phantomwright 0.1.4__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. {phantomwright-0.1.4 → phantomwright-0.2.0}/PKG-INFO +7 -3
  2. phantomwright-0.2.0/phantomwright/_repo_version.py +1 -0
  3. {phantomwright-0.1.4 → phantomwright-0.2.0}/phantomwright/captcha/cloudfare/scripts/observer.js +8 -0
  4. {phantomwright-0.1.4 → phantomwright-0.2.0}/phantomwright/captcha/cloudfare/solver.py +45 -16
  5. phantomwright-0.2.0/phantomwright/cli/__init__.py +5 -0
  6. phantomwright-0.2.0/phantomwright/cli/client.py +201 -0
  7. phantomwright-0.2.0/phantomwright/cli/commands/__init__.py +1 -0
  8. phantomwright-0.2.0/phantomwright/cli/commands/extraction.py +137 -0
  9. phantomwright-0.2.0/phantomwright/cli/commands/info.py +169 -0
  10. phantomwright-0.2.0/phantomwright/cli/commands/interaction.py +46 -0
  11. phantomwright-0.2.0/phantomwright/cli/commands/navigation.py +70 -0
  12. phantomwright-0.2.0/phantomwright/cli/main.py +57 -0
  13. phantomwright-0.2.0/phantomwright/cli/output.py +51 -0
  14. phantomwright-0.2.0/phantomwright/cli/server.py +889 -0
  15. phantomwright-0.2.0/phantomwright/cli/session.py +63 -0
  16. {phantomwright-0.1.4 → phantomwright-0.2.0}/pyproject.toml +10 -3
  17. phantomwright-0.1.4/phantomwright/_repo_version.py +0 -1
  18. {phantomwright-0.1.4 → phantomwright-0.2.0}/.gitignore +0 -0
  19. {phantomwright-0.1.4 → phantomwright-0.2.0}/LICENSE +0 -0
  20. {phantomwright-0.1.4 → phantomwright-0.2.0}/README.md +0 -0
  21. {phantomwright-0.1.4 → phantomwright-0.2.0}/phantomwright/__init__.py +0 -0
  22. {phantomwright-0.1.4 → phantomwright-0.2.0}/phantomwright/_impl/__init__.py +0 -0
  23. {phantomwright-0.1.4 → phantomwright-0.2.0}/phantomwright/_impl/_core_debug_patch.py +0 -0
  24. {phantomwright-0.1.4 → phantomwright-0.2.0}/phantomwright/_impl/_evaluate_patch.py +0 -0
  25. {phantomwright-0.1.4 → phantomwright-0.2.0}/phantomwright/_impl/_inconsistency_patch.py +0 -0
  26. {phantomwright-0.1.4 → phantomwright-0.2.0}/phantomwright/async_api/__init__.py +0 -0
  27. {phantomwright-0.1.4 → phantomwright-0.2.0}/phantomwright/captcha/__init__.py +0 -0
  28. {phantomwright-0.1.4 → phantomwright-0.2.0}/phantomwright/captcha/cloudfare/scripts/shadow_root.js +0 -0
  29. {phantomwright-0.1.4 → phantomwright-0.2.0}/phantomwright/captcha/cloudfare/utils/build_js.py +0 -0
  30. {phantomwright-0.1.4 → phantomwright-0.2.0}/phantomwright/captcha/cloudfare/utils/consts.py +0 -0
  31. {phantomwright-0.1.4 → phantomwright-0.2.0}/phantomwright/captcha/cloudfare/utils/detection.py +0 -0
  32. {phantomwright-0.1.4 → phantomwright-0.2.0}/phantomwright/captcha/cloudfare/utils/dom_helpers.py +0 -0
  33. {phantomwright-0.1.4 → phantomwright-0.2.0}/phantomwright/captcha/cloudfare/utils/shadow_root.py +0 -0
  34. {phantomwright-0.1.4 → phantomwright-0.2.0}/phantomwright/py.typed +0 -0
  35. {phantomwright-0.1.4 → phantomwright-0.2.0}/phantomwright/stealth/__init__.py +0 -0
  36. {phantomwright-0.1.4 → phantomwright-0.2.0}/phantomwright/stealth/js/evasions/chrome.app.js +0 -0
  37. {phantomwright-0.1.4 → phantomwright-0.2.0}/phantomwright/stealth/js/evasions/chrome.csi.js +0 -0
  38. {phantomwright-0.1.4 → phantomwright-0.2.0}/phantomwright/stealth/js/evasions/chrome.hairline.js +0 -0
  39. {phantomwright-0.1.4 → phantomwright-0.2.0}/phantomwright/stealth/js/evasions/chrome.load.times.js +0 -0
  40. {phantomwright-0.1.4 → phantomwright-0.2.0}/phantomwright/stealth/js/evasions/chrome.runtime.js +0 -0
  41. {phantomwright-0.1.4 → phantomwright-0.2.0}/phantomwright/stealth/js/evasions/error.prototype.js +0 -0
  42. {phantomwright-0.1.4 → phantomwright-0.2.0}/phantomwright/stealth/js/evasions/iframe.contentWindow.js +0 -0
  43. {phantomwright-0.1.4 → phantomwright-0.2.0}/phantomwright/stealth/js/evasions/media.codecs.js +0 -0
  44. {phantomwright-0.1.4 → phantomwright-0.2.0}/phantomwright/stealth/js/evasions/navigator.hardwareConcurrency.js +0 -0
  45. {phantomwright-0.1.4 → phantomwright-0.2.0}/phantomwright/stealth/js/evasions/navigator.languages.js +0 -0
  46. {phantomwright-0.1.4 → phantomwright-0.2.0}/phantomwright/stealth/js/evasions/navigator.permissions.js +0 -0
  47. {phantomwright-0.1.4 → phantomwright-0.2.0}/phantomwright/stealth/js/evasions/navigator.platform.js +0 -0
  48. {phantomwright-0.1.4 → phantomwright-0.2.0}/phantomwright/stealth/js/evasions/navigator.plugins.js +0 -0
  49. {phantomwright-0.1.4 → phantomwright-0.2.0}/phantomwright/stealth/js/evasions/navigator.userAgent.js +0 -0
  50. {phantomwright-0.1.4 → phantomwright-0.2.0}/phantomwright/stealth/js/evasions/navigator.vendor.js +0 -0
  51. {phantomwright-0.1.4 → phantomwright-0.2.0}/phantomwright/stealth/js/evasions/webgl.vendor.js +0 -0
  52. {phantomwright-0.1.4 → phantomwright-0.2.0}/phantomwright/stealth/js/generate.magic.arrays.js +0 -0
  53. {phantomwright-0.1.4 → phantomwright-0.2.0}/phantomwright/stealth/js/utils.js +0 -0
  54. {phantomwright-0.1.4 → phantomwright-0.2.0}/phantomwright/stealth/stealth.py +0 -0
  55. {phantomwright-0.1.4 → phantomwright-0.2.0}/phantomwright/sync_api/__init__.py +0 -0
  56. {phantomwright-0.1.4 → phantomwright-0.2.0}/phantomwright/user_simulator/README.md +0 -0
  57. {phantomwright-0.1.4 → phantomwright-0.2.0}/phantomwright/user_simulator/__init__.py +0 -0
  58. {phantomwright-0.1.4 → phantomwright-0.2.0}/phantomwright/user_simulator/async_basic.py +0 -0
  59. {phantomwright-0.1.4 → phantomwright-0.2.0}/phantomwright/user_simulator/async_simulator.py +0 -0
  60. {phantomwright-0.1.4 → phantomwright-0.2.0}/phantomwright/user_simulator/script.py +0 -0
  61. {phantomwright-0.1.4 → phantomwright-0.2.0}/phantomwright/user_simulator/sync_basic.py +0 -0
  62. {phantomwright-0.1.4 → phantomwright-0.2.0}/phantomwright/user_simulator/sync_simulator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: phantomwright
3
- Version: 0.1.4
3
+ Version: 0.2.0
4
4
  Summary: Bridging playwright-core patch + extending playwright API for stealth injection & user simulation
5
5
  Project-URL: homepage, https://github.com/ai-microsoft/phantom-wright
6
6
  Project-URL: changelog, https://github.com/ai-microsoft/phantom-wright/blob/main/CHANGELOG.md
@@ -9,8 +9,12 @@ License-Expression: MIT
9
9
  License-File: LICENSE
10
10
  Classifier: Operating System :: OS Independent
11
11
  Classifier: Programming Language :: Python :: 3
12
- Requires-Python: >=3.9
13
- Requires-Dist: phantomwright-driver==1.57.7
12
+ Requires-Python: >=3.10
13
+ Requires-Dist: aiohttp>=3.9.0
14
+ Requires-Dist: click>=8.0.0
15
+ Requires-Dist: httpx>=0.27.0
16
+ Requires-Dist: markitdown>=0.1.0
17
+ Requires-Dist: phantomwright-driver==1.58.3
14
18
  Provides-Extra: black
15
19
  Requires-Dist: black>=25.9.0; extra == 'black'
16
20
  Provides-Extra: dev
@@ -0,0 +1 @@
1
+ version = 'v0.2.0'
@@ -41,6 +41,14 @@
41
41
  }
42
42
  }
43
43
 
44
// Hook for the Python side: re-run the scan over the current document root.
window.__triggerCfRescan = function () {
  const target = document.body || document.documentElement;
  // Nothing to scan when neither a body nor a document element exists.
  if (!target) return;
  scan(target);
};
51
+
44
52
  function startObserve() {
45
53
  const root = document.body || document.documentElement;
46
54
  if (!root) return;
@@ -87,18 +87,27 @@ class CloudflareSolverAsync:
87
87
  return self.page_solve_state.setdefault(
88
88
  page,
89
89
  {
90
- "status": "idle", # idle | solving | done
90
+ "status": "idle", # idle | solving
91
91
  "last_url": None,
92
92
  },
93
93
  )
94
94
 
95
95
  # ---------------- js rebind ----------------
96
96
  async def _rebind(self, page: Page):
97
- await page.evaluate("""
98
- window.onCloudflareDetected = function(sel, url) {
99
- window.__cf_callback(sel, url);
100
- };
101
- """)
97
+ try:
98
+ await page.evaluate("""
99
+ window.onCloudflareDetected = function(sel, url) {
100
+ window.__cf_callback(sel, url);
101
+ };
102
+ """)
103
+ await asyncio.sleep(1)
104
+ await page.evaluate("""
105
+ if (typeof window.__triggerCfRescan === 'function') {
106
+ window.__triggerCfRescan();
107
+ }
108
+ """)
109
+ except Exception:
110
+ pass
102
111
 
103
112
  # ---------------- report helper ----------------
104
113
  def _log_final_report(self, report: dict):
@@ -113,7 +122,9 @@ class CloudflareSolverAsync:
113
122
  "timestamp": time.time(),
114
123
  }
115
124
 
116
- self.log(json.dumps(data, ensure_ascii=False))
125
+ if self.log:
126
+ log_str = json.dumps(data, ensure_ascii=False)
127
+ self.log(log_str)
117
128
 
118
129
  # ---------------- core solve ----------------
119
130
  async def _auto_solve_cf(self, page: Page):
@@ -140,7 +151,7 @@ class CloudflareSolverAsync:
140
151
  report["challenge_type"] = challenge_type.name
141
152
 
142
153
  if challenge_type is ChallengeType.TURNSTILE:
143
- await page.locator("#cf-turnstile").wait_for(10000)
154
+ await page.locator("#cf-turnstile").wait_for(timeout=10000)
144
155
 
145
156
  cf_iframes = await search_shadow_root_iframes(
146
157
  captcha_container=page,
@@ -174,13 +185,22 @@ class CloudflareSolverAsync:
174
185
  success_elements = await search_shadow_root_elements(
175
186
  iframe, 'div[id="success"]'
176
187
  )
177
- solved = bool(success_elements)
188
+
189
+ # Check if success element is actually visible
190
+ solved = False
191
+ for el in success_elements:
192
+ try:
193
+ is_visible = await el.is_visible()
194
+ if is_visible:
195
+ solved = True
196
+ break
197
+ except:
198
+ pass
178
199
  else:
179
200
  solved = not await detect_cloudflare_challenge(page)
180
201
 
181
202
  if solved:
182
- state["status"] = "done"
183
- state["last_url"] = page.url
203
+ state["status"] = "idle"
184
204
 
185
205
  report["success"] = True
186
206
  return
@@ -211,15 +231,17 @@ class CloudflareSolverAsync:
211
231
  return
212
232
 
213
233
  state = self._get_page_state(page)
234
+ current_url = page.url
235
+
236
+ # Skip if this URL was already attempted
237
+ if state["last_url"] == current_url:
238
+ return
214
239
 
215
- if state["last_url"] != page.url:
216
- state["status"] = "idle"
217
- state["last_url"] = page.url
218
-
219
- if state["status"] in ("solving", "done"):
240
+ if state["status"] == "solving":
220
241
  return
221
242
 
222
243
  state["status"] = "solving"
244
+ state["last_url"] = current_url
223
245
 
224
246
  asyncio.create_task(self._auto_solve_cf(page))
225
247
 
@@ -234,7 +256,14 @@ class CloudflareSolverAsync:
234
256
  await page.add_init_script(shadow_root_js)
235
257
 
236
258
  await page.expose_function("__cf_callback", self._make_on_cf_detected(page))
259
+
260
+ # Listen to multiple events for more reliable detection
237
261
  page.on("load", lambda: asyncio.create_task(self._rebind(page)))
262
+ page.on("domcontentloaded", lambda: asyncio.create_task(self._rebind(page)))
263
+ page.on("framenavigated", lambda frame: asyncio.create_task(self._rebind(page)) if frame == page.main_frame else None)
264
+
265
+ # Immediately rebind in case page is already loaded
266
+ asyncio.create_task(self._rebind(page))
238
267
 
239
268
  # ---------------- public api ----------------
240
269
  def start(self) -> None:
@@ -0,0 +1,5 @@
1
+ """PhantomWright CLI - Browser automation from the command line."""
2
+
3
+ from .main import cli
4
+
5
+ __all__ = ["cli"]
@@ -0,0 +1,201 @@
1
+ """Client for communicating with the browser session server."""
2
+
3
+ import json
4
+ from pathlib import Path
5
+ from typing import Any, Optional
6
+
7
+ import httpx
8
+
9
+ from .server import SESSION_FILE, DEFAULT_PORT
10
+
11
+
12
class SessionClient:
    """Client for communicating with the browser session server.

    Each public method issues one HTTP request against the server on
    ``127.0.0.1`` and returns the decoded JSON body as a ``dict``.
    Transport problems are never raised to the caller: ``_request`` turns
    them into ``{"error": ..., "message": ...}`` dictionaries, so callers
    only need to test ``"error" in result``.
    """

    def __init__(self, port: Optional[int] = None):
        """Create a client.

        Args:
            port: Server port. When ``None`` the port is resolved lazily
                from the session file, falling back to ``DEFAULT_PORT``.
        """
        self._port = port
        self._base_url: Optional[str] = None

    @staticmethod
    def _no_session_error() -> dict:
        """Return the error payload used when no server can be reached."""
        return {"error": "NoSession", "message": "No active browser session. Run 'phantomwright start' first."}

    @staticmethod
    def _read_session_port():
        """Return the port stored in the session file, or ``DEFAULT_PORT``.

        Any problem with the file (missing, unreadable, not valid JSON, not
        a JSON object, or a null port) falls back to ``DEFAULT_PORT``
        instead of raising.
        """
        try:
            with open(SESSION_FILE, "r", encoding="utf-8") as f:
                session_info = json.load(f)
        except (OSError, ValueError):
            # OSError covers a missing or unreadable file; ValueError covers
            # json.JSONDecodeError as well as UnicodeDecodeError.
            return DEFAULT_PORT

        # A valid JSON document is not necessarily an object.
        if not isinstance(session_info, dict):
            return DEFAULT_PORT

        port = session_info.get("port", DEFAULT_PORT)
        return DEFAULT_PORT if port is None else port

    def _get_base_url(self) -> Optional[str]:
        """Get the base URL for the server.

        The URL is computed once and cached on the instance.
        """
        if self._base_url:
            return self._base_url

        # An explicit port wins; otherwise consult the session file.
        port = self._port
        if port is None:
            port = self._read_session_port()

        self._base_url = f"http://127.0.0.1:{port}"
        return self._base_url

    def is_server_running(self) -> bool:
        """Check if the server is running.

        Returns:
            ``True`` when ``GET /status`` answers with HTTP 200 within two
            seconds, ``False`` on any request error or other status code.
        """
        base_url = self._get_base_url()
        if not base_url:
            return False

        try:
            with httpx.Client(timeout=2.0) as client:
                response = client.get(f"{base_url}/status")
                return response.status_code == 200
        except httpx.RequestError:
            return False

    def _request(
        self,
        method: str,
        endpoint: str,
        data: Optional[dict] = None,
        timeout: float = 60.0,
    ) -> dict:
        """Make a request to the server.

        Args:
            method: ``"GET"`` issues a GET; any other value issues a POST.
            endpoint: Path appended to the base URL (e.g. ``"/status"``).
            data: JSON body for POST requests; ``None`` sends ``{}``.
            timeout: Request timeout in seconds.

        Returns:
            The decoded JSON response, or an error dictionary with
            ``"error"`` and ``"message"`` keys when the request fails or
            the body cannot be decoded.
        """
        base_url = self._get_base_url()
        if not base_url:
            return self._no_session_error()

        try:
            with httpx.Client(timeout=timeout) as client:
                if method == "GET":
                    response = client.get(f"{base_url}{endpoint}")
                else:
                    response = client.post(f"{base_url}{endpoint}", json=data or {})

                return response.json()
        except httpx.ConnectError:
            # Nothing is listening on the port: treat as "no session".
            return self._no_session_error()
        except httpx.RequestError as e:
            return {"error": "ConnectionError", "message": str(e)}
        except ValueError:
            # json.JSONDecodeError is a ValueError; so is a body that
            # cannot be decoded as text.
            return {"error": "InvalidResponse", "message": "Invalid response from server"}

    def get_status(self) -> dict:
        """Get server status."""
        return self._request("GET", "/status")

    def stop(self) -> dict:
        """Stop the server."""
        return self._request("POST", "/stop")

    def navigate(
        self,
        url: str,
        simulate: bool = True,
        cool_down: bool = True,
        wait_for: Optional[str] = None,
        timeout: int = 30000,
    ) -> dict:
        """Navigate to a URL.

        ``timeout`` is in milliseconds; the HTTP request is given an extra
        30 seconds on top of it.
        """
        return self._request("POST", "/navigate", {
            "url": url,
            "simulate": simulate,
            "cool_down": cool_down,
            "wait_for": wait_for,
            "timeout": timeout,
        }, timeout=timeout / 1000 + 30)

    def scroll(self, duration: int = 2000) -> dict:
        """Scroll the page for ``duration`` milliseconds."""
        return self._request("POST", "/scroll", {"duration": duration}, timeout=duration / 1000 + 10)

    def browse(self, duration: int = 2000) -> dict:
        """Simulate browsing for ``duration`` milliseconds."""
        return self._request("POST", "/browse", {"duration": duration}, timeout=duration / 1000 + 10)

    def click(self, selector: str, simulate: bool = True) -> dict:
        """Click an element."""
        return self._request("POST", "/click", {
            "selector": selector,
            "simulate": simulate,
        })

    def type_text(self, selector: str, text: str, simulate: bool = True, typos: bool = False) -> dict:
        """Type text into an element.

        The HTTP timeout grows with the text length (half a second per
        character plus 30 seconds).
        """
        return self._request("POST", "/type", {
            "selector": selector,
            "text": text,
            "simulate": simulate,
            "typos": typos,
        }, timeout=len(text) * 0.5 + 30)

    def screenshot(
        self,
        selector: Optional[str] = None,
        full_page: bool = False,
    ) -> dict:
        """Take a screenshot."""
        return self._request("POST", "/screenshot", {
            "selector": selector,
            "full_page": full_page,
        })

    def get_html(self, selector: Optional[str] = None, outer: bool = True) -> dict:
        """Get HTML content."""
        return self._request("POST", "/html", {
            "selector": selector,
            "outer": outer,
        })

    def get_text(self, selector: str) -> dict:
        """Get text content."""
        return self._request("POST", "/text", {"selector": selector})

    def get_attr(self, selector: str, attribute: str) -> dict:
        """Get attribute value."""
        return self._request("POST", "/attr", {
            "selector": selector,
            "attribute": attribute,
        })

    def query(
        self,
        selector: str,
        limit: Optional[int] = None,
        attrs: Optional[str] = None,
    ) -> dict:
        """Query elements."""
        return self._request("POST", "/query", {
            "selector": selector,
            "limit": limit,
            "attrs": attrs,
        })

    def get_url(self) -> dict:
        """Get current URL."""
        return self._request("GET", "/url")

    def get_title(self) -> dict:
        """Get page title."""
        return self._request("GET", "/title")

    def wait(self, selector: str, timeout: int = 30000, state: str = "visible") -> dict:
        """Wait for an element.

        ``timeout`` is in milliseconds; the HTTP request is given an extra
        10 seconds on top of it.
        """
        return self._request("POST", "/wait", {
            "selector": selector,
            "timeout": timeout,
            "state": state,
        }, timeout=timeout / 1000 + 10)

    def get_markdown(self, selector: Optional[str] = None) -> dict:
        """Get page content as markdown with selectors."""
        return self._request("POST", "/markdown", {"selector": selector})
190
+
191
+
192
+ # Global client instance
193
+ _client: Optional[SessionClient] = None
194
+
195
+
196
def get_client() -> SessionClient:
    """Get the global session client."""
    global _client
    # Reuse the shared instance when one has already been built.
    if _client is not None:
        return _client
    _client = SessionClient()
    return _client
@@ -0,0 +1 @@
1
+ """PhantomWright CLI commands."""
@@ -0,0 +1,137 @@
1
+ """Extraction commands: screenshot, html, text, attr, query."""
2
+
3
+ import base64
4
+ import os
5
+ import click
6
+
7
+ from ..client import get_client
8
+ from ..output import output_success, output_error
9
+
10
+
11
@click.command()
@click.option("--path", default=None, help="File path to save screenshot")
@click.option("--selector", default=None, help="CSS selector for element screenshot")
@click.option("--full-page/--no-full-page", default=False, help="Capture full scrollable page")
def screenshot(path: str, selector: str, full_page: bool):
    """Take a screenshot of the page or element."""
    # With --path the image is written to disk and its absolute path is
    # reported; without it the base64 payload is passed through unchanged.
    result = get_client().screenshot(selector=selector, full_page=full_page)

    if "error" in result:
        output_error("screenshot", result["error"], result["message"], selector=selector)
        return

    # Fields shared by both output shapes, in their original order.
    common = {
        "size_bytes": result["size_bytes"],
        "selector": selector,
        "full_page": full_page,
    }

    if not path:
        # No file requested: no need to decode the image at all.
        output_success("screenshot", {"base64": result["base64"], **common})
        return

    # Decode only on the save path, where the raw bytes are actually used.
    image_bytes = base64.b64decode(result["base64"])

    # Create the parent directory when the path has one.
    parent_dir = os.path.dirname(path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(path, "wb") as f:
        f.write(image_bytes)

    output_success("screenshot", {"path": os.path.abspath(path), **common})
47
+
48
+
49
@click.command()
@click.option("--selector", default=None, help="CSS selector to get HTML from")
@click.option("--outer/--inner", default=True, help="Get outer HTML (default) or inner HTML")
def html(selector: str, outer: bool):
    """Get page or element HTML content."""
    response = get_client().get_html(selector=selector, outer=outer)

    # Failure path first; the success payload is built afterwards.
    if "error" in response:
        output_error("html", response["error"], response["message"], selector=selector)
        return

    payload = {
        "html": response["html"],
        "length": response["length"],
        "selector": selector,
        "type": response["type"],
    }
    output_success("html", payload)
66
+
67
+
68
@click.command()
@click.argument("selector")
def text(selector: str):
    """Get text content of an element."""
    response = get_client().get_text(selector=selector)

    # Failure path first; the success payload is built afterwards.
    if "error" in response:
        output_error("text", response["error"], response["message"], selector=selector)
        return

    payload = {
        "text": response["text"],
        "length": response["length"],
        "selector": selector,
    }
    output_success("text", payload)
83
+
84
+
85
@click.command()
@click.argument("selector")
@click.argument("attribute")
def attr(selector: str, attribute: str):
    """Get an attribute value from an element."""
    response = get_client().get_attr(selector=selector, attribute=attribute)

    # Failure path first; the success payload is built afterwards.
    if "error" in response:
        output_error("attr", response["error"], response["message"], selector=selector)
        return

    payload = {
        "attribute": response["attribute"],
        "value": response["value"],
        "selector": selector,
    }
    output_success("attr", payload)
101
+
102
+
103
@click.command()
@click.argument("selector")
@click.option("--limit", default=None, type=int, help="Maximum number of elements to return")
@click.option("--attrs", default=None, help="Comma-separated list of attributes to extract")
def query(selector: str, limit: int, attrs: str):
    """Query all elements matching a selector."""
    response = get_client().query(selector=selector, limit=limit, attrs=attrs)

    # Failure path first; the success payload is built afterwards.
    if "error" in response:
        output_error("query", response["error"], response["message"], selector=selector)
        return

    payload = {
        "elements": response["elements"],
        "count": response["count"],
        # Fall back to the returned count when the server sends no total.
        "total_matches": response.get("total_matches", response["count"]),
        "selector": selector,
    }
    output_success("query", payload)
121
+
122
+
123
@click.command()
@click.option("--selector", default=None, help="CSS selector to convert (default: entire page)")
def markdown(selector: str):
    """Get page content as markdown with embedded selectors for agent interaction."""
    response = get_client().get_markdown(selector=selector)

    # Failure path first; the success payload is built afterwards.
    if "error" in response:
        output_error("markdown", response["error"], response["message"], selector=selector)
        return

    payload = {
        "markdown": response["markdown"],
        "url": response["url"],
        "title": response["title"],
    }
    output_success("markdown", payload)
@@ -0,0 +1,169 @@
1
+ """Info commands: url, title, wait, status, start, stop."""
2
+
3
+ import asyncio
4
+ import subprocess
5
+ import sys
6
+ import click
7
+
8
+ from ..session import load_session, clear_session, is_session_active
9
+ from ..client import get_client
10
+ from ..output import output_success, output_error
11
+
12
+
13
@click.command()
def url():
    """Get the current page URL."""
    response = get_client().get_url()

    # Failure path first; on success only the URL is forwarded.
    if "error" in response:
        output_error("url", response["error"], response["message"])
        return

    output_success("url", {"url": response["url"]})
23
+
24
+
25
@click.command()
def title():
    """Get the current page title."""
    response = get_client().get_title()

    # Failure path first; the success payload is built afterwards.
    if "error" in response:
        output_error("title", response["error"], response["message"])
        return

    payload = {
        "title": response["title"],
        "url": response["url"],
    }
    output_success("title", payload)
38
+
39
+
40
@click.command()
@click.argument("selector")
@click.option("--timeout", default=30000, help="Timeout in milliseconds")
@click.option("--state", default="visible", type=click.Choice(["attached", "detached", "visible", "hidden"]), help="Element state to wait for")
def wait(selector: str, timeout: int, state: str):
    """Wait for an element to appear."""
    response = get_client().wait(selector, timeout=timeout, state=state)

    # Failure path first; the error report carries the selector and timeout.
    if "error" in response:
        output_error("wait", response["error"], response["message"], selector=selector, timeout=timeout)
        return

    payload = {
        "selector": selector,
        "state": state,
        "found": response["found"],
        "count": response["count"],
    }
    output_success("wait", payload)
58
+
59
+
60
@click.command()
def status():
    """Get the current session status."""
    result = get_client().get_status()

    # Report a failed lookup as an error, the same way the other commands
    # in this module do, instead of wrapping it in a success envelope.
    if "error" in result:
        output_error("status", result["error"], result["message"])
        return

    output_success("status", result)
66
+
67
+
68
@click.command()
@click.option("--browser", default="chrome", type=click.Choice(["chrome", "msedge"]), help="Browser to use (default: chrome)")
@click.option("--headless/--no-headless", default=False, help="Run browser in headless mode")
@click.option("--stealth/--no-stealth", default=True, help="Enable stealth mode")
@click.option("--cloudflare-solver/--no-cloudflare-solver", default=True, help="Enable Cloudflare solver")
@click.option("--visualize-mouse/--no-visualize-mouse", default=None, help="Show visual cursor (default: on when not headless)")
@click.option("--port", default=9323, help="Server port (default: 9323)")
@click.option("--foreground/--no-foreground", default=False, help="Run server in foreground (default: background)")
@click.option("--user-data-dir", default=None, help="Browser user data directory for persistent sessions (preserves logins)")
def start(browser: str, headless: bool, stealth: bool, cloudflare_solver: bool, visualize_mouse: bool, port: int, foreground: bool, user_data_dir: str):
    """Start a new browser session."""
    # Only one session at a time: refuse to start when one is active.
    if is_session_active():
        output_error("start", "SessionExists", "A browser session is already active. Run 'phantomwright stop' first.")
        return

    if foreground:
        # Run the server inside this process until it finishes.
        asyncio.run(_start_foreground(browser, headless, stealth, cloudflare_solver, visualize_mouse, port, user_data_dir))
        return

    # Background mode: launch the server module in a separate process and
    # forward only the options that differ from the server's defaults.
    args = [
        sys.executable, "-m", "phantomwright.cli.server",
        "--port", str(port),
        "--browser", browser,
    ]
    if headless:
        args.append("--headless")
    if not stealth:
        args.append("--no-stealth")
    if not cloudflare_solver:
        args.append("--no-cloudflare-solver")
    if visualize_mouse is not None:
        args.append("--visualize-mouse" if visualize_mouse else "--no-visualize-mouse")
    if user_data_dir:
        args.extend(["--user-data-dir", user_data_dir])

    # Use CREATE_NEW_PROCESS_GROUP on Windows, start_new_session on Unix.
    # The Windows-only constants are looked up only on Windows.
    if sys.platform == "win32":
        detach_kwargs = {
            "creationflags": subprocess.CREATE_NEW_PROCESS_GROUP | subprocess.DETACHED_PROCESS,
        }
    else:
        detach_kwargs = {"start_new_session": True}

    server_proc = subprocess.Popen(
        args,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        **detach_kwargs,
    )

    # Wait for the server to answer, bounded by a wall-clock deadline so
    # slow status probes cannot stretch the wait far beyond 30 seconds.
    import time
    from ..client import SessionClient
    client = SessionClient(port=port)
    deadline = time.monotonic() + 30.0
    while time.monotonic() < deadline:
        time.sleep(0.1)
        if client.is_server_running():
            result = client.get_status()
            output_success("start", result)
            return

        # Fail fast when the launched process has already died with an
        # error. A zero exit code is not treated as a failure.
        exit_code = server_proc.poll()
        if exit_code not in (None, 0):
            output_error("start", "StartupFailed", f"Server process exited during startup (exit code {exit_code}).")
            return

    output_error("start", "StartupTimeout", "Server did not start within timeout.")
134
+
135
async def _start_foreground(browser: str, headless: bool, stealth: bool, cloudflare_solver: bool, visualize_mouse: bool, port: int, user_data_dir: str):
    """Start server in foreground."""
    # Imported here rather than at module level, as in the original.
    from ..server import run_server

    # Forward every option to the server by keyword.
    server_options = dict(
        port=port,
        browser=browser,
        headless=headless,
        stealth=stealth,
        cloudflare_solver=cloudflare_solver,
        visualize_mouse=visualize_mouse,
        user_data_dir=user_data_dir,
    )
    await run_server(**server_options)
147
+
148
+
149
@click.command()
def stop():
    """Stop the current browser session."""
    client = get_client()

    if client.is_server_running():
        # A server answered: ask it to shut down and relay the outcome.
        outcome = client.stop()
        if "error" in outcome:
            output_error("stop", outcome["error"], outcome["message"])
        else:
            output_success("stop", {"message": "Browser session stopped."})
        return

    # No server answered. A stored session at this point is stale.
    if load_session():
        clear_session()
        output_success("stop", {"message": "Cleaned up stale session file."})
    else:
        output_error("stop", "NoSession", "No active browser session to stop.")