cmdop 0.1.21__py3-none-any.whl → 0.1.23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cmdop/__init__.py +1 -1
- cmdop/_generated/rpc_messages/browser_pb2.py +135 -85
- cmdop/_generated/rpc_messages/browser_pb2.pyi +270 -2
- cmdop/_generated/rpc_messages_pb2.pyi +25 -0
- cmdop/_generated/service_pb2.py +2 -2
- cmdop/_generated/service_pb2_grpc.py +345 -0
- cmdop/client.py +2 -8
- cmdop/services/browser/__init__.py +44 -31
- cmdop/services/browser/capabilities/__init__.py +17 -0
- cmdop/services/browser/capabilities/_base.py +28 -0
- cmdop/services/browser/capabilities/_helpers.py +16 -0
- cmdop/services/browser/capabilities/dom.py +76 -0
- cmdop/services/browser/capabilities/fetch.py +45 -0
- cmdop/services/browser/capabilities/input.py +49 -0
- cmdop/services/browser/capabilities/network.py +245 -0
- cmdop/services/browser/capabilities/scroll.py +147 -0
- cmdop/services/browser/capabilities/timing.py +66 -0
- cmdop/services/browser/js/__init__.py +6 -4
- cmdop/services/browser/js/interaction.py +34 -0
- cmdop/services/browser/models.py +103 -0
- cmdop/services/browser/service/__init__.py +5 -0
- cmdop/services/browser/service/aio.py +30 -0
- cmdop/services/browser/{sync/service.py → service/sync.py} +206 -6
- cmdop/services/browser/session.py +194 -0
- {cmdop-0.1.21.dist-info → cmdop-0.1.23.dist-info}/METADATA +107 -59
- {cmdop-0.1.21.dist-info → cmdop-0.1.23.dist-info}/RECORD +29 -24
- cmdop/services/browser/aio/__init__.py +0 -6
- cmdop/services/browser/aio/service.py +0 -420
- cmdop/services/browser/aio/session.py +0 -407
- cmdop/services/browser/base/__init__.py +0 -6
- cmdop/services/browser/base/session.py +0 -124
- cmdop/services/browser/sync/__init__.py +0 -6
- cmdop/services/browser/sync/session.py +0 -644
- /cmdop/services/browser/{base/service.py → service/_helpers.py} +0 -0
- {cmdop-0.1.21.dist-info → cmdop-0.1.23.dist-info}/WHEEL +0 -0
- {cmdop-0.1.21.dist-info → cmdop-0.1.23.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"""Fetch capability."""
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from cmdop.services.browser.js import (
|
|
6
|
+
build_fetch_js,
|
|
7
|
+
build_fetch_all_js,
|
|
8
|
+
build_async_js,
|
|
9
|
+
parse_json_result,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
from ._base import BaseCapability
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class FetchCapability(BaseCapability):
    """HTTP fetch operations executed inside the browser context.

    Usage:
        data = session.fetch.json("/api/data")
        results = session.fetch.all({"a": "/api/a", "b": "/api/b"})
    """

    def json(self, url: str) -> dict | list | None:
        """Fetch *url* and return its parsed JSON body."""
        return parse_json_result(self._js(build_fetch_js(url)))

    def all(
        self,
        urls: dict[str, str],
        headers: dict[str, str] | None = None,
        credentials: bool = False,
    ) -> dict[str, Any]:
        """Fetch several URLs in parallel.

        Args:
            urls: Mapping of result-id -> URL.
            headers: Optional request headers applied to every fetch.
            credentials: Whether to send cookies/credentials.

        Returns:
            Mapping of ``{id: {data, error}}``; empty dict for empty input
            or when the browser returned something other than an object.
        """
        if not urls:
            return {}
        script = build_async_js(build_fetch_all_js(urls, headers, credentials))
        parsed = parse_json_result(self._js(script))
        # Anything non-dict (parse failure, unexpected JS result) collapses
        # to an empty mapping so callers can iterate unconditionally.
        return parsed if isinstance(parsed, dict) else {}

    def execute(self, code: str) -> dict | list | str | None:
        """Run async JS *code* (may use await, fetch, etc.) and parse the result."""
        return parse_json_result(self._js(build_async_js(code)))
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
"""Input capability."""
|
|
2
|
+
|
|
3
|
+
from cmdop.services.browser.js import (
|
|
4
|
+
build_click_js,
|
|
5
|
+
build_press_key_js,
|
|
6
|
+
build_click_all_by_text_js,
|
|
7
|
+
build_hover_js,
|
|
8
|
+
)
|
|
9
|
+
|
|
10
|
+
from ._base import BaseCapability
|
|
11
|
+
from ._helpers import to_dict
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class InputCapability(BaseCapability):
    """Input operations: clicks, keyboard, hover.

    Usage:
        session.input.click_js(".button")
        session.input.key("Escape")
        session.input.click_all("See more")
    """

    def click_js(self, selector: str, scroll_into_view: bool = True) -> bool:
        """Click an element via injected JavaScript (more reliable than native click)."""
        outcome = to_dict(self._js(build_click_js(selector, scroll_into_view)))
        return outcome.get("success", False)

    def key(self, key: str, selector: str | None = None) -> bool:
        """Press a keyboard key (Escape, Enter, Tab, ArrowDown, ...)."""
        outcome = to_dict(self._js(build_press_key_js(key, selector)))
        return outcome.get("success", False)

    def click_all(self, text: str, role: str = "button") -> int:
        """Click every element containing *text*; return the number clicked."""
        outcome = to_dict(self._js(build_click_all_by_text_js(text, role)))
        return outcome.get("clicked", 0)

    def hover_js(self, selector: str) -> bool:
        """Hover over an element via injected JavaScript."""
        outcome = to_dict(self._js(build_hover_js(selector)))
        return outcome.get("success", False)

    def hover(self, selector: str, timeout_ms: int = 5000) -> None:
        """Hover over an element using the native browser API."""
        self._call("hover", selector, timeout_ms)

    def mouse_move(self, x: int, y: int, steps: int = 10) -> None:
        """Move the mouse to (x, y) with human-like stepped movement."""
        self._call("mouse_move", x, y, steps)
|
@@ -0,0 +1,245 @@
|
|
|
1
|
+
"""Network capture capability (v2.19.0)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from cmdop.services.browser.models import (
|
|
7
|
+
NetworkExchange,
|
|
8
|
+
NetworkRequest,
|
|
9
|
+
NetworkResponse,
|
|
10
|
+
NetworkTiming,
|
|
11
|
+
NetworkStats,
|
|
12
|
+
NetworkFilter,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
from ._base import BaseCapability
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class NetworkCapability(BaseCapability):
    """Network capture operations.

    Records the HTTP requests/responses the browser makes. Useful for:
    - Intercepting API responses
    - Debugging network issues
    - Extracting data from XHR/Fetch calls

    Usage:
        # Enable capture
        session.network.enable()

        # Navigate and trigger requests
        session.navigate("https://example.com")

        # Get all captured exchanges
        exchanges = session.network.get_all()

        # Get last API response
        api = session.network.last("/api/data")
        data = api.json_body()

        # Filter by criteria
        xhr = session.network.filter(
            url_pattern="/api/",
            methods=["POST"],
            status_codes=[200],
        )

        # Disable capture
        session.network.disable()
    """

    def enable(self, max_exchanges: int = 1000, max_response_size: int = 10_000_000) -> None:
        """Turn on network capture.

        Args:
            max_exchanges: Max exchanges kept in memory (FIFO eviction).
            max_response_size: Max response body size in bytes.
        """
        self._call("network_enable", max_exchanges, max_response_size)

    def disable(self) -> None:
        """Turn off network capture."""
        self._call("network_disable")

    def get_all(self, limit: int = 0) -> list[NetworkExchange]:
        """Return every captured exchange (``limit`` of 0 means unlimited)."""
        return self.filter(limit=limit)

    def filter(
        self,
        url_pattern: str = "",
        methods: list[str] | None = None,
        status_codes: list[int] | None = None,
        resource_types: list[str] | None = None,
        limit: int = 0,
    ) -> list[NetworkExchange]:
        """Return exchanges matching the given criteria.

        Args:
            url_pattern: Regex pattern matched against the URL.
            methods: HTTP methods (GET, POST, ...).
            status_codes: HTTP status codes (200, 404, ...).
            resource_types: xhr, fetch, document, script, image, etc.
            limit: Max results (0 = unlimited).
        """
        payload = self._call(
            "network_get_exchanges",
            url_pattern,
            methods or [],
            status_codes or [],
            resource_types or [],
            limit,
        )
        return [self._parse_exchange(raw) for raw in payload.get("exchanges", [])]

    def get(self, exchange_id: str) -> NetworkExchange | None:
        """Look up a single exchange by its ID, or None if absent."""
        raw = self._call("network_get_exchange", exchange_id).get("exchange")
        return self._parse_exchange(raw) if raw else None

    def last(self, url_pattern: str = "") -> NetworkExchange | None:
        """Return the most recent exchange whose URL matches *url_pattern*.

        Args:
            url_pattern: Regex pattern for the URL (empty matches anything).
        """
        raw = self._call("network_get_last", url_pattern).get("exchange")
        return self._parse_exchange(raw) if raw else None

    def clear(self) -> None:
        """Drop every captured exchange."""
        self._call("network_clear")

    def stats(self) -> NetworkStats:
        """Return capture statistics (counters, byte totals, timing average)."""
        payload = self._call("network_stats")
        return NetworkStats(
            enabled=payload.get("enabled", False),
            total_captured=payload.get("total_captured", 0),
            total_errors=payload.get("total_errors", 0),
            total_bytes=payload.get("total_bytes", 0),
            average_duration_ms=payload.get("average_duration_ms", 0),
        )

    def export_har(
        self,
        url_pattern: str = "",
        methods: list[str] | None = None,
        status_codes: list[int] | None = None,
        resource_types: list[str] | None = None,
    ) -> bytes:
        """Export captured exchanges as a HAR document.

        Args:
            url_pattern: Regex pattern matched against the URL.
            methods: HTTP methods filter.
            status_codes: HTTP status codes filter.
            resource_types: Resource types filter.

        Returns:
            HAR JSON as bytes (empty bytes if the server omitted it).
        """
        payload = self._call(
            "network_export_har",
            url_pattern,
            methods or [],
            status_codes or [],
            resource_types or [],
        )
        return payload.get("har_data", b"")

    # === Convenience Methods ===

    def api_calls(self, url_pattern: str = "/api/") -> list[NetworkExchange]:
        """Return XHR/Fetch exchanges whose URL matches *url_pattern*."""
        return self.filter(url_pattern=url_pattern, resource_types=["xhr", "fetch"])

    def last_json(self, url_pattern: str = "") -> Any:
        """Return the JSON body of the most recent matching response, or None."""
        exchange = self.last(url_pattern)
        return exchange.json_body() if exchange else None

    def wait_for(self, url_pattern: str, timeout_ms: int = 30000) -> NetworkExchange | None:
        """Poll until a matching request has been captured.

        Args:
            url_pattern: Regex pattern for the URL.
            timeout_ms: Give up after this many milliseconds.

        Returns:
            The matching exchange, or None on timeout.

        Note:
            This polls ``last()`` every 100 ms, so an exchange captured
            *before* the call also satisfies the wait.
        """
        import time

        deadline = time.time() + timeout_ms / 1000
        while time.time() < deadline:
            found = self.last(url_pattern)
            if found:
                return found
            time.sleep(0.1)

        return None

    # === Internal ===

    def _parse_exchange(self, data: dict[str, Any]) -> NetworkExchange:
        """Build a NetworkExchange from the raw RPC dict."""
        req = data.get("request", {})
        resp = data.get("response")
        tim = data.get("timing", {})

        request = NetworkRequest(
            url=req.get("url", ""),
            method=req.get("method", "GET"),
            headers=req.get("headers", {}),
            body=req.get("body", b""),
            content_type=req.get("content_type", ""),
            resource_type=req.get("resource_type", ""),
        )

        # A response is only present once the exchange has completed.
        response = (
            NetworkResponse(
                status=resp.get("status", 0),
                status_text=resp.get("status_text", ""),
                headers=resp.get("headers", {}),
                body=resp.get("body", b""),
                content_type=resp.get("content_type", ""),
                size=resp.get("size", 0),
                from_cache=resp.get("from_cache", False),
            )
            if resp
            else None
        )

        timing = NetworkTiming(
            started_at_ms=tim.get("started_at_ms", 0),
            ended_at_ms=tim.get("ended_at_ms", 0),
            duration_ms=tim.get("duration_ms", 0),
            wait_time_ms=tim.get("wait_time_ms", 0),
            receive_time_ms=tim.get("receive_time_ms", 0),
        )

        return NetworkExchange(
            id=data.get("id", ""),
            request=request,
            response=response,
            timing=timing,
            error=data.get("error", ""),
            frame_id=data.get("frame_id", ""),
            initiator=data.get("initiator", ""),
        )
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
"""Scroll capability."""
|
|
2
|
+
|
|
3
|
+
import time
|
|
4
|
+
from typing import Any, Callable
|
|
5
|
+
|
|
6
|
+
from cmdop.services.browser.js import (
|
|
7
|
+
build_scroll_js,
|
|
8
|
+
build_scroll_to_bottom_js,
|
|
9
|
+
build_get_scroll_info_js,
|
|
10
|
+
build_infinite_scroll_js,
|
|
11
|
+
)
|
|
12
|
+
from cmdop.services.browser.models import ScrollResult, ScrollInfo, InfiniteScrollResult
|
|
13
|
+
|
|
14
|
+
from ._base import BaseCapability
|
|
15
|
+
from ._helpers import to_dict
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class ScrollCapability(BaseCapability):
    """Scroll operations.

    Usage:
        session.scroll.js("down", 500)
        session.scroll.to_bottom()
        info = session.scroll.info()
    """

    def js(
        self,
        direction: str = "down",
        amount: int = 500,
        selector: str | None = None,
        smooth: bool = True,
        human_like: bool = False,
        container: str | None = None,
    ) -> ScrollResult:
        """Scroll via injected JavaScript. Use when native scrolling misbehaves."""
        script = build_scroll_js(direction, amount, selector, smooth, human_like, container)
        payload = to_dict(self._js(script))
        return ScrollResult(
            success=payload.get("success", False),
            scroll_y=int(payload.get("scrollY", 0)),
            scrolled_by=int(payload.get("scrolledBy", 0)),
            at_bottom=payload.get("atBottom", False),
            error=payload.get("error"),
        )

    def to_bottom(self) -> ScrollResult:
        """Scroll all the way to the bottom of the page."""
        payload = to_dict(self._js(build_scroll_to_bottom_js()))
        return ScrollResult(
            success=payload.get("success", False),
            scroll_y=int(payload.get("scrollY", 0)),
            scrolled_by=int(payload.get("scrolledBy", 0)),
            at_bottom=True,
        )

    def to_element(self, selector: str) -> ScrollResult:
        """Bring the element matching *selector* into view."""
        return self.js(selector=selector)

    def info(self) -> ScrollInfo:
        """Report scroll position plus page and viewport dimensions."""
        payload = to_dict(self._js(build_get_scroll_info_js()))
        return ScrollInfo(
            scroll_x=int(payload.get("scrollX", 0)),
            scroll_y=int(payload.get("scrollY", 0)),
            page_height=int(payload.get("pageHeight", 0)),
            page_width=int(payload.get("pageWidth", 0)),
            viewport_height=int(payload.get("viewportHeight", 0)),
            viewport_width=int(payload.get("viewportWidth", 0)),
            at_bottom=payload.get("atBottom", False),
            at_top=payload.get("atTop", True),
        )

    def native(
        self,
        direction: str = "down",
        amount: int = 500,
        selector: str | None = None,
        smooth: bool = True,
    ) -> ScrollResult:
        """Scroll using the native browser API."""
        payload = self._call("scroll", direction, amount, selector, smooth)
        return ScrollResult(
            success=True,
            scroll_y=payload.get("scroll_y", 0),
            scrolled_by=payload.get("scrolled_by", 0),
            at_bottom=payload.get("at_bottom", False),
        )

    def collect(
        self,
        seen_keys: set[str],
        key_selector: str = "a[href]",
        key_attr: str = "href",
        container_selector: str = "body",
    ) -> InfiniteScrollResult:
        """Extract new keys for infinite-scroll patterns; mutates *seen_keys* in place."""
        script = build_infinite_scroll_js(
            list(seen_keys), key_selector, key_attr, container_selector
        )
        payload = to_dict(self._js(script))
        fresh = payload.get("new_keys", [])
        seen_keys.update(fresh)
        return InfiniteScrollResult(
            new_keys=fresh,
            at_bottom=payload.get("at_bottom", False),
            total_seen=payload.get("total_seen", len(seen_keys)),
            error=payload.get("error"),
        )

    def infinite(
        self,
        extract_fn: Callable[[], list[Any]],
        limit: int = 100,
        max_scrolls: int = 50,
        max_no_new: int = 3,
        scroll_amount: int = 800,
        delay: float = 1.0,
    ) -> list[Any]:
        """Smart infinite scroll with extraction.

        Args:
            extract_fn: Returns the new items each call (dedup is caller's job).
            limit: Stop once this many items are collected.
            max_scrolls: Max scroll attempts.
            max_no_new: Stop after this many consecutive empty extractions.
            scroll_amount: Pixels per scroll.
            delay: Seconds to pause between scrolls.
        """
        collected: list[Any] = []
        stale_rounds = 0

        for _ in range(max_scrolls):
            batch = extract_fn()
            if batch:
                collected.extend(batch)
                stale_rounds = 0
                if len(collected) >= limit:
                    break
            else:
                stale_rounds += 1
                if stale_rounds >= max_no_new:
                    break

            self.js("down", scroll_amount)
            time.sleep(delay)

        return collected[:limit]
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"""Timing capability."""
|
|
2
|
+
|
|
3
|
+
import random
|
|
4
|
+
import time
|
|
5
|
+
import threading
|
|
6
|
+
from typing import Callable, TypeVar
|
|
7
|
+
|
|
8
|
+
from ._base import BaseCapability
|
|
9
|
+
|
|
10
|
+
T = TypeVar("T")
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class TimingCapability(BaseCapability):
    """Timing operations: wait, delays, timeouts.

    Usage:
        session.timing.wait(1000)
        session.timing.random(0.5, 2.0)
        result, ok = session.timing.timeout(lambda: slow_fn(), 30)
    """

    def wait(self, ms: int, jitter: float = 0.1) -> None:
        """Wait *ms* milliseconds with jitter (±10% by default).

        Args:
            ms: Base delay in milliseconds.
            jitter: Relative jitter; the actual delay is drawn uniformly
                from ``ms * (1 ± jitter)``.

        The computed delay is clamped at zero, so a jitter >= 1.0 or a
        non-positive ``ms`` can never make ``time.sleep`` raise ValueError.
        """
        actual = (ms / 1000) * (1 + random.uniform(-jitter, jitter))
        # time.sleep raises ValueError on negative input; clamp defensively.
        time.sleep(max(0.0, actual))

    def seconds(self, sec: float, jitter: float = 0.1) -> None:
        """Wait *sec* seconds with jitter (delegates to :meth:`wait`)."""
        self.wait(int(sec * 1000), jitter)

    def random(self, min_sec: float = 0.5, max_sec: float = 1.5) -> None:
        """Wait a uniformly random time between *min_sec* and *max_sec* seconds."""
        # Clamp so accidental negative bounds cannot crash time.sleep.
        time.sleep(max(0.0, random.uniform(min_sec, max_sec)))

    def timeout(
        self,
        fn: Callable[[], T],
        seconds: float = 60.0,
        on_timeout: Callable[[], None] | None = None,
    ) -> tuple[T | None, bool]:
        """Run *fn* with a timeout. Returns ``(result, success)``.

        Args:
            fn: Zero-argument callable to execute.
            seconds: Maximum time to wait for *fn* to finish.
            on_timeout: Optional cleanup callback invoked on timeout; its
                own exceptions are swallowed.

        Returns:
            ``(result, True)`` if *fn* finished in time, ``(None, False)``
            on timeout.

        Raises:
            Exception: Whatever *fn* raised is re-raised in the caller.

        Note:
            The worker is a daemon thread and is NOT killed on timeout;
            *fn* may keep running in the background after this returns.
        """
        # Single-element lists act as mutable cells shared with the worker.
        result: list[T | None] = [None]
        error: list[Exception | None] = [None]
        done = threading.Event()

        def run() -> None:
            try:
                result[0] = fn()
            except Exception as e:
                error[0] = e
            finally:
                done.set()

        threading.Thread(target=run, daemon=True).start()

        if done.wait(timeout=seconds):
            if error[0] is not None:
                raise error[0]
            return result[0], True

        if on_timeout:
            try:
                on_timeout()
            except Exception:
                pass
        return None, False
|
@@ -7,29 +7,30 @@ This module provides JavaScript code generators for common browser operations:
|
|
|
7
7
|
- Interaction: Hover, select, modals
|
|
8
8
|
"""
|
|
9
9
|
|
|
10
|
-
from
|
|
10
|
+
from .core import (
|
|
11
11
|
parse_json_result,
|
|
12
12
|
build_async_js,
|
|
13
13
|
)
|
|
14
14
|
|
|
15
|
-
from
|
|
15
|
+
from .fetch import (
|
|
16
16
|
build_fetch_js,
|
|
17
17
|
build_fetch_all_js,
|
|
18
18
|
)
|
|
19
19
|
|
|
20
|
-
from
|
|
20
|
+
from .scroll import (
|
|
21
21
|
build_scroll_js,
|
|
22
22
|
build_scroll_to_bottom_js,
|
|
23
23
|
build_infinite_scroll_js,
|
|
24
24
|
build_get_scroll_info_js,
|
|
25
25
|
)
|
|
26
26
|
|
|
27
|
-
from
|
|
27
|
+
from .interaction import (
|
|
28
28
|
build_hover_js,
|
|
29
29
|
build_select_js,
|
|
30
30
|
build_close_modal_js,
|
|
31
31
|
build_click_all_by_text_js,
|
|
32
32
|
build_press_key_js,
|
|
33
|
+
build_click_js,
|
|
33
34
|
)
|
|
34
35
|
|
|
35
36
|
__all__ = [
|
|
@@ -50,4 +51,5 @@ __all__ = [
|
|
|
50
51
|
"build_close_modal_js",
|
|
51
52
|
"build_click_all_by_text_js",
|
|
52
53
|
"build_press_key_js",
|
|
54
|
+
"build_click_js",
|
|
53
55
|
]
|
|
@@ -178,3 +178,37 @@ def build_click_all_by_text_js(text: str, role: str = "button") -> str:
|
|
|
178
178
|
return JSON.stringify({{ clicked: clicked }});
|
|
179
179
|
}})()
|
|
180
180
|
"""
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def build_click_js(selector: str, scroll_into_view: bool = True) -> str:
    """
    Build JS that clicks an element via JavaScript (more reliable than CDP click).

    Handy when a native CDP click hangs or silently fails: the generated
    snippet looks the element up with document.querySelector and invokes
    its .click() method directly.

    Args:
        selector: CSS selector for the element to click
        scroll_into_view: If True, scroll element into view before clicking (default: True)

    Returns:
        JS code that returns { success: true/false, error?: string }
    """
    sel = json.dumps(selector)
    if scroll_into_view:
        scroll_stmt = 'el.scrollIntoView({block: "center", behavior: "instant"});'
    else:
        scroll_stmt = ''

    return f"""
    (function() {{
        const el = document.querySelector({sel});
        if (!el) {{
            return JSON.stringify({{ success: false, error: 'Element not found' }});
        }}
        try {{
            {scroll_stmt}
            el.click();
            return JSON.stringify({{ success: true }});
        }} catch (e) {{
            return JSON.stringify({{ success: false, error: e.message }});
        }}
    }})()
    """