cmdop 0.1.21__py3-none-any.whl → 0.1.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cmdop/__init__.py +1 -1
- cmdop/client.py +2 -8
- cmdop/services/browser/__init__.py +44 -31
- cmdop/services/browser/capabilities/__init__.py +15 -0
- cmdop/services/browser/capabilities/_base.py +28 -0
- cmdop/services/browser/capabilities/_helpers.py +16 -0
- cmdop/services/browser/capabilities/dom.py +76 -0
- cmdop/services/browser/capabilities/fetch.py +46 -0
- cmdop/services/browser/capabilities/input.py +49 -0
- cmdop/services/browser/capabilities/scroll.py +147 -0
- cmdop/services/browser/capabilities/timing.py +66 -0
- cmdop/services/browser/js/__init__.py +6 -4
- cmdop/services/browser/js/interaction.py +34 -0
- cmdop/services/browser/service/__init__.py +5 -0
- cmdop/services/browser/service/aio.py +30 -0
- cmdop/services/browser/{sync/service.py → service/sync.py} +2 -2
- cmdop/services/browser/session.py +166 -0
- {cmdop-0.1.21.dist-info → cmdop-0.1.22.dist-info}/METADATA +69 -60
- {cmdop-0.1.21.dist-info → cmdop-0.1.22.dist-info}/RECORD +22 -18
- cmdop/services/browser/aio/__init__.py +0 -6
- cmdop/services/browser/aio/service.py +0 -420
- cmdop/services/browser/aio/session.py +0 -407
- cmdop/services/browser/base/__init__.py +0 -6
- cmdop/services/browser/base/session.py +0 -124
- cmdop/services/browser/sync/__init__.py +0 -6
- cmdop/services/browser/sync/session.py +0 -644
- /cmdop/services/browser/{base/service.py → service/_helpers.py} +0 -0
- {cmdop-0.1.21.dist-info → cmdop-0.1.22.dist-info}/WHEEL +0 -0
- {cmdop-0.1.21.dist-info → cmdop-0.1.22.dist-info}/licenses/LICENSE +0 -0
cmdop/__init__.py
CHANGED
cmdop/client.py
CHANGED
|
@@ -439,15 +439,9 @@ class AsyncCMDOPClient:
|
|
|
439
439
|
@property
|
|
440
440
|
def browser(self) -> AsyncBrowserService:
|
|
441
441
|
"""
|
|
442
|
-
Async browser service
|
|
442
|
+
Async browser service (stub - not implemented).
|
|
443
443
|
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
Example:
|
|
447
|
-
>>> async with client.browser.create_session() as session:
|
|
448
|
-
... await session.navigate("https://google.com")
|
|
449
|
-
... await session.type("input[name='q']", "Python")
|
|
450
|
-
... results = await session.extract(".result-title")
|
|
444
|
+
Use sync CMDOPClient.browser instead.
|
|
451
445
|
"""
|
|
452
446
|
if self._browser is None:
|
|
453
447
|
self._browser = AsyncBrowserService(self._transport)
|
|
@@ -1,46 +1,59 @@
|
|
|
1
|
-
"""Browser
|
|
2
|
-
|
|
3
|
-
Structure:
|
|
4
|
-
browser/
|
|
5
|
-
models.py - Data models (BrowserCookie, BrowserState)
|
|
6
|
-
js.py - JavaScript code builders
|
|
7
|
-
base/ - Base classes (BaseSession, BaseServiceMixin)
|
|
8
|
-
sync/ - Sync implementation (BrowserSession, BrowserService)
|
|
9
|
-
aio/ - Async implementation (AsyncBrowserSession, AsyncBrowserService)
|
|
1
|
+
"""Browser SDK with capability-based API.
|
|
10
2
|
|
|
11
3
|
Usage:
|
|
12
|
-
|
|
13
|
-
|
|
4
|
+
from cmdop.services.browser import BrowserSession
|
|
5
|
+
|
|
6
|
+
with service.create_session() as session:
|
|
14
7
|
session.navigate("https://example.com")
|
|
15
|
-
data = session.fetch_all(urls, headers={"Accept": "application/json"})
|
|
16
8
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
data = await session.fetch_all(urls, headers={"Accept": "application/json"})
|
|
21
|
-
"""
|
|
9
|
+
# Capabilities
|
|
10
|
+
session.scroll.js("down", 500)
|
|
11
|
+
session.scroll.to_bottom()
|
|
22
12
|
|
|
23
|
-
|
|
13
|
+
session.input.click_js(".button")
|
|
14
|
+
session.input.key("Escape")
|
|
24
15
|
|
|
25
|
-
|
|
26
|
-
from cmdop.services.browser.sync.session import BrowserSession
|
|
27
|
-
from cmdop.services.browser.sync.service import BrowserService
|
|
16
|
+
session.timing.wait(1000)
|
|
28
17
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
18
|
+
soup = session.dom.soup()
|
|
19
|
+
data = session.fetch.json("/api/data")
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from .session import BrowserSession
|
|
23
|
+
from .service.sync import BrowserService
|
|
24
|
+
from .service.aio import AsyncBrowserService
|
|
25
|
+
from .models import (
|
|
26
|
+
BrowserCookie,
|
|
27
|
+
BrowserState,
|
|
28
|
+
PageInfo,
|
|
29
|
+
ScrollResult,
|
|
30
|
+
ScrollInfo,
|
|
31
|
+
InfiniteScrollResult,
|
|
32
|
+
)
|
|
33
|
+
from .capabilities import (
|
|
34
|
+
ScrollCapability,
|
|
35
|
+
InputCapability,
|
|
36
|
+
TimingCapability,
|
|
37
|
+
DOMCapability,
|
|
38
|
+
FetchCapability,
|
|
39
|
+
)
|
|
32
40
|
|
|
33
41
|
__all__ = [
|
|
42
|
+
# Session & Service
|
|
43
|
+
"BrowserSession",
|
|
44
|
+
"BrowserService",
|
|
45
|
+
"AsyncBrowserService",
|
|
34
46
|
# Models
|
|
35
47
|
"BrowserCookie",
|
|
36
48
|
"BrowserState",
|
|
37
49
|
"PageInfo",
|
|
38
50
|
"ScrollResult",
|
|
39
|
-
"
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
"
|
|
43
|
-
|
|
44
|
-
"
|
|
45
|
-
"
|
|
51
|
+
"ScrollInfo",
|
|
52
|
+
"InfiniteScrollResult",
|
|
53
|
+
# Capabilities
|
|
54
|
+
"ScrollCapability",
|
|
55
|
+
"InputCapability",
|
|
56
|
+
"TimingCapability",
|
|
57
|
+
"DOMCapability",
|
|
58
|
+
"FetchCapability",
|
|
46
59
|
]
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""Browser capabilities."""
|
|
2
|
+
|
|
3
|
+
from .scroll import ScrollCapability
|
|
4
|
+
from .input import InputCapability
|
|
5
|
+
from .timing import TimingCapability
|
|
6
|
+
from .dom import DOMCapability
|
|
7
|
+
from .fetch import FetchCapability
|
|
8
|
+
|
|
9
|
+
__all__ = [
|
|
10
|
+
"ScrollCapability",
|
|
11
|
+
"InputCapability",
|
|
12
|
+
"TimingCapability",
|
|
13
|
+
"DOMCapability",
|
|
14
|
+
"FetchCapability",
|
|
15
|
+
]
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"""Base capability class."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
from typing import TYPE_CHECKING, Any
|
|
5
|
+
|
|
6
|
+
if TYPE_CHECKING:
|
|
7
|
+
from ..session import BrowserSession
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class BaseCapability:
    """Common parent of every browser capability.

    A capability is a thin namespace of related browser operations. It
    keeps a reference to the owning session and forwards all real work
    to that session.
    """

    __slots__ = ("_s",)

    def __init__(self, session: "BrowserSession") -> None:
        # The owning session is the only state a capability carries.
        self._s = session

    def _js(self, script: str) -> str:
        """Run ``script`` through the session's ``execute_script``."""
        owner = self._s
        return owner.execute_script(script)

    def _call(self, method: str, *args: Any, **kwargs: Any) -> Any:
        """Forward ``method`` and its arguments to ``session._call_service``."""
        dispatch = self._s._call_service
        return dispatch(method, *args, **kwargs)
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""Shared parsing helpers for capabilities."""
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
from cmdop.services.browser.js import parse_json_result
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def to_dict(raw: str) -> dict[str, Any]:
    """Decode a raw JS result and return it only when it is a dict.

    Anything that ``parse_json_result`` does not turn into a ``dict``
    yields an empty dict instead.
    """
    parsed = parse_json_result(raw)
    if not isinstance(parsed, dict):
        return {}
    return parsed
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def to_list(raw: str) -> list[Any]:
    """Decode a raw JS result and return it only when it is a list.

    Anything that ``parse_json_result`` does not turn into a ``list``
    yields an empty list instead.
    """
    parsed = parse_json_result(raw)
    if not isinstance(parsed, list):
        return []
    return parsed
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"""DOM capability."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
from typing import TYPE_CHECKING
|
|
5
|
+
|
|
6
|
+
from cmdop.services.browser.js import build_select_js, build_close_modal_js
|
|
7
|
+
from cmdop.services.browser.parsing import parse_html, SoupWrapper
|
|
8
|
+
|
|
9
|
+
from ._base import BaseCapability
|
|
10
|
+
from ._helpers import to_dict
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from bs4 import BeautifulSoup
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class DOMCapability(BaseCapability):
    """Page DOM helpers: content extraction, form selects, modals.

    Usage:
        html = session.dom.html()
        soup = session.dom.soup()
        session.dom.select("#country", value="US")
    """

    def html(self, selector: str | None = None) -> str:
        """Return HTML obtained from the ``get_html`` service call."""
        markup = self._call("get_html", selector)
        return markup

    def text(self, selector: str | None = None) -> str:
        """Return text obtained from the ``get_text`` service call."""
        content = self._call("get_text", selector)
        return content

    def soup(self, selector: str | None = None) -> SoupWrapper:
        """Fetch HTML via :meth:`html` and wrap it in a ``SoupWrapper``."""
        markup = self.html(selector)
        return SoupWrapper(html=markup)

    def parse(self, html: str | None = None, selector: str | None = None) -> "BeautifulSoup":
        """Parse HTML with ``parse_html``.

        When ``html`` is ``None`` the markup is fetched first via
        :meth:`html` using ``selector``; otherwise ``selector`` is unused.
        """
        source = self.html(selector) if html is None else html
        return parse_html(source)

    def select(
        self,
        selector: str,
        value: str | None = None,
        text: str | None = None,
    ) -> dict:
        """Run the select-option script and return its result as a dict."""
        raw = self._js(build_select_js(selector, value, text))
        return to_dict(raw)

    def close_modal(self, selectors: list[str] | None = None) -> bool:
        """Run the close-modal script and report its ``success`` flag."""
        outcome = to_dict(self._js(build_close_modal_js(selectors)))
        return outcome.get("success", False)

    def extract(
        self, selector: str, attr: str | None = None, limit: int = 100
    ) -> list[str]:
        """Delegate to the ``extract`` service call."""
        found = self._call("extract", selector, attr, limit)
        return found

    def extract_regex(
        self, pattern: str, from_html: bool = False, limit: int = 100
    ) -> list[str]:
        """Delegate to the ``extract_regex`` service call."""
        matches = self._call("extract_regex", pattern, from_html, limit)
        return matches

    def validate_selectors(self, item: str, fields: dict[str, str]) -> dict:
        """Delegate to the ``validate_selectors`` service call."""
        report = self._call("validate_selectors", item, fields)
        return report

    def extract_data(self, item: str, fields_json: str, limit: int = 100) -> dict:
        """Delegate to the ``extract_data`` service call."""
        payload = self._call("extract_data", item, fields_json, limit)
        return payload
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"""Fetch capability."""
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from cmdop.services.browser.js import (
|
|
6
|
+
build_fetch_js,
|
|
7
|
+
build_fetch_all_js,
|
|
8
|
+
build_async_js,
|
|
9
|
+
parse_json_result,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
from ._base import BaseCapability
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class FetchCapability(BaseCapability):
    """HTTP fetches issued from inside the browser page.

    Usage:
        data = session.fetch.json("/api/data")
        results = session.fetch.all({"a": "/api/a", "b": "/api/b"})
    """

    def json(self, url: str) -> dict | list | None:
        """Run the fetch script for ``url`` and return the parsed result."""
        raw = self._js(build_fetch_js(url))
        return parse_json_result(raw)

    def all(
        self,
        urls: dict[str, str],
        headers: dict[str, str] | None = None,
        credentials: bool = False,
    ) -> dict[str, Any]:
        """Fetch several URLs with one script; returns ``{id: {data, error}}``.

        An empty ``urls`` mapping short-circuits to ``{}`` without touching
        the browser. A result that does not parse to a dict also yields ``{}``.
        """
        if not urls:
            return {}

        script = build_fetch_all_js(urls, headers, credentials)
        # The generated script is re-wrapped by build_async_js, so its
        # "return " text is stripped first.
        # NOTE(review): str.replace removes *every* "return " occurrence,
        # including any inside URL/header literals - confirm against
        # build_fetch_all_js.
        body = script.replace("return ", "")
        parsed = parse_json_result(self._js(build_async_js(body)))
        if isinstance(parsed, dict):
            return parsed
        return {}

    def execute(self, code: str) -> dict | list | str | None:
        """Wrap ``code`` with ``build_async_js``, run it, and parse the result."""
        raw = self._js(build_async_js(code))
        return parse_json_result(raw)
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
"""Input capability."""
|
|
2
|
+
|
|
3
|
+
from cmdop.services.browser.js import (
|
|
4
|
+
build_click_js,
|
|
5
|
+
build_press_key_js,
|
|
6
|
+
build_click_all_by_text_js,
|
|
7
|
+
build_hover_js,
|
|
8
|
+
)
|
|
9
|
+
|
|
10
|
+
from ._base import BaseCapability
|
|
11
|
+
from ._helpers import to_dict
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class InputCapability(BaseCapability):
    """Pointer and keyboard input: clicks, key presses, hover.

    Usage:
        session.input.click_js(".button")
        session.input.key("Escape")
        session.input.click_all("See more")
    """

    def click_js(self, selector: str, scroll_into_view: bool = True) -> bool:
        """Click ``selector`` via an injected script; returns its ``success`` flag."""
        outcome = to_dict(self._js(build_click_js(selector, scroll_into_view)))
        return outcome.get("success", False)

    def key(self, key: str, selector: str | None = None) -> bool:
        """Press a keyboard key (e.g. Escape, Enter, Tab, ArrowDown).

        Returns the script's ``success`` flag.
        """
        outcome = to_dict(self._js(build_press_key_js(key, selector)))
        return outcome.get("success", False)

    def click_all(self, text: str, role: str = "button") -> int:
        """Run the click-all-by-text script and return its ``clicked`` count."""
        outcome = to_dict(self._js(build_click_all_by_text_js(text, role)))
        return outcome.get("clicked", 0)

    def hover_js(self, selector: str) -> bool:
        """Hover ``selector`` via an injected script; returns its ``success`` flag."""
        outcome = to_dict(self._js(build_hover_js(selector)))
        return outcome.get("success", False)

    def hover(self, selector: str, timeout_ms: int = 5000) -> None:
        """Hover through the ``hover`` service call (result is discarded)."""
        self._call("hover", selector, timeout_ms)

    def mouse_move(self, x: int, y: int, steps: int = 10) -> None:
        """Move the mouse through the ``mouse_move`` service call."""
        self._call("mouse_move", x, y, steps)
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
"""Scroll capability."""
|
|
2
|
+
|
|
3
|
+
import time
|
|
4
|
+
from typing import Any, Callable
|
|
5
|
+
|
|
6
|
+
from cmdop.services.browser.js import (
|
|
7
|
+
build_scroll_js,
|
|
8
|
+
build_scroll_to_bottom_js,
|
|
9
|
+
build_get_scroll_info_js,
|
|
10
|
+
build_infinite_scroll_js,
|
|
11
|
+
)
|
|
12
|
+
from cmdop.services.browser.models import ScrollResult, ScrollInfo, InfiniteScrollResult
|
|
13
|
+
|
|
14
|
+
from ._base import BaseCapability
|
|
15
|
+
from ._helpers import to_dict
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class ScrollCapability(BaseCapability):
    """Scroll operations.

    Usage:
        session.scroll.js("down", 500)
        session.scroll.to_bottom()
        info = session.scroll.info()
    """

    @staticmethod
    def _as_int(value: Any) -> int:
        """Coerce a numeric script field to ``int``; missing/null counts as 0.

        Script results arrive as JSON, where a field may be absent or
        ``null``; ``int(None)`` would raise ``TypeError``.
        """
        return 0 if value is None else int(value)

    def js(
        self,
        direction: str = "down",
        amount: int = 500,
        selector: str | None = None,
        smooth: bool = True,
        human_like: bool = False,
        container: str | None = None,
    ) -> ScrollResult:
        """Scroll using JavaScript. Use when native scroll doesn't work.

        Builds the script with ``build_scroll_js``, runs it in the page and
        maps the ``success``/``scrollY``/``scrolledBy``/``atBottom``/``error``
        fields of its result onto a ``ScrollResult``.
        """
        script = build_scroll_js(direction, amount, selector, smooth, human_like, container)
        data = to_dict(self._js(script))
        return ScrollResult(
            success=data.get("success", False),
            scroll_y=self._as_int(data.get("scrollY")),
            scrolled_by=self._as_int(data.get("scrolledBy")),
            at_bottom=data.get("atBottom", False),
            error=data.get("error"),
        )

    def to_bottom(self) -> ScrollResult:
        """Scroll to page bottom.

        ``at_bottom`` is always reported as ``True``; check ``success`` and
        ``error`` to learn whether the script actually ran.
        """
        data = to_dict(self._js(build_scroll_to_bottom_js()))
        return ScrollResult(
            success=data.get("success", False),
            scroll_y=self._as_int(data.get("scrollY")),
            scrolled_by=self._as_int(data.get("scrolledBy")),
            at_bottom=True,
            # Surface the script's error the same way js() does instead of
            # silently dropping it.
            error=data.get("error"),
        )

    def to_element(self, selector: str) -> ScrollResult:
        """Scroll element into view (delegates to :meth:`js` with ``selector``)."""
        return self.js(selector=selector)

    def info(self) -> ScrollInfo:
        """Get scroll position and page dimensions as a ``ScrollInfo``."""
        data = to_dict(self._js(build_get_scroll_info_js()))
        return ScrollInfo(
            scroll_x=self._as_int(data.get("scrollX")),
            scroll_y=self._as_int(data.get("scrollY")),
            page_height=self._as_int(data.get("pageHeight")),
            page_width=self._as_int(data.get("pageWidth")),
            viewport_height=self._as_int(data.get("viewportHeight")),
            viewport_width=self._as_int(data.get("viewportWidth")),
            at_bottom=data.get("atBottom", False),
            at_top=data.get("atTop", True),
        )

    def native(
        self,
        direction: str = "down",
        amount: int = 500,
        selector: str | None = None,
        smooth: bool = True,
    ) -> ScrollResult:
        """Scroll using the ``scroll`` service call.

        ``success`` is always ``True`` here; the remaining fields are read
        from the mapping returned by the service call.
        """
        data = self._call("scroll", direction, amount, selector, smooth)
        return ScrollResult(
            success=True,
            scroll_y=data.get("scroll_y", 0),
            scrolled_by=data.get("scrolled_by", 0),
            at_bottom=data.get("at_bottom", False),
        )

    def collect(
        self,
        seen_keys: set[str],
        key_selector: str = "a[href]",
        key_attr: str = "href",
        container_selector: str = "body",
    ) -> InfiniteScrollResult:
        """Extract new keys for infinite scroll patterns.

        ``seen_keys`` is updated in place with the keys the script reports
        as new.
        """
        script = build_infinite_scroll_js(
            list(seen_keys), key_selector, key_attr, container_selector
        )
        data = to_dict(self._js(script))
        # "or []" also covers a JSON null, which set.update() would reject.
        new_keys = data.get("new_keys") or []
        seen_keys.update(new_keys)
        return InfiniteScrollResult(
            new_keys=new_keys,
            at_bottom=data.get("at_bottom", False),
            total_seen=data.get("total_seen", len(seen_keys)),
            error=data.get("error"),
        )

    def infinite(
        self,
        extract_fn: Callable[[], list[Any]],
        limit: int = 100,
        max_scrolls: int = 50,
        max_no_new: int = 3,
        scroll_amount: int = 800,
        delay: float = 1.0,
    ) -> list[Any]:
        """Smart infinite scroll with extraction.

        Args:
            extract_fn: Returns new items each call (dedup is caller's job)
            limit: Stop after this many items
            max_scrolls: Max scroll attempts
            max_no_new: Stop after N scrolls with no new items
            scroll_amount: Pixels per scroll
            delay: Seconds between scrolls

        Returns:
            The collected items, truncated to at most ``limit`` entries.
        """
        items: list[Any] = []
        no_new = 0

        for _ in range(max_scrolls):
            new = extract_fn()
            if new:
                items.extend(new)
                no_new = 0  # any progress resets the stall counter
                if len(items) >= limit:
                    break
            else:
                no_new += 1
                if no_new >= max_no_new:
                    break

            self.js("down", scroll_amount)
            time.sleep(delay)

        return items[:limit]
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"""Timing capability."""
|
|
2
|
+
|
|
3
|
+
import random
|
|
4
|
+
import time
|
|
5
|
+
import threading
|
|
6
|
+
from typing import Callable, TypeVar
|
|
7
|
+
|
|
8
|
+
from ._base import BaseCapability
|
|
9
|
+
|
|
10
|
+
T = TypeVar("T")
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class TimingCapability(BaseCapability):
    """Timing operations: wait, delays, timeouts.

    Usage:
        session.timing.wait(1000)
        session.timing.random(0.5, 2.0)
        result, ok = session.timing.timeout(lambda: slow_fn(), 30)
    """

    def wait(self, ms: int, jitter: float = 0.1) -> None:
        """Wait milliseconds with jitter (±10% by default).

        The jittered duration is clamped at zero, because ``time.sleep``
        raises ``ValueError`` for negative values (possible when
        ``jitter`` exceeds 1 or ``ms`` is negative).
        """
        actual = (ms / 1000) * (1 + random.uniform(-jitter, jitter))
        time.sleep(max(0.0, actual))

    def seconds(self, sec: float, jitter: float = 0.1) -> None:
        """Wait seconds with jitter (converted to whole milliseconds)."""
        self.wait(int(sec * 1000), jitter)

    def random(self, min_sec: float = 0.5, max_sec: float = 1.5) -> None:
        """Wait random time between min and max seconds.

        The computed duration is clamped at zero so that unusual bounds
        (negative, or ``max_sec`` far below ``min_sec``) cannot make
        ``time.sleep`` raise ``ValueError``.
        """
        duration = min_sec + random.random() * (max_sec - min_sec)
        time.sleep(max(0.0, duration))

    def timeout(
        self,
        fn: Callable[[], T],
        seconds: float = 60.0,
        on_timeout: Callable[[], None] | None = None,
    ) -> tuple[T | None, bool]:
        """Run function with timeout. Returns (result, success).

        ``fn`` runs on a daemon thread. If it finishes within ``seconds``,
        its return value is returned with ``True``; an ``Exception`` it
        raised is re-raised in the calling thread. On timeout,
        ``on_timeout`` (if given) is invoked best-effort and
        ``(None, False)`` is returned. The worker thread is not cancelled
        and keeps running in the background.
        """
        import logging

        result: list[T | None] = [None]
        error: list[Exception | None] = [None]
        done = threading.Event()

        def run():
            try:
                result[0] = fn()
            except Exception as e:
                error[0] = e
            finally:
                done.set()

        threading.Thread(target=run, daemon=True).start()

        if done.wait(timeout=seconds):
            # Identity check: an exception type may define __len__/__bool__
            # and be falsy, which a truthiness test would silently drop.
            if error[0] is not None:
                raise error[0]
            return result[0], True

        if on_timeout is not None:
            try:
                on_timeout()
            except Exception:
                # Still best-effort, but leave a trace instead of hiding
                # the failure completely.
                logging.getLogger(__name__).debug(
                    "on_timeout callback failed", exc_info=True
                )
        return None, False
|
|
@@ -7,29 +7,30 @@ This module provides JavaScript code generators for common browser operations:
|
|
|
7
7
|
- Interaction: Hover, select, modals
|
|
8
8
|
"""
|
|
9
9
|
|
|
10
|
-
from
|
|
10
|
+
from .core import (
|
|
11
11
|
parse_json_result,
|
|
12
12
|
build_async_js,
|
|
13
13
|
)
|
|
14
14
|
|
|
15
|
-
from
|
|
15
|
+
from .fetch import (
|
|
16
16
|
build_fetch_js,
|
|
17
17
|
build_fetch_all_js,
|
|
18
18
|
)
|
|
19
19
|
|
|
20
|
-
from
|
|
20
|
+
from .scroll import (
|
|
21
21
|
build_scroll_js,
|
|
22
22
|
build_scroll_to_bottom_js,
|
|
23
23
|
build_infinite_scroll_js,
|
|
24
24
|
build_get_scroll_info_js,
|
|
25
25
|
)
|
|
26
26
|
|
|
27
|
-
from
|
|
27
|
+
from .interaction import (
|
|
28
28
|
build_hover_js,
|
|
29
29
|
build_select_js,
|
|
30
30
|
build_close_modal_js,
|
|
31
31
|
build_click_all_by_text_js,
|
|
32
32
|
build_press_key_js,
|
|
33
|
+
build_click_js,
|
|
33
34
|
)
|
|
34
35
|
|
|
35
36
|
__all__ = [
|
|
@@ -50,4 +51,5 @@ __all__ = [
|
|
|
50
51
|
"build_close_modal_js",
|
|
51
52
|
"build_click_all_by_text_js",
|
|
52
53
|
"build_press_key_js",
|
|
54
|
+
"build_click_js",
|
|
53
55
|
]
|
|
@@ -178,3 +178,37 @@ def build_click_all_by_text_js(text: str, role: str = "button") -> str:
|
|
|
178
178
|
return JSON.stringify({{ clicked: clicked }});
|
|
179
179
|
}})()
|
|
180
180
|
"""
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def build_click_js(selector: str, scroll_into_view: bool = True) -> str:
    """
    Generate a script that clicks an element from inside the page.

    The script looks the element up with document.querySelector and calls
    .click() on it directly; it is intended as a fallback for cases where
    a native CDP click hangs or misbehaves.

    Args:
        selector: CSS selector for the element to click
        scroll_into_view: If True, scroll element into view before clicking (default: True)

    Returns:
        JS code that returns { success: true/false, error?: string }
    """
    if scroll_into_view:
        scroll_stmt = 'el.scrollIntoView({block: "center", behavior: "instant"});'
    else:
        scroll_stmt = ''
    # json.dumps yields a quoted, escaped JS string literal for the selector.
    target = json.dumps(selector)

    return f"""
    (function() {{
        const el = document.querySelector({target});
        if (!el) {{
            return JSON.stringify({{ success: false, error: 'Element not found' }});
        }}
        try {{
            {scroll_stmt}
            el.click();
            return JSON.stringify({{ success: true }});
        }} catch (e) {{
            return JSON.stringify({{ success: false, error: e.message }});
        }}
    }})()
    """
|