cmdop-0.1.21-py3-none-any.whl → cmdop-0.1.22-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. cmdop/__init__.py +1 -1
  2. cmdop/client.py +2 -8
  3. cmdop/services/browser/__init__.py +44 -31
  4. cmdop/services/browser/capabilities/__init__.py +15 -0
  5. cmdop/services/browser/capabilities/_base.py +28 -0
  6. cmdop/services/browser/capabilities/_helpers.py +16 -0
  7. cmdop/services/browser/capabilities/dom.py +76 -0
  8. cmdop/services/browser/capabilities/fetch.py +46 -0
  9. cmdop/services/browser/capabilities/input.py +49 -0
  10. cmdop/services/browser/capabilities/scroll.py +147 -0
  11. cmdop/services/browser/capabilities/timing.py +66 -0
  12. cmdop/services/browser/js/__init__.py +6 -4
  13. cmdop/services/browser/js/interaction.py +34 -0
  14. cmdop/services/browser/service/__init__.py +5 -0
  15. cmdop/services/browser/service/aio.py +30 -0
  16. cmdop/services/browser/{sync/service.py → service/sync.py} +2 -2
  17. cmdop/services/browser/session.py +166 -0
  18. {cmdop-0.1.21.dist-info → cmdop-0.1.22.dist-info}/METADATA +69 -60
  19. {cmdop-0.1.21.dist-info → cmdop-0.1.22.dist-info}/RECORD +22 -18
  20. cmdop/services/browser/aio/__init__.py +0 -6
  21. cmdop/services/browser/aio/service.py +0 -420
  22. cmdop/services/browser/aio/session.py +0 -407
  23. cmdop/services/browser/base/__init__.py +0 -6
  24. cmdop/services/browser/base/session.py +0 -124
  25. cmdop/services/browser/sync/__init__.py +0 -6
  26. cmdop/services/browser/sync/session.py +0 -644
  27. cmdop/services/browser/{base/service.py → service/_helpers.py} +0 -0
  28. {cmdop-0.1.21.dist-info → cmdop-0.1.22.dist-info}/WHEEL +0 -0
  29. {cmdop-0.1.21.dist-info → cmdop-0.1.22.dist-info}/licenses/LICENSE +0 -0
@@ -1,407 +0,0 @@ cmdop/services/browser/aio/session.py
1
- """Asynchronous browser session."""
2
-
3
- from __future__ import annotations
4
-
5
- from typing import TYPE_CHECKING, Any, Awaitable, Callable, TypeVar
6
-
7
- import asyncio
8
-
9
- from cmdop.logging import get_logger
10
-
11
- _log = get_logger("cmdop.browser")
12
-
13
- T = TypeVar("T")
14
-
15
- from cmdop.services.browser.base.session import BaseSession
16
- from cmdop.services.browser.models import (
17
- BrowserCookie,
18
- BrowserState,
19
- PageInfo,
20
- ScrollInfo,
21
- ScrollResult,
22
- )
23
- from cmdop.services.browser.js import parse_json_result
24
- from cmdop.services.browser.parsing import parse_html as _parse_html, SoupWrapper
25
-
26
- if TYPE_CHECKING:
27
- from cmdop.services.browser.aio.service import AsyncBrowserService
28
-
29
-
30
- class AsyncBrowserSession(BaseSession):
31
- """
32
- Asynchronous browser session with fluent API.
33
-
34
- Usage:
35
- async with client.browser.create_session() as session:
36
- await session.navigate("https://example.com")
37
- await session.click("button.submit")
38
- data = await session.fetch_all(urls) # credentials + accept header by default
39
- """
40
-
41
- _service: AsyncBrowserService
42
-
43
- def _call_service(self, method: str, *args: Any, **kwargs: Any) -> Any:
44
- """Not used in async - methods call service directly with await."""
45
- raise NotImplementedError("Use async methods directly")
46
-
47
- # === Navigation & Interaction ===
48
-
49
- async def navigate(self, url: str, timeout_ms: int = 30000) -> str:
50
- """Navigate to URL. Returns final URL."""
51
- return await self._service.navigate(self._session_id, url, timeout_ms)
52
-
53
- async def click(
54
- self,
55
- selector: str,
56
- timeout_ms: int = 5000,
57
- move_cursor: bool = False,
58
- ) -> None:
59
- """
60
- Click element by CSS selector.
61
-
62
- Args:
63
- selector: CSS selector
64
- timeout_ms: Timeout in milliseconds
65
- move_cursor: If True, move cursor to element before clicking (human-like)
66
- """
67
- await self._service.click(self._session_id, selector, timeout_ms, move_cursor)
68
-
69
- async def type(
70
- self,
71
- selector: str,
72
- text: str,
73
- human_like: bool = False,
74
- clear_first: bool = True,
75
- ) -> None:
76
- """Type text into element."""
77
- await self._service.type(
78
- self._session_id, selector, text, human_like, clear_first
79
- )
80
-
81
- async def wait_for(self, selector: str, timeout_ms: int = 30000) -> bool:
82
- """Wait for element to appear."""
83
- return await self._service.wait_for(self._session_id, selector, timeout_ms)
84
-
85
- # === Extraction ===
86
-
87
- async def extract(
88
- self, selector: str, attr: str | None = None, limit: int = 100
89
- ) -> list[str]:
90
- """Extract text/attributes from elements."""
91
- return await self._service.extract(self._session_id, selector, attr, limit)
92
-
93
- async def extract_regex(
94
- self, pattern: str, from_html: bool = False, limit: int = 100
95
- ) -> list[str]:
96
- """Extract data using regex pattern."""
97
- return await self._service.extract_regex(
98
- self._session_id, pattern, from_html, limit
99
- )
100
-
101
- async def get_html(self, selector: str | None = None) -> str:
102
- """Get page HTML."""
103
- return await self._service.get_html(self._session_id, selector)
104
-
105
- async def get_text(self, selector: str | None = None) -> str:
106
- """Get page text content."""
107
- return await self._service.get_text(self._session_id, selector)
108
-
109
- # === JavaScript Execution ===
110
-
111
- async def execute_script(self, script: str) -> str:
112
- """Execute JavaScript (raw, no wrapper)."""
113
- return await self._service.execute_script(self._session_id, script)
114
-
115
- async def execute_js(self, code: str, raw: bool = False) -> dict | list | str | None:
116
- """
117
- Execute async JavaScript with auto-wrap.
118
-
119
- Code is wrapped in async IIFE with try/catch and JSON serialization.
120
-
121
- Args:
122
- code: JS code to execute (can use await)
123
- raw: If True, return raw JSON string. Default False (parse to dict/list).
124
- """
125
- js = self._build_execute_js(code)
126
- result = await self.execute_script(js)
127
- return self._parse_execute_js(result, raw)
128
-
129
- async def fetch_json(self, url: str) -> dict | list | None:
130
- """Fetch JSON from URL using JS fetch()."""
131
- js = self._build_fetch_json(url)
132
- result = await self.execute_script(js)
133
- return parse_json_result(result)
134
-
135
- async def fetch_all(
136
- self,
137
- urls: dict[str, str],
138
- headers: dict[str, str] | None = None,
139
- credentials: bool = False,
140
- ) -> dict[str, Any]:
141
- """
142
- Fetch multiple URLs in parallel.
143
-
144
- Args:
145
- urls: Dict of {id: url} to fetch
146
- headers: Optional headers (accept: application/json by default)
147
- credentials: Include credentials/cookies (default False, may break CORS)
148
-
149
- Returns:
150
- Dict of {id: {data: ..., error: ...}}
151
- """
152
- if not urls:
153
- return {}
154
- js = self._build_fetch_all(urls, headers, credentials)
155
- result = await self.execute_js(js)
156
- return self._parse_fetch_all(result)
157
-
158
- # === State & Cookies ===
159
-
160
- async def screenshot(self, full_page: bool = False) -> bytes:
161
- """Take screenshot."""
162
- return await self._service.screenshot(self._session_id, full_page)
163
-
164
- async def get_state(self) -> BrowserState:
165
- """Get current browser state."""
166
- return await self._service.get_state(self._session_id)
167
-
168
- async def set_cookies(self, cookies: list[BrowserCookie | dict]) -> None:
169
- """Set browser cookies."""
170
- await self._service.set_cookies(self._session_id, cookies)
171
-
172
- async def get_cookies(self, domain: str = "") -> list[BrowserCookie]:
173
- """Get browser cookies."""
174
- return await self._service.get_cookies(self._session_id, domain)
175
-
176
- # === Parser helpers ===
177
-
178
- async def validate_selectors(self, item: str, fields: dict[str, str]) -> dict:
179
- """Validate CSS selectors on page."""
180
- return await self._service.validate_selectors(self._session_id, item, fields)
181
-
182
- async def extract_data(self, item: str, fields_json: str, limit: int = 100) -> dict:
183
- """Extract structured data from page."""
184
- return await self._service.extract_data(
185
- self._session_id, item, fields_json, limit
186
- )
187
-
188
- # === HTML Parsing ===
189
-
190
- async def parse_html(self, html: str | None = None, selector: str | None = None) -> "BeautifulSoup":
191
- """Parse HTML with BeautifulSoup."""
192
- if html is None:
193
- html = await self.get_html(selector)
194
- return _parse_html(html)
195
-
196
- async def soup(self, selector: str | None = None) -> SoupWrapper:
197
- """Get page HTML as SoupWrapper."""
198
- html = await self.get_html(selector)
199
- return SoupWrapper(html=html)
200
-
201
- # === Mouse & Scroll (native, not JS) ===
202
-
203
- async def mouse_move(self, x: int, y: int, steps: int = 10) -> None:
204
- """
205
- Move mouse to coordinates with human-like movement.
206
-
207
- Args:
208
- x: Target X coordinate
209
- y: Target Y coordinate
210
- steps: Number of intermediate steps (1 = instant, >1 = smooth)
211
-
212
- Example:
213
- await browser.mouse_move(500, 300) # Smooth move
214
- await browser.mouse_move(100, 100, steps=1) # Instant move
215
- """
216
- await self._service.mouse_move(self._session_id, x, y, steps)
217
-
218
- async def scroll(
219
- self,
220
- direction: str = "down",
221
- amount: int = 500,
222
- selector: str | None = None,
223
- smooth: bool = True,
224
- ) -> ScrollResult:
225
- """
226
- Scroll the page (native, not JS).
227
-
228
- Args:
229
- direction: "up", "down", "left", "right"
230
- amount: Pixels to scroll
231
- selector: If provided, scroll element into view instead
232
- smooth: Use smooth scroll animation (default True)
233
-
234
- Returns:
235
- ScrollResult with position info
236
-
237
- Example:
238
- # Fast scroll
239
- await browser.scroll("down", 800)
240
-
241
- # Scroll element into view
242
- await browser.scroll(selector=".target-element")
243
- """
244
- result = await self._service.scroll(self._session_id, direction, amount, selector or "", smooth)
245
- return result
246
-
247
- async def scroll_to(self, selector: str) -> ScrollResult:
248
- """Scroll element into view."""
249
- return await self.scroll(selector=selector)
250
-
251
- async def scroll_to_bottom(self) -> ScrollResult:
252
- """Scroll to page bottom."""
253
- js = self._build_scroll_to_bottom()
254
- result = await self.execute_script(js)
255
- data = parse_json_result(result) or {}
256
- return ScrollResult(
257
- success=data.get("success", False),
258
- scroll_y=int(data.get("scrollY", 0)),
259
- scrolled_by=int(data.get("scrolledBy", 0)),
260
- at_bottom=True,
261
- )
262
-
263
- async def get_scroll_info(self) -> ScrollInfo:
264
- """Get current scroll position and page dimensions (JS-based)."""
265
- js = self._build_get_scroll_info()
266
- result = await self.execute_script(js)
267
- data = parse_json_result(result) or {}
268
- return ScrollInfo(
269
- scroll_x=int(data.get("scrollX", 0)),
270
- scroll_y=int(data.get("scrollY", 0)),
271
- page_height=int(data.get("pageHeight", 0)),
272
- page_width=int(data.get("pageWidth", 0)),
273
- viewport_height=int(data.get("viewportHeight", 0)),
274
- viewport_width=int(data.get("viewportWidth", 0)),
275
- at_bottom=data.get("atBottom", False),
276
- at_top=data.get("atTop", True),
277
- )
278
-
279
- async def get_page_info(self) -> PageInfo:
280
- """Get comprehensive page info (native)."""
281
- return await self._service.get_page_info(self._session_id)
282
-
283
- # === UI Interaction Helpers ===
284
-
285
- async def hover(self, selector: str, timeout_ms: int = 5000) -> None:
286
- """Hover over element (native)."""
287
- await self._service.hover(self._session_id, selector, timeout_ms)
288
-
289
- async def select(self, selector: str, value: str | None = None, text: str | None = None) -> dict:
290
- """Select option from dropdown."""
291
- js = self._build_select(selector, value, text)
292
- result = await self.execute_script(js)
293
- return parse_json_result(result) or {}
294
-
295
- async def close_modal(self, selectors: list[str] | None = None) -> bool:
296
- """Try to close modal/dialog."""
297
- js = self._build_close_modal(selectors)
298
- result = await self.execute_script(js)
299
- data = parse_json_result(result) or {}
300
- return data.get("success", False)
301
-
302
- async def wait(self, ms: int, jitter: float = 0.1) -> None:
303
- """
304
- Wait for specified milliseconds with optional jitter.
305
-
306
- Args:
307
- ms: Wait time in milliseconds
308
- jitter: Random variation ±jitter (default 10%, so 1000ms becomes 900-1100ms)
309
- """
310
- import random
311
- actual = (ms / 1000) * (1 + random.uniform(-jitter, jitter))
312
- await asyncio.sleep(actual)
313
-
314
- async def wait_seconds(self, seconds: float, jitter: float = 0.1) -> None:
315
- """
316
- Wait for specified seconds with optional jitter.
317
-
318
- Args:
319
- seconds: Base wait time in seconds
320
- jitter: Random variation ±jitter (default 10%, so 1.0s becomes 0.9-1.1s)
321
- """
322
- await self.wait(int(seconds * 1000), jitter)
323
-
324
- async def wait_random(self, min_sec: float = 0.5, max_sec: float = 1.5) -> None:
325
- """Wait for random time between min and max seconds."""
326
- import random
327
- await asyncio.sleep(min_sec + random.random() * (max_sec - min_sec))
328
-
329
- async def with_timeout(
330
- self,
331
- coro: Awaitable[T],
332
- timeout_sec: float = 60.0,
333
- on_timeout: Callable[[], Awaitable[None]] | None = None,
334
- ) -> tuple[T | None, bool]:
335
- """
336
- Run a coroutine with a timeout. Skips if it hangs.
337
-
338
- Args:
339
- coro: Coroutine to run
340
- timeout_sec: Timeout in seconds (default 60)
341
- on_timeout: Optional async cleanup function to call on timeout
342
-
343
- Returns:
344
- Tuple of (result, success). If timeout, returns (None, False).
345
-
346
- Example:
347
- # Simple usage
348
- result, ok = await browser.with_timeout(
349
- process_listing(browser, listing),
350
- timeout_sec=30,
351
- )
352
- if not ok:
353
- print("Skipped due to timeout")
354
-
355
- # With cleanup
356
- result, ok = await browser.with_timeout(
357
- enrich_listing(browser, item),
358
- timeout_sec=60,
359
- on_timeout=lambda: browser.press_key('Escape'),
360
- )
361
- """
362
- try:
363
- result = await asyncio.wait_for(coro, timeout=timeout_sec)
364
- return result, True
365
- except asyncio.TimeoutError:
366
- _log.warning("[timeout] Coroutine timed out after %.1fs", timeout_sec)
367
- if on_timeout:
368
- try:
369
- await on_timeout()
370
- except Exception as e:
371
- _log.debug("[timeout] Cleanup failed: %s", e)
372
- return None, False
373
-
374
- async def click_all_by_text(self, text: str, role: str = "button") -> int:
375
- """Click all elements containing specific text."""
376
- js = self._build_click_all_by_text(text, role)
377
- result = await self.execute_script(js)
378
- data = parse_json_result(result) or {}
379
- return data.get("clicked", 0)
380
-
381
- async def press_key(self, key: str, selector: str | None = None) -> bool:
382
- """
383
- Press a keyboard key.
384
-
385
- Args:
386
- key: Key to press (e.g., 'Escape', 'Enter', 'Tab', 'ArrowDown')
387
- selector: Optional CSS selector to target. If None, targets activeElement.
388
-
389
- Returns:
390
- True if key was pressed successfully
391
- """
392
- js = self._build_press_key(key, selector)
393
- result = await self.execute_script(js)
394
- data = parse_json_result(result) or {}
395
- return data.get("success", False)
396
-
397
- # === Context Manager ===
398
-
399
- async def close(self) -> None:
400
- """Close browser session."""
401
- await self._service.close_session(self._session_id)
402
-
403
- async def __aenter__(self) -> "AsyncBrowserSession":
404
- return self
405
-
406
- async def __aexit__(self, *args: Any) -> None:
407
- await self.close()
@@ -1,6 +0,0 @@ cmdop/services/browser/base/__init__.py
1
- """Base classes for browser sessions and services."""
2
-
3
- from cmdop.services.browser.base.session import BaseSession
4
- from cmdop.services.browser.base.service import BaseServiceMixin
5
-
6
- __all__ = ["BaseSession", "BaseServiceMixin"]
@@ -1,124 +0,0 @@ cmdop/services/browser/base/session.py
1
- """Base session class with shared logic."""
2
-
3
- from __future__ import annotations
4
-
5
- from abc import ABC, abstractmethod
6
- from typing import TYPE_CHECKING, Any
7
-
8
- from cmdop.services.browser.models import BrowserCookie, BrowserState
9
- from cmdop.services.browser.js import (
10
- build_async_js,
11
- build_fetch_js,
12
- build_fetch_all_js,
13
- build_scroll_js,
14
- build_scroll_to_bottom_js,
15
- build_get_scroll_info_js,
16
- build_hover_js,
17
- build_select_js,
18
- build_close_modal_js,
19
- build_click_all_by_text_js,
20
- build_press_key_js,
21
- parse_json_result,
22
- )
23
-
24
- if TYPE_CHECKING:
25
- from cmdop.services.browser.base.service import BaseServiceMixin
26
-
27
-
28
- class BaseSession(ABC):
29
- """
30
- Abstract base for browser sessions.
31
-
32
- Subclasses must implement:
33
- - _execute_script(script) -> str
34
- - All service delegation methods (navigate, click, etc.)
35
- - Context manager methods (__enter__/__exit__ or __aenter__/__aexit__)
36
- """
37
-
38
- def __init__(self, service: BaseServiceMixin, session_id: str) -> None:
39
- self._service = service
40
- self._session_id = session_id
41
-
42
- @property
43
- def session_id(self) -> str:
44
- return self._session_id
45
-
46
- # === Abstract methods (sync/async differ) ===
47
-
48
- @abstractmethod
49
- def _call_service(self, method: str, *args: Any, **kwargs: Any) -> Any:
50
- """Call service method. Sync returns value, async returns coroutine."""
51
- ...
52
-
53
- # === Shared helpers (pure logic, no I/O) ===
54
-
55
- def _build_execute_js(self, code: str) -> str:
56
- """Build wrapped async JS code."""
57
- return build_async_js(code)
58
-
59
- def _parse_execute_js(self, result: str, raw: bool) -> dict | list | str | None:
60
- """Parse execute_js result."""
61
- if raw:
62
- return result
63
- return parse_json_result(result)
64
-
65
- def _build_fetch_json(self, url: str) -> str:
66
- """Build fetch JS for single URL."""
67
- return build_fetch_js(url)
68
-
69
- def _build_fetch_all(
70
- self,
71
- urls: dict[str, str],
72
- headers: dict[str, str] | None,
73
- credentials: bool,
74
- ) -> str:
75
- """Build fetch_all JS code."""
76
- return build_fetch_all_js(urls, headers, credentials)
77
-
78
- def _parse_fetch_all(self, result: Any) -> dict[str, Any]:
79
- """Ensure fetch_all returns dict."""
80
- return result if isinstance(result, dict) else {}
81
-
82
- # === Scroll helpers ===
83
-
84
- def _build_scroll(
85
- self,
86
- direction: str,
87
- amount: int,
88
- selector: str | None,
89
- smooth: bool,
90
- human_like: bool,
91
- container: str | None = None,
92
- ) -> str:
93
- """Build scroll JS."""
94
- return build_scroll_js(direction, amount, selector, smooth, human_like, container)
95
-
96
- def _build_scroll_to_bottom(self) -> str:
97
- """Build scroll to bottom JS."""
98
- return build_scroll_to_bottom_js()
99
-
100
- def _build_get_scroll_info(self) -> str:
101
- """Build get scroll info JS."""
102
- return build_get_scroll_info_js()
103
-
104
- # === Interaction helpers ===
105
-
106
- def _build_hover(self, selector: str) -> str:
107
- """Build hover JS."""
108
- return build_hover_js(selector)
109
-
110
- def _build_select(self, selector: str, value: str | None, text: str | None) -> str:
111
- """Build select JS."""
112
- return build_select_js(selector, value, text)
113
-
114
- def _build_close_modal(self, selectors: list[str] | None) -> str:
115
- """Build close modal JS."""
116
- return build_close_modal_js(selectors)
117
-
118
- def _build_click_all_by_text(self, text: str, role: str) -> str:
119
- """Build click all by text JS."""
120
- return build_click_all_by_text_js(text, role)
121
-
122
- def _build_press_key(self, key: str, selector: str | None) -> str:
123
- """Build press key JS."""
124
- return build_press_key_js(key, selector)
@@ -1,6 +0,0 @@ cmdop/services/browser/sync/__init__.py
1
- """Synchronous browser session and service."""
2
-
3
- from cmdop.services.browser.sync.session import BrowserSession
4
- from cmdop.services.browser.sync.service import BrowserService
5
-
6
- __all__ = ["BrowserSession", "BrowserService"]