cmdop-0.1.21-py3-none-any.whl → cmdop-0.1.22-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. cmdop/__init__.py +1 -1
  2. cmdop/client.py +2 -8
  3. cmdop/services/browser/__init__.py +44 -31
  4. cmdop/services/browser/capabilities/__init__.py +15 -0
  5. cmdop/services/browser/capabilities/_base.py +28 -0
  6. cmdop/services/browser/capabilities/_helpers.py +16 -0
  7. cmdop/services/browser/capabilities/dom.py +76 -0
  8. cmdop/services/browser/capabilities/fetch.py +46 -0
  9. cmdop/services/browser/capabilities/input.py +49 -0
  10. cmdop/services/browser/capabilities/scroll.py +147 -0
  11. cmdop/services/browser/capabilities/timing.py +66 -0
  12. cmdop/services/browser/js/__init__.py +6 -4
  13. cmdop/services/browser/js/interaction.py +34 -0
  14. cmdop/services/browser/service/__init__.py +5 -0
  15. cmdop/services/browser/service/aio.py +30 -0
  16. cmdop/services/browser/{sync/service.py → service/sync.py} +2 -2
  17. cmdop/services/browser/session.py +166 -0
  18. {cmdop-0.1.21.dist-info → cmdop-0.1.22.dist-info}/METADATA +69 -60
  19. {cmdop-0.1.21.dist-info → cmdop-0.1.22.dist-info}/RECORD +22 -18
  20. cmdop/services/browser/aio/__init__.py +0 -6
  21. cmdop/services/browser/aio/service.py +0 -420
  22. cmdop/services/browser/aio/session.py +0 -407
  23. cmdop/services/browser/base/__init__.py +0 -6
  24. cmdop/services/browser/base/session.py +0 -124
  25. cmdop/services/browser/sync/__init__.py +0 -6
  26. cmdop/services/browser/sync/session.py +0 -644
  27. cmdop/services/browser/{base/service.py → service/_helpers.py} +0 -0
  28. {cmdop-0.1.21.dist-info → cmdop-0.1.22.dist-info}/WHEEL +0 -0
  29. {cmdop-0.1.21.dist-info → cmdop-0.1.22.dist-info}/licenses/LICENSE +0 -0
@@ -1,644 +0,0 @@
1
- """Synchronous browser session."""
2
-
3
- from __future__ import annotations
4
-
5
- import time
6
- import threading
7
- from typing import TYPE_CHECKING, Any, Callable, TypeVar
8
-
9
- T = TypeVar("T")
10
-
11
- from cmdop.logging import get_logger
12
- from cmdop.services.browser.base.session import BaseSession
13
-
14
- # Module-level logger for browser actions
15
- _log = get_logger("cmdop.browser")
16
- from cmdop.services.browser.models import (
17
- BrowserCookie,
18
- BrowserState,
19
- PageInfo,
20
- ScrollInfo,
21
- ScrollResult,
22
- InfiniteScrollResult,
23
- )
24
- from cmdop.services.browser.js import (
25
- parse_json_result,
26
- build_infinite_scroll_js,
27
- )
28
- from cmdop.services.browser.parsing import (
29
- parse_html as _parse_html,
30
- SoupWrapper,
31
- )
32
-
33
- if TYPE_CHECKING:
34
- from bs4 import BeautifulSoup
35
- from cmdop.services.browser.sync.service import BrowserService
36
-
37
-
38
- class BrowserSession(BaseSession):
39
- """
40
- Synchronous browser session with fluent API.
41
-
42
- Usage:
43
- with client.browser.create_session() as session:
44
- session.navigate("https://example.com")
45
- session.click("button.submit")
46
- data = session.fetch_all(urls) # credentials + accept header by default
47
- """
48
-
49
- _service: BrowserService
50
-
51
- def _call_service(self, method: str, *args: Any, **kwargs: Any) -> Any:
52
- """Call service method synchronously."""
53
- return getattr(self._service, method)(self._session_id, *args, **kwargs)
54
-
55
- # === Navigation & Interaction ===
56
-
57
- def navigate(self, url: str, timeout_ms: int = 30000) -> str:
58
- """Navigate to URL. Returns final URL."""
59
- _log.debug("[navigate] %s", url[:80])
60
- result = self._service.navigate(self._session_id, url, timeout_ms)
61
- _log.debug("[navigate] → %s", result[:80] if result else "")
62
- return result
63
-
64
- def click(
65
- self,
66
- selector: str,
67
- timeout_ms: int = 5000,
68
- move_cursor: bool = False,
69
- ) -> None:
70
- """
71
- Click element by CSS selector.
72
-
73
- Args:
74
- selector: CSS selector
75
- timeout_ms: Timeout in milliseconds
76
- move_cursor: If True, move cursor to element before clicking (human-like)
77
- """
78
- _log.debug("[click] %s%s", selector[:60], " (move_cursor)" if move_cursor else "")
79
- self._service.click(self._session_id, selector, timeout_ms, move_cursor)
80
-
81
- def type(
82
- self,
83
- selector: str,
84
- text: str,
85
- human_like: bool = False,
86
- clear_first: bool = True,
87
- ) -> None:
88
- """Type text into element."""
89
- _log.debug("[type] %s → '%s'", selector[:40], text[:30] if len(text) <= 30 else text[:27] + "...")
90
- self._service.type(self._session_id, selector, text, human_like, clear_first)
91
-
92
- def wait_for(self, selector: str, timeout_ms: int = 30000) -> bool:
93
- """Wait for element to appear."""
94
- return self._service.wait_for(self._session_id, selector, timeout_ms)
95
-
96
- # === Extraction ===
97
-
98
- def extract(
99
- self, selector: str, attr: str | None = None, limit: int = 100
100
- ) -> list[str]:
101
- """Extract text/attributes from elements."""
102
- return self._service.extract(self._session_id, selector, attr, limit)
103
-
104
- def extract_regex(
105
- self, pattern: str, from_html: bool = False, limit: int = 100
106
- ) -> list[str]:
107
- """Extract data using regex pattern."""
108
- return self._service.extract_regex(self._session_id, pattern, from_html, limit)
109
-
110
- def get_html(self, selector: str | None = None) -> str:
111
- """Get page HTML."""
112
- return self._service.get_html(self._session_id, selector)
113
-
114
- def get_text(self, selector: str | None = None) -> str:
115
- """Get page text content."""
116
- return self._service.get_text(self._session_id, selector)
117
-
118
- def parse_html(self, html: str | None = None, selector: str | None = None) -> "BeautifulSoup":
119
- """
120
- Parse HTML with BeautifulSoup.
121
-
122
- Args:
123
- html: HTML string to parse. If None, fetches from page.
124
- selector: CSS selector to get HTML from (if html not provided).
125
-
126
- Returns:
127
- BeautifulSoup object for easy parsing.
128
-
129
- Raises:
130
- ImportError: If beautifulsoup4 not installed.
131
-
132
- Example:
133
- soup = browser.parse_html() # Parse entire page
134
- soup = browser.parse_html(selector="[role='feed']") # Parse feed only
135
-
136
- for item in soup.select('.item'):
137
- title = item.select_one('h2').get_text(strip=True)
138
- """
139
- if html is None:
140
- html = self.get_html(selector)
141
- return _parse_html(html)
142
-
143
- def soup(self, selector: str | None = None) -> SoupWrapper:
144
- """
145
- Get page HTML as SoupWrapper with chainable API.
146
-
147
- Args:
148
- selector: CSS selector to limit scope (optional)
149
-
150
- Returns:
151
- SoupWrapper with convenience methods
152
-
153
- Example:
154
- # Get all links
155
- links = browser.soup().links("a.product")
156
-
157
- # Get text from elements
158
- titles = browser.soup("[role='feed']").texts("h2")
159
-
160
- # Chain selects
161
- for item in browser.soup().select(".item"):
162
- title = item.select_one("h2").text()
163
- url = item.attr("href")
164
- """
165
- html = self.get_html(selector)
166
- return SoupWrapper(html=html)
167
-
168
- # === JavaScript Execution ===
169
-
170
- def execute_script(self, script: str) -> str:
171
- """Execute JavaScript (raw, no wrapper)."""
172
- return self._service.execute_script(self._session_id, script)
173
-
174
- def execute_js(self, code: str, raw: bool = False) -> dict | list | str | None:
175
- """
176
- Execute async JavaScript with auto-wrap.
177
-
178
- Code is wrapped in async IIFE with try/catch and JSON serialization.
179
-
180
- Args:
181
- code: JS code to execute (can use await)
182
- raw: If True, return raw JSON string. Default False (parse to dict/list).
183
-
184
- Example:
185
- result = session.execute_js('''
186
- const resp = await fetch('/api/data');
187
- return await resp.json();
188
- ''')
189
- """
190
- js = self._build_execute_js(code)
191
- result = self.execute_script(js)
192
- return self._parse_execute_js(result, raw)
193
-
194
- def fetch_json(self, url: str) -> dict | list | None:
195
- """Fetch JSON from URL using JS fetch()."""
196
- js = self._build_fetch_json(url)
197
- result = self.execute_script(js)
198
- return parse_json_result(result)
199
-
200
- def fetch_all(
201
- self,
202
- urls: dict[str, str],
203
- headers: dict[str, str] | None = None,
204
- credentials: bool = False,
205
- ) -> dict[str, Any]:
206
- """
207
- Fetch multiple URLs in parallel.
208
-
209
- Args:
210
- urls: Dict of {id: url} to fetch
211
- headers: Optional headers (accept: application/json by default)
212
- credentials: Include credentials/cookies (default False, may break CORS)
213
-
214
- Returns:
215
- Dict of {id: {data: ..., error: ...}}
216
- """
217
- if not urls:
218
- return {}
219
- js = self._build_fetch_all(urls, headers, credentials)
220
- result = self.execute_js(js)
221
- return self._parse_fetch_all(result)
222
-
223
- # === State & Cookies ===
224
-
225
- def screenshot(self, full_page: bool = False) -> bytes:
226
- """Take screenshot."""
227
- return self._service.screenshot(self._session_id, full_page)
228
-
229
- def get_state(self) -> BrowserState:
230
- """Get current browser state."""
231
- return self._service.get_state(self._session_id)
232
-
233
- def set_cookies(self, cookies: list[BrowserCookie | dict]) -> None:
234
- """Set browser cookies."""
235
- self._service.set_cookies(self._session_id, cookies)
236
-
237
- def get_cookies(self, domain: str = "") -> list[BrowserCookie]:
238
- """Get browser cookies."""
239
- return self._service.get_cookies(self._session_id, domain)
240
-
241
- # === Parser helpers ===
242
-
243
- def validate_selectors(self, item: str, fields: dict[str, str]) -> dict:
244
- """Validate CSS selectors on page."""
245
- return self._service.validate_selectors(self._session_id, item, fields)
246
-
247
- def extract_data(self, item: str, fields_json: str, limit: int = 100) -> dict:
248
- """Extract structured data from page."""
249
- return self._service.extract_data(self._session_id, item, fields_json, limit)
250
-
251
- # === Mouse & Scroll (native, not JS) ===
252
-
253
- def mouse_move(self, x: int, y: int, steps: int = 10) -> None:
254
- """
255
- Move mouse to coordinates with human-like movement.
256
-
257
- Args:
258
- x: Target X coordinate
259
- y: Target Y coordinate
260
- steps: Number of intermediate steps (1 = instant, >1 = smooth)
261
-
262
- Example:
263
- browser.mouse_move(500, 300) # Smooth move
264
- browser.mouse_move(100, 100, steps=1) # Instant move
265
- """
266
- _log.debug("[mouse_move] x=%d, y=%d, steps=%d", x, y, steps)
267
- self._service.mouse_move(self._session_id, x, y, steps)
268
-
269
- def scroll(
270
- self,
271
- direction: str = "down",
272
- amount: int = 500,
273
- selector: str | None = None,
274
- smooth: bool = True,
275
- ) -> ScrollResult:
276
- """
277
- Scroll the page (native, not JS).
278
-
279
- Args:
280
- direction: "up", "down", "left", "right"
281
- amount: Pixels to scroll
282
- selector: If provided, scroll element into view instead
283
- smooth: Use smooth scroll animation (default True)
284
-
285
- Returns:
286
- ScrollResult with position info
287
-
288
- Example:
289
- # Fast scroll
290
- browser.scroll("down", 800)
291
-
292
- # Scroll element into view
293
- browser.scroll(selector=".target-element")
294
- """
295
- if selector:
296
- _log.debug("[scroll_to] %s", selector[:60])
297
- else:
298
- _log.debug("[scroll] %s %dpx", direction, amount)
299
-
300
- data = self._service.scroll(self._session_id, direction, amount, selector, smooth)
301
- scroll_result = ScrollResult(
302
- success=True,
303
- scroll_y=data.get("scroll_y", 0),
304
- scrolled_by=data.get("scrolled_by", 0),
305
- at_bottom=data.get("at_bottom", False),
306
- )
307
- _log.debug("[scroll] → y=%d, by=%d, bottom=%s", scroll_result.scroll_y, scroll_result.scrolled_by, scroll_result.at_bottom)
308
- return scroll_result
309
-
310
- # def scroll_js(
311
- # self,
312
- # direction: str = "down",
313
- # amount: int = 500,
314
- # selector: str | None = None,
315
- # smooth: bool = True,
316
- # human_like: bool = False,
317
- # container: str | None = None,
318
- # ) -> ScrollResult:
319
- # """Scroll the page (JS fallback)."""
320
- # js = self._build_scroll(direction, amount, selector, smooth, human_like, container)
321
- # result = self.execute_script(js)
322
- # data = parse_json_result(result) or {}
323
- # return ScrollResult(
324
- # success=data.get("success", False),
325
- # scroll_y=int(data.get("scrollY", 0)),
326
- # scrolled_by=int(data.get("scrolledBy", 0)),
327
- # at_bottom=data.get("atBottom", False),
328
- # error=data.get("error"),
329
- # )
330
-
331
- def scroll_to(self, selector: str) -> ScrollResult:
332
- """Scroll element into view."""
333
- _log.debug("[scroll_to] %s", selector[:60])
334
- return self.scroll(selector=selector)
335
-
336
- def scroll_to_bottom(self) -> ScrollResult:
337
- """Scroll to page bottom."""
338
- js = self._build_scroll_to_bottom()
339
- result = self.execute_script(js)
340
- data = parse_json_result(result) or {}
341
- return ScrollResult(
342
- success=data.get("success", False),
343
- scroll_y=int(data.get("scrollY", 0)),
344
- scrolled_by=int(data.get("scrolledBy", 0)),
345
- at_bottom=True,
346
- )
347
-
348
- def get_scroll_info(self) -> ScrollInfo:
349
- """Get current scroll position and page dimensions (JS-based)."""
350
- js = self._build_get_scroll_info()
351
- result = self.execute_script(js)
352
- data = parse_json_result(result) or {}
353
- return ScrollInfo(
354
- scroll_x=int(data.get("scrollX", 0)),
355
- scroll_y=int(data.get("scrollY", 0)),
356
- page_height=int(data.get("pageHeight", 0)),
357
- page_width=int(data.get("pageWidth", 0)),
358
- viewport_height=int(data.get("viewportHeight", 0)),
359
- viewport_width=int(data.get("viewportWidth", 0)),
360
- at_bottom=data.get("atBottom", False),
361
- at_top=data.get("atTop", True),
362
- )
363
-
364
- def get_page_info(self) -> PageInfo:
365
- """Get comprehensive page info (native)."""
366
- return self._service.get_page_info(self._session_id)
367
-
368
- def scroll_and_collect(
369
- self,
370
- seen_keys: set[str],
371
- key_selector: str = "a[href]",
372
- key_attr: str = "href",
373
- container_selector: str = "body",
374
- ) -> InfiniteScrollResult:
375
- """
376
- Extract new keys from page, for infinite scroll patterns.
377
-
378
- Args:
379
- seen_keys: Set of already seen keys (will be updated)
380
- key_selector: CSS selector for elements with keys
381
- key_attr: Attribute to use as key
382
- container_selector: Container to search in
383
-
384
- Returns:
385
- InfiniteScrollResult with new keys found
386
- """
387
- js = build_infinite_scroll_js(
388
- list(seen_keys), key_selector, key_attr, container_selector
389
- )
390
- result = self.execute_script(js)
391
- data = parse_json_result(result) or {}
392
-
393
- new_keys = data.get("new_keys", [])
394
- seen_keys.update(new_keys)
395
-
396
- return InfiniteScrollResult(
397
- new_keys=new_keys,
398
- at_bottom=data.get("at_bottom", False),
399
- total_seen=data.get("total_seen", len(seen_keys)),
400
- error=data.get("error"),
401
- )
402
-
403
- def infinite_scroll(
404
- self,
405
- extract_fn: Callable[[], list[Any]],
406
- limit: int = 100,
407
- max_scrolls: int = 50,
408
- max_no_new: int = 3,
409
- scroll_amount: int = 800,
410
- delay: float = 1.0,
411
- ) -> list[Any]:
412
- """
413
- Smart infinite scroll with extraction.
414
-
415
- Args:
416
- extract_fn: Function that extracts and returns new items (deduplication is caller's responsibility)
417
- limit: Stop after collecting this many items
418
- max_scrolls: Maximum scroll attempts
419
- max_no_new: Stop after this many scrolls with no new items
420
- scroll_amount: Pixels to scroll each time
421
- delay: Seconds to wait between scrolls
422
-
423
- Returns:
424
- List of all extracted items
425
- """
426
- all_items: list[Any] = []
427
- no_new_count = 0
428
-
429
- for _ in range(max_scrolls):
430
- new_items = extract_fn()
431
-
432
- if new_items:
433
- all_items.extend(new_items)
434
- no_new_count = 0
435
- if len(all_items) >= limit:
436
- break
437
- else:
438
- no_new_count += 1
439
- if no_new_count >= max_no_new:
440
- break
441
-
442
- self.scroll("down", scroll_amount)
443
- time.sleep(delay)
444
-
445
- return all_items[:limit]
446
-
447
- # === UI Interaction Helpers ===
448
-
449
- def hover(self, selector: str, timeout_ms: int = 5000) -> None:
450
- """Hover over element (native, not JS)."""
451
- _log.debug("[hover] %s", selector[:60])
452
- self._service.hover(self._session_id, selector, timeout_ms)
453
-
454
- # def hover_js(self, selector: str) -> bool:
455
- # """Hover over element (JS fallback)."""
456
- # js = self._build_hover(selector)
457
- # result = self.execute_script(js)
458
- # data = parse_json_result(result) or {}
459
- # return data.get("success", False)
460
-
461
- def select(
462
- self,
463
- selector: str,
464
- value: str | None = None,
465
- text: str | None = None,
466
- ) -> dict:
467
- """
468
- Select option from dropdown.
469
-
470
- Args:
471
- selector: CSS selector for <select> element
472
- value: Option value to select
473
- text: Option text to select (if value not provided)
474
-
475
- Returns:
476
- Dict with selected_value and selected_text
477
- """
478
- js = self._build_select(selector, value, text)
479
- result = self.execute_script(js)
480
- return parse_json_result(result) or {}
481
-
482
- def close_modal(self, selectors: list[str] | None = None) -> bool:
483
- """
484
- Try to close modal/dialog.
485
-
486
- Args:
487
- selectors: Custom close button selectors to try
488
-
489
- Returns:
490
- True if modal was closed
491
- """
492
- js = self._build_close_modal(selectors)
493
- result = self.execute_script(js)
494
- data = parse_json_result(result) or {}
495
- return data.get("success", False)
496
-
497
- def wait(self, ms: int, jitter: float = 0.1) -> None:
498
- """
499
- Wait for specified milliseconds with optional jitter.
500
-
501
- Args:
502
- ms: Wait time in milliseconds
503
- jitter: Random variation ±jitter (default 10%, so 1000ms becomes 900-1100ms)
504
- """
505
- import random
506
- actual = (ms / 1000) * (1 + random.uniform(-jitter, jitter))
507
- time.sleep(actual)
508
-
509
- def wait_seconds(self, seconds: float, jitter: float = 0.1) -> None:
510
- """
511
- Wait for specified seconds with optional jitter.
512
-
513
- Args:
514
- seconds: Base wait time in seconds
515
- jitter: Random variation ±jitter (default 10%, so 1.0s becomes 0.9-1.1s)
516
- """
517
- self.wait(int(seconds * 1000), jitter)
518
-
519
- def wait_random(self, min_sec: float = 0.5, max_sec: float = 1.5) -> None:
520
- """Wait for random time between min and max seconds."""
521
- import random
522
- time.sleep(min_sec + random.random() * (max_sec - min_sec))
523
-
524
- def with_timeout(
525
- self,
526
- fn: Callable[[], T],
527
- timeout_sec: float = 60.0,
528
- on_timeout: Callable[[], None] | None = None,
529
- ) -> tuple[T | None, bool]:
530
- """
531
- Run a function with a timeout. Skips if it hangs.
532
-
533
- Args:
534
- fn: Function to run (no arguments, use lambda/closure for args)
535
- timeout_sec: Timeout in seconds (default 60)
536
- on_timeout: Optional cleanup function to call on timeout
537
-
538
- Returns:
539
- Tuple of (result, success). If timeout, returns (None, False).
540
-
541
- Example:
542
- # Simple usage
543
- result, ok = browser.with_timeout(
544
- lambda: process_listing(browser, listing),
545
- timeout_sec=30,
546
- )
547
- if not ok:
548
- print("Skipped due to timeout")
549
- continue
550
-
551
- # With cleanup
552
- result, ok = browser.with_timeout(
553
- lambda: enrich_listing(browser, item),
554
- timeout_sec=60,
555
- on_timeout=lambda: browser.press_key('Escape'),
556
- )
557
- """
558
- result_container: list[T | None] = [None]
559
- exception_container: list[Exception | None] = [None]
560
- completed = threading.Event()
561
-
562
- def target() -> None:
563
- try:
564
- result_container[0] = fn()
565
- except Exception as e:
566
- exception_container[0] = e
567
- finally:
568
- completed.set()
569
-
570
- thread = threading.Thread(target=target, daemon=True)
571
- thread.start()
572
-
573
- if completed.wait(timeout=timeout_sec):
574
- # Completed in time
575
- if exception_container[0]:
576
- raise exception_container[0]
577
- return result_container[0], True
578
- else:
579
- # Timeout - run cleanup if provided
580
- _log.warning("[timeout] Function timed out after %.1fs", timeout_sec)
581
- if on_timeout:
582
- try:
583
- on_timeout()
584
- except Exception as e:
585
- _log.debug("[timeout] Cleanup failed: %s", e)
586
- return None, False
587
-
588
- def click_all_by_text(self, text: str, role: str = "button") -> int:
589
- """
590
- Click all elements containing specific text.
591
-
592
- Args:
593
- text: Text to match (case-insensitive)
594
- role: Element role to filter (default: "button")
595
-
596
- Returns:
597
- Number of elements clicked
598
-
599
- Example:
600
- browser.click_all_by_text("See more") # Expand all posts
601
- browser.click_all_by_text("Load more", role="link")
602
- """
603
- _log.debug("[click_all_by_text] '%s' role=%s", text, role)
604
- js = self._build_click_all_by_text(text, role)
605
- result = self.execute_script(js)
606
- data = parse_json_result(result) or {}
607
- clicked = data.get("clicked", 0)
608
- _log.debug("[click_all_by_text] → clicked %d", clicked)
609
- return clicked
610
-
611
- def press_key(self, key: str, selector: str | None = None) -> bool:
612
- """
613
- Press a keyboard key.
614
-
615
- Args:
616
- key: Key to press (e.g., 'Escape', 'Enter', 'Tab', 'ArrowDown')
617
- selector: Optional CSS selector to target. If None, targets activeElement.
618
-
619
- Returns:
620
- True if key was pressed successfully
621
-
622
- Example:
623
- browser.press_key('Escape') # Close modal
624
- browser.press_key('Enter', 'input.search') # Submit search
625
- browser.press_key('Tab') # Move focus
626
- """
627
- target = selector[:40] if selector else "activeElement"
628
- _log.debug("[press_key] %s → %s", key, target)
629
- js = self._build_press_key(key, selector)
630
- result = self.execute_script(js)
631
- data = parse_json_result(result) or {}
632
- return data.get("success", False)
633
-
634
- # === Context Manager ===
635
-
636
- def close(self) -> None:
637
- """Close browser session."""
638
- self._service.close_session(self._session_id)
639
-
640
- def __enter__(self) -> "BrowserSession":
641
- return self
642
-
643
- def __exit__(self, *args: Any) -> None:
644
- self.close()
File without changes