iflow-mcp_janspoerer-mcp_browser_use 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. iflow_mcp_janspoerer_mcp_browser_use-0.1.0.dist-info/METADATA +26 -0
  2. iflow_mcp_janspoerer_mcp_browser_use-0.1.0.dist-info/RECORD +50 -0
  3. iflow_mcp_janspoerer_mcp_browser_use-0.1.0.dist-info/WHEEL +5 -0
  4. iflow_mcp_janspoerer_mcp_browser_use-0.1.0.dist-info/entry_points.txt +2 -0
  5. iflow_mcp_janspoerer_mcp_browser_use-0.1.0.dist-info/licenses/LICENSE +201 -0
  6. iflow_mcp_janspoerer_mcp_browser_use-0.1.0.dist-info/top_level.txt +1 -0
  7. mcp_browser_use/__init__.py +2 -0
  8. mcp_browser_use/__main__.py +1347 -0
  9. mcp_browser_use/actions/__init__.py +1 -0
  10. mcp_browser_use/actions/elements.py +173 -0
  11. mcp_browser_use/actions/extraction.py +864 -0
  12. mcp_browser_use/actions/keyboard.py +43 -0
  13. mcp_browser_use/actions/navigation.py +73 -0
  14. mcp_browser_use/actions/screenshots.py +85 -0
  15. mcp_browser_use/browser/__init__.py +1 -0
  16. mcp_browser_use/browser/chrome.py +150 -0
  17. mcp_browser_use/browser/chrome_executable.py +204 -0
  18. mcp_browser_use/browser/chrome_launcher.py +330 -0
  19. mcp_browser_use/browser/chrome_process.py +104 -0
  20. mcp_browser_use/browser/devtools.py +230 -0
  21. mcp_browser_use/browser/driver.py +322 -0
  22. mcp_browser_use/browser/process.py +133 -0
  23. mcp_browser_use/cleaners.py +530 -0
  24. mcp_browser_use/config/__init__.py +30 -0
  25. mcp_browser_use/config/environment.py +155 -0
  26. mcp_browser_use/config/paths.py +97 -0
  27. mcp_browser_use/constants.py +68 -0
  28. mcp_browser_use/context.py +150 -0
  29. mcp_browser_use/context_pack.py +85 -0
  30. mcp_browser_use/decorators/__init__.py +13 -0
  31. mcp_browser_use/decorators/ensure.py +84 -0
  32. mcp_browser_use/decorators/envelope.py +83 -0
  33. mcp_browser_use/decorators/locking.py +172 -0
  34. mcp_browser_use/helpers.py +173 -0
  35. mcp_browser_use/helpers_context.py +261 -0
  36. mcp_browser_use/locking/__init__.py +1 -0
  37. mcp_browser_use/locking/action_lock.py +190 -0
  38. mcp_browser_use/locking/file_mutex.py +139 -0
  39. mcp_browser_use/locking/window_registry.py +178 -0
  40. mcp_browser_use/tools/__init__.py +59 -0
  41. mcp_browser_use/tools/browser_management.py +260 -0
  42. mcp_browser_use/tools/debugging.py +195 -0
  43. mcp_browser_use/tools/extraction.py +58 -0
  44. mcp_browser_use/tools/interaction.py +323 -0
  45. mcp_browser_use/tools/navigation.py +84 -0
  46. mcp_browser_use/tools/screenshots.py +116 -0
  47. mcp_browser_use/utils/__init__.py +1 -0
  48. mcp_browser_use/utils/diagnostics.py +85 -0
  49. mcp_browser_use/utils/html_utils.py +118 -0
  50. mcp_browser_use/utils/retry.py +57 -0
mcp_browser_use/utils/html_utils.py
@@ -0,0 +1,118 @@
+ """HTML processing and cleaning utilities.
+
+ This module consolidates HTML cleaning functions from both cleaners.py and helpers.py.
+ """
+
+ import re
+ from typing import Tuple, Dict, Optional, Sequence, Pattern, Union
+ from bs4 import BeautifulSoup, Comment
+
+
+ # Re-export from cleaners.py
+ from ..cleaners import (
+     NOISE_ID_CLASS_PAT,
+     HIDDEN_CLASS_PAT,
+     approx_token_count,
+     CDN_HOST_PATS,
+     _build_cdn_pats,
+     _is_cdn_url,
+     _filter_srcset,
+     basic_prune,
+     extract_outline,
+ )
+
+
+ def remove_unwanted_tags(html_content: str, aggressive: bool = False) -> str:
+     """
+     Remove unwanted tags from HTML.
+
+     Args:
+         html_content: Raw HTML string
+         aggressive: If True, also removes svg, iframe, canvas, and form tags plus HTML comments
+
+     Returns:
+         Cleaned HTML string
+     """
+     soup = BeautifulSoup(html_content, 'html.parser')
+
+     # Always remove these
+     basic_removals = ['script', 'style', 'meta', 'link', 'noscript']
+
+     # Aggressive mode removes more
+     if aggressive:
+         basic_removals.extend([
+             'svg', 'iframe', 'canvas', 'form'
+         ])
+
+     for tag in soup.find_all(basic_removals):
+         tag.extract()
+
+     # Remove HTML comments in aggressive mode
+     if aggressive:
+         for comment in soup.find_all(string=lambda text: isinstance(text, Comment)):
+             comment.extract()
+
+     # Remove hidden inputs
+     for hidden_input in soup.find_all('input', {'type': 'hidden'}):
+         hidden_input.extract()
+
+     # Remove headers, footers, and navigation (huge space savers for e-commerce sites)
+     for tag in soup.find_all(['header', 'footer', 'nav']):
+         tag.extract()
+
+     # Remove common navigation/menu class patterns (but be more selective)
+     for tag in soup.find_all(class_=lambda c: c and any(x in str(c).lower() for x in ['-header', '-footer', '-navigation', 'nav-main', '-menu', '-flyout', '-dropdown', 'breadcrumb'])):
+         tag.extract()
+
+     # Remove all attributes except critical ones for product data
+     critical_attrs = {'href', 'src', 'alt', 'title', 'class', 'id', 'type', 'name', 'value'}
+     for tag in soup.find_all(True):
+         # Remove all non-critical attributes
+         attrs_to_remove = [attr for attr in tag.attrs if attr not in critical_attrs]
+         for attr in attrs_to_remove:
+             del tag[attr]
+
+         # Also remove data-* attributes (often just for JS functionality)
+         data_attrs = [attr for attr in tag.attrs if attr.startswith('data-')]
+         for attr in data_attrs:
+             del tag[attr]
+
+     # Remove empty tags after cleaning, but preserve structural tags like body, html, divs with children
+     # Only remove leaf nodes that are empty
+     for tag in soup.find_all():
+         if tag.name not in ['html', 'head', 'body'] and not tag.get_text(strip=True) and not tag.find_all(['img', 'input', 'br', 'hr', 'a']):
+             tag.extract()
+
+     return str(soup)
+
+
+ def get_cleaned_html(driver, aggressive: bool = False) -> str:
+     """
+     Get cleaned HTML from the current page.
+
+     Args:
+         driver: Selenium WebDriver instance
+         aggressive: If True, applies aggressive HTML cleaning
+
+     Returns:
+         Cleaned HTML string
+     """
+     html_content = driver.page_source
+     return remove_unwanted_tags(html_content, aggressive=aggressive)
+
+
+ __all__ = [
+     # From cleaners.py
+     'NOISE_ID_CLASS_PAT',
+     'HIDDEN_CLASS_PAT',
+     'approx_token_count',
+     'CDN_HOST_PATS',
+     '_build_cdn_pats',
+     '_is_cdn_url',
+     '_filter_srcset',
+     'basic_prune',
+     'extract_outline',
+     # From helpers.py
+     'remove_unwanted_tags',
+     'get_cleaned_html',
+ ]
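
The two public helpers here are driver-agnostic except for get_cleaned_html, which only needs an object exposing page_source. A minimal usage sketch, assuming Selenium and a working Chrome install; the driver setup and URL below are illustrative only and are not part of this package, which normally launches Chrome through its own browser layer:

    from selenium import webdriver
    from mcp_browser_use.utils.html_utils import get_cleaned_html, remove_unwanted_tags

    driver = webdriver.Chrome()        # hypothetical setup for this sketch only
    driver.get("https://example.com")  # illustrative URL
    slim = get_cleaned_html(driver, aggressive=True)  # drops scripts, styles, nav/header/footer, most attributes
    print(len(driver.page_source), len(slim))         # cleaned markup is typically much smaller
    driver.quit()

remove_unwanted_tags can also be called directly on any HTML string when no live browser session is involved.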
mcp_browser_use/utils/retry.py
@@ -0,0 +1,57 @@
+ """Retry logic and error handling utilities."""
+
+ import time
+ import random
+ import json
+ from typing import Callable, Optional
+ from selenium.common.exceptions import (
+     NoSuchWindowException,
+     StaleElementReferenceException,
+     WebDriverException,
+ )
+
+
+ def retry_op(fn: Callable, retries: int = 2, base_delay: float = 0.15):
+     """
+     Retry a function call that may fail due to transient Selenium exceptions.
+
+     Args:
+         fn: The function to call
+         retries: Number of retry attempts (default: 2)
+         base_delay: Base delay between retries in seconds (default: 0.15)
+
+     Returns:
+         The result of the function call
+
+     Raises:
+         The last exception if all retries fail
+     """
+     for attempt in range(retries + 1):
+         try:
+             return fn()
+         except (NoSuchWindowException, StaleElementReferenceException, WebDriverException):
+             if attempt == retries:
+                 raise
+             time.sleep(base_delay * (1.0 + random.random()))
+
+
+ def _read_json(path: str) -> Optional[dict]:
+     """
+     Read a JSON file and return its contents as a dict.
+
+     Args:
+         path: Path to the JSON file
+
+     Returns:
+         Dictionary from JSON file, or None if file doesn't exist or is invalid
+     """
+     try:
+         with open(path, "r") as f:
+             return json.load(f)
+     except Exception:
+         return None
+
+
+ def _now() -> float:
+     """Get current time as a float timestamp."""
+     return time.time()
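
A quick sketch of the retry semantics in retry_op; the flaky() function below is purely hypothetical and exists only to show that retries=2 allows up to three total attempts, with a jittered sleep of base_delay to 2*base_delay between them:

    from selenium.common.exceptions import WebDriverException
    from mcp_browser_use.utils.retry import retry_op

    calls = {"n": 0}

    def flaky():
        # Hypothetical operation: fails twice with a transient Selenium error, then succeeds.
        calls["n"] += 1
        if calls["n"] < 3:
            raise WebDriverException("transient")
        return "ok"

    print(retry_op(flaky, retries=2))  # third attempt succeeds and prints "ok"

The caught exception types (NoSuchWindowException, StaleElementReferenceException, WebDriverException) are the ones Selenium raises when a window has closed or an element reference has gone stale mid-operation, which is why they are treated as retryable.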