browser-dog 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4 @@
1
+ from .browser import BrowserDog, retry
2
+
3
+ __all__ = ['BrowserDog', 'retry']
4
+ __version__ = '0.1.0'
browser_dog/browser.py ADDED
@@ -0,0 +1,218 @@
1
+ import time
2
+ import random
3
+ import json
4
+ import os
5
+ import logging
6
+ from functools import wraps
7
+ from selenium import webdriver
8
+ from selenium.webdriver.chrome.options import Options
9
+ from selenium.common.exceptions import WebDriverException, TimeoutException
10
+ from colorama import Fore, Style
11
+
12
+ # 配置日志
13
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
def retry(max_retries=3, delay=2):
    """Build a decorator that retries a callable on Selenium driver errors.

    Only ``WebDriverException`` and ``TimeoutException`` trigger a retry; any
    other exception propagates immediately. The pause before the next attempt
    is ``delay * attempt`` seconds plus up to one second of random jitter, so
    the wait grows linearly with the attempt number.

    Args:
        max_retries: Maximum number of attempts. Values below 1 are treated
            as 1 so the wrapped callable is always invoked at least once.
        delay: Base number of seconds, multiplied by the attempt number.

    Returns:
        A decorator; the wrapper it produces keeps the metadata of the
        wrapped callable and returns whatever that callable returns.

    Raises:
        WebDriverException, TimeoutException: Re-raised unchanged from the
            last attempt once every attempt has failed.
    """
    # Without this guard a non-positive max_retries would skip the loop and
    # make the wrapper return None without ever calling the function.
    attempts = max(1, max_retries)

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(1, attempts + 1):
                try:
                    return func(*args, **kwargs)
                except (WebDriverException, TimeoutException) as e:
                    if attempt == attempts:
                        # Out of attempts: report it as final, do not claim
                        # that another retry is coming.
                        logger.warning(
                            "[Attempt %s/%s] %s failed: %s, giving up",
                            attempt, attempts, func.__name__, e,
                        )
                        raise
                    wait_time = delay * attempt + random.uniform(0, 1)
                    logger.warning(
                        "[Attempt %s/%s] %s failed: %s, retrying in %.1fs",
                        attempt, attempts, func.__name__, e, wait_time,
                    )
                    time.sleep(wait_time)
        return wrapper
    return decorator
33
+
34
+
35
class BrowserDog:
    """Selenium Chrome wrapper that loads cookies from a JSON file.

    On construction it starts Chrome, opens ``base_url``, adds the cookies
    found in ``cookies_json`` and refreshes the page. It also offers
    scrolling, randomised waits, screenshots and a text-based block/captcha
    check. Instances work as context managers: leaving the ``with`` block
    calls :meth:`close`.
    """

    # User-Agent strings; one is picked at random for each new browser.
    _USER_AGENTS = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
    )

    # The only sameSite spellings that are passed through to add_cookie.
    _SAME_SITE_VALUES = ('Strict', 'Lax', 'None')

    # Export-only cookie fields that are stripped before add_cookie.
    _DROPPED_COOKIE_KEYS = ('storeId', 'id', 'session')

    # Lower-case phrases whose presence in the page body counts as a block.
    _BLOCK_INDICATORS = (
        "captcha",
        "challenge",
        "verify you are human",
        "access denied",
        "unusual activity",
    )

    def __init__(self, cookies_json: str, base_url: str, headless=False):
        """Start Chrome, open ``base_url`` and apply the stored cookies.

        Args:
            cookies_json: Path to a JSON file holding a list of cookie dicts.
            base_url: URL opened before the cookies are added.
            headless: When true, Chrome is started with ``--headless=new``.

        Raises:
            Exception: Any error raised during start-up is printed, the
                browser (if one was started) is closed, and the error is
                re-raised unchanged.
        """
        # Assigned before anything can fail so that close() in the except
        # branch below never hits a missing attribute and hides the real error.
        self.driver = None
        try:
            print(Fore.BLACK + "=" * 30 + " Initializing Driver " + "=" * 30 + Style.RESET_ALL)
            self.options = Options()

            # Stability flags / no GPU.
            self.options.add_argument("--disable-gpu")
            self.options.add_argument("--no-sandbox")
            self.options.add_argument("--disable-dev-shm-usage")

            # Basic switches that hide the automation markers.
            self.options.add_experimental_option("excludeSwitches", ["enable-automation"])
            self.options.add_experimental_option("useAutomationExtension", False)
            self.options.add_argument("--disable-blink-features=AutomationControlled")

            # Random User-Agent.
            self.options.add_argument(f"--user-agent={random.choice(self._USER_AGENTS)}")

            if headless:
                self.options.add_argument("--headless=new")

            self.driver = webdriver.Chrome(options=self.options)
            self.driver.maximize_window()

            # Register the script through CDP before the first get() so it is
            # evaluated on every new document.
            self.driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
                "source": """
                    Object.defineProperty(navigator, 'webdriver', {get: () => undefined});
                    window.chrome = { runtime: {}, loadTimes: function(){}, csi: function(){}, app: {} };
                    Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'] });
                """
            })

            print(Fore.GREEN + "Opening URL Page: " + base_url + Style.RESET_ALL)
            self.driver.get(base_url)
            self.medium_wait()

            self._load_cookies(cookies_json, base_url)

        except Exception as e:
            print(Fore.RED + f'Initialization Error: {e}' + Style.RESET_ALL)
            self.close()  # release the browser when initialisation fails
            raise

    def _load_cookies(self, cookies_json: str, base_url: str):
        """Read cookies from ``cookies_json``, add them and refresh the page.

        A missing file, invalid JSON, or a payload that is not a list prints
        a message and returns without touching the browser. Expired cookies
        are skipped, and a cookie that cannot be added is logged at debug
        level without stopping the remaining ones.

        Args:
            cookies_json: Path to the cookie JSON file.
            base_url: Unused here; kept for signature compatibility.
        """
        if not os.path.exists(cookies_json):
            print(Fore.RED + f"No cookies file found at {cookies_json}, please login first." + Style.RESET_ALL)
            return

        try:
            # utf-8-sig reads plain UTF-8 and also tolerates a leading BOM,
            # independent of the platform's default encoding.
            with open(cookies_json, 'r', encoding='utf-8-sig') as file:
                cookies = json.load(file)
        except json.JSONDecodeError:
            print(Fore.RED + "Cookies file is not valid JSON." + Style.RESET_ALL)
            return

        if not isinstance(cookies, list):
            print(Fore.RED + "Cookies file must contain a JSON list of cookie objects." + Style.RESET_ALL)
            return

        now = time.time()
        for cookie in cookies:
            if not isinstance(cookie, dict):
                logger.debug("Skipped non-object cookie entry: %r", cookie)
                continue
            try:
                # Drop cookies that have already expired.
                if 'expiry' in cookie and cookie['expiry'] < now:
                    continue

                # Normalise sameSite to an accepted spelling; lower-case
                # values such as "lax" keep their meaning, anything else
                # (including a missing key) falls back to 'None'.
                same_site = cookie.get('sameSite')
                if isinstance(same_site, str):
                    same_site = same_site.capitalize()
                if same_site not in self._SAME_SITE_VALUES:
                    same_site = 'None'
                cookie['sameSite'] = same_site

                # Remove export-only fields that may make add_cookie fail.
                for key in self._DROPPED_COOKIE_KEYS:
                    cookie.pop(key, None)

                self.driver.add_cookie(cookie)
            except Exception as e:
                # One bad cookie must not stop the rest from loading.
                logger.debug("Skipped cookie %s: %s", cookie.get('name'), e)
                continue

        print(Fore.GREEN + "Refreshing the page to apply cookies" + Style.RESET_ALL)
        self.driver.refresh()
        self.medium_wait()
        print(Fore.GREEN + "Successfully logged in, start browsing..." + Style.RESET_ALL)

    # Context-manager support so the browser is closed when the block ends.
    def __enter__(self):
        """Return this instance for use inside a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the browser; return False so exceptions are not suppressed."""
        self.close()
        return False

    def close(self):
        """Quit the browser if one is running; safe to call repeatedly.

        Errors raised by ``quit()`` are logged at debug level and otherwise
        ignored. ``self.driver`` is ``None`` afterwards in every case.
        """
        # getattr keeps this safe on an instance whose __init__ never ran
        # far enough to assign the attribute.
        driver = getattr(self, 'driver', None)
        try:
            if driver:
                driver.quit()
        except Exception as e:
            logger.debug("Ignoring error while quitting the driver: %s", e)
        finally:
            self.driver = None

    def get_driver(self):
        """Return the underlying WebDriver (``None`` after :meth:`close`)."""
        return self.driver

    def scroll_to_bottom(self):
        """Scroll towards the bottom in about ten timed steps, then wait.

        NOTE: the wait below is a random sleep and is not synchronised with
        the page-side timer, so the scroll may still be running on return.
        """
        self.driver.execute_script("""
            var scrollHeight = document.body.scrollHeight;
            var currentPos = window.pageYOffset;
            // At least 1px per tick; a zero step would never reach the end
            // and the interval would run forever.
            var step = Math.max(1, Math.floor(scrollHeight / 10));
            var interval = setInterval(function() {
                currentPos += step;
                window.scrollTo(0, currentPos);
                if (currentPos >= scrollHeight) clearInterval(interval);
            }, 200 + Math.floor(Math.random() * 200));
        """)
        self.medium_wait()  # give the timed scroll a chance to finish

    def scroll_to_top(self):
        """Scroll back towards the top in about eight timed steps, then wait."""
        self.driver.execute_script("""
            var currentPos = window.pageYOffset;
            // At least 1px per tick; a zero step would never reach the top
            // and the interval would run forever.
            var step = Math.max(1, Math.floor(currentPos / 8));
            var interval = setInterval(function() {
                currentPos -= step;
                window.scrollTo(0, currentPos);
                if (currentPos <= 0) clearInterval(interval);
            }, 200 + Math.floor(Math.random() * 200));
        """)
        self.medium_wait()

    def scroll_to_middle(self):
        """Jump straight to half of the document body's scroll height."""
        self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight/2);")

    def scroll_to_random(self):
        """Jump straight to a random vertical position on the page."""
        self.driver.execute_script("window.scrollTo(0, Math.floor(Math.random() * document.body.scrollHeight));")

    def detect_block_or_captcha(self) -> bool:
        """Report whether the page body text contains a block/captcha phrase.

        On a match a message is printed and a screenshot is attempted; a
        failing screenshot does not change the result.

        Returns:
            True when an indicator phrase is found. False when none is found
            or when the body text cannot be read.
        """
        try:
            page_text = self.driver.find_element("tag name", "body").text.lower()
        except Exception as e:
            logger.debug("Could not read page body for block detection: %s", e)
            return False

        for ind in self._BLOCK_INDICATORS:
            if ind in page_text:
                print(Fore.RED + f"⚠️ Detected block/captcha indicator: {ind}" + Style.RESET_ALL)
                try:
                    self.take_screenshot("block_detected")
                except Exception as e:
                    # The block was still detected; only the debug aid failed.
                    logger.debug("Screenshot after block detection failed: %s", e)
                return True
        return False

    def take_screenshot(self, name="debug"):
        """Save a PNG to ``screenshots/<name>_<unix-seconds>.png``.

        The ``screenshots`` directory is created in the current working
        directory when it does not exist yet.
        """
        os.makedirs("screenshots", exist_ok=True)
        filepath = f"screenshots/{name}_{int(time.time())}.png"
        self.driver.save_screenshot(filepath)
        print(Fore.YELLOW + f"Screenshot saved to: {filepath}" + Style.RESET_ALL)

    def short_wait(self):
        """Sleep for a random 1-3 seconds."""
        time.sleep(random.uniform(1, 3))

    def medium_wait(self):
        """Sleep for a random 3-5 seconds."""
        time.sleep(random.uniform(3, 5))

    def long_wait(self):
        """Sleep for a random 5-10 seconds."""
        time.sleep(random.uniform(5, 10))
@@ -0,0 +1,186 @@
1
+ Metadata-Version: 2.4
2
+ Name: browser_dog
3
+ Version: 0.1.0
4
+ Summary: A stealthy Chrome automation toolkit driven by cookies, with anti-detection and smooth scrolling.
5
+ Author-email: Chandler Song <275737875@qq.com>
6
+ License: MIT
7
+ Keywords: selenium,automation,browser,cookies,anti-detection,scraping
8
+ Classifier: Development Status :: 4 - Beta
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Operating System :: OS Independent
12
+ Classifier: Natural Language :: Chinese (Simplified)
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.8
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Topic :: Software Development :: Libraries
20
+ Requires-Python: >=3.8
21
+ Description-Content-Type: text/markdown
22
+ License-File: LICENSE
23
+ Requires-Dist: selenium>=4.0.0
24
+ Requires-Dist: colorama
25
+ Dynamic: license-file
26
+
27
+ # Browser Dog
28
+
29
+ A stealthy Chrome automation toolkit driven by cookies, with anti-detection and smooth scrolling.
30
+
31
+ ## Features
32
+
33
+ - **Anti-Detection**: Random User-Agent, CDP injection, webdriver property hiding
34
+ - **Robust Cookie Loading**: Expiry filtering, sameSite compatibility, redundant field cleanup
35
+ - **Smooth Scrolling**: Human-like progressive scrolling simulation
36
+ - **Captcha Detection**: Automatic detection of blocks and captchas with screenshot debugging
37
+ - **Context Manager**: Automatic resource cleanup to prevent zombie processes
38
+ - **Retry Mechanism**: Built-in retry decorator for resilient operations
39
+
40
+ ## Installation
41
+
42
+ ```bash
43
+ pip install browser_dog
44
+ ```
45
+
46
+ ## Quick Start
47
+
48
+ ### Basic Usage
49
+
50
+ ```python
51
+ from browser_dog import BrowserDog
52
+
53
+ # Initialize with cookies and base URL
54
+ dog = BrowserDog('cookies.json', 'https://example.com', headless=False)
55
+ driver = dog.get_driver()
56
+
57
+ # Navigate and interact
58
+ driver.get("https://example.com/page")
59
+ dog.medium_wait()
60
+
61
+ # Get page content
62
+ html = driver.page_source
63
+ print(html)
64
+
65
+ # Clean up
66
+ dog.close()
67
+ ```
68
+
69
+ ### Using Context Manager (Recommended)
70
+
71
+ ```python
72
+ from browser_dog import BrowserDog
73
+
74
+ with BrowserDog('cookies.json', 'https://example.com') as dog:
75
+ driver = dog.get_driver()
76
+ driver.get("https://example.com/page")
77
+
78
+ # Scroll smoothly
79
+ dog.scroll_to_bottom()
80
+ dog.scroll_to_top()
81
+
82
+ # Check for blocks
83
+ if dog.detect_block_or_captcha():
84
+ print("Blocked!")
85
+ ```
86
+
87
+ ### Using Retry Decorator
88
+
89
+ ```python
90
+ from browser_dog import BrowserDog, retry
91
+
92
+ dog = BrowserDog('cookies.json', 'https://example.com')
93
+
94
+ @retry(max_retries=3, delay=2)
95
+ def fetch_data():
96
+ driver = dog.get_driver()
97
+ driver.get("https://example.com/api")
98
+ return driver.page_source
99
+
100
+ data = fetch_data()
101
+ ```
102
+
103
+ ### Headless Mode
104
+
105
+ ```python
106
+ from browser_dog import BrowserDog
107
+
108
+ dog = BrowserDog('cookies.json', 'https://example.com', headless=True)
109
+ driver = dog.get_driver()
110
+ ```
111
+
112
+ ## Preparing Cookies
113
+
114
+ 1. Install the Chrome extension [EditThisCookie](https://chrome.google.com/webstore/detail/editthiscookie/)
115
+ 2. Navigate to your target website and login
116
+ 3. Export cookies to a JSON file (e.g., `cookies.json`)
117
+
118
+ ## Project Structure
119
+
120
+ ```
121
+ browser_dog-0.1.0/
122
+ ├── browser_dog/
123
+ │ ├── __init__.py # Package exports (BrowserDog, retry)
124
+ │ └── browser.py # Core BrowserDog class implementation
125
+ ├── LICENSE # MIT License
126
+ ├── README.md # This file
127
+ └── pyproject.toml # Package configuration (PEP 621)
128
+ ```
129
+
130
+ ## API Reference
131
+
132
+ ### BrowserDog
133
+
134
+ #### Constructor
135
+
136
+ ```python
137
+ BrowserDog(cookies_json: str, base_url: str, headless: bool = False)
138
+ ```
139
+
140
+ - `cookies_json`: Path to cookies JSON file
141
+ - `base_url`: Base URL to initialize the browser
142
+ - `headless`: Enable headless mode (default: False)
143
+
144
+ #### Methods
145
+
146
+ | Method | Description |
147
+ |--------|-------------|
148
+ | `get_driver()` | Returns the Selenium WebDriver instance |
149
+ | `close()` | Safely closes the browser |
150
+ | `scroll_to_bottom()` | Smooth scroll to page bottom |
151
+ | `scroll_to_top()` | Smooth scroll to page top |
152
+ | `scroll_to_middle()` | Instant scroll to page middle |
153
+ | `scroll_to_random()` | Instant scroll to random position |
154
+ | `detect_block_or_captcha()` | Detect captcha or block (returns bool) |
155
+ | `take_screenshot(name)` | Save screenshot for debugging |
156
+ | `short_wait()` | Wait 1-3 seconds |
157
+ | `medium_wait()` | Wait 3-5 seconds |
158
+ | `long_wait()` | Wait 5-10 seconds |
159
+
160
+ ### retry Decorator
161
+
162
+ ```python
163
+ @retry(max_retries: int = 3, delay: int = 2)
164
+ ```
165
+
166
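+ Automatically retries operations that raise Selenium's `WebDriverException` or `TimeoutException`. The wait between attempts is `delay * attempt` seconds plus up to one second of random jitter, i.e. a linearly growing backoff rather than an exponential one.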
167
+
168
+ ## Disclaimer
169
+
170
+ This tool is provided for educational, research, and legitimate automation testing purposes only. By using Browser Dog, you agree to the following:
171
+
172
+ 1. **Lawful Use Only**: You must not use this tool for any illegal activities, including but not limited to malicious web scraping, unauthorized data collection, bypassing security measures, or violating the terms of service of any website or platform.
173
+
174
+ 2. **Compliance with Laws and Policies**: You are solely responsible for ensuring that your use of this tool complies with all applicable local, national, and international laws, regulations, and the terms of service of the websites you interact with.
175
+
176
+ 3. **No Warranty**: This software is provided "as is" without warranty of any kind. The authors and contributors assume no liability for any claims, damages, or losses arising from the use of this tool.
177
+
178
+ 4. **User Responsibility**: You assume full responsibility for any risks associated with using this tool, including but not limited to account suspension, legal action, or reputational damage resulting from misuse or unauthorized use.
179
+
180
+ 5. **Ethical Use**: Respect website policies, rate limits, and privacy rights. Do not use this tool to harvest personal data, conduct unauthorized surveillance, or engage in any activity that infringes on the rights of others.
181
+
182
+ The authors of this project expressly disclaim any responsibility for misuse, abuse, or any unlawful activities conducted using this software. If you are uncertain whether your intended use complies with applicable laws or policies, consult with a legal professional before proceeding.
183
+
184
+ ## License
185
+
186
+ MIT License - see [LICENSE](LICENSE) file for details.
@@ -0,0 +1,7 @@
1
+ browser_dog/__init__.py,sha256=TNNeoDjf631dtWF7iOXDVAbz9Whw5dseVQgLIEx_hxE,95
2
+ browser_dog/browser.py,sha256=NVUPF_ytJtAm0nasDwSv_6AK9MtykoQ16w7QHzd1zag,8999
3
+ browser_dog-0.1.0.dist-info/licenses/LICENSE,sha256=Y7rwROhJgtoFLoTfqIDUTbcEGNe2PngU_MbrkKUB3ak,1089
4
+ browser_dog-0.1.0.dist-info/METADATA,sha256=6SpAmfbLUgN47KVAguwivnnB0HgClbGD8FPEGEWiY1U,6369
5
+ browser_dog-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
6
+ browser_dog-0.1.0.dist-info/top_level.txt,sha256=XWlGA1ZcojOsWt9bnmldMhjst2bLZjyAW8lCscwlduE,12
7
+ browser_dog-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Chandler Song
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ browser_dog