changedetection.io-cloak-browser 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- changedetection_io_cloak_browser-0.1.0/PKG-INFO +186 -0
- changedetection_io_cloak_browser-0.1.0/README.md +151 -0
- changedetection_io_cloak_browser-0.1.0/changedetection.io_cloak_browser.egg-info/PKG-INFO +186 -0
- changedetection_io_cloak_browser-0.1.0/changedetection.io_cloak_browser.egg-info/SOURCES.txt +11 -0
- changedetection_io_cloak_browser-0.1.0/changedetection.io_cloak_browser.egg-info/dependency_links.txt +1 -0
- changedetection_io_cloak_browser-0.1.0/changedetection.io_cloak_browser.egg-info/entry_points.txt +2 -0
- changedetection_io_cloak_browser-0.1.0/changedetection.io_cloak_browser.egg-info/requires.txt +3 -0
- changedetection_io_cloak_browser-0.1.0/changedetection.io_cloak_browser.egg-info/top_level.txt +1 -0
- changedetection_io_cloak_browser-0.1.0/changedetection_cloak_browser/__init__.py +2 -0
- changedetection_io_cloak_browser-0.1.0/changedetection_cloak_browser/fetcher.py +397 -0
- changedetection_io_cloak_browser-0.1.0/changedetection_cloak_browser/static/cloakbrowser-logo.svg +20 -0
- changedetection_io_cloak_browser-0.1.0/setup.cfg +4 -0
- changedetection_io_cloak_browser-0.1.0/setup.py +50 -0
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: changedetection.io-cloak-browser
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: CloakBrowser stealth fetcher plugin for changedetection.io (anti-bot bypass)
|
|
5
|
+
Home-page: https://github.com/dgtlmoon/changedetection.io-cloak-browser
|
|
6
|
+
Author: dgtlmoon
|
|
7
|
+
Author-email: dgtlmoon@gmail.com
|
|
8
|
+
Keywords: changedetection web monitoring anti-bot stealth chromium cloakbrowser cloudflare bypass
|
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: Intended Audience :: System Administrators
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Topic :: Internet :: WWW/HTTP :: Site Management
|
|
18
|
+
Classifier: Topic :: System :: Monitoring
|
|
19
|
+
Classifier: Topic :: Security
|
|
20
|
+
Requires-Python: >=3.10
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
Requires-Dist: changedetection.io>=0.54.5
|
|
23
|
+
Requires-Dist: cloakbrowser>=0.3.0
|
|
24
|
+
Requires-Dist: playwright>=1.40.0
|
|
25
|
+
Dynamic: author
|
|
26
|
+
Dynamic: author-email
|
|
27
|
+
Dynamic: classifier
|
|
28
|
+
Dynamic: description
|
|
29
|
+
Dynamic: description-content-type
|
|
30
|
+
Dynamic: home-page
|
|
31
|
+
Dynamic: keywords
|
|
32
|
+
Dynamic: requires-dist
|
|
33
|
+
Dynamic: requires-python
|
|
34
|
+
Dynamic: summary
|
|
35
|
+
|
|
36
|
+
# changedetection.io-cloak-browser
|
|
37
|
+
|
|
38
|
+
Stealth browser fetcher plugin for [changedetection.io](https://github.com/dgtlmoon/changedetection.io) powered by [CloakBrowser](https://github.com/CloakHQ/CloakBrowser).
|
|
39
|
+
|
|
40
|
+
- ✅ Bypasses **Cloudflare Turnstile**, reCAPTCHA v3, FingerprintJS, BrowserScan and more
|
|
41
|
+
- ✅ 33 source-level C++ patches compiled into Chromium — no JS injection tricks
|
|
42
|
+
- ✅ Full **browser steps** support (same Playwright page API)
|
|
43
|
+
- ✅ Full **screenshot** and **visual selector** support
|
|
44
|
+
- ✅ Drop-in replacement — select "CloakBrowser" per-watch in the UI
|
|
45
|
+
- ✅ Proxy support with optional geo-IP auto-detection
|
|
46
|
+
- ✅ Human-like behaviour mode to avoid timing-based detection
|
|
47
|
+
|
|
48
|
+
## Requirements
|
|
49
|
+
|
|
50
|
+
- changedetection.io >= 0.54.5
|
|
51
|
+
- Python 3.10+
|
|
52
|
+
- ~200 MB disk space for the CloakBrowser Chromium binary (downloaded automatically on first run)
|
|
53
|
+
|
|
54
|
+
## Quick Start
|
|
55
|
+
|
|
56
|
+
### 1. Install the plugin
|
|
57
|
+
|
|
58
|
+
**Docker (docker-compose.yml):**
|
|
59
|
+
|
|
60
|
+
```yaml
|
|
61
|
+
services:
|
|
62
|
+
changedetection:
|
|
63
|
+
environment:
|
|
64
|
+
- EXTRA_PACKAGES=changedetection.io-cloak-browser
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
**Local install:**
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
pip install changedetection.io-cloak-browser
|
|
71
|
+
# Download the Chromium binary
|
|
72
|
+
python -m cloakbrowser install
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
### 2. Select CloakBrowser for a watch
|
|
76
|
+
|
|
77
|
+
In the changedetection.io UI, open any watch → **Edit** → **Fetch** tab → choose
|
|
78
|
+
**"CloakBrowser - Stealth Chromium (anti-bot bypass)"** from the fetcher dropdown.
|
|
79
|
+
|
|
80
|
+
### 3. (Optional) Configure via environment variables
|
|
81
|
+
|
|
82
|
+
| Variable | Default | Description |
|
|
83
|
+
|---|---|---|
|
|
84
|
+
| `CLOAKBROWSER_HUMANIZE` | `true` | Enable human-like mouse/keyboard/scroll behaviour |
|
|
85
|
+
| `playwright_proxy_server` | *(none)* | Proxy URL, e.g. `http://proxy:8080` |
|
|
86
|
+
| `playwright_proxy_username` | *(none)* | Proxy username |
|
|
87
|
+
| `playwright_proxy_password` | *(none)* | Proxy password |
|
|
88
|
+
| `PLAYWRIGHT_SERVICE_WORKERS` | `allow` | `allow` or `block` service workers |
|
|
89
|
+
| `WEBDRIVER_DELAY_BEFORE_CONTENT_READY` | `5` | Seconds to wait after page load |
|
|
90
|
+
| `SCREENSHOT_MAX_HEIGHT` | `20000` | Maximum screenshot height in pixels |
|
|
91
|
+
|
|
92
|
+
## Docker Compose Example
|
|
93
|
+
|
|
94
|
+
```yaml
|
|
95
|
+
version: '3'
|
|
96
|
+
|
|
97
|
+
services:
|
|
98
|
+
changedetection:
|
|
99
|
+
image: ghcr.io/dgtlmoon/changedetection.io:latest
|
|
100
|
+
container_name: changedetection
|
|
101
|
+
volumes:
|
|
102
|
+
- ./datastore:/datastore
|
|
103
|
+
environment:
|
|
104
|
+
- EXTRA_PACKAGES=changedetection.io-cloak-browser
|
|
105
|
+
- CLOAKBROWSER_HUMANIZE=true
|
|
106
|
+
# Optional proxy
|
|
107
|
+
# - playwright_proxy_server=http://proxy.example.com:8080
|
|
108
|
+
# - playwright_proxy_username=user
|
|
109
|
+
# - playwright_proxy_password=pass
|
|
110
|
+
ports:
|
|
111
|
+
- "5000:5000"
|
|
112
|
+
restart: unless-stopped
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
## How It Works
|
|
116
|
+
|
|
117
|
+
CloakBrowser is a patched Chromium binary with 33 source-level C++ modifications that
|
|
118
|
+
make it indistinguishable from a real user's Chrome browser. Unlike JavaScript-injection
|
|
119
|
+
approaches (which detection services can identify), these patches operate at the binary level.
|
|
120
|
+
|
|
121
|
+
The Python `cloakbrowser` package wraps the Playwright Python library but connects to the
|
|
122
|
+
patched binary instead of stock Chromium. This means **the page API is 100% identical to
|
|
123
|
+
Playwright** — browser steps, screenshots, visual selectors, and JS execution all work
|
|
124
|
+
unchanged.
|
|
125
|
+
|
|
126
|
+
### Detection bypass results
|
|
127
|
+
|
|
128
|
+
| Service | Stock Playwright | CloakBrowser |
|
|
129
|
+
|---|---|---|
|
|
130
|
+
| reCAPTCHA v3 score | 0.1 (bot) | **0.9** (human) |
|
|
131
|
+
| Cloudflare Turnstile | FAIL | **PASS** |
|
|
132
|
+
| FingerprintJS | DETECTED | **PASS** |
|
|
133
|
+
| BrowserScan | DETECTED | **NORMAL** |
|
|
134
|
+
| `navigator.webdriver` | `true` | **`false`** |
|
|
135
|
+
| TLS fingerprint | Mismatch | **Identical to Chrome** |
|
|
136
|
+
|
|
137
|
+
## Browser Steps
|
|
138
|
+
|
|
139
|
+
CloakBrowser fully supports all changedetection.io browser steps:
|
|
140
|
+
|
|
141
|
+
- Click element / Click element if exists
|
|
142
|
+
- Enter text in field
|
|
143
|
+
- Execute JS
|
|
144
|
+
- Wait for text / Wait for seconds
|
|
145
|
+
- Scroll down
|
|
146
|
+
- Check/uncheck checkbox
|
|
147
|
+
- Select by label
|
|
148
|
+
- Remove elements
|
|
149
|
+
- … and all others
|
|
150
|
+
|
|
151
|
+
This works because CloakBrowser pages are standard Playwright page objects — the browser
|
|
152
|
+
steps engine requires no modification.
|
|
153
|
+
|
|
154
|
+
## Troubleshooting
|
|
155
|
+
|
|
156
|
+
**Plugin not loading?**
|
|
157
|
+
```python
|
|
158
|
+
from changedetectionio.pluggy_interface import plugin_manager
|
|
159
|
+
print([name for name, _ in plugin_manager.list_name_plugin()])
|
|
160
|
+
# Should include: cloak_browser
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
**Binary not downloaded?**
|
|
164
|
+
```bash
|
|
165
|
+
python -m cloakbrowser install
|
|
166
|
+
python -m cloakbrowser info
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
**Test that the fetcher registers:**
|
|
170
|
+
```python
|
|
171
|
+
from changedetectionio.content_fetchers import available_fetchers
|
|
172
|
+
print(available_fetchers())
|
|
173
|
+
# Should include: ('html_cloakbrowser', 'CloakBrowser - Stealth Chromium (anti-bot bypass)')
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
**Check for CloakBrowser updates:**
|
|
177
|
+
```bash
|
|
178
|
+
python -m cloakbrowser update
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
## License
|
|
182
|
+
|
|
183
|
+
MIT License — see [LICENSE](LICENSE).
|
|
184
|
+
|
|
185
|
+
CloakBrowser binary: free-to-use, no redistribution. See
|
|
186
|
+
[CloakBrowser BINARY-LICENSE](https://github.com/CloakHQ/CloakBrowser/blob/main/BINARY-LICENSE.md).
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
# changedetection.io-cloak-browser
|
|
2
|
+
|
|
3
|
+
Stealth browser fetcher plugin for [changedetection.io](https://github.com/dgtlmoon/changedetection.io) powered by [CloakBrowser](https://github.com/CloakHQ/CloakBrowser).
|
|
4
|
+
|
|
5
|
+
- ✅ Bypasses **Cloudflare Turnstile**, reCAPTCHA v3, FingerprintJS, BrowserScan and more
|
|
6
|
+
- ✅ 33 source-level C++ patches compiled into Chromium — no JS injection tricks
|
|
7
|
+
- ✅ Full **browser steps** support (same Playwright page API)
|
|
8
|
+
- ✅ Full **screenshot** and **visual selector** support
|
|
9
|
+
- ✅ Drop-in replacement — select "CloakBrowser" per-watch in the UI
|
|
10
|
+
- ✅ Proxy support with optional geo-IP auto-detection
|
|
11
|
+
- ✅ Human-like behaviour mode to avoid timing-based detection
|
|
12
|
+
|
|
13
|
+
## Requirements
|
|
14
|
+
|
|
15
|
+
- changedetection.io >= 0.54.5
|
|
16
|
+
- Python 3.10+
|
|
17
|
+
- ~200 MB disk space for the CloakBrowser Chromium binary (downloaded automatically on first run)
|
|
18
|
+
|
|
19
|
+
## Quick Start
|
|
20
|
+
|
|
21
|
+
### 1. Install the plugin
|
|
22
|
+
|
|
23
|
+
**Docker (docker-compose.yml):**
|
|
24
|
+
|
|
25
|
+
```yaml
|
|
26
|
+
services:
|
|
27
|
+
changedetection:
|
|
28
|
+
environment:
|
|
29
|
+
- EXTRA_PACKAGES=changedetection.io-cloak-browser
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
**Local install:**
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
pip install changedetection.io-cloak-browser
|
|
36
|
+
# Download the Chromium binary
|
|
37
|
+
python -m cloakbrowser install
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
### 2. Select CloakBrowser for a watch
|
|
41
|
+
|
|
42
|
+
In the changedetection.io UI, open any watch → **Edit** → **Fetch** tab → choose
|
|
43
|
+
**"CloakBrowser - Stealth Chromium (anti-bot bypass)"** from the fetcher dropdown.
|
|
44
|
+
|
|
45
|
+
### 3. (Optional) Configure via environment variables
|
|
46
|
+
|
|
47
|
+
| Variable | Default | Description |
|
|
48
|
+
|---|---|---|
|
|
49
|
+
| `CLOAKBROWSER_HUMANIZE` | `true` | Enable human-like mouse/keyboard/scroll behaviour |
|
|
50
|
+
| `playwright_proxy_server` | *(none)* | Proxy URL, e.g. `http://proxy:8080` |
|
|
51
|
+
| `playwright_proxy_username` | *(none)* | Proxy username |
|
|
52
|
+
| `playwright_proxy_password` | *(none)* | Proxy password |
|
|
53
|
+
| `PLAYWRIGHT_SERVICE_WORKERS` | `allow` | `allow` or `block` service workers |
|
|
54
|
+
| `WEBDRIVER_DELAY_BEFORE_CONTENT_READY` | `5` | Seconds to wait after page load |
|
|
55
|
+
| `SCREENSHOT_MAX_HEIGHT` | `20000` | Maximum screenshot height in pixels |
|
|
56
|
+
|
|
57
|
+
## Docker Compose Example
|
|
58
|
+
|
|
59
|
+
```yaml
|
|
60
|
+
version: '3'
|
|
61
|
+
|
|
62
|
+
services:
|
|
63
|
+
changedetection:
|
|
64
|
+
image: ghcr.io/dgtlmoon/changedetection.io:latest
|
|
65
|
+
container_name: changedetection
|
|
66
|
+
volumes:
|
|
67
|
+
- ./datastore:/datastore
|
|
68
|
+
environment:
|
|
69
|
+
- EXTRA_PACKAGES=changedetection.io-cloak-browser
|
|
70
|
+
- CLOAKBROWSER_HUMANIZE=true
|
|
71
|
+
# Optional proxy
|
|
72
|
+
# - playwright_proxy_server=http://proxy.example.com:8080
|
|
73
|
+
# - playwright_proxy_username=user
|
|
74
|
+
# - playwright_proxy_password=pass
|
|
75
|
+
ports:
|
|
76
|
+
- "5000:5000"
|
|
77
|
+
restart: unless-stopped
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
## How It Works
|
|
81
|
+
|
|
82
|
+
CloakBrowser is a patched Chromium binary with 33 source-level C++ modifications that
|
|
83
|
+
make it indistinguishable from a real user's Chrome browser. Unlike JavaScript-injection
|
|
84
|
+
approaches (which detection services can identify), these patches operate at the binary level.
|
|
85
|
+
|
|
86
|
+
The Python `cloakbrowser` package wraps the Playwright Python library but connects to the
|
|
87
|
+
patched binary instead of stock Chromium. This means **the page API is 100% identical to
|
|
88
|
+
Playwright** — browser steps, screenshots, visual selectors, and JS execution all work
|
|
89
|
+
unchanged.
|
|
90
|
+
|
|
91
|
+
### Detection bypass results
|
|
92
|
+
|
|
93
|
+
| Service | Stock Playwright | CloakBrowser |
|
|
94
|
+
|---|---|---|
|
|
95
|
+
| reCAPTCHA v3 score | 0.1 (bot) | **0.9** (human) |
|
|
96
|
+
| Cloudflare Turnstile | FAIL | **PASS** |
|
|
97
|
+
| FingerprintJS | DETECTED | **PASS** |
|
|
98
|
+
| BrowserScan | DETECTED | **NORMAL** |
|
|
99
|
+
| `navigator.webdriver` | `true` | **`false`** |
|
|
100
|
+
| TLS fingerprint | Mismatch | **Identical to Chrome** |
|
|
101
|
+
|
|
102
|
+
## Browser Steps
|
|
103
|
+
|
|
104
|
+
CloakBrowser fully supports all changedetection.io browser steps:
|
|
105
|
+
|
|
106
|
+
- Click element / Click element if exists
|
|
107
|
+
- Enter text in field
|
|
108
|
+
- Execute JS
|
|
109
|
+
- Wait for text / Wait for seconds
|
|
110
|
+
- Scroll down
|
|
111
|
+
- Check/uncheck checkbox
|
|
112
|
+
- Select by label
|
|
113
|
+
- Remove elements
|
|
114
|
+
- … and all others
|
|
115
|
+
|
|
116
|
+
This works because CloakBrowser pages are standard Playwright page objects — the browser
|
|
117
|
+
steps engine requires no modification.
|
|
118
|
+
|
|
119
|
+
## Troubleshooting
|
|
120
|
+
|
|
121
|
+
**Plugin not loading?**
|
|
122
|
+
```python
|
|
123
|
+
from changedetectionio.pluggy_interface import plugin_manager
|
|
124
|
+
print([name for name, _ in plugin_manager.list_name_plugin()])
|
|
125
|
+
# Should include: cloak_browser
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
**Binary not downloaded?**
|
|
129
|
+
```bash
|
|
130
|
+
python -m cloakbrowser install
|
|
131
|
+
python -m cloakbrowser info
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
**Test that the fetcher registers:**
|
|
135
|
+
```python
|
|
136
|
+
from changedetectionio.content_fetchers import available_fetchers
|
|
137
|
+
print(available_fetchers())
|
|
138
|
+
# Should include: ('html_cloakbrowser', 'CloakBrowser - Stealth Chromium (anti-bot bypass)')
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
**Check for CloakBrowser updates:**
|
|
142
|
+
```bash
|
|
143
|
+
python -m cloakbrowser update
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
## License
|
|
147
|
+
|
|
148
|
+
MIT License — see [LICENSE](LICENSE).
|
|
149
|
+
|
|
150
|
+
CloakBrowser binary: free-to-use, no redistribution. See
|
|
151
|
+
[CloakBrowser BINARY-LICENSE](https://github.com/CloakHQ/CloakBrowser/blob/main/BINARY-LICENSE.md).
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: changedetection.io-cloak-browser
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: CloakBrowser stealth fetcher plugin for changedetection.io (anti-bot bypass)
|
|
5
|
+
Home-page: https://github.com/dgtlmoon/changedetection.io-cloak-browser
|
|
6
|
+
Author: dgtlmoon
|
|
7
|
+
Author-email: dgtlmoon@gmail.com
|
|
8
|
+
Keywords: changedetection web monitoring anti-bot stealth chromium cloakbrowser cloudflare bypass
|
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: Intended Audience :: System Administrators
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Topic :: Internet :: WWW/HTTP :: Site Management
|
|
18
|
+
Classifier: Topic :: System :: Monitoring
|
|
19
|
+
Classifier: Topic :: Security
|
|
20
|
+
Requires-Python: >=3.10
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
Requires-Dist: changedetection.io>=0.54.5
|
|
23
|
+
Requires-Dist: cloakbrowser>=0.3.0
|
|
24
|
+
Requires-Dist: playwright>=1.40.0
|
|
25
|
+
Dynamic: author
|
|
26
|
+
Dynamic: author-email
|
|
27
|
+
Dynamic: classifier
|
|
28
|
+
Dynamic: description
|
|
29
|
+
Dynamic: description-content-type
|
|
30
|
+
Dynamic: home-page
|
|
31
|
+
Dynamic: keywords
|
|
32
|
+
Dynamic: requires-dist
|
|
33
|
+
Dynamic: requires-python
|
|
34
|
+
Dynamic: summary
|
|
35
|
+
|
|
36
|
+
# changedetection.io-cloak-browser
|
|
37
|
+
|
|
38
|
+
Stealth browser fetcher plugin for [changedetection.io](https://github.com/dgtlmoon/changedetection.io) powered by [CloakBrowser](https://github.com/CloakHQ/CloakBrowser).
|
|
39
|
+
|
|
40
|
+
- ✅ Bypasses **Cloudflare Turnstile**, reCAPTCHA v3, FingerprintJS, BrowserScan and more
|
|
41
|
+
- ✅ 33 source-level C++ patches compiled into Chromium — no JS injection tricks
|
|
42
|
+
- ✅ Full **browser steps** support (same Playwright page API)
|
|
43
|
+
- ✅ Full **screenshot** and **visual selector** support
|
|
44
|
+
- ✅ Drop-in replacement — select "CloakBrowser" per-watch in the UI
|
|
45
|
+
- ✅ Proxy support with optional geo-IP auto-detection
|
|
46
|
+
- ✅ Human-like behaviour mode to avoid timing-based detection
|
|
47
|
+
|
|
48
|
+
## Requirements
|
|
49
|
+
|
|
50
|
+
- changedetection.io >= 0.54.5
|
|
51
|
+
- Python 3.10+
|
|
52
|
+
- ~200 MB disk space for the CloakBrowser Chromium binary (downloaded automatically on first run)
|
|
53
|
+
|
|
54
|
+
## Quick Start
|
|
55
|
+
|
|
56
|
+
### 1. Install the plugin
|
|
57
|
+
|
|
58
|
+
**Docker (docker-compose.yml):**
|
|
59
|
+
|
|
60
|
+
```yaml
|
|
61
|
+
services:
|
|
62
|
+
changedetection:
|
|
63
|
+
environment:
|
|
64
|
+
- EXTRA_PACKAGES=changedetection.io-cloak-browser
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
**Local install:**
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
pip install changedetection.io-cloak-browser
|
|
71
|
+
# Download the Chromium binary
|
|
72
|
+
python -m cloakbrowser install
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
### 2. Select CloakBrowser for a watch
|
|
76
|
+
|
|
77
|
+
In the changedetection.io UI, open any watch → **Edit** → **Fetch** tab → choose
|
|
78
|
+
**"CloakBrowser - Stealth Chromium (anti-bot bypass)"** from the fetcher dropdown.
|
|
79
|
+
|
|
80
|
+
### 3. (Optional) Configure via environment variables
|
|
81
|
+
|
|
82
|
+
| Variable | Default | Description |
|
|
83
|
+
|---|---|---|
|
|
84
|
+
| `CLOAKBROWSER_HUMANIZE` | `true` | Enable human-like mouse/keyboard/scroll behaviour |
|
|
85
|
+
| `playwright_proxy_server` | *(none)* | Proxy URL, e.g. `http://proxy:8080` |
|
|
86
|
+
| `playwright_proxy_username` | *(none)* | Proxy username |
|
|
87
|
+
| `playwright_proxy_password` | *(none)* | Proxy password |
|
|
88
|
+
| `PLAYWRIGHT_SERVICE_WORKERS` | `allow` | `allow` or `block` service workers |
|
|
89
|
+
| `WEBDRIVER_DELAY_BEFORE_CONTENT_READY` | `5` | Seconds to wait after page load |
|
|
90
|
+
| `SCREENSHOT_MAX_HEIGHT` | `20000` | Maximum screenshot height in pixels |
|
|
91
|
+
|
|
92
|
+
## Docker Compose Example
|
|
93
|
+
|
|
94
|
+
```yaml
|
|
95
|
+
version: '3'
|
|
96
|
+
|
|
97
|
+
services:
|
|
98
|
+
changedetection:
|
|
99
|
+
image: ghcr.io/dgtlmoon/changedetection.io:latest
|
|
100
|
+
container_name: changedetection
|
|
101
|
+
volumes:
|
|
102
|
+
- ./datastore:/datastore
|
|
103
|
+
environment:
|
|
104
|
+
- EXTRA_PACKAGES=changedetection.io-cloak-browser
|
|
105
|
+
- CLOAKBROWSER_HUMANIZE=true
|
|
106
|
+
# Optional proxy
|
|
107
|
+
# - playwright_proxy_server=http://proxy.example.com:8080
|
|
108
|
+
# - playwright_proxy_username=user
|
|
109
|
+
# - playwright_proxy_password=pass
|
|
110
|
+
ports:
|
|
111
|
+
- "5000:5000"
|
|
112
|
+
restart: unless-stopped
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
## How It Works
|
|
116
|
+
|
|
117
|
+
CloakBrowser is a patched Chromium binary with 33 source-level C++ modifications that
|
|
118
|
+
make it indistinguishable from a real user's Chrome browser. Unlike JavaScript-injection
|
|
119
|
+
approaches (which detection services can identify), these patches operate at the binary level.
|
|
120
|
+
|
|
121
|
+
The Python `cloakbrowser` package wraps the Playwright Python library but connects to the
|
|
122
|
+
patched binary instead of stock Chromium. This means **the page API is 100% identical to
|
|
123
|
+
Playwright** — browser steps, screenshots, visual selectors, and JS execution all work
|
|
124
|
+
unchanged.
|
|
125
|
+
|
|
126
|
+
### Detection bypass results
|
|
127
|
+
|
|
128
|
+
| Service | Stock Playwright | CloakBrowser |
|
|
129
|
+
|---|---|---|
|
|
130
|
+
| reCAPTCHA v3 score | 0.1 (bot) | **0.9** (human) |
|
|
131
|
+
| Cloudflare Turnstile | FAIL | **PASS** |
|
|
132
|
+
| FingerprintJS | DETECTED | **PASS** |
|
|
133
|
+
| BrowserScan | DETECTED | **NORMAL** |
|
|
134
|
+
| `navigator.webdriver` | `true` | **`false`** |
|
|
135
|
+
| TLS fingerprint | Mismatch | **Identical to Chrome** |
|
|
136
|
+
|
|
137
|
+
## Browser Steps
|
|
138
|
+
|
|
139
|
+
CloakBrowser fully supports all changedetection.io browser steps:
|
|
140
|
+
|
|
141
|
+
- Click element / Click element if exists
|
|
142
|
+
- Enter text in field
|
|
143
|
+
- Execute JS
|
|
144
|
+
- Wait for text / Wait for seconds
|
|
145
|
+
- Scroll down
|
|
146
|
+
- Check/uncheck checkbox
|
|
147
|
+
- Select by label
|
|
148
|
+
- Remove elements
|
|
149
|
+
- … and all others
|
|
150
|
+
|
|
151
|
+
This works because CloakBrowser pages are standard Playwright page objects — the browser
|
|
152
|
+
steps engine requires no modification.
|
|
153
|
+
|
|
154
|
+
## Troubleshooting
|
|
155
|
+
|
|
156
|
+
**Plugin not loading?**
|
|
157
|
+
```python
|
|
158
|
+
from changedetectionio.pluggy_interface import plugin_manager
|
|
159
|
+
print([name for name, _ in plugin_manager.list_name_plugin()])
|
|
160
|
+
# Should include: cloak_browser
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
**Binary not downloaded?**
|
|
164
|
+
```bash
|
|
165
|
+
python -m cloakbrowser install
|
|
166
|
+
python -m cloakbrowser info
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
**Test that the fetcher registers:**
|
|
170
|
+
```python
|
|
171
|
+
from changedetectionio.content_fetchers import available_fetchers
|
|
172
|
+
print(available_fetchers())
|
|
173
|
+
# Should include: ('html_cloakbrowser', 'CloakBrowser - Stealth Chromium (anti-bot bypass)')
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
**Check for CloakBrowser updates:**
|
|
177
|
+
```bash
|
|
178
|
+
python -m cloakbrowser update
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
## License
|
|
182
|
+
|
|
183
|
+
MIT License — see [LICENSE](LICENSE).
|
|
184
|
+
|
|
185
|
+
CloakBrowser binary: free-to-use, no redistribution. See
|
|
186
|
+
[CloakBrowser BINARY-LICENSE](https://github.com/CloakHQ/CloakBrowser/blob/main/BINARY-LICENSE.md).
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
setup.py
|
|
3
|
+
changedetection.io_cloak_browser.egg-info/PKG-INFO
|
|
4
|
+
changedetection.io_cloak_browser.egg-info/SOURCES.txt
|
|
5
|
+
changedetection.io_cloak_browser.egg-info/dependency_links.txt
|
|
6
|
+
changedetection.io_cloak_browser.egg-info/entry_points.txt
|
|
7
|
+
changedetection.io_cloak_browser.egg-info/requires.txt
|
|
8
|
+
changedetection.io_cloak_browser.egg-info/top_level.txt
|
|
9
|
+
changedetection_cloak_browser/__init__.py
|
|
10
|
+
changedetection_cloak_browser/fetcher.py
|
|
11
|
+
changedetection_cloak_browser/static/cloakbrowser-logo.svg
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
changedetection_io_cloak_browser-0.1.0/changedetection.io_cloak_browser.egg-info/top_level.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
changedetection_cloak_browser
|
|
@@ -0,0 +1,397 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import gc
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
from urllib.parse import urlparse
|
|
6
|
+
|
|
7
|
+
from loguru import logger
|
|
8
|
+
from changedetectionio.pluggy_interface import hookimpl
|
|
9
|
+
|
|
10
|
+
_STATIC_DIR = os.path.join(os.path.dirname(__file__), 'static')
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@hookimpl
def plugin_static_path():
    """Expose the directory that holds this plugin's bundled static assets.

    Returns:
        The module-level ``_STATIC_DIR`` path, i.e. the ``static`` folder
        located next to this source file.
    """
    return _STATIC_DIR
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@hookimpl
|
|
20
|
+
def register_content_fetcher():
|
|
21
|
+
"""Register the CloakBrowser fetcher with changedetection.io.
|
|
22
|
+
|
|
23
|
+
All changedetectionio.content_fetchers imports are deferred to here to avoid
|
|
24
|
+
a circular import: pluggy_interface (line 215: load_setuptools_entrypoints) loads
|
|
25
|
+
this module, which would otherwise import content_fetchers/__init__.py, which
|
|
26
|
+
imports register_builtin_fetchers from pluggy_interface — but pluggy_interface
|
|
27
|
+
is only partially initialised at that point.
|
|
28
|
+
|
|
29
|
+
By the time this function is called (from content_fetchers.get_plugin_fetchers),
|
|
30
|
+
pluggy_interface is fully initialised.
|
|
31
|
+
"""
|
|
32
|
+
from changedetectionio.content_fetchers import (
|
|
33
|
+
SCREENSHOT_MAX_HEIGHT_DEFAULT,
|
|
34
|
+
visualselector_xpath_selectors,
|
|
35
|
+
XPATH_ELEMENT_JS,
|
|
36
|
+
INSTOCK_DATA_JS,
|
|
37
|
+
FAVICON_FETCHER_JS,
|
|
38
|
+
)
|
|
39
|
+
from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
|
|
40
|
+
from changedetectionio.content_fetchers.exceptions import (
|
|
41
|
+
BrowserStepsStepException,
|
|
42
|
+
EmptyReply,
|
|
43
|
+
Non200ErrorCodeReceived,
|
|
44
|
+
PageUnloadable,
|
|
45
|
+
ScreenshotUnavailable,
|
|
46
|
+
)
|
|
47
|
+
# CloakBrowser pages are standard Playwright page objects, so we can reuse
|
|
48
|
+
# the Playwright screenshot helper directly.
|
|
49
|
+
from changedetectionio.content_fetchers.playwright import capture_full_page_async
|
|
50
|
+
|
|
51
|
+
class fetcher(Fetcher):
|
|
52
|
+
fetcher_description = "CloakBrowser - Stealth Chromium (anti-bot bypass)"
|
|
53
|
+
|
|
54
|
+
# CloakBrowser pages are full Playwright pages — all features work unchanged
|
|
55
|
+
supports_browser_steps = True
|
|
56
|
+
supports_screenshots = True
|
|
57
|
+
supports_xpath_element_data = True
|
|
58
|
+
|
|
59
|
+
proxy = None
|
|
60
|
+
|
|
61
|
+
def __init__(self, proxy_override=None, custom_browser_connection_url=None, **kwargs):
    """Set up the fetcher and resolve which proxy (if any) it should use.

    Args:
        proxy_override: Proxy server URL. When truthy it replaces any proxy
            settings collected from the environment.
        custom_browser_connection_url: Accepted only for signature
            compatibility; a warning is logged and the value is ignored.
        **kwargs: Forwarded unchanged to the parent initialiser.
    """
    super().__init__(**kwargs)

    # This fetcher always launches a local browser, so a remote connection
    # URL has no effect here; tell the user instead of silently dropping it.
    if custom_browser_connection_url:
        logger.warning(
            "CloakBrowser fetcher: custom_browser_connection_url is ignored — "
            "CloakBrowser always launches a local browser"
        )

    # Collect playwright_proxy_<field> environment variables, skipping unset
    # or empty ones and trimming surrounding double quotes from the value.
    env_proxy = {
        field: raw.strip('"')
        for field in ('bypass', 'server', 'username', 'password')
        if (raw := os.getenv('playwright_proxy_' + field, False))
    }
    if env_proxy:
        self.proxy = env_proxy

    # An explicit override wins over whatever the environment provided.
    if proxy_override:
        self.proxy = {'server': proxy_override}

    if not self.proxy:
        return

    # Credentials embedded in the server URL (scheme://user:pass@host) are
    # copied into dedicated keys; the password may be None if the URL has none.
    embedded = urlparse(self.proxy.get('server', ''))
    if embedded.username:
        self.proxy['username'] = embedded.username
        self.proxy['password'] = embedded.password
|
|
90
|
+
|
|
91
|
+
@classmethod
|
|
92
|
+
def get_status_icon_data(cls):
|
|
93
|
+
return {
|
|
94
|
+
'group': 'plugin',
|
|
95
|
+
'filename': 'cloakbrowser-logo.svg',
|
|
96
|
+
'alt': 'Using CloakBrowser (stealth)',
|
|
97
|
+
'title': 'CloakBrowser — Stealth Chromium with anti-bot bypass',
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
@classmethod
async def get_browsersteps_browser(cls, proxy=None, keepalive_ms=None):
    """Start a headless local CloakBrowser for the browser-steps live UI.

    Args:
        proxy: Optional Playwright-style proxy dict; converted to a URL
            string before being handed to CloakBrowser.
        keepalive_ms: Accepted but not used by this implementation.

    Returns:
        A ``(browser, None)`` tuple. The second slot is always None because
        no separate Playwright context object is produced here.
    """
    # Imported lazily so the module can be loaded without cloakbrowser
    # being touched until a browser is actually requested.
    from cloakbrowser import launch_async

    proxy_url = None
    if proxy:
        proxy_url = cls._proxy_dict_to_url(proxy)

    # Humanize stays on unless the env var is explicitly false/0/no
    # (case-insensitive); any other value, or no value, enables it.
    humanize_setting = os.getenv('CLOAKBROWSER_HUMANIZE', 'true').lower()
    humanize = humanize_setting not in ('false', '0', 'no')

    browser = await launch_async(headless=True, proxy=proxy_url, humanize=humanize)
    return (browser, None)
|
|
121
|
+
|
|
122
|
+
@staticmethod
|
|
123
|
+
def _proxy_dict_to_url(proxy):
|
|
124
|
+
"""Convert a Playwright-style proxy dict to a URL string for CloakBrowser."""
|
|
125
|
+
if not proxy:
|
|
126
|
+
return None
|
|
127
|
+
server = proxy.get('server', '')
|
|
128
|
+
username = proxy.get('username')
|
|
129
|
+
password = proxy.get('password')
|
|
130
|
+
if username and server:
|
|
131
|
+
parsed = urlparse(server)
|
|
132
|
+
return f"{parsed.scheme}://{username}:{password}@{parsed.hostname}:{parsed.port}"
|
|
133
|
+
return server or None
|
|
134
|
+
|
|
135
|
+
def _build_proxy_url(self):
|
|
136
|
+
"""Convert this instance's proxy dict to a URL string for CloakBrowser."""
|
|
137
|
+
return self._proxy_dict_to_url(self.proxy)
|
|
138
|
+
|
|
139
|
+
async def screenshot_step(self, step_n=''):
    """Capture a full-page screenshot for one browser step and store it.

    Args:
        step_n: Step identifier used in the output file name
            (``step_<step_n>.jpeg``).

    The image is written only when ``browser_steps_screenshot_path`` is set;
    the capture itself always runs.
    """
    super().screenshot_step(step_n=step_n)

    image_bytes = await capture_full_page_async(
        page=self.page,
        screenshot_format=self.screenshot_format,
        # watch_uuid may not have been assigned yet, so fall back to None.
        watch_uuid=getattr(self, 'watch_uuid', None),
        lock_viewport_elements=self.lock_viewport_elements,
    )

    # Best effort: ask the page to garbage-collect; any failure is ignored.
    try:
        await self.page.request_gc()
    except Exception:
        pass

    output_dir = self.browser_steps_screenshot_path
    if output_dir is not None:
        destination = os.path.join(output_dir, f'step_{step_n}.jpeg')
        logger.debug(f"Saving step screenshot to {destination}")
        with open(destination, 'wb') as handle:
            handle.write(image_bytes)

    # Drop the image buffer promptly and force a collection pass.
    del image_bytes
    gc.collect()
|
|
160
|
+
|
|
161
|
+
async def save_step_html(self, step_n):
    """Save the current page HTML for one browser step.

    Args:
        step_n: Step identifier used in the output file name
            (``step_<step_n>.html``).

    The file is written as UTF-8 and only when
    ``browser_steps_screenshot_path`` is set, matching the guard used by
    ``screenshot_step``. Previously an unset path reached ``os.path.join``
    as None and raised TypeError.
    """
    super().save_step_html(step_n=step_n)
    content = await self.page.content()

    # Best effort: ask the page to garbage-collect; any failure is ignored.
    try:
        await self.page.request_gc()
    except Exception:
        pass

    if self.browser_steps_screenshot_path is not None:
        destination = os.path.join(self.browser_steps_screenshot_path, f'step_{step_n}.html')
        logger.debug(f"Saving step HTML to {destination}")
        with open(destination, 'w', encoding='utf-8') as f:
            f.write(content)

    # Drop the HTML string promptly and force a collection pass.
    del content
    gc.collect()
|
|
174
|
+
|
|
175
|
+
async def run(
    self,
    fetch_favicon=True,
    current_include_filters=None,
    empty_pages_are_a_change=False,
    ignore_status_codes=False,
    is_binary=False,
    request_body=None,
    request_headers=None,
    request_method=None,
    screenshot_format=None,
    timeout=None,
    url=None,
    watch_uuid=None,
):
    """Fetch ``url`` with a CloakBrowser instance and store the results on ``self``.

    On success the following attributes are populated: ``headers``,
    ``status_code``, ``favicon_blob`` (when ``fetch_favicon`` is true and the
    favicon script succeeds), ``xpath_data``, ``instock_data``, ``content``
    and ``screenshot``.  The page, context and browser are always closed on
    the way out, each with a five second timeout.

    Args:
        fetch_favicon: Evaluate the favicon script after the page loaded.
        current_include_filters: Value exposed to the page as the JavaScript
            variable ``include_filters`` (an empty string when ``None``).
        empty_pages_are_a_change: When false, blank page content raises
            ``EmptyReply``.
        ignore_status_codes: When false, any status other than 200 raises
            ``Non200ErrorCodeReceived``.
        request_headers: Extra HTTP headers for the browser context; also
            passed to ``manage_user_agent``.
        url: Address to load.
        watch_uuid: Stored on ``self.watch_uuid`` and forwarded to the
            screenshot helper.
        is_binary, request_body, request_method, screenshot_format, timeout:
            Accepted for signature compatibility; not referenced in this body.

    Raises:
        EmptyReply: No response object was returned, or the page is blank.
        PageUnloadable: Custom JS failed or the response status was unreadable.
        Non200ErrorCodeReceived: Non-200 status while status codes matter.
        ScreenshotUnavailable: Re-raised with this request's url/status code.
    """
    from cloakbrowser import launch_async
    import time

    async def _release_page_memory():
        # Failures of request_gc() are deliberately ignored everywhere in this method.
        try:
            await self.page.request_gc()
        except Exception:
            pass

    async def _shutdown(label, target, gc_first=False):
        # Close one browser-side object, logging (never raising) on timeout or error.
        try:
            if target:
                if gc_first:
                    await _release_page_memory()
                await asyncio.wait_for(target.close(), timeout=5.0)
        except asyncio.TimeoutError:
            logger.warning(f"CloakBrowser > Timed out closing {label} for {url}")
        except Exception as e:
            logger.warning(f"CloakBrowser > Error closing {label} for {url}: {e}")

    self.delete_browser_steps_screenshots()
    self.watch_uuid = watch_uuid

    browser = None
    context = None
    response = None

    proxy_url = self._build_proxy_url()

    # Humanize stays enabled unless the environment explicitly switches it off.
    humanize = os.getenv('CLOAKBROWSER_HUMANIZE', 'true').lower() not in ('false', '0', 'no')

    try:
        browser = await launch_async(
            headless=True,
            proxy=proxy_url,
            humanize=humanize,
        )

        # CloakBrowser hands back standard Playwright browser objects, so
        # new_context() and the page methods are used exactly as with Playwright.
        context = await browser.new_context(
            accept_downloads=False,
            bypass_csp=True,
            extra_http_headers=request_headers or {},
            ignore_https_errors=True,
            service_workers=os.getenv('PLAYWRIGHT_SERVICE_WORKERS', 'allow'),
            user_agent=manage_user_agent(headers=request_headers or {}),
        )

        self.page = await context.new_page()
        self.page.on(
            "console",
            lambda msg: logger.debug(f"CloakBrowser console: {url} {msg.type}: {msg.text} {msg.args}"),
        )

        # The browser-steps interface is reused unchanged because CloakBrowser
        # pages are Playwright pages (same API, same error classes).
        from changedetectionio.browser_steps.browser_steps import steppable_browser_interface
        browsersteps_interface = steppable_browser_interface(start_url=url)
        browsersteps_interface.page = self.page

        response = await browsersteps_interface.action_goto_url(value=url)
        if response is None:
            raise EmptyReply(url=url, status_code=None)

        try:
            self.headers = await response.all_headers()
        except TypeError:
            self.headers = response.all_headers()

        try:
            custom_js = self.webdriver_js_execute_code
            if custom_js and len(custom_js):
                await browsersteps_interface.action_execute_js(value=custom_js, selector=None)
        except Exception as e:
            logger.debug(f"CloakBrowser > Error executing custom JS: {e}")
            raise PageUnloadable(url=url, status_code=None, message=str(e))

        extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
        await self.page.wait_for_timeout(extra_wait * 1000)

        try:
            self.status_code = response.status
        except Exception as e:
            logger.critical(f"CloakBrowser > Response had no status_code: {e}")
            raise PageUnloadable(url=url, status_code=None, message=str(e))

        if fetch_favicon:
            try:
                self.favicon_blob = await self.page.evaluate(FAVICON_FETCHER_JS)
                await _release_page_memory()
            except Exception as e:
                logger.error(f"CloakBrowser > Error fetching favicon: {e}, continuing.")

        if self.status_code != 200 and not ignore_status_codes:
            screenshot = await capture_full_page_async(
                self.page,
                screenshot_format=self.screenshot_format,
                watch_uuid=watch_uuid,
                lock_viewport_elements=self.lock_viewport_elements,
            )
            raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)

        if not empty_pages_are_a_change and len((await self.page.content()).strip()) == 0:
            raise EmptyReply(url=url, status_code=response.status)

        try:
            if self.browser_steps:
                await self.iterate_browser_steps(start_url=url)
                await self.page.wait_for_timeout(extra_wait * 1000)

            started = time.time()
            max_total_height = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))

            if current_include_filters is None:
                filters_js = "var include_filters=''"
            else:
                filters_js = f"var include_filters={json.dumps(current_include_filters)}"
            await self.page.evaluate(filters_js)
            await _release_page_memory()

            self.xpath_data = await self.page.evaluate(XPATH_ELEMENT_JS, {
                "visualselector_xpath_selectors": visualselector_xpath_selectors,
                "max_height": max_total_height,
            })
            await _release_page_memory()

            self.instock_data = await self.page.evaluate(INSTOCK_DATA_JS)
            await _release_page_memory()

            self.content = await self.page.content()
            await _release_page_memory()

            logger.debug(f"CloakBrowser > Scraped xPath/instock data in {time.time() - started:.2f}s")

            self.screenshot = await capture_full_page_async(
                page=self.page,
                screenshot_format=self.screenshot_format,
                watch_uuid=watch_uuid,
                lock_viewport_elements=self.lock_viewport_elements,
            )
            await _release_page_memory()
            gc.collect()

        except ScreenshotUnavailable:
            # Re-raise carrying this request's url and status code.
            raise ScreenshotUnavailable(url=url, status_code=self.status_code)

    finally:
        # Tear down in page -> context -> browser order; every step is
        # best-effort so one failure cannot prevent the next close.
        try:
            await _shutdown("page", getattr(self, 'page', None), gc_first=True)
        finally:
            self.page = None

        try:
            await _shutdown("context", context)
        finally:
            context = None

        try:
            await _shutdown("browser", browser)
        finally:
            browser = None

        gc.collect()
|
|
379
|
+
|
|
380
|
+
async def quit(self, watch=None):
    """Do nothing; ``watch`` is accepted but unused."""
    return None
|
|
382
|
+
|
|
383
|
+
def get_error(self):
    """Return the value currently stored in ``self.error``."""
    current_error = self.error
    return current_error
|
|
385
|
+
|
|
386
|
+
def get_last_status_code(self):
    """Return the value currently stored in ``self.status_code``."""
    last_status = self.status_code
    return last_status
|
|
388
|
+
|
|
389
|
+
def is_ready(self):
    """Report whether the ``cloakbrowser`` package can be imported.

    Returns:
        ``True`` when the import succeeds; ``False`` (after logging an error)
        when it raises ``ImportError``.
    """
    try:
        import cloakbrowser  # noqa: F401
    except ImportError:
        logger.error("CloakBrowser fetcher: 'cloakbrowser' package is not installed")
        return False
    return True
|
|
396
|
+
|
|
397
|
+
return ('html_cloakbrowser', fetcher)
|
changedetection_io_cloak_browser-0.1.0/changedetection_cloak_browser/static/cloakbrowser-logo.svg
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 32 32" width="32" height="32">
|
|
2
|
+
<!-- Browser window -->
|
|
3
|
+
<rect x="2" y="5" width="28" height="22" rx="3" ry="3" fill="#1a1a2e" stroke="#6c63ff" stroke-width="1.5"/>
|
|
4
|
+
<!-- Title bar -->
|
|
5
|
+
<rect x="2" y="5" width="28" height="7" rx="3" ry="3" fill="#6c63ff"/>
|
|
6
|
+
<rect x="2" y="9" width="28" height="3" fill="#6c63ff"/>
|
|
7
|
+
<!-- Window controls -->
|
|
8
|
+
<circle cx="7" cy="8.5" r="1.5" fill="#ff6b6b"/>
|
|
9
|
+
<circle cx="12" cy="8.5" r="1.5" fill="#ffd93d"/>
|
|
10
|
+
<!-- Ghost / cloak mask in browser body -->
|
|
11
|
+
<ellipse cx="16" cy="19" rx="7" ry="6" fill="#6c63ff" opacity="0.85"/>
|
|
12
|
+
<rect x="9" y="19" width="14" height="4" fill="#6c63ff" opacity="0.85"/>
|
|
13
|
+
<!-- Ghost feet -->
|
|
14
|
+
<path d="M9 23 Q10.5 25 12 23 Q13.5 25 15 23 Q16.5 25 18 23 Q19.5 25 21 23 L23 23 L23 19 L9 19 Z" fill="#6c63ff" opacity="0.85"/>
|
|
15
|
+
<!-- Ghost eyes -->
|
|
16
|
+
<circle cx="13" cy="18" r="1.8" fill="#1a1a2e"/>
|
|
17
|
+
<circle cx="19" cy="18" r="1.8" fill="#1a1a2e"/>
|
|
18
|
+
<circle cx="13.5" cy="17.5" r="0.7" fill="white"/>
|
|
19
|
+
<circle cx="19.5" cy="17.5" r="0.7" fill="white"/>
|
|
20
|
+
</svg>
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
#!/usr/bin/env python
"""Packaging script for the changedetection.io CloakBrowser fetcher plugin."""
from setuptools import setup, find_packages
import os

PROJECT_ROOT = os.path.abspath(os.path.dirname(__file__))
README_PATH = os.path.join(PROJECT_ROOT, 'README.md')

# The README is reused verbatim as the package's long description.
with open(README_PATH, encoding='utf-8') as readme:
    long_description = readme.read()

RUNTIME_REQUIREMENTS = [
    'changedetection.io>=0.54.5',
    'cloakbrowser>=0.3.0',
    'playwright>=1.40.0',
]

# Register as a changedetectionio plugin via entry_points
PLUGIN_ENTRY_POINTS = {
    'changedetectionio': [
        'cloak_browser = changedetection_cloak_browser.fetcher',
    ],
}

TROVE_CLASSIFIERS = [
    'Development Status :: 4 - Beta',
    'Intended Audience :: Developers',
    'Intended Audience :: System Administrators',
    'License :: OSI Approved :: MIT License',
    'Programming Language :: Python :: 3',
    'Programming Language :: Python :: 3.10',
    'Programming Language :: Python :: 3.11',
    'Programming Language :: Python :: 3.12',
    'Topic :: Internet :: WWW/HTTP :: Site Management',
    'Topic :: System :: Monitoring',
    'Topic :: Security',
]

setup(
    name='changedetection.io-cloak-browser',
    version='0.1.0',
    description='CloakBrowser stealth fetcher plugin for changedetection.io (anti-bot bypass)',
    long_description=long_description,
    long_description_content_type='text/markdown',
    author='dgtlmoon',
    author_email='dgtlmoon@gmail.com',
    url='https://github.com/dgtlmoon/changedetection.io-cloak-browser',
    packages=find_packages(),
    include_package_data=True,
    package_data={
        'changedetection_cloak_browser': ['static/*'],
    },
    install_requires=RUNTIME_REQUIREMENTS,
    entry_points=PLUGIN_ENTRY_POINTS,
    python_requires='>=3.10',
    classifiers=TROVE_CLASSIFIERS,
    keywords='changedetection web monitoring anti-bot stealth chromium cloakbrowser cloudflare bypass',
)
|