fleet-framework 0.1.1__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/PKG-INFO +6 -2
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_browser/__init__.py +5 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_browser/browser.py +113 -4
- fleet_framework-0.2.0/fleet_browser/cloak.py +120 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_browser/fingerprint.py +6 -1
- fleet_framework-0.2.0/fleet_browser/smart_router.py +354 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_framework.egg-info/PKG-INFO +6 -2
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_framework.egg-info/SOURCES.txt +2 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_framework.egg-info/requires.txt +6 -1
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/pyproject.toml +7 -2
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/LICENSE +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/README.md +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/__init__.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/cli.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/core/__init__.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/core/automation.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/core/backend.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/core/config.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/core/context.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/core/contract.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/core/country_presets.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/core/events.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/core/local_runner.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/core/logging.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/core/memory_backend.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/core/metrics.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/core/otel.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/core/primitives.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/core/protocol.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/core/proxy.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/core/reconcile.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/core/sqlite_backend.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/core/store.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/master/__init__.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/master/api.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/master/app.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/master/auth.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/master/broadcaster.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/master/dashboard/__init__.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/master/dashboard/router.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/master/dashboard/static/style.css +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/master/dashboard/templates/index.html +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/master/metrics_route.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/master/ratelimit.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/master/ws_router.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/worker/__init__.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/worker/agent.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/worker/reconcile_loop.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/worker/slot_runner.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/worker/ws_client.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_browser/cert.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_browser/humanizer.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_browser/pool.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_browser/proxy_extension.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_browser/solver.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_browser/stealth.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_cloudflare/__init__.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_cloudflare/bypasser.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_cloudflare/harvest.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_cloudflare/replay.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_cloudflare/solver.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_content/__init__.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_content/automation.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_content/contracts.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_detect/__init__.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_detect/contracts.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_detect/detect.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_framework.egg-info/dependency_links.txt +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_framework.egg-info/entry_points.txt +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_framework.egg-info/top_level.txt +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_headers/__init__.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_headers/profiles.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_jobs/__init__.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_jobs/automation.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_jobs/contracts.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_marketplace/__init__.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_marketplace/automation.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_marketplace/contracts.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_news/__init__.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_news/automation.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_news/contracts.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_place/__init__.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_place/automation.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_place/contracts.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_provider_dataimpulse/__init__.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_provider_evomi/__init__.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_serp/__init__.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_serp/automation.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_serp/contracts.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_social/__init__.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_social/automation.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_social/contracts.py +0 -0
- {fleet_framework-0.1.1 → fleet_framework-0.2.0}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: fleet-framework
|
|
3
|
-
Version: 0.1.1
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: generic distributed-automation framework — master/worker, browser pool, anti-bot helpers, and abstract automation contracts (SERP, content, news, place, marketplace, jobs, social)
|
|
5
5
|
Author: Sarper Avci
|
|
6
6
|
License: MIT
|
|
@@ -21,8 +21,12 @@ Requires-Dist: click>=8.1.0
|
|
|
21
21
|
Provides-Extra: browser
|
|
22
22
|
Requires-Dist: DrissionPage>=4.1.0; extra == "browser"
|
|
23
23
|
Requires-Dist: cryptography>=42.0.0; extra == "browser"
|
|
24
|
+
Requires-Dist: mitmproxy>=12.0; extra == "browser"
|
|
24
25
|
Provides-Extra: cloudflare
|
|
25
26
|
Requires-Dist: fleet-framework[browser]; extra == "cloudflare"
|
|
27
|
+
Provides-Extra: cloak
|
|
28
|
+
Requires-Dist: fleet-framework[browser]; extra == "cloak"
|
|
29
|
+
Requires-Dist: cloakbrowser>=0.3; extra == "cloak"
|
|
26
30
|
Provides-Extra: otel
|
|
27
31
|
Requires-Dist: opentelemetry-api>=1.27.0; extra == "otel"
|
|
28
32
|
Requires-Dist: opentelemetry-sdk>=1.27.0; extra == "otel"
|
|
@@ -31,7 +35,7 @@ Provides-Extra: test
|
|
|
31
35
|
Requires-Dist: pytest>=8.0; extra == "test"
|
|
32
36
|
Requires-Dist: pytest-asyncio>=0.23; extra == "test"
|
|
33
37
|
Provides-Extra: all
|
|
34
|
-
Requires-Dist: fleet-framework[browser,cloudflare,otel]; extra == "all"
|
|
38
|
+
Requires-Dist: fleet-framework[browser,cloak,cloudflare,otel]; extra == "all"
|
|
35
39
|
Dynamic: license-file
|
|
36
40
|
|
|
37
41
|
# Fleet
|
|
@@ -3,9 +3,11 @@ from fleet_browser.fingerprint import Fingerprint, FingerprintFactory
|
|
|
3
3
|
from fleet_browser.humanizer import Humanizer
|
|
4
4
|
from fleet_browser.pool import BrowserPool, slot
|
|
5
5
|
from fleet_browser.proxy_extension import build_proxy_auth_extension, parse_proxy_url
|
|
6
|
+
from fleet_browser.smart_router import DEFAULT_RULES, RouterStats, SmartRouter, SmartRule
|
|
6
7
|
from fleet_browser.stealth import FingerprintStealth, NoOpStealth, Stealth
|
|
7
8
|
|
|
8
9
|
__all__ = [
|
|
10
|
+
"DEFAULT_RULES",
|
|
9
11
|
"BrowserConfig",
|
|
10
12
|
"BrowserPool",
|
|
11
13
|
"ChromiumWorker",
|
|
@@ -14,6 +16,9 @@ __all__ = [
|
|
|
14
16
|
"FingerprintStealth",
|
|
15
17
|
"Humanizer",
|
|
16
18
|
"NoOpStealth",
|
|
19
|
+
"RouterStats",
|
|
20
|
+
"SmartRouter",
|
|
21
|
+
"SmartRule",
|
|
17
22
|
"Stealth",
|
|
18
23
|
"build_proxy_auth_extension",
|
|
19
24
|
"parse_proxy_url",
|
|
@@ -13,7 +13,14 @@ from typing import Optional
|
|
|
13
13
|
import psutil
|
|
14
14
|
from DrissionPage import ChromiumOptions, ChromiumPage
|
|
15
15
|
|
|
16
|
+
from fleet_browser.cloak import (
|
|
17
|
+
CLOAK_IGNORE_DEFAULT_ARGS,
|
|
18
|
+
cloak_stealth_args,
|
|
19
|
+
ensure_cloak_binary,
|
|
20
|
+
resolve_engine,
|
|
21
|
+
)
|
|
16
22
|
from fleet_browser.proxy_extension import build_proxy_auth_extension, parse_proxy_url
|
|
23
|
+
from fleet_browser.smart_router import DEFAULT_RULES, SmartRouter, SmartRule
|
|
17
24
|
from fleet_browser.stealth import Stealth
|
|
18
25
|
|
|
19
26
|
logger = logging.getLogger(__name__)
|
|
@@ -120,12 +127,64 @@ class BrowserConfig:
|
|
|
120
127
|
|
|
121
128
|
extra_args: tuple[str, ...] = ()
|
|
122
129
|
|
|
130
|
+
engine: str = "auto"
|
|
131
|
+
"""Which Chromium build to launch. `"auto"` picks `"cloak"` when the
|
|
132
|
+
cloakbrowser package is installed, else falls back to `"chrome"`.
|
|
133
|
+
Set explicitly to override:
|
|
134
|
+
- `"chrome"`: use the system Chromium / Brave / Thorium auto-detect path
|
|
135
|
+
- `"cloak"`: use the CloakBrowser patched binary (downloads on first
|
|
136
|
+
use, raises if `cloakbrowser` isn't installed)
|
|
137
|
+
Cloak applies source-level canvas/WebGL/audio/font/WebRTC patches that
|
|
138
|
+
JS-level stealth can't reach. Stack FingerprintStealth on top to add
|
|
139
|
+
per-launch UA/version diversity from the uaforge corpus."""
|
|
140
|
+
|
|
141
|
+
cloak_cache_dir: Optional[str] = None
|
|
142
|
+
"""Override the cloak binary cache dir (default ~/.cloakbrowser/)."""
|
|
143
|
+
|
|
144
|
+
cloak_fingerprint_seed: Optional[int] = None
|
|
145
|
+
"""Force a specific cloak --fingerprint=N seed. Default: random per
|
|
146
|
+
launch. Pin a seed for reproducible tests."""
|
|
147
|
+
|
|
148
|
+
cloak_platform: str = "windows"
|
|
149
|
+
"""`--fingerprint-platform` flag for cloak's binary. Must match the
|
|
150
|
+
OS family FingerprintStealth's UA claims (uaforge default is Windows).
|
|
151
|
+
Set 'macos' if you switch uaforge to a Mac corpus."""
|
|
152
|
+
|
|
153
|
+
cloak_timezone: Optional[str] = None
|
|
154
|
+
"""IANA timezone for cloak's `--fingerprint-timezone` flag. Set when
|
|
155
|
+
using residential proxies so the spoofed timezone matches the egress
|
|
156
|
+
geo (otherwise CF flags the mismatch). Falls back to system tz."""
|
|
157
|
+
|
|
158
|
+
cloak_webrtc_ip: Optional[str] = None
|
|
159
|
+
"""Public IP cloak should advertise via WebRTC. Set to the proxy's exit
|
|
160
|
+
IP when using residential — otherwise WebRTC leaks your real LAN IP
|
|
161
|
+
and CF correlates the mismatch."""
|
|
162
|
+
|
|
163
|
+
smart_routing: bool = True
|
|
164
|
+
"""When True AND `proxy` is set, spawn a local mitmproxy that does per-flow
|
|
165
|
+
routing: only the rules-matching flows (e.g. /search) go through the paid
|
|
166
|
+
upstream, everything else goes direct. Cuts residential bandwidth ~85–95%
|
|
167
|
+
on Google/Bing/etc. without changing the SERP fetch's IP. Disable only when
|
|
168
|
+
you specifically need every byte through the proxy (rare)."""
|
|
169
|
+
|
|
170
|
+
smart_routing_rules: tuple[SmartRule, ...] = DEFAULT_RULES
|
|
171
|
+
"""Per-flow routing rules. Built-ins handle Google/Bing/Yahoo/DDG /search
|
|
172
|
+
plus a block-list of trackers. Plugins can extend with additional rules
|
|
173
|
+
via tuple concat: `DEFAULT_RULES + (SmartRule(host_match="amazon.com", path_prefix="/s"),)`."""
|
|
174
|
+
|
|
123
175
|
|
|
124
176
|
class ChromiumWorker:
|
|
125
177
|
|
|
126
178
|
def __init__(self, config: BrowserConfig) -> None:
    """Store the launch configuration and reset all per-launch state.

    Nothing is started here; the page, the smart router and the temporary
    user-data directory are all created later, when the worker is started.
    """
    self.config = config
    self.page: Optional[ChromiumPage] = None
    self._smart_router: Optional[SmartRouter] = None
    # These two attributes must be initialised here. They previously sat
    # after the `return` of the `smart_router` property below, which made
    # them unreachable and left a fresh worker without either attribute.
    self._user_data_dir: Optional[Path] = None
    self._owns_user_data_dir = False

@property
def smart_router(self) -> Optional[SmartRouter]:
    """The active SmartRouter for this worker, or None if smart routing
    wasn't engaged. Stats are populated after `stop()` runs."""
    return self._smart_router
|
|
131
190
|
|
|
@@ -133,9 +192,16 @@ class ChromiumWorker:
|
|
|
133
192
|
if self.page is not None:
|
|
134
193
|
return self.page
|
|
135
194
|
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
195
|
+
engine = resolve_engine(self.config.engine)
|
|
196
|
+
if engine == "cloak":
|
|
197
|
+
# Caller may still pin browser_binary explicitly — honor it.
|
|
198
|
+
binary = self.config.browser_binary or ensure_cloak_binary(
|
|
199
|
+
self.config.cloak_cache_dir
|
|
200
|
+
)
|
|
201
|
+
else:
|
|
202
|
+
binary = self.config.browser_binary or _find_browser_binary(
|
|
203
|
+
prefer=self.config.prefer_browser,
|
|
204
|
+
)
|
|
139
205
|
|
|
140
206
|
self._user_data_dir = Path(tempfile.mkdtemp(prefix="chromium-worker-"))
|
|
141
207
|
self._owns_user_data_dir = True
|
|
@@ -147,6 +213,23 @@ class ChromiumWorker:
|
|
|
147
213
|
opts.set_user_data_path(str(self._user_data_dir))
|
|
148
214
|
opts.set_argument(f"--window-size={self.config.window_size[0]},{self.config.window_size[1]}")
|
|
149
215
|
|
|
216
|
+
if engine == "cloak":
|
|
217
|
+
for arg in cloak_stealth_args(
|
|
218
|
+
platform=self.config.cloak_platform,
|
|
219
|
+
timezone=self.config.cloak_timezone,
|
|
220
|
+
webrtc_ip=self.config.cloak_webrtc_ip,
|
|
221
|
+
seed=self.config.cloak_fingerprint_seed,
|
|
222
|
+
):
|
|
223
|
+
opts.set_argument(arg)
|
|
224
|
+
# Suppress Chromium defaults that leak automation signals.
|
|
225
|
+
# DrissionPage doesn't expose ignore_default_args; pass as
|
|
226
|
+
# --disable-features-replacement via plain args. The cloak
|
|
227
|
+
# binary recognises --no-enable-automation as an inversion.
|
|
228
|
+
for kill in CLOAK_IGNORE_DEFAULT_ARGS:
|
|
229
|
+
# Chromium accepts --disable-foo as the inverse of --enable-foo.
|
|
230
|
+
inv = kill.replace("--enable-", "--disable-", 1)
|
|
231
|
+
opts.set_argument(inv)
|
|
232
|
+
|
|
150
233
|
# HttpsUpgrades is off so self-signed HTTPS (TokenServer) isn't rewritten.
|
|
151
234
|
# Brave's ad/tracker/sync features both pollute the fingerprint and
|
|
152
235
|
# sometimes break Turnstile's iframe; disable them when running Brave.
|
|
@@ -180,7 +263,26 @@ class ChromiumWorker:
|
|
|
180
263
|
opts.set_argument("--headless=new")
|
|
181
264
|
|
|
182
265
|
if self.config.proxy:
|
|
183
|
-
|
|
266
|
+
# When smart_routing is on, spawn a SmartRouter and point Chromium
|
|
267
|
+
# at THAT instead of the upstream proxy. The router decides per-flow
|
|
268
|
+
# whether to forward upstream (paid) or direct (free). 85-95%
|
|
269
|
+
# bandwidth reduction on search engines.
|
|
270
|
+
effective_proxy = self.config.proxy
|
|
271
|
+
if self.config.smart_routing:
|
|
272
|
+
try:
|
|
273
|
+
self._smart_router = SmartRouter(
|
|
274
|
+
upstream_proxy=self.config.proxy,
|
|
275
|
+
rules=self.config.smart_routing_rules,
|
|
276
|
+
)
|
|
277
|
+
self._smart_router.start()
|
|
278
|
+
effective_proxy = self._smart_router.proxy_url
|
|
279
|
+
except Exception:
|
|
280
|
+
logger.exception(
|
|
281
|
+
"smart-router failed to start, falling back to direct upstream proxy"
|
|
282
|
+
)
|
|
283
|
+
self._smart_router = None
|
|
284
|
+
|
|
285
|
+
proxy_spec = parse_proxy_url(effective_proxy)
|
|
184
286
|
# Chromium 122+ accepts user:pass in --proxy-server. The MV2
|
|
185
287
|
# auth-extension is kept as a fallback for older Chromium.
|
|
186
288
|
if proxy_spec.has_auth:
|
|
@@ -249,6 +351,13 @@ class ChromiumWorker:
|
|
|
249
351
|
pass
|
|
250
352
|
self.page = None
|
|
251
353
|
|
|
354
|
+
if self._smart_router is not None:
|
|
355
|
+
try:
|
|
356
|
+
self._smart_router.stop()
|
|
357
|
+
except Exception:
|
|
358
|
+
logger.exception("[browser] smart-router stop failed")
|
|
359
|
+
# Keep the reference so callers can read stats after stop.
|
|
360
|
+
|
|
252
361
|
# SIGKILL anything still running with our --user-data-dir; page.quit
|
|
253
362
|
# sometimes leaves renderer/zygote/utility children orphaned to PID 1.
|
|
254
363
|
if self._user_data_dir is not None:
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
"""Cloak engine integration — uses the CloakBrowser patched Chromium binary.
|
|
2
|
+
|
|
3
|
+
CloakBrowser is a separately-licensed binary (free for own-business use,
|
|
4
|
+
OEM license required for browser-as-a-service distribution). See:
|
|
5
|
+
https://github.com/CloakHQ/CloakBrowser
|
|
6
|
+
|
|
7
|
+
The wrapper Python package is MIT-licensed and pulled in via the optional
|
|
8
|
+
extra: `pip install fleet-framework[cloak]`. The binary is downloaded on
|
|
9
|
+
first use to ~/.cloakbrowser/ (override with CLOAKBROWSER_CACHE_DIR).
|
|
10
|
+
|
|
11
|
+
Combining cloak (source-level canvas/WebGL/audio/font/WebRTC patches) with
|
|
12
|
+
FingerprintStealth (CDP-level UA + Sec-CH-UA + navigator overrides) gives:
|
|
13
|
+
- Deep, JS-undetectable spoofing of low-level signals (cloak)
|
|
14
|
+
- Per-launch diversity of UA versions from the uaforge corpus (FingerprintStealth)
|
|
15
|
+
|
|
16
|
+
That diversity matters: cloak's binary patches use the latest Chrome only,
|
|
17
|
+
so without the CDP layer every harvest looks like the same Chrome version.
|
|
18
|
+
With CDP override on top, each launch picks a different version from uaforge
|
|
19
|
+
while keeping the renderer-level fingerprint coherent.
|
|
20
|
+
"""
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import logging
|
|
24
|
+
import os
|
|
25
|
+
import random
|
|
26
|
+
from pathlib import Path
|
|
27
|
+
from typing import Optional
|
|
28
|
+
|
|
29
|
+
logger = logging.getLogger(__name__)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
_CLOAK_AVAILABLE: Optional[bool] = None


def cloak_available() -> bool:
    """Report whether the `cloakbrowser` wrapper package can be imported.

    This is only the cheap import probe; fetching the patched binary is
    handled separately by `ensure_cloak_binary`. The outcome of the first
    probe is remembered in the module-level `_CLOAK_AVAILABLE` flag, so the
    import is attempted at most once per process.
    """
    global _CLOAK_AVAILABLE
    if _CLOAK_AVAILABLE is not None:
        return _CLOAK_AVAILABLE
    try:
        import cloakbrowser  # noqa: F401
    except ImportError:
        _CLOAK_AVAILABLE = False
    else:
        _CLOAK_AVAILABLE = True
    return _CLOAK_AVAILABLE
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def resolve_engine(name: str) -> str:
    """Turn an engine setting into a concrete engine name.

    `"auto"` becomes `"cloak"` when the cloakbrowser package is importable
    and `"chrome"` otherwise. Any other value is returned unchanged.
    """
    if name == "auto":
        return "cloak" if cloak_available() else "chrome"
    return name
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def ensure_cloak_binary(cache_dir: Optional[str] = None) -> str:
    """Return path to the patched Chromium binary; download if missing.

    Delegates to cloakbrowser.download.ensure_binary. ~250-400 MB download
    on first use; cached at ~/.cloakbrowser/ (or `cache_dir`) thereafter.
    Cloakbrowser checks for updates hourly when launched.

    Args:
        cache_dir: Optional cache directory. When given, it is exported as
            the `CLOAKBROWSER_CACHE_DIR` environment variable (with `~`
            expanded) before the download helper is called.

    Returns:
        The binary path reported by `ensure_binary`, as a string.

    Raises:
        RuntimeError: If the `cloakbrowser` package is not importable.
    """
    if not cloak_available():
        raise RuntimeError(
            "engine='cloak' requires cloakbrowser. "
            "Install with: pip install fleet-framework[cloak]"
        )
    if cache_dir:
        # An explicit argument must win over whatever is already in the
        # environment. `setdefault` silently ignored `cache_dir` whenever the
        # variable was already set, including by an earlier call that used a
        # different directory.
        os.environ["CLOAKBROWSER_CACHE_DIR"] = str(Path(cache_dir).expanduser())
    # Imported lazily so this module stays importable without the extra.
    from cloakbrowser.download import ensure_binary

    return str(ensure_binary())
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def cloak_stealth_args(
    *,
    platform: str = "windows",
    timezone: Optional[str] = None,
    webrtc_ip: Optional[str] = None,
    seed: Optional[int] = None,
) -> tuple[str, ...]:
    """Build the Chromium command-line flags that switch on cloak's patches.

    Args:
        platform: Value for `--fingerprint-platform`. Keep it aligned with
            the OS family that FingerprintStealth's UA claims.
        timezone: Value for `--fingerprint-timezone`; the flag is omitted
            when this is empty or None.
        webrtc_ip: Value for `--fingerprint-webrtc-ip`; the flag is omitted
            when this is empty or None.
        seed: Value for `--fingerprint`. When None, a random integer in
            10000..99999 is drawn for this call; pass a fixed seed for
            reproducible runs.

    Returns:
        The flags as a tuple, seed and platform first, then any optional
        flags in the order timezone, WebRTC IP.
    """
    fingerprint = random.randint(10000, 99999) if seed is None else seed
    optional_flags = (
        ("--fingerprint-timezone", timezone),
        ("--fingerprint-webrtc-ip", webrtc_ip),
    )
    return (
        f"--fingerprint={fingerprint}",
        f"--fingerprint-platform={platform}",
        *(f"{flag}={value}" for flag, value in optional_flags if value),
    )
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
CLOAK_IGNORE_DEFAULT_ARGS: tuple[str, ...] = (
|
|
104
|
+
"--enable-automation",
|
|
105
|
+
"--enable-unsafe-swiftshader",
|
|
106
|
+
)
|
|
107
|
+
"""Default args Playwright/DrissionPage pass that leak automation signals.
|
|
108
|
+
|
|
109
|
+
`--enable-automation` exposes `navigator.webdriver=true`. `--enable-unsafe-
|
|
110
|
+
swiftshader` forces SwiftShader's distinctive WebGL renderer string. We pass
|
|
111
|
+
these to Chromium's launch options so DrissionPage stops setting them."""
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
__all__ = [
|
|
115
|
+
"CLOAK_IGNORE_DEFAULT_ARGS",
|
|
116
|
+
"cloak_available",
|
|
117
|
+
"cloak_stealth_args",
|
|
118
|
+
"ensure_cloak_binary",
|
|
119
|
+
"resolve_engine",
|
|
120
|
+
]
|
|
@@ -180,7 +180,12 @@ class FingerprintFactory:
|
|
|
180
180
|
"pip install git+https://github.com/sarperavci/uaforge.git"
|
|
181
181
|
)
|
|
182
182
|
if allowed_os is None:
|
|
183
|
-
|
|
183
|
+
# Default to Windows regardless of host. The host's actual OS
|
|
184
|
+
# is irrelevant — what matters is which UA + client-hint corpus
|
|
185
|
+
# the fleet's fingerprints are drawn from. Windows is the highest-
|
|
186
|
+
# entropy population (~70% of real browsing traffic) and matches
|
|
187
|
+
# the cloak engine's `--fingerprint-platform=windows` default.
|
|
188
|
+
allowed_os = ("windows",)
|
|
184
189
|
self._gen = UserAgentGenerator(seed=seed)
|
|
185
190
|
self._rng = random.Random(seed)
|
|
186
191
|
self._min_chromium = min_chromium_version
|
|
@@ -0,0 +1,354 @@
|
|
|
1
|
+
"""Smart per-flow proxy router for browser-based automation.
|
|
2
|
+
|
|
3
|
+
The problem this solves: residential proxy bandwidth costs $0.50-1.00/GB.
|
|
4
|
+
A vanilla Chromium load of a Google SERP through a residential proxy burns
|
|
5
|
+
5+ MB per query (Google's own ML model downloads, gstatic CDN assets, JS
|
|
6
|
+
bundles). Only the search-result document actually needs the residential
|
|
7
|
+
IP — Google's bot-wall scores the document fetch, not its subresources.
|
|
8
|
+
|
|
9
|
+
How it works: a local mitmproxy subprocess acts as Chromium's proxy. Per-flow
|
|
10
|
+
the addon decides:
|
|
11
|
+
|
|
12
|
+
- HOST + PATH matches a "paid" rule → forward through the upstream
|
|
13
|
+
residential proxy → captures the IP-reputation signal
|
|
14
|
+
- Otherwise → flow.server_conn.via = None → forward direct → free, doesn't
|
|
15
|
+
affect the SERP fetch
|
|
16
|
+
- Matches a "block" rule → return 204 immediately
|
|
17
|
+
|
|
18
|
+
In our measurements: 1.31 MB → 0.17 MB paid per Google query. 87% reduction.
|
|
19
|
+
|
|
20
|
+
ChromiumWorker spins up a SmartRouter automatically when `BrowserConfig.proxy`
|
|
21
|
+
is set AND `smart_routing=True` (the default). Per-process subprocess. Stats
|
|
22
|
+
(paid/direct/blocked bytes + counts) are written to a temp file and read on
|
|
23
|
+
stop, so callers can attribute cost per task.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
from __future__ import annotations
|
|
27
|
+
|
|
28
|
+
import json
|
|
29
|
+
import logging
|
|
30
|
+
import os
|
|
31
|
+
import shutil
|
|
32
|
+
import signal
|
|
33
|
+
import socket
|
|
34
|
+
import subprocess
|
|
35
|
+
import tempfile
|
|
36
|
+
import textwrap
|
|
37
|
+
import time
|
|
38
|
+
from dataclasses import asdict, dataclass, field
|
|
39
|
+
from pathlib import Path
|
|
40
|
+
from typing import Optional
|
|
41
|
+
|
|
42
|
+
logger = logging.getLogger(__name__)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@dataclass(frozen=True)
class SmartRule:
    """One routing rule. Fields are AND-ed when both are set.

    `host_match` is matched as a SUFFIX of the request host, so `google.com`
    covers google.com itself as well as www.google.com, images.google.com,
    etc. Use a leading "." to restrict a rule to subdomains (".google.com"
    matches "*.google.com" but not the bare "google.com").
    NOTE(review): the matching is not implemented in this class, so the
    leading-dot behavior described here should be confirmed against the code
    that evaluates the rules.

    `path_prefix` is a literal prefix on the URL path. Empty = match any path.

    `action`:
    - "paid"   → forward through the upstream residential proxy
    - "direct" → forward direct, bypassing the upstream
    - "block"  → drop the request (returns 204; useful for trackers/ads)
    """

    # Host suffix this rule applies to.
    host_match: str
    # Literal URL-path prefix; the empty default matches every path.
    path_prefix: str = ""
    # One of "paid", "direct" or "block"; not validated here.
    action: str = "paid"
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
DEFAULT_RULES: tuple[SmartRule, ...] = (
|
|
67
|
+
SmartRule(host_match="www.google.com", path_prefix="/search", action="paid"),
|
|
68
|
+
SmartRule(host_match="www.bing.com", path_prefix="/search", action="paid"),
|
|
69
|
+
SmartRule(host_match="search.yahoo.com", path_prefix="/search", action="paid"),
|
|
70
|
+
SmartRule(host_match="duckduckgo.com", path_prefix="/?q", action="paid"),
|
|
71
|
+
SmartRule(host_match="duckduckgo.com", path_prefix="/html", action="paid"),
|
|
72
|
+
# Block known-noise endpoints we never want to fetch (also free, so the
|
|
73
|
+
# blocking saves on direct bandwidth + latency, not paid cost).
|
|
74
|
+
SmartRule(host_match="googletagmanager.com", action="block"),
|
|
75
|
+
SmartRule(host_match="googlesyndication.com", action="block"),
|
|
76
|
+
SmartRule(host_match="googleadservices.com", action="block"),
|
|
77
|
+
SmartRule(host_match="doubleclick.net", action="block"),
|
|
78
|
+
SmartRule(host_match="google-analytics.com", action="block"),
|
|
79
|
+
SmartRule(host_match="optimizationguide-pa.googleapis.com", action="block"),
|
|
80
|
+
)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
@dataclass
class RouterStats:
    """Traffic counters for one SmartRouter run; every counter starts at 0.

    The counters are grouped by routing outcome: paid (sent through the
    upstream proxy), direct (sent without it) and blocked.
    NOTE(review): the code that fills these in is not part of this class;
    confirm exactly which bytes are counted (request, response or both).
    """

    # Bytes and request count for flows routed through the paid upstream.
    paid_bytes: int = 0
    paid_requests: int = 0
    # Bytes and request count for flows sent direct.
    direct_bytes: int = 0
    direct_requests: int = 0
    # Number of requests dropped by a "block" rule (no byte counter exists).
    blocked_requests: int = 0
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _pick_free_port() -> int:
    """Ask the OS for an unused TCP port on 127.0.0.1 and return its number.

    The probing socket is closed before returning, so the port is only free
    at the moment of the call; nothing reserves it for the caller.
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        # Port 0 lets the kernel choose any currently unused port.
        probe.bind(("127.0.0.1", 0))
        port = probe.getsockname()[1]
    finally:
        probe.close()
    return port
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _is_port_open(host: str, port: int) -> bool:
    """Return True if a TCP connection to `host:port` succeeds within 0.15 s.

    Any socket-level failure (timeout, refused connection, other OSError)
    is reported as False rather than raised.
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    probe.settimeout(0.15)
    try:
        probe.connect((host, port))
    except OSError:
        # socket.timeout and ConnectionRefusedError are OSError subclasses.
        return False
    else:
        return True
    finally:
        probe.close()
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _find_mitmdump() -> Optional[str]:
    """Locate the mitmdump binary. Returns None if not installed.

    Lookup order:
    1. `mitmdump` on PATH.
    2. Next to the running interpreter, which is where pip puts console
       scripts when a virtualenv is used without being activated.
    3. `.venv/bin/mitmdump` three directory levels above this module.
    4. `~/.local/bin/mitmdump`.

    Returns:
        The path of the first executable file found, or None.
    """
    import sys  # local import: only this lookup needs it

    on_path = shutil.which("mitmdump")
    if on_path:
        return on_path

    candidates: list[Path] = []
    if sys.executable:
        candidates.append(Path(sys.executable).parent / "mitmdump")
    # Guard the ancestor lookup: indexing `parents[2]` raises IndexError
    # when this module sits fewer than three levels below the filesystem root.
    ancestors = Path(__file__).resolve().parents
    if len(ancestors) > 2:
        candidates.append(ancestors[2] / ".venv" / "bin" / "mitmdump")
    candidates.append(Path.home() / ".local" / "bin" / "mitmdump")

    for candidate in candidates:
        # Require a regular file so an executable directory is not returned.
        if candidate.is_file() and os.access(candidate, os.X_OK):
            return str(candidate)
    return None
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
@dataclass
|
|
125
|
+
class SmartRouter:
|
|
126
|
+
"""Embedded mitmproxy subprocess that does per-flow upstream routing.
|
|
127
|
+
|
|
128
|
+
Lifecycle: start() spawns mitmdump, blocks until its listen port is open;
|
|
129
|
+
stop() sends SIGTERM and reads the stats file. Designed for one router
|
|
130
|
+
per ChromiumWorker — cheap to spin up (~150ms cold start).
|
|
131
|
+
"""
|
|
132
|
+
|
|
133
|
+
upstream_proxy: str
|
|
134
|
+
"""Upstream residential proxy URL with auth: http://user:pass@host:port."""
|
|
135
|
+
|
|
136
|
+
rules: tuple[SmartRule, ...] = field(default_factory=lambda: DEFAULT_RULES)
|
|
137
|
+
"""Routing rules. First matching rule wins. Default for non-matches: direct."""
|
|
138
|
+
|
|
139
|
+
default_action: str = "direct"
|
|
140
|
+
"""Action when no rule matches. "direct" = free, "paid" = everything paid."""
|
|
141
|
+
|
|
142
|
+
listen_host: str = "127.0.0.1"
|
|
143
|
+
listen_port: int = 0
|
|
144
|
+
"""0 = auto-pick free port."""
|
|
145
|
+
|
|
146
|
+
_proc: Optional[subprocess.Popen] = field(default=None, repr=False, init=False)
|
|
147
|
+
_stats_path: Optional[Path] = field(default=None, repr=False, init=False)
|
|
148
|
+
_addon_path: Optional[Path] = field(default=None, repr=False, init=False)
|
|
149
|
+
_stats: RouterStats = field(default_factory=RouterStats, init=False)
|
|
150
|
+
|
|
151
|
+
@property
|
|
152
|
+
def proxy_url(self) -> str:
|
|
153
|
+
return f"http://{self.listen_host}:{self.listen_port}"
|
|
154
|
+
|
|
155
|
+
@property
|
|
156
|
+
def stats(self) -> RouterStats:
|
|
157
|
+
return self._stats
|
|
158
|
+
|
|
159
|
+
def start(self, *, startup_timeout: float = 10.0) -> None:
    """Spawn mitmdump and block until its listen port accepts connections.

    Args:
        startup_timeout: Seconds to wait for the listen socket to open.

    Raises:
        RuntimeError: mitmdump is not installed, exited during startup, or
            did not open its port within ``startup_timeout``.
    """
    mitmdump = _find_mitmdump()
    if mitmdump is None:
        raise RuntimeError(
            "SmartRouter requires mitmproxy. "
            "Install with: pip install 'fleet-framework[browser]'"
        )
    if self.listen_port == 0:
        self.listen_port = _pick_free_port()

    # Strip auth from upstream URL and pass separately.
    from urllib.parse import unquote, urlparse

    u = urlparse(self.upstream_proxy)
    # Fall back to the scheme's own default port instead of always 80.
    default_port = 443 if u.scheme == "https" else 80
    upstream_clean = f"{u.scheme}://{u.hostname}:{u.port or default_port}"
    upstream_auth = None
    if u.username:
        # Userinfo is percent-encoded inside a URL; mitmdump wants the raw
        # credentials. A missing password becomes "" rather than "None".
        password = unquote(u.password or "")
        upstream_auth = f"{unquote(u.username)}:{password}"

    # Persist stats to a tmp file the subprocess writes to. mkstemp creates
    # the file atomically with owner-only permissions, unlike mktemp, whose
    # returned name can be claimed by another process before we use it.
    stats_fd, stats_name = tempfile.mkstemp(
        prefix="smart-router-stats-", suffix=".json"
    )
    os.close(stats_fd)
    self._stats_path = Path(stats_name)
    self._addon_path = None
    try:
        # Generate the addon script — embeds the rules so the subprocess has them.
        rules_repr = [asdict(r) for r in self.rules]
        script = _render_addon_script(
            rules_repr, self.default_action, str(self._stats_path)
        )
        addon_fd, addon_name = tempfile.mkstemp(
            prefix="smart-router-addon-", suffix=".py"
        )
        self._addon_path = Path(addon_name)
        with os.fdopen(addon_fd, "w") as addon_file:
            addon_file.write(script)

        cmd = [
            mitmdump,
            "-p", str(self.listen_port),
            "--mode", f"upstream:{upstream_clean}",
            "--set", "ssl_insecure=true",
            "-s", str(self._addon_path),
            "--quiet",
        ]
        if upstream_auth:
            cmd.extend(["--upstream-auth", upstream_auth])

        # stderr is piped only so an early exit can be reported below.
        self._proc = subprocess.Popen(
            cmd,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,
            start_new_session=True,
        )
    except Exception:
        # Do not leave temp files behind when the launch itself fails.
        self._cleanup_files()
        raise

    # Wait for the listen socket.
    deadline = time.monotonic() + startup_timeout
    while time.monotonic() < deadline:
        if _is_port_open(self.listen_host, self.listen_port):
            logger.info(
                "smart-router @ %s, %d rules, upstream=%s",
                self.proxy_url, len(self.rules), upstream_clean,
            )
            return
        if self._proc.poll() is not None:
            err = (self._proc.stderr.read() if self._proc.stderr else b"").decode(errors="replace")
            self._cleanup_files()
            raise RuntimeError(f"SmartRouter mitmdump exited early: {err[:400]}")
        time.sleep(0.1)
    self.stop()
    raise RuntimeError(f"SmartRouter failed to start within {startup_timeout}s")
|
|
217
|
+
|
|
218
|
+
def stop(self) -> None:
    """Terminate mitmdump, load the final stats and remove the temp files.

    Sends SIGTERM to the child's process group, escalates to SIGKILL if it
    has not exited after 3 seconds, then re-reads the stats file and deletes
    the stats and addon files. Cleanup runs even if waiting on the child
    raises.

    Raises:
        subprocess.TimeoutExpired: the child is still alive 2 seconds after
            SIGKILL.
    """
    # Read stats BEFORE killing the process — the addon flushes on every
    # response, so the file is current up to the last completed flow.
    self._read_stats()
    proc = self._proc
    try:
        if proc is not None:
            # Signal only while the child is still running: once it has been
            # reaped its PID may belong to an unrelated process.
            if proc.poll() is None:
                try:
                    # Term the whole process group the child was started in.
                    os.killpg(os.getpgid(proc.pid), signal.SIGTERM)
                except OSError:
                    # Covers ProcessLookupError and PermissionError too.
                    pass
                try:
                    proc.wait(timeout=3)
                except subprocess.TimeoutExpired:
                    try:
                        os.killpg(os.getpgid(proc.pid), signal.SIGKILL)
                    except OSError:
                        pass
                    proc.wait(timeout=2)
    finally:
        if proc is not None and proc.stderr is not None:
            # The pipe opened by start() is otherwise never released.
            proc.stderr.close()
        self._proc = None
        # Re-read stats one more time in case any flows finished between the
        # first read and the SIGTERM.
        self._read_stats()
        self._cleanup_files()
    logger.info(
        "smart-router stopped. paid=%d B / %d req, direct=%d B / %d req, blocked=%d req",
        self._stats.paid_bytes, self._stats.paid_requests,
        self._stats.direct_bytes, self._stats.direct_requests,
        self._stats.blocked_requests,
    )
|
|
247
|
+
|
|
248
|
+
def __enter__(self) -> "SmartRouter":
    """Start the router on entering a ``with`` block and return it."""
    router = self
    router.start()
    return router
|
|
251
|
+
|
|
252
|
+
def __exit__(self, *exc) -> None:
    """Stop the router on leaving a ``with`` block.

    The exception details in ``exc`` are ignored and nothing is returned, so
    an exception raised inside the block keeps propagating.
    """
    self.stop()
    return None
|
|
254
|
+
|
|
255
|
+
def _read_stats(self) -> None:
    """Load the counters written by the addon into ``self._stats``.

    Does nothing when no stats path is set, the file is missing, or the file
    is empty. An unreadable or malformed file is logged at debug level and
    the previously loaded counters are kept.
    """
    path = self._stats_path
    if path is None or not path.exists():
        return
    try:
        raw = path.read_text()
        if not raw.strip():
            # Nothing flushed yet, or the writer was caught mid-rewrite.
            # Keep the counters we already have instead of zeroing them.
            return
        data = json.loads(raw)
        self._stats = RouterStats(
            paid_bytes=int(data.get("paid_bytes", 0)),
            paid_requests=int(data.get("paid_requests", 0)),
            direct_bytes=int(data.get("direct_bytes", 0)),
            direct_requests=int(data.get("direct_requests", 0)),
            blocked_requests=int(data.get("blocked_requests", 0)),
        )
    except (ValueError, TypeError, AttributeError, OSError):
        # ValueError covers json.JSONDecodeError; TypeError/AttributeError
        # cover JSON that parses but is not a mapping of numbers.
        logger.debug("smart-router stats file unreadable", exc_info=True)
|
|
269
|
+
|
|
270
|
+
def _cleanup_files(self) -> None:
|
|
271
|
+
for p in (self._stats_path, self._addon_path):
|
|
272
|
+
if p is not None and p.exists():
|
|
273
|
+
try:
|
|
274
|
+
p.unlink()
|
|
275
|
+
except OSError:
|
|
276
|
+
pass
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
def _render_addon_script(rules: list[dict], default_action: str, stats_path: str) -> str:
|
|
280
|
+
"""Generate the mitmproxy addon script. Embeds the rules as a literal so
|
|
281
|
+
the subprocess has them at startup without a separate config file."""
|
|
282
|
+
return textwrap.dedent(f"""
|
|
283
|
+
import json
|
|
284
|
+
from mitmproxy import http
|
|
285
|
+
from pathlib import Path
|
|
286
|
+
|
|
287
|
+
RULES = {json.dumps(rules)}
|
|
288
|
+
DEFAULT_ACTION = {default_action!r}
|
|
289
|
+
STATS_PATH = {stats_path!r}
|
|
290
|
+
|
|
291
|
+
_stats = {{
|
|
292
|
+
"paid_bytes": 0, "paid_requests": 0,
|
|
293
|
+
"direct_bytes": 0, "direct_requests": 0,
|
|
294
|
+
"blocked_requests": 0,
|
|
295
|
+
}}
|
|
296
|
+
|
|
297
|
+
def _host_matches(host, pattern):
|
|
298
|
+
return host == pattern or host.endswith("." + pattern) or host.endswith(pattern)
|
|
299
|
+
|
|
300
|
+
def _pick_action(host, path):
|
|
301
|
+
for r in RULES:
|
|
302
|
+
if not _host_matches(host, r["host_match"]):
|
|
303
|
+
continue
|
|
304
|
+
if r["path_prefix"] and not path.startswith(r["path_prefix"]):
|
|
305
|
+
continue
|
|
306
|
+
return r["action"]
|
|
307
|
+
return DEFAULT_ACTION
|
|
308
|
+
|
|
309
|
+
def _persist():
|
|
310
|
+
try:
|
|
311
|
+
Path(STATS_PATH).write_text(json.dumps(_stats))
|
|
312
|
+
except OSError:
|
|
313
|
+
pass
|
|
314
|
+
|
|
315
|
+
def request(flow):
|
|
316
|
+
host = flow.request.pretty_host
|
|
317
|
+
path = flow.request.path
|
|
318
|
+
action = _pick_action(host, path)
|
|
319
|
+
flow.metadata["smart_action"] = action
|
|
320
|
+
if action == "block":
|
|
321
|
+
_stats["blocked_requests"] += 1
|
|
322
|
+
flow.response = http.Response.make(
|
|
323
|
+
204, b"", {{"x-smart-router": "blocked"}}
|
|
324
|
+
)
|
|
325
|
+
_persist()
|
|
326
|
+
return
|
|
327
|
+
if action == "direct" and flow.server_conn.via is not None:
|
|
328
|
+
try:
|
|
329
|
+
flow.server_conn.via = None
|
|
330
|
+
except Exception:
|
|
331
|
+
pass
|
|
332
|
+
|
|
333
|
+
def response(flow):
|
|
334
|
+
action = flow.metadata.get("smart_action", "direct")
|
|
335
|
+
body = flow.response.raw_content or b""
|
|
336
|
+
resp_h = sum(len(k) + len(v) + 4 for k, v in flow.response.headers.items())
|
|
337
|
+
req_h = sum(len(k) + len(v) + 4 for k, v in flow.request.headers.items())
|
|
338
|
+
wire = len(body) + resp_h + req_h + len(flow.request.raw_content or b"")
|
|
339
|
+
if action == "paid":
|
|
340
|
+
_stats["paid_bytes"] += wire
|
|
341
|
+
_stats["paid_requests"] += 1
|
|
342
|
+
elif action == "direct":
|
|
343
|
+
_stats["direct_bytes"] += wire
|
|
344
|
+
_stats["direct_requests"] += 1
|
|
345
|
+
_persist()
|
|
346
|
+
""").strip()
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
__all__ = [
|
|
350
|
+
"DEFAULT_RULES",
|
|
351
|
+
"RouterStats",
|
|
352
|
+
"SmartRouter",
|
|
353
|
+
"SmartRule",
|
|
354
|
+
]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: fleet-framework
|
|
3
|
-
Version: 0.1.1
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: generic distributed-automation framework — master/worker, browser pool, anti-bot helpers, and abstract automation contracts (SERP, content, news, place, marketplace, jobs, social)
|
|
5
5
|
Author: Sarper Avci
|
|
6
6
|
License: MIT
|
|
@@ -21,8 +21,12 @@ Requires-Dist: click>=8.1.0
|
|
|
21
21
|
Provides-Extra: browser
|
|
22
22
|
Requires-Dist: DrissionPage>=4.1.0; extra == "browser"
|
|
23
23
|
Requires-Dist: cryptography>=42.0.0; extra == "browser"
|
|
24
|
+
Requires-Dist: mitmproxy>=12.0; extra == "browser"
|
|
24
25
|
Provides-Extra: cloudflare
|
|
25
26
|
Requires-Dist: fleet-framework[browser]; extra == "cloudflare"
|
|
27
|
+
Provides-Extra: cloak
|
|
28
|
+
Requires-Dist: fleet-framework[browser]; extra == "cloak"
|
|
29
|
+
Requires-Dist: cloakbrowser>=0.3; extra == "cloak"
|
|
26
30
|
Provides-Extra: otel
|
|
27
31
|
Requires-Dist: opentelemetry-api>=1.27.0; extra == "otel"
|
|
28
32
|
Requires-Dist: opentelemetry-sdk>=1.27.0; extra == "otel"
|
|
@@ -31,7 +35,7 @@ Provides-Extra: test
|
|
|
31
35
|
Requires-Dist: pytest>=8.0; extra == "test"
|
|
32
36
|
Requires-Dist: pytest-asyncio>=0.23; extra == "test"
|
|
33
37
|
Provides-Extra: all
|
|
34
|
-
Requires-Dist: fleet-framework[browser,cloudflare,otel]; extra == "all"
|
|
38
|
+
Requires-Dist: fleet-framework[browser,cloak,cloudflare,otel]; extra == "all"
|
|
35
39
|
Dynamic: license-file
|
|
36
40
|
|
|
37
41
|
# Fleet
|
|
@@ -42,10 +42,12 @@ fleet/worker/ws_client.py
|
|
|
42
42
|
fleet_browser/__init__.py
|
|
43
43
|
fleet_browser/browser.py
|
|
44
44
|
fleet_browser/cert.py
|
|
45
|
+
fleet_browser/cloak.py
|
|
45
46
|
fleet_browser/fingerprint.py
|
|
46
47
|
fleet_browser/humanizer.py
|
|
47
48
|
fleet_browser/pool.py
|
|
48
49
|
fleet_browser/proxy_extension.py
|
|
50
|
+
fleet_browser/smart_router.py
|
|
49
51
|
fleet_browser/solver.py
|
|
50
52
|
fleet_browser/stealth.py
|
|
51
53
|
fleet_cloudflare/__init__.py
|
|
@@ -9,11 +9,16 @@ psutil>=5.9.0
|
|
|
9
9
|
click>=8.1.0
|
|
10
10
|
|
|
11
11
|
[all]
|
|
12
|
-
fleet-framework[browser,cloudflare,otel]
|
|
12
|
+
fleet-framework[browser,cloak,cloudflare,otel]
|
|
13
13
|
|
|
14
14
|
[browser]
|
|
15
15
|
DrissionPage>=4.1.0
|
|
16
16
|
cryptography>=42.0.0
|
|
17
|
+
mitmproxy>=12.0
|
|
18
|
+
|
|
19
|
+
[cloak]
|
|
20
|
+
fleet-framework[browser]
|
|
21
|
+
cloakbrowser>=0.3
|
|
17
22
|
|
|
18
23
|
[cloudflare]
|
|
19
24
|
fleet-framework[browser]
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "fleet-framework"
|
|
7
|
-
version = "0.1.1"
|
|
7
|
+
version = "0.2.0"
|
|
8
8
|
description = "generic distributed-automation framework — master/worker, browser pool, anti-bot helpers, and abstract automation contracts (SERP, content, news, place, marketplace, jobs, social)"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.11"
|
|
@@ -26,10 +26,15 @@ dependencies = [
|
|
|
26
26
|
browser = [
|
|
27
27
|
"DrissionPage>=4.1.0",
|
|
28
28
|
"cryptography>=42.0.0",
|
|
29
|
+
"mitmproxy>=12.0", # SmartRouter subprocess for per-flow proxy routing
|
|
29
30
|
]
|
|
30
31
|
cloudflare = [
|
|
31
32
|
"fleet-framework[browser]",
|
|
32
33
|
]
|
|
34
|
+
cloak = [
|
|
35
|
+
"fleet-framework[browser]",
|
|
36
|
+
"cloakbrowser>=0.3",
|
|
37
|
+
]
|
|
33
38
|
otel = [
|
|
34
39
|
"opentelemetry-api>=1.27.0",
|
|
35
40
|
"opentelemetry-sdk>=1.27.0",
|
|
@@ -40,7 +45,7 @@ test = [
|
|
|
40
45
|
"pytest-asyncio>=0.23",
|
|
41
46
|
]
|
|
42
47
|
all = [
|
|
43
|
-
"fleet-framework[browser,cloudflare,otel]",
|
|
48
|
+
"fleet-framework[browser,cloudflare,cloak,otel]",
|
|
44
49
|
]
|
|
45
50
|
|
|
46
51
|
[project.scripts]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_framework.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|