fleet-framework 0.1.1__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/PKG-INFO +6 -2
  2. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_browser/__init__.py +5 -0
  3. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_browser/browser.py +113 -4
  4. fleet_framework-0.2.0/fleet_browser/cloak.py +120 -0
  5. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_browser/fingerprint.py +6 -1
  6. fleet_framework-0.2.0/fleet_browser/smart_router.py +354 -0
  7. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_framework.egg-info/PKG-INFO +6 -2
  8. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_framework.egg-info/SOURCES.txt +2 -0
  9. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_framework.egg-info/requires.txt +6 -1
  10. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/pyproject.toml +7 -2
  11. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/LICENSE +0 -0
  12. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/README.md +0 -0
  13. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/__init__.py +0 -0
  14. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/cli.py +0 -0
  15. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/core/__init__.py +0 -0
  16. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/core/automation.py +0 -0
  17. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/core/backend.py +0 -0
  18. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/core/config.py +0 -0
  19. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/core/context.py +0 -0
  20. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/core/contract.py +0 -0
  21. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/core/country_presets.py +0 -0
  22. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/core/events.py +0 -0
  23. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/core/local_runner.py +0 -0
  24. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/core/logging.py +0 -0
  25. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/core/memory_backend.py +0 -0
  26. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/core/metrics.py +0 -0
  27. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/core/otel.py +0 -0
  28. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/core/primitives.py +0 -0
  29. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/core/protocol.py +0 -0
  30. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/core/proxy.py +0 -0
  31. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/core/reconcile.py +0 -0
  32. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/core/sqlite_backend.py +0 -0
  33. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/core/store.py +0 -0
  34. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/master/__init__.py +0 -0
  35. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/master/api.py +0 -0
  36. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/master/app.py +0 -0
  37. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/master/auth.py +0 -0
  38. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/master/broadcaster.py +0 -0
  39. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/master/dashboard/__init__.py +0 -0
  40. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/master/dashboard/router.py +0 -0
  41. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/master/dashboard/static/style.css +0 -0
  42. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/master/dashboard/templates/index.html +0 -0
  43. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/master/metrics_route.py +0 -0
  44. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/master/ratelimit.py +0 -0
  45. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/master/ws_router.py +0 -0
  46. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/worker/__init__.py +0 -0
  47. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/worker/agent.py +0 -0
  48. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/worker/reconcile_loop.py +0 -0
  49. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/worker/slot_runner.py +0 -0
  50. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet/worker/ws_client.py +0 -0
  51. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_browser/cert.py +0 -0
  52. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_browser/humanizer.py +0 -0
  53. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_browser/pool.py +0 -0
  54. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_browser/proxy_extension.py +0 -0
  55. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_browser/solver.py +0 -0
  56. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_browser/stealth.py +0 -0
  57. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_cloudflare/__init__.py +0 -0
  58. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_cloudflare/bypasser.py +0 -0
  59. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_cloudflare/harvest.py +0 -0
  60. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_cloudflare/replay.py +0 -0
  61. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_cloudflare/solver.py +0 -0
  62. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_content/__init__.py +0 -0
  63. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_content/automation.py +0 -0
  64. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_content/contracts.py +0 -0
  65. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_detect/__init__.py +0 -0
  66. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_detect/contracts.py +0 -0
  67. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_detect/detect.py +0 -0
  68. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_framework.egg-info/dependency_links.txt +0 -0
  69. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_framework.egg-info/entry_points.txt +0 -0
  70. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_framework.egg-info/top_level.txt +0 -0
  71. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_headers/__init__.py +0 -0
  72. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_headers/profiles.py +0 -0
  73. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_jobs/__init__.py +0 -0
  74. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_jobs/automation.py +0 -0
  75. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_jobs/contracts.py +0 -0
  76. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_marketplace/__init__.py +0 -0
  77. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_marketplace/automation.py +0 -0
  78. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_marketplace/contracts.py +0 -0
  79. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_news/__init__.py +0 -0
  80. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_news/automation.py +0 -0
  81. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_news/contracts.py +0 -0
  82. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_place/__init__.py +0 -0
  83. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_place/automation.py +0 -0
  84. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_place/contracts.py +0 -0
  85. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_provider_dataimpulse/__init__.py +0 -0
  86. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_provider_evomi/__init__.py +0 -0
  87. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_serp/__init__.py +0 -0
  88. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_serp/automation.py +0 -0
  89. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_serp/contracts.py +0 -0
  90. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_social/__init__.py +0 -0
  91. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_social/automation.py +0 -0
  92. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/fleet_social/contracts.py +0 -0
  93. {fleet_framework-0.1.1 → fleet_framework-0.2.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fleet-framework
3
- Version: 0.1.1
3
+ Version: 0.2.0
4
4
  Summary: generic distributed-automation framework — master/worker, browser pool, anti-bot helpers, and abstract automation contracts (SERP, content, news, place, marketplace, jobs, social)
5
5
  Author: Sarper Avci
6
6
  License: MIT
@@ -21,8 +21,12 @@ Requires-Dist: click>=8.1.0
21
21
  Provides-Extra: browser
22
22
  Requires-Dist: DrissionPage>=4.1.0; extra == "browser"
23
23
  Requires-Dist: cryptography>=42.0.0; extra == "browser"
24
+ Requires-Dist: mitmproxy>=12.0; extra == "browser"
24
25
  Provides-Extra: cloudflare
25
26
  Requires-Dist: fleet-framework[browser]; extra == "cloudflare"
27
+ Provides-Extra: cloak
28
+ Requires-Dist: fleet-framework[browser]; extra == "cloak"
29
+ Requires-Dist: cloakbrowser>=0.3; extra == "cloak"
26
30
  Provides-Extra: otel
27
31
  Requires-Dist: opentelemetry-api>=1.27.0; extra == "otel"
28
32
  Requires-Dist: opentelemetry-sdk>=1.27.0; extra == "otel"
@@ -31,7 +35,7 @@ Provides-Extra: test
31
35
  Requires-Dist: pytest>=8.0; extra == "test"
32
36
  Requires-Dist: pytest-asyncio>=0.23; extra == "test"
33
37
  Provides-Extra: all
34
- Requires-Dist: fleet-framework[browser,cloudflare,otel]; extra == "all"
38
+ Requires-Dist: fleet-framework[browser,cloak,cloudflare,otel]; extra == "all"
35
39
  Dynamic: license-file
36
40
 
37
41
  # Fleet
@@ -3,9 +3,11 @@ from fleet_browser.fingerprint import Fingerprint, FingerprintFactory
3
3
  from fleet_browser.humanizer import Humanizer
4
4
  from fleet_browser.pool import BrowserPool, slot
5
5
  from fleet_browser.proxy_extension import build_proxy_auth_extension, parse_proxy_url
6
+ from fleet_browser.smart_router import DEFAULT_RULES, RouterStats, SmartRouter, SmartRule
6
7
  from fleet_browser.stealth import FingerprintStealth, NoOpStealth, Stealth
7
8
 
8
9
  __all__ = [
10
+ "DEFAULT_RULES",
9
11
  "BrowserConfig",
10
12
  "BrowserPool",
11
13
  "ChromiumWorker",
@@ -14,6 +16,9 @@ __all__ = [
14
16
  "FingerprintStealth",
15
17
  "Humanizer",
16
18
  "NoOpStealth",
19
+ "RouterStats",
20
+ "SmartRouter",
21
+ "SmartRule",
17
22
  "Stealth",
18
23
  "build_proxy_auth_extension",
19
24
  "parse_proxy_url",
@@ -13,7 +13,14 @@ from typing import Optional
13
13
  import psutil
14
14
  from DrissionPage import ChromiumOptions, ChromiumPage
15
15
 
16
+ from fleet_browser.cloak import (
17
+ CLOAK_IGNORE_DEFAULT_ARGS,
18
+ cloak_stealth_args,
19
+ ensure_cloak_binary,
20
+ resolve_engine,
21
+ )
16
22
  from fleet_browser.proxy_extension import build_proxy_auth_extension, parse_proxy_url
23
+ from fleet_browser.smart_router import DEFAULT_RULES, SmartRouter, SmartRule
17
24
  from fleet_browser.stealth import Stealth
18
25
 
19
26
  logger = logging.getLogger(__name__)
@@ -120,12 +127,64 @@ class BrowserConfig:
120
127
 
121
128
  extra_args: tuple[str, ...] = ()
122
129
 
130
+ engine: str = "auto"
131
+ """Which Chromium build to launch. `"auto"` picks `"cloak"` when the
132
+ cloakbrowser package is installed, else falls back to `"chrome"`.
133
+ Set explicitly to override:
134
+ - `"chrome"`: use the system Chromium / Brave / Thorium auto-detect path
135
+ - `"cloak"`: use the CloakBrowser patched binary (downloads on first
136
+ use, raises if `cloakbrowser` isn't installed)
137
+ Cloak applies source-level canvas/WebGL/audio/font/WebRTC patches that
138
+ JS-level stealth can't reach. Stack FingerprintStealth on top to add
139
+ per-launch UA/version diversity from the uaforge corpus."""
140
+
141
+ cloak_cache_dir: Optional[str] = None
142
+ """Override the cloak binary cache dir (default ~/.cloakbrowser/)."""
143
+
144
+ cloak_fingerprint_seed: Optional[int] = None
145
+ """Force a specific cloak --fingerprint=N seed. Default: random per
146
+ launch. Pin a seed for reproducible tests."""
147
+
148
+ cloak_platform: str = "windows"
149
+ """`--fingerprint-platform` flag for cloak's binary. Must match the
150
+ OS family FingerprintStealth's UA claims (uaforge default is Windows).
151
+ Set 'macos' if you switch uaforge to a Mac corpus."""
152
+
153
+ cloak_timezone: Optional[str] = None
154
+ """IANA timezone for cloak's `--fingerprint-timezone` flag. Set when
155
+ using residential proxies so the spoofed timezone matches the egress
156
+ geo (otherwise CF flags the mismatch). Falls back to system tz."""
157
+
158
+ cloak_webrtc_ip: Optional[str] = None
159
+ """Public IP cloak should advertise via WebRTC. Set to the proxy's exit
160
+ IP when using residential — otherwise WebRTC leaks your real LAN IP
161
+ and CF correlates the mismatch."""
162
+
163
+ smart_routing: bool = True
164
+ """When True AND `proxy` is set, spawn a local mitmproxy that does per-flow
165
+ routing: only the rules-matching flows (e.g. /search) go through the paid
166
+ upstream, everything else goes direct. Cuts residential bandwidth ~85–95%
167
+ on Google/Bing/etc. without changing the SERP fetch's IP. Disable only when
168
+ you specifically need every byte through the proxy (rare)."""
169
+
170
+ smart_routing_rules: tuple[SmartRule, ...] = DEFAULT_RULES
171
+ """Per-flow routing rules. Built-ins handle Google/Bing/Yahoo/DDG /search
172
+ plus a block-list of trackers. Plugins can extend with additional rules
173
+ via tuple concat: `DEFAULT_RULES + (SmartRule(host_match="amazon.com", path_prefix="/s"),)`."""
174
+
123
175
 
124
176
  class ChromiumWorker:
125
177
 
def __init__(self, config: BrowserConfig) -> None:
    """Store the launch configuration and reset all per-launch state.

    Nothing is launched here; the attributes below are placeholders that
    are filled in when the browser is started.
    """
    self.config = config
    self.page: Optional[ChromiumPage] = None
    self._smart_router: Optional[SmartRouter] = None
    # These two must be set inside __init__ (not after the property's
    # `return`, where they would be unreachable): the teardown path tests
    # `self._user_data_dir is not None`, so the attribute has to exist even
    # when the browser was never started.
    self._user_data_dir: Optional[Path] = None
    self._owns_user_data_dir = False

@property
def smart_router(self) -> Optional[SmartRouter]:
    """The active SmartRouter for this worker, or None if smart routing
    wasn't engaged. Stats are populated after `stop()` runs."""
    return self._smart_router
131
190
 
@@ -133,9 +192,16 @@ class ChromiumWorker:
133
192
  if self.page is not None:
134
193
  return self.page
135
194
 
136
- binary = self.config.browser_binary or _find_browser_binary(
137
- prefer=self.config.prefer_browser,
138
- )
195
+ engine = resolve_engine(self.config.engine)
196
+ if engine == "cloak":
197
+ # Caller may still pin browser_binary explicitly — honor it.
198
+ binary = self.config.browser_binary or ensure_cloak_binary(
199
+ self.config.cloak_cache_dir
200
+ )
201
+ else:
202
+ binary = self.config.browser_binary or _find_browser_binary(
203
+ prefer=self.config.prefer_browser,
204
+ )
139
205
 
140
206
  self._user_data_dir = Path(tempfile.mkdtemp(prefix="chromium-worker-"))
141
207
  self._owns_user_data_dir = True
@@ -147,6 +213,23 @@ class ChromiumWorker:
147
213
  opts.set_user_data_path(str(self._user_data_dir))
148
214
  opts.set_argument(f"--window-size={self.config.window_size[0]},{self.config.window_size[1]}")
149
215
 
216
+ if engine == "cloak":
217
+ for arg in cloak_stealth_args(
218
+ platform=self.config.cloak_platform,
219
+ timezone=self.config.cloak_timezone,
220
+ webrtc_ip=self.config.cloak_webrtc_ip,
221
+ seed=self.config.cloak_fingerprint_seed,
222
+ ):
223
+ opts.set_argument(arg)
224
+ # Suppress Chromium defaults that leak automation signals.
225
+ # DrissionPage doesn't expose ignore_default_args, so instead of
226
+ # removing each leaking flag we add its --disable-* inverse as a
227
+ # plain argument (--enable-automation -> --disable-automation).
228
+ for kill in CLOAK_IGNORE_DEFAULT_ARGS:
229
+ # Chromium accepts --disable-foo as the inverse of --enable-foo.
230
+ inv = kill.replace("--enable-", "--disable-", 1)
231
+ opts.set_argument(inv)
232
+
150
233
  # HttpsUpgrades is off so self-signed HTTPS (TokenServer) isn't rewritten.
151
234
  # Brave's ad/tracker/sync features both pollute the fingerprint and
152
235
  # sometimes break Turnstile's iframe; disable them when running Brave.
@@ -180,7 +263,26 @@ class ChromiumWorker:
180
263
  opts.set_argument("--headless=new")
181
264
 
182
265
  if self.config.proxy:
183
- proxy_spec = parse_proxy_url(self.config.proxy)
266
+ # When smart_routing is on, spawn a SmartRouter and point Chromium
267
+ # at THAT instead of the upstream proxy. The router decides per-flow
268
+ # whether to forward upstream (paid) or direct (free). 85-95%
269
+ # bandwidth reduction on search engines.
270
+ effective_proxy = self.config.proxy
271
+ if self.config.smart_routing:
272
+ try:
273
+ self._smart_router = SmartRouter(
274
+ upstream_proxy=self.config.proxy,
275
+ rules=self.config.smart_routing_rules,
276
+ )
277
+ self._smart_router.start()
278
+ effective_proxy = self._smart_router.proxy_url
279
+ except Exception:
280
+ logger.exception(
281
+ "smart-router failed to start, falling back to direct upstream proxy"
282
+ )
283
+ self._smart_router = None
284
+
285
+ proxy_spec = parse_proxy_url(effective_proxy)
184
286
  # Chromium 122+ accepts user:pass in --proxy-server. The MV2
185
287
  # auth-extension is kept as a fallback for older Chromium.
186
288
  if proxy_spec.has_auth:
@@ -249,6 +351,13 @@ class ChromiumWorker:
249
351
  pass
250
352
  self.page = None
251
353
 
354
+ if self._smart_router is not None:
355
+ try:
356
+ self._smart_router.stop()
357
+ except Exception:
358
+ logger.exception("[browser] smart-router stop failed")
359
+ # Keep the reference so callers can read stats after stop.
360
+
252
361
  # SIGKILL anything still running with our --user-data-dir; page.quit
253
362
  # sometimes leaves renderer/zygote/utility children orphaned to PID 1.
254
363
  if self._user_data_dir is not None:
@@ -0,0 +1,120 @@
1
+ """Cloak engine integration — uses the CloakBrowser patched Chromium binary.
2
+
3
+ CloakBrowser is a separately-licensed binary (free for own-business use,
4
+ OEM license required for browser-as-a-service distribution). See:
5
+ https://github.com/CloakHQ/CloakBrowser
6
+
7
+ The wrapper Python package is MIT-licensed and pulled in via the optional
8
+ extra: `pip install fleet-framework[cloak]`. The binary is downloaded on
9
+ first use to ~/.cloakbrowser/ (override with CLOAKBROWSER_CACHE_DIR).
10
+
11
+ Combining cloak (source-level canvas/WebGL/audio/font/WebRTC patches) with
12
+ FingerprintStealth (CDP-level UA + Sec-CH-UA + navigator overrides) gives:
13
+ - Deep, JS-undetectable spoofing of low-level signals (cloak)
14
+ - Per-launch diversity of UA versions from the uaforge corpus (FingerprintStealth)
15
+
16
+ That diversity matters: cloak's binary patches use the latest Chrome only,
17
+ so without the CDP layer every harvest looks like the same Chrome version.
18
+ With CDP override on top, each launch picks a different version from uaforge
19
+ while keeping the renderer-level fingerprint coherent.
20
+ """
21
+ from __future__ import annotations
22
+
23
+ import logging
24
+ import os
25
+ import random
26
+ from pathlib import Path
27
+ from typing import Optional
28
+
29
+ logger = logging.getLogger(__name__)
30
+
31
+
# Tri-state memo for the import probe: None = not probed yet.
_CLOAK_AVAILABLE: Optional[bool] = None


def cloak_available() -> bool:
    """Report whether the `cloakbrowser` wrapper package can be imported.

    Only the Python package is probed; fetching the patched binary is left to
    `ensure_cloak_binary`. The outcome of the first probe is remembered in
    `_CLOAK_AVAILABLE`, so later calls do not retry the import.
    """
    global _CLOAK_AVAILABLE
    if _CLOAK_AVAILABLE is not None:
        return _CLOAK_AVAILABLE
    try:
        import cloakbrowser  # noqa: F401
    except ImportError:
        _CLOAK_AVAILABLE = False
    else:
        _CLOAK_AVAILABLE = True
    return _CLOAK_AVAILABLE
49
+
50
+
def resolve_engine(name: str) -> str:
    """Turn an engine setting into a concrete engine name.

    `"auto"` becomes `"cloak"` when `cloak_available()` is true and `"chrome"`
    otherwise. Any other value is returned unchanged (it is not validated).
    """
    if name == "auto":
        if cloak_available():
            return "cloak"
        return "chrome"
    return name
56
+
57
+
def ensure_cloak_binary(cache_dir: Optional[str] = None) -> str:
    """Return path to the patched Chromium binary; download if missing.

    Delegates to cloakbrowser.download.ensure_binary. ~250-400 MB download
    on first use; cached at ~/.cloakbrowser/ (or `cache_dir`) thereafter.
    Cloakbrowser checks for updates hourly when launched.

    Args:
        cache_dir: Optional cache directory (``~`` is expanded). When given
            it is exported as ``CLOAKBROWSER_CACHE_DIR`` for this process,
            replacing any value already present.

    Raises:
        RuntimeError: if the `cloakbrowser` package is not importable.
    """
    if not cloak_available():
        raise RuntimeError(
            "engine='cloak' requires cloakbrowser. "
            "Install with: pip install fleet-framework[cloak]"
        )
    if cache_dir:
        # Plain assignment rather than setdefault: with setdefault an explicit
        # cache_dir was silently ignored whenever the variable already existed,
        # including when an earlier call in the same process had set it to a
        # different directory.
        # NOTE(review): assumes cloakbrowser reads the variable when
        # ensure_binary() runs rather than at import time - confirm.
        os.environ["CLOAKBROWSER_CACHE_DIR"] = str(Path(cache_dir).expanduser())
    from cloakbrowser.download import ensure_binary
    return str(ensure_binary())
75
+
76
+
def cloak_stealth_args(
    *,
    platform: str = "windows",
    timezone: Optional[str] = None,
    webrtc_ip: Optional[str] = None,
    seed: Optional[int] = None,
) -> tuple[str, ...]:
    """Build the command-line flags that switch on cloak's fingerprint patches.

    Args:
        platform: Value for `--fingerprint-platform`. Keep it aligned with the
            OS family that FingerprintStealth's UA claims.
        timezone: Value for `--fingerprint-timezone`; the flag is omitted when
            this is empty or None.
        webrtc_ip: Value for `--fingerprint-webrtc-ip`; the flag is omitted
            when this is empty or None.
        seed: Value for `--fingerprint`. When None, a random integer in
            10000..99999 is drawn per call; pass a fixed seed for
            reproducible runs.

    Returns:
        The flags as a tuple, seed first, then platform, then the optional
        timezone and WebRTC flags in that order.
    """
    if seed is None:
        seed_value = random.randint(10000, 99999)
    else:
        seed_value = seed

    flags = [
        f"--fingerprint={seed_value}",
        f"--fingerprint-platform={platform}",
    ]
    optional_flags = (
        ("--fingerprint-timezone", timezone),
        ("--fingerprint-webrtc-ip", webrtc_ip),
    )
    flags.extend(f"{flag}={value}" for flag, value in optional_flags if value)
    return tuple(flags)
101
+
102
+
CLOAK_IGNORE_DEFAULT_ARGS: tuple[str, ...] = (
    "--enable-automation",
    "--enable-unsafe-swiftshader",
)
"""Default args Playwright/DrissionPage pass that leak automation signals.

`--enable-automation` exposes `navigator.webdriver=true`. `--enable-unsafe-
swiftshader` forces SwiftShader's distinctive WebGL renderer string. These
entries are not removed from the launch command; when the cloak engine is
selected, ChromiumWorker adds the `--disable-*` inverse of each one."""
112
+
113
+
114
+ __all__ = [
115
+ "CLOAK_IGNORE_DEFAULT_ARGS",
116
+ "cloak_available",
117
+ "cloak_stealth_args",
118
+ "ensure_cloak_binary",
119
+ "resolve_engine",
120
+ ]
@@ -180,7 +180,12 @@ class FingerprintFactory:
180
180
  "pip install git+https://github.com/sarperavci/uaforge.git"
181
181
  )
182
182
  if allowed_os is None:
183
- allowed_os = (_detect_host_os(),)
183
+ # Default to Windows regardless of host. The host's actual OS
184
+ # is irrelevant — what matters is which UA + client-hint corpus
185
+ # the fleet's fingerprints are drawn from. Windows is the highest-
186
+ # entropy population (~70% of real browsing traffic) and matches
187
+ # the cloak engine's `--fingerprint-platform=windows` default.
188
+ allowed_os = ("windows",)
184
189
  self._gen = UserAgentGenerator(seed=seed)
185
190
  self._rng = random.Random(seed)
186
191
  self._min_chromium = min_chromium_version
@@ -0,0 +1,354 @@
1
+ """Smart per-flow proxy router for browser-based automation.
2
+
3
+ The problem this solves: residential proxy bandwidth costs $0.50-1.00/GB.
4
+ A vanilla Chromium load of a Google SERP through a residential proxy burns
5
+ 5+ MB per query (Google's own ML model downloads, gstatic CDN assets, JS
6
+ bundles). Only the search-result document actually needs the residential
7
+ IP — Google's bot-wall scores the document fetch, not its subresources.
8
+
9
+ How it works: a local mitmproxy subprocess acts as Chromium's proxy. Per-flow
10
+ the addon decides:
11
+
12
+ - HOST + PATH matches a "paid" rule → forward through the upstream
13
+ residential proxy → captures the IP-reputation signal
14
+ - Otherwise → flow.server_conn.via = None → forward direct → free, doesn't
15
+ affect the SERP fetch
16
+ - Matches a "block" rule → return 204 immediately
17
+
18
+ In our measurements: 1.31 MB → 0.17 MB paid per Google query. 87% reduction.
19
+
20
+ ChromiumWorker spins up a SmartRouter automatically when `BrowserConfig.proxy`
21
+ is set AND `smart_routing=True` (the default). Per-process subprocess. Stats
22
+ (paid/direct/blocked bytes + counts) are written to a temp file and read on
23
+ stop, so callers can attribute cost per task.
24
+ """
25
+
26
+ from __future__ import annotations
27
+
28
+ import json
29
+ import logging
30
+ import os
31
+ import shutil
32
+ import signal
33
+ import socket
34
+ import subprocess
35
+ import tempfile
36
+ import textwrap
37
+ import time
38
+ from dataclasses import asdict, dataclass, field
39
+ from pathlib import Path
40
+ from typing import Optional
41
+
42
+ logger = logging.getLogger(__name__)
43
+
44
+
@dataclass(frozen=True)
class SmartRule:
    """One routing rule. Fields are AND-ed when both are set.

    `host_match` is matched as a SUFFIX of the request host, so `google.com`
    covers google.com, www.google.com, images.google.com, etc. Write it with
    a leading "." (".google.com") to cover subdomains only.
    NOTE(review): the matcher lives in the generated addon script; confirm
    that a bare suffix such as "google.com" cannot also match look-alike
    hosts like "notgoogle.com".

    `path_prefix` is a literal prefix on the URL path. Empty = match any path.

    `action`:
      - "paid"   → forward through the upstream residential proxy
      - "direct" → forward direct, bypassing the upstream
      - "block"  → drop the request (returns 204; useful for trackers/ads)

    Raises:
        ValueError: if `action` is not one of the three values above.
    """

    host_match: str
    path_prefix: str = ""
    action: str = "paid"

    def __post_init__(self) -> None:
        # Fail at construction time: a misspelled action would otherwise only
        # surface as traffic taking the wrong (possibly paid) route.
        if self.action not in ("paid", "direct", "block"):
            raise ValueError(
                f"SmartRule.action must be 'paid', 'direct' or 'block', got {self.action!r}"
            )
64
+
65
+
# Built-in rule set: five "paid" search endpoints followed by six "block"
# hosts, in that order.
DEFAULT_RULES: tuple[SmartRule, ...] = (
    # Search-result documents that should go through the paid upstream.
    *(
        SmartRule(host_match=host, path_prefix=prefix, action="paid")
        for host, prefix in (
            ("www.google.com", "/search"),
            ("www.bing.com", "/search"),
            ("search.yahoo.com", "/search"),
            ("duckduckgo.com", "/?q"),
            ("duckduckgo.com", "/html"),
        )
    ),
    # Known-noise endpoints we never want to fetch. They would be free anyway,
    # so blocking saves direct bandwidth and latency rather than paid cost.
    *(
        SmartRule(host_match=host, action="block")
        for host in (
            "googletagmanager.com",
            "googlesyndication.com",
            "googleadservices.com",
            "doubleclick.net",
            "google-analytics.com",
            "optimizationguide-pa.googleapis.com",
        )
    ),
)
81
+
82
+
@dataclass
class RouterStats:
    """Traffic counters for one SmartRouter run, grouped by routing outcome.

    SmartRouter fills an instance from the JSON stats file written by the
    mitmproxy addon; every counter starts at zero.
    """

    # Flows sent through the upstream proxy.
    paid_bytes: int = 0
    paid_requests: int = 0
    # Flows sent without the upstream proxy.
    direct_bytes: int = 0
    direct_requests: int = 0
    # Flows rejected by a "block" rule (no byte counter is kept for these).
    blocked_requests: int = 0
90
+
91
+
def _pick_free_port() -> int:
    """Ask the OS for an unused TCP port on 127.0.0.1 and return its number.

    The probing socket is closed before returning, so the port is only known
    to have been free at the moment of the call.
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        # Port 0 lets the OS choose any free port.
        probe.bind(("127.0.0.1", 0))
        _host, port = probe.getsockname()
        return port
    finally:
        probe.close()
96
+
97
+
def _is_port_open(host: str, port: int) -> bool:
    """Return True if a TCP connection to (host, port) succeeds within 0.15 s.

    Any socket-level failure (timeout, refused connection, other OSError)
    is reported as False.
    """
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.settimeout(0.15)
    try:
        sock.connect((host, port))
    except OSError:
        # socket.timeout and ConnectionRefusedError are OSError subclasses.
        return False
    else:
        return True
    finally:
        sock.close()
106
+
107
+
def _find_mitmdump() -> Optional[str]:
    """Locate the mitmdump binary. Returns None if not installed.

    Search order:
      1. `PATH` (via `shutil.which`)
      2. the directory of the running interpreter, which is where pip puts
         console scripts when a virtualenv's python is run without the
         venv being activated
      3. `<two levels above this file's directory>/.venv/bin`
      4. `~/.local/bin`
    """
    import sys

    on_path = shutil.which("mitmdump")
    if on_path:
        return on_path

    candidates = []
    if sys.executable:
        candidates.append(Path(sys.executable).parent / "mitmdump")
    ancestors = Path(__file__).resolve().parents
    # Guard the index: a shallow install path has fewer than three ancestors.
    if len(ancestors) > 2:
        candidates.append(ancestors[2] / ".venv" / "bin" / "mitmdump")
    candidates.append(Path.home() / ".local" / "bin" / "mitmdump")

    for candidate in candidates:
        # is_file() rather than exists(): a directory called "mitmdump" also
        # passes the X_OK check but cannot be executed.
        if candidate.is_file() and os.access(candidate, os.X_OK):
            return str(candidate)
    return None
122
+
123
+
124
+ @dataclass
125
+ class SmartRouter:
126
+ """Embedded mitmproxy subprocess that does per-flow upstream routing.
127
+
128
+ Lifecycle: start() spawns mitmdump, blocks until its listen port is open;
129
+ stop() sends SIGTERM and reads the stats file. Designed for one router
130
+ per ChromiumWorker — cheap to spin up (~150ms cold start).
131
+ """
132
+
133
+ upstream_proxy: str
134
+ """Upstream residential proxy URL with auth: http://user:pass@host:port."""
135
+
136
+ rules: tuple[SmartRule, ...] = field(default_factory=lambda: DEFAULT_RULES)
137
+ """Routing rules. First matching rule wins. Default for non-matches: direct."""
138
+
139
+ default_action: str = "direct"
140
+ """Action when no rule matches. "direct" = free, "paid" = everything paid."""
141
+
142
+ listen_host: str = "127.0.0.1"
143
+ listen_port: int = 0
144
+ """0 = auto-pick free port."""
145
+
146
+ _proc: Optional[subprocess.Popen] = field(default=None, repr=False, init=False)
147
+ _stats_path: Optional[Path] = field(default=None, repr=False, init=False)
148
+ _addon_path: Optional[Path] = field(default=None, repr=False, init=False)
149
+ _stats: RouterStats = field(default_factory=RouterStats, init=False)
150
+
151
+ @property
152
+ def proxy_url(self) -> str:
153
+ return f"http://{self.listen_host}:{self.listen_port}"
154
+
155
+ @property
156
+ def stats(self) -> RouterStats:
157
+ return self._stats
158
+
159
+ def start(self, *, startup_timeout: float = 10.0) -> None:
160
+ mitmdump = _find_mitmdump()
161
+ if mitmdump is None:
162
+ raise RuntimeError(
163
+ "SmartRouter requires mitmproxy. "
164
+ "Install with: pip install 'fleet-framework[browser]'"
165
+ )
166
+ if self.listen_port == 0:
167
+ self.listen_port = _pick_free_port()
168
+
169
+ # Strip auth from upstream URL and pass separately.
170
+ from urllib.parse import urlparse
171
+ u = urlparse(self.upstream_proxy)
172
+ upstream_clean = f"{u.scheme}://{u.hostname}:{u.port or 80}"
173
+ upstream_auth = f"{u.username}:{u.password}" if u.username else None
174
+
175
+ # Persist stats to a tmp file the subprocess writes to.
176
+ self._stats_path = Path(tempfile.mktemp(prefix="smart-router-stats-", suffix=".json"))
177
+ # Generate the addon script — embeds the rules so the subprocess has them.
178
+ rules_repr = [asdict(r) for r in self.rules]
179
+ self._addon_path = Path(tempfile.mktemp(prefix="smart-router-addon-", suffix=".py"))
180
+ self._addon_path.write_text(_render_addon_script(
181
+ rules_repr, self.default_action, str(self._stats_path)
182
+ ))
183
+
184
+ cmd = [
185
+ mitmdump,
186
+ "-p", str(self.listen_port),
187
+ "--mode", f"upstream:{upstream_clean}",
188
+ "--set", "ssl_insecure=true",
189
+ "-s", str(self._addon_path),
190
+ "--quiet",
191
+ ]
192
+ if upstream_auth:
193
+ cmd.extend(["--upstream-auth", upstream_auth])
194
+
195
+ self._proc = subprocess.Popen(
196
+ cmd,
197
+ stdout=subprocess.DEVNULL,
198
+ stderr=subprocess.PIPE,
199
+ start_new_session=True,
200
+ )
201
+ # Wait for the listen socket.
202
+ deadline = time.monotonic() + startup_timeout
203
+ while time.monotonic() < deadline:
204
+ if _is_port_open(self.listen_host, self.listen_port):
205
+ logger.info(
206
+ "smart-router @ %s, %d rules, upstream=%s",
207
+ self.proxy_url, len(self.rules), upstream_clean,
208
+ )
209
+ return
210
+ if self._proc.poll() is not None:
211
+ err = (self._proc.stderr.read() if self._proc.stderr else b"").decode(errors="replace")
212
+ self._cleanup_files()
213
+ raise RuntimeError(f"SmartRouter mitmdump exited early: {err[:400]}")
214
+ time.sleep(0.1)
215
+ self.stop()
216
+ raise RuntimeError(f"SmartRouter failed to start within {startup_timeout}s")
217
+
218
def stop(self) -> None:
    """Shut down the mitmdump subprocess, capture final stats and clean up.

    Safe to call when no subprocess is running: the stats file is still
    read (if present), the temp files are removed and the summary is logged.

    Raises:
        subprocess.TimeoutExpired: if the process cannot be reaped even
            after SIGKILL. The pipe, the process handle and the temp files
            are released before the exception propagates.
    """
    # Read stats BEFORE killing the process — the addon flushes on every
    # response, so the file is current up to the last completed flow.
    self._read_stats()
    proc = self._proc
    try:
        if proc is not None:
            try:
                # Signal the whole process group so that anything mitmdump
                # spawned is terminated along with it.
                os.killpg(os.getpgid(proc.pid), signal.SIGTERM)
            except OSError:
                # ProcessLookupError / PermissionError are OSError
                # subclasses: the process is already gone or not ours.
                pass
            try:
                proc.wait(timeout=3)
            except subprocess.TimeoutExpired:
                # Graceful shutdown did not finish in time; escalate.
                try:
                    os.killpg(os.getpgid(proc.pid), signal.SIGKILL)
                except OSError:
                    pass
                proc.wait(timeout=2)
    finally:
        if proc is not None and proc.stderr is not None:
            # The pipe was opened with stderr=PIPE; close it so repeated
            # start/stop cycles do not leak file descriptors.
            proc.stderr.close()
        self._proc = None
        # Re-read stats one more time in case any flows finished between
        # the first read and the SIGTERM.
        self._read_stats()
        self._cleanup_files()
    logger.info(
        "smart-router stopped. paid=%d B / %d req, direct=%d B / %d req, blocked=%d req",
        self._stats.paid_bytes, self._stats.paid_requests,
        self._stats.direct_bytes, self._stats.direct_requests,
        self._stats.blocked_requests,
    )
247
+
248
+ def __enter__(self) -> "SmartRouter":
249
+ self.start()
250
+ return self
251
+
252
+ def __exit__(self, *exc) -> None:
253
+ self.stop()
254
+
255
def _read_stats(self) -> None:
    """Refresh ``self._stats`` from the JSON stats file, best-effort.

    The previously loaded stats are left untouched when the file is
    missing, empty, unreadable or does not hold a JSON object with
    integer-convertible counters. Keys absent from the object count as 0.
    """
    if self._stats_path is None:
        return
    try:
        raw = self._stats_path.read_text()
    except FileNotFoundError:
        # Nothing has been written yet (or the file was already removed).
        return
    except OSError:
        logger.debug("smart-router stats file unreadable", exc_info=True)
        return
    if not raw.strip():
        # The writer truncates the file before rewriting it, so an empty
        # file means "caught mid-write", not "all counters are zero".
        # Keep the last good values instead of clobbering them.
        return
    counters = (
        "paid_bytes", "paid_requests",
        "direct_bytes", "direct_requests",
        "blocked_requests",
    )
    try:
        data = json.loads(raw)
        if not isinstance(data, dict):
            raise ValueError("stats payload is not a JSON object")
        # json.JSONDecodeError is a ValueError subclass; TypeError covers
        # counters that are null or otherwise not int-convertible.
        self._stats = RouterStats(
            **{name: int(data.get(name, 0)) for name in counters}
        )
    except (ValueError, TypeError):
        logger.debug("smart-router stats file unreadable", exc_info=True)
269
+
270
+ def _cleanup_files(self) -> None:
271
+ for p in (self._stats_path, self._addon_path):
272
+ if p is not None and p.exists():
273
+ try:
274
+ p.unlink()
275
+ except OSError:
276
+ pass
277
+
278
+
279
+ def _render_addon_script(rules: list[dict], default_action: str, stats_path: str) -> str:
280
+ """Generate the mitmproxy addon script. Embeds the rules as a literal so
281
+ the subprocess has them at startup without a separate config file."""
282
+ return textwrap.dedent(f"""
283
+ import json
284
+ from mitmproxy import http
285
+ from pathlib import Path
286
+
287
+ RULES = {json.dumps(rules)}
288
+ DEFAULT_ACTION = {default_action!r}
289
+ STATS_PATH = {stats_path!r}
290
+
291
+ _stats = {{
292
+ "paid_bytes": 0, "paid_requests": 0,
293
+ "direct_bytes": 0, "direct_requests": 0,
294
+ "blocked_requests": 0,
295
+ }}
296
+
297
+ def _host_matches(host, pattern):
298
+ return host == pattern or host.endswith("." + pattern) or host.endswith(pattern)
299
+
300
+ def _pick_action(host, path):
301
+ for r in RULES:
302
+ if not _host_matches(host, r["host_match"]):
303
+ continue
304
+ if r["path_prefix"] and not path.startswith(r["path_prefix"]):
305
+ continue
306
+ return r["action"]
307
+ return DEFAULT_ACTION
308
+
309
+ def _persist():
310
+ try:
311
+ Path(STATS_PATH).write_text(json.dumps(_stats))
312
+ except OSError:
313
+ pass
314
+
315
+ def request(flow):
316
+ host = flow.request.pretty_host
317
+ path = flow.request.path
318
+ action = _pick_action(host, path)
319
+ flow.metadata["smart_action"] = action
320
+ if action == "block":
321
+ _stats["blocked_requests"] += 1
322
+ flow.response = http.Response.make(
323
+ 204, b"", {{"x-smart-router": "blocked"}}
324
+ )
325
+ _persist()
326
+ return
327
+ if action == "direct" and flow.server_conn.via is not None:
328
+ try:
329
+ flow.server_conn.via = None
330
+ except Exception:
331
+ pass
332
+
333
+ def response(flow):
334
+ action = flow.metadata.get("smart_action", "direct")
335
+ body = flow.response.raw_content or b""
336
+ resp_h = sum(len(k) + len(v) + 4 for k, v in flow.response.headers.items())
337
+ req_h = sum(len(k) + len(v) + 4 for k, v in flow.request.headers.items())
338
+ wire = len(body) + resp_h + req_h + len(flow.request.raw_content or b"")
339
+ if action == "paid":
340
+ _stats["paid_bytes"] += wire
341
+ _stats["paid_requests"] += 1
342
+ elif action == "direct":
343
+ _stats["direct_bytes"] += wire
344
+ _stats["direct_requests"] += 1
345
+ _persist()
346
+ """).strip()
347
+
348
+
349
# Names exported by ``from <this module> import *``.
__all__ = ["DEFAULT_RULES", "RouterStats", "SmartRouter", "SmartRule"]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fleet-framework
3
- Version: 0.1.1
3
+ Version: 0.2.0
4
4
  Summary: generic distributed-automation framework — master/worker, browser pool, anti-bot helpers, and abstract automation contracts (SERP, content, news, place, marketplace, jobs, social)
5
5
  Author: Sarper Avci
6
6
  License: MIT
@@ -21,8 +21,12 @@ Requires-Dist: click>=8.1.0
21
21
  Provides-Extra: browser
22
22
  Requires-Dist: DrissionPage>=4.1.0; extra == "browser"
23
23
  Requires-Dist: cryptography>=42.0.0; extra == "browser"
24
+ Requires-Dist: mitmproxy>=12.0; extra == "browser"
24
25
  Provides-Extra: cloudflare
25
26
  Requires-Dist: fleet-framework[browser]; extra == "cloudflare"
27
+ Provides-Extra: cloak
28
+ Requires-Dist: fleet-framework[browser]; extra == "cloak"
29
+ Requires-Dist: cloakbrowser>=0.3; extra == "cloak"
26
30
  Provides-Extra: otel
27
31
  Requires-Dist: opentelemetry-api>=1.27.0; extra == "otel"
28
32
  Requires-Dist: opentelemetry-sdk>=1.27.0; extra == "otel"
@@ -31,7 +35,7 @@ Provides-Extra: test
31
35
  Requires-Dist: pytest>=8.0; extra == "test"
32
36
  Requires-Dist: pytest-asyncio>=0.23; extra == "test"
33
37
  Provides-Extra: all
34
- Requires-Dist: fleet-framework[browser,cloudflare,otel]; extra == "all"
38
+ Requires-Dist: fleet-framework[browser,cloak,cloudflare,otel]; extra == "all"
35
39
  Dynamic: license-file
36
40
 
37
41
  # Fleet
@@ -42,10 +42,12 @@ fleet/worker/ws_client.py
42
42
  fleet_browser/__init__.py
43
43
  fleet_browser/browser.py
44
44
  fleet_browser/cert.py
45
+ fleet_browser/cloak.py
45
46
  fleet_browser/fingerprint.py
46
47
  fleet_browser/humanizer.py
47
48
  fleet_browser/pool.py
48
49
  fleet_browser/proxy_extension.py
50
+ fleet_browser/smart_router.py
49
51
  fleet_browser/solver.py
50
52
  fleet_browser/stealth.py
51
53
  fleet_cloudflare/__init__.py
@@ -9,11 +9,16 @@ psutil>=5.9.0
9
9
  click>=8.1.0
10
10
 
11
11
  [all]
12
- fleet-framework[browser,cloudflare,otel]
12
+ fleet-framework[browser,cloak,cloudflare,otel]
13
13
 
14
14
  [browser]
15
15
  DrissionPage>=4.1.0
16
16
  cryptography>=42.0.0
17
+ mitmproxy>=12.0
18
+
19
+ [cloak]
20
+ fleet-framework[browser]
21
+ cloakbrowser>=0.3
17
22
 
18
23
  [cloudflare]
19
24
  fleet-framework[browser]
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "fleet-framework"
7
- version = "0.1.1"
7
+ version = "0.2.0"
8
8
  description = "generic distributed-automation framework — master/worker, browser pool, anti-bot helpers, and abstract automation contracts (SERP, content, news, place, marketplace, jobs, social)"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.11"
@@ -26,10 +26,15 @@ dependencies = [
26
26
  browser = [
27
27
  "DrissionPage>=4.1.0",
28
28
  "cryptography>=42.0.0",
29
+ "mitmproxy>=12.0", # SmartRouter subprocess for per-flow proxy routing
29
30
  ]
30
31
  cloudflare = [
31
32
  "fleet-framework[browser]",
32
33
  ]
34
+ cloak = [
35
+ "fleet-framework[browser]",
36
+ "cloakbrowser>=0.3",
37
+ ]
33
38
  otel = [
34
39
  "opentelemetry-api>=1.27.0",
35
40
  "opentelemetry-sdk>=1.27.0",
@@ -40,7 +45,7 @@ test = [
40
45
  "pytest-asyncio>=0.23",
41
46
  ]
42
47
  all = [
43
- "fleet-framework[browser,cloudflare,otel]",
48
+ "fleet-framework[browser,cloudflare,cloak,otel]",
44
49
  ]
45
50
 
46
51
  [project.scripts]
File without changes