fleet-framework 0.1.2__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/PKG-INFO +2 -1
  2. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_browser/__init__.py +5 -0
  3. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_browser/browser.py +47 -1
  4. fleet_framework-0.2.0/fleet_browser/smart_router.py +354 -0
  5. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_framework.egg-info/PKG-INFO +2 -1
  6. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_framework.egg-info/SOURCES.txt +1 -0
  7. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_framework.egg-info/requires.txt +1 -0
  8. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/pyproject.toml +2 -1
  9. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/LICENSE +0 -0
  10. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/README.md +0 -0
  11. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/__init__.py +0 -0
  12. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/cli.py +0 -0
  13. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/core/__init__.py +0 -0
  14. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/core/automation.py +0 -0
  15. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/core/backend.py +0 -0
  16. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/core/config.py +0 -0
  17. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/core/context.py +0 -0
  18. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/core/contract.py +0 -0
  19. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/core/country_presets.py +0 -0
  20. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/core/events.py +0 -0
  21. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/core/local_runner.py +0 -0
  22. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/core/logging.py +0 -0
  23. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/core/memory_backend.py +0 -0
  24. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/core/metrics.py +0 -0
  25. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/core/otel.py +0 -0
  26. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/core/primitives.py +0 -0
  27. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/core/protocol.py +0 -0
  28. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/core/proxy.py +0 -0
  29. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/core/reconcile.py +0 -0
  30. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/core/sqlite_backend.py +0 -0
  31. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/core/store.py +0 -0
  32. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/master/__init__.py +0 -0
  33. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/master/api.py +0 -0
  34. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/master/app.py +0 -0
  35. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/master/auth.py +0 -0
  36. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/master/broadcaster.py +0 -0
  37. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/master/dashboard/__init__.py +0 -0
  38. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/master/dashboard/router.py +0 -0
  39. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/master/dashboard/static/style.css +0 -0
  40. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/master/dashboard/templates/index.html +0 -0
  41. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/master/metrics_route.py +0 -0
  42. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/master/ratelimit.py +0 -0
  43. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/master/ws_router.py +0 -0
  44. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/worker/__init__.py +0 -0
  45. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/worker/agent.py +0 -0
  46. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/worker/reconcile_loop.py +0 -0
  47. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/worker/slot_runner.py +0 -0
  48. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/worker/ws_client.py +0 -0
  49. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_browser/cert.py +0 -0
  50. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_browser/cloak.py +0 -0
  51. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_browser/fingerprint.py +0 -0
  52. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_browser/humanizer.py +0 -0
  53. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_browser/pool.py +0 -0
  54. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_browser/proxy_extension.py +0 -0
  55. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_browser/solver.py +0 -0
  56. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_browser/stealth.py +0 -0
  57. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_cloudflare/__init__.py +0 -0
  58. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_cloudflare/bypasser.py +0 -0
  59. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_cloudflare/harvest.py +0 -0
  60. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_cloudflare/replay.py +0 -0
  61. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_cloudflare/solver.py +0 -0
  62. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_content/__init__.py +0 -0
  63. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_content/automation.py +0 -0
  64. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_content/contracts.py +0 -0
  65. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_detect/__init__.py +0 -0
  66. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_detect/contracts.py +0 -0
  67. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_detect/detect.py +0 -0
  68. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_framework.egg-info/dependency_links.txt +0 -0
  69. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_framework.egg-info/entry_points.txt +0 -0
  70. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_framework.egg-info/top_level.txt +0 -0
  71. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_headers/__init__.py +0 -0
  72. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_headers/profiles.py +0 -0
  73. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_jobs/__init__.py +0 -0
  74. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_jobs/automation.py +0 -0
  75. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_jobs/contracts.py +0 -0
  76. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_marketplace/__init__.py +0 -0
  77. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_marketplace/automation.py +0 -0
  78. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_marketplace/contracts.py +0 -0
  79. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_news/__init__.py +0 -0
  80. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_news/automation.py +0 -0
  81. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_news/contracts.py +0 -0
  82. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_place/__init__.py +0 -0
  83. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_place/automation.py +0 -0
  84. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_place/contracts.py +0 -0
  85. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_provider_dataimpulse/__init__.py +0 -0
  86. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_provider_evomi/__init__.py +0 -0
  87. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_serp/__init__.py +0 -0
  88. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_serp/automation.py +0 -0
  89. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_serp/contracts.py +0 -0
  90. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_social/__init__.py +0 -0
  91. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_social/automation.py +0 -0
  92. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_social/contracts.py +0 -0
  93. {fleet_framework-0.1.2 → fleet_framework-0.2.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fleet-framework
3
- Version: 0.1.2
3
+ Version: 0.2.0
4
4
  Summary: generic distributed-automation framework — master/worker, browser pool, anti-bot helpers, and abstract automation contracts (SERP, content, news, place, marketplace, jobs, social)
5
5
  Author: Sarper Avci
6
6
  License: MIT
@@ -21,6 +21,7 @@ Requires-Dist: click>=8.1.0
21
21
  Provides-Extra: browser
22
22
  Requires-Dist: DrissionPage>=4.1.0; extra == "browser"
23
23
  Requires-Dist: cryptography>=42.0.0; extra == "browser"
24
+ Requires-Dist: mitmproxy>=12.0; extra == "browser"
24
25
  Provides-Extra: cloudflare
25
26
  Requires-Dist: fleet-framework[browser]; extra == "cloudflare"
26
27
  Provides-Extra: cloak
@@ -3,9 +3,11 @@ from fleet_browser.fingerprint import Fingerprint, FingerprintFactory
3
3
  from fleet_browser.humanizer import Humanizer
4
4
  from fleet_browser.pool import BrowserPool, slot
5
5
  from fleet_browser.proxy_extension import build_proxy_auth_extension, parse_proxy_url
6
+ from fleet_browser.smart_router import DEFAULT_RULES, RouterStats, SmartRouter, SmartRule
6
7
  from fleet_browser.stealth import FingerprintStealth, NoOpStealth, Stealth
7
8
 
8
9
  __all__ = [
10
+ "DEFAULT_RULES",
9
11
  "BrowserConfig",
10
12
  "BrowserPool",
11
13
  "ChromiumWorker",
@@ -14,6 +16,9 @@ __all__ = [
14
16
  "FingerprintStealth",
15
17
  "Humanizer",
16
18
  "NoOpStealth",
19
+ "RouterStats",
20
+ "SmartRouter",
21
+ "SmartRule",
17
22
  "Stealth",
18
23
  "build_proxy_auth_extension",
19
24
  "parse_proxy_url",
@@ -20,6 +20,7 @@ from fleet_browser.cloak import (
20
20
  resolve_engine,
21
21
  )
22
22
  from fleet_browser.proxy_extension import build_proxy_auth_extension, parse_proxy_url
23
+ from fleet_browser.smart_router import DEFAULT_RULES, SmartRouter, SmartRule
23
24
  from fleet_browser.stealth import Stealth
24
25
 
25
26
  logger = logging.getLogger(__name__)
@@ -159,12 +160,31 @@ class BrowserConfig:
159
160
  IP when using residential — otherwise WebRTC leaks your real LAN IP
160
161
  and CF correlates the mismatch."""
161
162
 
163
+ smart_routing: bool = True
164
+ """When True AND `proxy` is set, spawn a local mitmproxy that does per-flow
165
+ routing: only the rules-matching flows (e.g. /search) go through the paid
166
+ upstream, everything else goes direct. Cuts residential bandwidth ~85–95%
167
+ on Google/Bing/etc. without changing the SERP fetch's IP. Disable only when
168
+ you specifically need every byte through the proxy (rare)."""
169
+
170
+ smart_routing_rules: tuple[SmartRule, ...] = DEFAULT_RULES
171
+ """Per-flow routing rules. Built-ins handle Google/Bing/Yahoo/DDG /search
172
+ plus a block-list of trackers. Plugins can extend with additional rules
173
+ via tuple concat: `DEFAULT_RULES + (SmartRule(host_match="amazon.com", path_prefix="/s"),)`."""
174
+
162
175
 
163
176
  class ChromiumWorker:
164
177
 
165
178
  def __init__(self, config: BrowserConfig) -> None:
166
179
  self.config = config
167
180
  self.page: Optional[ChromiumPage] = None
181
+ self._smart_router: Optional[SmartRouter] = None
182
+
183
+ @property
184
+ def smart_router(self) -> Optional[SmartRouter]:
185
+ """The active SmartRouter for this worker, or None if smart routing
186
+ wasn't engaged. Stats are populated after `stop()` runs."""
187
+ return self._smart_router
168
188
  self._user_data_dir: Optional[Path] = None
169
189
  self._owns_user_data_dir = False
170
190
 
@@ -243,7 +263,26 @@ class ChromiumWorker:
243
263
  opts.set_argument("--headless=new")
244
264
 
245
265
  if self.config.proxy:
246
- proxy_spec = parse_proxy_url(self.config.proxy)
266
+ # When smart_routing is on, spawn a SmartRouter and point Chromium
267
+ # at THAT instead of the upstream proxy. The router decides per-flow
268
+ # whether to forward upstream (paid) or direct (free). 85-95%
269
+ # bandwidth reduction on search engines.
270
+ effective_proxy = self.config.proxy
271
+ if self.config.smart_routing:
272
+ try:
273
+ self._smart_router = SmartRouter(
274
+ upstream_proxy=self.config.proxy,
275
+ rules=self.config.smart_routing_rules,
276
+ )
277
+ self._smart_router.start()
278
+ effective_proxy = self._smart_router.proxy_url
279
+ except Exception:
280
+ logger.exception(
281
+ "smart-router failed to start, falling back to direct upstream proxy"
282
+ )
283
+ self._smart_router = None
284
+
285
+ proxy_spec = parse_proxy_url(effective_proxy)
247
286
  # Chromium 122+ accepts user:pass in --proxy-server. The MV2
248
287
  # auth-extension is kept as a fallback for older Chromium.
249
288
  if proxy_spec.has_auth:
@@ -312,6 +351,13 @@ class ChromiumWorker:
312
351
  pass
313
352
  self.page = None
314
353
 
354
+ if self._smart_router is not None:
355
+ try:
356
+ self._smart_router.stop()
357
+ except Exception:
358
+ logger.exception("[browser] smart-router stop failed")
359
+ # Keep the reference so callers can read stats after stop.
360
+
315
361
  # SIGKILL anything still running with our --user-data-dir; page.quit
316
362
  # sometimes leaves renderer/zygote/utility children orphaned to PID 1.
317
363
  if self._user_data_dir is not None:
@@ -0,0 +1,354 @@
1
+ """Smart per-flow proxy router for browser-based automation.
2
+
3
+ The problem this solves: residential proxy bandwidth costs $0.50-1.00/GB.
4
+ A vanilla Chromium load of a Google SERP through a residential proxy burns
5
+ 5+ MB per query (Google's own ML model downloads, gstatic CDN assets, JS
6
+ bundles). Only the search-result document actually needs the residential
7
+ IP — Google's bot-wall scores the document fetch, not its subresources.
8
+
9
+ How it works: a local mitmproxy subprocess acts as Chromium's proxy. Per-flow
10
+ the addon decides:
11
+
12
+ - HOST + PATH matches a "paid" rule → forward through the upstream
13
+ residential proxy → captures the IP-reputation signal
14
+ - Otherwise → flow.server_conn.via = None → forward direct → free, doesn't
15
+ affect the SERP fetch
16
+ - Matches a "block" rule → return 204 immediately
17
+
18
+ In our measurements: 1.31 MB → 0.17 MB paid per Google query. 87% reduction.
19
+
20
+ ChromiumWorker spins up a SmartRouter automatically when `BrowserConfig.proxy`
21
+ is set AND `smart_routing=True` (the default). Per-process subprocess. Stats
22
+ (paid/direct/blocked bytes + counts) are written to a temp file and read on
23
+ stop, so callers can attribute cost per task.
24
+ """
25
+
26
+ from __future__ import annotations
27
+
28
+ import json
29
+ import logging
30
+ import os
31
+ import shutil
32
+ import signal
33
+ import socket
34
+ import subprocess
35
+ import tempfile
36
+ import textwrap
37
+ import time
38
+ from dataclasses import asdict, dataclass, field
39
+ from pathlib import Path
40
+ from typing import Optional
41
+
42
+ logger = logging.getLogger(__name__)
43
+
44
+
45
+ @dataclass(frozen=True)
46
+ class SmartRule:
47
+ """One routing rule. Fields are AND-ed when both are set.
48
+
49
+ `host_match` is matched as a SUFFIX so `google.com` covers www.google.com,
50
+ images.google.com, etc. Use a leading "." for exact subdomain ("google.com"
51
+ matches "google.com" but ".google.com" matches "*.google.com" only).
52
+
53
+ `path_prefix` is a literal prefix on the URL path. Empty = match any path.
54
+
55
+ `action`:
56
+ - "paid" → forward through the upstream residential proxy
57
+ - "direct" → forward direct, bypassing the upstream
58
+ - "block" → drop the request (returns 204; useful for trackers/ads)
59
+ """
60
+
61
+ host_match: str
62
+ path_prefix: str = ""
63
+ action: str = "paid"
64
+
65
+
66
+ DEFAULT_RULES: tuple[SmartRule, ...] = (
67
+ SmartRule(host_match="www.google.com", path_prefix="/search", action="paid"),
68
+ SmartRule(host_match="www.bing.com", path_prefix="/search", action="paid"),
69
+ SmartRule(host_match="search.yahoo.com", path_prefix="/search", action="paid"),
70
+ SmartRule(host_match="duckduckgo.com", path_prefix="/?q", action="paid"),
71
+ SmartRule(host_match="duckduckgo.com", path_prefix="/html", action="paid"),
72
+ # Block known-noise endpoints we never want to fetch (also free, so the
73
+ # blocking saves on direct bandwidth + latency, not paid cost).
74
+ SmartRule(host_match="googletagmanager.com", action="block"),
75
+ SmartRule(host_match="googlesyndication.com", action="block"),
76
+ SmartRule(host_match="googleadservices.com", action="block"),
77
+ SmartRule(host_match="doubleclick.net", action="block"),
78
+ SmartRule(host_match="google-analytics.com", action="block"),
79
+ SmartRule(host_match="optimizationguide-pa.googleapis.com", action="block"),
80
+ )
81
+
82
+
83
+ @dataclass
84
+ class RouterStats:
85
+ paid_bytes: int = 0
86
+ paid_requests: int = 0
87
+ direct_bytes: int = 0
88
+ direct_requests: int = 0
89
+ blocked_requests: int = 0
90
+
91
+
92
def _pick_free_port() -> int:
    """Return a TCP port number the OS reports as free on 127.0.0.1.

    Binds a throwaway socket to port 0 so the kernel chooses the port, reads
    the assigned number back, and closes the socket before returning.
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        probe.bind(("127.0.0.1", 0))
        _host, port = probe.getsockname()
        return port
    finally:
        probe.close()
96
+
97
+
98
def _is_port_open(host: str, port: int) -> bool:
    """Report whether a TCP connection to (host, port) succeeds within 150 ms.

    Any socket-level failure (refused, timed out, unreachable, ...) counts as
    "not open"; nothing is raised for those.
    """
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
        probe.settimeout(0.15)
        try:
            probe.connect((host, port))
        except OSError:
            # socket.timeout and ConnectionRefusedError are OSError subclasses.
            return False
        return True
106
+
107
+
108
def _find_mitmdump() -> Optional[str]:
    """Locate the mitmdump binary.

    Search order:
      1. `PATH` (via `shutil.which`).
      2. The directory of the running interpreter — where pip places console
         scripts when mitmproxy is installed into the same (possibly
         non-activated) virtualenv.
      3. A `.venv/bin` directory two levels above this module (source checkout).
      4. `~/.local/bin` (pip `--user` installs).

    Returns:
        The path as a string, or None if no executable file was found.
    """
    import sys

    on_path = shutil.which("mitmdump")
    if on_path:
        return on_path

    candidates = []
    if sys.executable:
        candidates.append(Path(sys.executable).parent / "mitmdump")
    module_parents = Path(__file__).resolve().parents
    # Guard the index: a module living close to the filesystem root has fewer
    # than three parents.
    if len(module_parents) > 2:
        candidates.append(module_parents[2] / ".venv" / "bin" / "mitmdump")
    candidates.append(Path.home() / ".local" / "bin" / "mitmdump")

    for candidate in candidates:
        # is_file(): a directory that happens to be named "mitmdump" is not a binary.
        if candidate.is_file() and os.access(candidate, os.X_OK):
            return str(candidate)
    return None
122
+
123
+
124
+ @dataclass
125
+ class SmartRouter:
126
+ """Embedded mitmproxy subprocess that does per-flow upstream routing.
127
+
128
+ Lifecycle: start() spawns mitmdump, blocks until its listen port is open;
129
+ stop() sends SIGTERM and reads the stats file. Designed for one router
130
+ per ChromiumWorker — cheap to spin up (~150ms cold start).
131
+ """
132
+
133
+ upstream_proxy: str
134
+ """Upstream residential proxy URL with auth: http://user:pass@host:port."""
135
+
136
+ rules: tuple[SmartRule, ...] = field(default_factory=lambda: DEFAULT_RULES)
137
+ """Routing rules. First matching rule wins. Default for non-matches: direct."""
138
+
139
+ default_action: str = "direct"
140
+ """Action when no rule matches. "direct" = free, "paid" = everything paid."""
141
+
142
+ listen_host: str = "127.0.0.1"
143
+ listen_port: int = 0
144
+ """0 = auto-pick free port."""
145
+
146
+ _proc: Optional[subprocess.Popen] = field(default=None, repr=False, init=False)
147
+ _stats_path: Optional[Path] = field(default=None, repr=False, init=False)
148
+ _addon_path: Optional[Path] = field(default=None, repr=False, init=False)
149
+ _stats: RouterStats = field(default_factory=RouterStats, init=False)
150
+
151
+ @property
152
+ def proxy_url(self) -> str:
153
+ return f"http://{self.listen_host}:{self.listen_port}"
154
+
155
+ @property
156
+ def stats(self) -> RouterStats:
157
+ return self._stats
158
+
159
+ def start(self, *, startup_timeout: float = 10.0) -> None:
160
+ mitmdump = _find_mitmdump()
161
+ if mitmdump is None:
162
+ raise RuntimeError(
163
+ "SmartRouter requires mitmproxy. "
164
+ "Install with: pip install 'fleet-framework[browser]'"
165
+ )
166
+ if self.listen_port == 0:
167
+ self.listen_port = _pick_free_port()
168
+
169
+ # Strip auth from upstream URL and pass separately.
170
+ from urllib.parse import urlparse
171
+ u = urlparse(self.upstream_proxy)
172
+ upstream_clean = f"{u.scheme}://{u.hostname}:{u.port or 80}"
173
+ upstream_auth = f"{u.username}:{u.password}" if u.username else None
174
+
175
+ # Persist stats to a tmp file the subprocess writes to.
176
+ self._stats_path = Path(tempfile.mktemp(prefix="smart-router-stats-", suffix=".json"))
177
+ # Generate the addon script — embeds the rules so the subprocess has them.
178
+ rules_repr = [asdict(r) for r in self.rules]
179
+ self._addon_path = Path(tempfile.mktemp(prefix="smart-router-addon-", suffix=".py"))
180
+ self._addon_path.write_text(_render_addon_script(
181
+ rules_repr, self.default_action, str(self._stats_path)
182
+ ))
183
+
184
+ cmd = [
185
+ mitmdump,
186
+ "-p", str(self.listen_port),
187
+ "--mode", f"upstream:{upstream_clean}",
188
+ "--set", "ssl_insecure=true",
189
+ "-s", str(self._addon_path),
190
+ "--quiet",
191
+ ]
192
+ if upstream_auth:
193
+ cmd.extend(["--upstream-auth", upstream_auth])
194
+
195
+ self._proc = subprocess.Popen(
196
+ cmd,
197
+ stdout=subprocess.DEVNULL,
198
+ stderr=subprocess.PIPE,
199
+ start_new_session=True,
200
+ )
201
+ # Wait for the listen socket.
202
+ deadline = time.monotonic() + startup_timeout
203
+ while time.monotonic() < deadline:
204
+ if _is_port_open(self.listen_host, self.listen_port):
205
+ logger.info(
206
+ "smart-router @ %s, %d rules, upstream=%s",
207
+ self.proxy_url, len(self.rules), upstream_clean,
208
+ )
209
+ return
210
+ if self._proc.poll() is not None:
211
+ err = (self._proc.stderr.read() if self._proc.stderr else b"").decode(errors="replace")
212
+ self._cleanup_files()
213
+ raise RuntimeError(f"SmartRouter mitmdump exited early: {err[:400]}")
214
+ time.sleep(0.1)
215
+ self.stop()
216
+ raise RuntimeError(f"SmartRouter failed to start within {startup_timeout}s")
217
+
218
+ def stop(self) -> None:
219
+ # Read stats BEFORE killing the process — the addon flushes on every
220
+ # response, so the file is current up to the last completed flow.
221
+ self._read_stats()
222
+ if self._proc is not None:
223
+ try:
224
+ # Term the whole process group; mitmdump spawns helper threads.
225
+ os.killpg(os.getpgid(self._proc.pid), signal.SIGTERM)
226
+ except (ProcessLookupError, PermissionError, OSError):
227
+ pass
228
+ try:
229
+ self._proc.wait(timeout=3)
230
+ except subprocess.TimeoutExpired:
231
+ try:
232
+ os.killpg(os.getpgid(self._proc.pid), signal.SIGKILL)
233
+ except (ProcessLookupError, OSError):
234
+ pass
235
+ self._proc.wait(timeout=2)
236
+ self._proc = None
237
+ # Re-read stats one more time in case any flows finished between the
238
+ # first read and the SIGTERM.
239
+ self._read_stats()
240
+ self._cleanup_files()
241
+ logger.info(
242
+ "smart-router stopped. paid=%d B / %d req, direct=%d B / %d req, blocked=%d req",
243
+ self._stats.paid_bytes, self._stats.paid_requests,
244
+ self._stats.direct_bytes, self._stats.direct_requests,
245
+ self._stats.blocked_requests,
246
+ )
247
+
248
+ def __enter__(self) -> "SmartRouter":
249
+ self.start()
250
+ return self
251
+
252
+ def __exit__(self, *exc) -> None:
253
+ self.stop()
254
+
255
+ def _read_stats(self) -> None:
256
+ if self._stats_path is None or not self._stats_path.exists():
257
+ return
258
+ try:
259
+ data = json.loads(self._stats_path.read_text() or "{}")
260
+ self._stats = RouterStats(
261
+ paid_bytes=int(data.get("paid_bytes", 0)),
262
+ paid_requests=int(data.get("paid_requests", 0)),
263
+ direct_bytes=int(data.get("direct_bytes", 0)),
264
+ direct_requests=int(data.get("direct_requests", 0)),
265
+ blocked_requests=int(data.get("blocked_requests", 0)),
266
+ )
267
+ except (json.JSONDecodeError, ValueError, OSError):
268
+ logger.debug("smart-router stats file unreadable", exc_info=True)
269
+
270
+ def _cleanup_files(self) -> None:
271
+ for p in (self._stats_path, self._addon_path):
272
+ if p is not None and p.exists():
273
+ try:
274
+ p.unlink()
275
+ except OSError:
276
+ pass
277
+
278
+
279
def _render_addon_script(rules: list[dict], default_action: str, stats_path: str) -> str:
    """Generate the source of the mitmproxy addon script.

    The rules, the default action and the stats path are embedded as literals
    so the subprocess has them at startup without a separate config file.

    Args:
        rules: dicts with the keys "host_match", "path_prefix" and "action".
        default_action: action used when no rule matches.
        stats_path: file the addon rewrites with its counters (as JSON) after
            every blocked request and every response.

    Returns:
        Python source text for `mitmdump -s`.
    """
    # NOTE: literal braces in the template are doubled because it is an f-string.
    return textwrap.dedent(f"""
        import json
        import os
        from mitmproxy import http

        RULES = json.loads({json.dumps(rules)!r})
        DEFAULT_ACTION = {default_action!r}
        STATS_PATH = {stats_path!r}

        _stats = {{
            "paid_bytes": 0, "paid_requests": 0,
            "direct_bytes": 0, "direct_requests": 0,
            "blocked_requests": 0,
        }}

        def _host_matches(host, pattern):
            # Host names are case-insensitive; ignore a trailing root dot.
            host = host.lower().rstrip(".")
            pattern = pattern.lower()
            if not pattern:
                # Empty pattern = any host.
                return True
            if pattern.startswith("."):
                # Leading dot: subdomains only, never the bare domain.
                return host.endswith(pattern)
            # Match on a label boundary so "google.com" does not also match
            # "notgoogle.com".
            return host == pattern or host.endswith("." + pattern)

        def _pick_action(host, path):
            for r in RULES:
                if not _host_matches(host, r["host_match"]):
                    continue
                if r["path_prefix"] and not path.startswith(r["path_prefix"]):
                    continue
                return r["action"]
            return DEFAULT_ACTION

        def _persist():
            # Write to a sibling file and rename it into place so a reader
            # never observes a truncated or half-written stats file.
            tmp = STATS_PATH + ".tmp"
            try:
                with open(tmp, "w", encoding="utf-8") as fh:
                    json.dump(_stats, fh)
                os.replace(tmp, STATS_PATH)
            except OSError:
                pass

        def request(flow):
            action = _pick_action(flow.request.pretty_host, flow.request.path)
            flow.metadata["smart_action"] = action
            if action == "block":
                _stats["blocked_requests"] += 1
                flow.response = http.Response.make(
                    204, b"", {{"x-smart-router": "blocked"}}
                )
                _persist()
                return
            if action == "direct" and flow.server_conn.via is not None:
                try:
                    flow.server_conn.via = None
                except Exception:
                    # The upstream could not be detached, so this flow still
                    # travels through it: account for it as paid.
                    flow.metadata["smart_action"] = "paid"

        def response(flow):
            action = flow.metadata.get("smart_action", "direct")
            if action == "block":
                # Already counted and persisted in request().
                return
            body = flow.response.raw_content or b""
            resp_h = sum(len(k) + len(v) + 4 for k, v in flow.response.headers.items())
            req_h = sum(len(k) + len(v) + 4 for k, v in flow.request.headers.items())
            wire = len(body) + resp_h + req_h + len(flow.request.raw_content or b"")
            if action == "direct":
                _stats["direct_bytes"] += wire
                _stats["direct_requests"] += 1
            else:
                # "paid", or an unrecognised action: request() only detaches
                # the upstream for "direct", so everything else went through it.
                _stats["paid_bytes"] += wire
                _stats["paid_requests"] += 1
            _persist()
    """).strip()
347
+
348
+
349
+ __all__ = [
350
+ "DEFAULT_RULES",
351
+ "RouterStats",
352
+ "SmartRouter",
353
+ "SmartRule",
354
+ ]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: fleet-framework
3
- Version: 0.1.2
3
+ Version: 0.2.0
4
4
  Summary: generic distributed-automation framework — master/worker, browser pool, anti-bot helpers, and abstract automation contracts (SERP, content, news, place, marketplace, jobs, social)
5
5
  Author: Sarper Avci
6
6
  License: MIT
@@ -21,6 +21,7 @@ Requires-Dist: click>=8.1.0
21
21
  Provides-Extra: browser
22
22
  Requires-Dist: DrissionPage>=4.1.0; extra == "browser"
23
23
  Requires-Dist: cryptography>=42.0.0; extra == "browser"
24
+ Requires-Dist: mitmproxy>=12.0; extra == "browser"
24
25
  Provides-Extra: cloudflare
25
26
  Requires-Dist: fleet-framework[browser]; extra == "cloudflare"
26
27
  Provides-Extra: cloak
@@ -47,6 +47,7 @@ fleet_browser/fingerprint.py
47
47
  fleet_browser/humanizer.py
48
48
  fleet_browser/pool.py
49
49
  fleet_browser/proxy_extension.py
50
+ fleet_browser/smart_router.py
50
51
  fleet_browser/solver.py
51
52
  fleet_browser/stealth.py
52
53
  fleet_cloudflare/__init__.py
@@ -14,6 +14,7 @@ fleet-framework[browser,cloak,cloudflare,otel]
14
14
  [browser]
15
15
  DrissionPage>=4.1.0
16
16
  cryptography>=42.0.0
17
+ mitmproxy>=12.0
17
18
 
18
19
  [cloak]
19
20
  fleet-framework[browser]
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "fleet-framework"
7
- version = "0.1.2"
7
+ version = "0.2.0"
8
8
  description = "generic distributed-automation framework — master/worker, browser pool, anti-bot helpers, and abstract automation contracts (SERP, content, news, place, marketplace, jobs, social)"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.11"
@@ -26,6 +26,7 @@ dependencies = [
26
26
  browser = [
27
27
  "DrissionPage>=4.1.0",
28
28
  "cryptography>=42.0.0",
29
+ "mitmproxy>=12.0", # SmartRouter subprocess for per-flow proxy routing
29
30
  ]
30
31
  cloudflare = [
31
32
  "fleet-framework[browser]",
File without changes