fleet-framework 0.1.2__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/PKG-INFO +2 -1
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_browser/__init__.py +5 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_browser/browser.py +47 -1
- fleet_framework-0.2.0/fleet_browser/smart_router.py +354 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_framework.egg-info/PKG-INFO +2 -1
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_framework.egg-info/SOURCES.txt +1 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_framework.egg-info/requires.txt +1 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/pyproject.toml +2 -1
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/LICENSE +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/README.md +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/__init__.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/cli.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/core/__init__.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/core/automation.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/core/backend.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/core/config.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/core/context.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/core/contract.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/core/country_presets.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/core/events.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/core/local_runner.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/core/logging.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/core/memory_backend.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/core/metrics.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/core/otel.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/core/primitives.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/core/protocol.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/core/proxy.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/core/reconcile.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/core/sqlite_backend.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/core/store.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/master/__init__.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/master/api.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/master/app.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/master/auth.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/master/broadcaster.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/master/dashboard/__init__.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/master/dashboard/router.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/master/dashboard/static/style.css +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/master/dashboard/templates/index.html +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/master/metrics_route.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/master/ratelimit.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/master/ws_router.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/worker/__init__.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/worker/agent.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/worker/reconcile_loop.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/worker/slot_runner.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet/worker/ws_client.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_browser/cert.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_browser/cloak.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_browser/fingerprint.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_browser/humanizer.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_browser/pool.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_browser/proxy_extension.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_browser/solver.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_browser/stealth.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_cloudflare/__init__.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_cloudflare/bypasser.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_cloudflare/harvest.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_cloudflare/replay.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_cloudflare/solver.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_content/__init__.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_content/automation.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_content/contracts.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_detect/__init__.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_detect/contracts.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_detect/detect.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_framework.egg-info/dependency_links.txt +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_framework.egg-info/entry_points.txt +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_framework.egg-info/top_level.txt +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_headers/__init__.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_headers/profiles.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_jobs/__init__.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_jobs/automation.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_jobs/contracts.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_marketplace/__init__.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_marketplace/automation.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_marketplace/contracts.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_news/__init__.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_news/automation.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_news/contracts.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_place/__init__.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_place/automation.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_place/contracts.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_provider_dataimpulse/__init__.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_provider_evomi/__init__.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_serp/__init__.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_serp/automation.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_serp/contracts.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_social/__init__.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_social/automation.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_social/contracts.py +0 -0
- {fleet_framework-0.1.2 → fleet_framework-0.2.0}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: fleet-framework
|
|
3
|
-
Version: 0.1.2
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: generic distributed-automation framework — master/worker, browser pool, anti-bot helpers, and abstract automation contracts (SERP, content, news, place, marketplace, jobs, social)
|
|
5
5
|
Author: Sarper Avci
|
|
6
6
|
License: MIT
|
|
@@ -21,6 +21,7 @@ Requires-Dist: click>=8.1.0
|
|
|
21
21
|
Provides-Extra: browser
|
|
22
22
|
Requires-Dist: DrissionPage>=4.1.0; extra == "browser"
|
|
23
23
|
Requires-Dist: cryptography>=42.0.0; extra == "browser"
|
|
24
|
+
Requires-Dist: mitmproxy>=12.0; extra == "browser"
|
|
24
25
|
Provides-Extra: cloudflare
|
|
25
26
|
Requires-Dist: fleet-framework[browser]; extra == "cloudflare"
|
|
26
27
|
Provides-Extra: cloak
|
|
@@ -3,9 +3,11 @@ from fleet_browser.fingerprint import Fingerprint, FingerprintFactory
|
|
|
3
3
|
from fleet_browser.humanizer import Humanizer
|
|
4
4
|
from fleet_browser.pool import BrowserPool, slot
|
|
5
5
|
from fleet_browser.proxy_extension import build_proxy_auth_extension, parse_proxy_url
|
|
6
|
+
from fleet_browser.smart_router import DEFAULT_RULES, RouterStats, SmartRouter, SmartRule
|
|
6
7
|
from fleet_browser.stealth import FingerprintStealth, NoOpStealth, Stealth
|
|
7
8
|
|
|
8
9
|
__all__ = [
|
|
10
|
+
"DEFAULT_RULES",
|
|
9
11
|
"BrowserConfig",
|
|
10
12
|
"BrowserPool",
|
|
11
13
|
"ChromiumWorker",
|
|
@@ -14,6 +16,9 @@ __all__ = [
|
|
|
14
16
|
"FingerprintStealth",
|
|
15
17
|
"Humanizer",
|
|
16
18
|
"NoOpStealth",
|
|
19
|
+
"RouterStats",
|
|
20
|
+
"SmartRouter",
|
|
21
|
+
"SmartRule",
|
|
17
22
|
"Stealth",
|
|
18
23
|
"build_proxy_auth_extension",
|
|
19
24
|
"parse_proxy_url",
|
|
@@ -20,6 +20,7 @@ from fleet_browser.cloak import (
|
|
|
20
20
|
resolve_engine,
|
|
21
21
|
)
|
|
22
22
|
from fleet_browser.proxy_extension import build_proxy_auth_extension, parse_proxy_url
|
|
23
|
+
from fleet_browser.smart_router import DEFAULT_RULES, SmartRouter, SmartRule
|
|
23
24
|
from fleet_browser.stealth import Stealth
|
|
24
25
|
|
|
25
26
|
logger = logging.getLogger(__name__)
|
|
@@ -159,12 +160,31 @@ class BrowserConfig:
|
|
|
159
160
|
IP when using residential — otherwise WebRTC leaks your real LAN IP
|
|
160
161
|
and CF correlates the mismatch."""
|
|
161
162
|
|
|
163
|
+
smart_routing: bool = True
|
|
164
|
+
"""When True AND `proxy` is set, spawn a local mitmproxy that does per-flow
|
|
165
|
+
routing: only the rules-matching flows (e.g. /search) go through the paid
|
|
166
|
+
upstream, everything else goes direct. Cuts residential bandwidth ~85–95%
|
|
167
|
+
on Google/Bing/etc. without changing the SERP fetch's IP. Disable only when
|
|
168
|
+
you specifically need every byte through the proxy (rare)."""
|
|
169
|
+
|
|
170
|
+
smart_routing_rules: tuple[SmartRule, ...] = DEFAULT_RULES
|
|
171
|
+
"""Per-flow routing rules. Built-ins handle Google/Bing/Yahoo/DDG /search
|
|
172
|
+
plus a block-list of trackers. Plugins can extend with additional rules
|
|
173
|
+
via tuple concat: `DEFAULT_RULES + (SmartRule(host="amazon.com", path="/s"),)`."""
|
|
174
|
+
|
|
162
175
|
|
|
163
176
|
class ChromiumWorker:
|
|
164
177
|
|
|
165
178
|
def __init__(self, config: BrowserConfig) -> None:
|
|
166
179
|
self.config = config
|
|
167
180
|
self.page: Optional[ChromiumPage] = None
|
|
181
|
+
self._smart_router: Optional[SmartRouter] = None
|
|
182
|
+
|
|
183
|
+
@property
|
|
184
|
+
def smart_router(self) -> Optional[SmartRouter]:
|
|
185
|
+
"""The active SmartRouter for this worker, or None if smart routing
|
|
186
|
+
wasn't engaged. Stats are populated after `stop()` runs."""
|
|
187
|
+
return self._smart_router
|
|
168
188
|
self._user_data_dir: Optional[Path] = None
|
|
169
189
|
self._owns_user_data_dir = False
|
|
170
190
|
|
|
@@ -243,7 +263,26 @@ class ChromiumWorker:
|
|
|
243
263
|
opts.set_argument("--headless=new")
|
|
244
264
|
|
|
245
265
|
if self.config.proxy:
|
|
246
|
-
|
|
266
|
+
# When smart_routing is on, spawn a SmartRouter and point Chromium
|
|
267
|
+
# at THAT instead of the upstream proxy. The router decides per-flow
|
|
268
|
+
# whether to forward upstream (paid) or direct (free). 85-95%
|
|
269
|
+
# bandwidth reduction on search engines.
|
|
270
|
+
effective_proxy = self.config.proxy
|
|
271
|
+
if self.config.smart_routing:
|
|
272
|
+
try:
|
|
273
|
+
self._smart_router = SmartRouter(
|
|
274
|
+
upstream_proxy=self.config.proxy,
|
|
275
|
+
rules=self.config.smart_routing_rules,
|
|
276
|
+
)
|
|
277
|
+
self._smart_router.start()
|
|
278
|
+
effective_proxy = self._smart_router.proxy_url
|
|
279
|
+
except Exception:
|
|
280
|
+
logger.exception(
|
|
281
|
+
"smart-router failed to start, falling back to direct upstream proxy"
|
|
282
|
+
)
|
|
283
|
+
self._smart_router = None
|
|
284
|
+
|
|
285
|
+
proxy_spec = parse_proxy_url(effective_proxy)
|
|
247
286
|
# Chromium 122+ accepts user:pass in --proxy-server. The MV2
|
|
248
287
|
# auth-extension is kept as a fallback for older Chromium.
|
|
249
288
|
if proxy_spec.has_auth:
|
|
@@ -312,6 +351,13 @@ class ChromiumWorker:
|
|
|
312
351
|
pass
|
|
313
352
|
self.page = None
|
|
314
353
|
|
|
354
|
+
if self._smart_router is not None:
|
|
355
|
+
try:
|
|
356
|
+
self._smart_router.stop()
|
|
357
|
+
except Exception:
|
|
358
|
+
logger.exception("[browser] smart-router stop failed")
|
|
359
|
+
# Keep the reference so callers can read stats after stop.
|
|
360
|
+
|
|
315
361
|
# SIGKILL anything still running with our --user-data-dir; page.quit
|
|
316
362
|
# sometimes leaves renderer/zygote/utility children orphaned to PID 1.
|
|
317
363
|
if self._user_data_dir is not None:
|
|
@@ -0,0 +1,354 @@
|
|
|
1
|
+
"""Smart per-flow proxy router for browser-based automation.
|
|
2
|
+
|
|
3
|
+
The problem this solves: residential proxy bandwidth costs $0.50-1.00/GB.
|
|
4
|
+
A vanilla Chromium load of a Google SERP through a residential proxy burns
|
|
5
|
+
5+ MB per query (Google's own ML model downloads, gstatic CDN assets, JS
|
|
6
|
+
bundles). Only the search-result document actually needs the residential
|
|
7
|
+
IP — Google's bot-wall scores the document fetch, not its subresources.
|
|
8
|
+
|
|
9
|
+
How it works: a local mitmproxy subprocess acts as Chromium's proxy. Per-flow
|
|
10
|
+
the addon decides:
|
|
11
|
+
|
|
12
|
+
- HOST + PATH matches a "paid" rule → forward through the upstream
|
|
13
|
+
residential proxy → captures the IP-reputation signal
|
|
14
|
+
- Otherwise → flow.server_conn.via = None → forward direct → free, doesn't
|
|
15
|
+
affect the SERP fetch
|
|
16
|
+
- Matches a "block" rule → return 204 immediately
|
|
17
|
+
|
|
18
|
+
In our measurements: 1.31 MB → 0.17 MB paid per Google query. 87% reduction.
|
|
19
|
+
|
|
20
|
+
ChromiumWorker spins up a SmartRouter automatically when `BrowserConfig.proxy`
|
|
21
|
+
is set AND `smart_routing=True` (the default). Per-process subprocess. Stats
|
|
22
|
+
(paid/direct/blocked bytes + counts) are written to a temp file and read on
|
|
23
|
+
stop, so callers can attribute cost per task.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
from __future__ import annotations
|
|
27
|
+
|
|
28
|
+
import json
|
|
29
|
+
import logging
|
|
30
|
+
import os
|
|
31
|
+
import shutil
|
|
32
|
+
import signal
|
|
33
|
+
import socket
|
|
34
|
+
import subprocess
|
|
35
|
+
import tempfile
|
|
36
|
+
import textwrap
|
|
37
|
+
import time
|
|
38
|
+
from dataclasses import asdict, dataclass, field
|
|
39
|
+
from pathlib import Path
|
|
40
|
+
from typing import Optional
|
|
41
|
+
|
|
42
|
+
logger = logging.getLogger(__name__)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@dataclass(frozen=True)
|
|
46
|
+
class SmartRule:
|
|
47
|
+
"""One routing rule. Fields are AND-ed when both are set.
|
|
48
|
+
|
|
49
|
+
`host_match` is matched as a SUFFIX so `google.com` covers www.google.com,
|
|
50
|
+
images.google.com, etc. Use a leading "." for exact subdomain ("google.com"
|
|
51
|
+
matches "google.com" but ".google.com" matches "*.google.com" only).
|
|
52
|
+
|
|
53
|
+
`path_prefix` is a literal prefix on the URL path. Empty = match any path.
|
|
54
|
+
|
|
55
|
+
`action`:
|
|
56
|
+
- "paid" → forward through the upstream residential proxy
|
|
57
|
+
- "direct" → forward direct, bypassing the upstream
|
|
58
|
+
- "block" → drop the request (returns 204; useful for trackers/ads)
|
|
59
|
+
"""
|
|
60
|
+
|
|
61
|
+
host_match: str
|
|
62
|
+
path_prefix: str = ""
|
|
63
|
+
action: str = "paid"
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
DEFAULT_RULES: tuple[SmartRule, ...] = (
|
|
67
|
+
SmartRule(host_match="www.google.com", path_prefix="/search", action="paid"),
|
|
68
|
+
SmartRule(host_match="www.bing.com", path_prefix="/search", action="paid"),
|
|
69
|
+
SmartRule(host_match="search.yahoo.com", path_prefix="/search", action="paid"),
|
|
70
|
+
SmartRule(host_match="duckduckgo.com", path_prefix="/?q", action="paid"),
|
|
71
|
+
SmartRule(host_match="duckduckgo.com", path_prefix="/html", action="paid"),
|
|
72
|
+
# Block known-noise endpoints we never want to fetch (also free, so the
|
|
73
|
+
# blocking saves on direct bandwidth + latency, not paid cost).
|
|
74
|
+
SmartRule(host_match="googletagmanager.com", action="block"),
|
|
75
|
+
SmartRule(host_match="googlesyndication.com", action="block"),
|
|
76
|
+
SmartRule(host_match="googleadservices.com", action="block"),
|
|
77
|
+
SmartRule(host_match="doubleclick.net", action="block"),
|
|
78
|
+
SmartRule(host_match="google-analytics.com", action="block"),
|
|
79
|
+
SmartRule(host_match="optimizationguide-pa.googleapis.com", action="block"),
|
|
80
|
+
)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
@dataclass
|
|
84
|
+
class RouterStats:
|
|
85
|
+
paid_bytes: int = 0
|
|
86
|
+
paid_requests: int = 0
|
|
87
|
+
direct_bytes: int = 0
|
|
88
|
+
direct_requests: int = 0
|
|
89
|
+
blocked_requests: int = 0
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _pick_free_port() -> int:
|
|
93
|
+
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
|
|
94
|
+
s.bind(("127.0.0.1", 0))
|
|
95
|
+
return s.getsockname()[1]
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _is_port_open(host: str, port: int) -> bool:
|
|
99
|
+
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
|
|
100
|
+
s.settimeout(0.15)
|
|
101
|
+
try:
|
|
102
|
+
s.connect((host, port))
|
|
103
|
+
return True
|
|
104
|
+
except (socket.timeout, ConnectionRefusedError, OSError):
|
|
105
|
+
return False
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _find_mitmdump() -> Optional[str]:
|
|
109
|
+
"""Locate the mitmdump binary. Returns None if not installed."""
|
|
110
|
+
p = shutil.which("mitmdump")
|
|
111
|
+
if p:
|
|
112
|
+
return p
|
|
113
|
+
# mitmproxy is often pip-installed under the venv's bin
|
|
114
|
+
here = Path(__file__).resolve()
|
|
115
|
+
for candidate in (
|
|
116
|
+
here.parents[2] / ".venv" / "bin" / "mitmdump",
|
|
117
|
+
Path.home() / ".local" / "bin" / "mitmdump",
|
|
118
|
+
):
|
|
119
|
+
if candidate.exists() and os.access(candidate, os.X_OK):
|
|
120
|
+
return str(candidate)
|
|
121
|
+
return None
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
@dataclass
|
|
125
|
+
class SmartRouter:
|
|
126
|
+
"""Embedded mitmproxy subprocess that does per-flow upstream routing.
|
|
127
|
+
|
|
128
|
+
Lifecycle: start() spawns mitmdump, blocks until its listen port is open;
|
|
129
|
+
stop() sends SIGTERM and reads the stats file. Designed for one router
|
|
130
|
+
per ChromiumWorker — cheap to spin up (~150ms cold start).
|
|
131
|
+
"""
|
|
132
|
+
|
|
133
|
+
upstream_proxy: str
|
|
134
|
+
"""Upstream residential proxy URL with auth: http://user:pass@host:port."""
|
|
135
|
+
|
|
136
|
+
rules: tuple[SmartRule, ...] = field(default_factory=lambda: DEFAULT_RULES)
|
|
137
|
+
"""Routing rules. First matching rule wins. Default for non-matches: direct."""
|
|
138
|
+
|
|
139
|
+
default_action: str = "direct"
|
|
140
|
+
"""Action when no rule matches. "direct" = free, "paid" = everything paid."""
|
|
141
|
+
|
|
142
|
+
listen_host: str = "127.0.0.1"
|
|
143
|
+
listen_port: int = 0
|
|
144
|
+
"""0 = auto-pick free port."""
|
|
145
|
+
|
|
146
|
+
_proc: Optional[subprocess.Popen] = field(default=None, repr=False, init=False)
|
|
147
|
+
_stats_path: Optional[Path] = field(default=None, repr=False, init=False)
|
|
148
|
+
_addon_path: Optional[Path] = field(default=None, repr=False, init=False)
|
|
149
|
+
_stats: RouterStats = field(default_factory=RouterStats, init=False)
|
|
150
|
+
|
|
151
|
+
@property
|
|
152
|
+
def proxy_url(self) -> str:
|
|
153
|
+
return f"http://{self.listen_host}:{self.listen_port}"
|
|
154
|
+
|
|
155
|
+
@property
|
|
156
|
+
def stats(self) -> RouterStats:
|
|
157
|
+
return self._stats
|
|
158
|
+
|
|
159
|
+
def start(self, *, startup_timeout: float = 10.0) -> None:
|
|
160
|
+
mitmdump = _find_mitmdump()
|
|
161
|
+
if mitmdump is None:
|
|
162
|
+
raise RuntimeError(
|
|
163
|
+
"SmartRouter requires mitmproxy. "
|
|
164
|
+
"Install with: pip install 'fleet-framework[browser]'"
|
|
165
|
+
)
|
|
166
|
+
if self.listen_port == 0:
|
|
167
|
+
self.listen_port = _pick_free_port()
|
|
168
|
+
|
|
169
|
+
# Strip auth from upstream URL and pass separately.
|
|
170
|
+
from urllib.parse import urlparse
|
|
171
|
+
u = urlparse(self.upstream_proxy)
|
|
172
|
+
upstream_clean = f"{u.scheme}://{u.hostname}:{u.port or 80}"
|
|
173
|
+
upstream_auth = f"{u.username}:{u.password}" if u.username else None
|
|
174
|
+
|
|
175
|
+
# Persist stats to a tmp file the subprocess writes to.
|
|
176
|
+
self._stats_path = Path(tempfile.mktemp(prefix="smart-router-stats-", suffix=".json"))
|
|
177
|
+
# Generate the addon script — embeds the rules so the subprocess has them.
|
|
178
|
+
rules_repr = [asdict(r) for r in self.rules]
|
|
179
|
+
self._addon_path = Path(tempfile.mktemp(prefix="smart-router-addon-", suffix=".py"))
|
|
180
|
+
self._addon_path.write_text(_render_addon_script(
|
|
181
|
+
rules_repr, self.default_action, str(self._stats_path)
|
|
182
|
+
))
|
|
183
|
+
|
|
184
|
+
cmd = [
|
|
185
|
+
mitmdump,
|
|
186
|
+
"-p", str(self.listen_port),
|
|
187
|
+
"--mode", f"upstream:{upstream_clean}",
|
|
188
|
+
"--set", "ssl_insecure=true",
|
|
189
|
+
"-s", str(self._addon_path),
|
|
190
|
+
"--quiet",
|
|
191
|
+
]
|
|
192
|
+
if upstream_auth:
|
|
193
|
+
cmd.extend(["--upstream-auth", upstream_auth])
|
|
194
|
+
|
|
195
|
+
self._proc = subprocess.Popen(
|
|
196
|
+
cmd,
|
|
197
|
+
stdout=subprocess.DEVNULL,
|
|
198
|
+
stderr=subprocess.PIPE,
|
|
199
|
+
start_new_session=True,
|
|
200
|
+
)
|
|
201
|
+
# Wait for the listen socket.
|
|
202
|
+
deadline = time.monotonic() + startup_timeout
|
|
203
|
+
while time.monotonic() < deadline:
|
|
204
|
+
if _is_port_open(self.listen_host, self.listen_port):
|
|
205
|
+
logger.info(
|
|
206
|
+
"smart-router @ %s, %d rules, upstream=%s",
|
|
207
|
+
self.proxy_url, len(self.rules), upstream_clean,
|
|
208
|
+
)
|
|
209
|
+
return
|
|
210
|
+
if self._proc.poll() is not None:
|
|
211
|
+
err = (self._proc.stderr.read() if self._proc.stderr else b"").decode(errors="replace")
|
|
212
|
+
self._cleanup_files()
|
|
213
|
+
raise RuntimeError(f"SmartRouter mitmdump exited early: {err[:400]}")
|
|
214
|
+
time.sleep(0.1)
|
|
215
|
+
self.stop()
|
|
216
|
+
raise RuntimeError(f"SmartRouter failed to start within {startup_timeout}s")
|
|
217
|
+
|
|
218
|
+
def stop(self) -> None:
|
|
219
|
+
# Read stats BEFORE killing the process — the addon flushes on every
|
|
220
|
+
# response, so the file is current up to the last completed flow.
|
|
221
|
+
self._read_stats()
|
|
222
|
+
if self._proc is not None:
|
|
223
|
+
try:
|
|
224
|
+
# Term the whole process group; mitmdump spawns helper threads.
|
|
225
|
+
os.killpg(os.getpgid(self._proc.pid), signal.SIGTERM)
|
|
226
|
+
except (ProcessLookupError, PermissionError, OSError):
|
|
227
|
+
pass
|
|
228
|
+
try:
|
|
229
|
+
self._proc.wait(timeout=3)
|
|
230
|
+
except subprocess.TimeoutExpired:
|
|
231
|
+
try:
|
|
232
|
+
os.killpg(os.getpgid(self._proc.pid), signal.SIGKILL)
|
|
233
|
+
except (ProcessLookupError, OSError):
|
|
234
|
+
pass
|
|
235
|
+
self._proc.wait(timeout=2)
|
|
236
|
+
self._proc = None
|
|
237
|
+
# Re-read stats one more time in case any flows finished between the
|
|
238
|
+
# first read and the SIGTERM.
|
|
239
|
+
self._read_stats()
|
|
240
|
+
self._cleanup_files()
|
|
241
|
+
logger.info(
|
|
242
|
+
"smart-router stopped. paid=%d B / %d req, direct=%d B / %d req, blocked=%d req",
|
|
243
|
+
self._stats.paid_bytes, self._stats.paid_requests,
|
|
244
|
+
self._stats.direct_bytes, self._stats.direct_requests,
|
|
245
|
+
self._stats.blocked_requests,
|
|
246
|
+
)
|
|
247
|
+
|
|
248
|
+
def __enter__(self) -> "SmartRouter":
|
|
249
|
+
self.start()
|
|
250
|
+
return self
|
|
251
|
+
|
|
252
|
+
def __exit__(self, *exc) -> None:
|
|
253
|
+
self.stop()
|
|
254
|
+
|
|
255
|
+
def _read_stats(self) -> None:
|
|
256
|
+
if self._stats_path is None or not self._stats_path.exists():
|
|
257
|
+
return
|
|
258
|
+
try:
|
|
259
|
+
data = json.loads(self._stats_path.read_text() or "{}")
|
|
260
|
+
self._stats = RouterStats(
|
|
261
|
+
paid_bytes=int(data.get("paid_bytes", 0)),
|
|
262
|
+
paid_requests=int(data.get("paid_requests", 0)),
|
|
263
|
+
direct_bytes=int(data.get("direct_bytes", 0)),
|
|
264
|
+
direct_requests=int(data.get("direct_requests", 0)),
|
|
265
|
+
blocked_requests=int(data.get("blocked_requests", 0)),
|
|
266
|
+
)
|
|
267
|
+
except (json.JSONDecodeError, ValueError, OSError):
|
|
268
|
+
logger.debug("smart-router stats file unreadable", exc_info=True)
|
|
269
|
+
|
|
270
|
+
def _cleanup_files(self) -> None:
|
|
271
|
+
for p in (self._stats_path, self._addon_path):
|
|
272
|
+
if p is not None and p.exists():
|
|
273
|
+
try:
|
|
274
|
+
p.unlink()
|
|
275
|
+
except OSError:
|
|
276
|
+
pass
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
def _render_addon_script(rules: list[dict], default_action: str, stats_path: str) -> str:
|
|
280
|
+
"""Generate the mitmproxy addon script. Embeds the rules as a literal so
|
|
281
|
+
the subprocess has them at startup without a separate config file."""
|
|
282
|
+
return textwrap.dedent(f"""
|
|
283
|
+
import json
|
|
284
|
+
from mitmproxy import http
|
|
285
|
+
from pathlib import Path
|
|
286
|
+
|
|
287
|
+
RULES = {json.dumps(rules)}
|
|
288
|
+
DEFAULT_ACTION = {default_action!r}
|
|
289
|
+
STATS_PATH = {stats_path!r}
|
|
290
|
+
|
|
291
|
+
_stats = {{
|
|
292
|
+
"paid_bytes": 0, "paid_requests": 0,
|
|
293
|
+
"direct_bytes": 0, "direct_requests": 0,
|
|
294
|
+
"blocked_requests": 0,
|
|
295
|
+
}}
|
|
296
|
+
|
|
297
|
+
def _host_matches(host, pattern):
|
|
298
|
+
return host == pattern or host.endswith("." + pattern) or host.endswith(pattern)
|
|
299
|
+
|
|
300
|
+
def _pick_action(host, path):
|
|
301
|
+
for r in RULES:
|
|
302
|
+
if not _host_matches(host, r["host_match"]):
|
|
303
|
+
continue
|
|
304
|
+
if r["path_prefix"] and not path.startswith(r["path_prefix"]):
|
|
305
|
+
continue
|
|
306
|
+
return r["action"]
|
|
307
|
+
return DEFAULT_ACTION
|
|
308
|
+
|
|
309
|
+
def _persist():
|
|
310
|
+
try:
|
|
311
|
+
Path(STATS_PATH).write_text(json.dumps(_stats))
|
|
312
|
+
except OSError:
|
|
313
|
+
pass
|
|
314
|
+
|
|
315
|
+
def request(flow):
|
|
316
|
+
host = flow.request.pretty_host
|
|
317
|
+
path = flow.request.path
|
|
318
|
+
action = _pick_action(host, path)
|
|
319
|
+
flow.metadata["smart_action"] = action
|
|
320
|
+
if action == "block":
|
|
321
|
+
_stats["blocked_requests"] += 1
|
|
322
|
+
flow.response = http.Response.make(
|
|
323
|
+
204, b"", {{"x-smart-router": "blocked"}}
|
|
324
|
+
)
|
|
325
|
+
_persist()
|
|
326
|
+
return
|
|
327
|
+
if action == "direct" and flow.server_conn.via is not None:
|
|
328
|
+
try:
|
|
329
|
+
flow.server_conn.via = None
|
|
330
|
+
except Exception:
|
|
331
|
+
pass
|
|
332
|
+
|
|
333
|
+
def response(flow):
|
|
334
|
+
action = flow.metadata.get("smart_action", "direct")
|
|
335
|
+
body = flow.response.raw_content or b""
|
|
336
|
+
resp_h = sum(len(k) + len(v) + 4 for k, v in flow.response.headers.items())
|
|
337
|
+
req_h = sum(len(k) + len(v) + 4 for k, v in flow.request.headers.items())
|
|
338
|
+
wire = len(body) + resp_h + req_h + len(flow.request.raw_content or b"")
|
|
339
|
+
if action == "paid":
|
|
340
|
+
_stats["paid_bytes"] += wire
|
|
341
|
+
_stats["paid_requests"] += 1
|
|
342
|
+
elif action == "direct":
|
|
343
|
+
_stats["direct_bytes"] += wire
|
|
344
|
+
_stats["direct_requests"] += 1
|
|
345
|
+
_persist()
|
|
346
|
+
""").strip()
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
__all__ = [
|
|
350
|
+
"DEFAULT_RULES",
|
|
351
|
+
"RouterStats",
|
|
352
|
+
"SmartRouter",
|
|
353
|
+
"SmartRule",
|
|
354
|
+
]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: fleet-framework
|
|
3
|
-
Version: 0.1.2
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: generic distributed-automation framework — master/worker, browser pool, anti-bot helpers, and abstract automation contracts (SERP, content, news, place, marketplace, jobs, social)
|
|
5
5
|
Author: Sarper Avci
|
|
6
6
|
License: MIT
|
|
@@ -21,6 +21,7 @@ Requires-Dist: click>=8.1.0
|
|
|
21
21
|
Provides-Extra: browser
|
|
22
22
|
Requires-Dist: DrissionPage>=4.1.0; extra == "browser"
|
|
23
23
|
Requires-Dist: cryptography>=42.0.0; extra == "browser"
|
|
24
|
+
Requires-Dist: mitmproxy>=12.0; extra == "browser"
|
|
24
25
|
Provides-Extra: cloudflare
|
|
25
26
|
Requires-Dist: fleet-framework[browser]; extra == "cloudflare"
|
|
26
27
|
Provides-Extra: cloak
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "fleet-framework"
|
|
7
|
-
version = "0.1.2"
|
|
7
|
+
version = "0.2.0"
|
|
8
8
|
description = "generic distributed-automation framework — master/worker, browser pool, anti-bot helpers, and abstract automation contracts (SERP, content, news, place, marketplace, jobs, social)"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.11"
|
|
@@ -26,6 +26,7 @@ dependencies = [
|
|
|
26
26
|
browser = [
|
|
27
27
|
"DrissionPage>=4.1.0",
|
|
28
28
|
"cryptography>=42.0.0",
|
|
29
|
+
"mitmproxy>=12.0", # SmartRouter subprocess for per-flow proxy routing
|
|
29
30
|
]
|
|
30
31
|
cloudflare = [
|
|
31
32
|
"fleet-framework[browser]",
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{fleet_framework-0.1.2 → fleet_framework-0.2.0}/fleet_framework.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|