fleet-framework 0.2.0__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/PKG-INFO +5 -4
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet/cli.py +1 -1
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet/core/automation.py +2 -0
- fleet_framework-0.3.0/fleet/core/browser.py +292 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet/core/context.py +2 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet/core/local_runner.py +30 -2
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet/worker/agent.py +14 -1
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet/worker/slot_runner.py +12 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet_browser/__init__.py +4 -6
- fleet_framework-0.3.0/fleet_browser/browser.py +271 -0
- fleet_framework-0.3.0/fleet_browser/display.py +95 -0
- fleet_framework-0.3.0/fleet_browser/pool.py +397 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet_browser/proxy_extension.py +46 -30
- fleet_framework-0.3.0/fleet_browser/smart_router.py +830 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet_browser/stealth.py +29 -24
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet_cloudflare/bypasser.py +23 -39
- fleet_framework-0.3.0/fleet_cloudflare/harvest.py +207 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet_framework.egg-info/PKG-INFO +5 -4
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet_framework.egg-info/SOURCES.txt +2 -1
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet_framework.egg-info/requires.txt +4 -3
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/pyproject.toml +21 -4
- fleet_framework-0.2.0/fleet_browser/browser.py +0 -386
- fleet_framework-0.2.0/fleet_browser/humanizer.py +0 -157
- fleet_framework-0.2.0/fleet_browser/pool.py +0 -241
- fleet_framework-0.2.0/fleet_browser/smart_router.py +0 -354
- fleet_framework-0.2.0/fleet_cloudflare/harvest.py +0 -266
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/LICENSE +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/README.md +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet/__init__.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet/core/__init__.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet/core/backend.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet/core/config.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet/core/contract.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet/core/country_presets.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet/core/events.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet/core/logging.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet/core/memory_backend.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet/core/metrics.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet/core/otel.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet/core/primitives.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet/core/protocol.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet/core/proxy.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet/core/reconcile.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet/core/sqlite_backend.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet/core/store.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet/master/__init__.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet/master/api.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet/master/app.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet/master/auth.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet/master/broadcaster.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet/master/dashboard/__init__.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet/master/dashboard/router.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet/master/dashboard/static/style.css +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet/master/dashboard/templates/index.html +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet/master/metrics_route.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet/master/ratelimit.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet/master/ws_router.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet/worker/__init__.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet/worker/reconcile_loop.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet/worker/ws_client.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet_browser/cert.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet_browser/cloak.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet_browser/fingerprint.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet_browser/solver.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet_cloudflare/__init__.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet_cloudflare/replay.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet_cloudflare/solver.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet_content/__init__.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet_content/automation.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet_content/contracts.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet_detect/__init__.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet_detect/contracts.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet_detect/detect.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet_framework.egg-info/dependency_links.txt +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet_framework.egg-info/entry_points.txt +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet_framework.egg-info/top_level.txt +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet_headers/__init__.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet_headers/profiles.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet_jobs/__init__.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet_jobs/automation.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet_jobs/contracts.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet_marketplace/__init__.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet_marketplace/automation.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet_marketplace/contracts.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet_news/__init__.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet_news/automation.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet_news/contracts.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet_place/__init__.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet_place/automation.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet_place/contracts.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet_provider_dataimpulse/__init__.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet_provider_evomi/__init__.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet_serp/__init__.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet_serp/automation.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet_serp/contracts.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet_social/__init__.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet_social/automation.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/fleet_social/contracts.py +0 -0
- {fleet_framework-0.2.0 → fleet_framework-0.3.0}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: fleet-framework
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: generic distributed-automation framework — master/worker, browser pool, anti-bot helpers, and abstract automation contracts (SERP, content, news, place, marketplace, jobs, social)
|
|
5
5
|
Author: Sarper Avci
|
|
6
6
|
License: MIT
|
|
@@ -19,14 +19,15 @@ Requires-Dist: jinja2>=3.1.0
|
|
|
19
19
|
Requires-Dist: psutil>=5.9.0
|
|
20
20
|
Requires-Dist: click>=8.1.0
|
|
21
21
|
Provides-Extra: browser
|
|
22
|
-
Requires-Dist:
|
|
22
|
+
Requires-Dist: cloakbrowser>=0.3; extra == "browser"
|
|
23
23
|
Requires-Dist: cryptography>=42.0.0; extra == "browser"
|
|
24
|
-
Requires-Dist: mitmproxy>=
|
|
24
|
+
Requires-Dist: mitmproxy>=11.0; extra == "browser"
|
|
25
|
+
Requires-Dist: curl_cffi>=0.7; extra == "browser"
|
|
26
|
+
Requires-Dist: pyvirtualdisplay>=3.0; extra == "browser"
|
|
25
27
|
Provides-Extra: cloudflare
|
|
26
28
|
Requires-Dist: fleet-framework[browser]; extra == "cloudflare"
|
|
27
29
|
Provides-Extra: cloak
|
|
28
30
|
Requires-Dist: fleet-framework[browser]; extra == "cloak"
|
|
29
|
-
Requires-Dist: cloakbrowser>=0.3; extra == "cloak"
|
|
30
31
|
Provides-Extra: otel
|
|
31
32
|
Requires-Dist: opentelemetry-api>=1.27.0; extra == "otel"
|
|
32
33
|
Requires-Dist: opentelemetry-sdk>=1.27.0; extra == "otel"
|
|
@@ -249,7 +249,7 @@ def describe_cmd(automation_type: str) -> None:
|
|
|
249
249
|
click.echo(json.dumps(doc["config"], indent=2))
|
|
250
250
|
if "queue" in doc:
|
|
251
251
|
click.echo("")
|
|
252
|
-
click.echo(
|
|
252
|
+
click.echo("task payload schema (the shape -k / -p builds):")
|
|
253
253
|
click.echo(json.dumps(doc["queue"]["payload"], indent=2))
|
|
254
254
|
if "stream" in doc:
|
|
255
255
|
click.echo("")
|
|
@@ -7,6 +7,7 @@ from typing import Any, ClassVar, Generic, Optional, TypeVar
|
|
|
7
7
|
|
|
8
8
|
from pydantic import BaseModel
|
|
9
9
|
|
|
10
|
+
from fleet.core.browser import BrowserNeeds
|
|
10
11
|
from fleet.core.config import BaseConfig
|
|
11
12
|
from fleet.core.context import Context
|
|
12
13
|
from fleet.core.contract import Pool, Queue, Stream
|
|
@@ -33,6 +34,7 @@ class BaseAutomation(Generic[C]):
|
|
|
33
34
|
Output: ClassVar[Optional[type[BaseModel]]] = None # stream payload type (one stream per automation, by convention)
|
|
34
35
|
TaskPayload: ClassVar[Optional[type[BaseModel]]] = None # BatchAutomation task payload
|
|
35
36
|
Pools: ClassVar[dict[str, Pool]] = {} # pools this automation owns
|
|
37
|
+
Browser: ClassVar[Optional[BrowserNeeds]] = None # opt-in browser-pool requirement; framework provisions ctx.browser
|
|
36
38
|
|
|
37
39
|
automation_type: ClassVar[str] = ""
|
|
38
40
|
|
|
@@ -0,0 +1,292 @@
|
|
|
1
|
+
"""Browser pool as a first-class core resource.
|
|
2
|
+
|
|
3
|
+
`BrowserNeeds` is the declarative side: an automation declares what kind of
|
|
4
|
+
browser-pool it needs at the class level, the same way it declares
|
|
5
|
+
`TaskPayload`, `Output`, and `Pools`. The worker bootstrap reads these
|
|
6
|
+
declarations, provisions one shared `BrowserResource` per unique need, and
|
|
7
|
+
injects it as `ctx.browser`.
|
|
8
|
+
|
|
9
|
+
Plugins should never `import fleet_browser` directly. They:
|
|
10
|
+
|
|
11
|
+
class MyAutomation(BatchAutomation):
|
|
12
|
+
Browser = BrowserNeeds(size=4, max_uses=1, engine="cloak")
|
|
13
|
+
|
|
14
|
+
async def run_one(self, task, ctx):
|
|
15
|
+
async with ctx.browser.acquire(proxy=ctx.proxy_url) as page:
|
|
16
|
+
await page.goto(...)
|
|
17
|
+
|
|
18
|
+
`fleet_browser` stays an optional install — `BrowserResource.start()` only
|
|
19
|
+
imports it when an automation actually declares a `Browser` need.
|
|
20
|
+
|
|
21
|
+
Design notes:
|
|
22
|
+
|
|
23
|
+
* The Browser is pooled and pre-warmed at worker boot; SmartRouter is per-
|
|
24
|
+
acquire because each task usually has its own proxy URL (residential
|
|
25
|
+
sticky-session rotation). mitmdump can't be reconfigured mid-flight, so
|
|
26
|
+
pooling it doesn't help — a fresh router per task is cheap (~150 ms).
|
|
27
|
+
|
|
28
|
+
* Each acquire creates a new Playwright BrowserContext so cookies / storage
|
|
29
|
+
/ page state from a previous task can't leak into the next. The cloak
|
|
30
|
+
fingerprint stays at the Browser-launch level, so fingerprint refresh per
|
|
31
|
+
task requires `max_uses=1` (dispose + respawn the Browser between uses).
|
|
32
|
+
"""
|
|
33
|
+
from __future__ import annotations
|
|
34
|
+
|
|
35
|
+
import asyncio
|
|
36
|
+
import logging
|
|
37
|
+
from contextlib import asynccontextmanager
|
|
38
|
+
from dataclasses import dataclass, field
|
|
39
|
+
from typing import TYPE_CHECKING, Any, AsyncIterator, Optional
|
|
40
|
+
|
|
41
|
+
if TYPE_CHECKING:
|
|
42
|
+
from playwright.async_api import Page
|
|
43
|
+
|
|
44
|
+
logger = logging.getLogger(__name__)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@dataclass(frozen=True)
class BrowserNeeds:
    """Immutable description of the browser pool an automation asks for.

    An automation sets this as a class-level declaration. Instances are
    frozen (and therefore hashable), which lets the worker key one shared
    ``BrowserResource`` per *distinct* declaration: automations declaring
    equal needs end up sharing a pool.

    Attributes
    ----------
    size : int
        Number of pre-warmed Browsers kept in the pool. Leasing is
        immediate as long as one slot is idle.

    max_uses : int
        Acquires a single Browser serves before it is recycled. ``1``
        gives every task a fresh cloak fingerprint (recommended against
        Google-class bot walls); higher values reuse the Browser with
        cookies cleared in between, which is cheaper but keeps the same
        cloak seed.

    max_age_seconds : float
        Hard age limit. A Browser older than this is disposed even when
        it has not reached ``max_uses`` — a guard against process memory
        leaks.

    engine : str
        One of ``"auto"``, ``"cloak"`` or ``"chrome"``. ``"auto"``
        resolves to ``"cloak"`` when cloakbrowser is installed;
        ``"chrome"`` is stock Playwright Chromium.

    headless : bool
        Whether to run headless. Defaults to ``False`` because headed on
        Xvfb gives a more stable anti-bot fingerprint; the framework
        starts a managed Xvfb itself.

    smart_routing : bool
        When ``True`` and ``acquire`` receives ``proxy=...``, a per-task
        SmartRouter is started so paid flows go through curl_cffi with a
        real Chrome JA3. ``False`` uses the proxy directly (cheaper, but
        loses CDN-family JA3 coherence).
    """

    size: int = 4
    max_uses: int = 1
    max_age_seconds: float = 3600.0
    engine: str = "auto"
    headless: bool = False
    smart_routing: bool = True

    def __post_init__(self) -> None:
        """Validate the declaration; raise ``ValueError`` on the first bad field."""
        # Checked in declaration order, one at a time, so the first
        # offending field is the one reported.
        must_be_positive = (
            ("size", "BrowserNeeds.size must be >= 1"),
            ("max_uses", "BrowserNeeds.max_uses must be >= 1"),
            ("max_age_seconds", "BrowserNeeds.max_age_seconds must be > 0"),
        )
        for attr_name, complaint in must_be_positive:
            if getattr(self, attr_name) <= 0:
                raise ValueError(complaint)

        known_engines = ("auto", "cloak", "chrome")
        if self.engine in known_engines:
            return
        raise ValueError(
            f"BrowserNeeds.engine must be one of auto/cloak/chrome, "
            f"got {self.engine!r}"
        )
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
@dataclass
class BrowserResource:
    """Process-wide handle wrapping a pre-warmed BrowserPool.

    Constructed by the worker bootstrap (one per unique BrowserNeeds),
    started before any slot runs, stopped on worker shutdown. Plugins
    access it via ``ctx.browser`` and call ``.acquire(proxy=...)`` per
    task.
    """

    needs: BrowserNeeds
    _pool: Any = field(default=None, init=False, repr=False)  # BrowserPool, lazy import
    # True between a successful start() and the next stop().
    _started: bool = field(default=False, init=False, repr=False)

    async def start(self) -> None:
        """Pre-warm the underlying BrowserPool. No-op if already started.

        Imports fleet_browser lazily so the core stays importable in
        deployments that don't need browser automation.

        Raises
        ------
        RuntimeError
            If ``fleet_browser`` cannot be imported.
        """
        if self._started:
            return
        try:
            from fleet_browser import BrowserConfig, make_browser_pool
        except ImportError as e:
            raise RuntimeError(
                "BrowserNeeds declared but fleet_browser is not installed. "
                "Install with: pip install fleet-framework[browser]"
            ) from e

        template = BrowserConfig(
            page_url="about:blank",
            proxy=None,  # Browser launches without proxy; per-acquire context sets it
            engine=self.needs.engine,
            headless=self.needs.headless,
            smart_routing=False,  # we manage SmartRouter per-acquire
        )
        self._pool = make_browser_pool(
            size=self.needs.size,
            config_template=template,
            max_uses=self.needs.max_uses,
            max_age_seconds=self.needs.max_age_seconds,
        )
        await self._pool.start()
        self._started = True
        logger.info(
            "browser-resource started: size=%d max_uses=%d engine=%s",
            self.needs.size, self.needs.max_uses, self.needs.engine,
        )

    async def stop(self) -> None:
        """Dispose every pooled Browser. Idempotent."""
        if not self._started:
            return
        try:
            await self._pool.stop()
        finally:
            # Reset even when pool.stop() raises so a later stop() is a no-op.
            self._started = False
            self._pool = None
            logger.info("browser-resource stopped")

    @asynccontextmanager
    async def acquire(
        self, *, proxy: Optional[str] = None,
    ) -> AsyncIterator["Page"]:
        """Lease a Page bound to this task's proxy.

        Leases a Browser from the pool, spins up a per-task SmartRouter
        (if ``smart_routing`` and a proxy was provided), creates a fresh
        BrowserContext pointed at the router (or directly at the proxy),
        and yields a new Page. On exit the context is closed and the
        router stopped; the Browser goes back to the pool, which applies
        its own recycling policy.

        Cleanup also runs when provisioning itself fails part-way (for
        example ``new_context()`` or ``new_page()`` raising), so a started
        router or a created context is never left behind.

        Parameters
        ----------
        proxy:
            Upstream proxy URL for this task, or ``None`` for a direct
            connection.

        Raises
        ------
        RuntimeError
            If the resource has not been started, or the pooled page has
            no parent browser.
        """
        if not self._started:
            raise RuntimeError("BrowserResource not started")

        async with self._pool.acquire() as base_page:
            # base_page is the launch-time page from the pool entry; we don't
            # use it directly because it lives in a shared context. New
            # context per acquire = clean cookie/storage isolation.
            browser = base_page.context.browser
            if browser is None:
                raise RuntimeError("pooled page has no parent browser")

            router = None
            new_ctx = None
            try:
                proxy_settings: Optional[dict] = None
                ignore_https = False
                if proxy and self.needs.smart_routing:
                    from fleet_browser.smart_router import SmartRouter
                    router = SmartRouter(upstream_proxy=proxy)
                    # NOTE(review): start()/stop() are called synchronously
                    # here; presumably brief, but confirm they do not block
                    # the event loop for long.
                    router.start()
                    proxy_settings = {"server": router.proxy_url}
                    ignore_https = True  # browser must trust mitm's self-signed CA
                elif proxy:
                    proxy_settings = _proxy_settings_from_url(proxy)

                new_ctx = await browser.new_context(
                    proxy=proxy_settings,
                    ignore_https_errors=ignore_https,
                )
                page = await new_ctx.new_page()
                # Expose the router on the page so callers can read stats /
                # families without having to thread it through. Best-effort:
                # a page object that rejects the attribute must not fail
                # the lease.
                try:
                    page._smart_router = router  # type: ignore[attr-defined]
                except Exception:
                    logger.debug(
                        "could not attach smart router to page", exc_info=True,
                    )
                yield page
            finally:
                # Runs for normal exit, task failure, and provisioning
                # failure alike; each step is independent so one failing
                # cleanup does not skip the next.
                if new_ctx is not None:
                    try:
                        await new_ctx.close()
                    except Exception:
                        logger.debug("context close failed", exc_info=True)
                if router is not None:
                    try:
                        router.stop()
                    except Exception:
                        logger.debug("router stop failed", exc_info=True)
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def _proxy_settings_from_url(url: str) -> dict:
|
|
234
|
+
"""Convert ``http://user:pass@host:port`` into Playwright proxy settings."""
|
|
235
|
+
from urllib.parse import urlparse
|
|
236
|
+
u = urlparse(url)
|
|
237
|
+
server = f"{u.scheme}://{u.hostname}:{u.port or 80}"
|
|
238
|
+
out: dict[str, str] = {"server": server}
|
|
239
|
+
if u.username:
|
|
240
|
+
out["username"] = u.username
|
|
241
|
+
out["password"] = u.password or ""
|
|
242
|
+
return out
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
class BrowserRegistry:
    """Holds one BrowserResource per unique BrowserNeeds across all loaded
    automations. Built once at worker boot, queried by SlotRunner to attach
    the right resource to each Context."""

    def __init__(self) -> None:
        # Keyed by the BrowserNeeds declaration itself, so equal
        # declarations map to the same resource.
        self._resources: dict[BrowserNeeds, BrowserResource] = {}
        self._started: bool = False

    def register(self, needs: BrowserNeeds) -> BrowserResource:
        """Return the shared resource for these needs, creating one if needed.

        Must be called before ``start_all``: a resource first registered
        afterwards is not started by the registry, so a warning is logged
        in that case.
        """
        existing = self._resources.get(needs)
        if existing is not None:
            return existing
        if self._started:
            logger.warning(
                "BrowserNeeds registered after start_all(); "
                "the new resource has not been started"
            )
        res = BrowserResource(needs=needs)
        self._resources[needs] = res
        return res

    async def start_all(self) -> None:
        """Pre-warm every registered resource in parallel.

        If any resource fails to start, every resource is stopped again
        (resources that never started treat ``stop()`` as a no-op) and the
        first failure is re-raised, so a partial start does not leave
        running browsers that ``stop_all`` would later skip.
        """
        if self._started:
            return
        resources = list(self._resources.values())
        if not resources:
            self._started = True
            return

        outcomes = await asyncio.gather(
            *(r.start() for r in resources),
            return_exceptions=True,
        )
        failures = [o for o in outcomes if isinstance(o, BaseException)]
        if failures:
            # Roll back: without this, _started stays False and stop_all()
            # would return early, leaking whatever did come up.
            await asyncio.gather(
                *(r.stop() for r in resources),
                return_exceptions=True,
            )
            raise failures[0]
        self._started = True

    async def stop_all(self) -> None:
        """Stop every resource and forget them. Idempotent.

        Failures while stopping individual resources are logged rather
        than raised so that shutdown continues.
        """
        if not self._started:
            return
        outcomes = await asyncio.gather(
            *(r.stop() for r in self._resources.values()),
            return_exceptions=True,
        )
        for outcome in outcomes:
            if isinstance(outcome, BaseException):
                logger.warning("browser-resource stop failed: %r", outcome)
        self._started = False
        self._resources.clear()

    def __len__(self) -> int:
        """Number of registered resources."""
        return len(self._resources)
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
__all__ = [
|
|
289
|
+
"BrowserNeeds",
|
|
290
|
+
"BrowserRegistry",
|
|
291
|
+
"BrowserResource",
|
|
292
|
+
]
|
|
@@ -9,6 +9,7 @@ from pydantic import BaseModel
|
|
|
9
9
|
|
|
10
10
|
if TYPE_CHECKING:
|
|
11
11
|
from fleet.core.backend import Backend
|
|
12
|
+
from fleet.core.browser import BrowserResource
|
|
12
13
|
from fleet.core.config import BaseConfig
|
|
13
14
|
from fleet.core.contract import Pool, Queue, Stream
|
|
14
15
|
from fleet.core.events import EventBus
|
|
@@ -37,6 +38,7 @@ class Context(Generic[C]):
|
|
|
37
38
|
_automation_cls: Any = None # type[BaseAutomation], set by SlotRunner
|
|
38
39
|
_proxy: Optional["ProxyHandle"] = None
|
|
39
40
|
proxy_url: Optional[str] = None
|
|
41
|
+
browser: Optional["BrowserResource"] = None # set when automation declares Browser
|
|
40
42
|
extra: dict[str, Any] = field(default_factory=dict)
|
|
41
43
|
|
|
42
44
|
# ---- proxy ----
|
|
@@ -29,6 +29,7 @@ from fleet.core.automation import (
|
|
|
29
29
|
ContinuousAutomation,
|
|
30
30
|
Task,
|
|
31
31
|
)
|
|
32
|
+
from fleet.core.browser import BrowserRegistry
|
|
32
33
|
from fleet.core.context import Context
|
|
33
34
|
from fleet.core.memory_backend import InMemoryBackend
|
|
34
35
|
from fleet.core.metrics import SlotMetrics
|
|
@@ -72,8 +73,12 @@ def _build_context(
|
|
|
72
73
|
log: logging.Logger,
|
|
73
74
|
worker_id: str,
|
|
74
75
|
slot_id: int = 0,
|
|
76
|
+
browser_registry: Optional[BrowserRegistry] = None,
|
|
75
77
|
) -> tuple[Context, asyncio.Event]:
|
|
76
78
|
shutdown = asyncio.Event()
|
|
79
|
+
browser_resource = None
|
|
80
|
+
if browser_registry is not None and cls.Browser is not None:
|
|
81
|
+
browser_resource = browser_registry.register(cls.Browser)
|
|
77
82
|
ctx = Context(
|
|
78
83
|
automation_type=cls.automation_type,
|
|
79
84
|
worker_id=worker_id,
|
|
@@ -86,6 +91,7 @@ def _build_context(
|
|
|
86
91
|
_events=_StubEventBus(), # type: ignore[arg-type]
|
|
87
92
|
_metrics=SlotMetrics(),
|
|
88
93
|
_automation_cls=cls,
|
|
94
|
+
browser=browser_resource,
|
|
89
95
|
)
|
|
90
96
|
return ctx, shutdown
|
|
91
97
|
|
|
@@ -124,7 +130,19 @@ async def run_local(
|
|
|
124
130
|
]
|
|
125
131
|
|
|
126
132
|
emit_cb = emit or _default_emit
|
|
127
|
-
|
|
133
|
+
|
|
134
|
+
# Provision the browser pool BEFORE building the context so ctx.browser
|
|
135
|
+
# is ready by the time run_one() fires. Plugins that don't declare
|
|
136
|
+
# Browser get a noop registry and ctx.browser stays None.
|
|
137
|
+
browser_registry = BrowserRegistry()
|
|
138
|
+
if cls.Browser is not None:
|
|
139
|
+
browser_registry.register(cls.Browser)
|
|
140
|
+
await browser_registry.start_all()
|
|
141
|
+
|
|
142
|
+
ctx, _shutdown = _build_context(
|
|
143
|
+
cls, cfg, emit_cb, backend, log, worker_id,
|
|
144
|
+
browser_registry=browser_registry,
|
|
145
|
+
)
|
|
128
146
|
|
|
129
147
|
sem = asyncio.Semaphore(max(1, concurrency))
|
|
130
148
|
ok = 0
|
|
@@ -154,6 +172,7 @@ async def run_local(
|
|
|
154
172
|
await automation.cleanup(ctx)
|
|
155
173
|
except Exception:
|
|
156
174
|
log.exception("cleanup() raised")
|
|
175
|
+
await browser_registry.stop_all()
|
|
157
176
|
await backend.aclose()
|
|
158
177
|
|
|
159
178
|
return {"ok": ok, "failed": failed}
|
|
@@ -193,8 +212,16 @@ async def run_local_continuous(
|
|
|
193
212
|
emit_count += 1
|
|
194
213
|
await (emit or _default_emit)(payload)
|
|
195
214
|
|
|
215
|
+
browser_registry = BrowserRegistry()
|
|
216
|
+
if cls.Browser is not None:
|
|
217
|
+
browser_registry.register(cls.Browser)
|
|
218
|
+
await browser_registry.start_all()
|
|
219
|
+
|
|
196
220
|
contexts = [
|
|
197
|
-
_build_context(
|
|
221
|
+
_build_context(
|
|
222
|
+
cls, cfg, _counting_emit, backend, log, worker_id,
|
|
223
|
+
slot_id=i, browser_registry=browser_registry,
|
|
224
|
+
)
|
|
198
225
|
for i in range(max(1, slots))
|
|
199
226
|
]
|
|
200
227
|
|
|
@@ -226,6 +253,7 @@ async def run_local_continuous(
|
|
|
226
253
|
await automation.cleanup(contexts[0][0])
|
|
227
254
|
except Exception:
|
|
228
255
|
log.exception("cleanup() raised")
|
|
256
|
+
await browser_registry.stop_all()
|
|
229
257
|
await backend.aclose()
|
|
230
258
|
|
|
231
259
|
return {"emits": emit_count}
|
|
@@ -12,6 +12,7 @@ import httpx
|
|
|
12
12
|
import psutil
|
|
13
13
|
|
|
14
14
|
from fleet.core.automation import BaseAutomation, load_entry_points
|
|
15
|
+
from fleet.core.browser import BrowserRegistry
|
|
15
16
|
from fleet.core.protocol import ConfigChanged, Drain, Hardware
|
|
16
17
|
from fleet.worker.reconcile_loop import ReconcileLoop
|
|
17
18
|
from fleet.worker.slot_runner import SlotRunner
|
|
@@ -56,13 +57,24 @@ class Agent:
|
|
|
56
57
|
self._slot_runner: SlotRunner | None = None
|
|
57
58
|
self._stop = asyncio.Event()
|
|
58
59
|
|
|
60
|
+
# Shared resources for plugins to consume via ctx. Browser pool is
|
|
61
|
+
# the only one for now; future ones (HTTP session, cache, etc.) can
|
|
62
|
+
# follow the same opt-in declaration pattern on BaseAutomation.
|
|
63
|
+
self._browser_registry = BrowserRegistry()
|
|
64
|
+
if self._auto.Browser is not None:
|
|
65
|
+
self._browser_registry.register(self._auto.Browser)
|
|
66
|
+
|
|
59
67
|
async def run(self) -> None:
|
|
60
68
|
ws_url = (
|
|
61
69
|
self._master_url.replace("http://", "ws://").replace("https://", "wss://")
|
|
62
70
|
+ f"/api/v1/automations/{self._auto.automation_type}/workers/{self._worker_id}/control"
|
|
63
71
|
)
|
|
72
|
+
# Pre-warm shared resources BEFORE accepting slot work.
|
|
73
|
+
await self._browser_registry.start_all()
|
|
74
|
+
|
|
64
75
|
self._slot_runner = SlotRunner(
|
|
65
|
-
self._auto, self._worker_id, self._redis_url, self._on_output
|
|
76
|
+
self._auto, self._worker_id, self._redis_url, self._on_output,
|
|
77
|
+
browser_registry=self._browser_registry,
|
|
66
78
|
)
|
|
67
79
|
self._reconcile = ReconcileLoop(self._slot_runner.make_task, self._cfg_cls)
|
|
68
80
|
|
|
@@ -88,6 +100,7 @@ class Agent:
|
|
|
88
100
|
await self._reconcile.stop()
|
|
89
101
|
if self._ws is not None:
|
|
90
102
|
await self._ws.stop()
|
|
103
|
+
await self._browser_registry.stop_all()
|
|
91
104
|
|
|
92
105
|
async def stop(self) -> None:
|
|
93
106
|
self._stop.set()
|
|
@@ -14,6 +14,7 @@ from fleet.core.automation import (
|
|
|
14
14
|
Task,
|
|
15
15
|
)
|
|
16
16
|
from fleet.core.backend import Backend, backend_from_url
|
|
17
|
+
from fleet.core.browser import BrowserRegistry
|
|
17
18
|
from fleet.core.context import Context
|
|
18
19
|
from fleet.core.events import EventBus
|
|
19
20
|
from fleet.core.logging import bind_log_context, reset_log_context
|
|
@@ -39,6 +40,8 @@ class SlotRunner:
|
|
|
39
40
|
worker_id: str,
|
|
40
41
|
redis_url: str,
|
|
41
42
|
on_output: Any, # async (slot_id, payload_dict) -> None
|
|
43
|
+
*,
|
|
44
|
+
browser_registry: Optional[BrowserRegistry] = None,
|
|
42
45
|
) -> None:
|
|
43
46
|
self._auto = automation
|
|
44
47
|
self._auto_cls = type(automation)
|
|
@@ -47,6 +50,7 @@ class SlotRunner:
|
|
|
47
50
|
self._on_output = on_output
|
|
48
51
|
self._backend: Optional[Backend] = None
|
|
49
52
|
self._provider_cache: Optional[tuple[str, str, ProxyProvider]] = None
|
|
53
|
+
self._browser_registry = browser_registry
|
|
50
54
|
|
|
51
55
|
async def _ensure_backend(self) -> Backend:
|
|
52
56
|
if self._backend is None:
|
|
@@ -77,6 +81,13 @@ class SlotRunner:
|
|
|
77
81
|
except Exception:
|
|
78
82
|
log.exception("provider acquire failed; running without proxy")
|
|
79
83
|
|
|
84
|
+
# Resolve browser resource from registry if the automation declared
|
|
85
|
+
# a Browser need. Same instance is shared across all slots of this
|
|
86
|
+
# automation in this worker — the pool is process-wide.
|
|
87
|
+
browser_resource = None
|
|
88
|
+
if self._browser_registry is not None and self._auto_cls.Browser is not None:
|
|
89
|
+
browser_resource = self._browser_registry.register(self._auto_cls.Browser)
|
|
90
|
+
|
|
80
91
|
ctx = Context(
|
|
81
92
|
automation_type=self._auto.automation_type,
|
|
82
93
|
worker_id=self._worker_id,
|
|
@@ -91,6 +102,7 @@ class SlotRunner:
|
|
|
91
102
|
_automation_cls=self._auto_cls,
|
|
92
103
|
_proxy=proxy_handle,
|
|
93
104
|
proxy_url=proxy_url,
|
|
105
|
+
browser=browser_resource,
|
|
94
106
|
)
|
|
95
107
|
|
|
96
108
|
if isinstance(self._auto, ContinuousAutomation):
|
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
from fleet_browser.browser import BrowserConfig, ChromiumWorker
|
|
2
|
+
from fleet_browser.display import ensure_virtual_display
|
|
2
3
|
from fleet_browser.fingerprint import Fingerprint, FingerprintFactory
|
|
3
|
-
from fleet_browser.humanizer import Humanizer
|
|
4
|
-
from fleet_browser.pool import BrowserPool, slot
|
|
5
|
-
from fleet_browser.proxy_extension import build_proxy_auth_extension, parse_proxy_url
|
|
4
|
+
from fleet_browser.pool import BrowserPool, make_browser_pool, slot
|
|
6
5
|
from fleet_browser.smart_router import DEFAULT_RULES, RouterStats, SmartRouter, SmartRule
|
|
7
6
|
from fleet_browser.stealth import FingerprintStealth, NoOpStealth, Stealth
|
|
8
7
|
|
|
@@ -14,13 +13,12 @@ __all__ = [
|
|
|
14
13
|
"Fingerprint",
|
|
15
14
|
"FingerprintFactory",
|
|
16
15
|
"FingerprintStealth",
|
|
17
|
-
"Humanizer",
|
|
18
16
|
"NoOpStealth",
|
|
19
17
|
"RouterStats",
|
|
20
18
|
"SmartRouter",
|
|
21
19
|
"SmartRule",
|
|
22
20
|
"Stealth",
|
|
23
|
-
"build_proxy_auth_extension",
|
|
24
|
-
"parse_proxy_url",
|
|
21
|
+
"ensure_virtual_display",
|
|
22
|
+
"make_browser_pool",
|
|
25
23
|
"slot",
|
|
26
24
|
]
|