fleet-framework 0.1.0__tar.gz → 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/PKG-INFO +5 -2
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet/cli.py +228 -0
- fleet_framework-0.1.2/fleet/core/local_runner.py +234 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet_browser/browser.py +66 -3
- fleet_framework-0.1.2/fleet_browser/cloak.py +120 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet_browser/fingerprint.py +6 -1
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet_cloudflare/replay.py +6 -6
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet_framework.egg-info/PKG-INFO +5 -2
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet_framework.egg-info/SOURCES.txt +2 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet_framework.egg-info/requires.txt +5 -1
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/pyproject.toml +6 -2
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/LICENSE +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/README.md +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet/__init__.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet/core/__init__.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet/core/automation.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet/core/backend.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet/core/config.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet/core/context.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet/core/contract.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet/core/country_presets.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet/core/events.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet/core/logging.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet/core/memory_backend.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet/core/metrics.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet/core/otel.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet/core/primitives.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet/core/protocol.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet/core/proxy.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet/core/reconcile.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet/core/sqlite_backend.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet/core/store.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet/master/__init__.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet/master/api.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet/master/app.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet/master/auth.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet/master/broadcaster.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet/master/dashboard/__init__.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet/master/dashboard/router.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet/master/dashboard/static/style.css +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet/master/dashboard/templates/index.html +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet/master/metrics_route.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet/master/ratelimit.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet/master/ws_router.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet/worker/__init__.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet/worker/agent.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet/worker/reconcile_loop.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet/worker/slot_runner.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet/worker/ws_client.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet_browser/__init__.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet_browser/cert.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet_browser/humanizer.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet_browser/pool.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet_browser/proxy_extension.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet_browser/solver.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet_browser/stealth.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet_cloudflare/__init__.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet_cloudflare/bypasser.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet_cloudflare/harvest.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet_cloudflare/solver.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet_content/__init__.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet_content/automation.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet_content/contracts.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet_detect/__init__.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet_detect/contracts.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet_detect/detect.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet_framework.egg-info/dependency_links.txt +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet_framework.egg-info/entry_points.txt +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet_framework.egg-info/top_level.txt +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet_headers/__init__.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet_headers/profiles.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet_jobs/__init__.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet_jobs/automation.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet_jobs/contracts.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet_marketplace/__init__.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet_marketplace/automation.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet_marketplace/contracts.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet_news/__init__.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet_news/automation.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet_news/contracts.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet_place/__init__.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet_place/automation.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet_place/contracts.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet_provider_dataimpulse/__init__.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet_provider_evomi/__init__.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet_serp/__init__.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet_serp/automation.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet_serp/contracts.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet_social/__init__.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet_social/automation.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet_social/contracts.py +0 -0
- {fleet_framework-0.1.0 → fleet_framework-0.1.2}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: fleet-framework
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.1.2
|
|
4
4
|
Summary: generic distributed-automation framework — master/worker, browser pool, anti-bot helpers, and abstract automation contracts (SERP, content, news, place, marketplace, jobs, social)
|
|
5
5
|
Author: Sarper Avci
|
|
6
6
|
License: MIT
|
|
@@ -23,6 +23,9 @@ Requires-Dist: DrissionPage>=4.1.0; extra == "browser"
|
|
|
23
23
|
Requires-Dist: cryptography>=42.0.0; extra == "browser"
|
|
24
24
|
Provides-Extra: cloudflare
|
|
25
25
|
Requires-Dist: fleet-framework[browser]; extra == "cloudflare"
|
|
26
|
+
Provides-Extra: cloak
|
|
27
|
+
Requires-Dist: fleet-framework[browser]; extra == "cloak"
|
|
28
|
+
Requires-Dist: cloakbrowser>=0.3; extra == "cloak"
|
|
26
29
|
Provides-Extra: otel
|
|
27
30
|
Requires-Dist: opentelemetry-api>=1.27.0; extra == "otel"
|
|
28
31
|
Requires-Dist: opentelemetry-sdk>=1.27.0; extra == "otel"
|
|
@@ -31,7 +34,7 @@ Provides-Extra: test
|
|
|
31
34
|
Requires-Dist: pytest>=8.0; extra == "test"
|
|
32
35
|
Requires-Dist: pytest-asyncio>=0.23; extra == "test"
|
|
33
36
|
Provides-Extra: all
|
|
34
|
-
Requires-Dist: fleet-framework[browser,cloudflare,otel]; extra == "all"
|
|
37
|
+
Requires-Dist: fleet-framework[browser,cloak,cloudflare,otel]; extra == "all"
|
|
35
38
|
Dynamic: license-file
|
|
36
39
|
|
|
37
40
|
# Fleet
|
|
@@ -208,6 +208,234 @@ def submit(
|
|
|
208
208
|
click.echo(json.dumps(r.json()))
|
|
209
209
|
|
|
210
210
|
|
|
211
|
+
@cli.command(name="describe")
@click.argument("automation_type")
def describe_cmd(automation_type: str) -> None:
    """Print an automation's TaskPayload, Config and Output schemas.

    Useful before `fleet run`: shows you which `-k key=value` pairs the
    automation accepts.
    """
    # NOTE: the docstring above doubles as the click help text, so it is
    # kept word-for-word.
    from fleet.core.automation import (
        BatchAutomation,
        ContinuousAutomation,
        catalog_doc,
        load_entry_points,
    )

    registry = load_entry_points()
    automation_cls = registry.get(automation_type)
    if automation_cls is None:
        # Unknown type: list what is installed and exit with a usage error.
        available = ", ".join(sorted(registry)) or "(none installed)"
        click.echo(
            f"automation '{automation_type}' not registered. available: {available}",
            err=True,
        )
        sys.exit(2)

    # Label the automation by the first known base it derives from; fall
    # back to the name of its immediate parent class otherwise.
    known_kinds = (
        (BatchAutomation, "BatchAutomation"),
        (ContinuousAutomation, "ContinuousAutomation"),
    )
    for base, label in known_kinds:
        if issubclass(automation_cls, base):
            kind = label
            break
    else:
        kind = automation_cls.__mro__[1].__name__

    doc = catalog_doc(automation_cls)

    header = (
        f"name: {automation_cls.automation_type}",
        f"class: {automation_cls.__module__}.{automation_cls.__name__}",
        f"kind: {kind}",
        "",
        "config schema:",
    )
    for line in header:
        click.echo(line)
    click.echo(json.dumps(doc["config"], indent=2))

    # Optional sections: only printed when the catalog document has them.
    optional_sections = (
        ("queue", "task payload schema (the shape -k / -p builds):"),
        ("stream", "emit (Output) schema:"),
    )
    for key, heading in optional_sections:
        if key in doc:
            click.echo("")
            click.echo(heading)
            click.echo(json.dumps(doc[key]["payload"], indent=2))
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
def _coerce_kv(value: str) -> object:
    """Interpret a `--kv` value as JSON when possible, else keep the string.

    `n=10` therefore yields an int, `tags=["a","b"]` a list, and `q=test`
    (not valid JSON) stays the plain string `"test"`.
    """
    try:
        parsed = json.loads(value)
    except ValueError:
        # json.JSONDecodeError is a ValueError subclass, so this one clause
        # covers every decode failure; the raw text is used as-is.
        return value
    return parsed
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
def _parse_range(spec: str) -> list[int]:
    """Expand a page spec into the list of integers it denotes.

    `'1-10'` → [1..10] (inclusive); `'3'` → [3]. A descending range such
    as `'5-1'` yields an empty list.

    Raises:
        ValueError: if `spec` is not `N` or `N-M` with integer bounds. The
            message names the offending spec instead of surfacing the bare
            `int()` conversion error.
    """
    text = spec.strip()
    lo_text, dash, hi_text = text.partition("-")
    try:
        if not dash:
            return [int(text)]
        lo, hi = int(lo_text), int(hi_text)
    except ValueError:
        # `from None` hides the low-level int() error, which only mentions
        # one fragment of the spec and confuses CLI users.
        raise ValueError(
            f"invalid page range {spec!r}: expected 'N' or 'N-M' with integer bounds"
        ) from None
    return list(range(lo, hi + 1))
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
@cli.command(name="run", context_settings={"show_default": True})
@click.argument("automation_type")
@click.option("-k", "--kv", "kvs", multiple=True, metavar="KEY=VALUE",
              help="Set a payload field; repeatable. Values are JSON-coerced "
                   "(so `n=10` is int, `q=test` is str). Combined into a base "
                   "payload that --pages / --repeat / -p expand from.")
@click.option("-p", "--payload", "payloads_json", multiple=True, metavar="JSON",
              help="One full JSON payload per occurrence. Each becomes a "
                   "separate task. Skips the --kv base payload.")
@click.option("--pages", "pages_spec", default=None, metavar="N-M",
              help="Expand the base payload into one task per integer page in "
                   "[N, M], setting `page=N` on each.")
@click.option("--repeat", "repeat_n", default=None, type=int,
              help="Submit the base payload N times.")
@click.option("--from-jsonl", "from_jsonl", default=None, type=click.Path(exists=True, dir_okay=False),
              help="Read one JSON payload per line; each line becomes one task.")
@click.option("--config", "config_json", default=None, metavar="JSON",
              help="JSON config override for the automation's Config schema.")
@click.option("--concurrency", default=1, type=int,
              help="Max parallel tasks. Defaults to 1 for deterministic output.")
@click.option("--timeout", default=None, type=float,
              help="Per-task wall-clock timeout, in seconds.")
@click.option("--duration", "duration_seconds", default=None, type=float,
              help="ContinuousAutomation only: run for N seconds then stop. "
                   "Default 5s when running a ContinuousAutomation.")
@click.option("--slots", "slots", default=1, type=int,
              help="ContinuousAutomation only: number of parallel slot "
                   "coroutines to spawn.")
def run_cmd(
    automation_type: str,
    kvs: tuple[str, ...],
    payloads_json: tuple[str, ...],
    pages_spec: str | None,
    repeat_n: int | None,
    from_jsonl: str | None,
    config_json: str | None,
    concurrency: int,
    timeout: float | None,
    duration_seconds: float | None,
    slots: int,
) -> None:
    """Run an automation in-process for testing. No master/worker needed.

    Each `ctx.emit(...)` is printed to stdout as one JSON line.

    Works for both BatchAutomation and ContinuousAutomation; use
    --pages/--repeat/-p for batch, --duration/--slots for continuous.

    Examples:

    Batch (run one task per page):
    fleet run serp-bing -k q=anthropic -k locale=en-US --pages 1-3
    fleet run serp-bing -p '{"q":"anthropic","page":1}'
    fleet run cf-harvester -k url=https://example.com --repeat 5
    fleet run serp-bing --from-jsonl queries.jsonl

    Continuous (run slot for N seconds):
    fleet run hello-world --duration 5 --slots 2
    fleet run news-firehose --duration 30 -k feeds=https://...

    Use `fleet describe <type>` to see the payload + config schemas.
    """
    # Exit codes: 2 for bad command-line input (unknown automation,
    # malformed JSON / --kv / --pages), 1 for runtime failures.
    from fleet.core.automation import BatchAutomation, ContinuousAutomation, load_entry_points
    from fleet.core.local_runner import run_local, run_local_continuous

    reg = load_entry_points()
    cls = reg.get(automation_type)
    if cls is None:
        available = ", ".join(sorted(reg)) or "(none installed)"
        click.echo(
            f"automation '{automation_type}' not registered. available: {available}",
            err=True,
        )
        sys.exit(2)

    config = None
    if config_json:
        try:
            config = json.loads(config_json)
        except json.JSONDecodeError as e:
            click.echo(f"--config: {e}", err=True)
            sys.exit(2)

    # Base payload assembled from the repeatable -k KEY=VALUE pairs.
    base: dict[str, object] = {}
    for spec in kvs:
        if "=" not in spec:
            click.echo(f"--kv expects KEY=VALUE, got: {spec!r}", err=True)
            sys.exit(2)
        k, v = spec.split("=", 1)
        base[k] = _coerce_kv(v)

    if issubclass(cls, ContinuousAutomation):
        # Continuous automations take no task payloads; when --config is
        # absent the -k pairs are used as the config instead.
        cont_config = config if config is not None else (base or None)
        try:
            summary = asyncio.run(run_local_continuous(
                cls,
                duration_seconds=duration_seconds if duration_seconds is not None else 5.0,
                config=cont_config,
                slots=slots,
            ))
        except (ValueError, TypeError) as e:
            click.echo(f"error: {e}", err=True)
            sys.exit(1)
        click.echo(f"# ran continuous: {summary['emits']} emits", err=True)
        return

    if not issubclass(cls, BatchAutomation):
        click.echo(
            f"'{automation_type}' is neither BatchAutomation nor ContinuousAutomation",
            err=True,
        )
        sys.exit(1)

    payloads: list[dict] = []
    for raw in payloads_json:
        try:
            payloads.append(json.loads(raw))
        except json.JSONDecodeError as e:
            click.echo(f"--payload {raw!r}: {e}", err=True)
            sys.exit(2)

    if from_jsonl:
        # JSON text is UTF-8 by specification; do not depend on the
        # platform's locale encoding when reading the file.
        with open(from_jsonl, encoding="utf-8") as f:
            for lineno, line in enumerate(f, 1):
                line = line.strip()
                if not line:
                    continue
                try:
                    payloads.append(json.loads(line))
                except json.JSONDecodeError as e:
                    click.echo(f"{from_jsonl}:{lineno}: {e}", err=True)
                    sys.exit(2)

    if pages_spec:
        # A malformed range is a usage error like any other bad option:
        # report it and exit 2 rather than letting ValueError escape as a
        # traceback.
        try:
            pages = _parse_range(pages_spec)
        except ValueError as e:
            click.echo(f"--pages: {e}", err=True)
            sys.exit(2)
        payloads.extend({**base, "page": page} for page in pages)
    elif repeat_n is not None:
        payloads.extend({**base} for _ in range(repeat_n))
    elif not payloads:
        # Nothing explicit was given: run a single task from the base payload.
        payloads.append(base)

    try:
        summary = asyncio.run(run_local(
            cls,
            payloads,
            config=config,
            concurrency=concurrency,
            timeout=timeout,
        ))
    except (ValueError, TypeError) as e:
        click.echo(f"error: {e}", err=True)
        sys.exit(1)

    click.echo(
        f"# ran {summary['ok']} ok, {summary['failed']} failed "
        f"({len(payloads)} total)",
        err=True,
    )
    # Only a run in which every task failed is reported as a failure.
    if summary["failed"] and summary["ok"] == 0:
        sys.exit(1)
|
|
437
|
+
|
|
438
|
+
|
|
211
439
|
@cli.command()
|
|
212
440
|
@click.option("--master-url", envvar="MASTER_URL", default=None)
|
|
213
441
|
@click.option("--admin-token", envvar="FLEET_ADMIN_TOKEN", default=None)
|
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
"""Run an automation in-process without a master/worker/backend stack.
|
|
2
|
+
|
|
3
|
+
For CLI smoke-tests (`fleet run <type>`) and unit-style integration: spins
|
|
4
|
+
up an InMemoryBackend, stubs the event bus, validates payload/config
|
|
5
|
+
against the automation's declared schemas, and drives the right entry
|
|
6
|
+
point. Each `ctx.emit(...)` is forwarded to an emit callback (stdout by
|
|
7
|
+
default).
|
|
8
|
+
|
|
9
|
+
- BatchAutomation: drives `run_one` once per payload, returns when all done.
|
|
10
|
+
- ContinuousAutomation: drives `run_slot` for `duration_seconds` then signals
|
|
11
|
+
shutdown and returns.
|
|
12
|
+
|
|
13
|
+
Not suitable for production — there's no DLQ, no retries, no observability.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import asyncio
|
|
19
|
+
import json
|
|
20
|
+
import logging
|
|
21
|
+
import sys
|
|
22
|
+
from typing import Any, AsyncIterator, Awaitable, Callable, Optional
|
|
23
|
+
|
|
24
|
+
from pydantic import BaseModel, ValidationError
|
|
25
|
+
|
|
26
|
+
from fleet.core.automation import (
|
|
27
|
+
BaseAutomation,
|
|
28
|
+
BatchAutomation,
|
|
29
|
+
ContinuousAutomation,
|
|
30
|
+
Task,
|
|
31
|
+
)
|
|
32
|
+
from fleet.core.context import Context
|
|
33
|
+
from fleet.core.memory_backend import InMemoryBackend
|
|
34
|
+
from fleet.core.metrics import SlotMetrics
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class _StubEventBus:
    """Event bus stand-in for in-process runs: nothing is delivered anywhere."""

    async def publish(self, topic: str, payload: dict[str, Any]) -> None:
        """Accept and discard `payload`."""
        return

    async def subscribe(self, topic: str) -> AsyncIterator[dict[str, Any]]:
        """Async generator that finishes immediately without yielding."""
        return
        # Unreachable, but its presence is what makes this function an
        # async generator rather than a plain coroutine.
        yield  # type: ignore[unreachable]
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _coerce_payload(cls: type[BaseAutomation], raw: dict[str, Any]) -> BaseModel:
    """Validate `raw` against the automation's `TaskPayload` schema.

    Raises:
        TypeError: if the automation declares no `TaskPayload`.
        ValueError: if `raw` fails schema validation (chained from the
            underlying `ValidationError`).
    """
    schema = cls.TaskPayload
    if schema is None:
        raise TypeError(
            f"{cls.automation_type}: automation has no TaskPayload schema; "
            "run_local() requires a BatchAutomation"
        )

    try:
        model = schema.model_validate(raw)
    except ValidationError as exc:
        raise ValueError(
            f"payload does not match {schema.__name__} schema: {exc}"
        ) from exc
    return model
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
async def _default_emit(payload: dict[str, Any]) -> None:
    """Write `payload` to stdout as one JSON line and flush immediately.

    Values `json` cannot serialise natively are rendered with `str()`.
    """
    line = json.dumps(payload, default=str)
    out = sys.stdout
    out.write(line + "\n")
    out.flush()
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _build_context(
    cls: type[BaseAutomation],
    cfg: Any,
    emit_cb: Callable[[dict[str, Any]], Awaitable[None]],
    backend: InMemoryBackend,
    log: logging.Logger,
    worker_id: str,
    slot_id: int = 0,
) -> tuple[Context, asyncio.Event]:
    """Assemble a `Context` for an in-process run.

    Returns the context together with the fresh `asyncio.Event` that was
    handed to it as its shutdown signal, so the caller can set it later.
    The event bus is a `_StubEventBus` and the metrics object a new
    `SlotMetrics`.
    """
    stop_event = asyncio.Event()
    context = Context(
        automation_type=cls.automation_type,
        worker_id=worker_id,
        slot_id=slot_id,
        config=cfg,
        shutdown=stop_event,
        logger=log,
        _emit_raw=emit_cb,
        _backend=backend,
        _events=_StubEventBus(),  # type: ignore[arg-type]
        _metrics=SlotMetrics(),
        _automation_cls=cls,
    )
    return context, stop_event
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
async def run_local(
    cls: type[BaseAutomation],
    payloads: list[dict[str, Any]],
    *,
    config: Optional[dict[str, Any]] = None,
    emit: Optional[Callable[[dict[str, Any]], Awaitable[None]]] = None,
    worker_id: str = "local",
    concurrency: int = 1,
    timeout: Optional[float] = None,
    logger: Optional[logging.Logger] = None,
) -> dict[str, int]:
    """Drive `cls.run_one` for each payload in-process.

    Args:
        cls: The BatchAutomation subclass to run.
        payloads: Raw payload dicts; each is validated against
            `cls.TaskPayload` and becomes one task with id `local-<index>`.
        config: Raw config dict validated against `cls.Config`; an empty
            dict is validated when omitted.
        emit: Callback receiving each emitted payload. Defaults to writing
            JSON lines to stdout.
        worker_id: Worker id recorded on the context.
        concurrency: Maximum number of tasks in flight (values below 1 are
            treated as 1).
        timeout: Optional per-task timeout in seconds.
        logger: Logger to use; defaults to one named after the automation.

    Returns:
        A `{ok, failed}` summary. Validation errors raise before any
        task runs; per-task runtime errors are logged and counted as `failed`.

    Raises:
        TypeError: if `cls` is not a BatchAutomation.
        ValueError: if a payload does not match the TaskPayload schema.
    """
    if not issubclass(cls, BatchAutomation):
        raise TypeError(
            f"{cls.__name__} must be a BatchAutomation to run via run_local "
            f"(got {cls.__mro__[1].__name__})"
        )

    # Validate all input before allocating anything, so a bad config or
    # payload cannot leave a backend open behind the raised error.
    cfg = cls.Config.model_validate(config or {})
    tasks = [
        Task(id=f"local-{i}", payload=_coerce_payload(cls, p))
        for i, p in enumerate(payloads)
    ]

    automation = cls()
    log = logger or logging.getLogger(cls.automation_type or cls.__name__)
    emit_cb = emit or _default_emit

    sem = asyncio.Semaphore(max(1, concurrency))
    ok = 0
    failed = 0

    async def _run_one(task: Task) -> None:
        nonlocal ok, failed
        async with sem:
            try:
                coro = automation.run_one(task, ctx)  # type: ignore[attr-defined]
                if timeout is not None:
                    await asyncio.wait_for(coro, timeout=timeout)
                else:
                    await coro
                ok += 1
            except asyncio.TimeoutError:
                failed += 1
                log.error("task %s timed out after %.1fs", task.id, timeout)
            except Exception as e:
                # Per-task failures are counted, not propagated, so the
                # remaining tasks still run.
                failed += 1
                log.exception("task %s failed: %s", task.id, e)

    backend = InMemoryBackend()
    try:
        ctx, _shutdown = _build_context(cls, cfg, emit_cb, backend, log, worker_id)
        try:
            await asyncio.gather(*(_run_one(t) for t in tasks))
        finally:
            # cleanup() is best-effort: a failure there is logged and must
            # not mask the outcome of the run itself.
            try:
                await automation.cleanup(ctx)
            except Exception:
                log.exception("cleanup() raised")
    finally:
        # Outer finally guarantees the backend is closed on every path,
        # including a failure while building the context.
        await backend.aclose()

    return {"ok": ok, "failed": failed}
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
async def run_local_continuous(
    cls: type[BaseAutomation],
    *,
    duration_seconds: float = 5.0,
    config: Optional[dict[str, Any]] = None,
    emit: Optional[Callable[[dict[str, Any]], Awaitable[None]]] = None,
    worker_id: str = "local",
    slots: int = 1,
    logger: Optional[logging.Logger] = None,
) -> dict[str, int]:
    """Drive `cls.run_slot` for `duration_seconds`, then signal shutdown.

    Spawns `slots` parallel slot coroutines (at least one) that share one
    automation instance and one in-memory backend, so cross-slot uniqueness
    (nonces, dedupe locks) can be smoke-tested.

    Args:
        cls: The ContinuousAutomation subclass to run.
        duration_seconds: How long to let the slots run before shutdown.
        config: Raw config dict validated against `cls.Config`; an empty
            dict is validated when omitted.
        emit: Callback receiving each emitted payload. Defaults to writing
            JSON lines to stdout.
        worker_id: Worker id recorded on every slot context.
        slots: Number of slot coroutines (values below 1 are treated as 1).
        logger: Logger to use; defaults to one named after the automation.

    Returns:
        `{emits}` — the number of emit calls observed — once all slots
        have stopped.

    Raises:
        TypeError: if `cls` is not a ContinuousAutomation.
    """
    if not issubclass(cls, ContinuousAutomation):
        raise TypeError(
            f"{cls.__name__} must be a ContinuousAutomation to run via "
            f"run_local_continuous (got {cls.__mro__[1].__name__})"
        )

    # Validate the config before allocating anything, so a bad config
    # cannot leave a backend open behind the raised error.
    cfg = cls.Config.model_validate(config or {})

    automation = cls()
    log = logger or logging.getLogger(cls.automation_type or cls.__name__)

    emit_count = 0

    async def _counting_emit(payload: dict[str, Any]) -> None:
        nonlocal emit_count
        emit_count += 1
        await (emit or _default_emit)(payload)

    async def _drive(ctx: Context) -> None:
        try:
            await automation.run_slot(ctx)  # type: ignore[attr-defined]
        except asyncio.CancelledError:
            # Cancellation is how a slot that ignores shutdown is stopped
            # below; it is an expected way for the slot to end.
            pass
        except Exception as e:
            log.exception("slot %d crashed: %s", ctx.slot_id, e)

    backend = InMemoryBackend()
    try:
        contexts = [
            _build_context(cls, cfg, _counting_emit, backend, log, worker_id, slot_id=i)
            for i in range(max(1, slots))
        ]
        tasks = [asyncio.create_task(_drive(ctx)) for ctx, _sd in contexts]

        try:
            await asyncio.sleep(duration_seconds)
        finally:
            for _ctx, sd in contexts:
                sd.set()
            # Give slots a brief grace period to notice shutdown.
            try:
                await asyncio.wait_for(
                    asyncio.gather(*tasks, return_exceptions=True), timeout=2.0
                )
            except asyncio.TimeoutError:
                # Slots that ignored the shutdown event are cancelled.
                for t in tasks:
                    if not t.done():
                        t.cancel()
                await asyncio.gather(*tasks, return_exceptions=True)
            # cleanup() runs once, with the first slot's context; it is
            # best-effort and only logged on failure.
            try:
                await automation.cleanup(contexts[0][0])
            except Exception:
                log.exception("cleanup() raised")
    finally:
        # Outer finally guarantees the backend is closed on every path,
        # including a failure while building the slot contexts.
        await backend.aclose()

    return {"emits": emit_count}
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
__all__ = ["run_local", "run_local_continuous"]
|
|
@@ -13,6 +13,12 @@ from typing import Optional
|
|
|
13
13
|
import psutil
|
|
14
14
|
from DrissionPage import ChromiumOptions, ChromiumPage
|
|
15
15
|
|
|
16
|
+
from fleet_browser.cloak import (
|
|
17
|
+
CLOAK_IGNORE_DEFAULT_ARGS,
|
|
18
|
+
cloak_stealth_args,
|
|
19
|
+
ensure_cloak_binary,
|
|
20
|
+
resolve_engine,
|
|
21
|
+
)
|
|
16
22
|
from fleet_browser.proxy_extension import build_proxy_auth_extension, parse_proxy_url
|
|
17
23
|
from fleet_browser.stealth import Stealth
|
|
18
24
|
|
|
@@ -120,6 +126,39 @@ class BrowserConfig:
|
|
|
120
126
|
|
|
121
127
|
extra_args: tuple[str, ...] = ()
|
|
122
128
|
|
|
129
|
+
engine: str = "auto"
|
|
130
|
+
"""Which Chromium build to launch. `"auto"` picks `"cloak"` when the
|
|
131
|
+
cloakbrowser package is installed, else falls back to `"chrome"`.
|
|
132
|
+
Set explicitly to override:
|
|
133
|
+
- `"chrome"`: use the system Chromium / Brave / Thorium auto-detect path
|
|
134
|
+
- `"cloak"`: use the CloakBrowser patched binary (downloads on first
|
|
135
|
+
use, raises if `cloakbrowser` isn't installed)
|
|
136
|
+
Cloak applies source-level canvas/WebGL/audio/font/WebRTC patches that
|
|
137
|
+
JS-level stealth can't reach. Stack FingerprintStealth on top to add
|
|
138
|
+
per-launch UA/version diversity from the uaforge corpus."""
|
|
139
|
+
|
|
140
|
+
cloak_cache_dir: Optional[str] = None
|
|
141
|
+
"""Override the cloak binary cache dir (default ~/.cloakbrowser/)."""
|
|
142
|
+
|
|
143
|
+
cloak_fingerprint_seed: Optional[int] = None
|
|
144
|
+
"""Force a specific cloak --fingerprint=N seed. Default: random per
|
|
145
|
+
launch. Pin a seed for reproducible tests."""
|
|
146
|
+
|
|
147
|
+
cloak_platform: str = "windows"
|
|
148
|
+
"""`--fingerprint-platform` flag for cloak's binary. Must match the
|
|
149
|
+
OS family FingerprintStealth's UA claims (uaforge default is Windows).
|
|
150
|
+
Set 'macos' if you switch uaforge to a Mac corpus."""
|
|
151
|
+
|
|
152
|
+
cloak_timezone: Optional[str] = None
|
|
153
|
+
"""IANA timezone for cloak's `--fingerprint-timezone` flag. Set when
|
|
154
|
+
using residential proxies so the spoofed timezone matches the egress
|
|
155
|
+
geo (otherwise CF flags the mismatch). Falls back to system tz."""
|
|
156
|
+
|
|
157
|
+
cloak_webrtc_ip: Optional[str] = None
|
|
158
|
+
"""Public IP cloak should advertise via WebRTC. Set to the proxy's exit
|
|
159
|
+
IP when using residential — otherwise WebRTC leaks your real LAN IP
|
|
160
|
+
and CF correlates the mismatch."""
|
|
161
|
+
|
|
123
162
|
|
|
124
163
|
class ChromiumWorker:
|
|
125
164
|
|
|
@@ -133,9 +172,16 @@ class ChromiumWorker:
|
|
|
133
172
|
if self.page is not None:
|
|
134
173
|
return self.page
|
|
135
174
|
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
175
|
+
engine = resolve_engine(self.config.engine)
|
|
176
|
+
if engine == "cloak":
|
|
177
|
+
# Caller may still pin browser_binary explicitly — honor it.
|
|
178
|
+
binary = self.config.browser_binary or ensure_cloak_binary(
|
|
179
|
+
self.config.cloak_cache_dir
|
|
180
|
+
)
|
|
181
|
+
else:
|
|
182
|
+
binary = self.config.browser_binary or _find_browser_binary(
|
|
183
|
+
prefer=self.config.prefer_browser,
|
|
184
|
+
)
|
|
139
185
|
|
|
140
186
|
self._user_data_dir = Path(tempfile.mkdtemp(prefix="chromium-worker-"))
|
|
141
187
|
self._owns_user_data_dir = True
|
|
@@ -147,6 +193,23 @@ class ChromiumWorker:
|
|
|
147
193
|
opts.set_user_data_path(str(self._user_data_dir))
|
|
148
194
|
opts.set_argument(f"--window-size={self.config.window_size[0]},{self.config.window_size[1]}")
|
|
149
195
|
|
|
196
|
+
if engine == "cloak":
|
|
197
|
+
for arg in cloak_stealth_args(
|
|
198
|
+
platform=self.config.cloak_platform,
|
|
199
|
+
timezone=self.config.cloak_timezone,
|
|
200
|
+
webrtc_ip=self.config.cloak_webrtc_ip,
|
|
201
|
+
seed=self.config.cloak_fingerprint_seed,
|
|
202
|
+
):
|
|
203
|
+
opts.set_argument(arg)
|
|
204
|
+
# Suppress Chromium defaults that leak automation signals.
|
|
205
|
+
# DrissionPage doesn't expose ignore_default_args; pass as
|
|
206
|
+
# --disable-features-replacement via plain args. The cloak
|
|
207
|
+
# binary recognises --no-enable-automation as an inversion.
|
|
208
|
+
for kill in CLOAK_IGNORE_DEFAULT_ARGS:
|
|
209
|
+
# Chromium accepts --disable-foo as the inverse of --enable-foo.
|
|
210
|
+
inv = kill.replace("--enable-", "--disable-", 1)
|
|
211
|
+
opts.set_argument(inv)
|
|
212
|
+
|
|
150
213
|
# HttpsUpgrades is off so self-signed HTTPS (TokenServer) isn't rewritten.
|
|
151
214
|
# Brave's ad/tracker/sync features both pollute the fingerprint and
|
|
152
215
|
# sometimes break Turnstile's iframe; disable them when running Brave.
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
"""Cloak engine integration — uses the CloakBrowser patched Chromium binary.
|
|
2
|
+
|
|
3
|
+
CloakBrowser is a separately-licensed binary (free for own-business use,
|
|
4
|
+
OEM license required for browser-as-a-service distribution). See:
|
|
5
|
+
https://github.com/CloakHQ/CloakBrowser
|
|
6
|
+
|
|
7
|
+
The wrapper Python package is MIT-licensed and pulled in via the optional
|
|
8
|
+
extra: `pip install fleet-framework[cloak]`. The binary is downloaded on
|
|
9
|
+
first use to ~/.cloakbrowser/ (override with CLOAKBROWSER_CACHE_DIR).
|
|
10
|
+
|
|
11
|
+
Combining cloak (source-level canvas/WebGL/audio/font/WebRTC patches) with
|
|
12
|
+
FingerprintStealth (CDP-level UA + Sec-CH-UA + navigator overrides) gives:
|
|
13
|
+
- Deep, JS-undetectable spoofing of low-level signals (cloak)
|
|
14
|
+
- Per-launch diversity of UA versions from the uaforge corpus (FingerprintStealth)
|
|
15
|
+
|
|
16
|
+
That diversity matters: cloak's binary patches use the latest Chrome only,
|
|
17
|
+
so without the CDP layer every harvest looks like the same Chrome version.
|
|
18
|
+
With CDP override on top, each launch picks a different version from uaforge
|
|
19
|
+
while keeping the renderer-level fingerprint coherent.
|
|
20
|
+
"""
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import logging
|
|
24
|
+
import os
|
|
25
|
+
import random
|
|
26
|
+
from pathlib import Path
|
|
27
|
+
from typing import Optional
|
|
28
|
+
|
|
29
|
+
logger = logging.getLogger(__name__)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
_CLOAK_AVAILABLE: Optional[bool] = None
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def cloak_available() -> bool:
|
|
36
|
+
"""Return True if the cloakbrowser wrapper package is importable.
|
|
37
|
+
|
|
38
|
+
The binary may still need to be downloaded — that's `ensure_cloak_binary`'s
|
|
39
|
+
job. This is just the cheap import probe.
|
|
40
|
+
"""
|
|
41
|
+
global _CLOAK_AVAILABLE
|
|
42
|
+
if _CLOAK_AVAILABLE is None:
|
|
43
|
+
try:
|
|
44
|
+
import cloakbrowser # noqa: F401
|
|
45
|
+
_CLOAK_AVAILABLE = True
|
|
46
|
+
except ImportError:
|
|
47
|
+
_CLOAK_AVAILABLE = False
|
|
48
|
+
return _CLOAK_AVAILABLE
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def resolve_engine(name: str) -> str:
|
|
52
|
+
"""Resolve `engine="auto"` to `"cloak"` if available, else `"chrome"`."""
|
|
53
|
+
if name != "auto":
|
|
54
|
+
return name
|
|
55
|
+
return "cloak" if cloak_available() else "chrome"
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def ensure_cloak_binary(cache_dir: Optional[str] = None) -> str:
|
|
59
|
+
"""Return path to the patched Chromium binary; download if missing.
|
|
60
|
+
|
|
61
|
+
Delegates to cloakbrowser.download.ensure_binary. ~250-400 MB download
|
|
62
|
+
on first use; cached at ~/.cloakbrowser/ (or `cache_dir`) thereafter.
|
|
63
|
+
Cloakbrowser checks for updates hourly when launched.
|
|
64
|
+
"""
|
|
65
|
+
if not cloak_available():
|
|
66
|
+
raise RuntimeError(
|
|
67
|
+
"engine='cloak' requires cloakbrowser. "
|
|
68
|
+
"Install with: pip install fleet-framework[cloak]"
|
|
69
|
+
)
|
|
70
|
+
if cache_dir:
|
|
71
|
+
os.environ.setdefault("CLOAKBROWSER_CACHE_DIR", str(Path(cache_dir).expanduser()))
|
|
72
|
+
from cloakbrowser.download import ensure_binary
|
|
73
|
+
path = ensure_binary()
|
|
74
|
+
return str(path)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def cloak_stealth_args(
|
|
78
|
+
*,
|
|
79
|
+
platform: str = "windows",
|
|
80
|
+
timezone: Optional[str] = None,
|
|
81
|
+
webrtc_ip: Optional[str] = None,
|
|
82
|
+
seed: Optional[int] = None,
|
|
83
|
+
) -> tuple[str, ...]:
|
|
84
|
+
"""Return Chromium command-line flags that activate cloak's source patches.
|
|
85
|
+
|
|
86
|
+
The `seed` drives canvas/WebGL/audio noise variation per-launch. Pass a
|
|
87
|
+
stable seed if you need reproducibility (e.g. testing). `platform` should
|
|
88
|
+
match what FingerprintStealth's UA claims — keep them aligned (default
|
|
89
|
+
'windows' matches uaforge's Windows corpus).
|
|
90
|
+
"""
|
|
91
|
+
s = seed if seed is not None else random.randint(10000, 99999)
|
|
92
|
+
args = [
|
|
93
|
+
f"--fingerprint={s}",
|
|
94
|
+
f"--fingerprint-platform={platform}",
|
|
95
|
+
]
|
|
96
|
+
if timezone:
|
|
97
|
+
args.append(f"--fingerprint-timezone={timezone}")
|
|
98
|
+
if webrtc_ip:
|
|
99
|
+
args.append(f"--fingerprint-webrtc-ip={webrtc_ip}")
|
|
100
|
+
return tuple(args)
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
CLOAK_IGNORE_DEFAULT_ARGS: tuple[str, ...] = (
|
|
104
|
+
"--enable-automation",
|
|
105
|
+
"--enable-unsafe-swiftshader",
|
|
106
|
+
)
|
|
107
|
+
"""Default args Playwright/DrissionPage pass that leak automation signals.
|
|
108
|
+
|
|
109
|
+
`--enable-automation` exposes `navigator.webdriver=true`. `--enable-unsafe-
|
|
110
|
+
swiftshader` forces SwiftShader's distinctive WebGL renderer string. We pass
|
|
111
|
+
these to Chromium's launch options so DrissionPage stops setting them."""
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
__all__ = [
|
|
115
|
+
"CLOAK_IGNORE_DEFAULT_ARGS",
|
|
116
|
+
"cloak_available",
|
|
117
|
+
"cloak_stealth_args",
|
|
118
|
+
"ensure_cloak_binary",
|
|
119
|
+
"resolve_engine",
|
|
120
|
+
]
|
|
@@ -180,7 +180,12 @@ class FingerprintFactory:
|
|
|
180
180
|
"pip install git+https://github.com/sarperavci/uaforge.git"
|
|
181
181
|
)
|
|
182
182
|
if allowed_os is None:
|
|
183
|
-
|
|
183
|
+
# Default to Windows regardless of host. The host's actual OS
|
|
184
|
+
# is irrelevant — what matters is which UA + client-hint corpus
|
|
185
|
+
# the fleet's fingerprints are drawn from. Windows is the highest-
|
|
186
|
+
# entropy population (~70% of real browsing traffic) and matches
|
|
187
|
+
# the cloak engine's `--fingerprint-platform=windows` default.
|
|
188
|
+
allowed_os = ("windows",)
|
|
184
189
|
self._gen = UserAgentGenerator(seed=seed)
|
|
185
190
|
self._rng = random.Random(seed)
|
|
186
191
|
self._min_chromium = min_chromium_version
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"""Replay helpers: turn a HarvestResult into a pre-configured HTTP client.
|
|
1
|
+
"""Replay helpers: turn a HarvestResult into a pre-configured async HTTP client.
|
|
2
2
|
|
|
3
3
|
Cloudflare ties the clearance cookie to the (IP, UA, Sec-CH-UA-*) tuple
|
|
4
4
|
that earned it. Replaying with the wrong combination earns a fresh
|
|
@@ -26,7 +26,7 @@ def replay_with_httpx(
|
|
|
26
26
|
proxy: Optional[str] = None,
|
|
27
27
|
timeout: float = 30.0,
|
|
28
28
|
) -> Any:
|
|
29
|
-
"""Return an `httpx.
|
|
29
|
+
"""Return an `httpx.AsyncClient` configured with the harvested cookie + headers.
|
|
30
30
|
|
|
31
31
|
Apply `result.cookies` scoped to the target URL's host, set
|
|
32
32
|
`result.headers` as defaults, and optionally route through a proxy
|
|
@@ -39,7 +39,7 @@ def replay_with_httpx(
|
|
|
39
39
|
cookies = httpx.Cookies()
|
|
40
40
|
for name, value in (result.cookies or {}).items():
|
|
41
41
|
cookies.set(name, value, domain=domain, path="/")
|
|
42
|
-
return httpx.Client(
|
|
42
|
+
return httpx.AsyncClient(
|
|
43
43
|
headers={**(result.headers or {})},
|
|
44
44
|
cookies=cookies,
|
|
45
45
|
timeout=timeout,
|
|
@@ -56,21 +56,21 @@ def replay_with_curl_cffi(
|
|
|
56
56
|
timeout: float = 30.0,
|
|
57
57
|
impersonate: str = "chrome",
|
|
58
58
|
) -> Any:
|
|
59
|
-
"""Return a `curl_cffi.requests.
|
|
59
|
+
"""Return a `curl_cffi.requests.AsyncSession` configured the same way.
|
|
60
60
|
|
|
61
61
|
Requires `curl_cffi` installed by the caller. Useful when the target
|
|
62
62
|
inspects TLS/JA3 — curl_cffi's `impersonate=` flag matches Chrome's
|
|
63
63
|
fingerprint at the TLS layer, which a vanilla httpx client can't.
|
|
64
64
|
"""
|
|
65
65
|
try:
|
|
66
|
-
from curl_cffi import
|
|
66
|
+
from curl_cffi.requests import AsyncSession
|
|
67
67
|
except ImportError as e:
|
|
68
68
|
raise RuntimeError(
|
|
69
69
|
"replay_with_curl_cffi requires `pip install curl_cffi`"
|
|
70
70
|
) from e
|
|
71
71
|
|
|
72
72
|
domain = _cookie_domain(target_url)
|
|
73
|
-
sess =
|
|
73
|
+
sess = AsyncSession(impersonate=impersonate, timeout=timeout)
|
|
74
74
|
sess.headers.update(result.headers or {})
|
|
75
75
|
for name, value in (result.cookies or {}).items():
|
|
76
76
|
sess.cookies.set(name, value, domain=domain, path="/")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: fleet-framework
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.1.2
|
|
4
4
|
Summary: generic distributed-automation framework — master/worker, browser pool, anti-bot helpers, and abstract automation contracts (SERP, content, news, place, marketplace, jobs, social)
|
|
5
5
|
Author: Sarper Avci
|
|
6
6
|
License: MIT
|
|
@@ -23,6 +23,9 @@ Requires-Dist: DrissionPage>=4.1.0; extra == "browser"
|
|
|
23
23
|
Requires-Dist: cryptography>=42.0.0; extra == "browser"
|
|
24
24
|
Provides-Extra: cloudflare
|
|
25
25
|
Requires-Dist: fleet-framework[browser]; extra == "cloudflare"
|
|
26
|
+
Provides-Extra: cloak
|
|
27
|
+
Requires-Dist: fleet-framework[browser]; extra == "cloak"
|
|
28
|
+
Requires-Dist: cloakbrowser>=0.3; extra == "cloak"
|
|
26
29
|
Provides-Extra: otel
|
|
27
30
|
Requires-Dist: opentelemetry-api>=1.27.0; extra == "otel"
|
|
28
31
|
Requires-Dist: opentelemetry-sdk>=1.27.0; extra == "otel"
|
|
@@ -31,7 +34,7 @@ Provides-Extra: test
|
|
|
31
34
|
Requires-Dist: pytest>=8.0; extra == "test"
|
|
32
35
|
Requires-Dist: pytest-asyncio>=0.23; extra == "test"
|
|
33
36
|
Provides-Extra: all
|
|
34
|
-
Requires-Dist: fleet-framework[browser,cloudflare,otel]; extra == "all"
|
|
37
|
+
Requires-Dist: fleet-framework[browser,cloak,cloudflare,otel]; extra == "all"
|
|
35
38
|
Dynamic: license-file
|
|
36
39
|
|
|
37
40
|
# Fleet
|
|
@@ -11,6 +11,7 @@ fleet/core/context.py
|
|
|
11
11
|
fleet/core/contract.py
|
|
12
12
|
fleet/core/country_presets.py
|
|
13
13
|
fleet/core/events.py
|
|
14
|
+
fleet/core/local_runner.py
|
|
14
15
|
fleet/core/logging.py
|
|
15
16
|
fleet/core/memory_backend.py
|
|
16
17
|
fleet/core/metrics.py
|
|
@@ -41,6 +42,7 @@ fleet/worker/ws_client.py
|
|
|
41
42
|
fleet_browser/__init__.py
|
|
42
43
|
fleet_browser/browser.py
|
|
43
44
|
fleet_browser/cert.py
|
|
45
|
+
fleet_browser/cloak.py
|
|
44
46
|
fleet_browser/fingerprint.py
|
|
45
47
|
fleet_browser/humanizer.py
|
|
46
48
|
fleet_browser/pool.py
|
|
@@ -9,12 +9,16 @@ psutil>=5.9.0
|
|
|
9
9
|
click>=8.1.0
|
|
10
10
|
|
|
11
11
|
[all]
|
|
12
|
-
fleet-framework[browser,cloudflare,otel]
|
|
12
|
+
fleet-framework[browser,cloak,cloudflare,otel]
|
|
13
13
|
|
|
14
14
|
[browser]
|
|
15
15
|
DrissionPage>=4.1.0
|
|
16
16
|
cryptography>=42.0.0
|
|
17
17
|
|
|
18
|
+
[cloak]
|
|
19
|
+
fleet-framework[browser]
|
|
20
|
+
cloakbrowser>=0.3
|
|
21
|
+
|
|
18
22
|
[cloudflare]
|
|
19
23
|
fleet-framework[browser]
|
|
20
24
|
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "fleet-framework"
|
|
7
|
-
version = "0.1.0"
|
|
7
|
+
version = "0.1.2"
|
|
8
8
|
description = "generic distributed-automation framework — master/worker, browser pool, anti-bot helpers, and abstract automation contracts (SERP, content, news, place, marketplace, jobs, social)"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.11"
|
|
@@ -30,6 +30,10 @@ browser = [
|
|
|
30
30
|
cloudflare = [
|
|
31
31
|
"fleet-framework[browser]",
|
|
32
32
|
]
|
|
33
|
+
cloak = [
|
|
34
|
+
"fleet-framework[browser]",
|
|
35
|
+
"cloakbrowser>=0.3",
|
|
36
|
+
]
|
|
33
37
|
otel = [
|
|
34
38
|
"opentelemetry-api>=1.27.0",
|
|
35
39
|
"opentelemetry-sdk>=1.27.0",
|
|
@@ -40,7 +44,7 @@ test = [
|
|
|
40
44
|
"pytest-asyncio>=0.23",
|
|
41
45
|
]
|
|
42
46
|
all = [
|
|
43
|
-
"fleet-framework[browser,cloudflare,otel]",
|
|
47
|
+
"fleet-framework[browser,cloudflare,cloak,otel]",
|
|
44
48
|
]
|
|
45
49
|
|
|
46
50
|
[project.scripts]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{fleet_framework-0.1.0 → fleet_framework-0.1.2}/fleet_framework.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|