phantomfetch 0.5.0__tar.gz → 0.5.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {phantomfetch-0.5.0 → phantomfetch-0.5.2}/PKG-INFO +1 -1
- {phantomfetch-0.5.0 → phantomfetch-0.5.2}/pyproject.toml +1 -1
- {phantomfetch-0.5.0 → phantomfetch-0.5.2}/src/phantomfetch/engines/browser/cdp.py +32 -6
- {phantomfetch-0.5.0 → phantomfetch-0.5.2}/src/phantomfetch/fetch.py +7 -3
- {phantomfetch-0.5.0 → phantomfetch-0.5.2}/README.md +0 -0
- {phantomfetch-0.5.0 → phantomfetch-0.5.2}/src/phantomfetch/__init__.py +0 -0
- {phantomfetch-0.5.0 → phantomfetch-0.5.2}/src/phantomfetch/cache.py +0 -0
- {phantomfetch-0.5.0 → phantomfetch-0.5.2}/src/phantomfetch/captcha.py +0 -0
- {phantomfetch-0.5.0 → phantomfetch-0.5.2}/src/phantomfetch/engines/__init__.py +0 -0
- {phantomfetch-0.5.0 → phantomfetch-0.5.2}/src/phantomfetch/engines/base.py +0 -0
- {phantomfetch-0.5.0 → phantomfetch-0.5.2}/src/phantomfetch/engines/browser/__init__.py +0 -0
- {phantomfetch-0.5.0 → phantomfetch-0.5.2}/src/phantomfetch/engines/browser/actions.py +0 -0
- {phantomfetch-0.5.0 → phantomfetch-0.5.2}/src/phantomfetch/engines/curl.py +0 -0
- {phantomfetch-0.5.0 → phantomfetch-0.5.2}/src/phantomfetch/pool.py +0 -0
- {phantomfetch-0.5.0 → phantomfetch-0.5.2}/src/phantomfetch/telemetry.py +0 -0
- {phantomfetch-0.5.0 → phantomfetch-0.5.2}/src/phantomfetch/tools/selector_builder.py +0 -0
- {phantomfetch-0.5.0 → phantomfetch-0.5.2}/src/phantomfetch/types.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: phantomfetch
|
|
3
|
-
Version: 0.5.0
|
|
3
|
+
Version: 0.5.2
|
|
4
4
|
Summary: High-performance agentic web scraping library combining curl-cffi speed with Playwright browser capabilities
|
|
5
5
|
Keywords: web-scraping,playwright,curl-cffi,async,browser-automation,http-client,agentic,anti-detection
|
|
6
6
|
Author: CosmicBull
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
+
import random
|
|
2
3
|
import re
|
|
3
4
|
import time
|
|
4
5
|
from typing import TYPE_CHECKING, Any, Literal, Optional
|
|
@@ -82,6 +83,8 @@ class CDPEngine:
|
|
|
82
83
|
cloak_binary_path: str | None = None,
|
|
83
84
|
persistent_context_dir: str | None = None,
|
|
84
85
|
ignore_https_errors: bool = False,
|
|
86
|
+
max_retries: int = 3,
|
|
87
|
+
retry_backoff_base: float = 2.0,
|
|
85
88
|
):
|
|
86
89
|
"""
|
|
87
90
|
Args:
|
|
@@ -126,7 +129,9 @@ class CDPEngine:
|
|
|
126
129
|
persist across sessions). When set, uses CloakBrowser's
|
|
127
130
|
persistent context API. Enables incognito bypass.
|
|
128
131
|
ignore_https_errors: Ignore TLS certificate errors (useful for proxies that
|
|
129
|
-
|
|
132
|
+
do SSL inspection/MITM). Default False.
|
|
133
|
+
max_retries: Max retries for CDP endpoint connection attempts. Default 3.
|
|
134
|
+
retry_backoff_base: Exponential backoff base for retries. Default 2.0.
|
|
130
135
|
"""
|
|
131
136
|
self.cdp_endpoint = cdp_endpoint
|
|
132
137
|
self.headless = headless
|
|
@@ -148,6 +153,8 @@ class CDPEngine:
|
|
|
148
153
|
self.cloak_binary_path = cloak_binary_path
|
|
149
154
|
self.persistent_context_dir = persistent_context_dir
|
|
150
155
|
self.ignore_https_errors = ignore_https_errors
|
|
156
|
+
self.max_retries = max_retries
|
|
157
|
+
self.retry_backoff_base = retry_backoff_base
|
|
151
158
|
|
|
152
159
|
self._cloak_browser_available = False
|
|
153
160
|
self._cloak_context: Any = None
|
|
@@ -185,11 +192,30 @@ class CDPEngine:
|
|
|
185
192
|
self._existing_page: Any = None
|
|
186
193
|
|
|
187
194
|
async def connect(self) -> None:
|
|
188
|
-
"""Initialize Playwright and connect to browser."""
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
195
|
+
"""Initialize Playwright and connect to browser with retry."""
|
|
196
|
+
last_error: Exception | None = None
|
|
197
|
+
for attempt in range(self.max_retries):
|
|
198
|
+
try:
|
|
199
|
+
if self.cloak_browser:
|
|
200
|
+
await self._connect_cloakbrowser()
|
|
201
|
+
else:
|
|
202
|
+
await self._connect_playwright()
|
|
203
|
+
return
|
|
204
|
+
except Exception as e:
|
|
205
|
+
last_error = e
|
|
206
|
+
if attempt < self.max_retries - 1:
|
|
207
|
+
wait = self.retry_backoff_base**attempt * (0.5 + random.random())
|
|
208
|
+
logger.warning(
|
|
209
|
+
f"[cdp] Connect attempt {attempt + 1}/{self.max_retries} failed: {e}. "
|
|
210
|
+
f"Retrying in {wait:.2f}s..."
|
|
211
|
+
)
|
|
212
|
+
await asyncio.sleep(wait)
|
|
213
|
+
else:
|
|
214
|
+
logger.error(
|
|
215
|
+
f"[cdp] Connect attempt {attempt + 1}/{self.max_retries} failed: {e}"
|
|
216
|
+
)
|
|
217
|
+
if last_error:
|
|
218
|
+
raise last_error
|
|
193
219
|
|
|
194
220
|
async def _connect_cloakbrowser(self) -> None:
|
|
195
221
|
"""Connect using CloakBrowser's stealth Chromium binary.
|
|
@@ -47,7 +47,7 @@ class Fetcher:
|
|
|
47
47
|
# Advanced CDP
|
|
48
48
|
cdp_use_existing_page: bool = True,
|
|
49
49
|
cdp_connection_type: str = "cdp",
|
|
50
|
-
backend: Literal["rebrowser", "playwright", "patchright"] = "
|
|
50
|
+
backend: Literal["rebrowser", "playwright", "patchright"] = "playwright",
|
|
51
51
|
# BrowserForge fingerprinting
|
|
52
52
|
fingerprint: bool = True,
|
|
53
53
|
fingerprint_options: dict[str, Any] | None = None,
|
|
@@ -59,6 +59,8 @@ class Fetcher:
|
|
|
59
59
|
cloak_browser_geoip: bool = False,
|
|
60
60
|
cloak_binary_path: str | None = None,
|
|
61
61
|
persistent_context_dir: str | None = None,
|
|
62
|
+
browser_max_retries: int = 3,
|
|
63
|
+
browser_retry_backoff_base: float = 2.0,
|
|
62
64
|
):
|
|
63
65
|
"""
|
|
64
66
|
Initialize the Fetcher.
|
|
@@ -143,6 +145,8 @@ class Fetcher:
|
|
|
143
145
|
cloak_browser_geoip=cloak_browser_geoip,
|
|
144
146
|
cloak_binary_path=cloak_binary_path,
|
|
145
147
|
persistent_context_dir=persistent_context_dir,
|
|
148
|
+
max_retries=browser_max_retries,
|
|
149
|
+
retry_backoff_base=browser_retry_backoff_base,
|
|
146
150
|
)
|
|
147
151
|
self._browser = self._cdp_engine
|
|
148
152
|
|
|
@@ -160,11 +164,11 @@ class Fetcher:
|
|
|
160
164
|
self.max_retries = max_retries
|
|
161
165
|
|
|
162
166
|
async def __aenter__(self) -> "Fetcher":
|
|
163
|
-
await self._browser.connect()
|
|
164
167
|
return self
|
|
165
168
|
|
|
166
169
|
async def __aexit__(self, *args: Any) -> None:
|
|
167
|
-
|
|
170
|
+
if self._cdp_engine:
|
|
171
|
+
await self._cdp_engine.disconnect()
|
|
168
172
|
|
|
169
173
|
async def start(self) -> None:
|
|
170
174
|
"""
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|