phantomfetch 0.5.0__tar.gz → 0.5.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: phantomfetch
3
- Version: 0.5.0
3
+ Version: 0.5.2
4
4
  Summary: High-performance agentic web scraping library combining curl-cffi speed with Playwright browser capabilities
5
5
  Keywords: web-scraping,playwright,curl-cffi,async,browser-automation,http-client,agentic,anti-detection
6
6
  Author: CosmicBull
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "phantomfetch"
3
- version = "0.5.0"
3
+ version = "0.5.2"
4
4
  description = "High-performance agentic web scraping library combining curl-cffi speed with Playwright browser capabilities"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.13"
@@ -1,4 +1,5 @@
1
1
  import asyncio
2
+ import random
2
3
  import re
3
4
  import time
4
5
  from typing import TYPE_CHECKING, Any, Literal, Optional
@@ -82,6 +83,8 @@ class CDPEngine:
82
83
  cloak_binary_path: str | None = None,
83
84
  persistent_context_dir: str | None = None,
84
85
  ignore_https_errors: bool = False,
86
+ max_retries: int = 3,
87
+ retry_backoff_base: float = 2.0,
85
88
  ):
86
89
  """
87
90
  Args:
@@ -126,7 +129,9 @@ class CDPEngine:
126
129
  persist across sessions). When set, uses CloakBrowser's
127
130
  persistent context API. Enables incognito bypass.
128
131
  ignore_https_errors: Ignore TLS certificate errors (useful for proxies that
129
- do SSL inspection/MITM). Default False.
132
+ do SSL inspection/MITM). Default False.
133
+ max_retries: Max retries for CDP endpoint connection attempts. Default 3.
134
+ retry_backoff_base: Exponential backoff base for retries. Default 2.0.
130
135
  """
131
136
  self.cdp_endpoint = cdp_endpoint
132
137
  self.headless = headless
@@ -148,6 +153,8 @@ class CDPEngine:
148
153
  self.cloak_binary_path = cloak_binary_path
149
154
  self.persistent_context_dir = persistent_context_dir
150
155
  self.ignore_https_errors = ignore_https_errors
156
+ self.max_retries = max_retries
157
+ self.retry_backoff_base = retry_backoff_base
151
158
 
152
159
  self._cloak_browser_available = False
153
160
  self._cloak_context: Any = None
@@ -185,11 +192,30 @@ class CDPEngine:
185
192
  self._existing_page: Any = None
186
193
 
187
194
  async def connect(self) -> None:
188
- """Initialize Playwright and connect to browser."""
189
- if self.cloak_browser:
190
- await self._connect_cloakbrowser()
191
- else:
192
- await self._connect_playwright()
195
+ """Initialize Playwright and connect to browser with retry."""
196
+ last_error: Exception | None = None
197
+ for attempt in range(self.max_retries):
198
+ try:
199
+ if self.cloak_browser:
200
+ await self._connect_cloakbrowser()
201
+ else:
202
+ await self._connect_playwright()
203
+ return
204
+ except Exception as e:
205
+ last_error = e
206
+ if attempt < self.max_retries - 1:
207
+ wait = self.retry_backoff_base**attempt * (0.5 + random.random())
208
+ logger.warning(
209
+ f"[cdp] Connect attempt {attempt + 1}/{self.max_retries} failed: {e}. "
210
+ f"Retrying in {wait:.2f}s..."
211
+ )
212
+ await asyncio.sleep(wait)
213
+ else:
214
+ logger.error(
215
+ f"[cdp] Connect attempt {attempt + 1}/{self.max_retries} failed: {e}"
216
+ )
217
+ if last_error:
218
+ raise last_error
193
219
 
194
220
  async def _connect_cloakbrowser(self) -> None:
195
221
  """Connect using CloakBrowser's stealth Chromium binary.
@@ -47,7 +47,7 @@ class Fetcher:
47
47
  # Advanced CDP
48
48
  cdp_use_existing_page: bool = True,
49
49
  cdp_connection_type: str = "cdp",
50
- backend: Literal["rebrowser", "playwright", "patchright"] = "rebrowser",
50
+ backend: Literal["rebrowser", "playwright", "patchright"] = "playwright",
51
51
  # BrowserForge fingerprinting
52
52
  fingerprint: bool = True,
53
53
  fingerprint_options: dict[str, Any] | None = None,
@@ -59,6 +59,8 @@ class Fetcher:
59
59
  cloak_browser_geoip: bool = False,
60
60
  cloak_binary_path: str | None = None,
61
61
  persistent_context_dir: str | None = None,
62
+ browser_max_retries: int = 3,
63
+ browser_retry_backoff_base: float = 2.0,
62
64
  ):
63
65
  """
64
66
  Initialize the Fetcher.
@@ -143,6 +145,8 @@ class Fetcher:
143
145
  cloak_browser_geoip=cloak_browser_geoip,
144
146
  cloak_binary_path=cloak_binary_path,
145
147
  persistent_context_dir=persistent_context_dir,
148
+ max_retries=browser_max_retries,
149
+ retry_backoff_base=browser_retry_backoff_base,
146
150
  )
147
151
  self._browser = self._cdp_engine
148
152
 
@@ -160,11 +164,11 @@ class Fetcher:
160
164
  self.max_retries = max_retries
161
165
 
162
166
  async def __aenter__(self) -> "Fetcher":
163
- await self._browser.connect()
164
167
  return self
165
168
 
166
169
  async def __aexit__(self, *args: Any) -> None:
167
- await self._browser.disconnect()
170
+ if self._cdp_engine:
171
+ await self._cdp_engine.disconnect()
168
172
 
169
173
  async def start(self) -> None:
170
174
  """
File without changes