phantomfetch 0.5.8__tar.gz → 0.5.10__tar.gz

This diff shows the differences between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (20)
  1. {phantomfetch-0.5.8 → phantomfetch-0.5.10}/PKG-INFO +4 -1
  2. {phantomfetch-0.5.8 → phantomfetch-0.5.10}/pyproject.toml +3 -2
  3. {phantomfetch-0.5.8 → phantomfetch-0.5.10}/src/phantomfetch/engines/browser/actions.py +40 -0
  4. {phantomfetch-0.5.8 → phantomfetch-0.5.10}/src/phantomfetch/engines/browser/cdp.py +19 -6
  5. {phantomfetch-0.5.8 → phantomfetch-0.5.10}/src/phantomfetch/fetch.py +7 -0
  6. {phantomfetch-0.5.8 → phantomfetch-0.5.10}/README.md +0 -0
  7. {phantomfetch-0.5.8 → phantomfetch-0.5.10}/src/phantomfetch/__init__.py +0 -0
  8. {phantomfetch-0.5.8 → phantomfetch-0.5.10}/src/phantomfetch/cache.py +0 -0
  9. {phantomfetch-0.5.8 → phantomfetch-0.5.10}/src/phantomfetch/captcha.py +0 -0
  10. {phantomfetch-0.5.8 → phantomfetch-0.5.10}/src/phantomfetch/engines/__init__.py +0 -0
  11. {phantomfetch-0.5.8 → phantomfetch-0.5.10}/src/phantomfetch/engines/base.py +0 -0
  12. {phantomfetch-0.5.8 → phantomfetch-0.5.10}/src/phantomfetch/engines/browser/__init__.py +0 -0
  13. {phantomfetch-0.5.8 → phantomfetch-0.5.10}/src/phantomfetch/engines/curl.py +0 -0
  14. {phantomfetch-0.5.8 → phantomfetch-0.5.10}/src/phantomfetch/pool.py +0 -0
  15. {phantomfetch-0.5.8 → phantomfetch-0.5.10}/src/phantomfetch/presets.py +0 -0
  16. {phantomfetch-0.5.8 → phantomfetch-0.5.10}/src/phantomfetch/registry.py +0 -0
  17. {phantomfetch-0.5.8 → phantomfetch-0.5.10}/src/phantomfetch/strategy_advisor.py +0 -0
  18. {phantomfetch-0.5.8 → phantomfetch-0.5.10}/src/phantomfetch/telemetry.py +0 -0
  19. {phantomfetch-0.5.8 → phantomfetch-0.5.10}/src/phantomfetch/tools/selector_builder.py +0 -0
  20. {phantomfetch-0.5.8 → phantomfetch-0.5.10}/src/phantomfetch/types.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: phantomfetch
3
- Version: 0.5.8
3
+ Version: 0.5.10
4
4
  Summary: High-performance agentic web scraping library combining curl-cffi speed with Playwright browser capabilities
5
5
  Keywords: web-scraping,playwright,curl-cffi,async,browser-automation,http-client,agentic,anti-detection
6
6
  Author: CosmicBull
@@ -28,11 +28,13 @@ Requires-Dist: beautifulsoup4>=4.14.3
28
28
  Requires-Dist: cloakbrowser[geoip]>=0.3.22 ; extra == 'all'
29
29
  Requires-Dist: camoufox[geoip]>=0.4.11 ; extra == 'all'
30
30
  Requires-Dist: rebrowser-playwright>=1.52.0 ; extra == 'all'
31
+ Requires-Dist: patchright>=1.52.0 ; extra == 'all'
31
32
  Requires-Dist: maxminddb>=2.0.0 ; extra == 'all'
32
33
  Requires-Dist: camoufox[geoip]>=0.4.11 ; extra == 'camoufox'
33
34
  Requires-Dist: cloakbrowser>=0.3.22 ; extra == 'cloakbrowser'
34
35
  Requires-Dist: cloakbrowser[geoip]>=0.3.22 ; extra == 'geoip'
35
36
  Requires-Dist: maxminddb>=2.0.0 ; extra == 'geoip'
37
+ Requires-Dist: patchright>=1.52.0 ; extra == 'patchright'
36
38
  Requires-Dist: rebrowser-playwright>=1.52.0 ; extra == 'rebrowser'
37
39
  Requires-Python: >=3.13
38
40
  Project-URL: Homepage, https://github.com/iristech-systems/PhantomFetch
@@ -44,6 +46,7 @@ Provides-Extra: all
44
46
  Provides-Extra: camoufox
45
47
  Provides-Extra: cloakbrowser
46
48
  Provides-Extra: geoip
49
+ Provides-Extra: patchright
47
50
  Provides-Extra: rebrowser
48
51
  Description-Content-Type: text/markdown
49
52
 
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "phantomfetch"
3
- version = "0.5.8"
3
+ version = "0.5.10"
4
4
  description = "High-performance agentic web scraping library combining curl-cffi speed with Playwright browser capabilities"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.13"
@@ -47,7 +47,8 @@ cloakbrowser = ["cloakbrowser>=0.3.22"]
47
47
  geoip = ["cloakbrowser[geoip]>=0.3.22", "maxminddb>=2.0.0"]
48
48
  camoufox = ["camoufox[geoip]>=0.4.11"]
49
49
  rebrowser = ["rebrowser-playwright>=1.52.0"]
50
- all = ["cloakbrowser[geoip]>=0.3.22", "camoufox[geoip]>=0.4.11", "rebrowser-playwright>=1.52.0", "maxminddb>=2.0.0"]
50
+ patchright = ["patchright>=1.52.0"]
51
+ all = ["cloakbrowser[geoip]>=0.3.22", "camoufox[geoip]>=0.4.11", "rebrowser-playwright>=1.52.0", "patchright>=1.52.0", "maxminddb>=2.0.0"]
51
52
 
52
53
  [project.urls]
53
54
  Homepage = "https://github.com/iristech-systems/PhantomFetch"
@@ -186,6 +186,46 @@ async def execute_actions(
186
186
  result.error = "Loop requires selector and child actions"
187
187
  else:
188
188
  limit = action.max_iterations or 100
189
+ js_supported_actions = {
190
+ "wait",
191
+ "click",
192
+ "extract",
193
+ "evaluate",
194
+ }
195
+ has_deferred_actions = any(
196
+ sub_action.action not in js_supported_actions
197
+ for sub_action in action.actions
198
+ )
199
+
200
+ if has_deferred_actions:
201
+ import msgspec
202
+
203
+ logger.debug(
204
+ f"[browser] Loop '{action.selector}' contains deferred actions; using Python fallback (limit={limit})"
205
+ )
206
+
207
+ loop_locator = ctx.locator(action.selector)
208
+ loop_count = min(await loop_locator.count(), limit)
209
+ loop_results = []
210
+
211
+ for i in range(loop_count):
212
+ iter_ctx = loop_locator.nth(i)
213
+ iter_results = await execute_actions(
214
+ iter_ctx, action.actions
215
+ )
216
+ loop_results.append(
217
+ {
218
+ "index": i,
219
+ "results": [
220
+ msgspec.to_builtins(ir)
221
+ for ir in iter_results
222
+ ],
223
+ }
224
+ )
225
+
226
+ result.data = loop_results
227
+ continue
228
+
189
229
  logger.debug(
190
230
  f"[browser] Compiling loop for '{action.selector}' to native JS (limit={limit})"
191
231
  )
@@ -217,6 +217,7 @@ class CDPEngine:
217
217
  self._browser: Any = None
218
218
  self._existing_context: Any = None
219
219
  self._existing_page: Any = None
220
+ self._connect_lock = asyncio.Lock()
220
221
 
221
222
  async def connect(self) -> None:
222
223
  """Initialize Playwright and connect to browser with retry."""
@@ -442,6 +443,17 @@ class CDPEngine:
442
443
  self._camoufox_context = None
443
444
  self._camoufox_proxy_key = None
444
445
 
446
+ async def start(self) -> None:
447
+ """Start the browser engine."""
448
+ async with self._connect_lock:
449
+ await self.connect()
450
+
451
+ async def restart(self) -> None:
452
+ """Restart the browser engine."""
453
+ async with self._connect_lock:
454
+ await self.disconnect()
455
+ await self.connect()
456
+
445
457
  async def _handle_route(self, route: "Route") -> None:
446
458
  """Handle network requests for caching."""
447
459
  if not self.cache:
@@ -601,12 +613,13 @@ class CDPEngine:
601
613
 
602
614
  # Ensure browser is running.
603
615
  # CloakBrowser persistent mode uses _cloak_context (not _browser).
604
- needs_connect = not self._browser and not (
605
- (self._cloak_browser_available and self.persistent_context_dir)
606
- or (self._camoufox_available and self.persistent_context_dir)
607
- )
608
- if needs_connect:
609
- await self.connect()
616
+ async with self._connect_lock:
617
+ needs_connect = not self._browser and not (
618
+ (self._cloak_browser_available and self.persistent_context_dir)
619
+ or (self._camoufox_available and self.persistent_context_dir)
620
+ )
621
+ if needs_connect:
622
+ await self.connect()
610
623
 
611
624
  # Create context
612
625
  # We create a fresh context for each request to ensure isolation
@@ -203,6 +203,13 @@ class Fetcher:
203
203
  if self._browser:
204
204
  await self._browser.disconnect()
205
205
 
206
+ async def restart_browser(self) -> None:
207
+ """
208
+ Restart the browser engine.
209
+ """
210
+ if self._browser and hasattr(self._browser, "restart"):
211
+ await self._browser.restart()
212
+
206
213
  def save_session(self, path: str) -> None:
207
214
  """
208
215
  Save the current session storage (cookies, localStorage) to a file.
File without changes