phantomfetch 0.5.8__tar.gz → 0.5.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {phantomfetch-0.5.8 → phantomfetch-0.5.10}/PKG-INFO +4 -1
- {phantomfetch-0.5.8 → phantomfetch-0.5.10}/pyproject.toml +3 -2
- {phantomfetch-0.5.8 → phantomfetch-0.5.10}/src/phantomfetch/engines/browser/actions.py +40 -0
- {phantomfetch-0.5.8 → phantomfetch-0.5.10}/src/phantomfetch/engines/browser/cdp.py +19 -6
- {phantomfetch-0.5.8 → phantomfetch-0.5.10}/src/phantomfetch/fetch.py +7 -0
- {phantomfetch-0.5.8 → phantomfetch-0.5.10}/README.md +0 -0
- {phantomfetch-0.5.8 → phantomfetch-0.5.10}/src/phantomfetch/__init__.py +0 -0
- {phantomfetch-0.5.8 → phantomfetch-0.5.10}/src/phantomfetch/cache.py +0 -0
- {phantomfetch-0.5.8 → phantomfetch-0.5.10}/src/phantomfetch/captcha.py +0 -0
- {phantomfetch-0.5.8 → phantomfetch-0.5.10}/src/phantomfetch/engines/__init__.py +0 -0
- {phantomfetch-0.5.8 → phantomfetch-0.5.10}/src/phantomfetch/engines/base.py +0 -0
- {phantomfetch-0.5.8 → phantomfetch-0.5.10}/src/phantomfetch/engines/browser/__init__.py +0 -0
- {phantomfetch-0.5.8 → phantomfetch-0.5.10}/src/phantomfetch/engines/curl.py +0 -0
- {phantomfetch-0.5.8 → phantomfetch-0.5.10}/src/phantomfetch/pool.py +0 -0
- {phantomfetch-0.5.8 → phantomfetch-0.5.10}/src/phantomfetch/presets.py +0 -0
- {phantomfetch-0.5.8 → phantomfetch-0.5.10}/src/phantomfetch/registry.py +0 -0
- {phantomfetch-0.5.8 → phantomfetch-0.5.10}/src/phantomfetch/strategy_advisor.py +0 -0
- {phantomfetch-0.5.8 → phantomfetch-0.5.10}/src/phantomfetch/telemetry.py +0 -0
- {phantomfetch-0.5.8 → phantomfetch-0.5.10}/src/phantomfetch/tools/selector_builder.py +0 -0
- {phantomfetch-0.5.8 → phantomfetch-0.5.10}/src/phantomfetch/types.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: phantomfetch
|
|
3
|
-
Version: 0.5.8
|
|
3
|
+
Version: 0.5.10
|
|
4
4
|
Summary: High-performance agentic web scraping library combining curl-cffi speed with Playwright browser capabilities
|
|
5
5
|
Keywords: web-scraping,playwright,curl-cffi,async,browser-automation,http-client,agentic,anti-detection
|
|
6
6
|
Author: CosmicBull
|
|
@@ -28,11 +28,13 @@ Requires-Dist: beautifulsoup4>=4.14.3
|
|
|
28
28
|
Requires-Dist: cloakbrowser[geoip]>=0.3.22 ; extra == 'all'
|
|
29
29
|
Requires-Dist: camoufox[geoip]>=0.4.11 ; extra == 'all'
|
|
30
30
|
Requires-Dist: rebrowser-playwright>=1.52.0 ; extra == 'all'
|
|
31
|
+
Requires-Dist: patchright>=1.52.0 ; extra == 'all'
|
|
31
32
|
Requires-Dist: maxminddb>=2.0.0 ; extra == 'all'
|
|
32
33
|
Requires-Dist: camoufox[geoip]>=0.4.11 ; extra == 'camoufox'
|
|
33
34
|
Requires-Dist: cloakbrowser>=0.3.22 ; extra == 'cloakbrowser'
|
|
34
35
|
Requires-Dist: cloakbrowser[geoip]>=0.3.22 ; extra == 'geoip'
|
|
35
36
|
Requires-Dist: maxminddb>=2.0.0 ; extra == 'geoip'
|
|
37
|
+
Requires-Dist: patchright>=1.52.0 ; extra == 'patchright'
|
|
36
38
|
Requires-Dist: rebrowser-playwright>=1.52.0 ; extra == 'rebrowser'
|
|
37
39
|
Requires-Python: >=3.13
|
|
38
40
|
Project-URL: Homepage, https://github.com/iristech-systems/PhantomFetch
|
|
@@ -44,6 +46,7 @@ Provides-Extra: all
|
|
|
44
46
|
Provides-Extra: camoufox
|
|
45
47
|
Provides-Extra: cloakbrowser
|
|
46
48
|
Provides-Extra: geoip
|
|
49
|
+
Provides-Extra: patchright
|
|
47
50
|
Provides-Extra: rebrowser
|
|
48
51
|
Description-Content-Type: text/markdown
|
|
49
52
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "phantomfetch"
|
|
3
|
-
version = "0.5.8"
|
|
3
|
+
version = "0.5.10"
|
|
4
4
|
description = "High-performance agentic web scraping library combining curl-cffi speed with Playwright browser capabilities"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.13"
|
|
@@ -47,7 +47,8 @@ cloakbrowser = ["cloakbrowser>=0.3.22"]
|
|
|
47
47
|
geoip = ["cloakbrowser[geoip]>=0.3.22", "maxminddb>=2.0.0"]
|
|
48
48
|
camoufox = ["camoufox[geoip]>=0.4.11"]
|
|
49
49
|
rebrowser = ["rebrowser-playwright>=1.52.0"]
|
|
50
|
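-
all = ["cloakbrowser[geoip]>=0.3.22", "camoufox[geoip]>=0.4.11", "rebrowser-playwright>=1.52.0", "maxminddb>=2.0.0"]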
|
|
50
|
+
patchright = ["patchright>=1.52.0"]
|
|
51
|
+
all = ["cloakbrowser[geoip]>=0.3.22", "camoufox[geoip]>=0.4.11", "rebrowser-playwright>=1.52.0", "patchright>=1.52.0", "maxminddb>=2.0.0"]
|
|
51
52
|
|
|
52
53
|
[project.urls]
|
|
53
54
|
Homepage = "https://github.com/iristech-systems/PhantomFetch"
|
|
@@ -186,6 +186,46 @@ async def execute_actions(
|
|
|
186
186
|
result.error = "Loop requires selector and child actions"
|
|
187
187
|
else:
|
|
188
188
|
limit = action.max_iterations or 100
|
|
189
|
+
js_supported_actions = {
|
|
190
|
+
"wait",
|
|
191
|
+
"click",
|
|
192
|
+
"extract",
|
|
193
|
+
"evaluate",
|
|
194
|
+
}
|
|
195
|
+
has_deferred_actions = any(
|
|
196
|
+
sub_action.action not in js_supported_actions
|
|
197
|
+
for sub_action in action.actions
|
|
198
|
+
)
|
|
199
|
+
|
|
200
|
+
if has_deferred_actions:
|
|
201
|
+
import msgspec
|
|
202
|
+
|
|
203
|
+
logger.debug(
|
|
204
|
+
f"[browser] Loop '{action.selector}' contains deferred actions; using Python fallback (limit={limit})"
|
|
205
|
+
)
|
|
206
|
+
|
|
207
|
+
loop_locator = ctx.locator(action.selector)
|
|
208
|
+
loop_count = min(await loop_locator.count(), limit)
|
|
209
|
+
loop_results = []
|
|
210
|
+
|
|
211
|
+
for i in range(loop_count):
|
|
212
|
+
iter_ctx = loop_locator.nth(i)
|
|
213
|
+
iter_results = await execute_actions(
|
|
214
|
+
iter_ctx, action.actions
|
|
215
|
+
)
|
|
216
|
+
loop_results.append(
|
|
217
|
+
{
|
|
218
|
+
"index": i,
|
|
219
|
+
"results": [
|
|
220
|
+
msgspec.to_builtins(ir)
|
|
221
|
+
for ir in iter_results
|
|
222
|
+
],
|
|
223
|
+
}
|
|
224
|
+
)
|
|
225
|
+
|
|
226
|
+
result.data = loop_results
|
|
227
|
+
continue
|
|
228
|
+
|
|
189
229
|
logger.debug(
|
|
190
230
|
f"[browser] Compiling loop for '{action.selector}' to native JS (limit={limit})"
|
|
191
231
|
)
|
|
@@ -217,6 +217,7 @@ class CDPEngine:
|
|
|
217
217
|
self._browser: Any = None
|
|
218
218
|
self._existing_context: Any = None
|
|
219
219
|
self._existing_page: Any = None
|
|
220
|
+
self._connect_lock = asyncio.Lock()
|
|
220
221
|
|
|
221
222
|
async def connect(self) -> None:
|
|
222
223
|
"""Initialize Playwright and connect to browser with retry."""
|
|
@@ -442,6 +443,17 @@ class CDPEngine:
|
|
|
442
443
|
self._camoufox_context = None
|
|
443
444
|
self._camoufox_proxy_key = None
|
|
444
445
|
|
|
446
|
+
async def start(self) -> None:
|
|
447
|
+
"""Start the browser engine."""
|
|
448
|
+
async with self._connect_lock:
|
|
449
|
+
await self.connect()
|
|
450
|
+
|
|
451
|
+
async def restart(self) -> None:
|
|
452
|
+
"""Restart the browser engine."""
|
|
453
|
+
async with self._connect_lock:
|
|
454
|
+
await self.disconnect()
|
|
455
|
+
await self.connect()
|
|
456
|
+
|
|
445
457
|
async def _handle_route(self, route: "Route") -> None:
|
|
446
458
|
"""Handle network requests for caching."""
|
|
447
459
|
if not self.cache:
|
|
@@ -601,12 +613,13 @@ class CDPEngine:
|
|
|
601
613
|
|
|
602
614
|
# Ensure browser is running.
|
|
603
615
|
# CloakBrowser persistent mode uses _cloak_context (not _browser).
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
616
|
+
async with self._connect_lock:
|
|
617
|
+
needs_connect = not self._browser and not (
|
|
618
|
+
(self._cloak_browser_available and self.persistent_context_dir)
|
|
619
|
+
or (self._camoufox_available and self.persistent_context_dir)
|
|
620
|
+
)
|
|
621
|
+
if needs_connect:
|
|
622
|
+
await self.connect()
|
|
610
623
|
|
|
611
624
|
# Create context
|
|
612
625
|
# We create a fresh context for each request to ensure isolation
|
|
@@ -203,6 +203,13 @@ class Fetcher:
|
|
|
203
203
|
if self._browser:
|
|
204
204
|
await self._browser.disconnect()
|
|
205
205
|
|
|
206
|
+
async def restart_browser(self) -> None:
|
|
207
|
+
"""
|
|
208
|
+
Restart the browser engine.
|
|
209
|
+
"""
|
|
210
|
+
if self._browser and hasattr(self._browser, "restart"):
|
|
211
|
+
await self._browser.restart()
|
|
212
|
+
|
|
206
213
|
def save_session(self, path: str) -> None:
|
|
207
214
|
"""
|
|
208
215
|
Save the current session storage (cookies, localStorage) to a file.
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|