phantomfetch 0.6.1__tar.gz → 0.6.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {phantomfetch-0.6.1 → phantomfetch-0.6.2}/PKG-INFO +1 -1
- {phantomfetch-0.6.1 → phantomfetch-0.6.2}/pyproject.toml +1 -1
- {phantomfetch-0.6.1 → phantomfetch-0.6.2}/src/phantomfetch/engines/browser/actions.py +21 -17
- {phantomfetch-0.6.1 → phantomfetch-0.6.2}/README.md +0 -0
- {phantomfetch-0.6.1 → phantomfetch-0.6.2}/src/phantomfetch/__init__.py +0 -0
- {phantomfetch-0.6.1 → phantomfetch-0.6.2}/src/phantomfetch/cache.py +0 -0
- {phantomfetch-0.6.1 → phantomfetch-0.6.2}/src/phantomfetch/captcha.py +0 -0
- {phantomfetch-0.6.1 → phantomfetch-0.6.2}/src/phantomfetch/engines/__init__.py +0 -0
- {phantomfetch-0.6.1 → phantomfetch-0.6.2}/src/phantomfetch/engines/base.py +0 -0
- {phantomfetch-0.6.1 → phantomfetch-0.6.2}/src/phantomfetch/engines/browser/__init__.py +0 -0
- {phantomfetch-0.6.1 → phantomfetch-0.6.2}/src/phantomfetch/engines/browser/cdp.py +0 -0
- {phantomfetch-0.6.1 → phantomfetch-0.6.2}/src/phantomfetch/engines/curl.py +0 -0
- {phantomfetch-0.6.1 → phantomfetch-0.6.2}/src/phantomfetch/fetch.py +0 -0
- {phantomfetch-0.6.1 → phantomfetch-0.6.2}/src/phantomfetch/pool.py +0 -0
- {phantomfetch-0.6.1 → phantomfetch-0.6.2}/src/phantomfetch/presets.py +0 -0
- {phantomfetch-0.6.1 → phantomfetch-0.6.2}/src/phantomfetch/registry.py +0 -0
- {phantomfetch-0.6.1 → phantomfetch-0.6.2}/src/phantomfetch/strategy_advisor.py +0 -0
- {phantomfetch-0.6.1 → phantomfetch-0.6.2}/src/phantomfetch/telemetry.py +0 -0
- {phantomfetch-0.6.1 → phantomfetch-0.6.2}/src/phantomfetch/tools/adaptive/__init__.py +0 -0
- {phantomfetch-0.6.1 → phantomfetch-0.6.2}/src/phantomfetch/tools/adaptive/fingerprint.py +0 -0
- {phantomfetch-0.6.1 → phantomfetch-0.6.2}/src/phantomfetch/tools/adaptive/matcher.py +0 -0
- {phantomfetch-0.6.1 → phantomfetch-0.6.2}/src/phantomfetch/tools/adaptive/store.py +0 -0
- {phantomfetch-0.6.1 → phantomfetch-0.6.2}/src/phantomfetch/tools/selector_builder.py +0 -0
- {phantomfetch-0.6.1 → phantomfetch-0.6.2}/src/phantomfetch/types.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: phantomfetch
|
|
3
|
-
Version: 0.6.1
|
|
3
|
+
Version: 0.6.2
|
|
4
4
|
Summary: High-performance agentic web scraping library combining curl-cffi speed with Playwright browser capabilities
|
|
5
5
|
Keywords: web-scraping,playwright,curl-cffi,async,browser-automation,http-client,agentic,anti-detection
|
|
6
6
|
Author: CosmicBull
|
|
@@ -58,6 +58,11 @@ async def _human_mouse_move(page: "Page", element_handle: Any):
|
|
|
58
58
|
# We can add a slight overshoot/correction if we want to be fancy, but `steps` is 1st iterated humanization.
|
|
59
59
|
|
|
60
60
|
|
|
61
|
+
|
|
62
|
+
def _is_page(obj: Any) -> bool:
|
|
63
|
+
return obj.__class__.__name__ == "Page"
|
|
64
|
+
|
|
65
|
+
|
|
61
66
|
async def execute_actions(
|
|
62
67
|
page: "Page | Locator", actions: list[Action]
|
|
63
68
|
) -> list["ActionResult"]:
|
|
@@ -71,7 +76,6 @@ async def execute_actions(
|
|
|
71
76
|
Returns:
|
|
72
77
|
List of ActionResult objects
|
|
73
78
|
"""
|
|
74
|
-
from playwright.async_api import Page
|
|
75
79
|
|
|
76
80
|
from ...types import ActionResult
|
|
77
81
|
|
|
@@ -142,7 +146,7 @@ async def execute_actions(
|
|
|
142
146
|
|
|
143
147
|
# If scope is explicitly 'page', force usage of root page
|
|
144
148
|
if action.scope == "page":
|
|
145
|
-
ctx = page if
|
|
149
|
+
ctx = page if _is_page(page) else page.page
|
|
146
150
|
|
|
147
151
|
start_time = time.perf_counter()
|
|
148
152
|
result = ActionResult(action=action, success=True)
|
|
@@ -162,7 +166,7 @@ async def execute_actions(
|
|
|
162
166
|
case "wait":
|
|
163
167
|
if action.selector:
|
|
164
168
|
state = action.state or "visible"
|
|
165
|
-
if
|
|
169
|
+
if _is_page(ctx):
|
|
166
170
|
await ctx.wait_for_selector(
|
|
167
171
|
action.selector,
|
|
168
172
|
timeout=action.timeout,
|
|
@@ -174,7 +178,7 @@ async def execute_actions(
|
|
|
174
178
|
timeout=action.timeout, state=state
|
|
175
179
|
)
|
|
176
180
|
elif action.timeout:
|
|
177
|
-
target_page = ctx if
|
|
181
|
+
target_page = ctx if _is_page(ctx) else ctx.page
|
|
178
182
|
await target_page.wait_for_timeout(action.timeout)
|
|
179
183
|
|
|
180
184
|
case "loop":
|
|
@@ -216,7 +220,7 @@ async def execute_actions(
|
|
|
216
220
|
if action.human_like:
|
|
217
221
|
# Human-like click
|
|
218
222
|
# Resolve handle first
|
|
219
|
-
if
|
|
223
|
+
if _is_page(ctx):
|
|
220
224
|
handle = await ctx.wait_for_selector(
|
|
221
225
|
action.selector,
|
|
222
226
|
timeout=action.timeout,
|
|
@@ -233,7 +237,7 @@ async def execute_actions(
|
|
|
233
237
|
if handle:
|
|
234
238
|
# Need page for mouse move
|
|
235
239
|
target_page = (
|
|
236
|
-
ctx if
|
|
240
|
+
ctx if _is_page(ctx) else ctx.page
|
|
237
241
|
)
|
|
238
242
|
await _human_mouse_move(target_page, handle)
|
|
239
243
|
await handle.click(delay=random.randint(50, 150))
|
|
@@ -243,7 +247,7 @@ async def execute_actions(
|
|
|
243
247
|
timeout=action.timeout,
|
|
244
248
|
)
|
|
245
249
|
# Context click (no selector)
|
|
246
|
-
elif
|
|
250
|
+
elif _is_page(ctx):
|
|
247
251
|
result.success = False
|
|
248
252
|
result.error = "Click action on Page requires a selector"
|
|
249
253
|
elif action.human_like:
|
|
@@ -262,7 +266,7 @@ async def execute_actions(
|
|
|
262
266
|
val_str = str(action.value)
|
|
263
267
|
if action.human_like:
|
|
264
268
|
await ctx.click(action.selector, timeout=action.timeout)
|
|
265
|
-
target_page = ctx if
|
|
269
|
+
target_page = ctx if _is_page(ctx) else ctx.page
|
|
266
270
|
await _human_type(target_page, val_str)
|
|
267
271
|
else:
|
|
268
272
|
await ctx.fill(
|
|
@@ -271,7 +275,7 @@ async def execute_actions(
|
|
|
271
275
|
timeout=action.timeout,
|
|
272
276
|
)
|
|
273
277
|
# Input into self (ctx is locator)
|
|
274
|
-
elif
|
|
278
|
+
elif _is_page(ctx):
|
|
275
279
|
result.success = False
|
|
276
280
|
result.error = "Input action on Page requires a selector"
|
|
277
281
|
else:
|
|
@@ -286,7 +290,7 @@ async def execute_actions(
|
|
|
286
290
|
case "scroll":
|
|
287
291
|
# Scroll usually implies page-level or element-level scroll
|
|
288
292
|
# For now, keep page level logic mostly
|
|
289
|
-
target_page = ctx if
|
|
293
|
+
target_page = ctx if _is_page(ctx) else ctx.page
|
|
290
294
|
|
|
291
295
|
if action.selector == "top":
|
|
292
296
|
await target_page.evaluate("window.scrollTo(0, 0)")
|
|
@@ -444,13 +448,13 @@ async def execute_actions(
|
|
|
444
448
|
# locator.locator(selector).select_option(...) logic
|
|
445
449
|
value=str(action.value),
|
|
446
450
|
timeout=action.timeout,
|
|
447
|
-
) if
|
|
451
|
+
) if _is_page(ctx) else await ctx.locator(
|
|
448
452
|
action.selector
|
|
449
453
|
).select_option(str(action.value), timeout=action.timeout)
|
|
450
454
|
|
|
451
455
|
case "hover":
|
|
452
456
|
if action.selector:
|
|
453
|
-
if
|
|
457
|
+
if _is_page(ctx):
|
|
454
458
|
await ctx.hover(action.selector, timeout=action.timeout)
|
|
455
459
|
else:
|
|
456
460
|
await ctx.locator(action.selector).hover(
|
|
@@ -465,7 +469,7 @@ async def execute_actions(
|
|
|
465
469
|
kwargs = {}
|
|
466
470
|
if action.full_page:
|
|
467
471
|
kwargs["full_page"] = True
|
|
468
|
-
if not
|
|
472
|
+
if not _is_page(ctx):
|
|
469
473
|
# If we are in a Locator (e.g. inside loop loop), but want full page,
|
|
470
474
|
# we must switch to the page context.
|
|
471
475
|
screenshot_ctx = ctx.page
|
|
@@ -491,7 +495,7 @@ async def execute_actions(
|
|
|
491
495
|
result.data = img_bytes
|
|
492
496
|
|
|
493
497
|
case "wait_for_load":
|
|
494
|
-
target_page = ctx if
|
|
498
|
+
target_page = ctx if _is_page(ctx) else ctx.page
|
|
495
499
|
await target_page.wait_for_load_state(
|
|
496
500
|
"networkidle", timeout=action.timeout
|
|
497
501
|
)
|
|
@@ -504,7 +508,7 @@ async def execute_actions(
|
|
|
504
508
|
case "validate":
|
|
505
509
|
try:
|
|
506
510
|
state = action.state or "attached"
|
|
507
|
-
if
|
|
511
|
+
if _is_page(ctx):
|
|
508
512
|
await ctx.wait_for_selector(
|
|
509
513
|
action.selector,
|
|
510
514
|
timeout=action.timeout or 5000,
|
|
@@ -523,7 +527,7 @@ async def execute_actions(
|
|
|
523
527
|
|
|
524
528
|
case "solve_captcha":
|
|
525
529
|
# Requires Page context for solver
|
|
526
|
-
target_page = ctx if
|
|
530
|
+
target_page = ctx if _is_page(ctx) else ctx.page
|
|
527
531
|
|
|
528
532
|
if action.provider in ("cdp", "scraping_browser"):
|
|
529
533
|
from ...captcha import CDPSolver
|
|
@@ -552,7 +556,7 @@ async def execute_actions(
|
|
|
552
556
|
if action.selector:
|
|
553
557
|
# Check visibility/existence
|
|
554
558
|
try:
|
|
555
|
-
if
|
|
559
|
+
if _is_page(ctx):
|
|
556
560
|
# Use strict=False, state=visible/attached?
|
|
557
561
|
# Just check count > 0 or wait with short timeout?
|
|
558
562
|
# Let's use is_visible or check count to avoid waiting if timeout=0
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|