phantomwright 0.0.4__tar.gz → 0.0.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {phantomwright-0.0.4 → phantomwright-0.0.7}/PKG-INFO +18 -4
- {phantomwright-0.0.4 → phantomwright-0.0.7}/README.md +16 -2
- phantomwright-0.0.7/phantomwright/__init__.py +8 -0
- phantomwright-0.0.7/phantomwright/_impl/_core_debug_patch.py +38 -0
- phantomwright-0.0.7/phantomwright/_impl/_evaluate_patch.py +54 -0
- phantomwright-0.0.7/phantomwright/_repo_version.py +1 -0
- {phantomwright-0.0.4 → phantomwright-0.0.7}/phantomwright/async_api/__init__.py +1 -3
- phantomwright-0.0.7/phantomwright/stealth/stealth.py +229 -0
- {phantomwright-0.0.4 → phantomwright-0.0.7}/phantomwright/sync_api/__init__.py +1 -3
- {phantomwright-0.0.4 → phantomwright-0.0.7}/pyproject.toml +2 -2
- phantomwright-0.0.4/phantomwright/__init__.py +0 -5
- phantomwright-0.0.4/phantomwright/_impl/_user_sim/_mouse_move_sim.py +0 -36
- phantomwright-0.0.4/phantomwright/_impl/_user_sim/_mouse_wheel_sim.py +0 -18
- phantomwright-0.0.4/phantomwright/async_api/_patch_evaluate.py +0 -26
- phantomwright-0.0.4/phantomwright/async_api/_patch_goto.py +0 -35
- phantomwright-0.0.4/phantomwright/stealth/case_insensitive_dict.py +0 -77
- phantomwright-0.0.4/phantomwright/stealth/context_managers.py +0 -37
- phantomwright-0.0.4/phantomwright/stealth/stealth.py +0 -524
- phantomwright-0.0.4/phantomwright/sync_api/_patch_evaluate.py +0 -26
- phantomwright-0.0.4/phantomwright/sync_api/_patch_goto.py +0 -35
- {phantomwright-0.0.4 → phantomwright-0.0.7}/.gitignore +0 -0
- {phantomwright-0.0.4 → phantomwright-0.0.7}/LICENSE +0 -0
- {phantomwright-0.0.4 → phantomwright-0.0.7}/phantomwright/_impl/__init__.py +0 -0
- {phantomwright-0.0.4 → phantomwright-0.0.7}/phantomwright/stealth/__init__.py +0 -0
- {phantomwright-0.0.4 → phantomwright-0.0.7}/phantomwright/stealth/js/evasions/chrome.app.js +0 -0
- {phantomwright-0.0.4 → phantomwright-0.0.7}/phantomwright/stealth/js/evasions/chrome.csi.js +0 -0
- {phantomwright-0.0.4 → phantomwright-0.0.7}/phantomwright/stealth/js/evasions/chrome.hairline.js +0 -0
- {phantomwright-0.0.4 → phantomwright-0.0.7}/phantomwright/stealth/js/evasions/chrome.load.times.js +0 -0
- {phantomwright-0.0.4 → phantomwright-0.0.7}/phantomwright/stealth/js/evasions/chrome.runtime.js +0 -0
- {phantomwright-0.0.4 → phantomwright-0.0.7}/phantomwright/stealth/js/evasions/error.prototype.js +0 -0
- {phantomwright-0.0.4 → phantomwright-0.0.7}/phantomwright/stealth/js/evasions/iframe.contentWindow.js +0 -0
- {phantomwright-0.0.4 → phantomwright-0.0.7}/phantomwright/stealth/js/evasions/media.codecs.js +0 -0
- {phantomwright-0.0.4 → phantomwright-0.0.7}/phantomwright/stealth/js/evasions/navigator.hardwareConcurrency.js +0 -0
- {phantomwright-0.0.4 → phantomwright-0.0.7}/phantomwright/stealth/js/evasions/navigator.languages.js +0 -0
- {phantomwright-0.0.4 → phantomwright-0.0.7}/phantomwright/stealth/js/evasions/navigator.permissions.js +0 -0
- {phantomwright-0.0.4 → phantomwright-0.0.7}/phantomwright/stealth/js/evasions/navigator.platform.js +0 -0
- {phantomwright-0.0.4 → phantomwright-0.0.7}/phantomwright/stealth/js/evasions/navigator.plugins.js +0 -0
- {phantomwright-0.0.4 → phantomwright-0.0.7}/phantomwright/stealth/js/evasions/navigator.userAgent.js +0 -0
- {phantomwright-0.0.4 → phantomwright-0.0.7}/phantomwright/stealth/js/evasions/navigator.vendor.js +0 -0
- {phantomwright-0.0.4 → phantomwright-0.0.7}/phantomwright/stealth/js/evasions/webgl.vendor.js +0 -0
- {phantomwright-0.0.4 → phantomwright-0.0.7}/phantomwright/stealth/js/generate.magic.arrays.js +0 -0
- {phantomwright-0.0.4 → phantomwright-0.0.7}/phantomwright/stealth/js/utils.js +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: phantomwright
|
|
3
|
-
Version: 0.0.
|
|
3
|
+
Version: 0.0.7
|
|
4
4
|
Summary: Bridging playwright-core patch + extending playwright API for stealth injection & user simulation
|
|
5
5
|
Project-URL: homepage, https://github.com/ai-microsoft/phantom-wright
|
|
6
6
|
Project-URL: changelog, https://github.com/ai-microsoft/phantom-wright/blob/main/CHANGELOG.md
|
|
@@ -10,7 +10,7 @@ License-File: LICENSE
|
|
|
10
10
|
Classifier: Operating System :: OS Independent
|
|
11
11
|
Classifier: Programming Language :: Python :: 3
|
|
12
12
|
Requires-Python: >=3.9
|
|
13
|
-
Requires-Dist: patchright==1.56.
|
|
13
|
+
Requires-Dist: patchright-fork==1.56.2
|
|
14
14
|
Provides-Extra: black
|
|
15
15
|
Requires-Dist: black>=25.9.0; extra == 'black'
|
|
16
16
|
Provides-Extra: dev
|
|
@@ -34,8 +34,8 @@ Phantomwright is a library that bridging playwright-core patch + extending playw
|
|
|
34
34
|
uv venv
|
|
35
35
|
.venv\Scripts\activate
|
|
36
36
|
uv sync --extra dev
|
|
37
|
-
uv run
|
|
38
|
-
uv run
|
|
37
|
+
uv run patchright_fork install-deps
|
|
38
|
+
uv run patchright_fork install
|
|
39
39
|
uv run pytest
|
|
40
40
|
```
|
|
41
41
|
|
|
@@ -45,6 +45,20 @@ Phantomwright is a library that bridging playwright-core patch + extending playw
|
|
|
45
45
|
uv venv --clear
|
|
46
46
|
```
|
|
47
47
|
|
|
48
|
+
### Debug Playwright Core
|
|
49
|
+
|
|
50
|
+
Phantomwright provides the ability not only to debug playwright-python, but also to attach to the Node process that runs playwright-core.
|
|
51
|
+
|
|
52
|
+
You first need to open `Chrome` and go to `chrome://inspect`, then click `Open dedicated DevTools for Node`.
|
|
53
|
+
|
|
54
|
+
In the `Connection` tab, add a connection to `localhost:9229`.
|
|
55
|
+
|
|
56
|
+
Then choose the debug session `Core Repro: Select Case` and pick a minimal repro case (or write your own).
|
|
57
|
+
|
|
58
|
+
The Node process will pause at startup, and you can then debug playwright-core.
|
|
59
|
+
|
|
60
|
+
This makes it possible to debug playwright-core and send PRs to patchright to fix critical issues.
|
|
61
|
+
|
|
48
62
|
### Thanks
|
|
49
63
|
|
|
50
64
|
+ [patchright](https://pypi.org/project/patchright/)
|
|
@@ -14,8 +14,8 @@ Phantomwright is a library that bridging playwright-core patch + extending playw
|
|
|
14
14
|
uv venv
|
|
15
15
|
.venv\Scripts\activate
|
|
16
16
|
uv sync --extra dev
|
|
17
|
-
uv run
|
|
18
|
-
uv run
|
|
17
|
+
uv run patchright_fork install-deps
|
|
18
|
+
uv run patchright_fork install
|
|
19
19
|
uv run pytest
|
|
20
20
|
```
|
|
21
21
|
|
|
@@ -25,6 +25,20 @@ Phantomwright is a library that bridging playwright-core patch + extending playw
|
|
|
25
25
|
uv venv --clear
|
|
26
26
|
```
|
|
27
27
|
|
|
28
|
+
### Debug Playwright Core
|
|
29
|
+
|
|
30
|
+
Phantomwright provides the ability not only to debug playwright-python, but also to attach to the Node process that runs playwright-core.
|
|
31
|
+
|
|
32
|
+
You first need to open `Chrome` and go to `chrome://inspect`, then click `Open dedicated DevTools for Node`.
|
|
33
|
+
|
|
34
|
+
In the `Connection` tab, add a connection to `localhost:9229`.
|
|
35
|
+
|
|
36
|
+
Then choose the debug session `Core Repro: Select Case` and pick a minimal repro case (or write your own).
|
|
37
|
+
|
|
38
|
+
The Node process will pause at startup, and you can then debug playwright-core.
|
|
39
|
+
|
|
40
|
+
This makes it possible to debug playwright-core and send PRs to patchright to fix critical issues.
|
|
41
|
+
|
|
28
42
|
### Thanks
|
|
29
43
|
|
|
30
44
|
+ [patchright](https://pypi.org/project/patchright/)
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
import patchright_fork._impl._transport as t
|
|
4
|
+
|
|
5
|
+
_patched = False
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def do_patch() -> None:
|
|
9
|
+
"""
|
|
10
|
+
Automatically enable Node.js debug mode if PHANTOMWRIGHT_DEBUG environment variable is set.
|
|
11
|
+
|
|
12
|
+
Environment variables:
|
|
13
|
+
PHANTOMWRIGHT_DEBUG: Set to "1" or "true" to enable debug mode
|
|
14
|
+
PHANTOMWRIGHT_DEBUG_PORT: Debug port (default: 9229)
|
|
15
|
+
PHANTOMWRIGHT_DEBUG_BREAK: Set to "0" or "false" to disable break on start
|
|
16
|
+
"""
|
|
17
|
+
global _patched
|
|
18
|
+
if _patched:
|
|
19
|
+
return
|
|
20
|
+
|
|
21
|
+
debug_env = os.environ.get("PHANTOMWRIGHT_DEBUG", "").lower()
|
|
22
|
+
if debug_env not in ("1", "true"):
|
|
23
|
+
return
|
|
24
|
+
|
|
25
|
+
port = int(os.environ.get("PHANTOMWRIGHT_DEBUG_PORT", "9229"))
|
|
26
|
+
break_on_start = os.environ.get("PHANTOMWRIGHT_DEBUG_BREAK", "1").lower() not in ("0", "false")
|
|
27
|
+
inspect_flag = f"--inspect-brk={port}" if break_on_start else f"--inspect={port}"
|
|
28
|
+
|
|
29
|
+
orig_create = t.asyncio.create_subprocess_exec
|
|
30
|
+
|
|
31
|
+
async def patched_create(*args, **kwargs):
|
|
32
|
+
args = list(args)
|
|
33
|
+
args.insert(1, inspect_flag)
|
|
34
|
+
print(f"[PhantomWright Debug] Launching Node with: {args}")
|
|
35
|
+
return await orig_create(*args, **kwargs)
|
|
36
|
+
|
|
37
|
+
t.asyncio.create_subprocess_exec = patched_create
|
|
38
|
+
_patched = True
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
from functools import wraps
|
|
2
|
+
|
|
3
|
+
from patchright_fork.async_api import Page as AsyncPage
|
|
4
|
+
from patchright_fork.sync_api import Page as SyncPage
|
|
5
|
+
|
|
6
|
+
def do_patch() -> None:
|
|
7
|
+
""" Aysnc / Sync version of evaluate override"""
|
|
8
|
+
_original_sync_evaluate = SyncPage.evaluate
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@wraps(_original_sync_evaluate)
|
|
12
|
+
def _hooked_sync_evaluate(self, *args, **kwargs):
|
|
13
|
+
# Ensure isolated_context defaults to False if not provided
|
|
14
|
+
kwargs.setdefault("isolated_context", False)
|
|
15
|
+
return _original_sync_evaluate(self, *args, **kwargs)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
SyncPage.evaluate = _hooked_sync_evaluate
|
|
19
|
+
|
|
20
|
+
_original_sync_evaluate_handle = SyncPage.evaluate_handle
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@wraps(_original_sync_evaluate_handle)
|
|
24
|
+
def _hooked_sync_evaluate_handle(self, *args, **kwargs):
|
|
25
|
+
# Ensure isolated_context defaults to False if not provided
|
|
26
|
+
kwargs.setdefault("isolated_context", False)
|
|
27
|
+
return _original_sync_evaluate_handle(self, *args, **kwargs)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
SyncPage.evaluate_handle = _hooked_sync_evaluate_handle
|
|
31
|
+
|
|
32
|
+
_original_async_evaluate = AsyncPage.evaluate
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@wraps(_original_async_evaluate)
|
|
36
|
+
async def _hooked_async_evaluate(self, *args, **kwargs):
|
|
37
|
+
# Ensure isolated_context defaults to False if not provided
|
|
38
|
+
kwargs.setdefault("isolated_context", False)
|
|
39
|
+
return await _original_async_evaluate(self, *args, **kwargs)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
AsyncPage.evaluate = _hooked_async_evaluate
|
|
43
|
+
|
|
44
|
+
_original_async_evaluate_handle = AsyncPage.evaluate_handle
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@wraps(_original_async_evaluate_handle)
|
|
48
|
+
async def _hooked_async_evaluate_handle(self, *args, **kwargs):
|
|
49
|
+
# Ensure isolated_context defaults to False if not provided
|
|
50
|
+
kwargs.setdefault("isolated_context", False)
|
|
51
|
+
return await _original_async_evaluate_handle(self, *args, **kwargs)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
AsyncPage.evaluate_handle = _hooked_async_evaluate_handle
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
version = 'v0.0.7'
|
|
@@ -1,6 +1,4 @@
|
|
|
1
|
-
from
|
|
2
|
-
from ._patch_goto import *
|
|
3
|
-
from ._patch_evaluate import *
|
|
1
|
+
from patchright_fork.async_api import expect, async_playwright, Accessibility, APIRequest, APIRequestContext, APIResponse, Browser, BrowserContext, BrowserType, CDPSession, ChromiumBrowserContext, ConsoleMessage, Cookie, Dialog, Download, ElementHandle, Error, FileChooser, FilePayload, FloatRect, Frame, FrameLocator, Geolocation, HttpCredentials, JSHandle, Keyboard, Locator, Mouse, Page, PdfMargins, Position, Playwright, ProxySettings, Request, ResourceTiming, Response, Route, Selectors, SourceLocation, StorageState, StorageStateCookie, TimeoutError, Touchscreen, Video, ViewportSize, WebError, WebSocket, WebSocketRoute, Worker
|
|
4
2
|
|
|
5
3
|
__all__ = [
|
|
6
4
|
"expect",
|
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
import json
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Any, Dict, Union, Tuple, Optional
|
|
5
|
+
import warnings
|
|
6
|
+
|
|
7
|
+
from patchright_fork import async_api, sync_api
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def from_file(name) -> str:
|
|
11
|
+
return (Path(__file__).parent / "js" / name).read_text()
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
SCRIPTS: Dict[str, str] = {
|
|
15
|
+
"generate_magic_arrays": from_file("generate.magic.arrays.js"),
|
|
16
|
+
"utils": from_file("utils.js"),
|
|
17
|
+
"chrome_app": from_file("evasions/chrome.app.js"),
|
|
18
|
+
"chrome_csi": from_file("evasions/chrome.csi.js"),
|
|
19
|
+
"chrome_hairline": from_file("evasions/chrome.hairline.js"),
|
|
20
|
+
"chrome_load_times": from_file("evasions/chrome.load.times.js"),
|
|
21
|
+
"chrome_runtime": from_file("evasions/chrome.runtime.js"),
|
|
22
|
+
"iframe_content_window": from_file("evasions/iframe.contentWindow.js"),
|
|
23
|
+
"media_codecs": from_file("evasions/media.codecs.js"),
|
|
24
|
+
"navigator_hardware_concurrency": from_file("evasions/navigator.hardwareConcurrency.js"),
|
|
25
|
+
"navigator_languages": from_file("evasions/navigator.languages.js"),
|
|
26
|
+
"navigator_permissions": from_file("evasions/navigator.permissions.js"),
|
|
27
|
+
"navigator_platform": from_file("evasions/navigator.platform.js"),
|
|
28
|
+
"navigator_plugins": from_file("evasions/navigator.plugins.js"),
|
|
29
|
+
"navigator_user_agent": from_file("evasions/navigator.userAgent.js"),
|
|
30
|
+
"navigator_vendor": from_file("evasions/navigator.vendor.js"),
|
|
31
|
+
"error_prototype": from_file("evasions/error.prototype.js"),
|
|
32
|
+
"webgl_vendor": from_file("evasions/webgl.vendor.js"),
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class Stealth:
|
|
37
|
+
"""
|
|
38
|
+
Playwright stealth configuration that applies stealth strategies to Playwright.
|
|
39
|
+
The stealth strategies are contained in ./js package and are basic javascript scripts that are executed
|
|
40
|
+
on every page.goto() called.
|
|
41
|
+
Note:
|
|
42
|
+
All init scripts are combined by playwright into one script and then executed this means
|
|
43
|
+
the scripts should not have conflicting constants/variables etc. !
|
|
44
|
+
This also means scripts can be extended by overriding enabled_scripts generator:
|
|
45
|
+
```
|
|
46
|
+
@property
|
|
47
|
+
def enabled_scripts():
|
|
48
|
+
yield 'console.log("first script")'
|
|
49
|
+
yield from super().enabled_scripts()
|
|
50
|
+
yield 'console.log("last script")'
|
|
51
|
+
```
|
|
52
|
+
"""
|
|
53
|
+
|
|
54
|
+
_USER_AGENT_OVERRIDE_PIGGYBACK_KEY = "_stealth_user_agent"
|
|
55
|
+
_SEC_CH_UA_OVERRIDE_PIGGYBACK_KEY = "_stealth_sec_ch_ua"
|
|
56
|
+
|
|
57
|
+
def __init__(
|
|
58
|
+
self,
|
|
59
|
+
*,
|
|
60
|
+
chrome_app: bool = True,
|
|
61
|
+
chrome_csi: bool = True,
|
|
62
|
+
chrome_load_times: bool = True,
|
|
63
|
+
chrome_runtime: bool = False,
|
|
64
|
+
hairline: bool = True,
|
|
65
|
+
iframe_content_window: bool = True,
|
|
66
|
+
media_codecs: bool = True,
|
|
67
|
+
navigator_hardware_concurrency: bool = True,
|
|
68
|
+
navigator_languages: bool = True,
|
|
69
|
+
navigator_permissions: bool = True,
|
|
70
|
+
navigator_platform: bool = True,
|
|
71
|
+
navigator_plugins: bool = True,
|
|
72
|
+
navigator_user_agent: bool = True,
|
|
73
|
+
navigator_vendor: bool = True,
|
|
74
|
+
error_prototype: bool = True,
|
|
75
|
+
sec_ch_ua: bool = True,
|
|
76
|
+
webgl_vendor: bool = True,
|
|
77
|
+
navigator_languages_override: Tuple[str, str] = ("en-US", "en"),
|
|
78
|
+
navigator_platform_override: str = "Win32",
|
|
79
|
+
navigator_user_agent_override: Optional[str] = None,
|
|
80
|
+
navigator_vendor_override: str = None,
|
|
81
|
+
sec_ch_ua_override: Optional[str] = None,
|
|
82
|
+
webgl_renderer_override: str = None,
|
|
83
|
+
webgl_vendor_override: str = None,
|
|
84
|
+
init_scripts_only: bool = False,
|
|
85
|
+
script_logging: bool = False,
|
|
86
|
+
):
|
|
87
|
+
# scripts to load
|
|
88
|
+
self.chrome_app: bool = chrome_app
|
|
89
|
+
self.chrome_csi: bool = chrome_csi
|
|
90
|
+
self.chrome_load_times: bool = chrome_load_times
|
|
91
|
+
self.chrome_runtime: bool = chrome_runtime
|
|
92
|
+
self.hairline: bool = hairline
|
|
93
|
+
self.iframe_content_window: bool = iframe_content_window
|
|
94
|
+
self.media_codecs: bool = media_codecs
|
|
95
|
+
self.navigator_hardware_concurrency: int = navigator_hardware_concurrency
|
|
96
|
+
self.navigator_languages: bool = navigator_languages
|
|
97
|
+
self.navigator_permissions: bool = navigator_permissions
|
|
98
|
+
self.navigator_platform: bool = navigator_platform
|
|
99
|
+
self.navigator_plugins: bool = navigator_plugins
|
|
100
|
+
self.navigator_user_agent: bool = navigator_user_agent
|
|
101
|
+
self.navigator_vendor: bool = navigator_vendor
|
|
102
|
+
self.error_prototype: bool = error_prototype
|
|
103
|
+
self.sec_ch_ua: bool = sec_ch_ua
|
|
104
|
+
self.webgl_vendor: bool = webgl_vendor
|
|
105
|
+
|
|
106
|
+
# warn if an override was provided for a disabled option
|
|
107
|
+
self._check_for_disabled_options_overridden(locals())
|
|
108
|
+
# evasion options
|
|
109
|
+
self.navigator_languages_override: Tuple[str, str] = navigator_languages_override or ("en-US", "en")
|
|
110
|
+
self.navigator_platform_override: Optional[str] = navigator_platform_override
|
|
111
|
+
self.navigator_user_agent_override: Optional[str] = navigator_user_agent_override
|
|
112
|
+
self.navigator_vendor_override: str = navigator_vendor_override or None
|
|
113
|
+
if sec_ch_ua_override is None and self.navigator_user_agent_override is not None:
|
|
114
|
+
# we can get sec_ch_ua override for "free" here if we can parse the Chrome version string
|
|
115
|
+
self.sec_ch_ua_override = self._get_greased_chrome_sec_ua_ch(self.navigator_user_agent_override)
|
|
116
|
+
else:
|
|
117
|
+
self.sec_ch_ua_override: Optional[str] = sec_ch_ua_override
|
|
118
|
+
self.webgl_renderer_override: str = webgl_renderer_override or "Intel Iris OpenGL Engine"
|
|
119
|
+
self.webgl_vendor_override: str = webgl_vendor_override or "Intel Inc."
|
|
120
|
+
# other options
|
|
121
|
+
self.init_scripts_only: bool = init_scripts_only
|
|
122
|
+
self.script_logging = script_logging
|
|
123
|
+
|
|
124
|
+
@property
|
|
125
|
+
def script_payload(self) -> str:
|
|
126
|
+
"""
|
|
127
|
+
Generates an immediately invoked function expression for all enabled scripts
|
|
128
|
+
Returns: string of enabled scripts in IIFE
|
|
129
|
+
"""
|
|
130
|
+
scripts_block = "\n".join(self.enabled_scripts)
|
|
131
|
+
if len(scripts_block) == 0:
|
|
132
|
+
return ""
|
|
133
|
+
return "(() => {\n" + scripts_block + "\n})();"
|
|
134
|
+
|
|
135
|
+
@property
|
|
136
|
+
def options_payload(self) -> str:
|
|
137
|
+
opts = {
|
|
138
|
+
"navigator_hardware_concurrency": self.navigator_hardware_concurrency,
|
|
139
|
+
"navigator_languages_override": self.navigator_languages_override,
|
|
140
|
+
"navigator_platform": self.navigator_platform_override,
|
|
141
|
+
"navigator_user_agent": self.navigator_user_agent_override,
|
|
142
|
+
"navigator_vendor": self.navigator_vendor_override,
|
|
143
|
+
"webgl_renderer": self.webgl_renderer_override,
|
|
144
|
+
"webgl_vendor": self.webgl_vendor_override,
|
|
145
|
+
"script_logging": self.script_logging,
|
|
146
|
+
}
|
|
147
|
+
return f"const opts = {json.dumps(opts)};"
|
|
148
|
+
|
|
149
|
+
@property
|
|
150
|
+
def enabled_scripts(self):
|
|
151
|
+
evasion_script_block = "\n".join(self._evasion_scripts)
|
|
152
|
+
if len(evasion_script_block) == 0:
|
|
153
|
+
return ""
|
|
154
|
+
|
|
155
|
+
yield self.options_payload
|
|
156
|
+
yield SCRIPTS["utils"]
|
|
157
|
+
yield SCRIPTS["generate_magic_arrays"]
|
|
158
|
+
yield evasion_script_block
|
|
159
|
+
|
|
160
|
+
@property
|
|
161
|
+
def _evasion_scripts(self) -> str:
|
|
162
|
+
if self.chrome_app:
|
|
163
|
+
yield SCRIPTS["chrome_app"]
|
|
164
|
+
if self.chrome_csi:
|
|
165
|
+
yield SCRIPTS["chrome_csi"]
|
|
166
|
+
if self.hairline:
|
|
167
|
+
yield SCRIPTS["chrome_hairline"]
|
|
168
|
+
if self.chrome_load_times:
|
|
169
|
+
yield SCRIPTS["chrome_load_times"]
|
|
170
|
+
if self.chrome_runtime:
|
|
171
|
+
yield SCRIPTS["chrome_runtime"]
|
|
172
|
+
if self.iframe_content_window:
|
|
173
|
+
yield SCRIPTS["iframe_content_window"]
|
|
174
|
+
if self.media_codecs:
|
|
175
|
+
yield SCRIPTS["media_codecs"]
|
|
176
|
+
if self.navigator_languages:
|
|
177
|
+
yield SCRIPTS["navigator_languages"]
|
|
178
|
+
if self.navigator_permissions:
|
|
179
|
+
yield SCRIPTS["navigator_permissions"]
|
|
180
|
+
if self.navigator_platform:
|
|
181
|
+
yield SCRIPTS["navigator_platform"]
|
|
182
|
+
if self.navigator_plugins:
|
|
183
|
+
yield SCRIPTS["navigator_plugins"]
|
|
184
|
+
if self.navigator_user_agent:
|
|
185
|
+
yield SCRIPTS["navigator_user_agent"]
|
|
186
|
+
if self.navigator_vendor:
|
|
187
|
+
yield SCRIPTS["navigator_vendor"]
|
|
188
|
+
if self.error_prototype:
|
|
189
|
+
yield SCRIPTS["error_prototype"]
|
|
190
|
+
if self.webgl_vendor:
|
|
191
|
+
yield SCRIPTS["webgl_vendor"]
|
|
192
|
+
|
|
193
|
+
async def apply_stealth_async(self, page_or_context: Union[async_api.Page, async_api.BrowserContext]) -> None:
|
|
194
|
+
if len(self.script_payload) > 0:
|
|
195
|
+
await page_or_context.add_init_script(self.script_payload)
|
|
196
|
+
|
|
197
|
+
def apply_stealth_sync(self, page_or_context: Union[sync_api.Page, sync_api.BrowserContext]) -> None:
|
|
198
|
+
if len(self.script_payload) > 0:
|
|
199
|
+
page_or_context.add_init_script(self.script_payload)
|
|
200
|
+
|
|
201
|
+
@staticmethod
|
|
202
|
+
def _check_for_disabled_options_overridden(packed_kwargs: Dict[str, Any]) -> None:
|
|
203
|
+
for key in ALL_EVASIONS_DISABLED_KWARGS.keys():
|
|
204
|
+
if not packed_kwargs.get(key) and packed_kwargs.get(f"{key}_override") is not None:
|
|
205
|
+
warnings.warn(
|
|
206
|
+
f"{key} is False, but an override ({key}_override) was provided, "
|
|
207
|
+
f"which is probably not what you intended to do",
|
|
208
|
+
stacklevel=3,
|
|
209
|
+
)
|
|
210
|
+
|
|
211
|
+
ALL_EVASIONS_DISABLED_KWARGS = {
|
|
212
|
+
"chrome_app": False,
|
|
213
|
+
"chrome_csi": False,
|
|
214
|
+
"chrome_load_times": False,
|
|
215
|
+
"chrome_runtime": False,
|
|
216
|
+
"hairline": False,
|
|
217
|
+
"iframe_content_window": False,
|
|
218
|
+
"media_codecs": False,
|
|
219
|
+
"navigator_hardware_concurrency": False,
|
|
220
|
+
"navigator_languages": False,
|
|
221
|
+
"navigator_permissions": False,
|
|
222
|
+
"navigator_platform": False,
|
|
223
|
+
"navigator_plugins": False,
|
|
224
|
+
"navigator_user_agent": False,
|
|
225
|
+
"navigator_vendor": False,
|
|
226
|
+
"error_prototype": False,
|
|
227
|
+
"sec_ch_ua": False,
|
|
228
|
+
"webgl_vendor": False,
|
|
229
|
+
}
|
|
@@ -1,6 +1,4 @@
|
|
|
1
|
-
from
|
|
2
|
-
from ._patch_goto import *
|
|
3
|
-
from ._patch_evaluate import *
|
|
1
|
+
from patchright_fork.sync_api import expect, Accessibility, APIRequest, APIRequestContext, APIResponse, Browser, BrowserContext, BrowserType, CDPSession, ChromiumBrowserContext, ConsoleMessage, Cookie, Dialog, Download, ElementHandle, Error, FileChooser, FilePayload, FloatRect, Frame, FrameLocator, Geolocation, HttpCredentials, JSHandle, Keyboard, Locator, Mouse, Page, PdfMargins, Position, Playwright, ProxySettings, Request, ResourceTiming, Response, Route, Selectors, SourceLocation, StorageState, StorageStateCookie, sync_playwright, TimeoutError, Touchscreen, Video, ViewportSize, WebError, WebSocket, WebSocketRoute, Worker
|
|
4
2
|
|
|
5
3
|
__all__ = [
|
|
6
4
|
"expect",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "phantomwright"
|
|
3
|
-
version = "0.0.
|
|
3
|
+
version = "0.0.7"
|
|
4
4
|
description = "Bridging playwright-core patch + extending playwright API for stealth injection & user simulation"
|
|
5
5
|
authors = [
|
|
6
6
|
{name="Hang Yin", email="hangyin@microsoft.com"},
|
|
@@ -11,7 +11,7 @@ readme = "README.md"
|
|
|
11
11
|
requires-python = ">=3.9"
|
|
12
12
|
|
|
13
13
|
dependencies = [
|
|
14
|
-
"patchright==1.56.
|
|
14
|
+
"patchright-fork==1.56.2",
|
|
15
15
|
]
|
|
16
16
|
|
|
17
17
|
classifiers = [
|
|
@@ -1,36 +0,0 @@
|
|
|
1
|
-
import random
|
|
2
|
-
import logging
|
|
3
|
-
import asyncio
|
|
4
|
-
|
|
5
|
-
logger = logging.getLogger(__name__)
|
|
6
|
-
|
|
7
|
-
async def async_mouse_move(page):
|
|
8
|
-
try:
|
|
9
|
-
await asyncio.sleep(random.uniform(0.5, 1.5))
|
|
10
|
-
viewport = page.viewport_size
|
|
11
|
-
if not viewport:
|
|
12
|
-
return
|
|
13
|
-
width, height = viewport['width'], viewport['height']
|
|
14
|
-
for _ in range(random.randint(3, 7)):
|
|
15
|
-
x = random.randint(100, width - 100)
|
|
16
|
-
y = random.randint(100, height - 100)
|
|
17
|
-
await page.mouse.move(x, y)
|
|
18
|
-
await asyncio.sleep(random.uniform(0.1, 0.3))
|
|
19
|
-
except Exception as e:
|
|
20
|
-
logger.error(f"Error simulating user activity: {e}")
|
|
21
|
-
|
|
22
|
-
def sync_mouse_move(page):
|
|
23
|
-
try:
|
|
24
|
-
import time
|
|
25
|
-
time.sleep(random.uniform(0.5, 1.5))
|
|
26
|
-
viewport = page.viewport_size
|
|
27
|
-
if not viewport:
|
|
28
|
-
return
|
|
29
|
-
width, height = viewport['width'], viewport['height']
|
|
30
|
-
for _ in range(random.randint(3, 7)):
|
|
31
|
-
x = random.randint(100, width - 100)
|
|
32
|
-
y = random.randint(100, height - 100)
|
|
33
|
-
page.mouse.move(x, y)
|
|
34
|
-
time.sleep(random.uniform(0.1, 0.3))
|
|
35
|
-
except Exception as e:
|
|
36
|
-
logger.error(f"Error simulating user activity: {e}")
|
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
import random
|
|
2
|
-
import logging
|
|
3
|
-
|
|
4
|
-
logger = logging.getLogger(__name__)
|
|
5
|
-
|
|
6
|
-
async def async_mouse_wheel(page):
|
|
7
|
-
try:
|
|
8
|
-
scroll_amount = random.randint(100, 300)
|
|
9
|
-
await page.mouse.wheel(0, scroll_amount)
|
|
10
|
-
except Exception as e:
|
|
11
|
-
logger.error(f"Error simulating mouse wheel activity: {e}")
|
|
12
|
-
|
|
13
|
-
def sync_mouse_wheel(page):
|
|
14
|
-
try:
|
|
15
|
-
scroll_amount = random.randint(100, 300)
|
|
16
|
-
page.mouse.wheel(0, scroll_amount)
|
|
17
|
-
except Exception as e:
|
|
18
|
-
logger.error(f"Error simulating mouse wheel activity: {e}")
|
|
@@ -1,26 +0,0 @@
|
|
|
1
|
-
from functools import wraps
|
|
2
|
-
from patchright.async_api import Page
|
|
3
|
-
|
|
4
|
-
_original_evaluate = Page.evaluate
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
@wraps(_original_evaluate)
|
|
8
|
-
async def _hooked_evaluate(self, *args, **kwargs):
|
|
9
|
-
# Ensure isolated_context defaults to False if not provided
|
|
10
|
-
kwargs.setdefault("isolated_context", False)
|
|
11
|
-
return await _original_evaluate(self, *args, **kwargs)
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
Page.evaluate = _hooked_evaluate
|
|
15
|
-
|
|
16
|
-
_original_evaluate_handle = Page.evaluate_handle
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
@wraps(_original_evaluate_handle)
|
|
20
|
-
async def _hooked_evaluate_handle(self, *args, **kwargs):
|
|
21
|
-
# Ensure isolated_context defaults to False if not provided
|
|
22
|
-
kwargs.setdefault("isolated_context", False)
|
|
23
|
-
return await _original_evaluate_handle(self, *args, **kwargs)
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
Page.evaluate_handle = _hooked_evaluate_handle
|
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
import asyncio
|
|
2
|
-
from functools import wraps
|
|
3
|
-
import logging
|
|
4
|
-
import random
|
|
5
|
-
from patchright.async_api import Page
|
|
6
|
-
from phantomwright._impl._user_sim._mouse_move_sim import async_mouse_move
|
|
7
|
-
from phantomwright._impl._user_sim._mouse_wheel_sim import async_mouse_wheel
|
|
8
|
-
|
|
9
|
-
logger = logging.getLogger(__name__)
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
async def _user_sim_hook(self, *args, **kwargs):
|
|
13
|
-
url = kwargs.get("url")
|
|
14
|
-
if url is None and args:
|
|
15
|
-
url = args[0]
|
|
16
|
-
logger.info(f"Navigating to {url} (async)")
|
|
17
|
-
await async_mouse_move(self)
|
|
18
|
-
await asyncio.sleep(random.uniform(0.1, 0.3))
|
|
19
|
-
await async_mouse_wheel(self)
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
if not hasattr(Page.goto, "_pre_hooks"):
|
|
23
|
-
_original_goto = Page.goto
|
|
24
|
-
|
|
25
|
-
@wraps(_original_goto)
|
|
26
|
-
async def _hooked_goto(self, *args, **kwargs):
|
|
27
|
-
for hook in _hooked_goto._pre_hooks:
|
|
28
|
-
await hook(self, *args, **kwargs)
|
|
29
|
-
return await _original_goto(self, *args, **kwargs)
|
|
30
|
-
|
|
31
|
-
_hooked_goto._pre_hooks = []
|
|
32
|
-
Page.goto = _hooked_goto
|
|
33
|
-
|
|
34
|
-
if _user_sim_hook not in Page.goto._pre_hooks:
|
|
35
|
-
Page.goto._pre_hooks.append(_user_sim_hook)
|
|
@@ -1,77 +0,0 @@
|
|
|
1
|
-
from collections.abc import MutableMapping
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
# straight from: https://github.com/kennethreitz/requests/blob/master/src/requests/structures.py
|
|
5
|
-
class CaseInsensitiveDict(MutableMapping):
|
|
6
|
-
"""A case-insensitive ``dict``-like object.
|
|
7
|
-
|
|
8
|
-
Implements all methods and operations of
|
|
9
|
-
``MutableMapping`` as well as dict's ``copy``. Also
|
|
10
|
-
provides ``lower_items``.
|
|
11
|
-
|
|
12
|
-
All keys are expected to be strings. The structure remembers the
|
|
13
|
-
case of the last key to be set, and ``iter(instance)``,
|
|
14
|
-
``keys()``, ``items()``, ``iterkeys()``, and ``iteritems()``
|
|
15
|
-
will contain case-sensitive keys. However, querying and contains
|
|
16
|
-
testing is case insensitive::
|
|
17
|
-
|
|
18
|
-
cid = CaseInsensitiveDict()
|
|
19
|
-
cid['Accept'] = 'application/json'
|
|
20
|
-
cid['aCCEPT'] == 'application/json' # True
|
|
21
|
-
list(cid) == ['Accept'] # True
|
|
22
|
-
|
|
23
|
-
For example, ``headers['content-encoding']`` will return the
|
|
24
|
-
value of a ``'Content-Encoding'`` response header, regardless
|
|
25
|
-
of how the header name was originally stored.
|
|
26
|
-
|
|
27
|
-
If the constructor, ``.update``, or equality comparison
|
|
28
|
-
operations are given keys that have equal ``.lower()``s, the
|
|
29
|
-
behavior is undefined.
|
|
30
|
-
"""
|
|
31
|
-
|
|
32
|
-
def __init__(self, data=None, **kwargs):
|
|
33
|
-
self._store = {}
|
|
34
|
-
if data is None:
|
|
35
|
-
data = {}
|
|
36
|
-
self.update(data, **kwargs)
|
|
37
|
-
|
|
38
|
-
def __setitem__(self, key, value):
|
|
39
|
-
# Use the lowercased key for lookups, but store the actual
|
|
40
|
-
# key alongside the value.
|
|
41
|
-
self._store[key.lower()] = (key, value)
|
|
42
|
-
|
|
43
|
-
def __getitem__(self, key):
|
|
44
|
-
return self._store[key.lower()][1]
|
|
45
|
-
|
|
46
|
-
def __delitem__(self, key):
|
|
47
|
-
del self._store[key.lower()]
|
|
48
|
-
|
|
49
|
-
def __iter__(self):
|
|
50
|
-
return (casedkey for casedkey, mappedvalue in self._store.values())
|
|
51
|
-
|
|
52
|
-
def __len__(self):
|
|
53
|
-
return len(self._store)
|
|
54
|
-
|
|
55
|
-
def lower_items(self):
|
|
56
|
-
"""Like iteritems(), but with all lowercase keys."""
|
|
57
|
-
return ((lowerkey, keyval[1]) for (lowerkey, keyval) in self._store.items())
|
|
58
|
-
|
|
59
|
-
def __eq__(self, other):
|
|
60
|
-
from collections.abc import Mapping
|
|
61
|
-
|
|
62
|
-
if isinstance(other, Mapping):
|
|
63
|
-
other = CaseInsensitiveDict(other)
|
|
64
|
-
else:
|
|
65
|
-
return NotImplemented
|
|
66
|
-
# Compare insensitively
|
|
67
|
-
return dict(self.lower_items()) == dict(other.lower_items())
|
|
68
|
-
|
|
69
|
-
# Copy is required
|
|
70
|
-
def copy(self):
|
|
71
|
-
return CaseInsensitiveDict(self._store.values())
|
|
72
|
-
|
|
73
|
-
def __repr__(self):
|
|
74
|
-
return str(dict(self.items()))
|
|
75
|
-
|
|
76
|
-
def items(self):
|
|
77
|
-
pass
|