python-playwright-helper 0.2.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of python-playwright-helper might be problematic. Click here for more details.
- playwright_helper/__init__.py +11 -0
- playwright_helper/libs/__init__.py +11 -0
- playwright_helper/libs/base_po.py +137 -0
- playwright_helper/libs/browser_pool.py +72 -0
- playwright_helper/libs/executor.py +243 -0
- playwright_helper/middlewares/__init__.py +11 -0
- playwright_helper/middlewares/stealth.py +125 -0
- playwright_helper/utils/__init__.py +11 -0
- playwright_helper/utils/browser_utils.py +24 -0
- playwright_helper/utils/file_handle.py +33 -0
- playwright_helper/utils/log_utils.py +15 -0
- playwright_helper/utils/po_utils.py +113 -0
- playwright_helper/utils/template_data.py +5190 -0
- playwright_helper/utils/type_utils.py +129 -0
- python_playwright_helper-0.2.8.dist-info/METADATA +247 -0
- python_playwright_helper-0.2.8.dist-info/RECORD +19 -0
- python_playwright_helper-0.2.8.dist-info/WHEEL +5 -0
- python_playwright_helper-0.2.8.dist-info/licenses/LICENSE +201 -0
- python_playwright_helper-0.2.8.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
# ---------------------------------------------------------------------------------------------------------
|
|
4
|
+
# ProjectName: playwright-helper
|
|
5
|
+
# FileName: __init__.py
|
|
6
|
+
# Description: 源码包
|
|
7
|
+
# Author: ASUS
|
|
8
|
+
# CreateDate: 2025/12/13
|
|
9
|
+
# Copyright ©2011-2025. Hunan xxxxxxx Company limited. All rights reserved.
|
|
10
|
+
# ---------------------------------------------------------------------------------------------------------
|
|
11
|
+
"""
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
# ---------------------------------------------------------------------------------------------------------
|
|
4
|
+
# ProjectName: playwright-helper
|
|
5
|
+
# FileName: __init__.py
|
|
6
|
+
# Description: 核心包
|
|
7
|
+
# Author: ASUS
|
|
8
|
+
# CreateDate: 2025/12/13
|
|
9
|
+
# Copyright ©2011-2025. Hunan xxxxxxx Company limited. All rights reserved.
|
|
10
|
+
# ---------------------------------------------------------------------------------------------------------
|
|
11
|
+
"""
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
# ---------------------------------------------------------------------------------------------------------
|
|
4
|
+
# ProjectName: playwright-helper
|
|
5
|
+
# FileName: base_po.py
|
|
6
|
+
# Description: po对象基础类
|
|
7
|
+
# Author: ASUS
|
|
8
|
+
# CreateDate: 2025/12/13
|
|
9
|
+
# Copyright ©2011-2025. Hunan xxxxxxx Company limited. All rights reserved.
|
|
10
|
+
# ---------------------------------------------------------------------------------------------------------
|
|
11
|
+
"""
|
|
12
|
+
import asyncio
|
|
13
|
+
from logging import Logger
|
|
14
|
+
from typing import List, Any, cast
|
|
15
|
+
from playwright.async_api import Page, Locator, TimeoutError as PlaywrightTimeoutError
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class BasePo(object):
    """Base class for page objects (PO) built on top of a Playwright ``Page``.

    An instance stores the page it drives and the URL that identifies the
    page object, and offers helpers for URL comparison and for waiting on
    locators.
    """
    __page: Page

    def __init__(self, page: Page, url: str):
        """Remember the driven page and the URL this page object stands for.

        :param page: Playwright page driven by this page object
        :param url: full URL or URL suffix identifying the page
        """
        self.url = url
        self.__page = page

    def get_page(self) -> Page:
        """Return the wrapped Playwright page."""
        return self.__page

    def is_current_page(self) -> bool:
        """Tell whether the wrapped page currently shows ``self.url``."""
        return self.iss_current_page(self.__page, self.url)

    def get_url_domain(self) -> str:
        """Return ``scheme://host`` of the page's current URL.

        :raises AttributeError: if the stored page is not a ``Page`` instance
        :raises IndexError: if the current URL has no ``//host`` part
        """
        if not isinstance(self.__page, Page):
            raise AttributeError("PO对象中的page属性未被初始化")
        # "https://host/path".split("/") -> ["https:", "", "host", "path"];
        # the first piece already carries the colon, so only "//" is added
        # (the previous "://" join produced "https:://host").
        pieces: List[str] = self.__page.url.split("/")
        return f"{pieces[0]}//{pieces[2]}"

    def get_url(self) -> str:
        """Return the current URL without its query string.

        URLs that contain no ``://`` are returned untouched.
        """
        current = self.__page.url
        return current.split("?")[0] if "://" in current else current

    @staticmethod
    def iss_current_page(page: Page, url: str) -> bool:
        """Tell whether ``page`` currently shows ``url``.

        Query strings are ignored on both sides and ``url`` only has to match
        the end of the page URL, so a path suffix is accepted as well.
        Returns ``False`` when ``page`` is not a ``Page`` instance.
        """
        if not isinstance(page, Page):
            return False
        return page.url.split("?")[0].endswith(url.split("?")[0])

    @staticmethod
    async def exists(locator):
        """Return ``True`` when the locator matches at least one element."""
        return await locator.count() > 0

    @staticmethod
    async def exists_one(locator):
        """Return ``True`` when the locator matches exactly one element."""
        return await locator.count() == 1

    async def get_locator(self, selector: str, timeout: float = 3.0) -> Locator:
        """Return the locator for ``selector`` once its first match is visible.

        :param selector: selector expression
        :param timeout: maximum wait in seconds
        :return: the (possibly multi-element) locator
        :raises PlaywrightTimeoutError: nothing became visible within ``timeout``
        :raises RuntimeError: any other failure while waiting
        """
        locator = self.__page.locator(selector)
        try:
            # Playwright expects milliseconds.
            await locator.first.wait_for(state='visible', timeout=timeout * 1000)
            return locator
        except PlaywrightTimeoutError as e:
            raise PlaywrightTimeoutError(f"元素 '{selector}' 未在 {timeout} 秒内找到") from e
        except Exception as e:
            raise RuntimeError(f"检查元素时发生错误: {str(e)}") from e

    @staticmethod
    async def get_sub_locator(locator: Locator, selector: str, timeout: float = 3.0) -> Locator:
        """Return the child locator for ``selector`` once its first match is visible.

        :param locator: parent locator to search in
        :param selector: selector expression
        :param timeout: maximum wait in seconds
        :return: the child locator
        :raises PlaywrightTimeoutError: nothing became visible within ``timeout``
        :raises RuntimeError: any other failure while waiting
        """
        locator_inner = locator.locator(selector)
        try:
            await locator_inner.first.wait_for(state='visible', timeout=timeout * 1000)
            return locator_inner
        except PlaywrightTimeoutError as e:
            raise PlaywrightTimeoutError(f"元素 '{selector}' 未在 {timeout} 秒内找到") from e
        except Exception as e:
            raise RuntimeError(f"检查元素时发生错误: {str(e)}") from e

    @classmethod
    async def handle_po_cookie_tip(cls, page: Any, logger: Logger, timeout: float = 3.0,
                                   selectors: List[str] = None) -> None:
        """Best-effort dismissal of a cookie-consent banner.

        The built-in selectors are tried first, followed by any extra
        ``selectors``; the first one that can be located is clicked and the
        method returns. Failures never propagate.

        :param page: object handed to ``get_locator`` as ``self``
            (NOTE(review): ``get_locator`` reads the private page attribute,
            so this presumably has to be a page-object instance, not a raw
            Playwright page -- confirm against callers)
        :param logger: logger receiving progress messages
        :param timeout: per-selector wait in seconds
        :param selectors: additional selectors to try after the built-in ones
        """
        selectors_inner: List[str] = [
            '//div[@id="isReadedCookie"]/button',
            '//button[@id="continue-btn"]/span[normalize-space(text())="同意"]'
        ]
        if selectors:
            selectors_inner.extend(selectors)
        page_inner = cast(cls, page)
        for selector in selectors_inner:
            try:
                cookie: Locator = await cls.get_locator(self=page_inner, selector=selector, timeout=timeout)
                logger.info(
                    '找到页面中存在cookie提示:[本网站使用cookie,用于在您的电脑中储存信息。这些cookie可以使网站正常运行,以及帮助我们改进用户体验。使用本网站,即表示您接受放置这些cookie。]')
                await cookie.click(button="left")
                logger.info("【同意】按钮点击完成")
                await asyncio.sleep(1)
                return
            except Exception as e:
                # Still best-effort, but leave a trace instead of hiding the cause.
                logger.debug(f"[cookie tip] selector {selector!r} skipped: {e}")

    async def url_wait_for(self, url: str, timeout: float = 3.0) -> None:
        """Poll every 0.1 s until the page shows ``url``.

        Accepted ``url`` formats:
            /shopping/oneway/SHA,PVG-URC/2026-01-08
            https://www.ceair.com/shopping/oneway/SHA,PVG-URC/2026-01-08

        :param url: full URL or URL suffix to wait for
        :param timeout: maximum wait in seconds (fractions are honoured; at
            least one check is always made)
        :raises RuntimeError: the page did not reach ``url`` in time
        """
        # int(timeout) * 10 dropped the fractional part, so e.g. 0.5 s meant
        # "never check"; scale first and always check at least once.
        checks = max(1, round(timeout * 10))
        for _ in range(checks):
            if self.iss_current_page(page=self.__page, url=url):
                return
            await asyncio.sleep(delay=0.1)
        if url.find("://") == -1:
            # Expand a bare suffix so the error message shows a full URL.
            url = self.get_url_domain() + url
        raise RuntimeError(f"无法打开/加载页面<{url}>")
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
# ---------------------------------------------------------------------------------------------------------
|
|
4
|
+
# ProjectName: playwright-helper
|
|
5
|
+
# FileName: browser_pool.py
|
|
6
|
+
# Description: 浏览器池,一次起 Chrome,并发复用
|
|
7
|
+
# Author: ASUS
|
|
8
|
+
# CreateDate: 2025/12/13
|
|
9
|
+
# Copyright ©2011-2025. Hunan xxxxxxx Company limited. All rights reserved.
|
|
10
|
+
# ---------------------------------------------------------------------------------------------------------
|
|
11
|
+
"""
|
|
12
|
+
import asyncio
|
|
13
|
+
from logging import Logger
|
|
14
|
+
from typing import Any, Optional
|
|
15
|
+
from playwright.async_api import Browser, async_playwright, Playwright
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class BrowserPool:
    """Fixed-size pool of Chromium browsers that are launched once and reused.

    Browsers are kept in an ``asyncio.Queue``; ``acquire`` waits until one is
    available and ``release`` puts it back.
    """

    def __init__(
            self,
            *,
            size: int,
            logger: Logger,
            **launch_config: Any,
    ):
        """
        :param size: number of browsers launched by ``start``
        :param logger: logger receiving debug messages
        :param launch_config: keyword arguments forwarded to ``chromium.launch``
        """
        self.size = size
        self.logger = logger
        self.launch_config = launch_config
        self._queue: asyncio.Queue[Browser] = asyncio.Queue()
        self._started: bool = False
        self._playwright: Optional[Playwright] = None

    async def start(self, playwright: Playwright = None):
        """Launch ``size`` browsers and fill the pool (no-op when already started).

        :param playwright: Playwright instance to launch with; when omitted
            (and none was set earlier) a new one is started
        """
        if self._started:
            return

        if playwright:
            self._playwright = playwright

        if not self._playwright:
            self._playwright = await async_playwright().start()

        self.logger.debug(f"[BrowserPool] start size={self.size}")

        launched = []
        try:
            for i in range(self.size):
                self.logger.debug(f"[BrowserPool] launching browser {i}")
                launched.append(
                    await self._playwright.chromium.launch(**self.launch_config)
                )
        except BaseException:
            # A failed or cancelled launch must not leave half a pool behind:
            # a later start() would add ``size`` more browsers on top of it.
            for browser in launched:
                try:
                    await browser.close()
                except Exception as e:
                    self.logger.warning(f"[BrowserPool] ignore close error: {e}")
            raise

        for browser in launched:
            await self._queue.put(browser)

        self._started = True
        self.logger.debug("[BrowserPool] started")

    async def acquire(self) -> Browser:
        """Wait for a free browser and hand it out."""
        self.logger.debug("[BrowserPool] acquire waiting...")
        browser = await self._queue.get()
        self.logger.debug("[BrowserPool] acquire ok")
        return browser

    async def release(self, browser: Browser):
        """Return a browser obtained from ``acquire`` to the pool."""
        self.logger.debug("[BrowserPool] release")
        await self._queue.put(browser)

    async def stop(self):
        """Close every idle browser, stop Playwright and reset the pool.

        Only browsers currently sitting in the queue are closed here; ones
        that are still acquired are not tracked by the pool.
        """
        self.logger.debug("[BrowserPool] stopping")
        while not self._queue.empty():
            browser = await self._queue.get()
            try:
                await browser.close()
            except Exception as e:
                # One broken browser must not keep the others (and Playwright) alive.
                self.logger.warning(f"[BrowserPool] ignore close error: {e}")
        if self._playwright:
            await self._playwright.stop()
            self._playwright = None
        # Without this reset a later start() returned early with an empty pool.
        self._started = False
        self.logger.debug("[BrowserPool] stopped")
|
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
# ---------------------------------------------------------------------------------------------------------
|
|
4
|
+
# ProjectName: playwright-helper
|
|
5
|
+
# FileName: executor.py
|
|
6
|
+
# Description: 执行器模块
|
|
7
|
+
# Author: ASUS
|
|
8
|
+
# CreateDate: 2025/12/13
|
|
9
|
+
# Copyright ©2011-2025. Hunan xxxxxxx Company limited. All rights reserved.
|
|
10
|
+
# ---------------------------------------------------------------------------------------------------------
|
|
11
|
+
"""
|
|
12
|
+
import os
|
|
13
|
+
import time
|
|
14
|
+
import uuid
|
|
15
|
+
import asyncio
|
|
16
|
+
from logging import Logger
|
|
17
|
+
from contextlib import asynccontextmanager
|
|
18
|
+
from playwright_helper.utils.type_utils import RunResult
|
|
19
|
+
from playwright._impl._api_structures import ViewportSize
|
|
20
|
+
from playwright_helper.utils.log_utils import logger as log
|
|
21
|
+
from playwright_helper.libs.browser_pool import BrowserPool
|
|
22
|
+
from playwright_helper.utils.file_handle import get_caller_dir
|
|
23
|
+
from typing import Any, List, Optional, cast, Callable, Literal, Dict
|
|
24
|
+
from playwright.async_api import Page, Browser, BrowserContext, TimeoutError as PlaywrightTimeoutError, \
|
|
25
|
+
Error as PlaywrightError, async_playwright
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class PlaywrightBrowserExecutor:
    """Runs callbacks against a fresh browser context with retries and diagnostics.

    Two modes are supported:

    * ``"storage"``    - contexts are created on browsers borrowed from a
      ``BrowserPool`` (a pool is mandatory in this mode);
    * ``"persistent"`` - every context is a persistent context launched
      directly through Playwright, bypassing the pool.
    """

    def __init__(
            self,
            *,
            logger: Logger = log,
            browser_pool: Optional[BrowserPool] = None,
            mode: Literal["persistent", "storage"] = "storage",
            middlewares: Optional[List[Callable]] = None,
            retries: int = 1,
            record_video: bool = False,
            record_trace: bool = False,
            video_dir: Optional[str] = None,
            trace_dir: Optional[str] = None,
            screenshot_dir: Optional[str] = None,
            viewport: Optional[ViewportSize] = None,
            user_agent: Optional[str] = None,
            storage_state: Optional[Dict[str, Any]] = None,
            **browser_config: Any,
    ):
        """
        :param logger: logger used for all executor messages
        :param browser_pool: pool to borrow browsers from (required for "storage")
        :param mode: "storage" or "persistent"
        :param middlewares: awaitables run before the callback on every attempt
        :param retries: number of extra attempts after the first failure
        :param record_video: pass ``video_dir`` to the context as video directory
        :param record_trace: start tracing on each context; traces are saved
            when an attempt fails or is cancelled
        :param video_dir: directory for videos (defaults to ``get_caller_dir()``)
        :param trace_dir: directory for traces (defaults to ``get_caller_dir()``)
        :param screenshot_dir: directory for failure screenshots
            (defaults to ``get_caller_dir()``)
        :param viewport: viewport for pooled contexts
        :param user_agent: user agent for pooled contexts
        :param storage_state: default storage state for new contexts
        :param browser_config: keyword arguments forwarded to
            ``launch_persistent_context`` in persistent mode
        :raises ValueError: "storage" mode without a ``browser_pool``
        """
        self.mode = mode
        self.logger = logger
        self.browser_pool = browser_pool
        self.middlewares = middlewares or []
        self.retries = retries
        self.user_agent = user_agent
        self.viewport = viewport

        self.record_video = record_video
        self.record_trace = record_trace
        self.video_dir = video_dir or get_caller_dir()
        self.trace_dir = trace_dir or get_caller_dir()
        self.screenshot_dir = screenshot_dir or get_caller_dir()
        self.storage_state = storage_state
        self.browser_config = browser_config

        self._playwright = None

        if self.mode == "storage" and not self.browser_pool:
            raise ValueError("storage 模式必须提供 browser_pool")

    async def _safe_screenshot(self, page: Page, name: str = None):
        """Save a screenshot of ``page``; failures are logged, never raised.

        A missing name, or one containing "unknown", is replaced by
        ``error_<unix time>``.
        """
        try:
            os.makedirs(self.screenshot_dir, exist_ok=True)
            if name is None or "unknown" in name:
                name = f"error_{int(time.time())}"
            path = os.path.join(self.screenshot_dir, f"{name}.png")
            await page.screenshot(path=path)
            self.logger.debug(f"[Screenshot Saved] {path}")
        except Exception as e:
            self.logger.error(f"[Screenshot Failed] {e}")

    async def _save_trace(self, context: Optional[BrowserContext], task_id: Optional[str]) -> None:
        """Stop tracing on ``context`` and write ``<trace_dir>/<task_id>.zip``.

        Does nothing without a context or when tracing is disabled. Errors are
        logged instead of raised so that a broken trace can neither abort the
        retry loop nor mask the original failure.
        """
        if not (context and self.record_trace):
            return
        try:
            os.makedirs(self.trace_dir, exist_ok=True)
            trace_path = os.path.join(self.trace_dir, f"{task_id}.zip")
            await context.tracing.stop(path=trace_path)
            self.logger.debug(f"[Trace Saved] {trace_path}")
        except Exception as e:
            self.logger.error(f"[Trace Failed] {e}")

    async def start(self):
        """Begin the executor life cycle (meant to be called once per process)."""
        if not self._playwright:
            self._playwright = await async_playwright().start()

        # Storage mode: the pool launches its browsers with this Playwright instance.
        if self.mode == "storage" and self.browser_pool:
            await self.browser_pool.start(self._playwright)

    async def stop(self):
        """End the executor life cycle.

        Playwright is only stopped here in persistent mode; in storage mode
        this method leaves the pool and Playwright untouched.
        """
        if self.mode == "persistent" and self._playwright:
            await self._playwright.stop()
            self._playwright = None

    @asynccontextmanager
    async def session(self, storage_state: Optional[Dict[str, Any]] = None):
        """Async context manager yielding a new page in a fresh context.

        The context is cleaned up (and its browser released) on exit.

        :param storage_state: overrides the executor-level storage state
        """
        context: BrowserContext = cast(BrowserContext, None)
        try:
            context = await self._create_context(storage_state=storage_state or self.storage_state)
            page: Page = await context.new_page()
            yield page
        finally:
            if context:
                await self._cleanup_context(context)

    async def _create_context(self, storage_state: Optional[Dict[str, Any]] = None) -> BrowserContext:
        """Create a browser context for one task.

        The returned context carries ``_task_id`` (a fresh UUID string) and,
        in storage mode, ``_browser`` (the pooled browser to release later).
        If creation fails after a browser was borrowed, the browser is handed
        back to the pool before the error propagates.
        """
        task_id = str(uuid.uuid4())
        video_dir = self.video_dir if self.record_video else None

        if self.mode == "persistent":
            self.logger.debug("[Executor] mode=persistent")
            # Persistent mode does not go through the pool.
            context = await self._playwright.chromium.launch_persistent_context(
                record_video_dir=video_dir,
                **self.browser_config
            )
        else:
            # Concurrent mode: borrow a browser from the pool.
            self.logger.debug("[Executor] mode=storage")

            browser: Browser = await self.browser_pool.acquire()
            try:
                context = await browser.new_context(
                    storage_state=storage_state or self.storage_state,
                    viewport=self.viewport,
                    user_agent=self.user_agent,
                    record_video_dir=video_dir,
                )
            except BaseException:
                # Nobody else knows about this browser yet; without the
                # release every failure (or cancellation) here would shrink
                # the pool until acquire() blocks forever.
                await self.browser_pool.release(browser)
                raise

            # Attach the browser so _cleanup_context can release it.
            context._browser = browser  # type: ignore[attr-defined]

        # task_id names the trace / screenshot files.
        context._task_id = task_id  # type: ignore[attr-defined]

        if self.record_trace:
            try:
                await context.tracing.start(
                    screenshots=True,
                    snapshots=True,
                    sources=True,
                )
            except BaseException:
                # The caller never receives the context, so clean it up here.
                await self._cleanup_context(context)
                raise

        return context

    async def _cleanup_context(self, context: BrowserContext):
        """Close ``context`` once and release its pooled browser, if any.

        Close/release errors are logged and ignored.
        """
        if getattr(context, "_closed", False):
            return

        # Mark first so a second call is a no-op.
        context._closed = True  # type: ignore

        try:
            await context.close()
        except Exception as e:
            self.logger.warning(f"[Cleanup] ignore close error: {e}")

        browser = getattr(context, "_browser", None)
        if browser:
            try:
                self.logger.debug("[Executor] Release browser to pool")
                await self.browser_pool.release(browser)
            except Exception as e:
                self.logger.error(f"[Cleanup] ignore release error: {e}")

    async def _run_callback_chain(self, *, callback: Callable, page: Page, context: BrowserContext, **kwargs) -> Any:
        """Await every middleware in order, then the callback; return its result.

        Middlewares and callback all receive ``page``, ``logger``, ``context``
        and the extra keyword arguments.
        """
        for mw in self.middlewares:
            await mw(page=page, logger=self.logger, context=context, **kwargs)

        return await callback(page=page, logger=self.logger, context=context, **kwargs)

    async def run(
            self, *, callback: Callable, storage_state: Optional[Dict[str, Any]] = None, **kwargs: Any
    ) -> RunResult:
        """Run ``callback`` in a fresh context, retrying on failure.

        Up to ``retries + 1`` attempts are made, one second apart. A failed
        attempt produces a screenshot and, with tracing enabled, a trace file.
        Cancellation and ``KeyboardInterrupt`` are not re-raised; they end the
        run with an unsuccessful result.

        :param callback: awaitable called with ``page``, ``logger``, ``context``
            and ``kwargs``
        :param storage_state: storage state for the contexts of this run
        :param kwargs: forwarded to middlewares and callback
        :return: ``RunResult`` describing success, attempts, task id, last
            error and callback result
        """
        attempt = 0
        last_error: Optional[Exception] = None
        task_id: Optional[str] = "unknown"
        result: Any = None

        while attempt <= self.retries:
            page = None
            context: BrowserContext = cast(BrowserContext, None)
            try:
                context = await self._create_context(storage_state=storage_state)
                task_id = getattr(context, "_task_id", "unknown")
                page = await context.new_page()

                result = await self._run_callback_chain(
                    callback=callback, page=page, context=context, **kwargs
                )
                self.logger.info(f"[Task<{task_id}> Success]")
                return RunResult(
                    success=True,
                    attempts=attempt + 1,
                    task_id=task_id,
                    error=last_error,
                    result=result
                )
            except (asyncio.CancelledError, KeyboardInterrupt):
                # Deliberately not treated as an error and not re-raised.
                self.logger.warning(f"[Task<{task_id}> Cancelled]")
                await self._save_trace(context, task_id)
                return RunResult(
                    success=False,
                    attempts=attempt + 1,
                    error=asyncio.CancelledError(),
                    task_id=task_id,
                    result=result
                )
            except Exception as e:
                # Playwright's TimeoutError / Error are Exception subclasses,
                # so this single clause covers what the original tuple listed.
                last_error = e
                self.logger.error(f"[Task<{task_id}> Attempt {attempt + 1} Failed] {e}")
                if page:
                    await self._safe_screenshot(page=page, name=task_id)

                await self._save_trace(context, task_id)

                attempt += 1
                if attempt <= self.retries:
                    await asyncio.sleep(1)
            finally:
                if context:
                    await self._cleanup_context(context)

        # Every attempt failed.
        self.logger.error(f"[Task<{task_id}> Final Failure]")

        return RunResult(
            success=False,
            attempts=attempt,
            error=last_error,
            task_id=task_id,
            result=result
        )
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
# ---------------------------------------------------------------------------------------------------------
|
|
4
|
+
# ProjectName: playwright-helper
|
|
5
|
+
# FileName: __init__.py
|
|
6
|
+
# Description: 中间件包
|
|
7
|
+
# Author: ASUS
|
|
8
|
+
# CreateDate: 2025/12/13
|
|
9
|
+
# Copyright ©2011-2025. Hunan xxxxxxx Company limited. All rights reserved.
|
|
10
|
+
# ---------------------------------------------------------------------------------------------------------
|
|
11
|
+
"""
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
# ---------------------------------------------------------------------------------------------------------
|
|
4
|
+
# ProjectName: playwright-helper
|
|
5
|
+
# FileName: stealth.py
|
|
6
|
+
# Description: 安全代理中间件
|
|
7
|
+
# Author: ASUS
|
|
8
|
+
# CreateDate: 2025/12/13
|
|
9
|
+
# Copyright ©2011-2025. Hunan xxxxxxx Company limited. All rights reserved.
|
|
10
|
+
# ---------------------------------------------------------------------------------------------------------
|
|
11
|
+
"""
|
|
12
|
+
from typing import Any
|
|
13
|
+
from logging import Logger
|
|
14
|
+
from playwright.async_api import Page
|
|
15
|
+
from playwright_stealth import Stealth
|
|
16
|
+
|
|
17
|
+
# Chromium command-line switches grouped the way the original author labelled them.
CHROME_STEALTH_ARGS = [
    # Core anti-detection switches.
    "--disable-blink-features=AutomationControlled",
    "--disable-automation-controlled-blink-features",

    # Hide the "Chrome is being controlled by automated test software" notice.
    "--disable-infobars",
    "--disable-popup-blocking",

    # Labelled "performance" in the original: skip first-run / default-app steps.
    "--no-first-run",
    "--no-default-browser-check",
    "--disable-default-apps",
    "--disable-translate",

    # Labelled "disable automation flags" in the original: background throttling.
    "--disable-background-timer-throttling",
    "--disable-backgrounding-occluded-windows",
    "--disable-renderer-backgrounding",

    # Network and security.
    # NOTE(review): --disable-features appears twice; Chromium presumably keeps
    # only the last occurrence of a repeated switch -- confirm and merge if so.
    "--disable-web-security",
    "--disable-features=VizDisplayCompositor",
    "--disable-features=RendererCodeIntegrity",
    "--remote-debugging-port=0",  # random port

    # Hardware-related switches (fewer fingerprinting features).
    "--disable-gpu",
    "--disable-software-rasterizer",
    "--disable-dev-shm-usage",
]

# Switches collected under the name IGNORE_ARGS.
IGNORE_ARGS = [
    "--enable-automation",
    "--enable-automation-controlled-blink-features",
    "--password-store=basic",  # avoids the password-store prompt
]

# Desktop Chrome on Windows user-agent string.
USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/142.0.0.0 Safari/537.36"
)

# Default viewport size in pixels.
viewport = {"width": 1920, "height": 1080}
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
async def _setup_stealth_page(page: Page):
    """Register an init script that masks common automation fingerprints.

    The script overrides ``navigator.webdriver``, ``plugins``, ``languages``
    and ``platform`` and installs a stub ``window.chrome`` object.

    Every navigator property is defined exactly once and marked
    ``configurable``. The earlier script defined ``plugins`` and
    ``languages`` twice without ``configurable: true``; the second
    ``Object.defineProperty`` call then throws a TypeError ("Cannot redefine
    property"), which aborted the script before the platform and
    ``window.chrome`` patches were applied.

    :param page: page that receives the init script
    """
    await page.add_init_script("""
        // navigator.webdriver: report undefined and drop the prototype getter
        Object.defineProperty(navigator, 'webdriver', {
            get: () => undefined,
            configurable: true,
        });
        delete navigator.__proto__.webdriver;

        // navigator.plugins
        Object.defineProperty(navigator, 'plugins', {
            get: () => [{
                name: 'Chrome PDF Plugin',
                filename: 'internal-pdf-viewer'
            }],
            configurable: true,
        });

        // navigator.languages
        Object.defineProperty(navigator, 'languages', {
            get: () => ['zh-CN', 'zh', 'en-US', 'en'],
            configurable: true,
        });

        // navigator.platform
        Object.defineProperty(navigator, 'platform', {
            get: () => 'Win32',
            configurable: true,
        });

        // stub chrome object
        window.chrome = {
            runtime: {},
            loadTimes: function(){},
            csi: function(){},
            app: {}
        };
    """)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
# Shared playwright-stealth configuration used by the middleware in this module.
# The per-flag notes are translated from the original comments; the exact
# effect of each flag is defined by the playwright_stealth package.
stealth = Stealth(
    navigator_webdriver=True,    # hide webdriver
    navigator_plugins=True,      # patch plugins
    navigator_languages=True,    # patch languages
    navigator_platform=True,     # patch platform
    navigator_user_agent=False,  # user-agent patch is switched off here
    script_logging=False,        # logging disabled for production
)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
async def stealth_middleware(*, page: Page, logger: Logger, **kwargs: Any):
    """Executor middleware that applies the stealth patches to ``page``.

    First the module-level ``stealth`` configuration is applied, then the
    extra init script from ``_setup_stealth_page`` is registered.

    :param page: page to patch
    :param logger: logger receiving the confirmation message
    :param kwargs: accepted for middleware-signature compatibility; unused
    """
    await stealth.apply_stealth_async(page_or_context=page)
    await _setup_stealth_page(page)
    logger.debug("[Middleware] Stealth applied")
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
# ---------------------------------------------------------------------------------------------------------
|
|
4
|
+
# ProjectName: playwright-helper
|
|
5
|
+
# FileName: __init__.py
|
|
6
|
+
# Description: 工具包
|
|
7
|
+
# Author: ASUS
|
|
8
|
+
# CreateDate: 2025/12/08
|
|
9
|
+
# Copyright ©2011-2025. Hunan xxxxxxx Company limited. All rights reserved.
|
|
10
|
+
# ---------------------------------------------------------------------------------------------------------
|
|
11
|
+
"""
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
# ---------------------------------------------------------------------------------------------------------
|
|
4
|
+
# ProjectName: playwright-helper
|
|
5
|
+
# FileName: browser_utils.py
|
|
6
|
+
# Description: 浏览器工具模块
|
|
7
|
+
# Author: ASUS
|
|
8
|
+
# CreateDate: 2025/12/18
|
|
9
|
+
# Copyright ©2011-2025. Hunan xxxxxxx Company limited. All rights reserved.
|
|
10
|
+
# ---------------------------------------------------------------------------------------------------------
|
|
11
|
+
"""
|
|
12
|
+
import asyncio
|
|
13
|
+
from playwright.async_api import BrowserContext, Page
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
async def switch_for_table_window(browser: BrowserContext, url_keyword: str, wait_time: float = 10.0) -> Page:
    """Bring the first page whose URL contains ``url_keyword`` to the front.

    The pages of the context are polled every 0.1 s; the keyword match is
    case-insensitive.

    :param browser: browser context whose ``pages`` are searched
    :param url_keyword: text that must occur in the page URL
    :param wait_time: maximum wait in seconds (fractions are honoured; at
        least one poll is always made)
    :return: the matching page, already brought to the front
    :raises RuntimeError: no matching page appeared within ``wait_time``
    """
    keyword = url_keyword.lower()
    # int(wait_time) * 10 truncated fractional waits, so anything below one
    # second never looked at the pages at all; scale first, poll at least once.
    polls = max(1, round(wait_time * 10))
    for _ in range(polls):
        await asyncio.sleep(delay=0.1)
        for page in browser.pages:
            if keyword in page.url.lower():
                await page.bring_to_front()
                return page
    raise RuntimeError(f"根据关键信息<{url_keyword}>,没有找到浏览器的page对象")
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
# ---------------------------------------------------------------------------------------------------------
|
|
4
|
+
# ProjectName: playwright-helper
|
|
5
|
+
# FileName: file_handle.py
|
|
6
|
+
# Description: 文件处理工具模块
|
|
7
|
+
# Author: ASUS
|
|
8
|
+
# CreateDate: 2025/12/08
|
|
9
|
+
# Copyright ©2011-2025. Hunan xxxxxxx Company limited. All rights reserved.
|
|
10
|
+
# ---------------------------------------------------------------------------------------------------------
|
|
11
|
+
"""
|
|
12
|
+
import os
|
|
13
|
+
import inspect
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def get_caller_dir() -> str:
    """Return the absolute directory of the file that called this function.

    Only the immediate caller's frame is inspected. ``inspect.stack()`` would
    build a record, including source context, for every frame on the stack
    just to read one file name, so it is kept only as a fallback for
    interpreters where ``inspect.currentframe()`` returns ``None``.
    """
    frame = inspect.currentframe()
    caller = frame.f_back if frame is not None else None
    try:
        if caller is not None:
            caller_file = caller.f_code.co_filename  # full path of the calling file
        else:
            caller_file = inspect.stack()[1].filename
    finally:
        # Drop frame references promptly to avoid reference cycles.
        del frame, caller
    return os.path.dirname(os.path.abspath(caller_file))
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def save_image(file_name: str, img_bytes: bytes) -> None:
    """Write ``img_bytes`` to ``file_name``, creating parent directories.

    An existing file is overwritten. A bare file name (no directory part) is
    written to the current working directory.

    :param file_name: destination path
    :param img_bytes: raw image content
    """
    parent = os.path.dirname(file_name)
    # dirname() is "" for a bare file name and os.makedirs("") raises
    # FileNotFoundError, so only create directories when there is one.
    if parent:
        os.makedirs(parent, exist_ok=True)

    # "wb" truncates, i.e. overwrites an existing file.
    with open(file_name, "wb") as f:
        f.write(img_bytes)
|