qrpa 1.1.32__py3-none-any.whl → 1.1.34__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of qrpa might be problematic. Click here for more details.

qrpa/fun_web.py CHANGED
@@ -1,258 +1,258 @@
1
- import json
2
- from typing import Optional, Union
3
- from playwright.sync_api import Page
4
-
5
- from .fun_base import log, send_exception
6
- from .time_utils import get_current_datetime
7
-
8
- import inspect
9
-
def fetch(page: Page, url: str, params: Optional[Union[dict, list, str]] = None, headers: Optional[dict] = None,
          config: Optional[dict] = None) -> dict:
    """
    Send an HTTP POST request from the page's JavaScript context, with custom headers and redirect handling.

    The request is issued through ``page.evaluate`` using the browser's ``fetch`` with
    ``credentials: 'include'`` and ``redirect: 'follow'`` as defaults.

    :param page: Playwright ``Page`` object
    :param url: request URL
    :param params: request payload; a ``str`` is sent as-is with a form-urlencoded ``Content-Type``,
        a ``dict``/``list`` is JSON-encoded, ``None`` sends no body
    :param headers: extra headers merged over the default headers
    :param config: extra ``fetch`` options merged over the default options with ``Object.assign``
    :return: the parsed JSON body when the response content type contains ``application/json``;
        otherwise a dict with ``content``, ``content_type`` and ``final_url``; a dict with an
        ``error`` key when the request fails inside the browser
    :raises ValueError: if ``params``, ``headers`` or ``config`` has an unsupported type
    """
    if params is not None and not isinstance(params, (dict, list, str)):
        raise ValueError("params 参数必须是 dict、list、str 或 None")
    if headers is not None and not isinstance(headers, dict):
        raise ValueError("headers 参数必须是 dict 或 None")
    # config is merged key-by-key into the fetch options, so anything but a dict is a caller error.
    if config is not None and not isinstance(config, dict):
        raise ValueError("config 参数必须是 dict 或 None")

    script = """
        async ({ url, params, extraHeaders, config }) => {
            try {
                const defaultHeaders = {
                    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36',
                    'x-requested-with': 'XMLHttpRequest',
                };

                const headers = Object.assign({}, defaultHeaders, extraHeaders || {});
                const options = {
                    method: 'POST',
                    credentials: 'include',
                    redirect: 'follow', // 明确设置跟随重定向
                    headers: headers
                };

                // 应用额外配置
                if (config) {
                    Object.assign(options, config);
                }

                if (params !== null) {
                    if (typeof params === 'string') {
                        options.headers['Content-Type'] = 'application/x-www-form-urlencoded; charset=UTF-8';
                        options.body = params;
                    } else {
                        options.headers['Content-Type'] = 'application/json';
                        options.body = JSON.stringify(params);
                    }
                }

                const response = await fetch(url, options);

                // 处理重定向
                if (response.redirected) {
                    console.log(`请求被重定向到: ${response.url}`);
                }

                if (!response.ok) {
                    // 如果是重定向相关的状态码,尝试获取响应内容
                    if (response.status >= 300 && response.status < 400) {
                        const text = await response.text();
                        return {
                            "error": "redirect_error",
                            "message": `HTTP ${response.status} - ${response.statusText}`,
                            "redirect_url": response.url,
                            "response_text": text,
                            "status": response.status
                        };
                    }
                    throw new Error(`HTTP ${response.status} - ${response.statusText}`);
                }

                // 尝试解析 JSON,如果失败则返回文本内容
                const contentType = response.headers.get('content-type');
                if (contentType && contentType.includes('application/json')) {
                    return await response.json();
                } else {
                    const text = await response.text();
                    return { "content": text, "content_type": contentType, "final_url": response.url };
                }
            } catch (error) {
                return { "error": "fetch_failed", "message": error.message };
            }
        }
    """
    script_args = {"url": url, "params": params, "extraHeaders": headers, "config": config}

    try:
        # Wait for the 'load' state before evaluating the script in the page.
        page.wait_for_load_state('load')
        return page.evaluate(script, script_args)
    except Exception:
        # Any Python-side failure is handed to the project's send_exception() helper,
        # and whatever it returns is raised.
        raise send_exception()
100
-
def fetch_via_iframe(page: Page, target_domain: str, url: str, params: Optional[Union[dict, list, str]] = None,
                     config: Optional[dict] = None) -> dict:
    """
    Option 2: run the fetch request inside an iframe, to get around CORS restrictions.

    The first entry of ``page.frames`` whose URL contains ``target_domain`` is selected and the
    POST request is issued from that frame's JavaScript context.

    :param page: Playwright ``Page`` object
    :param target_domain: domain of the target iframe (matched as a substring of the frame URL)
    :param url: URL of the request
    :param params: request payload; a ``str`` is sent as-is with a form-urlencoded ``Content-Type``,
        a ``dict``/``list`` is JSON-encoded, ``None`` sends no body
    :param config: extra ``fetch`` options merged over the default options with ``Object.assign``
    :return: the parsed JSON body of the response; ``{"error": "iframe_not_found", ...}`` when no
        frame matches; ``{"error": "iframe_fetch_failed", ...}`` when the request fails in the browser
    :raises ValueError: if ``params`` or ``config`` has an unsupported type
    """
    if params is not None and not isinstance(params, (dict, list, str)):
        raise ValueError("params 参数必须是 dict、list、str 或 None")
    # config is merged key-by-key into the fetch options, so anything but a dict is a caller error.
    if config is not None and not isinstance(config, dict):
        raise ValueError("config 参数必须是 dict 或 None")

    script = """
        async ({ url, params, config }) => {
            try {
                const options = {
                    method: 'POST',
                    credentials: 'include',
                    headers: {
                        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36',
                        'x-requested-with': 'XMLHttpRequest',
                    }
                };

                // Apply caller-supplied fetch options on top of the defaults.
                if (config) {
                    Object.assign(options, config);
                }

                if (params !== null) {
                    if (typeof params === 'string') {
                        options.headers['Content-Type'] = 'application/x-www-form-urlencoded; charset=UTF-8';
                        options.body = params;
                    } else {
                        options.headers['Content-Type'] = 'application/json';
                        options.body = JSON.stringify(params);
                    }
                }

                const response = await fetch(url, options);
                if (!response.ok) {
                    throw new Error(`HTTP ${response.status} - ${response.statusText}`);
                }
                return await response.json();
            } catch (error) {
                return { "error": "iframe_fetch_failed", "message": error.message };
            }
        }
    """

    try:
        # Pick the first frame whose URL contains the target domain.
        target_frame = next((frame for frame in page.frames if target_domain in frame.url), None)
        if target_frame is None:
            return {"error": "iframe_not_found", "message": f"未找到包含 {target_domain} 的 iframe"}

        return target_frame.evaluate(script, {"url": url, "params": params, "config": config})
    except Exception:
        # Any Python-side failure is handed to the project's send_exception() helper,
        # and whatever it returns is raised.
        raise send_exception()
164
-
def find_all_iframe(page: Page):
    """
    Find all frames of a page: log each frame URL and return the URLs.

    :param page: Playwright ``Page`` object
    :return: list of the URLs of ``page.frames``, in the same order
    """
    frame_urls = [frame.url for frame in page.frames]
    for frame_url in frame_urls:
        log("找到 iframe:", frame_url)
    return frame_urls
171
-
def full_screen_shot(web_page: Page, config):
    """
    Take a full-page screenshot and return the path it was saved to.

    :param web_page: Playwright ``Page`` object to capture
    :param config: object whose ``auto_dir`` attribute is the parent of the ``screenshot`` directory
    :return: ``<config.auto_dir>/screenshot/<get_current_datetime()>.png``
    """
    # Use a large fixed viewport so the capture is high resolution.
    viewport = {"width": 1920, "height": 1080}
    web_page.set_viewport_size(viewport)

    # Capture the whole page, not just the visible area.
    image_path = f'{config.auto_dir}/screenshot/{get_current_datetime()}.png'
    web_page.screenshot(full_page=True, path=image_path)
    return image_path
180
-
def fetch_get(page: Page, url: str, headers: Optional[dict] = None, config: Optional[dict] = None) -> dict:
    """
    Send an HTTP GET request from the page's JavaScript context, with custom headers, options and redirect handling.

    :param page: Playwright ``Page`` object
    :param url: request URL
    :param headers: extra headers merged over the default headers
    :param config: extra ``fetch`` options (e.g. credentials, mode, referrer, referrerPolicy) merged
        over the defaults; the merged headers are applied last, so a ``headers`` key in ``config``
        is overridden
    :return: the parsed JSON body when the response content type contains ``application/json``;
        otherwise a dict with ``content``, ``content_type`` and ``final_url``; a dict with an
        ``error`` key when the request fails inside the browser
    :raises ValueError: if ``headers`` or ``config`` is neither a dict nor ``None``
    """
    if not (headers is None or isinstance(headers, dict)):
        raise ValueError("headers 参数必须是 dict 或 None")
    if not (config is None or isinstance(config, dict)):
        raise ValueError("config 参数必须是 dict 或 None")

    script = """
        async ({ url, extraHeaders, config }) => {
            try {
                const defaultHeaders = {
                    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36',
                };

                const defaultConfig = {
                    method: 'GET',
                    credentials: 'include',
                    mode: 'cors',
                    redirect: 'follow' // 明确设置跟随重定向
                };

                const headers = Object.assign({}, defaultHeaders, extraHeaders || {});
                const options = Object.assign({}, defaultConfig, config || {}, { headers: headers });

                const response = await fetch(url, options);

                // 处理重定向
                if (response.redirected) {
                    console.log(`请求被重定向到: ${response.url}`);
                }

                if (!response.ok) {
                    // 如果是重定向相关的状态码,尝试获取响应内容
                    if (response.status >= 300 && response.status < 400) {
                        const text = await response.text();
                        return {
                            "error": "redirect_error",
                            "message": `HTTP ${response.status} - ${response.statusText}`,
                            "redirect_url": response.url,
                            "response_text": text,
                            "status": response.status
                        };
                    }
                    throw new Error(`HTTP ${response.status} - ${response.statusText}`);
                }

                // 尝试解析 JSON,如果失败则返回文本内容
                const contentType = response.headers.get('content-type');
                if (contentType && contentType.includes('application/json')) {
                    return await response.json();
                } else {
                    const text = await response.text();
                    return { "content": text, "content_type": contentType, "final_url": response.url };
                }
            } catch (error) {
                return { "error": "fetch_failed", "message": error.message };
            }
        }
    """
    script_args = {"url": url, "extraHeaders": headers, "config": config}

    try:
        # Wait for the 'load' state before evaluating the script in the page.
        page.wait_for_load_state('load')
        return page.evaluate(script, script_args)
    except Exception:
        # Any Python-side failure is handed to the project's send_exception() helper,
        # and whatever it returns is raised.
        raise send_exception()
254
-
def safe_goto(page, url, **kwargs):
    """
    Log where the navigation was requested from, then call ``page.goto``.

    :param page: object exposing ``goto(url, **kwargs)``
    :param url: address to navigate to
    :param kwargs: forwarded unchanged to ``page.goto``
    :return: whatever ``page.goto`` returns
    """
    # Index 1 is the direct caller of this function. context=0 skips loading source
    # lines for every frame on the stack; only filename and lineno are used here.
    caller = inspect.stack(context=0)[1]
    log(f"[DEBUG] goto called from {caller.filename}:{caller.lineno} url={url}")
    return page.goto(url, **kwargs)
1
+ import json
2
+ from typing import Optional, Union
3
+ from playwright.sync_api import Page
4
+
5
+ from .fun_base import log, send_exception
6
+ from .time_utils import get_current_datetime
7
+
8
+ import inspect
9
+
def fetch(page: Page, url: str, params: Optional[Union[dict, list, str]] = None, headers: Optional[dict] = None,
          config: Optional[dict] = None) -> dict:
    """
    Send an HTTP POST request from the page's JavaScript context, with custom headers and redirect handling.

    The request is issued through ``page.evaluate`` using the browser's ``fetch`` with
    ``credentials: 'include'`` and ``redirect: 'follow'`` as defaults.

    :param page: Playwright ``Page`` object
    :param url: request URL
    :param params: request payload; a ``str`` is sent as-is with a form-urlencoded ``Content-Type``,
        a ``dict``/``list`` is JSON-encoded, ``None`` sends no body
    :param headers: extra headers merged over the default headers
    :param config: extra ``fetch`` options merged over the default options with ``Object.assign``
    :return: the parsed JSON body when the response content type contains ``application/json``;
        otherwise a dict with ``content``, ``content_type`` and ``final_url``; a dict with an
        ``error`` key when the request fails inside the browser
    :raises ValueError: if ``params``, ``headers`` or ``config`` has an unsupported type
    """
    if params is not None and not isinstance(params, (dict, list, str)):
        raise ValueError("params 参数必须是 dict、list、str 或 None")
    if headers is not None and not isinstance(headers, dict):
        raise ValueError("headers 参数必须是 dict 或 None")
    # config is merged key-by-key into the fetch options, so anything but a dict is a caller error.
    if config is not None and not isinstance(config, dict):
        raise ValueError("config 参数必须是 dict 或 None")

    script = """
        async ({ url, params, extraHeaders, config }) => {
            try {
                const defaultHeaders = {
                    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36',
                    'x-requested-with': 'XMLHttpRequest',
                };

                const headers = Object.assign({}, defaultHeaders, extraHeaders || {});
                const options = {
                    method: 'POST',
                    credentials: 'include',
                    redirect: 'follow', // 明确设置跟随重定向
                    headers: headers
                };

                // 应用额外配置
                if (config) {
                    Object.assign(options, config);
                }

                if (params !== null) {
                    if (typeof params === 'string') {
                        options.headers['Content-Type'] = 'application/x-www-form-urlencoded; charset=UTF-8';
                        options.body = params;
                    } else {
                        options.headers['Content-Type'] = 'application/json';
                        options.body = JSON.stringify(params);
                    }
                }

                const response = await fetch(url, options);

                // 处理重定向
                if (response.redirected) {
                    console.log(`请求被重定向到: ${response.url}`);
                }

                if (!response.ok) {
                    // 如果是重定向相关的状态码,尝试获取响应内容
                    if (response.status >= 300 && response.status < 400) {
                        const text = await response.text();
                        return {
                            "error": "redirect_error",
                            "message": `HTTP ${response.status} - ${response.statusText}`,
                            "redirect_url": response.url,
                            "response_text": text,
                            "status": response.status
                        };
                    }
                    throw new Error(`HTTP ${response.status} - ${response.statusText}`);
                }

                // 尝试解析 JSON,如果失败则返回文本内容
                const contentType = response.headers.get('content-type');
                if (contentType && contentType.includes('application/json')) {
                    return await response.json();
                } else {
                    const text = await response.text();
                    return { "content": text, "content_type": contentType, "final_url": response.url };
                }
            } catch (error) {
                return { "error": "fetch_failed", "message": error.message };
            }
        }
    """
    script_args = {"url": url, "params": params, "extraHeaders": headers, "config": config}

    try:
        # Wait for the 'load' state before evaluating the script in the page.
        page.wait_for_load_state('load')
        return page.evaluate(script, script_args)
    except Exception:
        # Any Python-side failure is handed to the project's send_exception() helper,
        # and whatever it returns is raised.
        raise send_exception()
100
+
def fetch_via_iframe(page: Page, target_domain: str, url: str, params: Optional[Union[dict, list, str]] = None,
                     config: Optional[dict] = None) -> dict:
    """
    Option 2: run the fetch request inside an iframe, to get around CORS restrictions.

    The first entry of ``page.frames`` whose URL contains ``target_domain`` is selected and the
    POST request is issued from that frame's JavaScript context.

    :param page: Playwright ``Page`` object
    :param target_domain: domain of the target iframe (matched as a substring of the frame URL)
    :param url: URL of the request
    :param params: request payload; a ``str`` is sent as-is with a form-urlencoded ``Content-Type``,
        a ``dict``/``list`` is JSON-encoded, ``None`` sends no body
    :param config: extra ``fetch`` options merged over the default options with ``Object.assign``
    :return: the parsed JSON body of the response; ``{"error": "iframe_not_found", ...}`` when no
        frame matches; ``{"error": "iframe_fetch_failed", ...}`` when the request fails in the browser
    :raises ValueError: if ``params`` or ``config`` has an unsupported type
    """
    if params is not None and not isinstance(params, (dict, list, str)):
        raise ValueError("params 参数必须是 dict、list、str 或 None")
    # config is merged key-by-key into the fetch options, so anything but a dict is a caller error.
    if config is not None and not isinstance(config, dict):
        raise ValueError("config 参数必须是 dict 或 None")

    script = """
        async ({ url, params, config }) => {
            try {
                const options = {
                    method: 'POST',
                    credentials: 'include',
                    headers: {
                        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36',
                        'x-requested-with': 'XMLHttpRequest',
                    }
                };

                // Apply caller-supplied fetch options on top of the defaults.
                if (config) {
                    Object.assign(options, config);
                }

                if (params !== null) {
                    if (typeof params === 'string') {
                        options.headers['Content-Type'] = 'application/x-www-form-urlencoded; charset=UTF-8';
                        options.body = params;
                    } else {
                        options.headers['Content-Type'] = 'application/json';
                        options.body = JSON.stringify(params);
                    }
                }

                const response = await fetch(url, options);
                if (!response.ok) {
                    throw new Error(`HTTP ${response.status} - ${response.statusText}`);
                }
                return await response.json();
            } catch (error) {
                return { "error": "iframe_fetch_failed", "message": error.message };
            }
        }
    """

    try:
        # Pick the first frame whose URL contains the target domain.
        target_frame = next((frame for frame in page.frames if target_domain in frame.url), None)
        if target_frame is None:
            return {"error": "iframe_not_found", "message": f"未找到包含 {target_domain} 的 iframe"}

        return target_frame.evaluate(script, {"url": url, "params": params, "config": config})
    except Exception:
        # Any Python-side failure is handed to the project's send_exception() helper,
        # and whatever it returns is raised.
        raise send_exception()
164
+
def find_all_iframe(page: Page):
    """
    Find all frames of a page: log each frame URL and return the URLs.

    :param page: Playwright ``Page`` object
    :return: list of the URLs of ``page.frames``, in the same order
    """
    frame_urls = [frame.url for frame in page.frames]
    for frame_url in frame_urls:
        log("找到 iframe:", frame_url)
    return frame_urls
171
+
def full_screen_shot(web_page: Page, config):
    """
    Take a full-page screenshot and return the path it was saved to.

    :param web_page: Playwright ``Page`` object to capture
    :param config: object whose ``auto_dir`` attribute is the parent of the ``screenshot`` directory
    :return: ``<config.auto_dir>/screenshot/<get_current_datetime()>.png``
    """
    # Use a large fixed viewport so the capture is high resolution.
    viewport = {"width": 1920, "height": 1080}
    web_page.set_viewport_size(viewport)

    # Capture the whole page, not just the visible area.
    image_path = f'{config.auto_dir}/screenshot/{get_current_datetime()}.png'
    web_page.screenshot(full_page=True, path=image_path)
    return image_path
180
+
def fetch_get(page: Page, url: str, headers: Optional[dict] = None, config: Optional[dict] = None) -> dict:
    """
    Send an HTTP GET request from the page's JavaScript context, with custom headers, options and redirect handling.

    :param page: Playwright ``Page`` object
    :param url: request URL
    :param headers: extra headers merged over the default headers
    :param config: extra ``fetch`` options (e.g. credentials, mode, referrer, referrerPolicy) merged
        over the defaults; the merged headers are applied last, so a ``headers`` key in ``config``
        is overridden
    :return: the parsed JSON body when the response content type contains ``application/json``;
        otherwise a dict with ``content``, ``content_type`` and ``final_url``; a dict with an
        ``error`` key when the request fails inside the browser
    :raises ValueError: if ``headers`` or ``config`` is neither a dict nor ``None``
    """
    if not (headers is None or isinstance(headers, dict)):
        raise ValueError("headers 参数必须是 dict 或 None")
    if not (config is None or isinstance(config, dict)):
        raise ValueError("config 参数必须是 dict 或 None")

    script = """
        async ({ url, extraHeaders, config }) => {
            try {
                const defaultHeaders = {
                    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36',
                };

                const defaultConfig = {
                    method: 'GET',
                    credentials: 'include',
                    mode: 'cors',
                    redirect: 'follow' // 明确设置跟随重定向
                };

                const headers = Object.assign({}, defaultHeaders, extraHeaders || {});
                const options = Object.assign({}, defaultConfig, config || {}, { headers: headers });

                const response = await fetch(url, options);

                // 处理重定向
                if (response.redirected) {
                    console.log(`请求被重定向到: ${response.url}`);
                }

                if (!response.ok) {
                    // 如果是重定向相关的状态码,尝试获取响应内容
                    if (response.status >= 300 && response.status < 400) {
                        const text = await response.text();
                        return {
                            "error": "redirect_error",
                            "message": `HTTP ${response.status} - ${response.statusText}`,
                            "redirect_url": response.url,
                            "response_text": text,
                            "status": response.status
                        };
                    }
                    throw new Error(`HTTP ${response.status} - ${response.statusText}`);
                }

                // 尝试解析 JSON,如果失败则返回文本内容
                const contentType = response.headers.get('content-type');
                if (contentType && contentType.includes('application/json')) {
                    return await response.json();
                } else {
                    const text = await response.text();
                    return { "content": text, "content_type": contentType, "final_url": response.url };
                }
            } catch (error) {
                return { "error": "fetch_failed", "message": error.message };
            }
        }
    """
    script_args = {"url": url, "extraHeaders": headers, "config": config}

    try:
        # Wait for the 'load' state before evaluating the script in the page.
        page.wait_for_load_state('load')
        return page.evaluate(script, script_args)
    except Exception:
        # Any Python-side failure is handed to the project's send_exception() helper,
        # and whatever it returns is raised.
        raise send_exception()
254
+
def safe_goto(page, url, **kwargs):
    """
    Log where the navigation was requested from, then call ``page.goto``.

    :param page: object exposing ``goto(url, **kwargs)``
    :param url: address to navigate to
    :param kwargs: forwarded unchanged to ``page.goto``
    :return: whatever ``page.goto`` returns
    """
    # Index 1 is the direct caller of this function. context=0 skips loading source
    # lines for every frame on the stack; only filename and lineno are used here.
    caller = inspect.stack(context=0)[1]
    log(f"[DEBUG] goto called from {caller.filename}:{caller.lineno} url={url}")
    return page.goto(url, **kwargs)