python-qlv-helper 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- python_qlv_helper-0.2.0.dist-info/METADATA +252 -0
- python_qlv_helper-0.2.0.dist-info/RECORD +32 -0
- python_qlv_helper-0.2.0.dist-info/WHEEL +5 -0
- python_qlv_helper-0.2.0.dist-info/licenses/LICENSE +201 -0
- python_qlv_helper-0.2.0.dist-info/top_level.txt +1 -0
- qlv_helper/__init__.py +11 -0
- qlv_helper/controller/__init__.py +11 -0
- qlv_helper/controller/domestic_activity_order.py +24 -0
- qlv_helper/controller/main_page.py +30 -0
- qlv_helper/controller/order_detail.py +35 -0
- qlv_helper/controller/order_table.py +145 -0
- qlv_helper/controller/user_login.py +119 -0
- qlv_helper/http/__init__.py +11 -0
- qlv_helper/http/main_page.py +41 -0
- qlv_helper/http/order_page.py +313 -0
- qlv_helper/http/order_table_page.py +323 -0
- qlv_helper/po/__init__.py +11 -0
- qlv_helper/po/base_po.py +40 -0
- qlv_helper/po/domestic_activity_order_page.py +129 -0
- qlv_helper/po/login_page.py +136 -0
- qlv_helper/po/main_page.py +71 -0
- qlv_helper/po/wechat_auth_page.py +68 -0
- qlv_helper/utils/__init__.py +11 -0
- qlv_helper/utils/browser_utils.py +25 -0
- qlv_helper/utils/datetime_utils.py +16 -0
- qlv_helper/utils/file_handle.py +33 -0
- qlv_helper/utils/html_utils.py +59 -0
- qlv_helper/utils/ocr_helper.py +83 -0
- qlv_helper/utils/po_utils.py +113 -0
- qlv_helper/utils/stealth_browser.py +100 -0
- qlv_helper/utils/type_utils.py +111 -0
- qlv_helper/utils/windows_utils.py +36 -0
|
@@ -0,0 +1,323 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
# ---------------------------------------------------------------------------------------------------------
|
|
4
|
+
# ProjectName: qlv-helper
|
|
5
|
+
# FileName: activity_order_table_page.py
|
|
6
|
+
# Description: 活动订单表页面的HTTP响应处理模块
|
|
7
|
+
# Author: zhouhanlin
|
|
8
|
+
# CreateDate: 2025/11/30
|
|
9
|
+
# Copyright ©2011-2025. Hunan xxxxxxx Company limited. All rights reserved.
|
|
10
|
+
# ---------------------------------------------------------------------------------------------------------
|
|
11
|
+
"""
|
|
12
|
+
import re
|
|
13
|
+
import aiohttp
|
|
14
|
+
from html import unescape
|
|
15
|
+
from copy import deepcopy
|
|
16
|
+
from urllib.parse import unquote
|
|
17
|
+
from collections import OrderedDict
|
|
18
|
+
from typing import Dict, Any, Optional, List
|
|
19
|
+
from bs4 import BeautifulSoup, ResultSet, Tag
|
|
20
|
+
from http_helper.client.async_proxy import HttpClientFactory
|
|
21
|
+
from qlv_helper.utils.type_utils import get_key_by_index, get_value_by_index, safe_convert_advanced
|
|
22
|
+
|
|
23
|
+
# Default POST payload template for the order-list paging endpoints.
# Callers deepcopy() this dict and overwrite the paging fields per request;
# the template itself is never mutated.
kwargs = {
    "orderbykey": None,          # column to sort by (None = server default)
    "orderbydescOrAsc": None,    # sort direction (None = server default)
    "oldorderbykey": "ID",       # previously applied sort column
    "isorderby": False,          # whether an explicit sort is requested
    "ID": 0,                     # order id filter (0 = no filter)
    "IOrderNO": None,            # platform order-number filter
    "hid": 1,
    "JumpPageFromPage": 2,       # target page number; overwritten per request
    "PageCountFormPage": 100     # page size; overwritten per request
}
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
async def get_domestic_activity_order_page_html(
    domain: str, protocol: str = "http", retry: int = 1, timeout: int = 5, enable_log: bool = True,
    cookie_jar: Optional[aiohttp.CookieJar] = None, playwright_state: Optional[Dict[str, Any]] = None,
    current_page: int = 1, pages: int = 1, order_http_client: Optional[HttpClientFactory] = None,
    is_end: bool = True
) -> Dict[str, Any]:
    """
    Fetch the HTML of the domestic activity-order list page.

    Page 1 is fetched with a plain GET; subsequent pages POST the paging
    payload (a deepcopy of the module-level ``kwargs`` template).

    :param domain: target host
    :param protocol: "http" or anything else (treated as "https")
    :param retry: retry count passed to the HTTP client
    :param timeout: request timeout in seconds
    :param enable_log: enable client-side request logging
    :param cookie_jar: optional shared aiohttp cookie jar
    :param playwright_state: optional browser state forwarded to the client
    :param current_page: 1-based page number to fetch
    :param pages: value written into ``PageCountFormPage`` — NOTE(review):
        the field name suggests a page *size*, not a page count; confirm
        against the server API.
    :param order_http_client: reusable client; one is created when omitted
    :param is_end: whether the client should close after this request
    :return: response dict produced by ``HttpClientFactory.request``
    """
    if not order_http_client:
        order_http_client = HttpClientFactory(
            protocol=protocol if protocol == "http" else "https",
            domain=domain,
            timeout=timeout,
            retry=retry,
            enable_log=enable_log,
            cookie_jar=cookie_jar,
            playwright_state=playwright_state
        )
    url = "/OrderList/GuoNei_ActivityOrders"
    if current_page == 1:
        return await order_http_client.request(method="get", url=url, is_end=is_end)
    # Paging request: never mutate the shared template.
    json_data = deepcopy(kwargs)
    json_data["JumpPageFromPage"] = current_page
    json_data["PageCountFormPage"] = pages
    return await order_http_client.request(method="post", url=url, is_end=is_end, json_data=json_data)
|
|
59
|
+
|
|
60
|
+
async def get_domestic_ticket_outed_page_html(
    domain: str, protocol: str = "http", retry: int = 1, timeout: int = 5, enable_log: bool = True,
    cookie_jar: Optional[aiohttp.CookieJar] = None, playwright_state: Optional[Dict[str, Any]] = None,
    current_page: int = 1, pages: int = 1, order_http_client: Optional[HttpClientFactory] = None,
    is_end: bool = True
) -> Dict[str, Any]:
    """
    Fetch the HTML of the domestic "ticket issued" order list page.

    Mirrors :func:`get_domestic_activity_order_page_html` with a different
    endpoint: page 1 uses GET, later pages POST the paging payload built
    from a deepcopy of the module-level ``kwargs`` template.

    :param domain: target host
    :param protocol: "http" or anything else (treated as "https")
    :param retry: retry count passed to the HTTP client
    :param timeout: request timeout in seconds
    :param enable_log: enable client-side request logging
    :param cookie_jar: optional shared aiohttp cookie jar
    :param playwright_state: optional browser state forwarded to the client
    :param current_page: 1-based page number to fetch
    :param pages: value written into ``PageCountFormPage`` — NOTE(review):
        the field name suggests a page *size*; confirm against the server API.
    :param order_http_client: reusable client; one is created when omitted
    :param is_end: whether the client should close after this request
    :return: response dict produced by ``HttpClientFactory.request``
    """
    if not order_http_client:
        order_http_client = HttpClientFactory(
            protocol=protocol if protocol == "http" else "https",
            domain=domain,
            timeout=timeout,
            retry=retry,
            enable_log=enable_log,
            cookie_jar=cookie_jar,
            playwright_state=playwright_state
        )
    url = "/OrderList/GuoNei_TicketOuted"
    if current_page == 1:
        return await order_http_client.request(method="get", url=url, is_end=is_end)
    # Paging request: never mutate the shared template.
    json_data = deepcopy(kwargs)
    json_data["JumpPageFromPage"] = current_page
    json_data["PageCountFormPage"] = pages
    return await order_http_client.request(method="post", url=url, is_end=is_end, json_data=json_data)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def processing_static_headers() -> OrderedDict[str, str]:
    """Static header map (field key -> Chinese label) for the in-processing
    order table. Insertion order mirrors the table's column order and is
    relied on by index-based lookups elsewhere."""
    field_keys = (
        "source_ota", "id", "raw_order_no", "adult_pnr", "payment_time",
        "sec_from_pay", "last_time_ticket", "remaining_time", "dat_dep",
        "policy", "total_people", "receipted", "stat_opration", "more_seats",
        "operation",
    )
    labels = (
        "来源", "订单号", "平台订单号", "成人PNR", "支付时间",
        "支付秒数", "出票时限", "剩余秒数", "起飞时间",
        "政策", "总人数", "总价", "订单操作", "余座",
        "操作",
    )
    return OrderedDict(zip(field_keys, labels))
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def completed_static_headers() -> OrderedDict[str, str]:
    """Static header map (field key -> Chinese label) for the completed
    order table. Insertion order mirrors the table's column order and is
    relied on by index-based lookups elsewhere."""
    field_keys = (
        "source_ota", "id", "raw_order_no", "adult_pnr", "payment_time",
        "checkout_ticket", "dat_dep", "policy_name", "total_people",
        "receipted", "stat_opration", "operation",
    )
    labels = (
        "来源", "订单号", "平台订单号", "成人PNR", "支付时间",
        "下单时间", "起飞时间", "政策", "总人数",
        "总价", "订单操作", "操作",
    )
    return OrderedDict(zip(field_keys, labels))
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def extend_headers() -> OrderedDict[str, str]:
    """Extra derived fields (field key -> Chinese label) that row parsing
    extracts beyond the visible table columns. Order matters: consumers
    address these by positional index."""
    field_keys = (
        "detail_link", "child_pnr", "code_dep", "code_arr",
        "flight_no", "cabin", "total_adult", "total_child",
    )
    labels = (
        "详情链接", "儿童PNR", "起飞机场", "抵达机场",
        "航班号", "舱位", "成人数量", "儿童数量",
    )
    return OrderedDict(zip(field_keys, labels))
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def parse_order_table_headers(html: BeautifulSoup) -> OrderedDict[str, str]:
    """
    Parse the <th> cells of the table's first row into an ordered mapping.

    Composite headers are split into individual entries:
    - a <th> holding multiple <span>s contributes one entry per span, and a
      span containing full-width parentheses is split on them;
    - a plain header containing "|" is split on it.
    Each resulting text is used as both key and value.
    """
    headers = OrderedDict()
    first_row = html.find_all('tr')[0]

    for th in first_row.find_all('th'):
        th_text = th.get_text(strip=True)
        spans = th.find_all('span')

        if len(spans) > 1:
            # Multiple spans: each span is an independent header.
            for span in spans:
                span_text = span.get_text(strip=True)
                if not span_text:
                    continue
                if "(" in span_text:
                    # Full-width parens delimit sub-headers; a trailing ")"
                    # produces an empty segment, matching the original output.
                    normalized = span_text.replace("(", "/").replace(")", "/")
                    for part in normalized.split("/"):
                        headers[part] = part
                else:
                    headers[span_text] = span_text
        elif "|" in th_text:
            for part in (piece.strip() for piece in th_text.split("|")):
                headers[part] = part
        else:
            headers[th_text] = th_text

    return headers
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def clean_cell_content(cell):
    """
    Normalize a table cell's text: join sub-elements with single spaces,
    decode HTML entities, and collapse all whitespace runs to one space.
    """
    decoded = unescape(cell.get_text(' ', strip=True))
    return re.sub(r'\s+', ' ', decoded).strip()
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def extract_special_attributes(cells, order_data: Dict[str, Any], header: OrderedDict[str, str]):
    """
    Pull attribute-encoded values out of the row's cells into order_data:
    the pay-elapsed seconds (class "secFrmPay", attribute "tag") and the
    remaining time / order state (class "remainingtime", attributes
    "remainingtime" / "state").

    NOTE(review): header indices 5/7/12 match processing_static_headers();
    they would address different columns for other header layouts — confirm
    callers only pass processing-style headers.
    """
    sec_from_pay_key = get_key_by_index(header, 5)
    remaining_time_key = get_key_by_index(header, 7)
    stat_opration_key = get_key_by_index(header, 12)

    for cell in cells:
        cell_classes = cell.get('class', [])

        if 'secFrmPay' in cell_classes:
            tag_value = cell.get('tag', '')
            order_data[sec_from_pay_key] = int(tag_value) if tag_value else None

        if 'remainingtime' in cell_classes:
            remaining_time = cell.get('remainingtime', '')
            state = cell.get('state', '')
            order_data[remaining_time_key] = int(remaining_time) if remaining_time else None
            order_data[stat_opration_key] = state if state else None
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
def parse_order_row(cells: ResultSet[Tag], headers: OrderedDict, extend_headers: OrderedDict, row_index: int):
    """
    Parse one table row's cells into an order dict keyed by `headers`.

    Special columns get dedicated handling: order id (numeric + detail
    link), operation buttons, PNR (adult/child split), departure time
    (split into time / airports / flight / cabin), and passenger counts.
    Everything else goes through safe_convert_advanced(). Per-cell errors
    are reported and skipped so one bad cell does not drop the whole row.

    NOTE(review): the "id" branch assumes English field keys, i.e. the
    static header maps; when headers come from parse_order_table_headers()
    the keys are Chinese labels — confirm intended.

    :param cells: the row's <td>/<th> tags
    :param headers: ordered column-key -> label map (drives positional lookups)
    :param extend_headers: ordered map of derived fields (detail link, etc.)
    :param row_index: 0-based row number, used only in error messages
    :return: dict of parsed values for this row
    """
    order_data = dict()

    for i, cell in enumerate(cells):
        try:
            # Extra trailing cells without a matching header are ignored.
            if i >= len(headers):
                break

            header_value = get_value_by_index(ordered_dict=headers, index=i)
            header_key = get_key_by_index(ordered_dict=headers, index=i)
            cell_text = clean_cell_content(cell)

            if header_key == "id":
                # Strip an "urgent" badge appended after the numeric id.
                if "紧急" in cell_text:
                    cell_text = cell_text.split(" ")[0]
                order_data[header_key] = int(cell_text)

                # Detail link lives in this cell's <a href>.
                detail_link_key = get_key_by_index(ordered_dict=extend_headers, index=0)
                link = cell.find('a')
                if link and 'OrderNavigation' in link.get('href', ''):
                    order_data[detail_link_key] = unquote(link.get('href', ""))
                else:
                    order_data[detail_link_key] = None
            elif header_value == '操作':
                # Only the first operation button's label is kept.
                cell_text_slice = cell_text.split(" ")
                order_data[header_key] = cell_text_slice[0]
            elif 'PNR' in header_value:
                # NOTE(review): this replace looks like a no-op; it may
                # originally have normalized a non-breaking-space variant.
                pnr_text = cell_text.replace(' | ', ' | ')
                adult_pnr_key = get_key_by_index(ordered_dict=headers, index=3)
                child_pnr_key = get_key_by_index(ordered_dict=extend_headers, index=1)
                pnr_text_slice = pnr_text.split("|")
                order_data[adult_pnr_key] = pnr_text_slice[0].strip()
                order_data[child_pnr_key] = pnr_text_slice[-1].strip() if pnr_text_slice[-1] else None
            # BUGFIX: was `header_value in "起飞时间"` — a reversed substring
            # test that also matched "" or any single matching character.
            elif header_value == "起飞时间":
                cell_text_slice = cell_text.split(" ")
                if len(cell_text_slice) == 5:
                    code_dep_key = get_key_by_index(ordered_dict=extend_headers, index=2)
                    code_arr_key = get_key_by_index(ordered_dict=extend_headers, index=3)
                    flight_no_key = get_key_by_index(ordered_dict=extend_headers, index=4)
                    cabin_key = get_key_by_index(ordered_dict=extend_headers, index=5)
                    order_data[header_key] = f"{cell_text_slice[0]} {cell_text_slice[1]}"
                    itinerary_slice = cell_text_slice[2].split("-")
                    order_data[code_dep_key] = itinerary_slice[0]
                    order_data[code_arr_key] = itinerary_slice[-1]
                    order_data[flight_no_key] = cell_text_slice[3]
                    order_data[cabin_key] = cell_text_slice[4]
            # BUGFIX: same reversed substring test as above.
            elif header_value == "总人数":
                # "总【成人/儿童】" style: normalize the brackets to "/" and split.
                cell_text = cell_text.replace("【", "/")
                cell_text = cell_text.replace("】", "/")
                cell_text_slice = cell_text.split("/")
                order_data[header_key] = safe_convert_advanced(cell_text_slice[0])
                total_adult_key = get_key_by_index(ordered_dict=extend_headers, index=6)
                total_child_key = get_key_by_index(ordered_dict=extend_headers, index=7)
                order_data[total_adult_key] = safe_convert_advanced(cell_text_slice[1])
                order_data[total_child_key] = safe_convert_advanced(cell_text_slice[2])
            else:
                order_data[header_key] = safe_convert_advanced(cell_text)
        except Exception as e:
            print(f"解析第{row_index + 1}行时出错: {e}")

    # BUGFIX: was invoked inside the loop, once per cell with identical
    # arguments; one pass over the row is equivalent and cheaper.
    if cells and headers:
        try:
            extract_special_attributes(cells=cells, order_data=order_data, header=headers)
        except Exception as e:
            print(f"解析第{row_index + 1}行时出错: {e}")

    return order_data
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
def parse_order_table(html: str, table_state: str = "proccessing") -> List[Optional[Dict[str, Any]]]:
    """
    Parse the order table out of a full page's HTML.

    Header resolution depends on table_state: "proccessing" (sic — the
    typo is part of the public default) and "completed" use the static
    header maps; anything else falls back to parsing the <th> row.

    :param html: raw page HTML
    :param table_state: which static header layout to apply
    :return: one dict per parsed data row (rows holding the hidden
             hidOrderID input are skipped)
    """
    soup = BeautifulSoup(html, 'html.parser')
    parsed_orders: List[Optional[Dict[str, Any]]] = []

    table = soup.find('table', {'class': 'table table_hover table_border table_center'})
    if not table:
        return parsed_orders

    # Pick the header layout for this table state.
    if table_state == "proccessing":
        headers = processing_static_headers()
    elif table_state == "completed":
        headers = completed_static_headers()
    else:
        headers = parse_order_table_headers(html=table)
    extend = extend_headers()

    # Data rows start after the header row.
    for row_index, row in enumerate(table.find_all('tr')[1:]):
        # Rows carrying the hidden order-id input are not data rows.
        if row.find('input', {'name': 'hidOrderID'}):
            continue

        row_cells = row.find_all(['td', 'th'])
        row_data = parse_order_row(row_cells, headers=headers, extend_headers=extend, row_index=row_index)
        if row_data:
            parsed_orders.append(row_data)

    return parsed_orders
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
# ---------------------------------------------------------------------------------------------------------
|
|
4
|
+
# ProjectName: qlv-helper
|
|
5
|
+
# FileName: __init__.py
|
|
6
|
+
# Description: 页面对象封装逻辑
|
|
7
|
+
# Author: ASUS
|
|
8
|
+
# CreateDate: 2025/11/25
|
|
9
|
+
# Copyright ©2011-2025. Hunan xxxxxxx Company limited. All rights reserved.
|
|
10
|
+
# ---------------------------------------------------------------------------------------------------------
|
|
11
|
+
"""
|
qlv_helper/po/base_po.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
# ---------------------------------------------------------------------------------------------------------
|
|
4
|
+
# ProjectName: qlv-helper
|
|
5
|
+
# FileName: base_po.py
|
|
6
|
+
# Description: po对象基础类
|
|
7
|
+
# Author: ASUS
|
|
8
|
+
# CreateDate: 2025/11/25
|
|
9
|
+
# Copyright ©2011-2025. Hunan xxxxxxx Company limited. All rights reserved.
|
|
10
|
+
# ---------------------------------------------------------------------------------------------------------
|
|
11
|
+
"""
|
|
12
|
+
from typing import List
|
|
13
|
+
from playwright.async_api import Page
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class BasePo(object):
    """
    Base page object: binds a Playwright Page to the route (URL suffix)
    it is expected to be on, and validates that binding at construction.
    """

    __page: Page

    def __init__(self, page: Page, url: str):
        """
        :param page: live Playwright page
        :param url: path suffix this page object represents (e.g. "/Home/Login")
        :raises ValueError: when `page` is not a Page or is on a different URL
        """
        self.url = url
        self.__page = page
        if self.is_current_page() is False:
            raise ValueError("page参数值无效")

    def get_page(self) -> Page:
        """Return the underlying Playwright page."""
        return self.__page

    def is_current_page(self) -> bool:
        """
        True when the wrapped object is a Page whose URL (query string
        stripped) ends with this PO's expected path.
        """
        # BUGFIX: the isinstance guard now runs before touching page.url;
        # the original dereferenced .url first and would raise instead of
        # returning False for a non-Page argument.
        if not isinstance(self.__page, Page):
            return False
        current_url = self.__page.url.split("?")[0]
        return current_url.endswith(self.url)

    def get_url_domain(self) -> str:
        """
        Return the scheme + host part of the current URL, e.g. "http://host".

        :raises AttributeError: when the page attribute is not a Page
        """
        if not isinstance(self.__page, Page):
            raise AttributeError("PO对象中的page属性未被初始化")
        url_parts: List[str] = self.__page.url.split("/")
        # BUGFIX: url_parts[0] already ends with ":" (e.g. "http:"), so the
        # original f"{parts[0]}://{parts[2]}" produced "http:://host".
        return f"{url_parts[0]}//{url_parts[2]}"
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
# ---------------------------------------------------------------------------------------------------------
|
|
4
|
+
# ProjectName: qlv-helper
|
|
5
|
+
# FileName: domestic_activity_order_page.py
|
|
6
|
+
# Description: 国内活动订单页面对象
|
|
7
|
+
# Author: ASUS
|
|
8
|
+
# CreateDate: 2025/11/25
|
|
9
|
+
# Copyright ©2011-2025. Hunan xxxxxxx Company limited. All rights reserved.
|
|
10
|
+
# ---------------------------------------------------------------------------------------------------------
|
|
11
|
+
"""
|
|
12
|
+
from bs4 import BeautifulSoup
|
|
13
|
+
from qlv_helper.po.base_po import BasePo
|
|
14
|
+
from typing import Tuple, Union, List, Any, Dict
|
|
15
|
+
from playwright.async_api import Page, TimeoutError as PlaywrightTimeoutError, Locator
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class DomesticActivityOrderPage(BasePo):
    """
    Page object for the domestic activity-order list
    (/OrderList/GuoNei_ActivityOrders): locates the order table and walks
    its pagination, parsing each page's tbody.
    """

    # XPath of the order table's body on this page.
    __table_selector: str = '//tbody'
    __page: Page

    def __init__(self, page: Page, url: str = "/OrderList/GuoNei_ActivityOrders") -> None:
        super().__init__(page, url)
        self.__page = page

    async def get_flight_table(self, timeout: float = 5.0) -> Tuple[bool, Union[Locator, str]]:
        """
        Locate the flight table body and wait until it is visible.

        :param timeout: seconds to wait for visibility
        :return: (True, Locator) on success, (False, error message) otherwise
        """
        try:
            locator = self.__page.locator(self.__table_selector)
            # NOTE(review): a Playwright Locator is always truthy, so the
            # else branch is effectively dead; kept for message parity.
            if locator:
                await locator.wait_for(state='visible', timeout=timeout * 1000)
                return True, locator
            else:
                return False, '没有找到国内活动订单页面中的【航班Table】列表'
        except PlaywrightTimeoutError:
            return False, f"元素 '{self.__table_selector}' 未在 {timeout} 秒内找到"
        except Exception as e:
            return False, f"检查元素时发生错误: {str(e)}"

    """
    ------------------------------------------------------------
    1. 解析 TH(表头)
    ------------------------------------------------------------
    """

    @staticmethod
    def parse_tbody_headers(tbody_html: str) -> List[str]:
        """Extract the <th> texts of the first row of the given tbody HTML."""
        soup = BeautifulSoup(tbody_html, "html.parser")
        first_row = soup.find("tr")

        if not first_row:
            return []

        return [th.get_text(strip=True) for th in first_row.find_all("th")]

    """
    ------------------------------------------------------------
    2. 解析 TR/TD(数据行)
    ------------------------------------------------------------
    """

    @staticmethod
    def parse_tbody_rows(tbody_html: str, headers: List[str]) -> List[Dict[str, Any]]:
        """
        Parse every data row (<td> cells) of the tbody HTML. Rows are
        zipped with `headers` when the counts match, otherwise returned
        as plain value lists.
        """
        soup = BeautifulSoup(tbody_html, "html.parser")
        rows = []

        for tr in soup.find_all("tr"):
            tds = tr.find_all("td")
            if not tds:
                continue

            values = [td.get_text(strip=True) for td in tds]

            if headers and len(headers) == len(values):
                row = dict(zip(headers, values))
            else:
                row = values  # 无 th 或数量不一致,使用 list
            rows.append(row)

        return rows

    """
    ------------------------------------------------------------
    3. 主流程:分页 + 每页解析 tbody
    ------------------------------------------------------------
    """

    async def parse_table_with_pagination(self, refresh_wait_time: float = 10.0) -> List[Dict[str, Any]]:
        """
        Walk every page of the order table, parsing each page's tbody.

        :param refresh_wait_time: seconds to wait after clicking next page
        :return: accumulated rows; on error an error-message dict is
                 appended and pagination stops
        """
        all_rows: List[Dict[str, Any]] = []
        headers = None
        next_button_locator: str = '//a[@id="A_NextPage_1"]'
        while True:
            try:
                # --- 1. Parse the current tbody ---
                tbody = self.__page.locator(self.__table_selector)
                tbody_html = await tbody.inner_html()

                # Headers only need parsing once, on the first page.
                if headers is None:
                    headers = self.parse_tbody_headers(tbody_html=tbody_html)

                all_rows.extend(self.parse_tbody_rows(tbody_html=tbody_html, headers=headers))

                # --- 2. Read the current/total page counters ---
                current_page: int = int(await self.__page.locator('//label[@id="Lab_PageIndex"][1]').inner_text())
                pages: int = int(await self.__page.locator('//label[@id="Lab_PageCount"][1]').inner_text())

                # --- 3. Stop on the last page ---
                if current_page >= pages:
                    break

                # --- 4. Advance to the next page ---
                await self.__page.locator(selector=next_button_locator).click()

                # BUGFIX: wait_for_timeout takes milliseconds; the original
                # passed seconds directly, waiting ~10ms instead of ~10s.
                await self.__page.wait_for_timeout(timeout=refresh_wait_time * 1000)
            except PlaywrightTimeoutError:
                # BUGFIX: the original referenced an undefined name `timeout`
                # here (NameError at runtime).
                all_rows.append(dict(error_message=f"元素 '{self.__table_selector}' 定位超时"))
                break  # BUGFIX: without break a persistent error looped forever
            except Exception as e:
                all_rows.append(dict(error_message=f"检查元素时发生错误: {str(e)}"))
                break  # BUGFIX: without break a persistent error looped forever
        return all_rows
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
# ---------------------------------------------------------------------------------------------------------
|
|
4
|
+
# ProjectName: qlv-helper
|
|
5
|
+
# FileName: login.py
|
|
6
|
+
# Description: 登录页
|
|
7
|
+
# Author: ASUS
|
|
8
|
+
# CreateDate: 2025/11/25
|
|
9
|
+
# Copyright ©2011-2025. Hunan xxxxxxx Company limited. All rights reserved.
|
|
10
|
+
# ---------------------------------------------------------------------------------------------------------
|
|
11
|
+
"""
|
|
12
|
+
from typing import Tuple, Union
|
|
13
|
+
from qlv_helper.po.base_po import BasePo
|
|
14
|
+
from qlv_helper.utils.ocr_helper import get_image_text
|
|
15
|
+
from playwright.async_api import Page, TimeoutError as PlaywrightTimeoutError, Locator
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class LoginPage(BasePo):
    """
    Page object for the login page (/Home/Login): exposes the username,
    password and captcha inputs, the login button, the captcha OCR helper,
    the login warning banner, and the WeChat quick-login entrance.
    """

    __page: Page

    def __init__(self, page: Page, url: str = "/Home/Login") -> None:
        super().__init__(page, url)
        self.__page = page

    async def __get_visible_locator(
        self, selector: str, missing_message: str, timeout: float
    ) -> Tuple[bool, Union[Locator, str]]:
        """
        Shared lookup used by all element getters: locate `selector` and
        wait until it is visible.

        :param selector: XPath of the target element
        :param missing_message: error text returned when no locator is produced
        :param timeout: seconds to wait for visibility
        :return: (True, Locator) on success, (False, error message) otherwise
        """
        try:
            locator = self.__page.locator(selector)
            # NOTE(review): a Playwright Locator is always truthy, so the
            # else branch is effectively dead; kept for message parity.
            if locator:
                await locator.wait_for(state='visible', timeout=timeout * 1000)
                return True, locator
            else:
                return False, missing_message
        except PlaywrightTimeoutError:
            return False, f"元素 '{selector}' 未在 {timeout} 秒内找到"
        except Exception as e:
            return False, f"检查元素时发生错误: {str(e)}"

    async def get_login_username_input(self, timeout: float = 5.0) -> Tuple[bool, Union[Locator, str]]:
        """
        获取登录页面的用户名输入框

        :param timeout: 超时时间(秒)
        :return: (是否存在, 错误信息|元素对象)
        """
        return await self.__get_visible_locator(
            '//input[@id="UserName"]', '没有找到登录页面中的【用户名】输入框', timeout)

    async def get_login_password_input(self, timeout: float = 5.0) -> Tuple[bool, Union[Locator, str]]:
        """
        获取登录页面的密码输入框

        :param timeout: 超时时间(秒)
        :return: (是否存在, 错误信息|元素对象)
        """
        return await self.__get_visible_locator(
            '//input[@id="Password"]', '没有找到登录页面中的【密码】输入框', timeout)

    async def get_login_number_code_input(self, timeout: float = 5.0) -> Tuple[bool, Union[Locator, str]]:
        """
        获取登录页面的数字验证码输入框,第一层验证码

        :param timeout: 超时时间(秒)
        :return: (是否存在, 错误信息|元素对象)
        """
        return await self.__get_visible_locator(
            '//input[@id="Code"]', '没有找到登录页面中的【数字验证码】输入框', timeout)

    async def get_login_btn(self, timeout: float = 5.0) -> Tuple[bool, Union[Locator, str]]:
        """
        获取登录页面的登录按钮

        :param timeout: 超时时间(秒)
        :return: (是否存在, 错误信息|元素对象)
        """
        return await self.__get_visible_locator(
            '//input[@class="login-btn"]', '没有找到登录页面中的【登录】按钮', timeout)

    async def get_number_code(self, timeout: float = 5.0) -> Tuple[bool, str]:
        """OCR the numeric captcha image and return its recognized text."""
        selector: str = '//div[@id="signup_forms"]//img'
        return await get_image_text(page=self.__page, selector=selector, timeout=timeout)

    async def is_exist_login_warn(self, timeout: float = 5.0) -> bool:
        """
        Whether a non-empty login warning paragraph is displayed.
        Any lookup error is treated as "no warning shown".
        """
        selector: str = '//p[@class="login_warn"]'
        try:
            locator = self.__page.locator(selector)
            if not locator:
                return False
            text: str = await locator.text_content(timeout=timeout * 1000)
            # NOTE(review): text_content may return None; the resulting
            # AttributeError is swallowed below and reported as False,
            # preserving the original behavior.
            return text.strip() != ""
        except Exception:
            # BUGFIX(idiom): was `except (PlaywrightTimeoutError, Exception)` —
            # the first entry is redundant, Exception already covers it.
            return False

    async def get_wechat_entrance(self, timeout: float = 5.0) -> Tuple[bool, Union[Locator, str]]:
        """
        获取登录页面的【微信】快捷登录入口

        :param timeout: 超时时间(秒)
        :return: (是否存在, 错误信息|元素对象)
        """
        return await self.__get_visible_locator(
            '//img[@src="/images/weixin.png"]', '没有找到登录页面中的【微信】快捷登录入口', timeout)