python-qlv-helper 0.5.7__py3-none-any.whl → 0.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,111 +9,115 @@
9
9
  # Copyright ©2011-2025. Hunan xxxxxxx Company limited. All rights reserved.
10
10
  # ---------------------------------------------------------------------------------------------------------
11
11
  """
12
+ import os
12
13
  import asyncio
13
- from typing import Tuple
14
+ import traceback
15
+ from logging import Logger
16
+ from datetime import datetime
17
+ from typing import Dict, Any, Optional
14
18
  from qlv_helper.po.login_page import LoginPage
15
- from playwright.async_api import BrowserContext
16
- from qlv_helper.po.wechat_auth_page import WechatAuthPage
17
- from qlv_helper.utils.browser_utils import switch_for_table_window
18
- from qlv_helper.utils.po_utils import on_click_locator, locator_input_element
19
-
20
-
21
- async def _username_login(login_po: LoginPage, username: str, password: str, timeout: float = 5.0) -> Tuple[bool, str]:
22
- # 1. 输入用户名
23
- is_success, username_input = await login_po.get_login_username_input(timeout=timeout)
24
- if is_success is False:
25
- return is_success, username_input
26
- await locator_input_element(locator=username_input, text=username.strip())
27
-
28
- # 2. 输入密码
29
- is_success, password_input = await login_po.get_login_password_input(timeout=timeout)
30
- if is_success is False:
31
- return is_success, username_input
32
- await locator_input_element(locator=password_input, text=password.strip())
33
-
34
- # 3. 获取一层验证码
35
- is_success, code_str = await login_po.get_number_code(timeout=timeout)
36
- if is_success is False:
37
- return is_success, code_str
38
-
39
- # 4. 输入一层验证码
40
- is_success, code_input = await login_po.get_login_number_code_input(timeout=timeout)
41
- if is_success is False:
42
- return is_success, code_input
43
- await locator_input_element(locator=code_input, text=code_str.lower())
44
-
45
- # 5. 点击登录
46
- is_success, login_btn = await login_po.get_login_btn(timeout=timeout)
47
- if is_success is False:
48
- return is_success, login_btn
49
- await on_click_locator(locator=login_btn)
50
-
51
-
52
- async def _wechat_login(browser: BrowserContext, login_po: LoginPage, timeout: float = 5.0) -> Tuple[bool, str]:
53
- # 1. 点击微信登录快捷入口
54
- is_success, wechat_entrance = await login_po.get_wechat_entrance(timeout=timeout)
55
- if is_success is False:
56
- return is_success, wechat_entrance
57
- await on_click_locator(locator=wechat_entrance)
58
-
59
- page_new = await switch_for_table_window(browser=browser, url_keyword="open.weixin.qq.com", wait_time=int(timeout))
60
- wachat_po = WechatAuthPage(page=page_new)
61
-
62
- # 2. 点击【微信快捷登录】按钮
63
- is_success, wechat_quick_login_btn = await wachat_po.get_wechat_quick_login_btn(timeout=timeout)
64
- if is_success is False:
65
- return is_success, wechat_quick_login_btn
66
- await on_click_locator(locator=wechat_quick_login_btn)
67
-
68
- # 3. 点击微信弹框的中【允许】按钮
69
- return await wachat_po.on_click_allow_btn(timeout=int(timeout) * 3)
70
-
71
-
72
- async def username_login(
73
- login_po: LoginPage, username: str, password: str, timeout: float = 5.0, retry: int = 3
74
- ) -> Tuple[bool, str]:
75
- # 1. 第一次全流程的登录
76
- await _username_login(login_po=login_po, username=username, password=password, timeout=timeout)
77
- for _ in range(retry):
78
- # 2. 判断是否为当前页
79
- if login_po.is_current_page() is False:
80
- return True, f"账号:{username} 登录成功"
81
-
82
- # 3. 判断是否存在登录警告,存在的话,继续输入验证码,再次登录
83
- is_warn: bool = await login_po.is_exist_login_warn(timeout=timeout)
84
- if is_warn is True:
85
- # 4. 获取一层验证码
86
- is_success, code_str = await login_po.get_number_code(timeout=timeout)
87
- if is_success is False:
88
- return is_success, code_str
89
-
90
- # 5. 输入一层验证码
91
- is_success, code_input = await login_po.get_login_number_code_input(timeout=timeout)
92
- if is_success is False:
93
- return is_success, code_input
94
- await locator_input_element(locator=code_input, text=code_str.lower())
19
+ import qlv_helper.config.url_const as url_const
20
+ from playwright.async_api import Page, ElementHandle
21
+ from qlv_helper.utils.ocr_helper import get_image_text
22
+
23
async def _username_login(
        *, login_po: LoginPage, logger: Logger, username: str, password: str, screenshot_dir: str, api_key: str,
        secret_key: str, timeout: float = 5.0, attempt: int = 10
) -> Optional[Dict[str, Any]]:
    """
    Log in with username, password and an OCR-recognised captcha, retrying up to ``attempt`` times.

    Each attempt fills the username and password inputs (best effort), screenshots the captcha
    image into ``screenshot_dir``, passes the file to ``get_image_text``, types the recognised
    text, clicks the login button, waits three seconds and then asks ``login_po`` whether the
    browser is still on the login page.

    :param login_po: page object of the login page
    :param logger: receives progress and failure messages
    :param username: account name typed into the username input
    :param password: account password typed into the password input; it is never written to the log
    :param screenshot_dir: directory used as the parent of the captcha screenshot file
    :param api_key: forwarded to ``get_image_text``
    :param secret_key: forwarded to ``get_image_text``
    :param timeout: per-step timeout handed to the page object; multiplied by 1000 for the screenshot call
    :param attempt: maximum number of login attempts
    :return: the browser context's storage state after a successful login, ``None`` if every attempt failed
    """
    for index in range(1, attempt + 1):
        # Steps 1 and 2 stay best effort, as before: a failure is reported and the attempt
        # carries on, so a genuinely broken form surfaces in the login check below.
        try:
            # 1. Fill in the username.
            username_input = await login_po.get_login_username_input(timeout=timeout)
            await username_input.fill(value=username)
            logger.info(f"登录页面,用户名<{username}>输入完成")
        except Exception as e:
            logger.warning(f"登录页面,用户名输入失败:{e}")
        try:
            # 2. Fill in the password. The value is deliberately kept out of the log.
            password_input = await login_po.get_login_password_input(timeout=timeout)
            await password_input.fill(value=password)
            logger.info("登录页面,用户密码输入完成")
        except Exception as e:
            # Only the exception type is logged: the error text may echo the typed value.
            logger.warning(f"登录页面,用户密码输入失败:{type(e).__name__}")
        try:
            # 3. Locate the captcha element and read its type.
            captcha = await login_po.get_captcha(timeout=timeout)
            captcha_type: int = await login_po.get_captcha_type(locator=captcha, timeout=timeout)
            logger.info(f"登录页面,验证码类型<{captcha_type}>获取成功")

            # 4. Screenshot the rendered captcha image rather than requesting it again,
            #    so the captcha shown on the page is not refreshed.
            image: ElementHandle = await login_po.get_captcha_image(timeout=timeout)
            dt_str: str = datetime.now().strftime("%Y%m%d%H%M%S")
            fn: str = os.path.join(screenshot_dir, f"captcha_{username}_{captcha_type}_{dt_str}.png")
            await image.screenshot(path=fn, timeout=timeout * 1000)
            logger.info(f"登录页面,验证码图片已经生成,图片路径:{fn}")

            # 5. Recognise the captcha text and type it in.
            captcha_text = await get_image_text(
                image_path=fn, captcha_type=captcha_type, api_key=api_key, secret_key=secret_key
            )
            logger.info(f"登录页面,验证码内容:<{captcha_text}>识别成功")
            captcha_input = await login_po.get_login_captcha_input(timeout=timeout)
            await captcha_input.fill(value=captcha_text)
            logger.info(f"登录页面,验证码<{captcha_text}>输入完成")

            # 6. Click the login button and give the page time to react.
            login_btn = await login_po.get_login_btn(timeout=timeout)
            await login_btn.click(button="left")
            logger.info("登录页面,【登录】按钮点击完成")
            await asyncio.sleep(delay=3)

            # 7. Still being on the login page means the login was rejected.
            if login_po.is_current_page() is False:
                logger.info(f"用户<{username}>登录成功,登录流程结束")
                # 8. Called without a path, storage_state() hands the state back to the caller.
                return await login_po.get_page().context.storage_state()
            raise RuntimeError("登录失败")
        except Exception as e:
            # A RuntimeError is the expected "login rejected" signal; anything else is
            # unexpected and gets its traceback logged.
            if not isinstance(e, RuntimeError):
                logger.error(traceback.format_exc())
            if index == attempt:
                logger.error(f"尝试登录<{attempt}>次,均失败,登录结束")
            else:
                logger.error(f"第<{index}>次登录失败,等待下一次登录")
    # Every attempt failed (or attempt < 1): make the "no state" result explicit.
    return None
96
+
97
+
98
async def open_login_page(
        *, page: Page, logger: Logger, qlv_protocol: str, qlv_domain: str, timeout: float = 60.0
) -> LoginPage:
    """
    Navigate ``page`` to the login URL and wrap it in a ``LoginPage`` page object.

    :param page: Playwright page that is navigated
    :param logger: receives one info message containing the login URL
    :param qlv_protocol: URL scheme placed before ``://``
    :param qlv_domain: host part of the URL
    :param timeout: forwarded to ``LoginPage.url_wait_for``
    :return: the ``LoginPage`` built for the login URL
    """
    # The path component comes from the shared URL constants module.
    target_url = f"{qlv_protocol}://{qlv_domain}" + url_const.login_url
    await page.goto(target_url)

    page_object = LoginPage(page=page, url=target_url)
    await page_object.url_wait_for(url=target_url, timeout=timeout)
    logger.info(f"即将进入登录页,页面URL<{target_url}>")
    return page_object
108
109
 
109
110
 
110
- async def wechat_login(
111
- browser: BrowserContext, login_po: LoginPage, timeout: float = 5.0, retry: int = 3
112
- ) -> Tuple[bool, str]:
113
- for index in range(retry):
114
- # 全流程的登录
115
- is_success, message = await _wechat_login(browser=browser, login_po=login_po, timeout=timeout)
116
-
117
- # 判断是否为当前页
118
- if is_success is True or index == retry - 1:
119
- return is_success, message
111
async def username_login(
        *, page: Page, logger: Logger, qlv_protocol: str, qlv_domain: str, username: str, screenshot_dir: str,
        password: str, api_key: str, secret_key: str, timeout: float = 60.0, attempt: int = 10, **kwargs: Any
) -> Optional[Dict[str, Any]]:
    """
    Open the login page and run the username/password login flow.

    :param page: Playwright page used for the whole flow
    :param logger: receives progress and failure messages
    :param qlv_protocol: URL scheme of the target site
    :param qlv_domain: host of the target site
    :param username: account name
    :param screenshot_dir: directory in which captcha screenshots are stored
    :param password: account password
    :param api_key: forwarded to the captcha recognition step
    :param secret_key: forwarded to the captcha recognition step
    :param timeout: used both for opening the page and for every login step
    :param attempt: maximum number of login attempts
    :param kwargs: accepted and ignored
    :return: the storage state returned by ``_username_login``, or ``None`` when every attempt failed
    """
    # 1. Open the login page.
    login_po = await open_login_page(
        page=page, logger=logger, qlv_domain=qlv_domain, qlv_protocol=qlv_protocol, timeout=timeout
    )
    # 2. Run the complete login flow; its result (possibly None) is passed straight through.
    return await _username_login(
        login_po=login_po, logger=logger, username=username, password=password, screenshot_dir=screenshot_dir,
        timeout=timeout, api_key=api_key, secret_key=secret_key, attempt=attempt
    )
@@ -0,0 +1,50 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ # ---------------------------------------------------------------------------------------------------------
4
+ # ProjectName: qlv-helper
5
+ # FileName: wechat_login.py
6
+ # Description: 微信登录模块
7
+ # Author: ASUS
8
+ # CreateDate: 2025/12/31
9
+ # Copyright ©2011-2025. Hunan xxxxxxx Company limited. All rights reserved.
10
+ # ---------------------------------------------------------------------------------------------------------
11
+ """
12
+ from typing import Tuple
13
+ from qlv_helper.po.login_page import LoginPage
14
+ from playwright.async_api import BrowserContext
15
+
16
+ from qlv_helper.utils.po_utils import on_click_locator
17
+ from qlv_helper.po.wechat_auth_page import WechatAuthPage
18
+ from qlv_helper.utils.browser_utils import switch_for_table_window
19
+
20
+
21
async def _wechat_login(browser: BrowserContext, login_po: LoginPage, timeout: float = 5.0) -> Tuple[bool, str]:
    """
    Run one pass of the WeChat quick-login flow.

    :param browser: browser context handed to ``switch_for_table_window``
    :param login_po: page object of the login page
    :param timeout: timeout for each lookup; truncated to ``int`` for the window switch and tripled for the
        final "allow" click
    :return: the pair of the first lookup that reported ``False``, otherwise the result of
        ``WechatAuthPage.on_click_allow_btn``
    """
    # 1. Click the WeChat quick-login entrance on the login page.
    found, entrance = await login_po.get_wechat_entrance(timeout=timeout)
    if found is False:
        return found, entrance
    await on_click_locator(locator=entrance)

    # Presumably selects the window whose URL contains the keyword - confirm in browser_utils.
    auth_page = await switch_for_table_window(browser=browser, url_keyword="open.weixin.qq.com", wait_time=int(timeout))
    auth_po = WechatAuthPage(page=auth_page)

    # 2. Click the "WeChat quick login" button.
    found, quick_login_btn = await auth_po.get_wechat_quick_login_btn(timeout=timeout)
    if found is False:
        return found, quick_login_btn
    await on_click_locator(locator=quick_login_btn)

    # 3. Click the "allow" button in the WeChat dialog, with three times the (truncated) timeout.
    allow_timeout = int(timeout) * 3
    return await auth_po.on_click_allow_btn(timeout=allow_timeout)
39
+
40
+
41
async def wechat_login(
        *, browser: BrowserContext, login_po: LoginPage, timeout: float = 5.0, retry: int = 3
) -> Tuple[bool, str]:
    """
    Run the WeChat login flow until it succeeds or ``retry`` passes have been made.

    :param browser: browser context forwarded to ``_wechat_login``
    :param login_po: page object of the login page
    :param timeout: timeout forwarded to ``_wechat_login``
    :param retry: maximum number of passes; a value below 1 performs none
    :return: the result of the first successful pass, otherwise the result of the last pass;
        ``(False, message)`` when no pass was made
    """
    # Pre-seed the result so that retry < 1 still yields the annotated tuple instead of None.
    is_success, message = False, f"微信登录未执行:retry<{retry}>必须大于等于1"
    for _ in range(retry):
        # One complete pass of the login flow.
        is_success, message = await _wechat_login(browser=browser, login_po=login_po, timeout=timeout)
        if is_success is True:
            break
    return is_success, message
@@ -10,20 +10,23 @@
10
10
  # ---------------------------------------------------------------------------------------------------------
11
11
  """
12
12
  import re
13
+ import json
13
14
  import aiohttp
14
15
  from datetime import datetime
16
+ from urllib.parse import quote
15
17
  from bs4 import BeautifulSoup, Tag
16
18
  from collections import OrderedDict
17
19
  from typing import Dict, Any, Optional, List
18
20
  from qlv_helper.utils.type_utils import convert_cn_to_en
19
21
  from http_helper.client.async_proxy import HttpClientFactory
20
22
  from qlv_helper.utils.datetime_utils import get_current_dtstr
23
+ from flight_helper.models.dto.procurement import FillProcurementInputDTO
21
24
  from qlv_helper.utils.type_utils import get_key_by_index, get_value_by_index, safe_convert_advanced
22
25
 
23
26
 
24
27
  async def get_order_page_html(
25
- order_id: int, domain: str, protocol: str = "http", retry: int = 1, timeout: int = 5, enable_log: bool = True,
26
- cookie_jar: Optional[aiohttp.CookieJar] = None, playwright_state: Dict[str, Any] = None
28
+ *, order_id: int, domain: str, protocol: str = "http", retry: int = 1, timeout: int = 5,
29
+ enable_log: bool = True, cookie_jar: Optional[aiohttp.CookieJar] = None, playwright_state: Dict[str, Any] = None
27
30
  ) -> Dict[str, Any]:
28
31
  order_http_client = HttpClientFactory(
29
32
  protocol=protocol if protocol == "http" else "https",
@@ -41,6 +44,121 @@ async def get_order_page_html(
41
44
  )
42
45
 
43
46
 
47
async def fill_procurement_info_with_http(
        *, order_id: int, qlv_domain: str, amount: float, pre_order_id: str, platform_user_id: str, user_password: str,
        passengers: List[str], fids: str, pids: List[str], transaction_id: str, qlv_protocol: str = "http",
        retry: int = 1, timeout: int = 5, enable_log: bool = True, cookie_jar: Optional[aiohttp.CookieJar] = None,
        playwright_state: Optional[Dict[str, Any]] = None, data_list: Optional[List[Dict[str, Any]]] = None
) -> Dict[str, Any]:
    """
    POST a procurement (purchase) record for an order to ``/OrderProcessing/PurchaseInfoSave``.

    :param order_id: order id; used in the Referer URL and as ``orderID``
    :param qlv_domain: host of the target site
    :param amount: sent as ``transactionAmount``
    :param pre_order_id: sent as ``airCoOrderID``
    :param platform_user_id: first half of the ``remark`` field
    :param user_password: second half of the ``remark`` field (``"<user id>/<password>"``, URL-quoted)
    :param passengers: passenger names, comma-joined with a leading and trailing comma into ``pName``
    :param fids: sent as ``fids``
    :param pids: passenger ids, comma-joined into ``pids``
    :param transaction_id: sent as ``MainCheckNumberValus``
    :param qlv_protocol: URL scheme of the target site
    :param retry: forwarded to ``HttpClientFactory``
    :param timeout: forwarded to ``HttpClientFactory``
    :param enable_log: forwarded to ``HttpClientFactory``
    :param cookie_jar: forwarded to ``HttpClientFactory``; a fresh ``aiohttp.CookieJar`` is used when omitted
    :param playwright_state: forwarded to ``HttpClientFactory``
    :param data_list: when non-empty, sent as the record list instead of the record built from the arguments
    :return: whatever ``client.request`` returns
    """
    client = HttpClientFactory(
        protocol=qlv_protocol, domain=qlv_domain, timeout=timeout, enable_log=enable_log, retry=retry,
        cookie_jar=cookie_jar or aiohttp.CookieJar(), playwright_state=playwright_state
    )

    now = datetime.now()
    # Formatted outside the f-string: reusing double quotes inside a double-quoted
    # f-string is a SyntaxError on Python versions before 3.12.
    stamp = now.strftime("%Y%m%d%H%M%S")
    headers = {
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        "Referer": f"{qlv_protocol}://{qlv_domain}/OrderProcessing/NewTicket/{order_id}?&r={stamp}",
    }
    if data_list:
        records = data_list
    else:
        remark = f"{platform_user_id}/{user_password}"
        passenger_names = "," + ",".join(passengers) + ","
        passenger_ids = ",".join(pids)
        records = [
            {"tradingDat": now.strftime("%Y-%m-%d %H:%M"), "outTktPF": "G航司官网", "outTktLoginCode": "",
             "typeName": "VCC", "accountID": "8", "accountName": "VCC", "transactionAmount": f"{amount}",
             "mainCheckNumber": "", "airCoOrderID": f"{pre_order_id}", "QuotaResultAmount": "0.00",
             "remark": f"{quote(remark)}", "flightIdx": ",1,", "pName": f"{passenger_names}",
             "orderID": f"{order_id}",
             "businessTypeName": "机票", "tradingItems": "机票支出", "actualAmount": 0, "pType": "成人",
             "fids": f"{fids}", "pids": f"{passenger_ids}", "iscandel": "true", "isbatch": "false",
             "MainCheckNumberValus": f"{transaction_id}",
             "OfficeNo": "", "PriceStdActual": "0.00", "ReturnAmount": "0.0000", "OffsetReturnAmount": "0.00",
             "profitRemark": "", "preSaleType": "", "ErrorType": "", "OutTktPFTypeID": "34", "OutTicketAccount": "",
             "OutTicketAccountID": "", "OutTicketPWD": "", "OutTicketTel": "", "OutTicketPNR": ""}
        ]
    # NOTE(review): the JSON is placed into the form body without URL-encoding (only `remark`
    # is pre-quoted); a '&', '+' or '%' inside any other value would corrupt the form.
    # Left unchanged because callers supplying data_list may rely on this wire format.
    body = f"list={json.dumps(records)}&isPayAll=true&delTransactionids=&OutTicketLossType&OutTicketLossRemark="
    return await client.request(
        method="POST", url="/OrderProcessing/PurchaseInfoSave",
        headers=headers, is_end=True, data=body.encode("utf-8")
    )
85
+
86
+
87
async def fill_procurement_dto_with_http(
        *, fill_procurement_dto: FillProcurementInputDTO, retry: int = 1, timeout: int = 5, enable_log: bool = True,
        cookie_jar: Optional[aiohttp.CookieJar] = None, playwright_state: Optional[Dict[str, Any]] = None,
        data_list: Optional[List[Dict[str, Any]]] = None
) -> Dict[str, Any]:
    """
    POST a procurement (purchase) record described by a DTO to ``/OrderProcessing/PurchaseInfoSave``.

    :param fill_procurement_dto: supplies protocol, domain, order number and all record fields
    :param retry: forwarded to ``HttpClientFactory``
    :param timeout: forwarded to ``HttpClientFactory``
    :param enable_log: forwarded to ``HttpClientFactory``
    :param cookie_jar: forwarded to ``HttpClientFactory``; a fresh ``aiohttp.CookieJar`` is used when omitted
    :param playwright_state: forwarded to ``HttpClientFactory``
    :param data_list: when non-empty, sent as the record list instead of the record built from the DTO
    :return: whatever ``client.request`` returns
    """
    dto = fill_procurement_dto
    client = HttpClientFactory(
        protocol=dto.pl_protocol, domain=dto.pl_domain, timeout=timeout, retry=retry,
        enable_log=enable_log, cookie_jar=cookie_jar or aiohttp.CookieJar(), playwright_state=playwright_state
    )

    now = datetime.now()
    # Formatted outside the f-string: reusing double quotes inside a double-quoted
    # f-string is a SyntaxError on Python versions before 3.12.
    stamp = now.strftime("%Y%m%d%H%M%S")
    headers = {
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        "Referer": f"{dto.pl_protocol}://{dto.pl_domain}/OrderProcessing/NewTicket/{dto.order_no}?&r={stamp}",
    }
    if data_list:
        records = data_list
    else:
        # Names are wrapped in a leading and trailing comma; an empty list gives an empty string.
        passenger_names = ("," + ",".join(dto.passenger_names) + ",") if dto.passenger_names else ''
        passenger_ids = ",".join(dto.passenger_ids) if dto.passenger_ids else ''
        records = [{
            "tradingDat": now.strftime("%Y-%m-%d %H:%M"),
            "outTktPF": f"{dto.out_ticket_platform or ''}", "outTktLoginCode": "",
            "typeName": f"{dto.type_name or ''}",
            "accountID": f"{dto.purchase_account_id or ''}",
            "accountName": f"{dto.purchase_account or ''}",
            "transactionAmount": f"{dto.transaction_amount}",
            "mainCheckNumber": "",
            "airCoOrderID": f"{dto.air_co_order_id}", "QuotaResultAmount": "0.00",
            # The fallback has to sit inside quote(): quote(None) raises TypeError.
            "remark": f"{quote(dto.remark or '')}",
            "flightIdx": f",{dto.segment_index or '1'},", "pName": f"{passenger_names}",
            "orderID": f"{dto.order_no}",
            "businessTypeName": "机票", "tradingItems": "机票支出", "actualAmount": 0,
            "pType": f"{dto.passenger_type}",
            "fids": f"{dto.flight_ids or ''}",
            "pids": f"{passenger_ids or ''}",
            "iscandel": "true", "isbatch": "false",
            "MainCheckNumberValus": f"{dto.pay_transaction}",
            "OfficeNo": "", "PriceStdActual": "0.00", "ReturnAmount": "0.0000", "OffsetReturnAmount": "0.00",
            "profitRemark": "", "preSaleType": "", "ErrorType": "",
            "OutTktPFTypeID": f"{dto.out_ticket_platform_type_id or ''}",
            "OutTicketAccount": f"{dto.out_ticket_account or ''}",
            "OutTicketAccountID": f"{dto.out_ticket_account_id or ''}",
            "OutTicketPWD": f"{dto.out_ticket_account_password or ''}",
            "OutTicketTel": f"{dto.out_ticket_mobile or ''}",
            "OutTicketPNR": ""}
        ]
    # NOTE(review): the JSON is placed into the form body without URL-encoding (only `remark`
    # is pre-quoted); a '&', '+' or '%' inside any other value would corrupt the form.
    # Left unchanged because callers supplying data_list may rely on this wire format.
    body = f"list={json.dumps(records)}&isPayAll=true&delTransactionids=&OutTicketLossType&OutTicketLossRemark="
    return await client.request(
        method="POST", url="/OrderProcessing/PurchaseInfoSave",
        headers=headers, is_end=True, data=body.encode("utf-8")
    )
139
+
140
+
141
async def fill_itinerary_info_with_http(
        *, order_id: int, qlv_domain: str, pid: str, tid: str, transaction_id: str, itinerary_id: str, retry: int = 1,
        qlv_protocol: str = "http", timeout: int = 5, enable_log: bool = True,
        cookie_jar: Optional[aiohttp.CookieJar] = None, playwright_state: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
    """
    POST a ticket (itinerary) number for one passenger of an order to ``/OrderProcessing/TicketNoSave``.

    :param order_id: order id; used in the Referer URL and sent as ``OrderID``
    :param qlv_domain: host of the target site
    :param pid: sent as ``OrderPID``
    :param tid: sent as ``OrderTID``
    :param transaction_id: sent as ``ZJTransactionID``
    :param itinerary_id: sent as ``TicketNo``
    :param retry: forwarded to ``HttpClientFactory``
    :param qlv_protocol: URL scheme of the target site
    :param timeout: forwarded to ``HttpClientFactory``
    :param enable_log: forwarded to ``HttpClientFactory``
    :param cookie_jar: forwarded to ``HttpClientFactory``; a fresh ``aiohttp.CookieJar`` is used when omitted
    :param playwright_state: forwarded to ``HttpClientFactory``
    :return: whatever ``client.request`` returns
    """
    client = HttpClientFactory(
        protocol=qlv_protocol, domain=qlv_domain, timeout=timeout, enable_log=enable_log, retry=retry,
        cookie_jar=cookie_jar or aiohttp.CookieJar(), playwright_state=playwright_state
    )

    # Formatted outside the f-string: reusing double quotes inside a double-quoted
    # f-string is a SyntaxError on Python versions before 3.12.
    stamp = datetime.now().strftime("%Y%m%d%H%M%S")
    headers = {
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        "Referer": f"{qlv_protocol}://{qlv_domain}/OrderProcessing/NewTicket_show/{order_id}?&r={stamp}",
    }
    # NOTE(review): the values are interpolated without URL-encoding; this is only safe
    # while every id is free of '&', '=', '+', '%' and whitespace.
    body = f"OrderID={order_id}&OrderPID={pid}&OrderTID={tid}&TicketNo={itinerary_id}&ZJTransactionID={transaction_id}"
    return await client.request(
        method="POST", url="/OrderProcessing/TicketNoSave",
        headers=headers, is_end=True, data=body.encode("utf-8")
    )
160
+
161
+
44
162
  def order_info_static_headers() -> OrderedDict[str, str]:
45
163
  return OrderedDict([
46
164
  ("receipted_ota", "OTA实收"), # 0
@@ -154,6 +272,9 @@ def flight_extend_headers() -> OrderedDict[str, str]:
154
272
  ("id_valid_dat", " 证件有效期"), # 9
155
273
  ("code_dep", " 起飞机场"), # 10
156
274
  ("code_arr", " 抵达机场"), # 11
275
+ ("pid", "乘客ID"), # 12
276
+ ("fid", "航段ID"), # 13
277
+ ("tid", "乘客表ID"), # 14
157
278
  ])
158
279
 
159
280
 
@@ -269,7 +390,20 @@ def parse_order_flight_table_passenger_info(raw: Tag, headers: OrderedDict[str,
269
390
  nationality = guobies[0].get_text(strip=True) if len(guobies) > 0 else ""
270
391
  issue_country = guobies[1].get_text(strip=True) if len(guobies) > 1 else ""
271
392
 
272
- return {
393
+ a1 = raw.find("a", id=lambda x: x and x.startswith("IDNo_"))
394
+ a2 = raw.find("a", id=lambda x: x and x.startswith("detrni_"))
395
+ a3 = raw.find("a", id=lambda x: x and x.startswith("detrnif_"))
396
+ pid = None
397
+ if a1:
398
+ full_id = a1["id"] # IDNo_279778
399
+ pid = full_id.split("_")[-1]
400
+ elif a2:
401
+ full_id = a2["id"] # detrni_279778
402
+ pid = full_id.split("_")[-1]
403
+ elif a3:
404
+ full_id = a3["id"] # detrnif_279778
405
+ pid = full_id.split("_")[-1]
406
+ result = {
273
407
  get_key_by_index(index=0, ordered_dict=headers): name, # 姓名
274
408
  get_key_by_index(index=1, ordered_dict=headers): ptype, # 类型: 成人/儿童
275
409
  get_key_by_index(index=2, ordered_dict=headers): id_type, # 证件类型
@@ -279,8 +413,12 @@ def parse_order_flight_table_passenger_info(raw: Tag, headers: OrderedDict[str,
279
413
  get_key_by_index(index=6, ordered_dict=headers): sex, # 性别
280
414
  get_key_by_index(index=7, ordered_dict=headers): nationality, # 国籍
281
415
  get_key_by_index(index=8, ordered_dict=headers): issue_country, # 签发国
282
- get_key_by_index(index=9, ordered_dict=headers): id_valid # 证件有效期
416
+ get_key_by_index(index=9, ordered_dict=headers): id_valid, # 证件有效期
283
417
  }
418
+ if pid is not None:
419
+ # 乘客ID
420
+ result[get_key_by_index(index=12, ordered_dict=headers)] = pid
421
+ return result
284
422
 
285
423
 
286
424
  def parse_order_flight_table_row(
@@ -288,8 +426,10 @@ def parse_order_flight_table_row(
288
426
  ) -> Dict[str, Any]:
289
427
  """解析航班表每一行的数据"""
290
428
  tds = tr.find_all("td", recursive=False)
291
- values = {}
292
-
429
+ values = {
430
+ get_key_by_index(index=12, ordered_dict=extend_headers): tr["pid"],
431
+ get_key_by_index(index=14, ordered_dict=extend_headers): tr["tid"]
432
+ }
293
433
  for idx, td in enumerate(tds):
294
434
  if idx >= len(headers):
295
435
  continue
@@ -303,6 +443,14 @@ def parse_order_flight_table_row(
303
443
  else:
304
444
  raw = clean_order_flight_table(html=td)
305
445
  if "行程" in value:
446
+ fid = ""
447
+ input_tag = td.find('input', {'name': 'fid'})
448
+ fid_key = get_key_by_index(index=13, ordered_dict=extend_headers)
449
+ if input_tag:
450
+ match = re.search(r'\d+', input_tag.get('value'))
451
+ if match:
452
+ fid = match.group()
453
+ values[fid_key] = fid
306
454
  code_dep_key = get_key_by_index(index=10, ordered_dict=extend_headers)
307
455
  code_arr_key = get_key_by_index(index=11, ordered_dict=extend_headers)
308
456
  raw_slice = raw.split("-")
@@ -313,7 +461,6 @@ def parse_order_flight_table_row(
313
461
  values[key] = raw
314
462
  else:
315
463
  values[key] = safe_convert_advanced(raw)
316
-
317
464
  return values
318
465
 
319
466
 
@@ -326,6 +473,8 @@ def extract_structured_table_data(table: Tag) -> List[Optional[Dict[str, Any]]]:
326
473
  for tr in table.find_all("tr")[1:]: # 跳过表头
327
474
  rows.append(parse_order_flight_table_row(tr=tr, headers=headers, extend_headers=extend))
328
475
 
476
+ if rows:
477
+ rows = list({i["id_no"]: i for i in sorted(rows, key=lambda x: bool(x.get("fid")))}.values())
329
478
  return rows
330
479
 
331
480
 
@@ -333,14 +482,12 @@ def parser_order_flight_table(html: str) -> List[Optional[Dict[str, Any]]]:
333
482
  """解析航班表"""
334
483
  soup = BeautifulSoup(html, 'html.parser')
335
484
  # 三个主要的order_sort div
336
- order_sections = soup.find_all('div', class_='order_sort')
337
- section = order_sections[3] if len(order_sections) > 3 else Tag(name="")
485
+ table_sections = soup.find_all('table', class_='table table_border table_center')
486
+ table = table_sections[2] if len(table_sections) > 2 else Tag(name="")
338
487
  results = list()
339
488
 
340
- tables = section.find_all('table', class_='table table_border table_center')
341
- for table in tables:
342
- table_data = extract_structured_table_data(table)
343
- if table_data:
344
- results.extend(table_data)
489
+ table_data = extract_structured_table_data(table)
490
+ if table_data:
491
+ results.extend(table_data)
345
492
 
346
493
  return results
@@ -23,6 +23,96 @@ class DomesticActivityOrderPage(BasePo):
23
23
  super().__init__(page, url)
24
24
  self.__page = page
25
25
 
26
async def get_flight_table_locator(self, timeout: float = 5.0) -> Locator:
    """
    Get the locator of the table body.

    :param timeout: forwarded unchanged to ``self.get_locator``
    :return: the result of ``self.get_locator`` for the table-body XPath
    """
    tbody_xpath: str = 'xpath=//table[@class="table table_hover table_border table_center"]//tbody'
    return await self.get_locator(selector=tbody_xpath, timeout=timeout)
34
+
35
async def get_flight_table_trs_locator(self, timeout: float = 5.0) -> Locator:
    """
    Get the locator matching every ``tr`` of the table body.

    :param timeout: forwarded unchanged to ``self.get_locator``
    :return: the result of ``self.get_locator`` for the row XPath
    """
    rows_xpath: str = 'xpath=//table[@class="table table_hover table_border table_center"]/tbody/tr'
    return await self.get_locator(selector=rows_xpath, timeout=timeout)
43
+
44
async def get_flight_table_tds_th(self, locator: Locator, timeout: float = 5.0) -> Locator:
    """
    Get the locator of the ``th`` children of a table row.

    :param locator: row locator the relative XPath is resolved against
    :param timeout: forwarded unchanged to ``self.get_sub_locator``
    :return: the result of ``self.get_sub_locator`` for ``./th``
    """
    header_cells_xpath: str = 'xpath=./th'
    return await self.get_sub_locator(locator=locator, selector=header_cells_xpath, timeout=timeout)
53
+
54
async def get_flight_table_trs_td(self, locator: Locator, timeout: float = 5.0) -> Locator:
    """
    Get the locator of the ``td`` children of a table row.

    :param locator: row locator the relative XPath is resolved against
    :param timeout: forwarded unchanged to ``self.get_sub_locator``
    :return: the result of ``self.get_sub_locator`` for ``./td``
    """
    cells_xpath: str = 'xpath=./td'
    return await self.get_sub_locator(locator=locator, selector=cells_xpath, timeout=timeout)
63
+
64
async def get_flight_table_td_order_id(self, locator: Locator, timeout: float = 5.0) -> str:
    """
    Read the order id shown in a table cell.

    :param locator: cell locator the relative XPath is resolved against
    :param timeout: forwarded unchanged to ``self.get_sub_locator``
    :return: the stripped inner text of the cell's ``a`` child
    """
    link = await self.get_sub_locator(locator=locator, selector='xpath=./a', timeout=timeout)
    raw_text = await link.inner_text()
    return raw_text.strip()
74
+
75
async def get_flight_table_td_urgant(self, locator: Locator, timeout: float = 5.0) -> str:
    """
    Read the urgency status shown in a table cell.

    :param locator: cell locator the relative XPath is resolved against
    :param timeout: forwarded unchanged to ``self.get_sub_locator``
    :return: the stripped inner text of the cell's ``font`` child
    """
    status_node = await self.get_sub_locator(locator=locator, selector='xpath=./font', timeout=timeout)
    raw_text = await status_node.inner_text()
    return raw_text.strip()
85
+
86
async def get_flight_table_td_operation_lock_btn(self, locator: Locator, timeout: float = 5.0) -> Locator:
    """
    Get the "lock order" button of a table row's operation cell.

    :param locator: cell locator the relative XPath is resolved against
    :param timeout: forwarded unchanged to ``self.get_sub_locator``
    :return: the result of ``self.get_sub_locator`` for ``./a``
    """
    lock_btn_xpath: str = 'xpath=./a'
    return await self.get_sub_locator(locator=locator, selector=lock_btn_xpath, timeout=timeout)
95
+
96
async def get_flight_table_td_operation_pop_btn(self, locator: Locator, timeout: float = 5.0) -> Locator:
    """
    Get the "kick out" button of a table row's operation cell.

    :param locator: cell locator the relative XPath is resolved against
    :param timeout: forwarded unchanged to ``self.get_sub_locator``
    :return: the result of ``self.get_sub_locator`` for ``./button``
    """
    pop_btn_xpath: str = 'xpath=./button'
    return await self.get_sub_locator(locator=locator, selector=pop_btn_xpath, timeout=timeout)
105
+
106
async def get_flight_table_td_operation_substitute_btn(self, locator: Locator, timeout: float = 5.0) -> Locator:
    """
    Get the "substitute" (fill the vacancy) button of a table row's operation cell.

    :param locator: cell locator the relative XPath is resolved against
    :param timeout: forwarded unchanged to ``self.get_sub_locator``
    :return: the result of ``self.get_sub_locator`` for ``./a``
    """
    substitute_btn_xpath: str = 'xpath=./a'
    return await self.get_sub_locator(locator=locator, selector=substitute_btn_xpath, timeout=timeout)
115
+
26
116
  async def get_flight_table(self, timeout: float = 5.0) -> Tuple[bool, Union[Locator, str]]:
27
117
  try:
28
118
  locator = self.__page.locator(self.__table_selector)
@@ -84,6 +174,7 @@ class DomesticActivityOrderPage(BasePo):
84
174
  3. 主流程:分页 + 每页解析 tbody
85
175
  ------------------------------------------------------------
86
176
  """
177
+
87
178
  async def parse_table_with_pagination(self, refresh_wait_time: float = 10.0) -> List[Dict[str, Any]]:
88
179
  """
89
180
  refresh_wait_time: 翻页后等待时间