python-qlv-helper 0.6.0__py3-none-any.whl → 0.9.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {python_qlv_helper-0.6.0.dist-info → python_qlv_helper-0.9.3.dist-info}/METADATA +9 -7
- {python_qlv_helper-0.6.0.dist-info → python_qlv_helper-0.9.3.dist-info}/RECORD +17 -16
- qlv_helper/config/url_const.py +2 -0
- qlv_helper/controller/main_page.py +18 -2
- qlv_helper/controller/order_detail.py +202 -212
- qlv_helper/controller/order_table.py +178 -16
- qlv_helper/controller/user_login.py +106 -102
- qlv_helper/controller/wechat_login.py +50 -0
- qlv_helper/http/order_page.py +187 -24
- qlv_helper/po/domestic_activity_order_page.py +91 -0
- qlv_helper/po/login_page.py +36 -74
- qlv_helper/po/main_page.py +7 -38
- qlv_helper/po/order_detail_page.py +46 -1
- qlv_helper/utils/ocr_helper.py +38 -43
- {python_qlv_helper-0.6.0.dist-info → python_qlv_helper-0.9.3.dist-info}/WHEEL +0 -0
- {python_qlv_helper-0.6.0.dist-info → python_qlv_helper-0.9.3.dist-info}/licenses/LICENSE +0 -0
- {python_qlv_helper-0.6.0.dist-info → python_qlv_helper-0.9.3.dist-info}/top_level.txt +0 -0
qlv_helper/utils/ocr_helper.py
CHANGED
|
@@ -9,11 +9,13 @@
|
|
|
9
9
|
# Copyright ©2011-2025. Hunan xxxxxxx Company limited. All rights reserved.
|
|
10
10
|
# ---------------------------------------------------------------------------------------------------------
|
|
11
11
|
"""
|
|
12
|
+
import json
|
|
13
|
+
import asyncio
|
|
12
14
|
import ddddocr
|
|
13
15
|
import requests
|
|
14
|
-
from typing import
|
|
16
|
+
from typing import Tuple
|
|
15
17
|
from aiohttp import ClientSession
|
|
16
|
-
from
|
|
18
|
+
from ocr_helper.core.baidu import ImageContentOCR
|
|
17
19
|
|
|
18
20
|
# 复用 OCR 实例,不用每次都重新加载模型(更快)
|
|
19
21
|
_ocr = ddddocr.DdddOcr(show_ad=False)
|
|
@@ -39,45 +41,38 @@ async def async_fetch_and_ocr_captcha(url: str) -> Tuple[str, bytes]:
|
|
|
39
41
|
return result, img_bytes
|
|
40
42
|
|
|
41
43
|
|
|
42
|
-
def
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
44
|
+
async def get_image_text(image_path: str, captcha_type: int, api_key, secret_key: str) -> str:
|
|
45
|
+
if captcha_type == 0:
|
|
46
|
+
with open(image_path, "rb") as f:
|
|
47
|
+
img_bytes = f.read()
|
|
48
|
+
for _ in range(100):
|
|
49
|
+
text = _ocr.classification(img_bytes).strip()
|
|
50
|
+
if len(text) == 4:
|
|
51
|
+
return text
|
|
52
|
+
raise RuntimeError("ddddocr识别验证码失败")
|
|
53
|
+
else:
|
|
54
|
+
api = ImageContentOCR(api_key=api_key, secret_key=secret_key)
|
|
55
|
+
response = await api.get_access_token(is_end=False)
|
|
56
|
+
if not response.get("access_token"):
|
|
57
|
+
raise RuntimeError(f"获取百度API的认证Token失败,原因:{response}")
|
|
58
|
+
token = response.get("access_token")
|
|
59
|
+
response = await api.submit_request(
|
|
60
|
+
question='图片中的文字是什么,如果含有运算信息,请将运算结果返回。注意给我返回一个json格式数据包,例如:{"content":"xxxx", result: xxx}, 如果无运算信息,设置为空串就行',
|
|
61
|
+
image_path=image_path,
|
|
62
|
+
token=token,
|
|
63
|
+
is_end=False
|
|
64
|
+
)
|
|
65
|
+
task_id: str = response.get("result", dict()).get("task_id")
|
|
66
|
+
if not task_id:
|
|
67
|
+
raise RuntimeError(f"提交图片至百度API接口失败,原因:{response}")
|
|
68
|
+
await asyncio.sleep(delay=10)
|
|
69
|
+
response = await api.get_result(task_id=task_id, token=token, is_end=True)
|
|
70
|
+
if response.get("result").get("ret_code") == 0 and response.get("result").get("ret_msg") == "success":
|
|
71
|
+
description = response.get("result").get("description")
|
|
72
|
+
description = json.loads(description[description.find("{"):description.find("}") + 1])
|
|
73
|
+
if description.get("result"):
|
|
74
|
+
return str(description.get("result")).strip()
|
|
75
|
+
else:
|
|
76
|
+
return description.get("content").strip()
|
|
55
77
|
else:
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
result = _ocr.classification(img_bytes)
|
|
59
|
-
return result
|
|
60
|
-
|
|
61
|
-
except Exception as e:
|
|
62
|
-
raise RuntimeError(f"OCR 识别失败: {e}")
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
async def get_image_text(page: Page, selector: str, timeout: float = 5.0) -> Tuple[bool, str]:
|
|
66
|
-
try:
|
|
67
|
-
# 找到 img
|
|
68
|
-
locator = page.locator(selector)
|
|
69
|
-
if locator:
|
|
70
|
-
img = await locator.element_handle(timeout=timeout * 1000)
|
|
71
|
-
|
|
72
|
-
# 直接截图获取原始图片字节,不刷新图片
|
|
73
|
-
img_bytes = await img.screenshot(timeout=timeout * 1000)
|
|
74
|
-
|
|
75
|
-
# OCR 识别
|
|
76
|
-
text = _ocr.classification(img_bytes)
|
|
77
|
-
return True, text.strip()
|
|
78
|
-
else:
|
|
79
|
-
return False, f'没有找到当前页面中的【{selector}】图片'
|
|
80
|
-
except PlaywrightTimeoutError:
|
|
81
|
-
return False, f"元素 '{selector}' 未在 {timeout} 秒内找到"
|
|
82
|
-
except Exception as e:
|
|
83
|
-
return False, f"检查元素时发生错误: {str(e)}"
|
|
78
|
+
raise RuntimeError(f"调用百度API,获取图片识别结果失败,原因{response}")
|
|
File without changes
|
|
File without changes
|
|
File without changes
|