python-qlv-helper 0.6.0__py3-none-any.whl → 0.9.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,11 +9,13 @@
9
9
  # Copyright ©2011-2025. Hunan xxxxxxx Company limited. All rights reserved.
10
10
  # ---------------------------------------------------------------------------------------------------------
11
11
  """
12
+ import json
13
+ import asyncio
12
14
  import ddddocr
13
15
  import requests
14
- from typing import Union, Tuple
16
+ from typing import Tuple
15
17
  from aiohttp import ClientSession
16
- from playwright.async_api import Page, TimeoutError as PlaywrightTimeoutError
18
+ from ocr_helper.core.baidu import ImageContentOCR
17
19
 
18
20
  # 复用 OCR 实例,不用每次都重新加载模型(更快)
19
21
  _ocr = ddddocr.DdddOcr(show_ad=False)
@@ -39,45 +41,38 @@ async def async_fetch_and_ocr_captcha(url: str) -> Tuple[str, bytes]:
39
41
  return result, img_bytes
40
42
 
41
43
 
42
- def recognize_captcha(image: Union[str, bytes]) -> str:
43
- """
44
- 识别验证码图片,返回识别文本。
45
- 参数:
46
- image: 图片路径 str,或图片的二进制 bytes
47
- 返回:
48
- 识别出的验证码字符串
49
- """
50
- try:
51
- # 如果是路径,读取文件
52
- if isinstance(image, str):
53
- with open(image, "rb") as f:
54
- img_bytes = f.read()
44
def _extract_captcha_answer(description) -> str:
    """Pull the captcha answer out of the model's free-text reply.

    The reply is expected to embed one JSON object with a ``content`` key
    (the recognised text) and an optional ``result`` key (the value of an
    arithmetic captcha).

    Raises:
        ValueError: if no JSON object can be decoded or it carries neither a
            usable ``result`` nor a ``content`` value.
    """
    if not isinstance(description, str):
        raise ValueError("description is not a string")
    start = description.find("{")
    if start == -1:
        raise ValueError("description contains no JSON object")
    # raw_decode parses exactly one JSON value and ignores trailing text, so
    # nested braces or a "}" inside a string value no longer truncate it.
    payload, _ = json.JSONDecoder().raw_decode(description[start:])

    result = payload.get("result")
    # Only None / False / blank mean "no arithmetic result"; a numeric 0 is a
    # legitimate answer (e.g. "3-3=?") and must not fall through to content.
    answer = "" if result is None or result is False else str(result).strip()
    if answer:
        return answer

    content = payload.get("content")
    if content is None:
        raise ValueError("description JSON has neither result nor content")
    return str(content).strip()


async def get_image_text(image_path: str, captcha_type: int, api_key: str, secret_key: str) -> str:
    """Recognise the text (or arithmetic result) of a captcha image file.

    Args:
        image_path: Path of the captcha image on disk.
        captcha_type: ``0`` selects the local ddddocr model; any other value
            selects the Baidu image-understanding API.
        api_key: Baidu API key (only used when ``captcha_type`` is not 0).
        secret_key: Baidu secret key (only used when ``captcha_type`` is not 0).

    Returns:
        The recognised captcha string, stripped of surrounding whitespace.

    Raises:
        RuntimeError: if ddddocr never yields a 4-character answer, or if any
            step of the Baidu flow (token, submit, result, reply parsing) fails.
        OSError: if ``image_path`` cannot be read (ddddocr branch).
    """
    if captcha_type == 0:
        with open(image_path, "rb") as f:
            img_bytes = f.read()
        # NOTE(review): the same bytes are classified on every attempt; if
        # ddddocr is deterministic for identical input the retries cannot
        # change the outcome -- confirm before relying on them.
        for _ in range(100):
            text = _ocr.classification(img_bytes).strip()
            if len(text) == 4:
                return text
        raise RuntimeError("ddddocr识别验证码失败")

    api = ImageContentOCR(api_key=api_key, secret_key=secret_key)

    # NOTE(review): is_end presumably tells the client whether this is the
    # last call of the session -- confirm against ocr_helper.
    response = await api.get_access_token(is_end=False)
    token = response.get("access_token")
    if not token:
        raise RuntimeError(f"获取百度API的认证Token失败,原因:{response}")

    # NOTE(review): the example JSON in the prompt has an unquoted `result`
    # key; a model that mimics it literally returns invalid JSON. The prompt
    # is left untouched here because changing it alters model behaviour.
    response = await api.submit_request(
        question='图片中的文字是什么,如果含有运算信息,请将运算结果返回。注意给我返回一个json格式数据包,例如:{"content":"xxxx", result: xxx}, 如果无运算信息,设置为空串就行',
        image_path=image_path,
        token=token,
        is_end=False
    )
    # `or {}` also covers an explicit null "result", which a .get() default
    # alone would not.
    task_id = (response.get("result") or {}).get("task_id")
    if not task_id:
        raise RuntimeError(f"提交图片至百度API接口失败,原因:{response}")

    # Fixed wait before fetching the result of the submitted task.
    await asyncio.sleep(delay=10)
    response = await api.get_result(task_id=task_id, token=token, is_end=True)

    result = response.get("result") or {}
    if result.get("ret_code") == 0 and result.get("ret_msg") == "success":
        try:
            return _extract_captcha_answer(result.get("description"))
        except ValueError as err:
            # json.JSONDecodeError is a ValueError, so malformed replies are
            # reported the same way as any other failed recognition.
            raise RuntimeError(f"调用百度API,获取图片识别结果失败,原因{response}") from err
    raise RuntimeError(f"调用百度API,获取图片识别结果失败,原因{response}")