PyPI - kotonebot - Versions diffs - 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl - Mend

kotonebot 0.4.0py3-none-any.whl → 0.5.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (64) hide show

kotonebot/__init__.py +39 -39
kotonebot/backend/bot.py +312 -312
kotonebot/backend/color.py +525 -525
kotonebot/backend/context/__init__.py +3 -3
kotonebot/backend/context/task_action.py +183 -183
kotonebot/backend/core.py +129 -129
kotonebot/backend/debug/entry.py +89 -89
kotonebot/backend/debug/mock.py +78 -78
kotonebot/backend/debug/server.py +222 -222
kotonebot/backend/debug/vars.py +351 -351
kotonebot/backend/dispatch.py +227 -227
kotonebot/backend/flow_controller.py +196 -196
kotonebot/backend/ocr.py +535 -529
kotonebot/backend/preprocessor.py +103 -103
kotonebot/client/__init__.py +9 -9
kotonebot/client/device.py +528 -503
kotonebot/client/fast_screenshot.py +377 -377
kotonebot/client/host/__init__.py +43 -12
kotonebot/client/host/adb_common.py +107 -103
kotonebot/client/host/custom.py +118 -114
kotonebot/client/host/leidian_host.py +196 -201
kotonebot/client/host/mumu12_host.py +353 -358
kotonebot/client/host/protocol.py +214 -213
kotonebot/client/host/windows_common.py +58 -58
kotonebot/client/implements/__init__.py +71 -15
kotonebot/client/implements/adb.py +89 -85
kotonebot/client/implements/adb_raw.py +162 -158
kotonebot/client/implements/nemu_ipc/__init__.py +11 -7
kotonebot/client/implements/nemu_ipc/external_renderer_ipc.py +284 -284
kotonebot/client/implements/nemu_ipc/nemu_ipc.py +327 -327
kotonebot/client/implements/remote_windows.py +188 -188
kotonebot/client/implements/uiautomator2.py +85 -81
kotonebot/client/implements/windows.py +176 -172
kotonebot/client/protocol.py +69 -69
kotonebot/client/registration.py +24 -24
kotonebot/config/base_config.py +96 -96
kotonebot/config/manager.py +36 -36
kotonebot/errors.py +76 -71
kotonebot/interop/win/__init__.py +10 -3
kotonebot/interop/win/_mouse.py +311 -0
kotonebot/interop/win/message_box.py +313 -313
kotonebot/interop/win/reg.py +37 -37
kotonebot/interop/win/shortcut.py +43 -43
kotonebot/interop/win/task_dialog.py +513 -513
kotonebot/logging/__init__.py +2 -2
kotonebot/logging/log.py +17 -17
kotonebot/primitives/__init__.py +17 -17
kotonebot/primitives/geometry.py +862 -290
kotonebot/primitives/visual.py +63 -63
kotonebot/tools/mirror.py +354 -354
kotonebot/ui/file_host/sensio.py +36 -36
kotonebot/ui/file_host/tmp_send.py +54 -54
kotonebot/ui/pushkit/__init__.py +3 -3
kotonebot/ui/pushkit/image_host.py +88 -87
kotonebot/ui/pushkit/protocol.py +13 -13
kotonebot/ui/pushkit/wxpusher.py +54 -53
kotonebot/ui/user.py +148 -148
kotonebot/util.py +436 -436
{kotonebot-0.4.0.dist-info → kotonebot-0.5.0.dist-info}/METADATA +82 -81
kotonebot-0.5.0.dist-info/RECORD +71 -0
{kotonebot-0.4.0.dist-info → kotonebot-0.5.0.dist-info}/licenses/LICENSE +673 -673
kotonebot-0.4.0.dist-info/RECORD +0 -70
{kotonebot-0.4.0.dist-info → kotonebot-0.5.0.dist-info}/WHEEL +0 -0
{kotonebot-0.4.0.dist-info → kotonebot-0.5.0.dist-info}/top_level.txt +0 -0

kotonebot/backend/ocr.py CHANGED Viewed

@@ -1,529 +1,535 @@
-import re
-import time
-import logging
-import unicodedata
-from functools import lru_cache
-from dataclasses import dataclass
-import warnings
-from typing_extensions import Self, deprecated
-from typing import Callable, NamedTuple
-import cv2
-import numpy as np
-from cv2.typing import MatLike
-from thefuzz import fuzz as _fuzz
-from rapidocr_onnxruntime import RapidOCR
-from ..util import lf_path
-from ..primitives import Rect, Point
-from .core import HintBox, Image, unify_image
-from .debug import result as debug_result, debug
-logger = logging.getLogger(__name__)
-StringMatchFunction = Callable[[str], bool]
-REGEX_NUMBERS = re.compile(r'\d+')
-global_character_mapping: dict[str, str] = {
-    'ó': '6',
-    'ą': 'a',
-}
-"""
-全局字符映射表。某些字符可能在某些情况下被错误地识别，此时可以在这里添加映射。
-"""
-def sanitize_text(text: str) -> str:
-    """
-    对识别结果进行清理。此函数将被所有 OCR 引擎调用。
-    默认使用 `global_character_mapping` 中的映射数据进行清理。
-    可以重写此函数以实现自定义的清理逻辑。
-    """
-    for k, v in global_character_mapping.items():
-        text = text.replace(k, v)
-    return text
-@dataclass
-class OcrResult:
-    text: str
-    rect: Rect
-    confidence: float
-    original_rect: Rect
-    """
-    识别结果在原图中的区域坐标。
-    如果识别时没有设置 `rect` 或 `hint` 参数，则此属性值与 `rect` 相同。
-    """
-    def __repr__(self) -> str:
-        return f'OcrResult(text="{self.text}", rect={self.rect}, confidence={self.confidence})'
-    def replace(self, old: str, new: str, count: int = -1) -> Self:
-        """
-        替换识别结果中的文本。
-        """
-        self.text = self.text.replace(old, new, count)
-        return self
-    def regex(self, pattern: re.Pattern | str) -> list[str]:
-        """
-        提取识别结果中符合正则表达式的文本。
-        """
-        if isinstance(pattern, str):
-            pattern = re.compile(pattern)
-        return pattern.findall(self.text)
-    def numbers(self) -> list[int]:
-        """
-        提取识别结果中的数字。
-        """
-        return [int(x) for x in REGEX_NUMBERS.findall(self.text)]
-class OcrResultList(list[OcrResult]):
-    def squash(self, remove_newlines: bool = True) -> OcrResult:
-        """
-        将所有识别结果合并为一个大结果。
-        """
-        if not self:
-            return OcrResult('', Rect(0, 0, 0, 0), 0, Rect(0, 0, 0, 0))
-        text = [r.text for r in self]
-        confidence = sum(r.confidence for r in self) / len(self)
-        points = []
-        for r in self:
-            points.append(Point(r.rect.x1, r.rect.y1))
-            points.append(Point(r.rect.x1 + r.rect.w, r.rect.y1))
-            points.append(Point(r.rect.x1, r.rect.y1 + r.rect.h))
-            points.append(Point(r.rect.x1 + r.rect.w, r.rect.y1 + r.rect.h))
-        rect = Rect(xywh=bounding_box(points))
-        text = '\n'.join(text)
-        if remove_newlines:
-            text = text.replace('\n', '')
-        return OcrResult(
-            text=text,
-            rect=rect,
-            confidence=confidence,
-            original_rect=rect,
-        )
-    def first(self) -> OcrResult | None:
-        """
-        返回第一个识别结果。
-        """
-        return self[0] if self else None
-    def where(self, pattern: StringMatchFunction) -> 'OcrResultList':
-        """
-        返回符合条件的识别结果。
-        """
-        return OcrResultList([x for x in self if pattern(x.text)])
-class TextNotFoundError(Exception):
-    def __init__(self, pattern: str | re.Pattern | StringMatchFunction, image: 'MatLike'):
-        self.pattern = pattern
-        self.image = image
-        super().__init__(f"Expected text not found: {pattern}")
-class TextComparator:
-    def __init__(self, name: str, text: str, func: Callable[[str], bool]):
-        self.name = name
-        self.text = text
-        self.func = func
-    def __call__(self, text: str) -> bool:
-        return self.func(text)
-    def __repr__(self) -> str:
-        return f'{self.name}("{self.text}")'
-@deprecated("即将移除")
-@lru_cache(maxsize=1000)
-def fuzz(text: str) -> TextComparator:
-    """返回 fuzzy 算法的字符串匹配函数。"""
-    func = lambda s: _fuzz.ratio(s, text) > 90
-    return TextComparator("fuzzy", text, func)
-@lru_cache(maxsize=1000)
-def regex(regex: str) -> TextComparator:
-    """返回正则表达式字符串匹配函数。"""
-    func = lambda s: re.match(regex, s) is not None
-    return TextComparator("regex", regex, func)
-@lru_cache(maxsize=1000)
-def contains(text: str, *, ignore_case: bool = False) -> TextComparator:
-    """返回包含指定文本的函数。"""
-    if ignore_case:
-        func = lambda s: text.lower() in s.lower()
-    else:
-        func = lambda s: text in s
-    return TextComparator("contains", text, func)
-@lru_cache(maxsize=1000)
-def equals(
-    text: str,
-    *,
-    remove_space: bool = False,
-    ignore_case: bool = True,
-) -> TextComparator:
-    """
-    返回等于指定文本的函数。
-    :param text: 要比较的文本。
-    :param remove_space: 是否忽略空格。默认为 False。
-    :param ignore_case: 是否忽略大小写。默认为 True。
-    """
-    def compare(s: str) -> bool:
-        nonlocal text
-        if ignore_case:
-            text = text.lower()
-            s = s.lower()
-        if remove_space:
-            text = text.replace(' ', '').replace('　', '')
-            s = s.replace(' ', '').replace('　', '')
-        return text == s
-    return TextComparator("equals", text, compare)
-def grayscaled(img: 'MatLike | str | Image') -> MatLike:
-    img = unify_image(img)
-    return cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-def _is_match(text: str, pattern: re.Pattern | str | StringMatchFunction | TextComparator) -> bool:
-    if isinstance(pattern, re.Pattern):
-        return pattern.match(text) is not None
-    elif callable(pattern):
-        return pattern(text)
-    else:
-        return text == pattern
-# https://stackoverflow.com/questions/46335488/how-to-efficiently-find-the-bounding-box-of-a-collection-of-points
-def _bounding_box(points):
-    x_coordinates, y_coordinates = zip(*points)
-    return [(min(x_coordinates), min(y_coordinates)), (max(x_coordinates), max(y_coordinates))]
-def bounding_box(points: list[tuple[int, int]]) -> tuple[int, int, int, int]:
-    """
-    计算点集的外接矩形。
-    :param points: 点集。以左上角为原点，向下向右为正方向。
-    :return: 外接矩形的左上角坐标和宽高
-    """
-    topleft, bottomright = _bounding_box(points)
-    return (topleft[0], topleft[1], bottomright[0] - topleft[0], bottomright[1] - topleft[1])
-def pad_to(img: MatLike, target_size: int, rgb: tuple[int, int, int] = (255, 255, 255)) -> tuple[MatLike, tuple[int, int]]:
-    """
-    将图像居中填充到指定大小。缺少部分使用指定颜色填充。
-    :return: 填充后的图像和填充的偏移量 (x, y)。
-    """
-    h, w = img.shape[:2]
-    # 计算需要填充的宽高
-    pad_h = max(0, target_size - h)
-    pad_w = max(0, target_size - w)
-    # 如果不需要填充则直接返回
-    if pad_h == 0 and pad_w == 0:
-        return img, (0, 0)
-    # 创建目标画布并填充
-    if len(img.shape) == 2:
-        # 灰度图像
-        ret = np.full((h + pad_h, w + pad_w), rgb[0], dtype=np.uint8)
-    else:
-        # RGB图像
-        ret = np.full((h + pad_h, w + pad_w, 3), rgb, dtype=np.uint8)
-    # 将原图像居中放置
-    if len(img.shape) == 2:
-        ret[
-            pad_h // 2:pad_h // 2 + h,
-            pad_w // 2:pad_w // 2 + w] = img
-    else:
-        ret[
-            pad_h // 2:pad_h // 2 + h,
-            pad_w // 2:pad_w // 2 + w, :] = img
-    return ret, (pad_w // 2, pad_h // 2)
-def _draw_result(image: 'MatLike', result: list[OcrResult]) -> 'MatLike':
-    import numpy as np
-    from PIL import Image, ImageDraw, ImageFont
-    # 转换为PIL图像
-    result_image = cv2.cvtColor(image.copy(), cv2.COLOR_BGR2RGB)
-    pil_image = Image.fromarray(result_image)
-    draw = ImageDraw.Draw(pil_image, 'RGBA')
-    # 加载字体
-    try:
-        font = ImageFont.truetype(lf_path('res/fonts/SourceHanSansHW-Regular.otf'), 16)
-    except:
-        font = ImageFont.load_default()
-    for r in result:
-        # 画矩形框
-        draw.rectangle(
-            [r.rect.x1, r.rect.y1, r.rect.x1 + r.rect.w, r.rect.y1 + r.rect.h],
-            outline=(255, 0, 0),
-            width=2
-        )
-        # 获取文本大小
-        text = r.text + f" ({r.confidence:.2f})"  # 添加置信度显示
-        text_bbox = draw.textbbox((0, 0), text, font=font)
-        text_width = text_bbox[2] - text_bbox[0]
-        text_height = text_bbox[3] - text_bbox[1]
-        # 计算文本位置
-        text_x = r.rect.x1
-        text_y = r.rect.y1 - text_height - 5 if r.rect.y1 > text_height + 5 else r.rect.y1 + r.rect.h + 5
-        # 添加padding
-        padding = 4
-        bg_rect = [
-            text_x - padding,
-            text_y - padding,
-            text_x + text_width + padding,
-            text_y + text_height + padding
-        ]
-        # 画半透明背景
-        draw.rectangle(
-            bg_rect,
-            fill=(0, 0, 0, 128)
-        )
-        # 画文字
-        draw.text(
-            (text_x, text_y),
-            text,
-            font=font,
-            fill=(255, 255, 255)
-        )
-    # 转回OpenCV格式
-    result_image = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
-    return result_image
-class Ocr:
-    def __init__(self, engine: RapidOCR):
-        self.__engine = engine
-    # TODO: 考虑缓存 OCR 结果，避免重复调用。
-    def ocr(
-        self,
-        img: 'MatLike',
-        *,
-        rect: Rect | None = None,
-        pad: bool = True,
-    ) -> OcrResultList:
-        """
-        OCR 一个 cv2 的图像。注意识别结果中的**全角字符会被转换为半角字符**。
-        :param rect: 如果指定，则只识别指定矩形区域。
-        :param pad:
-            是否将过小的图像（尺寸 < 631x631）的图像填充到 631x631。
-            默认为 True。
-            对于 PaddleOCR 模型，图片尺寸太小会降低准确率。
-            将图片周围填充放大，有助于提高准确率，降低耗时。
-        :return: 所有识别结果
-        """
-        if rect is not None:
-            x, y, w, h = rect.xywh
-            img = img[y:y+h, x:x+w]
-        original_img = img
-        if pad:
-            # TODO: 详细研究哪个尺寸最佳，以及背景颜色、图片位置是否对准确率与耗时有影响
-            # https://blog.csdn.net/YY007H/article/details/124973777
-            original_img = img.copy()
-            img, pos_in_padded_img = pad_to(img, 631)
-        else:
-            pos_in_padded_img = (0, 0)
-        img_content = img
-        result, elapse = self.__engine(img_content)
-        if result is None:
-            return OcrResultList()
-        ret = []
-        for r in result:
-            text = sanitize_text(r[1])
-            # r[0] = [左上, 右上, 右下, 左下]
-            # 这里有个坑，返回的点不一定是矩形，只能保证是四边形
-            # 所以这里需要计算出四个点的外接矩形
-            result_rect = tuple(int(x) for x in bounding_box(r[0])) # type: ignore
-            # result_rect (x, y, w, h)
-            if rect is not None:
-                original_rect = (
-                    result_rect[0] + rect.x1 - pos_in_padded_img[0],
-                    result_rect[1] + rect.y1 - pos_in_padded_img[1],
-                    result_rect[2],
-                    result_rect[3]
-                )
-            else:
-                original_rect = result_rect
-            if not len(original_rect) == 4:
-                raise ValueError(f'Invalid original_rect: {original_rect}')
-            if not len(result_rect) == 4:
-                raise ValueError(f'Invalid result_rect: {result_rect}')
-            confidence = float(r[2])
-            ret.append(OcrResult(
-                text=text,
-                rect=Rect(xywh=result_rect),
-                original_rect=Rect(xywh=original_rect),
-                confidence=confidence
-            ))
-        ret = OcrResultList(ret)
-        if debug.enabled:
-            result_image = _draw_result(img, ret)
-            elapse = elapse or [0, 0, 0]
-            debug_result(
-                'ocr',
-                [result_image, original_img],
-                f"pad={pad}\n" + \
-                f"rect={rect}\n" + \
-                f"elapsed: det={elapse[0]:.3f}s cls={elapse[1]:.3f}s rec={elapse[2]:.3f}s\n" + \
-                f"result: \n" + \
-                "<table class='result-table'><tr><th>Text</th><th>Confidence</th></tr>" + \
-                "\n".join([f"<tr><td>{r.text}</td><td>{r.confidence:.3f}</td></tr>" for r in ret]) + \
-                "</table>"
-            )
-        return ret
-    def find(
-        self,
-        img: 'MatLike',
-        text: str | re.Pattern | StringMatchFunction,
-        *,
-        hint: HintBox | None = None,
-        rect: Rect | None = None,
-        pad: bool = True,
-    ) -> OcrResult | None:
-        """
-        识别图像中的文本，并寻找满足指定要求的文本。
-        :param hint: 如果指定，则首先只识别 HintBox 范围内的文本，若未命中，再全局寻找。
-        :param rect: 如果指定，则只识别指定矩形区域。此参数优先级低于 `hint`。
-        :param pad: 见 `ocr` 的 `pad` 参数。
-        :return: 找到的文本，如果未找到则返回 None
-        """
-        if hint is not None:
-            warnings.warn("使用 `rect` 参数代替")
-            if ret := self.find(img, text, rect=Rect(xywh=hint.rect)):
-                logger.debug(f"find: {text} SUCCESS [hint={hint}]")
-                return ret
-            logger.debug(f"find: {text} FAILED [hint={hint}]")
-        start_time = time.time()
-        results = self.ocr(img, rect=rect, pad=pad)
-        end_time = time.time()
-        target = None
-        for result in results:
-            if _is_match(result.text, text):
-                target = result
-                break
-        logger.debug(
-            f"find: {text} {'SUCCESS' if target else 'FAILED'} " + \
-            f"[elapsed={end_time - start_time:.3f}s] [rect={rect}]"
-        )
-        return target
-    def find_all(
-        self,
-        img: 'MatLike',
-        texts: list[str | re.Pattern | StringMatchFunction],
-        *,
-        hint: HintBox | None = None,
-        rect: Rect | None = None,
-        pad: bool = True,
-    ) -> list[OcrResult | None]:
-        """
-        识别图像中的文本，并寻找多个满足指定要求的文本。
-        :return:
-            所有找到的文本，结果顺序与输入顺序相同。
-            若某个文本未找到，则该位置为 None。
-        """
-        # HintBox 处理
-        if hint is not None:
-            warnings.warn("使用 `rect` 参数代替")
-            result = self.find_all(img, texts, rect=Rect(xywh=hint.rect), pad=pad)
-            if all(result):
-                return result
-        ret: list[OcrResult | None] = []
-        ocr_results = self.ocr(img, rect=rect, pad=pad)
-        logger.debug(f"ocr_results: {ocr_results}")
-        for text in texts:
-            for result in ocr_results:
-                if _is_match(result.text, text):
-                    ret.append(result)
-                    break
-            else:
-                ret.append(None)
-        return ret
-    def expect(
-        self,
-        img: 'MatLike',
-        text: str | re.Pattern | StringMatchFunction,
-        *,
-        hint: HintBox | None = None,
-        rect: Rect | None = None,
-        pad: bool = True,
-    ) -> OcrResult:
-        """
-        识别图像中的文本，并寻找满足指定要求的文本。如果未找到则抛出异常。
-        :param hint: 如果指定，则首先只识别 HintBox 范围内的文本，若未命中，再全局寻找。
-        :param rect: 如果指定，则只识别指定矩形区域。此参数优先级高于 `hint`。
-        :param pad: 见 `ocr` 的 `pad` 参数。
-        :return: 找到的文本
-        """
-        ret = self.find(img, text, hint=hint, rect=rect, pad=pad)
-        if ret is None:
-            raise TextNotFoundError(text, img)
-        return ret
-# TODO: 这个路径需要能够独立设置
-_engine_jp: RapidOCR | None = None
-_engine_en: RapidOCR | None = RapidOCR(
-    rec_model_path=lf_path('models/en_PP-OCRv3_rec_infer.onnx'),
-    use_det=True,
-    use_cls=False,
-    use_rec=True,
-)
-def jp() -> Ocr:
-    """
-    日语 OCR 引擎。
-    """
-    global _engine_jp
-    if _engine_jp is None:
-        _engine_jp = RapidOCR(
-            rec_model_path=lf_path('models/japan_PP-OCRv3_rec_infer.onnx'),
-            use_det=True,
-            use_cls=False,
-            use_rec=True,
-        )
-    return Ocr(_engine_jp)
-def en() -> Ocr:
-    """
-    英语 OCR 引擎。
-    """
-    global _engine_en
-    if _engine_en is None:
-        _engine_en = RapidOCR(
-            rec_model_path=lf_path('models/en_PP-OCRv3_rec_infer.onnx'),
-            use_det=True,
-            use_cls=False,
-            use_rec=True,
-        )
-    return Ocr(_engine_en)
-if __name__ == '__main__':
-    pass
+import re
+import time
+import logging
+import unicodedata
+from functools import lru_cache
+from dataclasses import dataclass
+import warnings
+from typing_extensions import Self, deprecated
+from typing import Callable, NamedTuple
+import cv2
+import numpy as np
+from cv2.typing import MatLike
+from thefuzz import fuzz as _fuzz
+from rapidocr_onnxruntime import RapidOCR
+from ..util import lf_path
+from ..primitives import Rect, Point
+from .core import HintBox, Image, unify_image
+from .debug import result as debug_result, debug
+logger = logging.getLogger(__name__)
+StringMatchFunction = Callable[[str], bool]
+REGEX_NUMBERS = re.compile(r'\d+')
+global_character_mapping: dict[str, str] = {
+    'ó': '6',
+    'ą': 'a',
+}
+"""
+全局字符映射表。某些字符可能在某些情况下被错误地识别，此时可以在这里添加映射。
+"""
+def sanitize_text(text: str) -> str:
+    """
+    对识别结果进行清理。此函数将被所有 OCR 引擎调用。
+    默认行为为先将文本 `Unicode 规范化`_，然后使用 `global_character_mapping` 中的映射数据进行清理。
+    可以重写此函数以实现自定义的清理逻辑。
+    .. note::
+        Unicode 规范化最常见的一个行为是将全角字符转换为半角字符。
+    .. _Unicode 规范化: https://docs.python.org/zh-cn/3.14/library/unicodedata.html#unicodedata.normalize
+    """
+    text = unicodedata.normalize('NFKC', text)
+    for k, v in global_character_mapping.items():
+        text = text.replace(k, v)
+    return text
+@dataclass
+class OcrResult:
+    text: str
+    rect: Rect
+    confidence: float
+    original_rect: Rect
+    """
+    识别结果在原图中的区域坐标。
+    如果识别时没有设置 `rect` 或 `hint` 参数，则此属性值与 `rect` 相同。
+    """
+    def __repr__(self) -> str:
+        return f'OcrResult(text="{self.text}", rect={self.rect}, confidence={self.confidence})'
+    def replace(self, old: str, new: str, count: int = -1) -> Self:
+        """
+        替换识别结果中的文本。
+        """
+        self.text = self.text.replace(old, new, count)
+        return self
+    def regex(self, pattern: re.Pattern | str) -> list[str]:
+        """
+        提取识别结果中符合正则表达式的文本。
+        """
+        if isinstance(pattern, str):
+            pattern = re.compile(pattern)
+        return pattern.findall(self.text)
+    def numbers(self) -> list[int]:
+        """
+        提取识别结果中的数字。
+        """
+        return [int(x) for x in REGEX_NUMBERS.findall(self.text)]
+class OcrResultList(list[OcrResult]):
+    def squash(self, remove_newlines: bool = True) -> OcrResult:
+        """
+        将所有识别结果合并为一个大结果。
+        """
+        if not self:
+            return OcrResult('', Rect(0, 0, 0, 0), 0, Rect(0, 0, 0, 0))
+        text = [r.text for r in self]
+        confidence = sum(r.confidence for r in self) / len(self)
+        points = []
+        for r in self:
+            points.append(Point(r.rect.x1, r.rect.y1))
+            points.append(Point(r.rect.x1 + r.rect.w, r.rect.y1))
+            points.append(Point(r.rect.x1, r.rect.y1 + r.rect.h))
+            points.append(Point(r.rect.x1 + r.rect.w, r.rect.y1 + r.rect.h))
+        rect = Rect(xywh=bounding_box(points))
+        text = '\n'.join(text)
+        if remove_newlines:
+            text = text.replace('\n', '')
+        return OcrResult(
+            text=text,
+            rect=rect,
+            confidence=confidence,
+            original_rect=rect,
+        )
+    def first(self) -> OcrResult | None:
+        """
+        返回第一个识别结果。
+        """
+        return self[0] if self else None
+    def where(self, pattern: StringMatchFunction) -> 'OcrResultList':
+        """
+        返回符合条件的识别结果。
+        """
+        return OcrResultList([x for x in self if pattern(x.text)])
+class TextNotFoundError(Exception):
+    def __init__(self, pattern: str | re.Pattern | StringMatchFunction, image: 'MatLike'):
+        self.pattern = pattern
+        self.image = image
+        super().__init__(f"Expected text not found: {pattern}")
+class TextComparator:
+    def __init__(self, name: str, text: str, func: Callable[[str], bool]):
+        self.name = name
+        self.text = text
+        self.func = func
+    def __call__(self, text: str) -> bool:
+        return self.func(text)
+    def __repr__(self) -> str:
+        return f'{self.name}("{self.text}")'
+@deprecated("即将移除")
+@lru_cache(maxsize=1000)
+def fuzz(text: str) -> TextComparator:
+    """返回 fuzzy 算法的字符串匹配函数。"""
+    func = lambda s: _fuzz.ratio(s, text) > 90
+    return TextComparator("fuzzy", text, func)
+@lru_cache(maxsize=1000)
+def regex(regex: str) -> TextComparator:
+    """返回正则表达式字符串匹配函数。"""
+    func = lambda s: re.match(regex, s) is not None
+    return TextComparator("regex", regex, func)
+@lru_cache(maxsize=1000)
+def contains(text: str, *, ignore_case: bool = False) -> TextComparator:
+    """返回包含指定文本的函数。"""
+    if ignore_case:
+        func = lambda s: text.lower() in s.lower()
+    else:
+        func = lambda s: text in s
+    return TextComparator("contains", text, func)
+@lru_cache(maxsize=1000)
+def equals(
+    text: str,
+    *,
+    remove_space: bool = False,
+    ignore_case: bool = True,
+) -> TextComparator:
+    """
+    返回等于指定文本的函数。
+    :param text: 要比较的文本。
+    :param remove_space: 是否忽略空格。默认为 False。
+    :param ignore_case: 是否忽略大小写。默认为 True。
+    """
+    def compare(s: str) -> bool:
+        nonlocal text
+        if ignore_case:
+            text = text.lower()
+            s = s.lower()
+        if remove_space:
+            text = text.replace(' ', '').replace('　', '')
+            s = s.replace(' ', '').replace('　', '')
+        return text == s
+    return TextComparator("equals", text, compare)
+def grayscaled(img: 'MatLike | str | Image') -> MatLike:
+    img = unify_image(img)
+    return cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+def _is_match(text: str, pattern: re.Pattern | str | StringMatchFunction | TextComparator) -> bool:
+    if isinstance(pattern, re.Pattern):
+        return pattern.match(text) is not None
+    elif callable(pattern):
+        return pattern(text)
+    else:
+        return text == pattern
+# https://stackoverflow.com/questions/46335488/how-to-efficiently-find-the-bounding-box-of-a-collection-of-points
+def _bounding_box(points):
+    x_coordinates, y_coordinates = zip(*points)
+    return [(min(x_coordinates), min(y_coordinates)), (max(x_coordinates), max(y_coordinates))]
+def bounding_box(points: list[tuple[int, int]]) -> tuple[int, int, int, int]:
+    """
+    计算点集的外接矩形。
+    :param points: 点集。以左上角为原点，向下向右为正方向。
+    :return: 外接矩形的左上角坐标和宽高
+    """
+    topleft, bottomright = _bounding_box(points)
+    return (topleft[0], topleft[1], bottomright[0] - topleft[0], bottomright[1] - topleft[1])
+def pad_to(img: MatLike, target_size: int, rgb: tuple[int, int, int] = (255, 255, 255)) -> tuple[MatLike, tuple[int, int]]:
+    """
+    将图像居中填充到指定大小。缺少部分使用指定颜色填充。
+    :return: 填充后的图像和填充的偏移量 (x, y)。
+    """
+    h, w = img.shape[:2]
+    # 计算需要填充的宽高
+    pad_h = max(0, target_size - h)
+    pad_w = max(0, target_size - w)
+    # 如果不需要填充则直接返回
+    if pad_h == 0 and pad_w == 0:
+        return img, (0, 0)
+    # 创建目标画布并填充
+    if len(img.shape) == 2:
+        # 灰度图像
+        ret = np.full((h + pad_h, w + pad_w), rgb[0], dtype=np.uint8)
+    else:
+        # RGB图像
+        ret = np.full((h + pad_h, w + pad_w, 3), rgb, dtype=np.uint8)
+    # 将原图像居中放置
+    if len(img.shape) == 2:
+        ret[
+            pad_h // 2:pad_h // 2 + h,
+            pad_w // 2:pad_w // 2 + w] = img
+    else:
+        ret[
+            pad_h // 2:pad_h // 2 + h,
+            pad_w // 2:pad_w // 2 + w, :] = img
+    return ret, (pad_w // 2, pad_h // 2)
+def _draw_result(image: 'MatLike', result: list[OcrResult]) -> 'MatLike':
+    import numpy as np
+    from PIL import Image, ImageDraw, ImageFont
+    # 转换为PIL图像
+    result_image = cv2.cvtColor(image.copy(), cv2.COLOR_BGR2RGB)
+    pil_image = Image.fromarray(result_image)
+    draw = ImageDraw.Draw(pil_image, 'RGBA')
+    # 加载字体
+    try:
+        font = ImageFont.truetype(lf_path('res/fonts/SourceHanSansHW-Regular.otf'), 16)
+    except:
+        font = ImageFont.load_default()
+    for r in result:
+        # 画矩形框
+        draw.rectangle(
+            [r.rect.x1, r.rect.y1, r.rect.x1 + r.rect.w, r.rect.y1 + r.rect.h],
+            outline=(255, 0, 0),
+            width=2
+        )
+        # 获取文本大小
+        text = r.text + f" ({r.confidence:.2f})"  # 添加置信度显示
+        text_bbox = draw.textbbox((0, 0), text, font=font)
+        text_width = text_bbox[2] - text_bbox[0]
+        text_height = text_bbox[3] - text_bbox[1]
+        # 计算文本位置
+        text_x = r.rect.x1
+        text_y = r.rect.y1 - text_height - 5 if r.rect.y1 > text_height + 5 else r.rect.y1 + r.rect.h + 5
+        # 添加padding
+        padding = 4
+        bg_rect = [
+            text_x - padding,
+            text_y - padding,
+            text_x + text_width + padding,
+            text_y + text_height + padding
+        ]
+        # 画半透明背景
+        draw.rectangle(
+            bg_rect,
+            fill=(0, 0, 0, 128)
+        )
+        # 画文字
+        draw.text(
+            (text_x, text_y),
+            text,
+            font=font,
+            fill=(255, 255, 255)
+        )
+    # 转回OpenCV格式
+    result_image = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
+    return result_image
+class Ocr:
+    def __init__(self, engine: RapidOCR):
+        self.__engine = engine
+    # TODO: 考虑缓存 OCR 结果，避免重复调用。
+    def ocr(
+        self,
+        img: 'MatLike',
+        *,
+        rect: Rect | None = None,
+        pad: bool = True,
+    ) -> OcrResultList:
+        """
+        OCR 一个 cv2 的图像。注意识别结果中的**全角字符会被转换为半角字符**。
+        :param rect: 如果指定，则只识别指定矩形区域。
+        :param pad:
+            是否将过小的图像（尺寸 < 631x631）的图像填充到 631x631。
+            默认为 True。
+            对于 PaddleOCR 模型，图片尺寸太小会降低准确率。
+            将图片周围填充放大，有助于提高准确率，降低耗时。
+        :return: 所有识别结果
+        """
+        if rect is not None:
+            x, y, w, h = rect.xywh
+            img = img[y:y+h, x:x+w]
+        original_img = img
+        if pad:
+            # TODO: 详细研究哪个尺寸最佳，以及背景颜色、图片位置是否对准确率与耗时有影响
+            # https://blog.csdn.net/YY007H/article/details/124973777
+            original_img = img.copy()
+            img, pos_in_padded_img = pad_to(img, 631)
+        else:
+            pos_in_padded_img = (0, 0)
+        img_content = img
+        result, elapse = self.__engine(img_content)
+        if result is None:
+            return OcrResultList()
+        ret = []
+        for r in result:
+            text = sanitize_text(r[1])
+            # r[0] = [左上, 右上, 右下, 左下]
+            # 这里有个坑，返回的点不一定是矩形，只能保证是四边形
+            # 所以这里需要计算出四个点的外接矩形
+            result_rect = tuple(int(x) for x in bounding_box(r[0])) # type: ignore
+            # result_rect (x, y, w, h)
+            if rect is not None:
+                original_rect = (
+                    result_rect[0] + rect.x1 - pos_in_padded_img[0],
+                    result_rect[1] + rect.y1 - pos_in_padded_img[1],
+                    result_rect[2],
+                    result_rect[3]
+                )
+            else:
+                original_rect = result_rect
+            if not len(original_rect) == 4:
+                raise ValueError(f'Invalid original_rect: {original_rect}')
+            if not len(result_rect) == 4:
+                raise ValueError(f'Invalid result_rect: {result_rect}')
+            confidence = float(r[2])
+            ret.append(OcrResult(
+                text=text,
+                rect=Rect(xywh=result_rect),
+                original_rect=Rect(xywh=original_rect),
+                confidence=confidence
+            ))
+        ret = OcrResultList(ret)
+        if debug.enabled:
+            result_image = _draw_result(img, ret)
+            elapse = elapse or [0, 0, 0]
+            debug_result(
+                'ocr',
+                [result_image, original_img],
+                f"pad={pad}\n" + \
+                f"rect={rect}\n" + \
+                f"elapsed: det={elapse[0]:.3f}s cls={elapse[1]:.3f}s rec={elapse[2]:.3f}s\n" + \
+                f"result: \n" + \
+                "<table class='result-table'><tr><th>Text</th><th>Confidence</th></tr>" + \
+                "\n".join([f"<tr><td>{r.text}</td><td>{r.confidence:.3f}</td></tr>" for r in ret]) + \
+                "</table>"
+            )
+        return ret
+    def find(
+        self,
+        img: 'MatLike',
+        text: str | re.Pattern | StringMatchFunction,
+        *,
+        hint: HintBox | None = None,
+        rect: Rect | None = None,
+        pad: bool = True,
+    ) -> OcrResult | None:
+        """
+        识别图像中的文本，并寻找满足指定要求的文本。
+        :param hint: 如果指定，则首先只识别 HintBox 范围内的文本，若未命中，再全局寻找。
+        :param rect: 如果指定，则只识别指定矩形区域。此参数优先级低于 `hint`。
+        :param pad: 见 `ocr` 的 `pad` 参数。
+        :return: 找到的文本，如果未找到则返回 None
+        """
+        if hint is not None:
+            warnings.warn("使用 `rect` 参数代替")
+            if ret := self.find(img, text, rect=Rect(xywh=hint.rect)):
+                logger.debug(f"find: {text} SUCCESS [hint={hint}]")
+                return ret
+            logger.debug(f"find: {text} FAILED [hint={hint}]")
+        start_time = time.time()
+        results = self.ocr(img, rect=rect, pad=pad)
+        end_time = time.time()
+        target = None
+        for result in results:
+            if _is_match(result.text, text):
+                target = result
+                break
+        logger.debug(
+            f"find: {text} {'SUCCESS' if target else 'FAILED'} " + \
+            f"[elapsed={end_time - start_time:.3f}s] [rect={rect}]"
+        )
+        return target
+    def find_all(
+        self,
+        img: 'MatLike',
+        texts: list[str | re.Pattern | StringMatchFunction],
+        *,
+        hint: HintBox | None = None,
+        rect: Rect | None = None,
+        pad: bool = True,
+    ) -> list[OcrResult | None]:
+        """
+        识别图像中的文本，并寻找多个满足指定要求的文本。
+        :return:
+            所有找到的文本，结果顺序与输入顺序相同。
+            若某个文本未找到，则该位置为 None。
+        """
+        # HintBox 处理
+        if hint is not None:
+            warnings.warn("使用 `rect` 参数代替")
+            result = self.find_all(img, texts, rect=Rect(xywh=hint.rect), pad=pad)
+            if all(result):
+                return result
+        ret: list[OcrResult | None] = []
+        ocr_results = self.ocr(img, rect=rect, pad=pad)
+        logger.debug(f"ocr_results: {ocr_results}")
+        for text in texts:
+            for result in ocr_results:
+                if _is_match(result.text, text):
+                    ret.append(result)
+                    break
+            else:
+                ret.append(None)
+        return ret
+    def expect(
+        self,
+        img: 'MatLike',
+        text: str | re.Pattern | StringMatchFunction,
+        *,
+        hint: HintBox | None = None,
+        rect: Rect | None = None,
+        pad: bool = True,
+    ) -> OcrResult:
+        """
+        识别图像中的文本，并寻找满足指定要求的文本。如果未找到则抛出异常。
+        :param hint: 如果指定，则首先只识别 HintBox 范围内的文本，若未命中，再全局寻找。
+        :param rect: 如果指定，则只识别指定矩形区域。此参数优先级高于 `hint`。
+        :param pad: 见 `ocr` 的 `pad` 参数。
+        :return: 找到的文本
+        """
+        ret = self.find(img, text, hint=hint, rect=rect, pad=pad)
+        if ret is None:
+            raise TextNotFoundError(text, img)
+        return ret
+# TODO: 这个路径需要能够独立设置
+_engine_jp: RapidOCR | None = None
+_engine_en: RapidOCR | None = RapidOCR(
+    rec_model_path=lf_path('models/en_PP-OCRv3_rec_infer.onnx'),
+    use_det=True,
+    use_cls=False,
+    use_rec=True,
+)
+def jp() -> Ocr:
+    """
+    日语 OCR 引擎。
+    """
+    global _engine_jp
+    if _engine_jp is None:
+        _engine_jp = RapidOCR(
+            rec_model_path=lf_path('models/japan_PP-OCRv3_rec_infer.onnx'),
+            use_det=True,
+            use_cls=False,
+            use_rec=True,
+        )
+    return Ocr(_engine_jp)
+def en() -> Ocr:
+    """
+    英语 OCR 引擎。
+    """
+    global _engine_en
+    if _engine_en is None:
+        _engine_en = RapidOCR(
+            rec_model_path=lf_path('models/en_PP-OCRv3_rec_infer.onnx'),
+            use_det=True,
+            use_cls=False,
+            use_rec=True,
+        )
+    return Ocr(_engine_en)
+if __name__ == '__main__':
+    pass

kotonebot 0.4.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

kotonebot 0.4.0py3-none-any.whl → 0.5.0py3-none-any.whl