PyPI - screenshot-vision-algorithm - Versions diffs - 0.3.0__py3-none-any.whl - Mend

screenshot-vision-algorithm 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40) hide show

screenshot_vision_algorithm/android/wechat/algorithms/title_ocr.py ADDED Viewed

@@ -0,0 +1,143 @@
+"""Best-effort OCR for the WeChat group-chat title between 「<」 and 「⋯」."""
+from __future__ import annotations
+import unicodedata
+from typing import Optional
+import cv2  # type: ignore
+import numpy as np  # type: ignore
+from loguru import logger
+from screenshot_vision_algorithm.android.wechat.algorithms.template_matching import (
+    detect_chat_back_chevron,
+    detect_chat_title_more_dots,
+)
+_OCR_SINGLETON = None  # PaddleOCR instance; stays None until _get_ocr() builds it, then reused
+_OCR_VERSION_MAJOR: Optional[int] = None  # major version of paddleocr, stored together with the instance
+_TITLE_MAX_CHARS = 120  # OCR results longer than this are rejected instead of returned as a title
+def _title_bar_y_band(screen_h: int, scale_w: float) -> tuple[int, int]:
+    """Return the ``(top, bottom)`` pixel rows of the band searched for the title bar.
+    The nominal band is rows 108 to 220 multiplied by ``scale_w``; it is then fitted to a
+    screen that is ``screen_h`` rows tall while staying at least 40 rows high.
+    """
+    scale = max(scale_w, 1e-6)  # guard against a zero or negative scale factor
+    top = int(round(108 * scale))
+    bottom = int(round(220 * scale))
+    # Keep the top edge on-screen, leaving two rows below it whenever the screen allows.
+    latest_top = max(2, screen_h) - 2
+    if top > latest_top:
+        top = latest_top
+    if top < 0:
+        top = 0
+    # Clip the bottom edge to the screen height, but never let the band drop under 40 rows.
+    if bottom > screen_h:
+        bottom = screen_h
+    if bottom < top + 40:
+        bottom = top + 40
+    return top, bottom
+def _get_ocr():
+    """Return the process-wide ``(ocr, major_version)`` pair, building it on first use.
+    ``paddleocr`` is imported inside the function, so a missing install surfaces here as
+    ``RuntimeError`` rather than at module import. The constructor arguments depend on the
+    installed major version: 3 and later select the PP-OCRv5 mobile detection/recognition
+    models on CPU, older releases are built with the ``use_angle_cls``/``use_gpu`` keywords.
+    Raises:
+        RuntimeError: if ``paddleocr`` cannot be imported.
+    """
+    global _OCR_SINGLETON, _OCR_VERSION_MAJOR
+    if _OCR_SINGLETON is not None:
+        return _OCR_SINGLETON, _OCR_VERSION_MAJOR
+    try:
+        from paddleocr import PaddleOCR  # type: ignore
+        import paddleocr as _pkg  # type: ignore
+    except ImportError as e:
+        raise RuntimeError(
+            "chat title OCR requires paddleocr; pip install paddleocr",
+        ) from e
+    try:
+        ver_major = int(str(getattr(_pkg, "__version__", "2.0.0")).split(".")[0])
+    except Exception:  # pragma: no cover
+        ver_major = 2  # unparsable version string: fall back to the 2.x code path
+    # loguru formats messages with str.format-style ``{}`` fields; a ``%d`` placeholder
+    # would be written out literally and the version would be missing from the log.
+    logger.info("chat_title_ocr: initialising PaddleOCR v{} (once-per-process)", ver_major)
+    if ver_major >= 3:
+        ocr = PaddleOCR(
+            use_textline_orientation=False,
+            lang="ch",
+            device="cpu",
+            text_detection_model_name="PP-OCRv5_mobile_det",
+            text_recognition_model_name="PP-OCRv5_mobile_rec",
+        )
+    else:  # pragma: no cover
+        ocr = PaddleOCR(use_angle_cls=False, lang="ch", use_gpu=False)
+    # Publish both globals only after construction succeeded, so a failed build is retried.
+    _OCR_SINGLETON = ocr
+    _OCR_VERSION_MAJOR = ver_major
+    return ocr, ver_major
+def try_ocr_group_chat_title_from_png(
+    png_bytes: bytes,
+    *,
+    scale_w: float,
+) -> Optional[str]:
+    """Return the NFC-normalised chat title read from a screenshot, or ``None``.
+    The title is searched in the strip between the back chevron and the "more" dots,
+    vertically limited to the band given by ``_title_bar_y_band``. ``None`` is returned
+    whenever a step fails: empty or undecodable image bytes, a missing anchor, a crop that
+    is too small, PaddleOCR unavailable or raising, or a result that is empty or longer
+    than ``_TITLE_MAX_CHARS``.
+    Args:
+        png_bytes: encoded screenshot image.
+        scale_w: scale factor passed to the anchor detectors and used to size the
+            horizontal padding and the vertical band.
+    """
+    if not png_bytes:
+        return None
+    try:
+        arr = np.asarray(bytearray(png_bytes), dtype=np.uint8)
+        screen_bgr = cv2.imdecode(arr, cv2.IMREAD_COLOR)
+    except Exception:
+        return None
+    if screen_bgr is None or screen_bgr.size == 0:
+        return None
+    # Both anchors are required: they delimit the title horizontally.
+    back = detect_chat_back_chevron(screen_bgr, scale_w)
+    more = detect_chat_title_more_dots(screen_bgr, scale_w)
+    if back is None or more is None:
+        return None
+    sw = max(scale_w, 1e-6)
+    pad = max(4, int(round(6 * sw)))
+    h, w_full = screen_bgr.shape[:2]
+    # Crop starts right of the chevron and ends left of the dots, with padding on each side.
+    x1 = min(back.x + back.w + pad, w_full - 1)
+    x2 = max(more.x - pad, 0)
+    if x2 <= x1 + 4:
+        return None
+    y1, y2 = _title_bar_y_band(h, scale_w)
+    crop = screen_bgr[y1:y2, x1:x2]
+    if crop.size == 0 or crop.shape[0] < 8 or crop.shape[1] < 16:
+        return None
+    try:
+        ocr, ver_major = _get_ocr()
+    except RuntimeError as e:
+        # loguru uses ``{}`` fields; ``"%s"`` would log the two characters "%s" only.
+        logger.debug("{}", e)
+        return None
+    upscaled = cv2.resize(
+        crop, None, fx=1.75, fy=1.75, interpolation=cv2.INTER_CUBIC,
+    )
+    texts: list[str] = []
+    scores: list[float] = []
+    try:
+        if ver_major >= 3:
+            results = list(ocr.predict(upscaled))
+            if results:
+                r = results[0]
+                texts = list(r.get("rec_texts", []))
+                scores = [float(s) for s in r.get("rec_scores", [])]
+        else:  # pragma: no cover
+            raw = ocr.ocr(upscaled, cls=False)
+            if raw and raw[0]:
+                texts = [line[1][0] for line in raw[0]]
+                scores = [float(line[1][1]) for line in raw[0]]
+    except Exception as exc:  # pragma: no cover
+        logger.debug("chat_title_ocr predict failed: {}", exc)
+        return None
+    if not texts:
+        return None
+    # Prefer the highest-confidence line; merge multi-line titles rarely needed.
+    # A line without a matching score counts as confidence 0.0.
+    best_i = max(range(len(texts)), key=lambda i: scores[i] if i < len(scores) else 0.0)
+    raw_title = texts[best_i].strip()
+    if not raw_title:
+        # The best-scored line was blank: fall back to all non-blank lines concatenated.
+        joined = "".join(t.strip() for t in texts if t and t.strip())
+        raw_title = joined.strip()
+    if not raw_title or len(raw_title) > _TITLE_MAX_CHARS:
+        return None
+    return unicodedata.normalize("NFC", raw_title)
+__all__ = ["try_ocr_group_chat_title_from_png"]  # only the OCR entry point is exported

screenshot_vision_algorithm/android/wechat/merge/__init__.py ADDED Viewed

File without changes

screenshot_vision_algorithm/android/wechat/merge/multipage.py ADDED Viewed

@@ -0,0 +1,157 @@
+"""
+多页 OCR 文本合并（待办 §4 #6 / d5-resume-ocr-multipage + PRD 九§8）。
+分页截图竖滑时，相邻页 OCR 结果常在边界重复若干行或重复同一段字符。合并时去掉
+「上一页尾部 vs 下一页头部」的重复，降低后续拆分/结构化噪声。
+**仅**实现 PRD §8（2）步骤（``_merge_two_pages_prd``）：尾窗 M、N 精确比对与降级、
+A 侧最下锚点、延伸块 ``Levenshtein.ratio``、B 侧最上合格锚点；（B）半角空格衔接；
+（C）不合并为 **同一页** 时 **不**做半角空格硬接，保留 **两页** 并以 **页间分割线** 分隔（与 §8（1）1）。
+不再提供历史 ``overlap`` 分支。
+多页折叠：始终用 **当前序列最后一页** 与 **下一截图页 OCR 全文** 做 pair，（C）时在
+序列末尾追加新 **页**；最后 ``join`` 为单份画布字符串。
+"""
+from __future__ import annotations
+from Levenshtein import ratio as levenshtein_ratio
+# PRD §8(1)1 / (C): literal divider placed **between pages** (the constant name matches the PRD's ``WX_MATCH_OCR_PAGE_DIVIDER``).
+WX_MATCH_OCR_PAGE_DIVIDER = "\n<<<WX_MATCH_OCR_PAGE_DIVIDER>>>\n"
+def paragraph_content_bounds(merged: str, anchor: int) -> tuple[int, int]:
+    """Return ``[lo, hi)`` of the PRD **page** that ``anchor`` falls in.
+    A page is a screenshot-level canvas; pages are delimited by
+    :data:`WX_MATCH_OCR_PAGE_DIVIDER`. ``anchor`` is clamped to ``[0, len(merged)]``, and an
+    anchor that lands inside a divider is attributed to the page that follows it.
+    """
+    if not merged:
+        return (0, 0)
+    divider = WX_MATCH_OCR_PAGE_DIVIDER
+    total = len(merged)
+    pos = max(0, min(int(anchor), total))
+    page_start = 0
+    while True:
+        divider_at = merged.find(divider, page_start)
+        if divider_at < 0:
+            # No further divider: the current page runs to the end of the text.
+            return (page_start, total)
+        if pos < divider_at:
+            return (page_start, divider_at)
+        # Anchor is at or beyond this divider: continue with the page after it.
+        page_start = divider_at + len(divider)
+def _prd_edit_similarity(a: str, b: str) -> float:
+    """PRD §8(A): similarity between extension block 1-Q and 2-Q/... (``python-Levenshtein.ratio``).
+    Two empty strings count as identical (1.0); exactly one empty string scores 0.0.
+    """
+    if a and b:
+        return float(levenshtein_ratio(a, b))
+    # At least one side is empty, so the library call is skipped.
+    return 0.0 if (a or b) else 1.0
+def _substring_occurrences(haystack: str, needle: str) -> list[int]:
+    """Return every start index of ``needle`` in ``haystack``, in ascending order.
+    Overlapping matches are included. An empty needle, or one longer than the haystack,
+    yields an empty list.
+    """
+    if not needle or len(needle) > len(haystack):
+        return []
+    hits: list[int] = []
+    pos = haystack.find(needle)
+    while pos >= 0:
+        hits.append(pos)
+        # Resume one character later so overlapping occurrences are found too.
+        pos = haystack.find(needle, pos + 1)
+    return hits
+def _merge_two_pages_prd(
+    prev: str,
+    nxt: str,
+    *,
+    tail_m: int = 300,
+    ngram_candidates: tuple[int, ...] = (20, 15, 12, 8),
+    similarity_threshold: float = 0.8,
+) -> str | tuple[str, str]:
+    """Merge two adjacent screenshot **pages** following PRD §8.
+    On success the result is one continuous **single-page** string; in case (C) the two
+    **pages** are returned unmerged as ``(prev, nxt)``. A blank ``nxt`` returns ``prev`` and
+    a blank ``prev`` returns ``nxt``.
+    Args:
+        prev: OCR text of the earlier page (side A).
+        nxt: OCR text of the following page (side B).
+        tail_m: length of the window at the end of ``prev`` in which anchors may start.
+        ngram_candidates: anchor lengths tried in order; non-positive sizes are ignored.
+        similarity_threshold: minimum ``_prd_edit_similarity`` between A's overlap block
+            and the equally long block in B for the overlap to be accepted.
+    """
+    pa = prev
+    pb = nxt
+    if not pb or not pb.strip():
+        return pa
+    if not pa or not pa.strip():
+        return pb
+    tail_start = max(0, len(pa) - int(tail_m))
+    thr = float(similarity_threshold)
+    for ng in ngram_candidates:
+        if ng <= 0 or ng > len(pa) or ng > len(pb):
+            continue
+        # Bottom-most anchor on side A: scan the tail window upwards and stop at the first
+        # n-gram that also occurs somewhere in B.
+        s_star = next(
+            (s for s in range(len(pa) - ng, tail_start - 1, -1) if pa[s : s + ng] in pb),
+            None,
+        )
+        if s_star is None:
+            continue
+        blob_1q = pa[s_star:]  # overlap block: from the anchor to the end of A
+        l1q = len(blob_1q)
+        anchor_star = pa[s_star : s_star + ng]
+        # Top-most qualifying anchor on side B: occurrences come in ascending order, so the
+        # first one whose equally long block is similar enough is the smallest.
+        j_pick = next(
+            (
+                j
+                for j in _substring_occurrences(pb, anchor_star)
+                if j + l1q <= len(pb)
+                and _prd_edit_similarity(blob_1q, pb[j : j + l1q]) >= thr
+            ),
+            None,
+        )
+        if j_pick is None:
+            continue
+        cut = j_pick + l1q
+        # Strip CR and LF as one character set; chaining separate strips would leave a
+        # line break behind on CRLF input.
+        suffix_b = pb[cut:].lstrip("\r\n")
+        out = pa.rstrip("\r\n")
+        if suffix_b:
+            return out + " " + suffix_b  # (B): join with a single half-width space
+        return out
+    return (pa, pb)
+def merge_multipage_ocr_texts(
+    pages: list[str],
+    *,
+    prd_tail_m: int = 300,
+    prd_similarity: float = 0.8,
+) -> str:
+    """Merge multi-page OCR text in order (PRD §8 n-gram; in case (C) **pages** are separated by ``WX_MATCH_OCR_PAGE_DIVIDER``).
+    While folding, only the **last list item** (the plain OCR of the current tail **page**; if
+    earlier pages merged via (B), that item is already a **single string** built from several
+    screenshots) is paired with the **full text of the next screenshot** in
+    ``_merge_two_pages_prd``. The accumulated text that already contains page dividers is
+    **never** used as ``prev``: otherwise the tail window *M* and the n-gram scan would run over
+    ``WX_MATCH_OCR_PAGE_DIVIDER`` or the body of earlier pages, violating the PRD's adjacency
+    definition "A = previous screenshot page, B = next screenshot page".
+    Blank pages are dropped first; no pages gives ``""`` and a single page is returned as is.
+    """
+    non_blank = [text for text in pages if text and text.strip()]
+    if len(non_blank) <= 1:
+        return non_blank[0] if non_blank else ""
+    # Keep a list of per-page blobs; the divider is inserted only by the final join.
+    merged_pages: list[str] = [non_blank[0]]
+    for incoming in non_blank[1:]:
+        outcome = _merge_two_pages_prd(
+            merged_pages[-1],
+            incoming,
+            tail_m=prd_tail_m,
+            similarity_threshold=prd_similarity,
+        )
+        if not isinstance(outcome, tuple):
+            # Overlap found: the tail page absorbs the new screenshot.
+            merged_pages[-1] = outcome
+            continue
+        # Case (C): keep both pages separate.
+        merged_pages[-1] = outcome[0]
+        merged_pages.append(outcome[1])
+    return WX_MATCH_OCR_PAGE_DIVIDER.join(merged_pages)

screenshot_vision_algorithm/android/wechat/ocr/__init__.py ADDED Viewed

File without changes