screenshot-vision-algorithm 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. screenshot_vision_algorithm/__init__.py +48 -0
  2. screenshot_vision_algorithm/_config.py +61 -0
  3. screenshot_vision_algorithm/android/__init__.py +1 -0
  4. screenshot_vision_algorithm/android/wechat/__init__.py +1 -0
  5. screenshot_vision_algorithm/android/wechat/algorithms/__init__.py +0 -0
  6. screenshot_vision_algorithm/android/wechat/algorithms/avatar_column.py +209 -0
  7. screenshot_vision_algorithm/android/wechat/algorithms/badge_detection.py +275 -0
  8. screenshot_vision_algorithm/android/wechat/algorithms/card_bbox.py +1000 -0
  9. screenshot_vision_algorithm/android/wechat/algorithms/phash_utils.py +267 -0
  10. screenshot_vision_algorithm/android/wechat/algorithms/speaker_band.py +290 -0
  11. screenshot_vision_algorithm/android/wechat/algorithms/template_matching.py +2163 -0
  12. screenshot_vision_algorithm/android/wechat/algorithms/title_ocr.py +143 -0
  13. screenshot_vision_algorithm/android/wechat/merge/__init__.py +0 -0
  14. screenshot_vision_algorithm/android/wechat/merge/multipage.py +157 -0
  15. screenshot_vision_algorithm/android/wechat/ocr/__init__.py +0 -0
  16. screenshot_vision_algorithm/android/wechat/ocr/avatar_guard.py +434 -0
  17. screenshot_vision_algorithm/android/wechat/ocr/badge_ocr.py +232 -0
  18. screenshot_vision_algorithm/android/wechat/ocr/nickname_binding.py +1888 -0
  19. screenshot_vision_algorithm/android/wechat/ocr/text_ocr_adapter.py +625 -0
  20. screenshot_vision_algorithm/android/wechat/profiles/__init__.py +0 -0
  21. screenshot_vision_algorithm/android/wechat/profiles/android.py +53 -0
  22. screenshot_vision_algorithm/android/wechat/profiles/harmony.py +10 -0
  23. screenshot_vision_algorithm/android/wechat/profiles/ios.py +53 -0
  24. screenshot_vision_algorithm/android/wechat/templates/android/8.0.69/chat_back_chevron.png +0 -0
  25. screenshot_vision_algorithm/android/wechat/templates/android/8.0.69/chat_input_emoji_smile.png +0 -0
  26. screenshot_vision_algorithm/android/wechat/templates/android/8.0.69/chat_input_plus.png +0 -0
  27. screenshot_vision_algorithm/android/wechat/templates/android/8.0.69/chat_input_voice.png +0 -0
  28. screenshot_vision_algorithm/android/wechat/templates/android/8.0.69/chat_title_more_dots.png +0 -0
  29. screenshot_vision_algorithm/android/wechat/templates/android/8.0.69/favorite_label.png +0 -0
  30. screenshot_vision_algorithm/android/wechat/templates/android/8.0.69/new_messages_hint_suffix.png +0 -0
  31. screenshot_vision_algorithm/android/wechat/templates/android/8.0.69/unread_divider_hint.png +0 -0
  32. screenshot_vision_algorithm/android/wechat/templates/android/8.0.69/unread_divider_hint_v2_textonly.png +0 -0
  33. screenshot_vision_algorithm/android/wechat/templates/android/8.0.69/wechat_note_header.png +0 -0
  34. screenshot_vision_algorithm/android/xhs/__init__.py +4 -0
  35. screenshot_vision_algorithm/android/zhihu/__init__.py +4 -0
  36. screenshot_vision_algorithm/png_utils.py +86 -0
  37. screenshot_vision_algorithm-0.3.0.dist-info/METADATA +425 -0
  38. screenshot_vision_algorithm-0.3.0.dist-info/RECORD +40 -0
  39. screenshot_vision_algorithm-0.3.0.dist-info/WHEEL +5 -0
  40. screenshot_vision_algorithm-0.3.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,143 @@
1
+ """Best-effort OCR for the WeChat group-chat title between 「<」 and 「⋯」."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import unicodedata
6
+ from typing import Optional
7
+
8
+ import cv2 # type: ignore
9
+ import numpy as np # type: ignore
10
+ from loguru import logger
11
+
12
+ from screenshot_vision_algorithm.android.wechat.algorithms.template_matching import (
13
+ detect_chat_back_chevron,
14
+ detect_chat_title_more_dots,
15
+ )
16
+
17
# Process-wide cache filled by _get_ocr(): the PaddleOCR engine, or None until first use.
_OCR_SINGLETON = None
# Major version of the installed paddleocr package, stored alongside the cached engine.
_OCR_VERSION_MAJOR: Optional[int] = None

# Recognised titles longer than this many characters are rejected (None is returned).
_TITLE_MAX_CHARS = 120
21
+
22
+
23
+ def _title_bar_y_band(screen_h: int, scale_w: float) -> tuple[int, int]:
24
+ sw = max(scale_w, 1e-6)
25
+ y1 = int(round(108 * sw))
26
+ y2 = int(round(220 * sw))
27
+ y1 = max(0, min(max(2, screen_h) - 2, y1))
28
+ y2 = max(y1 + 40, min(screen_h, y2))
29
+ return y1, y2
30
+
31
+
32
def _get_ocr():
    """Return the cached PaddleOCR engine and the paddleocr major version.

    The engine is created on the first call and stored in the module globals
    ``_OCR_SINGLETON`` / ``_OCR_VERSION_MAJOR``; subsequent calls return the
    cached pair without importing or initialising anything again.

    Returns:
        ``(ocr, ver_major)``: the ``PaddleOCR`` instance and the integer major
        version parsed from ``paddleocr.__version__`` (``2`` when the attribute
        is missing or cannot be parsed).

    Raises:
        RuntimeError: if ``paddleocr`` cannot be imported.
    """
    global _OCR_SINGLETON, _OCR_VERSION_MAJOR
    if _OCR_SINGLETON is not None:
        return _OCR_SINGLETON, _OCR_VERSION_MAJOR

    # Imported lazily so the module can be loaded without paddleocr installed.
    try:
        from paddleocr import PaddleOCR  # type: ignore
        import paddleocr as _pkg  # type: ignore
    except ImportError as e:
        raise RuntimeError(
            "chat title OCR requires paddleocr; pip install paddleocr",
        ) from e

    try:
        ver_major = int(str(getattr(_pkg, "__version__", "2.0.0")).split(".")[0])
    except Exception:  # pragma: no cover
        ver_major = 2

    # loguru interpolates positional args with str.format-style ``{}``
    # placeholders; a printf-style ``%d`` would be logged literally.
    logger.info("chat_title_ocr: initialising PaddleOCR v{} (once-per-process)", ver_major)
    if ver_major >= 3:
        # Constructor keywords differ between the 3.x and 2.x APIs.
        ocr = PaddleOCR(
            use_textline_orientation=False,
            lang="ch",
            device="cpu",
            text_detection_model_name="PP-OCRv5_mobile_det",
            text_recognition_model_name="PP-OCRv5_mobile_rec",
        )
    else:  # pragma: no cover
        ocr = PaddleOCR(use_angle_cls=False, lang="ch", use_gpu=False)

    _OCR_SINGLETON = ocr
    _OCR_VERSION_MAJOR = ver_major
    return ocr, ver_major
65
+
66
+
67
def try_ocr_group_chat_title_from_png(
    png_bytes: bytes,
    *,
    scale_w: float,
) -> Optional[str]:
    """Best-effort OCR of the chat title located between the back chevron and the "more" dots.

    Args:
        png_bytes: Encoded screenshot bytes (decoded with ``cv2.imdecode``).
        scale_w: Width scale factor; forwarded to both anchor detectors and
            used to scale the horizontal padding and the title-bar row band.

    Returns:
        The stripped, NFC-normalised title, or ``None`` when the input is empty
        or cannot be decoded, either anchor is not detected, the region between
        the anchors is too small, PaddleOCR is unavailable or raises, or the
        recognised text is empty or longer than ``_TITLE_MAX_CHARS``.
    """
    if not png_bytes:
        return None
    try:
        arr = np.asarray(bytearray(png_bytes), dtype=np.uint8)
        screen_bgr = cv2.imdecode(arr, cv2.IMREAD_COLOR)
    except Exception:
        return None
    if screen_bgr is None or screen_bgr.size == 0:
        return None

    back = detect_chat_back_chevron(screen_bgr, scale_w)
    more = detect_chat_title_more_dots(screen_bgr, scale_w)
    if back is None or more is None:
        return None

    # Horizontal span: just right of the chevron to just left of the dots.
    sw = max(scale_w, 1e-6)
    pad = max(4, int(round(6 * sw)))
    h, w_full = screen_bgr.shape[:2]
    x1 = min(back.x + back.w + pad, w_full - 1)
    x2 = max(more.x - pad, 0)
    if x2 <= x1 + 4:
        return None

    y1, y2 = _title_bar_y_band(h, scale_w)
    crop = screen_bgr[y1:y2, x1:x2]
    if crop.size == 0 or crop.shape[0] < 8 or crop.shape[1] < 16:
        return None

    try:
        ocr, ver_major = _get_ocr()
    except RuntimeError as e:
        # loguru uses ``{}`` placeholders; ``%s`` would drop the error text.
        logger.debug("{}", e)
        return None

    upscaled = cv2.resize(
        crop, None, fx=1.75, fy=1.75, interpolation=cv2.INTER_CUBIC,
    )
    texts: list[str] = []
    scores: list[float] = []
    try:
        if ver_major >= 3:
            results = list(ocr.predict(upscaled))
            if results:
                r = results[0]
                texts = list(r.get("rec_texts", []))
                scores = [float(s) for s in r.get("rec_scores", [])]
        else:  # pragma: no cover
            raw = ocr.ocr(upscaled, cls=False)
            if raw and raw[0]:
                texts = [line[1][0] for line in raw[0]]
                scores = [float(line[1][1]) for line in raw[0]]
    except Exception as exc:  # pragma: no cover
        logger.debug("chat_title_ocr predict failed: {}", exc)
        return None

    if not texts:
        return None

    # Prefer the highest-confidence line; merge multi-line titles rarely needed.
    # A line without a matching score is ranked as 0.0.
    best_i = max(range(len(texts)), key=lambda i: scores[i] if i < len(scores) else 0.0)
    raw_title = texts[best_i].strip()
    if not raw_title:
        # The best-scoring line was blank: fall back to all non-blank lines joined.
        joined = "".join(t.strip() for t in texts if t and t.strip())
        raw_title = joined.strip()
    if not raw_title or len(raw_title) > _TITLE_MAX_CHARS:
        return None

    return unicodedata.normalize("NFC", raw_title)
141
+
142
+
143
+ __all__ = ["try_ocr_group_chat_title_from_png"]
@@ -0,0 +1,157 @@
1
+ """
2
+ 多页 OCR 文本合并(待办 §4 #6 / d5-resume-ocr-multipage + PRD 九§8)。
3
+
4
+ 分页截图竖滑时,相邻页 OCR 结果常在边界重复若干行或重复同一段字符。合并时去掉
5
+ 「上一页尾部 vs 下一页头部」的重复,降低后续拆分/结构化噪声。
6
+
7
+ **仅**实现 PRD §8(2)步骤(``_merge_two_pages_prd``):尾窗 M、N 精确比对与降级、
8
+ A 侧最下锚点、延伸块 ``Levenshtein.ratio``、B 侧最上合格锚点;(B)半角空格衔接;
9
+ (C)不合并为 **同一页** 时 **不**做半角空格硬接,保留 **两页** 并以 **页间分割线** 分隔(与 §8(1)1)。
10
+ 不再提供历史 ``overlap`` 分支。
11
+
12
+ 多页折叠:始终用 **当前序列最后一页** 与 **下一截图页 OCR 全文** 做 pair,(C)时在
13
+ 序列末尾追加新 **页**;最后 ``join`` 为单份画布字符串。
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ from Levenshtein import ratio as levenshtein_ratio
19
+
20
# PRD §8(1)1 / (C): literal **page divider** placed between pages (the constant
# name matches the PRD's ``WX_MATCH_OCR_PAGE_DIVIDER``).
WX_MATCH_OCR_PAGE_DIVIDER = "\n<<<WX_MATCH_OCR_PAGE_DIVIDER>>>\n"
22
+
23
+
24
def paragraph_content_bounds(merged: str, anchor: int) -> tuple[int, int]:
    """Return the half-open ``[lo, hi)`` span of the page that contains ``anchor``.

    Pages are the stretches of ``merged`` separated by
    :data:`WX_MATCH_OCR_PAGE_DIVIDER`.  ``anchor`` is clamped into
    ``[0, len(merged)]``; an anchor that falls inside a divider is attributed
    to the page that follows it.  An empty ``merged`` yields ``(0, 0)``.
    """
    if not merged:
        return (0, 0)

    divider = WX_MATCH_OCR_PAGE_DIVIDER
    total = len(merged)
    position = max(0, min(int(anchor), total))

    page_start = 0
    while True:
        divider_at = merged.find(divider, page_start)
        if divider_at < 0:
            # No further divider: the current page runs to the end of the text.
            return (page_start, total)
        if position < divider_at:
            return (page_start, divider_at)
        # Anchor is inside or after this divider: move on to the next page.
        page_start = divider_at + len(divider)
44
+
45
+
46
+ def _prd_edit_similarity(a: str, b: str) -> float:
47
+ """PRD §8(A):延伸块 1‑Q 与 2‑Q/… 的相似度(``python-Levenshtein.ratio``)。"""
48
+ if not a and not b:
49
+ return 1.0
50
+ if not a or not b:
51
+ return 0.0
52
+ return float(levenshtein_ratio(a, b))
53
+
54
+
55
+ def _substring_occurrences(haystack: str, needle: str) -> list[int]:
56
+ if not needle or len(needle) > len(haystack):
57
+ return []
58
+ out: list[int] = []
59
+ start = 0
60
+ while True:
61
+ i = haystack.find(needle, start)
62
+ if i < 0:
63
+ break
64
+ out.append(i)
65
+ start = i + 1
66
+ return out
67
+
68
+
69
+ def _merge_two_pages_prd(
70
+ prev: str,
71
+ nxt: str,
72
+ *,
73
+ tail_m: int = 300,
74
+ ngram_candidates: tuple[int, ...] = (20, 15, 12, 8),
75
+ similarity_threshold: float = 0.8,
76
+ ) -> str | tuple[str, str]:
77
+ """PRD §8 相邻两截图 **页**:成功则为 **单页** 连续串;(C)返回 ``(prev, nxt)`` 两 **页** 不合并。"""
78
+ pa = prev
79
+ pb = nxt
80
+ if not pb or not pb.strip():
81
+ return pa
82
+ if not pa or not pa.strip():
83
+ return pb
84
+
85
+ tail_start = max(0, len(pa) - int(tail_m))
86
+ thr = float(similarity_threshold)
87
+
88
+ for ng in ngram_candidates:
89
+ if ng > len(pa) or ng > len(pb):
90
+ continue
91
+ s_star: int | None = None
92
+ for s in range(tail_start, len(pa) - ng + 1):
93
+ anchor = pa[s : s + ng]
94
+ if _substring_occurrences(pb, anchor):
95
+ s_star = s
96
+ if s_star is None:
97
+ continue
98
+
99
+ blob_1q = pa[s_star:]
100
+ l1q = len(blob_1q)
101
+ anchor_star = pa[s_star : s_star + ng]
102
+ js = _substring_occurrences(pb, anchor_star)
103
+ good_js: list[int] = []
104
+ for j in js:
105
+ if j + l1q > len(pb):
106
+ continue
107
+ seg_b = pb[j : j + l1q]
108
+ if _prd_edit_similarity(blob_1q, seg_b) >= thr:
109
+ good_js.append(j)
110
+ if not good_js:
111
+ continue
112
+
113
+ j_pick = min(good_js)
114
+ cut = j_pick + l1q
115
+ suffix_b = pb[cut:].lstrip("\n").lstrip("\r")
116
+ out = pa.rstrip("\n").rstrip("\r")
117
+ if suffix_b:
118
+ return out + " " + suffix_b
119
+ return out
120
+
121
+ return (pa, pb)
122
+
123
+
124
def merge_multipage_ocr_texts(
    pages: list[str],
    *,
    prd_tail_m: int = 300,
    prd_similarity: float = 0.8,
) -> str:
    """Merge per-screenshot OCR texts in order into one string (PRD §8 n-gram merge).

    Blank pages are dropped first.  Each remaining page is paired, through
    ``_merge_two_pages_prd``, with the **last entry** of the running list only:
    that entry is the current tail page (already a single merged string if
    earlier pages were joined).  The divider-joined accumulation is never used
    as ``prev``, because the tail window and n-gram scan would then reach into
    ``WX_MATCH_OCR_PAGE_DIVIDER`` or the text of earlier pages, which breaks
    the PRD's "A = previous page, B = next page" adjacency.

    When a pair cannot be merged (case (C)) the new page is appended as its own
    entry; the entries are finally joined with ``WX_MATCH_OCR_PAGE_DIVIDER``.
    """
    non_blank = [p for p in pages if p and p.strip()]
    if len(non_blank) <= 1:
        return non_blank[0] if non_blank else ""

    # Keep pages as separate blobs; the divider is only introduced by the final join.
    merged_pages: list[str] = non_blank[:1]
    for incoming in non_blank[1:]:
        outcome = _merge_two_pages_prd(
            merged_pages[-1],
            incoming,
            tail_m=prd_tail_m,
            similarity_threshold=prd_similarity,
        )
        if isinstance(outcome, tuple):
            # Case (C): keep the tail page and add the new page after it.
            merged_pages[-1] = outcome[0]
            merged_pages.append(outcome[1])
        else:
            merged_pages[-1] = outcome
    return WX_MATCH_OCR_PAGE_DIVIDER.join(merged_pages)