screenshot-vision-algorithm 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. screenshot_vision_algorithm/__init__.py +48 -0
  2. screenshot_vision_algorithm/_config.py +61 -0
  3. screenshot_vision_algorithm/android/__init__.py +1 -0
  4. screenshot_vision_algorithm/android/wechat/__init__.py +1 -0
  5. screenshot_vision_algorithm/android/wechat/algorithms/__init__.py +0 -0
  6. screenshot_vision_algorithm/android/wechat/algorithms/avatar_column.py +209 -0
  7. screenshot_vision_algorithm/android/wechat/algorithms/badge_detection.py +275 -0
  8. screenshot_vision_algorithm/android/wechat/algorithms/card_bbox.py +1000 -0
  9. screenshot_vision_algorithm/android/wechat/algorithms/phash_utils.py +267 -0
  10. screenshot_vision_algorithm/android/wechat/algorithms/speaker_band.py +290 -0
  11. screenshot_vision_algorithm/android/wechat/algorithms/template_matching.py +2163 -0
  12. screenshot_vision_algorithm/android/wechat/algorithms/title_ocr.py +143 -0
  13. screenshot_vision_algorithm/android/wechat/merge/__init__.py +0 -0
  14. screenshot_vision_algorithm/android/wechat/merge/multipage.py +157 -0
  15. screenshot_vision_algorithm/android/wechat/ocr/__init__.py +0 -0
  16. screenshot_vision_algorithm/android/wechat/ocr/avatar_guard.py +434 -0
  17. screenshot_vision_algorithm/android/wechat/ocr/badge_ocr.py +232 -0
  18. screenshot_vision_algorithm/android/wechat/ocr/nickname_binding.py +1888 -0
  19. screenshot_vision_algorithm/android/wechat/ocr/text_ocr_adapter.py +625 -0
  20. screenshot_vision_algorithm/android/wechat/profiles/__init__.py +0 -0
  21. screenshot_vision_algorithm/android/wechat/profiles/android.py +53 -0
  22. screenshot_vision_algorithm/android/wechat/profiles/harmony.py +10 -0
  23. screenshot_vision_algorithm/android/wechat/profiles/ios.py +53 -0
  24. screenshot_vision_algorithm/android/wechat/templates/android/8.0.69/chat_back_chevron.png +0 -0
  25. screenshot_vision_algorithm/android/wechat/templates/android/8.0.69/chat_input_emoji_smile.png +0 -0
  26. screenshot_vision_algorithm/android/wechat/templates/android/8.0.69/chat_input_plus.png +0 -0
  27. screenshot_vision_algorithm/android/wechat/templates/android/8.0.69/chat_input_voice.png +0 -0
  28. screenshot_vision_algorithm/android/wechat/templates/android/8.0.69/chat_title_more_dots.png +0 -0
  29. screenshot_vision_algorithm/android/wechat/templates/android/8.0.69/favorite_label.png +0 -0
  30. screenshot_vision_algorithm/android/wechat/templates/android/8.0.69/new_messages_hint_suffix.png +0 -0
  31. screenshot_vision_algorithm/android/wechat/templates/android/8.0.69/unread_divider_hint.png +0 -0
  32. screenshot_vision_algorithm/android/wechat/templates/android/8.0.69/unread_divider_hint_v2_textonly.png +0 -0
  33. screenshot_vision_algorithm/android/wechat/templates/android/8.0.69/wechat_note_header.png +0 -0
  34. screenshot_vision_algorithm/android/xhs/__init__.py +4 -0
  35. screenshot_vision_algorithm/android/zhihu/__init__.py +4 -0
  36. screenshot_vision_algorithm/png_utils.py +86 -0
  37. screenshot_vision_algorithm-0.3.0.dist-info/METADATA +425 -0
  38. screenshot_vision_algorithm-0.3.0.dist-info/RECORD +40 -0
  39. screenshot_vision_algorithm-0.3.0.dist-info/WHEEL +5 -0
  40. screenshot_vision_algorithm-0.3.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,267 @@
1
+ """Perceptual hashing utilities for the collector's "stuck at bottom" detector.
2
+
3
+ ``chat_content_scroll_down`` / ``detail_content_scroll_down``(capture_action)对齐 PRD **内容 scroll_down**
4
+ (手指上滑);术语 SSOT:``docs/requirements/product_requirement_document.md`` 开篇。
5
+
6
+ When ``chat_content_scroll_down`` (or ``detail_content_scroll_down``) has already scrolled past
7
+ the last new message / end of the resume, subsequent swipes produce near-
8
+ identical frames. Comparing raw pixels fails under:
9
+ - WeChat's top-bar pulse animations (notification icons, battery, time)
10
+ - Keyboard / system insets that briefly animate
11
+ - Subpixel anti-aliasing on the same static content across screencap calls
12
+
13
+ Perceptual hashing is robust to those. We use the **dHash** family (difference
14
+ hash) because:
15
+ - It's cheap (grayscale + 9x8 downscale + 64 bit-compare)
16
+ - It handles minor pixel shifts better than aHash
17
+ - It's plenty for our "is this the same screen?" binary decision
18
+
19
+ Two reference points in the collector (DD section 2.4.x D):
20
+ - ``chat_content_scroll_down`` ROI y: 13% ~ 92% (exclude top status bar +
21
+ bottom home indicator)
22
+ - ``detail_content_scroll_down`` ROI y: 13% ~ 95% (note page has no home
23
+ indicator; keep more)
24
+
25
+ This module is stdlib + numpy + cv2 only — no pip dependency on
26
+ ``imagehash`` so the collector venv stays lean.
27
+
28
+ Aligned with:
29
+ DD section 2.3 finalize trigger (phash duplicate → finalize)
30
+ DD section 2.4.x D phash ROI contract
31
+ """
32
+
33
+ from __future__ import annotations
34
+
35
+ from dataclasses import dataclass
36
+ from typing import Literal
37
+
38
+ import cv2
39
+ import numpy as np
40
+
41
#: Vertical ROI as (top ratio, bottom ratio) of the frame height that is hashed
#: on chat pages (DD §2.4.x D).
CHAT_ROI = (0.13, 0.92)

#: Vertical ROI as (top ratio, bottom ratio) of the frame height that is hashed
#: on detail/note pages (DD §2.4.x D).
DETAIL_ROI = (0.13, 0.95)

#: dHash sampling grid: 9 columns by 8 rows, i.e. 8 adjacent-column comparisons
#: per row and 8 * 8 = 64 bits in total.
DHASH_W = 9
DHASH_H = 8

#: Default "same frame" threshold.
#: A Hamming distance of at most 5 bits (out of 64) is a >= 92% match: tolerant
#: of top-bar icon jitter while still catching the "scroll jammed at bottom"
#: case. Callers can lower it for stricter duplicate detection on detail pages,
#: where content variability is higher.
DEFAULT_DUP_HAMMING_THRESHOLD = 5

#: docs/adr/manual_labeling.md, step 1(2) C: "three consecutive comparisons,
#: each with similarity >= 95%". On a 64-bit dHash that corresponds to a
#: Hamming distance <= 3 ((64 - 3) / 64 ≈ 95.3%).
MANUAL_LABELING_SIM95_MAX_HAMMING = 3
#: Same ADR paragraph: number of consecutive similar pulls required before the
#: view is treated as anchored.
MANUAL_LABELING_SIM95_STREAK_REQUIRED = 3

#: manual_labeling.md, step 2.(2) B / step 3: "identical phash" is implemented
#: as perceptual equivalence, using the same 95% criterion as step 1(2) so that
#: status-bar jitter causes fewer misjudgements.
MANUAL_LABELING_PHASH_IDENTICAL_MAX_HAMMING = MANUAL_LABELING_SIM95_MAX_HAMMING
67
+
68
+
69
def manual_labeling_phash_matches(a: int, b: int) -> bool:
    """Return whether two dHash values count as "identical" for manual labeling.

    The hashes match when their Hamming distance does not exceed
    ``MANUAL_LABELING_PHASH_IDENTICAL_MAX_HAMMING``.
    """
    differing_bits = hamming_distance(a, b)
    return differing_bits <= MANUAL_LABELING_PHASH_IDENTICAL_MAX_HAMMING
72
+
73
+
74
@dataclass(frozen=True)
class PHashResult:
    """Perceptual hash of a single frame plus the rectangle that was hashed.

    ``value`` is the hash as an int (for the 8x8 dHash: 64 bits packed
    MSB-first). The ``roi_x1`` / ``roi_y1`` / ``roi_x2`` / ``roi_y2`` fields
    record the hashed region so callers can log it and notice a
    mis-configured ROI instead of silently breaking duplicate detection.
    """

    value: int
    roi_x1: int
    roi_y1: int
    roi_x2: int
    roi_y2: int

    def hex(self) -> str:
        """Return ``value`` as lowercase hex, zero-padded to 16 digits."""
        return format(self.value, "016x")
91
+
92
+
93
def crop_roi(
    bgr: np.ndarray,
    roi_kind: Literal["chat", "detail"],
) -> tuple[np.ndarray, tuple[int, int, int, int]]:
    """Cut out the horizontal strip used for perceptual-hash comparison.

    Only the Y axis is trimmed (by ``CHAT_ROI`` or ``DETAIL_ROI``); the strip
    always spans the full image width, because the top/bottom fixtures
    (status bar / nav bar) are the regions that animate without real content
    changes.

    Returns:
        A copy of the cropped rows and the absolute ``(x1, y1, x2, y2)``
        rectangle, for logging.

    Raises:
        ValueError: if ``roi_kind`` is neither ``"chat"`` nor ``"detail"``,
            or if the image is so short that the strip would be empty.
    """
    if roi_kind not in ("chat", "detail"):
        raise ValueError(f"roi_kind must be 'chat' or 'detail', got {roi_kind!r}")
    top_ratio, bottom_ratio = CHAT_ROI if roi_kind == "chat" else DETAIL_ROI

    height, width = bgr.shape[:2]
    top = int(height * top_ratio)
    bottom = int(height * bottom_ratio)
    if bottom <= top:
        raise ValueError(
            f"degenerate ROI: y1={top} y2={bottom} for image h={height}; "
            f"check roi_kind={roi_kind!r} and image dims"
        )

    strip = bgr[top:bottom, :, :].copy()
    return strip, (0, top, width, bottom)
120
+
121
+
122
def compute_dhash(
    bgr: np.ndarray,
    roi_kind: Literal["chat", "detail"],
) -> PHashResult:
    """Compute the dHash of the ROI-cropped frame.

    Algorithm:
      1. Crop to the ROI (see :func:`crop_roi`).
      2. Convert to grayscale.
      3. Resize to ``(DHASH_W, DHASH_H)`` using ``INTER_AREA``, the
         interpolation suited to down-sampling.
      4. For each row, bit ``j`` is ``px[j + 1] > px[j]`` (the right-hand
         neighbour is brighter). Bits are taken row by row and packed
         MSB-first, giving 8 bits per row * 8 rows = 64 bits with the
         default grid.

    No caching is applied: frames are unique per screencap, so a cache
    would not help.

    Returns:
        A :class:`PHashResult` holding the hash and the hashed rectangle.

    Raises:
        ValueError: propagated from :func:`crop_roi` for an unknown
            ``roi_kind`` or a degenerate ROI.
    """
    roi, (x1, y1, x2, y2) = crop_roi(bgr, roi_kind)

    gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
    resized = cv2.resize(
        gray, (DHASH_W, DHASH_H), interpolation=cv2.INTER_AREA,
    )
    # Compare every pixel with its left neighbour (the standard dHash trick).
    bits = resized[:, 1:] > resized[:, :-1]

    # packbits flattens row-major and packs MSB-first, which is the same bit
    # order the previous per-bit shift loop produced.
    packed = np.packbits(bits)
    value = int.from_bytes(packed.tobytes(), "big")
    # packbits zero-pads the last byte when the bit count is not a multiple
    # of 8; shift that padding back out (a no-op for the 64-bit default).
    value >>= (-bits.size) % 8

    return PHashResult(
        value=value,
        roi_x1=x1, roi_y1=y1, roi_x2=x2, roi_y2=y2,
    )
157
+
158
+
159
+ def hamming_distance(a: int, b: int) -> int:
160
+ """Count differing bits between two 64-bit hashes (xor + popcount)."""
161
+ return int.bit_count(a ^ b) # Python 3.10+ stdlib popcount
162
+
163
+
164
def is_duplicate(
    a: PHashResult,
    b: PHashResult,
    threshold: int = DEFAULT_DUP_HAMMING_THRESHOLD,
) -> bool:
    """Return whether two hashes count as "the same frame" for finalize.

    The frames are duplicates when the Hamming distance between their
    ``value`` fields is at most ``threshold``.
    """
    distance = hamming_distance(a.value, b.value)
    return distance <= threshold
171
+
172
+
173
class ScrollDuplicationTracker:
    """Count consecutive near-identical frames and flag a stuck scroll.

    ``is_stuck`` becomes True once ``required_consecutive`` duplicate frames
    have been seen in a row (DD §2.3 CC3). Each frame is compared with the
    frame observed immediately before it.

    Usage (inside the flow's scroll loop)::

        tracker = ScrollDuplicationTracker(roi_kind="chat", required_consecutive=2)
        for i in range(max_chat_content_scroll_down):
            do_scroll()
            frame = screencap()
            if tracker.observe(decode(frame)).is_stuck:
                # Early finalize (CC3): the next scroll would land on the
                # same frame again, so capturing more is pointless.
                break

    The first observation only bootstraps the state. With
    ``required_consecutive=2``, a second frame within ``threshold`` gives a
    streak of 1 and a third one gives a streak of 2, at which point
    ``is_stuck`` is True.

    State is cleared only by an explicit :meth:`reset`, so a flow that
    crosses phases (chat loop -> resume subflow -> chat loop) can drop stale
    state at the phase boundary.
    """

    def __init__(
        self,
        roi_kind: Literal["chat", "detail"],
        *,
        threshold: int = DEFAULT_DUP_HAMMING_THRESHOLD,
        required_consecutive: int = 2,
    ) -> None:
        """Store the configuration and start with an empty history.

        Raises:
            ValueError: if ``required_consecutive`` is smaller than 1.
        """
        if required_consecutive < 1:
            raise ValueError("required_consecutive must be >= 1")
        self.roi_kind = roi_kind
        self.threshold = threshold
        self.required_consecutive = required_consecutive
        self._last: PHashResult | None = None
        self._consecutive: int = 0

    def reset(self) -> None:
        """Forget the previous frame and clear the duplicate streak."""
        self._last = None
        self._consecutive = 0

    @dataclass(frozen=True)
    class Observation:
        """Result of :meth:`observe` for the current frame.

        ``distance`` is the Hamming distance to the PREVIOUS frame, or -1
        when there is no previous frame (first-observation bootstrap).
        """

        phash: "PHashResult"
        distance: int
        consecutive_duplicates: int
        is_stuck: bool

    def observe(self, bgr: np.ndarray) -> "ScrollDuplicationTracker.Observation":
        """Hash *bgr*, compare it with the previous frame, update the streak."""
        current = compute_dhash(bgr, self.roi_kind)
        previous = self._last

        if previous is None:
            # Bootstrap: nothing to compare against yet.
            self._last = current
            return self.Observation(
                phash=current,
                distance=-1,
                consecutive_duplicates=0,
                is_stuck=False,
            )

        distance = hamming_distance(current.value, previous.value)
        self._last = current
        # A non-duplicate frame breaks the streak.
        self._consecutive = (
            self._consecutive + 1 if distance <= self.threshold else 0
        )
        return self.Observation(
            phash=current,
            distance=distance,
            consecutive_duplicates=self._consecutive,
            is_stuck=self._consecutive >= self.required_consecutive,
        )
249
+
250
+
251
+ __all__ = [
252
+ "CHAT_ROI",
253
+ "DETAIL_ROI",
254
+ "DHASH_W",
255
+ "DHASH_H",
256
+ "DEFAULT_DUP_HAMMING_THRESHOLD",
257
+ "MANUAL_LABELING_PHASH_IDENTICAL_MAX_HAMMING",
258
+ "MANUAL_LABELING_SIM95_MAX_HAMMING",
259
+ "MANUAL_LABELING_SIM95_STREAK_REQUIRED",
260
+ "manual_labeling_phash_matches",
261
+ "PHashResult",
262
+ "crop_roi",
263
+ "compute_dhash",
264
+ "hamming_distance",
265
+ "is_duplicate",
266
+ "ScrollDuplicationTracker",
267
+ ]
@@ -0,0 +1,290 @@
1
+ """PRD 术语「5. 群聊天聊天气泡(y)坐标」— 纵向筒几何(头像时间轴)。
2
+
3
+ 不识别气泡圆角矢量;仅以 **左/右侧发言头像** 顶的 y(时间序合并)划出
4
+ 发言人竖直筒 ``[y_n, y_{n+1})``,供 ``nickname_ocr_simple`` 在筒内切段。
5
+
6
+ 常量均为 **1080 设计宽度基准**,调用方乘以 ``scale_w = width/1080``。
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import numpy as np
12
+ from dataclasses import dataclass
13
+ from typing import Literal, Optional
14
+
15
+ from loguru import logger
16
+
17
+ from screenshot_vision_algorithm.android.wechat.algorithms.template_matching import (
18
+ detect_chat_back_chevron,
19
+ detect_chat_title_more_dots,
20
+ detect_wechat_main_bottom_tab_bar_four_columns,
21
+ detect_wechat_main_title_bottom_y,
22
+ )
23
+
24
#: Orphan threshold: maximum gap between the top of the chat content and the
#: first avatar's y ("1-y_top") before the gap is treated as an orphan band
#: (baseline px @ 1080).
CHAT_BAND_FIRST_AVATAR_TOP_GAP_REJECT_BASELINE = 30

#: Upward extension of the first speaker band (one avatar height, to absorb
#: geometric error); see PRD §5 supplement 2.
CHAT_FIRST_BAND_TOP_EXTEND_BASELINE = 100

#: Space reserved for the bottom input (composer) area; same lineage as the
#: main-list scanner (baseline).
CHAT_COMPOSER_RESERVE_BOTTOM_BASELINE = 160

#: Empirical offset from the bottom of the group/1:1 chat title bar to the
#: first chat content (baseline px @ 1080).
CHAT_TITLE_BAR_TO_CONTENT_OFFSET_BASELINE = 4
35
+
36
+ #: Empirical offset from the bottom of the WeChat title to the first conversation row; reuses the list page's constant family (see template_matching).
37
+ from screenshot_vision_algorithm.android.wechat.algorithms.template_matching import WECHAT_TITLE_TO_FIRST_ROW_OFFSET_BASELINE
38
+
39
# Left/right avatar column ROI on the chat page (baseline width 1080).
#: Width of each side column that is searched for avatars.
CHAT_SIDE_AVATAR_COLUMN_WIDTH_BASELINE = 108
#: Minimum distance between detected circle centres (Hough ``minDist``).
CHAT_SIDE_AVATAR_HOUGH_MIN_DIST_BASELINE = 68
#: Smallest and largest circle radius accepted by the Hough search.
CHAT_SIDE_AVATAR_MIN_R_BASELINE = 14
CHAT_SIDE_AVATAR_MAX_R_BASELINE = 54
44
+
45
+ try:
46
+ import cv2
47
+ except ImportError:
48
+ cv2 = None # type: ignore[assignment]
49
+
50
+ Side = Literal["left", "right"]
51
+
52
+
53
@dataclass(frozen=True)
class ChatVerticalBounds:
    """Vertical extent of the visible chat area.

    Excludes the title bar / status area at the top and the input bar at the
    bottom.
    """

    # Y coordinate where chat content starts.
    y_top: int
    # Exclusive bottom edge: content rows satisfy y_top <= y < y_bottom_excl.
    y_bottom_excl: int
59
+
60
+
61
@dataclass(frozen=True)
class AvatarTimelineEntry:
    """One avatar on the merged timeline: its y and the column it came from."""

    y_top: int
    side: Side
65
+
66
+
67
@dataclass(frozen=True)
class PrdChatVerticalLayout:
    """Vertical layout of a single chat screenshot according to PRD §5."""

    content_bounds: ChatVerticalBounds
    #: Top-gap orphan interval ``[orphan_y0, orphan_y1)``, produced when
    #: ``(1-y_top - content_top)`` exceeds the threshold; otherwise ``None``.
    orphan_top_band: Optional[tuple[int, int]]
    #: Speaker bands ``[y_n, y_{n+1})`` in timeline order; they do not
    #: overlap the orphan band.
    speaker_bands: tuple[tuple[int, int], ...]
    #: True when no avatar was detected: the whole page should be recorded
    #: as "no owning nickname found".
    no_avatars_all_orphan: bool
78
+
79
+
80
+ def _sw(scale_w: float) -> float:
81
+ return max(float(scale_w), 1e-6)
82
+
83
+
84
def compute_chat_content_vertical_bounds(
    screen_bgr: np.ndarray,
    scale_w: float,
    *,
    screen_h: int,
) -> ChatVerticalBounds:
    """Estimate the top and bottom of the chat content from chrome anchors.

    The top is taken, in order of preference, from:
      1. the chat title bar, when both the back chevron and the "more" dots
         are detected;
      2. the main conversation list title, when it is detected together
         with the four-tab bottom bar;
      3. a fixed fallback of 280 baseline px.

    The bottom is the screen height minus the composer reserve. The result
    always leaves at least 8 px between top and bottom.
    """
    scale = _sw(scale_w)
    height = int(screen_h)

    # Chat page (group or 1:1): title-bar anchors.
    back_icon = detect_chat_back_chevron(screen_bgr, scale_w)
    more_icon = detect_chat_title_more_dots(screen_bgr, scale_w)

    if back_icon is not None and more_icon is not None:
        # Title bar bottom = the lower of the two icon bottoms, plus a gap.
        title_bar_bottom = max(
            back_icon.y + back_icon.h,
            more_icon.y + more_icon.h,
        )
        raw_top = (
            float(title_bar_bottom)
            + CHAT_TITLE_BAR_TO_CONTENT_OFFSET_BASELINE * scale
        )
    else:
        # Main conversation list page: WeChat title plus four-tab bottom bar.
        title_bottom = detect_wechat_main_title_bottom_y(screen_bgr, scale_w)
        on_main_list = (
            title_bottom is not None
            and detect_wechat_main_bottom_tab_bar_four_columns(
                screen_bgr, scale_w,
            )
        )
        if on_main_list:
            raw_top = (
                float(title_bottom)
                + WECHAT_TITLE_TO_FIRST_ROW_OFFSET_BASELINE * scale
            )
        else:
            # Fallback: same order of magnitude as the top of the first
            # list row.
            raw_top = 280 * scale

    y_top = int(round(raw_top))
    y_top = max(0, min(height - 8, y_top))
    composer_reserve = int(round(CHAT_COMPOSER_RESERVE_BOTTOM_BASELINE * scale))
    y_bottom_excl = max(y_top + 8, height - composer_reserve)
    return ChatVerticalBounds(y_top=y_top, y_bottom_excl=y_bottom_excl)
122
+
123
+
124
def _hough_ytops_in_column(
    gray_full: np.ndarray,
    *,
    x1: int,
    x2: int,
    y1: int,
    y2: int,
    scale_w: float,
) -> list[int]:
    """Find circular avatars in one column and return their y positions.

    Runs a Hough circle search on ``gray_full[y1:y2, x1:x2]`` and returns
    the detected y coordinates in full-image space, sorted ascending.

    Returns an empty list when OpenCV is unavailable, when the column is
    too small (at most 4 px tall or 8 px wide), or when nothing is found.
    """
    if cv2 is None or y2 <= y1 + 4 or x2 <= x1 + 8:
        return []
    crop = gray_full[y1:y2, x1:x2]
    if crop.size == 0:
        return []
    blur = cv2.GaussianBlur(crop, (5, 5), 0)
    sw = _sw(scale_w)
    circles = cv2.HoughCircles(
        blur,
        cv2.HOUGH_GRADIENT,
        dp=1.2,
        minDist=int(round(CHAT_SIDE_AVATAR_HOUGH_MIN_DIST_BASELINE * sw)),
        param1=90,
        param2=20,
        minRadius=max(8, int(round(CHAT_SIDE_AVATAR_MIN_R_BASELINE * sw))),
        maxRadius=int(round(CHAT_SIDE_AVATAR_MAX_R_BASELINE * sw)),
    )
    if circles is None:
        return []
    # HoughCircles yields (center_x, center_y, radius) per circle, relative
    # to the crop. The earlier unpack order took the radius as the y value.
    # NOTE(review): this returns the circle CENTRE y, while the function name
    # and its callers speak of avatar "tops" - confirm whether the radius
    # should be subtracted.
    return sorted(
        int(y1 + float(cy))
        for _cx, cy, _radius in circles[0]
        if y1 <= y1 + cy < y2
    )
158
+
159
+
160
def detect_chat_side_avatar_ytops(
    screen_bgr: np.ndarray,
    scale_w: float,
    bounds: ChatVerticalBounds,
) -> tuple[list[int], list[int]]:
    """Return avatar y positions for the left and the right column.

    The left column prefers the ``left_avatar_column`` detector shared with
    the list page and falls back to the Hough search when that import
    fails. The right column always uses the Hough search. Both lists are
    empty when OpenCV is unavailable.
    """
    if cv2 is None:
        return [], []

    img_h, img_w = screen_bgr.shape[:2]
    scale = _sw(scale_w)
    gray = cv2.cvtColor(screen_bgr, cv2.COLOR_BGR2GRAY)
    top = max(0, bounds.y_top)
    bottom = min(img_h, bounds.y_bottom_excl)
    column_w = int(round(CHAT_SIDE_AVATAR_COLUMN_WIDTH_BASELINE * scale))

    try:
        # NOTE(review): ``processor`` is not part of this package's own
        # namespace - confirm the module path is still current.
        from processor.left_avatar_column import (
            detect_left_avatar_column_layout,
            left_avatar_ytops,
        )

        layout = detect_left_avatar_column_layout(
            screen_bgr,
            scale,
            y_top=top,
            y_bottom_excl=bottom,
        )
        left_ys = left_avatar_ytops(layout)
    except ImportError:
        left_ys = _hough_ytops_in_column(
            gray,
            x1=0,
            x2=min(img_w, column_w),
            y1=top,
            y2=bottom,
            scale_w=scale_w,
        )

    right_ys = _hough_ytops_in_column(
        gray,
        x1=max(0, img_w - column_w),
        x2=img_w,
        y1=top,
        y2=bottom,
        scale_w=scale_w,
    )
    return left_ys, right_ys
199
+
200
+
201
def merge_avatar_ytops_time_order(
    left_ytops: list[int],
    right_ytops: list[int],
) -> tuple[AvatarTimelineEntry, ...]:
    """Merge left and right avatar y values into one timeline sorted by y.

    Entries with exactly the same y are ordered left before right.
    """
    entries = [
        AvatarTimelineEntry(y_top=int(y), side="left") for y in left_ytops
    ]
    entries += [
        AvatarTimelineEntry(y_top=int(y), side="right") for y in right_ytops
    ]
    # False sorts before True, so "left" wins a tie on y.
    return tuple(
        sorted(entries, key=lambda entry: (entry.y_top, entry.side != "left"))
    )
214
+
215
+
216
def build_prd_speaker_vertical_bands(
    timeline: tuple[AvatarTimelineEntry, ...],
    bounds: ChatVerticalBounds,
    *,
    scale_w: float,
    first_avatar_gap_reject_baseline: int = CHAT_BAND_FIRST_AVATAR_TOP_GAP_REJECT_BASELINE,
    first_band_top_extend_baseline: int = CHAT_FIRST_BAND_TOP_EXTEND_BASELINE,
) -> PrdChatVerticalLayout:
    """Build the PRD §5 top-gap orphan and half-open speaker bands.

    Each speaker band runs from one avatar y to the next (the last one runs
    to the bottom of the content). The first band is additionally extended
    upwards by ``first_band_top_extend_baseline`` (PRD §5 supplement 2: one
    avatar height, to absorb geometric error and system changes). All bands
    are clamped to ``bounds`` and are at least 1 px tall.

    An empty ``timeline`` yields no bands and ``no_avatars_all_orphan=True``.
    """
    scale = _sw(scale_w)
    gap_threshold = int(round(int(first_avatar_gap_reject_baseline) * scale))
    content_top = int(bounds.y_top)
    content_bottom = int(bounds.y_bottom_excl)

    if not timeline:
        return PrdChatVerticalLayout(
            content_bounds=bounds,
            orphan_top_band=None,
            speaker_bands=(),
            no_avatars_all_orphan=True,
        )

    # A large gap above the first avatar belongs to nobody on this frame.
    first_avatar_y = int(timeline[0].y_top)
    orphan: Optional[tuple[int, int]] = (
        (content_top, first_avatar_y)
        if first_avatar_y - content_top > gap_threshold
        else None
    )

    # De-duplicate along the timeline: avatars too close to the previously
    # kept one are treated as a repeated detection of the same avatar.
    min_separation = int(round(24 * scale))
    starts: list[int] = []
    for entry in timeline:
        y = int(entry.y_top)
        if starts and y - starts[-1] < min_separation:
            continue
        starts.append(y)

    # Upward shift of the first band (PRD §5 supplement 2): 100 px @ 1080
    # by default.
    first_band_extend = int(round(int(first_band_top_extend_baseline) * scale))

    # Band i ends where band i + 1 starts; the last band ends at the bottom.
    ends = starts[1:] + [content_bottom]
    bands: list[tuple[int, int]] = []
    for index, (start, end) in enumerate(zip(starts, ends)):
        if index == 0:
            start -= first_band_extend
        band_top = max(content_top, min(start, content_bottom - 1))
        band_bottom = max(band_top + 1, min(end, content_bottom))
        bands.append((band_top, band_bottom))

    # After the upward shift the first band may overlap the orphan band;
    # shrink (or drop) the orphan so the partition stays non-overlapping.
    if orphan is not None and bands:
        orphan_top_y, orphan_bottom_y = orphan
        first_band_start = bands[0][0]
        if first_band_start <= orphan_top_y:
            orphan = None
        elif first_band_start < orphan_bottom_y:
            orphan = (orphan_top_y, first_band_start)

    return PrdChatVerticalLayout(
        content_bounds=bounds,
        orphan_top_band=orphan,
        speaker_bands=tuple(bands),
        no_avatars_all_orphan=False,
    )
283
+
284
+
285
+ __all__ = [
286
+ "build_prd_speaker_vertical_bands",
287
+ "compute_chat_content_vertical_bounds",
288
+ "detect_chat_side_avatar_ytops",
289
+ "merge_avatar_ytops_time_order",
290
+ ]