screenshot-vision-algorithm 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- screenshot_vision_algorithm/__init__.py +48 -0
- screenshot_vision_algorithm/_config.py +61 -0
- screenshot_vision_algorithm/android/__init__.py +1 -0
- screenshot_vision_algorithm/android/wechat/__init__.py +1 -0
- screenshot_vision_algorithm/android/wechat/algorithms/__init__.py +0 -0
- screenshot_vision_algorithm/android/wechat/algorithms/avatar_column.py +209 -0
- screenshot_vision_algorithm/android/wechat/algorithms/badge_detection.py +275 -0
- screenshot_vision_algorithm/android/wechat/algorithms/card_bbox.py +1000 -0
- screenshot_vision_algorithm/android/wechat/algorithms/phash_utils.py +267 -0
- screenshot_vision_algorithm/android/wechat/algorithms/speaker_band.py +290 -0
- screenshot_vision_algorithm/android/wechat/algorithms/template_matching.py +2163 -0
- screenshot_vision_algorithm/android/wechat/algorithms/title_ocr.py +143 -0
- screenshot_vision_algorithm/android/wechat/merge/__init__.py +0 -0
- screenshot_vision_algorithm/android/wechat/merge/multipage.py +157 -0
- screenshot_vision_algorithm/android/wechat/ocr/__init__.py +0 -0
- screenshot_vision_algorithm/android/wechat/ocr/avatar_guard.py +434 -0
- screenshot_vision_algorithm/android/wechat/ocr/badge_ocr.py +232 -0
- screenshot_vision_algorithm/android/wechat/ocr/nickname_binding.py +1888 -0
- screenshot_vision_algorithm/android/wechat/ocr/text_ocr_adapter.py +625 -0
- screenshot_vision_algorithm/android/wechat/profiles/__init__.py +0 -0
- screenshot_vision_algorithm/android/wechat/profiles/android.py +53 -0
- screenshot_vision_algorithm/android/wechat/profiles/harmony.py +10 -0
- screenshot_vision_algorithm/android/wechat/profiles/ios.py +53 -0
- screenshot_vision_algorithm/android/wechat/templates/android/8.0.69/chat_back_chevron.png +0 -0
- screenshot_vision_algorithm/android/wechat/templates/android/8.0.69/chat_input_emoji_smile.png +0 -0
- screenshot_vision_algorithm/android/wechat/templates/android/8.0.69/chat_input_plus.png +0 -0
- screenshot_vision_algorithm/android/wechat/templates/android/8.0.69/chat_input_voice.png +0 -0
- screenshot_vision_algorithm/android/wechat/templates/android/8.0.69/chat_title_more_dots.png +0 -0
- screenshot_vision_algorithm/android/wechat/templates/android/8.0.69/favorite_label.png +0 -0
- screenshot_vision_algorithm/android/wechat/templates/android/8.0.69/new_messages_hint_suffix.png +0 -0
- screenshot_vision_algorithm/android/wechat/templates/android/8.0.69/unread_divider_hint.png +0 -0
- screenshot_vision_algorithm/android/wechat/templates/android/8.0.69/unread_divider_hint_v2_textonly.png +0 -0
- screenshot_vision_algorithm/android/wechat/templates/android/8.0.69/wechat_note_header.png +0 -0
- screenshot_vision_algorithm/android/xhs/__init__.py +4 -0
- screenshot_vision_algorithm/android/zhihu/__init__.py +4 -0
- screenshot_vision_algorithm/png_utils.py +86 -0
- screenshot_vision_algorithm-0.3.0.dist-info/METADATA +425 -0
- screenshot_vision_algorithm-0.3.0.dist-info/RECORD +40 -0
- screenshot_vision_algorithm-0.3.0.dist-info/WHEEL +5 -0
- screenshot_vision_algorithm-0.3.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,1000 @@
|
|
|
1
|
+
"""Resume-thumbnail card geometry from ``favorite_label`` template hits.
|
|
2
|
+
|
|
3
|
+
Given the (variable) set of ``favorite_label.png`` hits found across one chat
|
|
4
|
+
screenshot, derive:
|
|
5
|
+
- the bounding box (top/bottom/left/right) of each resume-thumbnail card
|
|
6
|
+
- the recommended click center for ``tap_thumbnail``
|
|
7
|
+
- the ``click_side`` half (``left`` / ``right``) relative to the
|
|
8
|
+
screen midline — stored in ``click_context.click_side`` for gate-1e/4
|
|
9
|
+
group-A discipline
|
|
10
|
+
|
|
11
|
+
Two-stage strategy (DD section 2.4.x A + section 3.3 1):
|
|
12
|
+
|
|
13
|
+
Stage 1: ``favorite_label.png`` template match (done upstream by
|
|
14
|
+
``collector.template_matcher``) returns a list of (x, y, score)
|
|
15
|
+
hits, one per card (the label is the strongest on-card anchor
|
|
16
|
+
and appears exactly once per card).
|
|
17
|
+
|
|
18
|
+
Stage 2: For each hit:
|
|
19
|
+
- card.top = prev_hit.y2 + FAVORITE_TAIL_OFFSET + 1
|
|
20
|
+
(for i=0, fall back to ``y1 - FAVORITE_TO_CARD_TOP_OFFSET``)
|
|
21
|
+
- card.bottom = hit.y2 + FAVORITE_TAIL_OFFSET (anchor sits near bottom)
|
|
22
|
+
- card.left = 0, card.right = screen_w
|
|
23
|
+
(the label only gives us y; the card always spans full text column
|
|
24
|
+
width, and tapping anywhere horizontally inside it opens the note)
|
|
25
|
+
|
|
26
|
+
The click target is the **center of the ``favorite_label`` template
|
|
27
|
+
hit** (the ``收藏`` crop — ``chat_profiles/.../favorite_label.png``,
|
|
28
|
+
**70×60 @ 1080 baseline**, scaled by ``scale_w`` at match time). This
|
|
29
|
+
keeps taps on the label foot instead of the card mid-column (avoids
|
|
30
|
+
right-side image thumbnails and lands on the note entry chrome).
|
|
31
|
+
|
|
32
|
+
Constants are anchored to the 1080x2248 baseline; runtime ``scale_w`` is
|
|
33
|
+
applied when loading templates.
|
|
34
|
+
|
|
35
|
+
Aligned with:
|
|
36
|
+
DD section 2.4.x A (two-stage thumbnail detection)
|
|
37
|
+
DD section 3.3 step 1 (tap_thumbnail)
|
|
38
|
+
chat_profiles/README.md v0.2.2 UI constants
|
|
39
|
+
"""
|
|
40
|
+
|
|
41
|
+
from __future__ import annotations
|
|
42
|
+
|
|
43
|
+
from dataclasses import dataclass
|
|
44
|
+
from typing import Iterable, Optional, Sequence
|
|
45
|
+
|
|
46
|
+
import cv2
|
|
47
|
+
import numpy as np
|
|
48
|
+
|
|
49
|
+
#: Distance from the top of the ``favorite_label.png`` hit to the top of the
#: enclosing note card, measured at 1080x2248 baseline. Used as an
#: **upper-bound heuristic** for the first hit only (subsequent hits derive
#: their top from the previous hit's bottom + ``FAVORITE_TAIL_OFFSET``).
FAVORITE_TO_CARD_TOP_OFFSET_BASELINE = 421

#: Gap from the bottom of the ``favorite_label.png`` hit to the card's
#: actual bottom edge, at 1080x2248 baseline. Used to extend the card bbox
#: past the label.
FAVORITE_TAIL_OFFSET_BASELINE = 60

#: ``favorite_label.png`` raster size @ 1080-wide baseline (`chat_profiles/README.md`).
FAVORITE_LABEL_TEMPLATE_W_BASELINE = 70
FAVORITE_LABEL_TEMPLATE_H_BASELINE = 60

#: **edb1a89f 1080×2248** empirical row step (px @ baseline width):
#:
#: - ``session_20260506193232`` ``scr_004`` ``chat_resume_rescan``: real card tops
#:   424→966 → **Δtop = 542** (minimal full-notebook row step that frame).
#: - Same session ``scr_009``: first row ``top`` clamps to **0**, next real row top
#:   **396** → **Δtop = 396** (pseudo strip produced by the 421 px top-offset
#:   heuristic eating grey/chrome).
#:
#: When ``card.top == 0`` and another card sits below: if
#: ``Δtop < round(REFERENCE_RESUME_CARD_TOP_GAP_BASELINE * scale_w)
#: - round(FAVORITE_LABEL_TEMPLATE_H_BASELINE * scale_w)``
#: (same as ``Δtop - H_fixed <`` the favorite-label rectangle height in px),
#: drop the clamped strip as a false clickable card. Single top-clamped hits
#: (no sibling) stay — no pitch to compare (caller may rely on OCR / downstream).
REFERENCE_RESUME_CARD_TOP_GAP_BASELINE: int = 542

#: Session de-dup: for ``chat_resume_rescan`` / the same card seen on
#: consecutive frames, compare the **vertical axis only**: the intersection
#: length of the two cards' row intervals ``[top, bottom]`` (endpoint pixels
#: inclusive) divided by ``min(h_a, h_b)`` >= this threshold → the card is
#: treated as already processed by the in-session path. 2D area overlap is
#: **not** used (notebook-derived cards already span the full column width, so
#: the horizontal axis contributes little to de-duplication, and skipping it
#: saves a few multiply-adds). Still an O(K) item-by-item comparison with
#: K <= the number of favorite-label hits on screen; negligible **relative to
#: screencap + OpenCV**.
PROCESSED_CARD_MATCH_MIN_OVERLAP_RATIO: float = 0.70

#: Anti-detection horizontal jitter (px @ 1080 baseline) applied by
#: ``enter_note`` on every tap attempt: ``tap_x = click_x ± random(0..15)``.
#: Y is never jittered (speaker attribution depends on exact Y).
TAP_ANTI_DETECT_X_JITTER_PX: int = 15

#: 3b gap merge/extend decision distance (px @ 1080 baseline, multiplied by
#: scale_w at runtime): when the favorite anchor falls in the gap between two
#: adjacent zones of the same vline, take the first hline below the anchor.
#: Distance from that hline to the start of the lower zone <= this value →
#: both zones belong to the same card (interrupted by the favorite-label row)
#: and are merged; > this value → different cards, and the upper zone is
#: extended down to that hline, which becomes the card's lower bound.
GAP_HLINE_TO_NEXT_ZONE_MAX_BASELINE: int = 30

#: 3a vertical padding for excluding hlines that intersect the label
#: (px @ 1080 baseline, multiplied by scale_w at runtime): an hline whose y
#: lies within the favorite label's vertical range expanded by this value
#: above and below, and which horizontally intersects the label → excluded
#: (the label row's own mid-variance texture produces spurious hlines that
#: must not act as card boundaries).
FAV_LABEL_HLINE_EXCLUDE_PAD_BASELINE: int = 5

#: Zone-endpoint snap tolerance (px, image space, **not** scaled by scale_w):
#: the 7×7 box filter spreads the mid-variance mask by ±3 px at edges, so a
#: zone endpoint can overshoot its real boundary hline by 1-3 px. When
#: snapping, an hline may lie up to this many px inward of the zone endpoint,
#: so a tightly adjacent true boundary is not skipped.
ZONE_SNAP_BLUR_TOL_PX: int = 4
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
@dataclass(frozen=True)
class FavoriteLabelHit:
    """A single ``favorite_label.png`` template match on a chat screenshot.

    ``x`` / ``y`` locate the top-left corner of the match (OpenCV convention,
    the same layout ``collector.template_matcher.Hit`` carries). ``w`` / ``h``
    hold the template dimensions (possibly already scaled by ``scale_w``), so
    the bottom edge can be computed without re-loading the template image.
    ``score`` is the match score reported for this hit.
    """

    x: int
    y: int
    w: int
    h: int
    score: float

    @property
    def y1(self) -> int:
        """Top edge of the hit; an alias of ``y``."""
        return self.y

    @property
    def y2(self) -> int:
        """Bottom edge of the hit, i.e. ``y + h``."""
        top_edge, height = self.y, self.h
        return top_edge + height
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
@dataclass(frozen=True)
class ThumbnailCard:
    """Rectangle and tap target derived for one resume-note thumbnail.

    All coordinates live in raw-image pixel space, i.e. the same space as the
    source screenshot. ``click_x`` / ``click_y`` are the values the driver
    hands to ``adb shell input tap``. ``click_side`` carries the
    DD §2.4.x A ``click_context.click_side`` signal: which half of the screen
    the card is on (historically useful because WeChat sometimes shows the
    reply preview on the opposite side).
    """

    # 0-based; matches the favorite_label hit ordering (top→bottom).
    index: int
    # Vertical extent of the card.
    top: int
    bottom: int
    # Horizontal extent of the card.
    left: int
    right: int
    # Tap coordinates.
    click_x: int
    click_y: int
    # Either "left" or "right".
    click_side: str
    # The favorite-label hit this card was derived from.
    favorite_hit: FavoriteLabelHit
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
@dataclass(frozen=True)
class BubbleBbox:
    """Rectangle derived for one text-chat bubble.

    Coordinates are expressed in raw-image pixel space. Instances are built
    from the mid-variance zones that remain once card zones have been
    excluded (Step 4).
    """

    # Vertical extent.
    top: int
    bottom: int
    # Horizontal extent.
    left: int
    right: int
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def _compute_exact_card_and_bubble_bboxes(
    ordered_hits: list[FavoriteLabelHit],
    bgr_img: np.ndarray,
    screen_w: int,
    screen_h: int,
) -> tuple[list[ThumbnailCard], list[BubbleBbox]]:
    """PRD S6: vline-segment + hline-boundary card + bubble bbox detection.

    Steps:
      1. Convert to grayscale, compute local variance via box filter,
         derive mid-variance mask (gray 190-248, std 6-25).
      2. Find right-side vline candidates via vertical projection;
         score them (total + longest run) as the per-card try order.
      3. Segment every candidate vline into continuous runs (zones);
         clamp to [TOP, BOT].
      4. Find card-boundary hlines (span > 50% card_w); per vline keep
         hlines whose endpoint is within 100px of the vline; exclude
         hlines intersecting any favorite label (3a — the label row's own
         texture creates spurious hlines).
      5. Per fav anchor, walk candidate vlines: zone containment →
         normal hline snap; anchor in a zone gap → 3b merge/extend via
         the first hline below the anchor; enforce fav_cy ∈ bbox.
      6. Remaining zones not vertically intersecting any card bbox are
         emitted as text-bubble bboxes.

    Args:
        ordered_hits: favorite-label hits. The anti-overlap logic in Step 5
            carries ``prev_bottom`` from one hit to the next, so the hits
            are presumably expected top → bottom (callers in this file sort
            by ``y`` first).
        bgr_img: image passed to ``cv2.cvtColor(..., COLOR_BGR2GRAY)``.
        screen_w: width used both to scale the 1080-baseline pixel constants
            and as the column bound when scanning the mask. NOTE(review):
            columns ``0..screen_w-1`` are indexed directly, so this assumes
            ``screen_w`` does not exceed the image width — confirm with
            callers.
        screen_h: only used to clamp ``click_y``.

    Returns:
        ``(cards, bubbles)``. Both lists are empty when no vline candidate
        survives the filters or no candidate vline has a usable zone. A hit
        for which no vline yields a bbox containing its center is dropped,
        so ``ThumbnailCard.index`` (the position in ``ordered_hits``) may
        have gaps.
    """
    gray = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2GRAY).astype(np.float32)

    # ``_W`` is unpacked but never used; column bounds come from screen_w.
    H, _W = gray.shape
    # Vertical working window: 10% .. 93% of the image height. The snap
    # comments below call these fallbacks TOP_BAR_BOT / BOT_BAR_TOP.
    TOP = int(H * 0.10)
    BOT = int(H * 0.93)
    # 108 px at the 1080 baseline; presumably the right edge of the avatar
    # column (it is used as the left edge of every emitted bbox) — TODO confirm.
    AR = int(108 * screen_w / 1080)
    card_w = screen_w - AR
    sw = screen_w / 1080.0  # 1080-baseline scale for px constants

    # Step 1 — fast local variance via box filter (O(N) via integral images)
    win = 7
    mean = cv2.boxFilter(gray, -1, (win, win), normalize=True)
    sq_mean = cv2.boxFilter(gray * gray, -1, (win, win), normalize=True)
    # E[x^2] - E[x]^2, clipped at 0 to absorb float rounding.
    var_map = np.maximum(sq_mean - mean * mean, 0.0)
    std_map = np.sqrt(var_map)

    # Mid-variance mask: gray 190-248, std 6-25
    mid = (
        (gray >= 190.0) & (gray <= 248.0) &
        (std_map >= 6.0) & (std_map <= 25.0)
    )

    # Step 2 — right-side vline candidates
    # PRD: per-column max *continuous-segment* length > 15% of zone height
    # (not total pixel sum across the column).
    vh = BOT - TOP
    min_seg = vh * 0.15

    cand_cols = []
    for x in range(screen_w):
        col = mid[TOP:BOT, int(x)]
        max_seg = 0
        cur = 0
        for v in col:
            if v:
                cur += 1
            else:
                if cur > max_seg:
                    max_seg = cur
                cur = 0
        # A run that reaches the bottom of the window is closed here.
        if cur > max_seg:
            max_seg = cur
        if max_seg > min_seg:
            cand_cols.append(x)

    # PRD: group qualifying columns by >30px gap, take cluster mean
    vd_lines = []
    if cand_cols:
        cluster = [cand_cols[0]]
        for i in range(1, len(cand_cols)):
            if cand_cols[i] - cand_cols[i - 1] > 30:
                vd_lines.append(int(round(sum(cluster) / len(cluster))))
                cluster = [cand_cols[i]]
            else:
                cluster.append(cand_cols[i])
        vd_lines.append(int(round(sum(cluster) / len(cluster))))

    # PRD §B.2 Step 2: the vline's X distance from the speaker avatar should
    # be >= 260 px (1080 baseline)
    min_card_x_gap = int(260 * screen_w / 1080)
    vd_lines = [vx for vx in vd_lines if vx - AR >= min_card_x_gap]

    if not vd_lines:
        return [], []

    # Pick best vline: max total mid pixels + longest continuous segment
    # (scored over the full column height, not just [TOP, BOT]).
    vline_scores: dict[int, float] = {}
    best_vx, best_score = None, 0.0
    for vx in vd_lines:
        col = mid[:, vx]
        segs = []
        k = 0
        while k < H:
            if col[k]:
                s = k
                while k < H and col[k]:
                    k += 1
                segs.append(k - s)
            else:
                k += 1
        total = float(sum(segs))
        longest = float(max(segs) if segs else 0)
        score = total + longest
        vline_scores[vx] = score
        if score > best_score:
            best_score = score
            best_vx = vx

    # Only reachable when every candidate scored 0 (strict ``>`` above).
    if best_vx is None:
        return [], []

    # Step 3 — vline segments, computed per candidate vline. A fav-anchored
    # card may sit on a different right-edge vline than the frame-global best
    # one when plain text bubbles and note cards mix in one frame (the bubble
    # edge can out-score the card edge — 20260611 session_185429 C1 case).
    def _zones_for(vx: int) -> list[tuple[int, int]]:
        # Inclusive (start, end) runs of mask pixels in column ``vx``.
        colv = mid[:, int(vx)]
        segs: list[tuple[int, int]] = []
        yy = 0
        while yy < H:
            if colv[yy]:
                s = yy
                while yy < H and colv[yy]:
                    yy += 1
                segs.append((s, yy - 1))
            else:
                yy += 1
        # Clamp to [TOP, BOT], drop tiny
        return [
            (max(TOP, s), min(BOT, e))
            for s, e in segs
            if min(BOT, e) - max(TOP, s) > 10
        ]

    # Try order: global best first, then the rest by descending score.
    vline_order: list[int] = [int(best_vx)] + sorted(
        (int(vx) for vx in vd_lines if vx != best_vx),
        key=lambda vx: vline_scores[vx],
        reverse=True,
    )
    zones_by_vx = {vx: _zones_for(vx) for vx in vline_order}
    if not any(zones_by_vx.values()):
        return [], []

    # Step 4 — card boundary hlines (span > 50% card_w, X-near vline,
    # max gray gradient >= 30 = white→gray boundary)
    # NOTE(review): no gradient threshold is applied in this function; the
    # ``white_pct_diff`` value computed below is stored in each tuple but not
    # consulted by any filter here — confirm whether that is intended.
    hproj = np.sum(mid[:, AR:], axis=1).astype(float)
    # Rows whose mask-pixel count right of AR exceeds 25% of card_w.
    above = hproj > card_w * 0.25
    XLIMIT = int(100 * screen_w / 1080)
    all_hd: list[tuple[int, int, int, int, int]] = []  # (hy,span,left_x,right_x,white_pct_diff)
    i = 0
    while i < H:
        if above[i]:
            # Collapse a run of consecutive qualifying rows [i, j) to its
            # middle row.
            j = i + 1
            while j < H and above[j]:
                j += 1
            hy = (i + j - 1) // 2
            hrow = int(hy)
            row = mid[hrow, :]
            seg_info: list[tuple[int, int, int]] = []  # (len, start_x, end_x)
            k = 0
            while k < screen_w:
                if row[k]:
                    s = k
                    while k < screen_w and row[k]:
                        k += 1
                    seg_info.append((k - s, s, k - 1))
                else:
                    k += 1
            # The hline is represented by its longest horizontal run.
            if seg_info:
                span, left_x, right_x = max(seg_info, key=lambda t: t[0])
            else:
                span, left_x, right_x = 0, 0, 0
            # Vertical white-pct change across this hline: real white→gray
            # boundaries show a large drop in white-pixel ratio above vs
            # below; uniform UI separators (e.g. y=206) show near-zero.
            col = gray[:, AR:int(best_vx)]
            H_img = col.shape[0]
            r1 = max(0, hrow - 2)
            r2 = min(H_img, hrow + 2)
            above_zone = col[r1:hrow, :]
            below_zone = col[hrow:r2, :]
            above_white = int((above_zone > 244).sum())
            below_white = int((below_zone > 244).sum())
            above_pct = above_white / above_zone.size if above_zone.size else 0.0
            below_pct = below_white / below_zone.size if below_zone.size else 0.0
            white_pct_diff = int(abs(above_pct - below_pct) * 100)
            all_hd.append((hy, span, left_x, right_x, white_pct_diff))
            i = j
        else:
            i += 1

    # 3a — exclude label-row artifact hlines: the favorite label row's own
    # mid-variance texture creates spurious hlines that must never act as
    # card boundaries. Horizontal-locality guard: only lines confined to
    # the label's neighborhood (±1 label width) are artifacts — the card's
    # TRUE bottom boundary also intersects the label's vertical range
    # (the label sits at the card bottom) but spans the full card width
    # and must stay eligible (20260611 18-session benchmark: vertical-only
    # exclusion extended every card bottom to the next card's top).
    # NOTE(review): the hline filter below also requires span > 50% of
    # card_w, while this guard only matches runs confined to about three
    # label widths; with the 70 px baseline label that looks unreachable —
    # confirm.
    fav_pad = int(round(FAV_LABEL_HLINE_EXCLUDE_PAD_BASELINE * sw))

    def _hline_intersects_fav_label(
        hy: int, left_x: int, right_x: int,
    ) -> bool:
        for h_ in ordered_hits:
            if (h_.y - fav_pad) <= hy <= (h_.y2 + fav_pad):
                if (
                    left_x >= h_.x - h_.w
                    and right_x <= h_.x + 2 * h_.w
                ):
                    return True
        return False

    def _hline_endpoint_near_vline(
        left_x: int, right_x: int, vx: int, x_limit: int,
    ) -> bool:
        # It is enough for the hline segment to cover the vline x
        # (±x_limit): real card-boundary hlines often fuse with full-width
        # separator rows (timestamp rows etc.), so their endpoints lie far
        # beyond 100 px from the vline while the line body does pass through
        # it (20260611 191430 scr_002 benchmark regression); a pure
        # endpoint-distance test would exclude such true boundaries. Short
        # hlines far from the vline are still rejected by this criterion.
        return (left_x - x_limit) <= int(vx) <= (right_x + x_limit)

    # Per vline: eligible hlines, sorted (tuples sort by hy first).
    hlines_by_vx: dict[int, list[tuple[int, int, int, int, int]]] = {
        vx: sorted(
            (hy, span, left_x, right_x, white_pct_diff)
            for hy, span, left_x, right_x, white_pct_diff in all_hd
            if TOP <= hy <= BOT and span > card_w * 0.5
            and _hline_endpoint_near_vline(left_x, right_x, vx, XLIMIT)
            and not _hline_intersects_fav_label(hy, left_x, right_x)
        )
        for vx in vline_order
    }

    # Step 5 — match vline zones to fav hits, snap to hlines. Per hit, walk
    # candidate vlines (best-scored first) and accept the first bbox that
    # actually contains the fav anchor: the favorite label lives INSIDE its
    # card, so a bbox excluding it is a mis-matched zone, not this card.
    gap_tol = max(1, int(round(GAP_HLINE_TO_NEXT_ZONE_MAX_BASELINE * sw)))
    cards: list[ThumbnailCard] = []
    prev_bottom: int | None = None  # anti-overlap: enforce card boundaries non-overlapping
    for idx, hit in enumerate(ordered_hits):
        fav_cy = hit.y + hit.h // 2

        chosen: tuple[int, int, int] | None = None  # (vx, exact_y1, exact_y2)
        for vx in vline_order:
            zones = zones_by_vx[vx]
            if not zones:
                continue
            card_hlines = hlines_by_vx[vx]

            def _snap(y1_: int, y2_: int) -> tuple[int, int]:
                # Box-filter edge spread: a zone endpoint can overshoot the
                # real boundary hline by 1-3 px, so allow hlines up to
                # ZONE_SNAP_BLUR_TOL_PX inward of the endpoint.
                # Upper: "the nearest white-to-gray mid-variance hline above
                # (its other end must be within ~100 px of the speaker
                # avatar), or TOP_BAR_BOT"
                upper = [h for h in card_hlines
                         if TOP <= h[0] < y1_ + ZONE_SNAP_BLUR_TOL_PX
                         and h[2] < AR + XLIMIT]
                ey1 = max(upper, key=lambda h: h[0])[0] if upper else TOP
                # Lower: "the nearest white-to-gray mid-variance hline below,
                # or BOT_BAR_TOP"
                lower = [h for h in card_hlines
                         if y2_ - ZONE_SNAP_BLUR_TOL_PX < h[0] <= BOT]
                ey2 = min(lower, key=lambda h: h[0])[0] if lower else BOT
                return ey1, ey2

            # Anchor falls directly inside a zone → normal snap
            fav_zone_idx = None
            for zi, (z1, z2) in enumerate(zones):
                if z1 <= fav_cy <= z2:
                    fav_zone_idx = zi
                    break

            if fav_zone_idx is not None:
                exact_y1, exact_y2 = _snap(
                    zones[fav_zone_idx][0], zones[fav_zone_idx][1],
                )
            else:
                # Anchor falls in the gap between two adjacent zones (the
                # favorite-label row can interrupt the vline) → 3b
                gap_idx = None
                for zi in range(len(zones) - 1):
                    if zones[zi][1] < fav_cy < zones[zi + 1][0]:
                        gap_idx = zi
                        break
                if gap_idx is None:
                    # Anchor is above or below all zones → this vline is
                    # unrelated to this card
                    continue

                upper_zone = zones[gap_idx]
                next_zone = zones[gap_idx + 1]
                # 3b only handles the narrow gap where the favorite-label row
                # interrupts the vline (label height + 2x the decision
                # tolerance). A wide gap means the vline has no edge texture
                # at the anchor at all (usually it is the right-edge vline of
                # some other bubble), and a bbox built from hlines would
                # spuriously contain the anchor (20260611 185429
                # scr_003/008/013 benchmark regression), so skip this vline.
                if (next_zone[0] - upper_zone[1]) > hit.h + 2 * gap_tol:
                    continue
                # 3b step one: search downward from the anchor center for the
                # first (3a-filtered) hline. Inclusive (>=): when the anchor
                # sits exactly on this card's bottom-boundary hline, that
                # boundary must take part in the decision; otherwise it would
                # be skipped and case A would wrongly merge across cards.
                below = [h for h in card_hlines if h[0] >= fav_cy]
                first_hline_y = min(below, key=lambda h: h[0])[0] if below else None

                if first_hline_y is None:
                    # Default policy: follow the old rule, take only the
                    # upper zone, normal snap
                    exact_y1, exact_y2 = _snap(upper_zone[0], upper_zone[1])
                elif abs(first_hline_y - next_zone[0]) <= gap_tol:
                    # Case A: hline is close to the lower zone's start → the
                    # same card was interrupted by the favorite-label row;
                    # merge the two zones and snap again
                    exact_y1, exact_y2 = _snap(upper_zone[0], next_zone[1])
                else:
                    # Case B: hline is far from the lower zone's start →
                    # different cards; extend the upper zone downward with
                    # that hline as the card's lower bound (the upper bound
                    # is still snapped normally)
                    ey1, _ = _snap(upper_zone[0], upper_zone[1])
                    exact_y1, exact_y2 = ey1, int(first_hline_y)

            # Reject degenerate or too-short (< 80 px) boxes, and boxes that
            # do not contain the anchor center; then try the next vline.
            if exact_y2 <= exact_y1 or exact_y2 - exact_y1 < 80:
                continue
            if not (exact_y1 <= fav_cy <= exact_y2):
                continue
            chosen = (vx, exact_y1, exact_y2)
            break

        if chosen is None:
            continue
        card_vx, exact_y1, exact_y2 = chosen

        # Anti-overlap: ensure sequence of cards is strictly non-overlapping
        # (essential when gap-matching + hline snapping pushes boundaries inward)
        if prev_bottom is not None and exact_y1 <= prev_bottom:
            exact_y1 = prev_bottom + 1
        # Re-check the size constraints after the possible top adjustment.
        if exact_y2 <= exact_y1 or exact_y2 - exact_y1 < 80:
            continue

        # Tap target is the center of the label hit, clamped to the screen.
        click_x = min(screen_w - 1, max(0, hit.x + hit.w // 2))
        click_y = min(screen_h - 1, max(0, hit.y + hit.h // 2))
        midline = screen_w / 2.0
        click_side = "left" if click_x < midline else "right"

        cards.append(ThumbnailCard(
            index=idx,
            top=exact_y1,
            bottom=exact_y2,
            left=AR,
            right=card_vx,
            click_x=click_x,
            click_y=click_y,
            click_side=click_side,
            favorite_hit=hit,
        ))
        prev_bottom = exact_y2

    # Step 6 — remaining zones → text-bubble bboxes. For every zone of every
    # vline: discard the zone if it vertically intersects any card bbox;
    # otherwise the zone belongs to a text chat bubble.
    card_intervals = [(c.top, c.bottom) for c in cards]

    def _intersects_any_card(z1: int, z2: int) -> bool:
        # Inclusive interval-overlap test against every card.
        return any(z2 >= ct and z1 <= cb for ct, cb in card_intervals)

    bubbles: list[BubbleBbox] = []
    for vx in sorted(zones_by_vx):
        zones = zones_by_vx[vx]
        if not zones:
            continue
        bubble_hlines = hlines_by_vx[vx]
        last_zi = len(zones) - 1
        for zi, (z1, z2) in enumerate(zones):
            if _intersects_any_card(z1, z2):
                continue
            # Upper end: search upward for the nearest hline (its other end
            # must be within 100 px of the avatar); only the first zone may
            # fall back to TOP_BAR_BOT, a middle zone with no matching hline
            # keeps its original zone boundary
            upper = [h for h in bubble_hlines
                     if TOP <= h[0] < z1 + ZONE_SNAP_BLUR_TOL_PX
                     and h[2] < AR + XLIMIT]
            if upper:
                by1 = max(upper, key=lambda h: h[0])[0]
            else:
                by1 = TOP if zi == 0 else z1
            # Lower end: search downward for the nearest hline; only the last
            # zone may fall back to BOT_BAR_TOP
            lower = [h for h in bubble_hlines
                     if z2 - ZONE_SNAP_BLUR_TOL_PX < h[0] <= BOT]
            if lower:
                by2 = min(lower, key=lambda h: h[0])[0]
            else:
                by2 = BOT if zi == last_zi else z2
            # Bubbles shorter than 80 px are dropped.
            if by2 - by1 < 80:
                continue
            bubbles.append(BubbleBbox(top=by1, bottom=by2, left=AR, right=int(vx)))

    return cards, bubbles
|
|
562
|
+
|
|
563
|
+
|
|
564
|
+
def _compute_exact_card_bboxes(
    ordered_hits: list[FavoriteLabelHit],
    bgr_img: np.ndarray,
    screen_w: int,
    screen_h: int,
) -> list[ThumbnailCard]:
    """Return only the cards from the combined card + bubble pipeline.

    Backward-compatible wrapper around
    :func:`_compute_exact_card_and_bubble_bboxes`: the arguments are passed
    through unchanged and the bubble list is discarded.
    """
    card_list, _ignored_bubbles = _compute_exact_card_and_bubble_bboxes(
        ordered_hits,
        bgr_img,
        screen_w,
        screen_h,
    )
    return card_list
|
|
576
|
+
|
|
577
|
+
|
|
578
|
+
def derive_cards_and_bubbles(
    hits: Sequence[FavoriteLabelHit],
    *,
    screen_w: int,
    screen_h: int,
    scale_w: float = 1.0,
    chat_img: np.ndarray,
) -> tuple[list[ThumbnailCard], list[BubbleBbox]]:
    """Like :func:`derive_cards` but also returns text-bubble bboxes
    (PRD §9(1) Step 4 — remaining vline zones not occupied by cards).

    ``chat_img`` is mandatory: bubble derivation requires the
    variance-texture pipeline (no legacy fixed-offset fallback).
    Bubbles can exist on frames with zero favorite-label hits as well, but
    this function mirrors :func:`derive_cards` and returns ``([], [])`` for
    an empty ``hits`` sequence (callers without hits don't run the pipeline).

    Args:
        hits: favorite-label hits in any order; they are sorted by ``y``
            before being handed to the pipeline.
        screen_w: screenshot width, forwarded to the pipeline.
        screen_h: screenshot height, forwarded to the pipeline.
        scale_w: forwarded to ``drop_top_clamped_false_positive_cards``.
        chat_img: image forwarded to the pipeline.

    Returns:
        ``(cards, bubbles)``. The cards have been passed through
        ``drop_top_clamped_false_positive_cards``. ``([], [])`` is returned
        when ``hits`` is empty or when the pipeline raises.
    """
    if not hits:
        return [], []
    ordered = sorted(hits, key=lambda h: h.y)
    try:
        cards, bubbles = _compute_exact_card_and_bubble_bboxes(
            ordered, chat_img, screen_w, screen_h,
        )
    except Exception:
        # Best-effort contract: a failing pipeline yields "nothing found"
        # rather than propagating. Log the traceback so the failure is
        # diagnosable instead of being indistinguishable from an empty frame.
        import logging

        logging.getLogger(__name__).exception(
            "card/bubble bbox pipeline failed; returning no cards or bubbles"
        )
        return [], []
    return (
        drop_top_clamped_false_positive_cards(cards, scale_w=scale_w),
        bubbles,
    )
|
|
608
|
+
|
|
609
|
+
|
|
610
|
+
def bubble_to_metadata_list(bubble: BubbleBbox) -> list[int]:
    """Flatten a :class:`BubbleBbox` into the schema shape ``[x1, y1, x2, y2]``.

    ``x1, y1`` is the ``left`` / ``top`` corner and ``x2, y2`` the
    ``right`` / ``bottom`` corner.
    """
    x1, y1 = bubble.left, bubble.top
    x2, y2 = bubble.right, bubble.bottom
    return [x1, y1, x2, y2]
|
|
613
|
+
|
|
614
|
+
|
|
615
|
+
def derive_cards(
    hits: Sequence[FavoriteLabelHit],
    *,
    screen_w: int,
    screen_h: int,
    scale_w: float = 1.0,
    chat_img: Optional[np.ndarray] = None,
) -> list[ThumbnailCard]:
    """Turn favorite_label hits into click-ready :class:`ThumbnailCard` objects.

    Args:
        hits: favorite_label.png hits on the chat screenshot, in any order
            (they are sorted by ``y`` here).
        screen_w / screen_h: the source screenshot's raw-image dimensions
            (usually equal to ``device_info.screen_resolution``).
        scale_w: ``screen_w / baseline_w`` (baseline = 1080). Scales
            ``FAVORITE_TO_CARD_TOP_OFFSET_BASELINE`` and
            ``FAVORITE_TAIL_OFFSET_BASELINE`` so the logic survives across
            the 720 / 1080 / 1220 device whitelist.
        chat_img: optional BGR image (as returned by ``cv2.imdecode``).
            When given, the vline-segment + hline-boundary variance-based
            algorithm (PRD S6) derives the card bboxes. When ``None`` — or
            when that pipeline raises — the legacy fixed-offset logic below
            is used instead.

    Returns:
        Cards ordered top -> bottom, after
        :func:`drop_top_clamped_false_positive_cards` filtering. On the
        legacy path a hit whose computed span is empty
        (``bottom <= top``) produces no card. Empty list if ``hits`` is
        empty (the caller should then decide whether to scroll further or
        end the session).
    """
    if not hits:
        return []

    by_row = sorted(hits, key=lambda hit: hit.y)

    # PRD S6 pipeline. When it completes, its verdict is final: an anchor it
    # dropped (no containing zone -> "tapped the right card, recorded the
    # wrong box" guard) must NOT be resurrected by the legacy fallback.
    # Legacy only serves chat_img=None callers and pipeline exceptions.
    if chat_img is not None:
        try:
            pipeline_cards = _compute_exact_card_bboxes(
                by_row, chat_img, screen_w, screen_h,
            )
        except Exception:
            # Deliberate best-effort: fall through to the legacy path.
            pass
        else:
            return drop_top_clamped_false_positive_cards(
                pipeline_cards, scale_w=scale_w,
            )

    # Legacy fixed-offset algorithm.
    lead_in = int(round(FAVORITE_TO_CARD_TOP_OFFSET_BASELINE * scale_w))
    tail = int(round(FAVORITE_TAIL_OFFSET_BASELINE * scale_w))
    last_row = screen_h - 1
    last_col = screen_w - 1
    half_width = screen_w / 2

    legacy_cards: list[ThumbnailCard] = []
    previous_hit = None
    for position, hit in enumerate(by_row):
        if previous_hit is None:
            # First card: fixed offset above the label, clamped to the
            # top of the screen.
            top_edge = max(0, hit.y1 - lead_in)
        else:
            # Later cards start one row below the previous hit's tail,
            # whether or not that previous hit produced a card.
            top_edge = max(previous_hit.y2 + tail + 1, 0)
        previous_hit = hit

        bottom_edge = min(last_row, hit.y2 + tail)
        if bottom_edge <= top_edge:
            continue

        # Tap the centre of the favorite label, clamped into the screen.
        tap_x = min(last_col, max(0, hit.x + hit.w // 2))
        tap_y = min(last_row, max(0, hit.y + hit.h // 2))
        side = "left" if tap_x < half_width else "right"

        legacy_cards.append(ThumbnailCard(
            index=position,
            top=top_edge,
            bottom=bottom_edge,
            left=0,
            right=last_col,
            click_x=tap_x,
            click_y=tap_y,
            click_side=side,
            favorite_hit=hit,
        ))

    return drop_top_clamped_false_positive_cards(legacy_cards, scale_w=scale_w)
|
|
702
|
+
|
|
703
|
+
|
|
704
|
+
def drop_top_clamped_false_positive_cards(
    cards: list[ThumbnailCard],
    *,
    scale_w: float,
) -> list[ThumbnailCard]:
    """Filter out top-clamped pseudo-row cards and renumber the survivors.

    Drops notebook strips whose ``top`` clamped to 0 when the gap to the
    next derived card top matches the empirical **pseudo-row** signature on
    edb1a89f — see :data:`REFERENCE_RESUME_CARD_TOP_GAP_BASELINE`.

    Rule (scaled): with ``Δ = cards[k+1].top - cards[k].top``, ``cards[k]``
    is discarded when ``cards[k].top == 0`` and
    ``Δ < round(base_gap * scale_w) - round(label_h * scale_w)`` (each
    rounded term floored at 1) — i.e. ``Δ - H_fixed < H_收藏`` in device px.
    The last card has no successor and is therefore never discarded.

    Args:
        cards: derived cards, in the order the gap is measured.
        scale_w: factor applied to both baseline constants.

    Returns:
        ``cards`` itself when it holds at most one element; otherwise a new
        list of freshly built :class:`ThumbnailCard` objects whose ``index``
        is renumbered ``0..n-1`` over the survivors.
    """
    if len(cards) <= 1:
        return cards

    scaled_gap = max(1, int(round(REFERENCE_RESUME_CARD_TOP_GAP_BASELINE * scale_w)))
    scaled_label_h = max(1, int(round(FAVORITE_LABEL_TEMPLATE_H_BASELINE * scale_w)))
    threshold = scaled_gap - scaled_label_h

    total = len(cards)
    survivors: list[ThumbnailCard] = []
    for position, current in enumerate(cards):
        has_successor = position + 1 < total
        if (
            has_successor
            and current.top == 0
            and cards[position + 1].top - current.top < threshold
        ):
            # Top-clamped strip followed too closely by the next card.
            continue
        survivors.append(current)

    renumbered: list[ThumbnailCard] = []
    for new_index, source in enumerate(survivors):
        renumbered.append(ThumbnailCard(
            index=new_index,
            top=source.top,
            bottom=source.bottom,
            left=source.left,
            right=source.right,
            click_x=source.click_x,
            click_y=source.click_y,
            click_side=source.click_side,
            favorite_hit=source.favorite_hit,
        ))
    return renumbered
|
|
757
|
+
|
|
758
|
+
|
|
759
|
+
def bbox_to_metadata_list(card: ThumbnailCard) -> list[int]:
    """Render a card's bbox as one ``resume_thumb_bboxes`` element.

    The schema declares ``resume_thumb_bboxes: Optional[list[list[int]]]``
    per screenshot — one list-of-4 entry per thumbnail the collector
    identified on that screen. Each entry has the shape
    ``[x1, y1, x2, y2]``, filled here from ``left``, ``top``, ``right``,
    ``bottom``.
    """
    x1, y1 = card.left, card.top
    x2, y2 = card.right, card.bottom
    return [x1, y1, x2, y2]
|
|
768
|
+
|
|
769
|
+
|
|
770
|
+
def click_context_for_tap_thumbnail(card: ThumbnailCard) -> dict:
    """Assemble the ``click_context`` dict for a group-A ``tap_thumbnail`` frame.

    Schema requirement (gate 1e/4):
        group A action → ``click_coords`` + ``click_side`` MUST be populated
        AND ``click_position`` + ``divider_verified`` MUST be null.

    Only the two populated keys are emitted here; ``click_position`` and
    ``divider_verified`` are not added to the dict.
    """
    tap_point = [card.click_x, card.click_y]
    context = {"click_coords": tap_point}
    context["click_side"] = card.click_side
    return context
|
|
781
|
+
|
|
782
|
+
|
|
783
|
+
def card_bounding_tuple(card: ThumbnailCard) -> tuple[int, int, int, int]:
    """Return the card's inclusive pixel bbox as ``(left, top, right, bottom)``.

    Same field order as ``bbox_to_metadata_list`` / gate-1d thumbnail
    rectangles, but as a tuple.
    """
    left_edge, top_edge = card.left, card.top
    right_edge, bottom_edge = card.right, card.bottom
    return left_edge, top_edge, right_edge, bottom_edge
|
|
787
|
+
|
|
788
|
+
|
|
789
|
+
def y_interval_overlap_ratio(
    top_a: int,
    bottom_a: int,
    top_b: int,
    bottom_b: int,
) -> float:
    """Overlap ratio of two inclusive row intervals ``[top_*, bottom_*]``.

    Returns ``intersection_px / min(h_a, h_b)`` where
    ``h = bottom - top + 1``, or ``0.0`` when the intervals are disjoint.
    """
    shared_rows = min(bottom_a, bottom_b) - max(top_a, top_b) + 1
    if shared_rows <= 0:
        return 0.0

    height_a = bottom_a - top_a + 1
    height_b = bottom_b - top_b + 1
    shorter = height_a if height_a < height_b else height_b
    if shorter <= 0:
        return 0.0
    return shared_rows / shorter
|
|
808
|
+
|
|
809
|
+
|
|
810
|
+
def card_overlaps_processed(
    card: ThumbnailCard,
    processed_bboxes: Iterable[tuple[int, int, int, int]],
    *,
    min_overlap_ratio: float = PROCESSED_CARD_MATCH_MIN_OVERLAP_RATIO,
) -> bool:
    """Tell whether ``card``'s **vertical span** matches any processed card.

    For each stored bbox the test is ``intersection_rows /
    min(height_new, height_old) >= min_overlap_ratio``, with heights taken
    from the inclusive ``card.top / card.bottom`` rows (see
    :func:`y_interval_overlap_ratio`). ``processed_bboxes`` entries are
    ``(left,top,right,bottom)`` tuples from :func:`card_bounding_tuple` —
    only ``top`` and ``bottom`` are read.

    Returns:
        ``True`` as soon as one stored bbox reaches the threshold,
        ``False`` if none does (including an empty iterable).
    """
    return any(
        y_interval_overlap_ratio(card.top, card.bottom, seen_top, seen_bottom)
        >= min_overlap_ratio
        for _left, seen_top, _right, seen_bottom in processed_bboxes
    )
|
|
829
|
+
|
|
830
|
+
|
|
831
|
+
def pick_first_unprocessed_card(
    cards: Iterable[ThumbnailCard],
    processed_card_bboxes: Sequence[tuple[int, int, int, int]] | None = None,
) -> ThumbnailCard | None:
    """Pick the first card not yet handled in the current session.

    A card counts as already handled when it sufficiently overlaps any
    stored processed bbox, judged on the **Y-span only**: for inclusive row
    intervals ``[top,bottom]`` the ratio
    ``intersection_px / min(h_new,h_old)`` is compared against
    :data:`PROCESSED_CARD_MATCH_MIN_OVERLAP_RATIO` (default **0.70**);
    ``>=`` means same logical card → skip. Stored tuples stay
    ``(left,top,right,bottom)`` from :func:`card_bounding_tuple`; only
    ``top``/``bottom`` participate.

    Used between ``chat_content_scroll_down`` / ``chat_resume_rescan``
    frames so a notebook card that reappears with a **slightly shifted**
    ``favorite_label`` match (integer ``(x,y)`` off by a few pixels) is
    still treated as the same card and not double-tapped.

    Args:
        cards: candidate cards, examined in iteration order.
        processed_card_bboxes: bboxes accumulated from prior
            :func:`card_bounding_tuple` values for cards that entered
            ``tap_thumbnail`` (see ``run_collector_minimal``). ``None`` or
            empty means nothing has been processed yet.

    Returns:
        The first non-overlapping card, or ``None`` when every card
        overlaps a processed one (or ``cards`` is empty).

    NOTE: does not model new messages inserted above the card; P0 accepts
    that rare edge.
    """
    already_done: list[tuple[int, int, int, int]] = list(
        processed_card_bboxes or []
    )
    fresh_cards = (
        candidate
        for candidate in cards
        if not card_overlaps_processed(candidate, already_done)
    )
    return next(fresh_cards, None)
|
|
864
|
+
|
|
865
|
+
|
|
866
|
+
@dataclass
class TrackedCard:
    """Entry of the height-sequence dedup state (array A): one tracked card.

    Fields:
        height: bbox height (``bottom - top + 1``).
        clicked: whether this card has been entered.
    """

    height: int
    clicked: bool
|
|
872
|
+
|
|
873
|
+
|
|
874
|
+
def _card_height(card: ThumbnailCard) -> int:
    """Number of pixel rows a card bbox spans, counting both edge rows."""
    span = card.bottom - card.top
    return span + 1
|
|
877
|
+
|
|
878
|
+
|
|
879
|
+
def _longest_subsequence_match(
    a_heights: list[int],
    b_heights: list[int],
) -> tuple[int, int, int] | None:
    """Locate the longest contiguous run of ``a_heights`` present in ``b_heights``.

    Despite the name, the match is a contiguous substring, not a general
    subsequence.

    Returns:
        ``(a_start, b_start, length)`` for the winning run, or ``None``
        when either list is empty or no element is shared. Among runs of
        the same maximum length the one with the smallest ``b_start``
        (earliest in B) wins; if that is still tied, the smallest
        ``a_start`` wins.
    """
    if not a_heights or not b_heights:
        return None

    len_a = len(a_heights)
    len_b = len(b_heights)
    winner: tuple[int, int, int] | None = None  # (a_start, b_start, length)

    for a_pos in range(len_a):
        for b_pos in range(len_b):
            # Extend the run starting at this pair for as long as both
            # lists keep agreeing.
            run = 0
            while (
                a_pos + run < len_a
                and b_pos + run < len_b
                and a_heights[a_pos + run] == b_heights[b_pos + run]
            ):
                run += 1
            if run == 0:
                continue

            if winner is None:
                winner = (a_pos, b_pos, run)
                continue
            _, best_b, best_len = winner
            # Longer run wins; equal length prefers the earlier B start.
            # Equal B start keeps the earlier A start already recorded.
            if run > best_len or (run == best_len and b_pos < best_b):
                winner = (a_pos, b_pos, run)

    return winner
|
|
910
|
+
|
|
911
|
+
|
|
912
|
+
def pick_next_unclicked_card(
    cards: list[ThumbnailCard],
    tracked_cards: list[TrackedCard],
) -> ThumbnailCard | None:
    """Return the next card to enter based on height-sequence dedup.

    Algorithm (PRD S6):
        1. If ``tracked_cards`` (A) is empty -> build A from current
           ``cards`` (B) and return ``cards[0]`` (``None`` when ``cards``
           is empty too).
        2. Find the longest contiguous substring of A-heights in B-heights.
           If multiple, pick the one with smallest B start index.
        3. Rebuild A: keep matched cards (inheriting ``clicked``), add new
           B cards before/after the match segment (unclicked), drop
           unmatched A. With no match at all, A is rebuilt from B with
           every entry unclicked.
        4. Return the first card in B whose rebuilt A entry has
           ``clicked == False``, or ``None`` if every entry is clicked.

    Args:
        cards: cards derived from the current frame (B).
        tracked_cards: dedup state (A). It is mutated in-place on every
            call - the caller must clear it on ``chat_content_scroll_up`` /
            ``chat_content_scroll_down`` to reset the tracking window.

    Returns:
        The card to enter next, or ``None`` when there is none.
    """
    cur_heights = [_card_height(c) for c in cards]

    if not tracked_cards:
        tracked_cards[:] = [
            TrackedCard(height=h, clicked=False) for h in cur_heights
        ]
        # An empty frame has nothing to enter; indexing ``cards[0]`` here
        # would raise IndexError instead of honouring the ``None`` contract.
        return cards[0] if cards else None

    match = _longest_subsequence_match(
        [tc.height for tc in tracked_cards], cur_heights,
    )

    if match is None:
        # Nothing carried over: every current card starts unclicked.
        rebuilt = [TrackedCard(height=h, clicked=False) for h in cur_heights]
    else:
        a_start, b_start, length = match
        b_end = b_start + length
        # New cards before the matched segment.
        rebuilt = [
            TrackedCard(height=h, clicked=False) for h in cur_heights[:b_start]
        ]
        # Matched segment: copy each entry so ``clicked`` is inherited.
        rebuilt.extend(
            TrackedCard(height=tc.height, clicked=tc.clicked)
            for tc in tracked_cards[a_start:a_start + length]
        )
        # New cards after the matched segment.
        rebuilt.extend(
            TrackedCard(height=h, clicked=False) for h in cur_heights[b_end:]
        )

    tracked_cards[:] = rebuilt

    # ``rebuilt`` has exactly one entry per current card, in the same order,
    # so the first unclicked entry lines up with the card to return.
    for card, tracked in zip(cards, tracked_cards):
        if not tracked.clicked:
            return card
    return None
|
|
971
|
+
|
|
972
|
+
|
|
973
|
+
# Names exposed by ``from <module> import *``.
__all__ = [
    # Upper-case module constants.
    "FAVORITE_LABEL_TEMPLATE_W_BASELINE",
    "FAVORITE_LABEL_TEMPLATE_H_BASELINE",
    "FAVORITE_TO_CARD_TOP_OFFSET_BASELINE",
    "FAVORITE_TAIL_OFFSET_BASELINE",
    "FAV_LABEL_HLINE_EXCLUDE_PAD_BASELINE",
    "GAP_HLINE_TO_NEXT_ZONE_MAX_BASELINE",
    "PROCESSED_CARD_MATCH_MIN_OVERLAP_RATIO",
    "REFERENCE_RESUME_CARD_TOP_GAP_BASELINE",
    "TAP_ANTI_DETECT_X_JITTER_PX",
    # Data types.
    "FavoriteLabelHit",
    "ThumbnailCard",
    "TrackedCard",
    "BubbleBbox",
    # Functions.
    "derive_cards",
    "derive_cards_and_bubbles",
    "drop_top_clamped_false_positive_cards",
    "bbox_to_metadata_list",
    "bubble_to_metadata_list",
    "card_bounding_tuple",
    "card_overlaps_processed",
    "click_context_for_tap_thumbnail",
    "pick_first_unprocessed_card",
    "pick_next_unclicked_card",
    "y_interval_overlap_ratio",
    # Underscore-prefixed helpers; a star-import skips such names unless
    # they are listed here.
    "_compute_exact_card_bboxes",
    "_compute_exact_card_and_bubble_bboxes",
]
|