screenshot-vision-algorithm 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- screenshot_vision_algorithm/__init__.py +48 -0
- screenshot_vision_algorithm/_config.py +61 -0
- screenshot_vision_algorithm/android/__init__.py +1 -0
- screenshot_vision_algorithm/android/wechat/__init__.py +1 -0
- screenshot_vision_algorithm/android/wechat/algorithms/__init__.py +0 -0
- screenshot_vision_algorithm/android/wechat/algorithms/avatar_column.py +209 -0
- screenshot_vision_algorithm/android/wechat/algorithms/badge_detection.py +275 -0
- screenshot_vision_algorithm/android/wechat/algorithms/card_bbox.py +1000 -0
- screenshot_vision_algorithm/android/wechat/algorithms/phash_utils.py +267 -0
- screenshot_vision_algorithm/android/wechat/algorithms/speaker_band.py +290 -0
- screenshot_vision_algorithm/android/wechat/algorithms/template_matching.py +2163 -0
- screenshot_vision_algorithm/android/wechat/algorithms/title_ocr.py +143 -0
- screenshot_vision_algorithm/android/wechat/merge/__init__.py +0 -0
- screenshot_vision_algorithm/android/wechat/merge/multipage.py +157 -0
- screenshot_vision_algorithm/android/wechat/ocr/__init__.py +0 -0
- screenshot_vision_algorithm/android/wechat/ocr/avatar_guard.py +434 -0
- screenshot_vision_algorithm/android/wechat/ocr/badge_ocr.py +232 -0
- screenshot_vision_algorithm/android/wechat/ocr/nickname_binding.py +1888 -0
- screenshot_vision_algorithm/android/wechat/ocr/text_ocr_adapter.py +625 -0
- screenshot_vision_algorithm/android/wechat/profiles/__init__.py +0 -0
- screenshot_vision_algorithm/android/wechat/profiles/android.py +53 -0
- screenshot_vision_algorithm/android/wechat/profiles/harmony.py +10 -0
- screenshot_vision_algorithm/android/wechat/profiles/ios.py +53 -0
- screenshot_vision_algorithm/android/wechat/templates/android/8.0.69/chat_back_chevron.png +0 -0
- screenshot_vision_algorithm/android/wechat/templates/android/8.0.69/chat_input_emoji_smile.png +0 -0
- screenshot_vision_algorithm/android/wechat/templates/android/8.0.69/chat_input_plus.png +0 -0
- screenshot_vision_algorithm/android/wechat/templates/android/8.0.69/chat_input_voice.png +0 -0
- screenshot_vision_algorithm/android/wechat/templates/android/8.0.69/chat_title_more_dots.png +0 -0
- screenshot_vision_algorithm/android/wechat/templates/android/8.0.69/favorite_label.png +0 -0
- screenshot_vision_algorithm/android/wechat/templates/android/8.0.69/new_messages_hint_suffix.png +0 -0
- screenshot_vision_algorithm/android/wechat/templates/android/8.0.69/unread_divider_hint.png +0 -0
- screenshot_vision_algorithm/android/wechat/templates/android/8.0.69/unread_divider_hint_v2_textonly.png +0 -0
- screenshot_vision_algorithm/android/wechat/templates/android/8.0.69/wechat_note_header.png +0 -0
- screenshot_vision_algorithm/android/xhs/__init__.py +4 -0
- screenshot_vision_algorithm/android/zhihu/__init__.py +4 -0
- screenshot_vision_algorithm/png_utils.py +86 -0
- screenshot_vision_algorithm-0.3.0.dist-info/METADATA +425 -0
- screenshot_vision_algorithm-0.3.0.dist-info/RECORD +40 -0
- screenshot_vision_algorithm-0.3.0.dist-info/WHEEL +5 -0
- screenshot_vision_algorithm-0.3.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,1888 @@
|
|
|
1
|
+
"""Session-scoped, simplest-viable speaker nickname extractor (d3-nickname-ocr-minimal, v0.5 P1-3 minimal).
|
|
2
|
+
|
|
3
|
+
Day 3 scope (this module):
|
|
4
|
+
|
|
5
|
+
Take one ``OcrPageResult`` (from ``text_ocr_adapter`` — d2-4) plus the
|
|
6
|
+
corresponding ``Screenshot`` metadata and return a
|
|
7
|
+
``ChatSpeakerAttribution`` describing which OCR text blocks are
|
|
8
|
+
**nicknames** (group-chat bubble headers) and which bubbles belong
|
|
9
|
+
to the device owner (right-half-screen) vs. still-unknown speakers.
|
|
10
|
+
|
|
11
|
+
Replaces the ``speaker_nickname=stub='UNKNOWN'`` placeholder that
|
|
12
|
+
Thin Slice v1 left behind. **Session-internal only** — no cross-session
|
|
13
|
+
nickname-hash aggregation (that's P1-3' ``CrossSessionSpeakerService``,
|
|
14
|
+
scheduled post-launch).
|
|
15
|
+
|
|
16
|
+
**PRD 验收口径(防漂移)**:单帧 **昵称行 / 气泡块** 的产品边界、靠左/靠右发言之 **头像守卫** 硬门槛,以及「昵称不属于任何气泡、气泡内不包括昵称」等,以仓库根 **[`docs/requirements/product_requirement_document.md`](../../../docs/requirements/product_requirement_document.md) §九、7.(2)(3)** 为验收 SSOT;本模块启发式或 `nickname_avatar_guard` 行为与之不一致时,**以实现补丁对齐 PRD** 为准(ADR §2.5.2 术语镜像段亦引用该节)。
|
|
17
|
+
|
|
18
|
+
Explicit non-goals (by design, per ADR v0.3 / v0.5):
|
|
19
|
+
|
|
20
|
+
- **Cross-session aggregation**: P1-3' ``CrossSessionSpeakerService``
|
|
21
|
+
will handle ``sha256(platform|NFC(nickname)|phone_extracted|salt)[:16]``
|
|
22
|
+
hashing for order attribution; this module stops at
|
|
23
|
+
``nickname=<raw-string>`` within one session.
|
|
24
|
+
- **Head-avatar pHash / geometry**: ADR v0.3 永久边界:**不做**头像 pHash;
|
|
25
|
+
仅以 OCR 块 + bbox 归因;PRD 术语§5 另允许 **侧栏头像 Hough 纵向筒**
|
|
26
|
+
(`processor/chat_bubble_geometry_prd.py`,与 pHash **无关**)。
|
|
27
|
+
- **Bubble detection**: PaddleOCR returns text blocks, NOT speech bubble
|
|
28
|
+
outlines. We approximate bubble membership via "next text block(s)
|
|
29
|
+
below the nickname within ``bubble_vertical_gap_max_px`` on the same
|
|
30
|
+
screen-half". This is the simplest viable heuristic and covers the
|
|
31
|
+
80% case (group chat with visible nicknames above each bubble).
|
|
32
|
+
- **Confidence-weighted voting across pages**: a nickname string is
|
|
33
|
+
returned as-is per page; no de-duping / normalization / merging
|
|
34
|
+
happens here. Downstream code (Admin ``/wx-ocr-stats`` page) does
|
|
35
|
+
its own roll-up (non-stub share %).
|
|
36
|
+
- **Text normalization**: raw OCR string is kept as-is (matches the
|
|
37
|
+
``raw_full_text`` contract of ``text_ocr_adapter``). Downstream
|
|
38
|
+
``lite_text_normalizer.normalize_business_text`` is the phase-4
|
|
39
|
+
consumer.
|
|
40
|
+
|
|
41
|
+
Contract surface (2 dataclasses + 1 function):
|
|
42
|
+
|
|
43
|
+
``extract_nicknames(
|
|
44
|
+
ocr_result, screenshot, config=None, png_path=None
|
|
45
|
+
) -> ChatSpeakerAttribution``
|
|
46
|
+
|
|
47
|
+
The function accepts any ``OcrPageResult`` (``screenshot.type ==
|
|
48
|
+
"chat_message"`` expected by caller; we assert it) and returns a
|
|
49
|
+
frozen ``ChatSpeakerAttribution``. All geometry heuristics live in
|
|
50
|
+
``NicknameOcrConfig`` — overridable for fixture / test isolation.
|
|
51
|
+
|
|
52
|
+
Heuristic summary (all adjustable via ``NicknameOcrConfig``):
|
|
53
|
+
|
|
54
|
+
STEP 0 (PRD §7(3)):仅对相邻且均已标为 ``bubble_text`` 的块尝试合并——
|
|
55
|
+
**同一半屏**内「水平连续」(同水平带 + 横向间隙小)的 Paddle 碎片合成一条气泡块;
|
|
56
|
+
(同气泡多行竖叠合并易与相邻消息混淆,留给昵称认领链处理。)
|
|
57
|
+
再进入昵称认领。
|
|
58
|
+
|
|
59
|
+
STEP 1: Pre-filter each text block to one of
|
|
60
|
+
"nickname_candidate" / "bubble_text" / "drop".
|
|
61
|
+
|
|
62
|
+
``nickname_candidate`` requires:
|
|
63
|
+
(a) ``confidence >= config.min_confidence``
|
|
64
|
+
(b) ``len(text_stripped) <= config.nickname_max_chars``
|
|
65
|
+
(c) text does NOT start with any prefix in
|
|
66
|
+
``config.system_prefix_drops`` (``@``, ``撤回了一条消息``,
|
|
67
|
+
``系统消息``, …)
|
|
68
|
+
(d) text is NOT a pure ``HH:MM`` time stamp
|
|
69
|
+
(e) text is NOT a pure digit run (phone / day number)
|
|
70
|
+
(f) bbox x-center is on the LEFT half-screen
|
|
71
|
+
(``x_center < original_width * config.screen_midline_ratio``)
|
|
72
|
+
(g) bbox height is ``<= median_text_height *
|
|
73
|
+
config.nickname_max_char_height_ratio``
|
|
74
|
+
(nicknames are visually smaller than bubble body text in
|
|
75
|
+
the WeChat group-chat layout)
|
|
76
|
+
|
|
77
|
+
``bubble_text``: the leftover blocks (may be left-half or
|
|
78
|
+
right-half bubbles).
|
|
79
|
+
|
|
80
|
+
``drop``: low-confidence blocks (already filtered by
|
|
81
|
+
``TextOcrAdapter`` upstream; we keep the gate here for
|
|
82
|
+
tests that inject raw fake blocks bypassing the adapter).
|
|
83
|
+
|
|
84
|
+
STEP 2: For each ``nickname_candidate``, claim ``bubble_text``
|
|
85
|
+
blocks immediately below it on the LEFT half screen, within
|
|
86
|
+
``bubble_vertical_gap_max_px`` vertical pixels, stopping when
|
|
87
|
+
we hit the next ``nickname_candidate`` or walk past the gap.
|
|
88
|
+
|
|
89
|
+
STEP 3: Classify each remaining ``bubble_text``:
|
|
90
|
+
- right-half bubble → ``owner``(``wo``)
|
|
91
|
+
- left-half orphan → 归因链结束后 **继承**同侧上方最近昵称;无可继承则
|
|
92
|
+
``speaker_display=找不到归属昵称``(参见 PRD 九§7(4)(5))。
|
|
93
|
+
|
|
94
|
+
STEP 4(PRD §7(5)+(5')):``SpeakerBodySegment`` 层按 **整行占位关键词**剔除后,
|
|
95
|
+
``len(body.strip()) < min_bubble_body_chars`` 的段落丢弃;
|
|
96
|
+
``total_bubbles`` 等计数以 ``_finalize_prd_attribution`` 过滤后为准。
|
|
97
|
+
|
|
98
|
+
STEP 5(术语§5):在提供 ``png_path`` 且启用 ``NicknameOcrConfig.use_prd_chat_vertical_bands``
|
|
99
|
+
时加载 ``chat_bubble_geometry_prd`` 纵向筒——昵称仅能认领与其 **同属一筒(含 ±12px@1080)**
|
|
100
|
+
的左侧正文块;检出无头像则将整帧正文归为「找不到归属昵称」单段。
|
|
101
|
+
|
|
102
|
+
Fields on ``ChatSpeakerAttribution`` designed so Admin roll-up can
|
|
103
|
+
compute "non-stub share %" via ``bubbles_with_nickname / total_bubbles``.
|
|
104
|
+
|
|
105
|
+
References:
|
|
106
|
+
OCR ADR v0.5 §2.3 speaker_nickname (phase 2 NicknameBoundaryService)
|
|
107
|
+
OCR ADR v0.3 §4.3.0 project vision-layer boundary
|
|
108
|
+
session-handoff-20260429.mdc Day 3 d3-nickname-ocr-minimal row
|
|
109
|
+
scripts/wx_match/processor/text_ocr_adapter.py
|
|
110
|
+
``OcrPageResult`` / ``TextBlock`` / reading order contract
|
|
111
|
+
"""
|
|
112
|
+
|
|
113
|
+
from __future__ import annotations
|
|
114
|
+
|
|
115
|
+
import re
|
|
116
|
+
from dataclasses import dataclass, field
|
|
117
|
+
from dataclasses import replace as dataclass_replace
|
|
118
|
+
from pathlib import Path
|
|
119
|
+
from statistics import median
|
|
120
|
+
from typing import TYPE_CHECKING, Any, Optional, Tuple
|
|
121
|
+
|
|
122
|
+
from screenshot_vision_algorithm.android.wechat.ocr.text_ocr_adapter import OcrPageResult, TextBlock
|
|
123
|
+
|
|
124
|
+
if TYPE_CHECKING:
|
|
125
|
+
from collector.contracts import Screenshot
|
|
126
|
+
|
|
127
|
+
#: Unified display key used in the PRD speaker JSON for right-side bubbles
#: (the device owner).
_OWNER_SPEAKER_LABEL = "wo"

#: PRD section 9, item 7(4): segment-level placeholder display key for
#: left-side text that cannot be attributed to any nickname.
SPEAKER_DISPLAY_UNATTRIBUTED = "找不到归属昵称"
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
# ============================================================================
|
|
135
|
+
# Config
|
|
136
|
+
# ============================================================================
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
@dataclass(frozen=True)
class NicknameOcrConfig:
    """Immutable bundle of every heuristic threshold used by this module.

    Per the original authors' note, the defaults were tuned against the
    ``edb1a89f`` Thin Slice v1 fixture (1080x2248 real-device captures,
    WeChat 8.0.69, default light theme). Tests are expected to override
    individual knobs; the class itself performs no I/O and no logging.
    """

    #: Blocks whose OCR confidence is below this value are skipped. This is a
    #: second gate behind ``TextOcrAdapter.confidence_threshold`` and is kept
    #: aligned with the block-level 0.7 in PRD sections 6 / 11.
    min_confidence: float = 0.7

    #: Upper bound on nickname length; longer strings are treated as bubble
    #: body text.
    nickname_max_chars: int = 30

    #: A block is on the left half when its x-centre is below
    #: ``original_width * screen_midline_ratio``.
    screen_midline_ratio: float = 0.5

    #: A nickname bbox may be at most ``median_height * ratio`` tall.
    #: The original note explains the 0.85 default: nicknames render smaller
    #: than bubble text (roughly 30px vs. 45px on a 1080-wide device). On
    #: degenerate inputs where every block has the same height, no block
    #: passes this bound and no nickname is reported; that is accepted.
    nickname_max_char_height_ratio: float = 0.85

    #: Largest vertical distance (bubble top minus nickname bottom) within
    #: which a ``bubble_text`` block is claimed by the nickname above it.
    bubble_vertical_gap_max_px: int = 120

    #: PRD section 7(5): a segment whose stripped body has fewer characters
    #: than this is discarded and excluded from segment counts.
    min_bubble_body_chars: int = 12

    #: PRD section 7(3): vertical-centre tolerance (px) for treating adjacent
    #: OCR blocks as lying on the same horizontal band.
    bubble_fragment_same_line_y_tol_px: int = 18

    #: PRD section 7(3): maximum gap between horizontally adjacent fragments.
    #: Per the original note the effective threshold is the larger of this
    #: value and one proportional to ``median_h`` (that logic is not in this
    #: class).
    bubble_fragment_horizontal_gap_max_px: int = 36

    # Avatar-column ROI cropping and texture/shape thresholds; see
    # ``nickname_avatar_guard.avatar_roi_pass`` for how they are consumed.
    avatar_gutter_px: int = 10
    avatar_min_left_roi_width_px: int = 40
    avatar_laplacian_var_min: float = 48.0
    avatar_rgb_std_min: float = 4.0
    avatar_roi_extend_up_px: int = 80
    avatar_roi_extend_down_px: int = 4
    avatar_roi_extend_up_row_mul: float = 2.5
    avatar_roi_narrow_up_px: int = 4
    avatar_roi_narrow_down_px: int = 4
    avatar_edge_sig_cols_min: float = 15.0
    avatar_edge_right_left_ratio_min: float = 1.2

    #: The left edge of a nickname bbox must satisfy
    #: ``x1 < original_width * ratio`` (rules out centred system/time rows).
    nickname_max_x1_ratio: float = 0.3

    #: When the line ends with an 11-digit mobile number, the height cap is
    #: relaxed to ``median_h * ratio``.
    nickname_phone_height_ratio: float = 1.08

    #: When True, and a PNG path is supplied and cv2 is available, enable the
    #: PRD glossary section 5 avatar-timeline vertical bands for claiming.
    use_prd_chat_vertical_bands: bool = True

    #: PRD section 9, 7(2): without ``png_path``, or when the guard fails, a
    #: row must NOT be recorded as a bindable nickname row.
    require_avatar_guard_for_nickname: bool = True

    #: PRD section 9, 7(2): make ``avatar_roi_pass`` use the strict
    #: ``nickname_row_passes_prd_avatar_guard`` check (requires a Hough anchor).
    use_prd_strict_avatar_guard: bool = True

    #: PRD section 9, 7(3): nickname bbox width cap in px at a 1080 design
    #: width (scaled by ``width / 1080``); wider rows count as bubble text.
    nickname_max_bbox_width_px: int = 220

    #: Tags that exclude a block from nickname consideration even when every
    #: geometric criterion passes. NOTE(review): the original note says
    #: "prefix / substring", but the only matcher visible in this module
    #: (``_starts_with_system_prefix``) tests prefixes only - confirm intent.
    system_prefix_drops: tuple[str, ...] = (
        "@",
        "撤回了一条消息",
        "系统消息",
        "邀请",
        "加入了群聊",
        "已加入群聊",
        "[系统提示]",
        "以上为历史消息",
        "仅群主",
        "以下为新消息",
    )
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
# Prefix list used only on the attribution display-nickname path.
# It deliberately omits "@" so that a WeCom-style "@suffix" block is not
# mistaken for an @mention.
# Business rule (per the original note): the attribution nickname is the raw
# OCR text, with neighbouring blocks joined in reading order; only explicit
# system placeholder rows are excluded.
_ATTRIBUTION_PREFIX_DROPS: tuple[str, ...] = (
    "撤回了一条消息",
    "系统消息",
    "邀请",
    "加入了群聊",
    "已加入群聊",
    "[系统提示]",
    "以上为历史消息",
    "仅群主",
)
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
# ============================================================================
|
|
251
|
+
# Output shape
|
|
252
|
+
# ============================================================================
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
@dataclass(frozen=True)
class NicknameExtraction:
    """A detected nickname line together with the bubble blocks it claimed.

    Attributes:
        nickname: The nickname text.
        bbox_xyxy: ``(x1, y1, x2, y2)`` box of the nickname line.
        confidence: OCR confidence of the nickname block.
        following_block_indices: Indices into ``OcrPageResult.text_blocks``
            (reading order) of the claimed bubble blocks. An empty tuple
            means a nickname was found but no body was close enough to
            claim; such "naked" nickname lines are still reported for QA.
        nickname_block_index: Index of the nickname line itself in
            ``text_blocks``; used to emit ``SpeakerBodySegment`` entries in
            top-to-bottom order. Defaults to ``-1``.
        band_index: Optional vertical-band index; ``None`` when unset.
    """

    nickname: str
    bbox_xyxy: tuple[int, int, int, int]
    confidence: float
    following_block_indices: tuple[int, ...] = ()
    nickname_block_index: int = -1
    band_index: Optional[int] = None
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
@dataclass(frozen=True)
class SpeakerBodySegment:
    """One attributable stretch of body text on a single chat page.

    This is the raw material for the PRD section 8 speaker JSON.
    """

    #: OCR nickname text, the device-side key ``wo``, or an empty string for a
    #: left-side orphan bubble (as stated by the original field note).
    speaker_display: str

    body_text: str
    orphan: bool

    #: Original index of the OCR text block in the ``blocks`` list
    #: (``-1`` means none). Used for PRD section 5 band-membership inference.
    block_index: int = -1
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
@dataclass(frozen=True)
class ResumeThumbBinding:
    """PRD section 6(2): one resume card in a frame bound to its nickname.

    Per the original note, the binding relies on the PRD section 5 avatar
    y-top vertical bands (``chat_bubble_geometry_prd``): when the thumb bbox
    geometrically intersects a left-side bubble block that a nickname has
    claimed, that nickname is taken as the thumb's speaker. See
    ``_resume_thumb_bindings_and_orphans``.
    """

    #: ``[x1, y1, x2, y2]``, matching the metadata ``source_thumb_bbox``.
    thumb_bbox: tuple[int, int, int, int]

    #: Raw OCR nickname text (same source as ``NicknameExtraction.nickname``).
    nickname: str
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
@dataclass(frozen=True)
class ChatSpeakerAttribution:
    """Session-internal speaker attribution for a single chat screenshot.

    The top-level counters are sufficient for the Admin ``/wx-ocr-stats``
    page to derive a per-session non-stub share::

        non_stub_share = bubbles_with_nickname / max(total_bubbles, 1)

    Frames with no usable text (OCR soft error, everything low-confidence)
    carry ``total_bubbles=0`` and empty ``nicknames``; callers must guard the
    division themselves.

    ``resume_thumb_orphans`` (PRD section 9, 7(1) + 9(4)) counts resume-card
    bboxes from metadata that do not intersect any left-side bubble claimed
    by a nickname, i.e. cards that cannot be attributed.

    ``resume_thumb_bindings`` (PRD section 6(2)) maps resume-card bboxes to
    nicknames via left-side bubbles assigned through the PRD section 5 avatar
    y-top vertical bands. Per the original note the bridge layer feeds this
    to ``resolve_resume_card_speaker_binding`` and no longer uses a sidebar
    heuristic fallback.
    """

    screenshot_id: str
    nicknames: tuple[NicknameExtraction, ...] = ()
    total_bubbles: int = 0
    bubbles_with_nickname: int = 0
    bubbles_owner: int = 0
    bubbles_unknown: int = 0
    resume_thumb_orphans: int = 0

    #: PRD section 6(2): resume-card bbox -> nickname bindings for this frame
    #: (validated through vertical-band membership).
    resume_thumb_bindings: tuple[ResumeThumbBinding, ...] = ()

    #: PRD section 8: body segments of this page split by nickname, device
    #: side and orphan; merged later into per-speaker JSON work items.
    speaker_body_segments: tuple[SpeakerBodySegment, ...] = ()

    #: Debug only: full intermediate attribution data for this frame
    #: (blocks / bands / ytops / bboxes). When not None it is written to
    #: ``debug_session_derived.json``; per the original note this can be
    #: switched off in production through an environment variable.
    ocr_debug: Optional[dict[str, Any]] = None
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
# ============================================================================
|
|
345
|
+
# Internal helpers
|
|
346
|
+
# ============================================================================
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
# Whole-string matcher: the text consists of digits only.
_PURE_DIGITS_RE = re.compile(r"^\d+$")

# Whole-string matcher: ``H:MM`` / ``HH:MM`` with optional ``:SS``.
_PURE_TIME_RE = re.compile(r"^\d{1,2}:\d{2}(:\d{2})?$")

# Weekday-prefixed chat timestamp such as "周三 下午3:45" or "周日 15:30".
# The previous pattern made every component after the weekday optional and
# had no end anchor, so it matched ANY text that merely started with "周X"
# (e.g. the ordinary message "周三见"). The pattern now requires at least an
# hour after the optional day-period word and must consume the whole string.
# The colon stays optional (OCR may drop it) and may be ASCII or full-width.
_WEEKDAY_TIME_RE = re.compile(
    r"^周[一二三四五六日天]\s*(?:上午|下午|晚上|凌晨)?\s*\d{1,2}[::]?\d{0,2}\s*$"
)

# Suffix matcher (use with ``search``): the text ends with an 11-digit number
# starting with 1, optionally followed by whitespace.
_MOBILE_PHONE_SUFFIX_RE = re.compile(r"1\d{10}\s*$")
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
def _block_height(block: "TextBlock") -> int:
    """Return the vertical extent of the block's bbox, never below zero."""
    _, top, _, bottom = block.bbox_xyxy
    extent = bottom - top
    return extent if extent > 0 else 0
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
def _block_x_center(block: "TextBlock") -> float:
    """Return the horizontal midpoint of the block's bbox as a float."""
    left, _, right, _ = block.bbox_xyxy
    edge_sum = left + right
    return edge_sum / 2.0
|
|
365
|
+
|
|
366
|
+
|
|
367
|
+
def _block_top(block: "TextBlock") -> int:
    """Return ``y1``, the top edge of the block's ``(x1, y1, x2, y2)`` bbox."""
    bbox = block.bbox_xyxy
    return bbox[1]
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
def _block_bottom(block: "TextBlock") -> int:
    """Return ``y2``, the bottom edge of the block's ``(x1, y1, x2, y2)`` bbox."""
    bbox = block.bbox_xyxy
    return bbox[3]
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
def _is_pure_time(text: str) -> bool:
    """Return True when *text* matches ``_PURE_TIME_RE`` (a bare clock time)."""
    return _PURE_TIME_RE.match(text) is not None
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
def _is_pure_digits(text: str) -> bool:
    """Return True when *text* matches ``_PURE_DIGITS_RE`` (digits only)."""
    return _PURE_DIGITS_RE.match(text) is not None
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
def _is_chat_timestamp_or_divider(text: str) -> bool:
    """Tell whether *text* is a chat timestamp row or the new-messages divider.

    ``None`` and blank input yield False. Otherwise the stripped text counts
    when it contains the divider phrase "以下为新消息" or when
    ``_WEEKDAY_TIME_RE`` matches it from the start.
    """
    candidate = (text or "").strip()
    if not candidate:
        return False
    return (
        "以下为新消息" in candidate
        or _WEEKDAY_TIME_RE.match(candidate) is not None
    )
|
|
392
|
+
|
|
393
|
+
|
|
394
|
+
def _is_favorite_card_label(text: str) -> bool:
    """Detect the WeChat card footer label "收藏" (Favorites).

    By product convention this label must never be used as a speaker
    nickname. The label is recognised bare or wrapped in brackets,
    parentheses, corner quotes, spaces or tabs.
    """
    label = (text or "").strip()
    return label == "收藏" or label.strip("「」[]()() \t") == "收藏"
|
|
401
|
+
|
|
402
|
+
|
|
403
|
+
def _has_mobile_phone_suffix(text: str) -> bool:
    """Return True when *text*, with ASCII spaces removed, ends in a match of
    ``_MOBILE_PHONE_SUFFIX_RE``. ``None`` is treated as an empty string."""
    compact = (text or "").replace(" ", "")
    return _MOBILE_PHONE_SUFFIX_RE.search(compact) is not None
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
def _is_avatar_column_nickname_row(
    block: "TextBlock",
    original_width: int,
    config: NicknameOcrConfig,
    avatar_layout: Any = None,
) -> bool:
    """Decide whether *block* sits where a nickname row is expected.

    When a non-empty ``avatar_layout`` is supplied and the optional
    ``processor.left_avatar_column`` helper can be imported, the decision is
    delegated to ``nickname_bbox_in_avatar_column``. In every other case the
    fallback rule applies: the bbox left edge must be strictly less than
    ``original_width * config.nickname_max_x1_ratio``.

    NOTE(review): ``processor`` is imported lazily and treated as optional;
    confirm it is actually shipped with this package, otherwise only the
    fallback rule ever runs.
    """
    try:
        from processor.left_avatar_column import (
            nickname_bbox_in_avatar_column as column_check,
        )
    except ImportError:
        column_check = None  # type: ignore[assignment,misc]

    layout_usable = (
        avatar_layout is not None
        and column_check is not None
        # A layout without an ``empty`` attribute is treated as empty.
        and not getattr(avatar_layout, "empty", True)
    )
    if layout_usable:
        return column_check(
            avatar_layout,
            block.bbox_xyxy,
            original_width=original_width,
            max_x1_ratio=config.nickname_max_x1_ratio,
        )

    left_edge, _, _, _ = block.bbox_xyxy
    limit = float(original_width) * float(config.nickname_max_x1_ratio)
    return float(left_edge) < limit
|
|
427
|
+
|
|
428
|
+
|
|
429
|
+
def _avatar_guard_kwargs(config: NicknameOcrConfig) -> dict:
    """Map the ``avatar_*`` config knobs onto avatar-guard keyword names.

    The two pixel values are passed through unchanged; the four thresholds
    are coerced to ``float``.
    """
    return dict(
        gutter_px=config.avatar_gutter_px,
        min_roi_width_px=config.avatar_min_left_roi_width_px,
        laplacian_min=float(config.avatar_laplacian_var_min),
        rgb_std_min=float(config.avatar_rgb_std_min),
        edge_sig_cols_min=float(config.avatar_edge_sig_cols_min),
        edge_right_left_ratio_min=float(config.avatar_edge_right_left_ratio_min),
    )
|
|
438
|
+
|
|
439
|
+
|
|
440
|
+
def _avatar_anchor_for_block(
    block: "TextBlock",
    avatar_layout: Any,
    *,
    scale_w: float,
) -> Any:
    """Look up the avatar anchor associated with a nickname block.

    Returns ``None`` when there is no layout, when the layout reports itself
    empty (a missing ``empty`` attribute counts as empty), or when the
    optional ``processor.left_avatar_column`` module cannot be imported.
    Otherwise returns whatever ``find_avatar_anchor_for_nickname_bbox``
    yields for the block's bbox.
    """
    layout_missing = avatar_layout is None or getattr(avatar_layout, "empty", True)
    if layout_missing:
        return None
    try:
        from processor.left_avatar_column import (
            find_avatar_anchor_for_nickname_bbox as locate_anchor,
        )
    except ImportError:
        return None
    return locate_anchor(avatar_layout, block.bbox_xyxy, scale_w=scale_w)
|
|
457
|
+
|
|
458
|
+
|
|
459
|
+
def _load_left_avatar_layout(
    png_path: Optional[Path],
    screenshot: "Screenshot",
) -> Any:
    """Load the screenshot PNG and detect the left avatar column layout.

    Returns ``None`` when no path is given, when the optional ``processor``
    helpers cannot be imported, or when the decoded image is missing or has
    ``size == 0``. Otherwise returns the result of
    ``detect_left_avatar_column_layout`` restricted to the chat content's
    vertical bounds.
    """
    if png_path is None:
        return None
    try:
        from processor.chat_bubble_geometry_prd import compute_chat_content_vertical_bounds
        from processor.left_avatar_column import detect_left_avatar_column_layout
        from processor.nickname_avatar_guard import load_png_bgr
    except ImportError:
        return None

    image = load_png_bgr(str(png_path))
    if image is None or getattr(image, "size", 0) == 0:
        return None

    screen_w = screenshot.original_resolution.width
    screen_h = screenshot.original_resolution.height
    # Horizontal scale relative to the 1080px design width.
    width_scale = float(screen_w) / 1080.0
    content_bounds = compute_chat_content_vertical_bounds(
        image, width_scale, screen_h=screen_h
    )
    return detect_left_avatar_column_layout(
        image,
        width_scale,
        y_top=content_bounds.y_top,
        y_bottom_excl=content_bounds.y_bottom_excl,
    )
|
|
484
|
+
|
|
485
|
+
|
|
486
|
+
def _starts_with_system_prefix(text: str, drops: tuple[str, ...]) -> bool:
    """Return True when *text* begins with at least one entry of *drops*."""
    return any(text.startswith(tag) for tag in drops)
|
|
491
|
+
|
|
492
|
+
|
|
493
|
+
def _is_on_left_half(
    block: "TextBlock",
    original_width: int,
    ratio: float,
) -> bool:
    """Return True when the block's x-centre lies strictly left of the
    midline ``original_width * ratio``."""
    midline_x = original_width * ratio
    return _block_x_center(block) < midline_x
|
|
499
|
+
|
|
500
|
+
|
|
501
|
+
def _vertical_center(block: "TextBlock") -> float:
    """Return the vertical midpoint between the block's top and bottom edges."""
    top_edge = _block_top(block)
    bottom_edge = _block_bottom(block)
    return (top_edge + bottom_edge) / 2.0
|
|
503
|
+
|
|
504
|
+
|
|
505
|
+
def _xyxy_intersects(
    a: tuple[int, int, int, int],
    b: tuple[int, int, int, int],
) -> bool:
    """Return True when two ``(x1, y1, x2, y2)`` boxes overlap or touch.

    Boxes that merely share an edge or a corner count as intersecting,
    because separation is tested with strict ``<``.
    """
    a_left, a_top, a_right, a_bottom = a
    b_left, b_top, b_right, b_bottom = b
    apart_horizontally = a_right < b_left or b_right < a_left
    if apart_horizontally:
        return False
    apart_vertically = a_bottom < b_top or b_bottom < a_top
    return not apart_vertically
|
|
512
|
+
|
|
513
|
+
|
|
514
|
+
def _xyxy_union(
    a: tuple[int, int, int, int],
    b: tuple[int, int, int, int],
) -> tuple[int, int, int, int]:
    """Return the smallest ``(x1, y1, x2, y2)`` box that covers both inputs."""
    x1, y1 = min(a[0], b[0]), min(a[1], b[1])
    x2, y2 = max(a[2], b[2]), max(a[3], b[3])
    return (x1, y1, x2, y2)
|
|
524
|
+
|
|
525
|
+
|
|
526
|
+
|
|
527
|
+
def _bubble_envelopes_from_blocks(
    blocks: list["TextBlock"],
    classes: list[str],
    *,
    original_width: int,
    config: NicknameOcrConfig,
    median_height: float,
) -> list[tuple[int, int, int, int]]:
    """Cluster consecutive ``bubble_text`` blocks into envelope bboxes.

    Blocks are walked in list order. A run starts at a ``bubble_text`` block
    and absorbs the next ``bubble_text`` block while either
    ``_mergeable_bubble_fragments`` returns a non-None result for the
    adjacent pair, or both blocks are on the same screen half and the gap
    from the previous bottom to the next top lies in
    ``[0, config.bubble_vertical_gap_max_px]``. Each run contributes the
    union of its member bboxes. Per the original note, nickname rows must
    not intersect these envelopes.
    """
    envelopes: list[tuple[int, int, int, int]] = []
    total = len(blocks)
    start = 0
    while start < total:
        if classes[start] != "bubble_text":
            start += 1
            continue

        envelope = blocks[start].bbox_xyxy
        cursor = start + 1
        while cursor < total and classes[cursor] == "bubble_text":
            # Adjacency is judged between neighbouring blocks, not against
            # the accumulated envelope.
            upper, lower = blocks[cursor - 1], blocks[cursor]
            fragment_merge = _mergeable_bubble_fragments(
                upper,
                lower,
                original_width=original_width,
                median_h=median_height,
                config=config,
            )
            upper_is_left = _is_on_left_half(
                upper, original_width, config.screen_midline_ratio
            )
            lower_is_left = _is_on_left_half(
                lower, original_width, config.screen_midline_ratio
            )
            same_side = upper_is_left == lower_is_left
            vertical_gap = _block_top(lower) - _block_bottom(upper)
            joinable = fragment_merge is not None or (
                same_side
                and 0 <= vertical_gap <= int(config.bubble_vertical_gap_max_px)
            )
            if not joinable:
                break
            envelope = _xyxy_union(envelope, lower.bbox_xyxy)
            cursor += 1

        envelopes.append(envelope)
        start = cursor
    return envelopes
|
|
571
|
+
|
|
572
|
+
|
|
573
|
+
def _apply_prd_nickname_bubble_separation(
    blocks: list["TextBlock"],
    classes: list[str],
    *,
    original_width: int,
    config: NicknameOcrConfig,
    median_height: float,
) -> None:
    """PRD section 9, 7(3) hook - intentionally disabled.

    Per the original note the separation was replaced by bbox filtering in
    the bridge layer. The function is kept as a no-op: it reads none of its
    arguments and leaves ``classes`` untouched.
    """
    return None
|
|
583
|
+
|
|
584
|
+
|
|
585
|
+
def _nickname_passes_avatar_guard(
    *,
    bgr_guard: Any,
    nick_block: "TextBlock",
    avatar_layout: Any,
    scale_w: float,
    original_width: int,
    config: NicknameOcrConfig,
) -> bool:
    """PRD section 9, 7(2): image-based avatar gate for a nickname row.

    When no image is available (``bgr_guard is None``) or the optional
    ``processor.nickname_avatar_guard`` module cannot be imported, the row
    passes only if ``config.require_avatar_guard_for_nickname`` is False.
    Otherwise the verdict of ``avatar_roi_pass`` is returned as a bool. The
    gate applies to rows on either screen half; the side is passed along.

    NOTE(review): with the default ``require_avatar_guard_for_nickname=True``
    a missing ``processor`` package rejects every nickname; confirm the
    package is shipped with this module.
    """
    if bgr_guard is None:
        return not config.require_avatar_guard_for_nickname
    try:
        from processor.nickname_avatar_guard import avatar_roi_pass
    except ImportError:
        return not config.require_avatar_guard_for_nickname

    left_x, top_y, right_x, bottom_y = nick_block.bbox_xyxy
    on_left = _is_on_left_half(
        nick_block, original_width, config.screen_midline_ratio
    )
    side = "left" if on_left else "right"
    anchor = _avatar_anchor_for_block(nick_block, avatar_layout, scale_w=scale_w)
    prd_strict = bool(config.use_prd_strict_avatar_guard)
    verdict = avatar_roi_pass(
        bgr_guard,
        nickname_x1=int(left_x),
        nickname_x2=int(right_x),
        y_top=int(top_y),
        y_bottom=int(bottom_y),
        avatar_anchor=anchor,
        prd_strict=prd_strict,
        side=side,
        **_avatar_guard_kwargs(config),
    )
    return bool(verdict)
|
|
620
|
+
|
|
621
|
+
|
|
622
|
+
# PRD section 7(5): placeholder keywords that are removed when they make up a
# whole line ("[文件]" is not part of this list).
# PRD section 6(4) additions: "收藏" and "以下为新消息".
_PLACEHOLDER_EXACT_LINES: frozenset[str] = frozenset(
    (
        "收藏",
        "以下为新消息",
        "[语音]",
        "[表情]",
        "[动画表情]",
        "[图片]",
        "[视频]",
        "[位置]",
        "[链接]",
        "[红包]",
        "[转账]",
        "[撤回了一条消息]",
    )
)


def _strip_placeholder_lines_from_body(text: str) -> str:
    """Remove blank lines and whole-line placeholders from a body text.

    The text is split on ``"\\n"``. A line is dropped when it is blank or
    when its stripped form is in ``_PLACEHOLDER_EXACT_LINES``. Surviving
    lines keep their leading whitespace but lose trailing whitespace, and
    the joined result is stripped as a whole. Blank input yields ``""``.
    """
    survivors = [
        line.rstrip()
        for line in text.split("\n")
        if line.strip() and line.strip() not in _PLACEHOLDER_EXACT_LINES
    ]
    return "\n".join(survivors).strip()
|
|
652
|
+
|
|
653
|
+
|
|
654
|
+
def _apply_resume_thumb_block_mask(
    blocks: list["TextBlock"],
    classes: list[str],
    screenshot: "Screenshot",
    *,
    original_width: int = 1080,
    screen_midline_ratio: float = 0.45,
) -> None:
    """Section 7: drop OCR blocks that overlap a resume-note thumbnail.

    Thumbnail boxes are read from ``screenshot.resume_thumb_bboxes``; entries
    that are falsy or shorter than four items are ignored. ``classes`` is
    mutated in place: a block intersecting any thumbnail is set to ``"drop"``
    only when it is NOT on the left half. Left-half blocks keep their class
    so that, per the original note, they remain available for nickname
    claiming and thumb binding.

    NOTE(review): the default ``screen_midline_ratio`` here is 0.45 while
    ``NicknameOcrConfig.screen_midline_ratio`` defaults to 0.5 - confirm the
    difference is intended or that callers always pass the config value.
    """
    raw_boxes = getattr(screenshot, "resume_thumb_bboxes", None) or []
    thumb_boxes: list[tuple[int, int, int, int]] = [
        (int(raw[0]), int(raw[1]), int(raw[2]), int(raw[3]))
        for raw in raw_boxes
        if raw and len(raw) >= 4
    ]
    if not thumb_boxes:
        return

    for idx, candidate in enumerate(blocks):
        box = candidate.bbox_xyxy
        touches_thumb = any(_xyxy_intersects(box, thumb) for thumb in thumb_boxes)
        if touches_thumb and not _is_on_left_half(
            candidate, original_width, screen_midline_ratio
        ):
            classes[idx] = "drop"
|
|
679
|
+
|
|
680
|
+
|
|
681
|
+
def _load_prd_layout_optional(
    png_path: Optional[Path],
    screenshot: "Screenshot",
    *,
    config: NicknameOcrConfig,
) -> Any:
    """Build the PRD speaker vertical bands for a screenshot, if possible.

    Returns ``None`` when no PNG path is given, when
    ``config.use_prd_chat_vertical_bands`` is off, when the optional
    ``processor`` helpers cannot be imported, or when the decoded image is
    missing or has ``size == 0``. Otherwise the avatar y-tops detected on
    both sides are merged in time order and passed to
    ``build_prd_speaker_vertical_bands``.
    """
    if png_path is None:
        return None
    if not config.use_prd_chat_vertical_bands:
        return None
    try:
        from processor.chat_bubble_geometry_prd import (
            build_prd_speaker_vertical_bands,
            compute_chat_content_vertical_bounds,
            detect_chat_side_avatar_ytops,
            merge_avatar_ytops_time_order,
        )
        from processor.nickname_avatar_guard import load_png_bgr
    except ImportError:
        return None

    image = load_png_bgr(str(png_path))
    if image is None or getattr(image, "size", 0) == 0:
        return None

    screen_h = screenshot.original_resolution.height
    screen_w = screenshot.original_resolution.width
    # Horizontal scale relative to the 1080px design width.
    width_scale = float(screen_w) / 1080.0
    content_bounds = compute_chat_content_vertical_bounds(
        image, width_scale, screen_h=screen_h
    )
    left_ytops, right_ytops = detect_chat_side_avatar_ytops(
        image, width_scale, content_bounds
    )
    timeline = merge_avatar_ytops_time_order(left_ytops, right_ytops)
    return build_prd_speaker_vertical_bands(
        timeline, content_bounds, scale_w=width_scale
    )
|
|
709
|
+
|
|
710
|
+
|
|
711
|
+
def _finalize_prd_attribution(
    attr: ChatSpeakerAttribution,
    *,
    config: NicknameOcrConfig,
    blocks: Optional[list["TextBlock"]] = None,
    layout_opt: Any = None,
    scale_w: float = 1.0,
) -> ChatSpeakerAttribution:
    """§7 post-processing of a page attribution.

    Performs, in order: placeholder-line stripping, the short-body gate,
    nickname inheritance for orphan segments, and the "no owning nickname"
    fallback; then recomputes the bubble counters.

    PRD §5 addition: when an orphan segment has no ``last_left`` nickname to
    inherit, the first band that vertically intersects its block is looked up
    and that band's first speaker nickname is used instead.

    Args:
        attr: Attribution to finalize; it is not mutated.
        config: Supplies ``min_bubble_body_chars``.
        blocks: OCR blocks indexed by ``segment.block_index``; needed for the
            band fallback.
        layout_opt: Object exposing ``speaker_bands`` as ``(y0, y1)`` pairs;
            needed for the band fallback.
        scale_w: Accepted for signature compatibility; not read here.

    Returns:
        A new ``ChatSpeakerAttribution`` with filtered/re-attributed segments
        and recomputed counters.  ``nicknames`` and the resume-thumb fields
        are carried over unchanged.
    """
    bands_usable = layout_opt is not None and blocks is not None

    # 0) Precompute band_index -> first speaker nickname (orphan fallback).
    band_nick: dict[int, str] = {}
    if bands_usable:
        for nick in attr.nicknames:
            band_index = nick.band_index
            if band_index is not None and band_index not in band_nick:
                band_nick[band_index] = nick.nickname.strip()

    # 1) Placeholder stripping + minimum body length gate.
    min_body_chars = max(0, int(config.min_bubble_body_chars))
    pruned_segs: list[SpeakerBodySegment] = []
    for seg in attr.speaker_body_segments:
        body = _strip_placeholder_lines_from_body(seg.body_text)
        body_len = len(body.strip())
        if body_len == 0 or body_len < min_body_chars:
            continue
        pruned_segs.append(dataclass_replace(seg, body_text=body))

    def _first_intersecting_band(block: "TextBlock") -> Optional[int]:
        """Index of the first speaker band whose y-range overlaps ``block``."""
        top, bottom = float(block.bbox_xyxy[1]), float(block.bbox_xyxy[3])
        bands = getattr(layout_opt, "speaker_bands", ())
        for band_index, (band_top, band_bottom) in enumerate(bands):
            if bottom > float(band_top) and top < float(band_bottom):
                return band_index
        return None

    # 2) Orphans inherit the nearest preceding nickname; segments that already
    #    carry a speaker (including the owner label) pass through unchanged.
    #    PRD §5: with no nickname to inherit, fall back to the band's speaker.
    last_left = ""
    out2: list[SpeakerBodySegment] = []
    for seg in pruned_segs:
        disp_raw = (seg.speaker_display or "").strip()
        if seg.speaker_display == _OWNER_SPEAKER_LABEL:
            out2.append(seg)
            continue

        if not (seg.orphan and not disp_raw):
            out2.append(seg)
            if (
                disp_raw
                and disp_raw != SPEAKER_DISPLAY_UNATTRIBUTED
                and disp_raw != _OWNER_SPEAKER_LABEL
            ):
                last_left = disp_raw
            continue

        inherited = last_left
        if (
            not inherited
            and band_nick
            and bands_usable
            # Upper bound added: a stale index must not raise IndexError and
            # abort the whole page; it falls through to "unattributed".
            and 0 <= seg.block_index < len(blocks)
        ):
            # PRD §5: the band the block falls into decides the speaker.
            band_index = _first_intersecting_band(blocks[seg.block_index])
            inherited = band_nick.get(band_index, "") if band_index is not None else ""
            if inherited:
                last_left = inherited

        if inherited:
            out2.append(
                dataclass_replace(seg, speaker_display=inherited, orphan=False)
            )
        else:
            out2.append(
                dataclass_replace(
                    seg,
                    speaker_display=SPEAKER_DISPLAY_UNATTRIBUTED,
                    orphan=True,
                )
            )

    bubbles_owner = sum(1 for s in out2 if s.speaker_display == _OWNER_SPEAKER_LABEL)
    bubbles_unknown = sum(
        1 for s in out2 if (s.speaker_display or "").strip() == SPEAKER_DISPLAY_UNATTRIBUTED
    )
    bubbles_with_nickname = 0
    for s in out2:
        disp_fin = (s.speaker_display or "").strip()
        if (
            (not s.orphan)
            and disp_fin
            and disp_fin not in (_OWNER_SPEAKER_LABEL, SPEAKER_DISPLAY_UNATTRIBUTED)
        ):
            bubbles_with_nickname += 1

    return ChatSpeakerAttribution(
        screenshot_id=attr.screenshot_id,
        nicknames=attr.nicknames,
        total_bubbles=len(out2),
        bubbles_with_nickname=bubbles_with_nickname,
        bubbles_owner=bubbles_owner,
        bubbles_unknown=bubbles_unknown,
        resume_thumb_orphans=attr.resume_thumb_orphans,
        resume_thumb_bindings=attr.resume_thumb_bindings,
        speaker_body_segments=tuple(out2),
    )
|
|
838
|
+
|
|
839
|
+
|
|
840
|
+
def _resume_thumb_bindings_and_orphans(
    screenshot: "Screenshot",
    blocks: list["TextBlock"],
    classes: list[str],
    claimed_indices: set[int],
    nicknames: tuple[NicknameExtraction, ...],
    *,
    original_width: int,
    config: NicknameOcrConfig,
    layout_opt: Any = None,
    scale_w: float = 1.0,
    image_size: tuple[int, int] | None = None,
) -> tuple[tuple[ResumeThumbBinding, ...], int, set[int]]:
    """PRD §7(2): bind each resume-card bbox to its owning nickname and count orphans.

    Three steps per card:
      A. Find the band the resume card belongs to (PRD §5 avatar y-top
         vertical band).
      B. Inside that band, find the first text line that qualifies as a
         nickname -> nickname.
      C. If none was found, look upward for the previous text line -> nickname
         (same exclusions as above; that line is "removed" from the content of
         the region it was in).

    Returns:
        ``(bindings, orphan_count, excluded_indices)``.
        ``excluded_indices`` are the block indices "removed" in Step C; the
        caller is expected to set their ``classes`` entry to drop.

    NOTE(review): ``classes``, ``claimed_indices``, ``nicknames``,
    ``original_width`` and ``config`` are not read anywhere in this body.
    """
    raws = getattr(screenshot, "resume_thumb_bboxes", None) or []
    if not raws:
        return (), 0, set()

    # -- Scale resume_thumb_bboxes from the phone's native resolution into OCR image space --
    if image_size is not None:
        iw, ih = float(image_size[0]), float(image_size[1])
        now = float(screenshot.original_resolution.width)
        noh = float(screenshot.original_resolution.height)
        sx = iw / now if now > 0 else 1.0
        sy = ih / noh if noh > 0 else 1.0
        if abs(sx - 1.0) > 0.0001 or abs(sy - 1.0) > 0.0001:
            # Malformed entries are filtered out here, so indices into ``raws``
            # below refer to the filtered list when rescaling happened.
            raws = [
                [raw[0] * sx, raw[1] * sy, raw[2] * sx, raw[3] * sy]
                for raw in raws if raw and len(raw) >= 4
            ]

    # OCR bboxes and card bboxes each jitter by about ±2-3 px: a title line inside
    # the card may sit 1-3 px above the card top.  With strict containment it would
    # be mistaken for "text outside the card" and wrongly bound as the nickname
    # (real case, 2026-06-11).  A genuine nickname line lies entirely above the card
    # (y2 < card top) and is unaffected by this tolerance.
    _inside_tol = 4.0 * max(scale_w, 1.0)

    def _completely_inside(block_bbox: tuple, card_bbox: tuple) -> bool:
        # True when the block's centre-x is within the card's x-range and its
        # y-range lies within the card's y-range widened by ``_inside_tol``.
        bx1, by1, bx2, by2 = block_bbox
        cx1, cy1, cx2, cy2 = card_bbox
        # Use block centre-x rather than both edges: a block may extend a few px
        # beyond the card's right side but still belong to the card (e.g. long
        # skill lines), so we shouldn't treat it as "outside" on that basis.
        bcx = (bx1 + bx2) / 2.0
        return (
            cx1 <= bcx <= cx2
            and by1 >= cy1 - _inside_tol
            and by2 <= cy2 + _inside_tol
        )

    def _should_skip_block(b: "TextBlock") -> bool:
        # True for blocks that can never serve as a nickname: empty text, fixed
        # UI labels, "name:" field lines, new-message notices, times/digits/
        # dividers, and favorite-card labels.
        t = (b.text or "").strip()
        if not t:
            return True
        if t == "收藏" or t == "以下为新消息":
            return True
        # Both the ASCII-colon and the full-width-colon spelling are matched.
        if t.startswith("姓名:") or t.startswith("姓名:"):
            return True
        # Exclude the "N new messages" system notice (e.g. "69条新消息" or its
        # OCR fragment "69条").
        if re.fullmatch(r"\d+条", t) or "条新消息" in t:
            return True
        if _is_pure_time(t) or _is_pure_digits(t) or _is_chat_timestamp_or_divider(t):
            return True
        if _is_favorite_card_label(t):
            return True
        return False

    bindings: list[ResumeThumbBinding] = []
    orphan = 0
    excluded_indices: set[int] = set()

    # -- Use a virtual 2x2 area at click_coords to identify the clicked source card --
    # NOTE(review): ``click_source_card_idx`` and ``click_area`` are assigned but
    # never read later in this function.
    click_source_card_idx: int | None = None
    _click_ctx = getattr(screenshot, "click_context", None)
    if _click_ctx is not None:
        _coords = getattr(_click_ctx, "click_coords", None)
        if _coords and len(_coords) >= 2:
            # Scale click_coords into OCR space according to image_size.
            if image_size is not None:
                iw, ih = float(image_size[0]), float(image_size[1])
                now = float(screenshot.original_resolution.width)
                noh = float(screenshot.original_resolution.height)
                csx = iw / now if now > 0 else 1.0
                csy = ih / noh if noh > 0 else 1.0
            else:
                csx, csy = 1.0, 1.0
            cx, cy = _coords[0] * csx, _coords[1] * csy
            # Build the virtual 2x2 area.
            click_area = (cx, cy, cx + 2.0, cy + 2.0)
            # Find the single card that contains the click point.
            matching_cards: list[int] = []
            for i, raw in enumerate(raws):
                if not raw or len(raw) < 4:
                    continue
                rx1, ry1, rx2, ry2 = raw[0], raw[1], raw[2], raw[3]
                if rx1 <= cx <= rx2 and ry1 <= cy <= ry2:
                    matching_cards.append(i)
            if len(matching_cards) == 1:
                click_source_card_idx = matching_cards[0]

    for ci, raw in enumerate(raws):
        if not raw or len(raw) < 4:
            continue
        tbb = (int(raw[0]), int(raw[1]), int(raw[2]), int(raw[3]))

        # -- Step A: find the band the resume card belongs to --
        # PRD §8.A:
        #   (0) card lies entirely inside one band -> that band
        #   (1) card intersects two bands at once -> the lower band
        band_idx: Optional[int] = None
        if layout_opt is not None:
            _sb_list = getattr(layout_opt, "speaker_bands", ())
            _cy1, _cy2 = float(tbb[1]), float(tbb[3])
            for _bi, (_b0, _b1) in enumerate(_sb_list):
                if _cy2 > float(_b0) and _cy1 < float(_b1):
                    band_idx = _bi  # intersecting -> candidate; keeps being overwritten -> the last one wins

        matched_nickname: Optional[str] = None

        # -- Step B: inside the band, find the first text line that qualifies as a nickname --
        # PRD §7(2) note 1: the rules only handle a block intersecting the band
        # below / the resume-card bbox below (the "intersects above" rule was removed).
        if band_idx is not None and band_idx >= 0:
            # -- Y ranges of the current band and of the band / card below --
            _sb_list = getattr(layout_opt, "speaker_bands", ())
            if band_idx >= len(_sb_list):
                band_idx = None  # defensive: out-of-range band_idx skips Step B
            else:
                _band_y0, _band_y1 = float(_sb_list[band_idx][0]), float(_sb_list[band_idx][1])

                _below_band_y0: Optional[float] = None
                _below_band_y1: Optional[float] = None
                if band_idx + 1 < len(_sb_list):
                    _bb_y0, _bb_y1 = _sb_list[band_idx + 1]
                    _below_band_y0 = float(_bb_y0)
                    _below_band_y1 = float(_bb_y1)

                # The "card below" is the next entry of ``raws``.
                _below_card_y1: Optional[float] = None
                _below_card_y2: Optional[float] = None
                if ci + 1 < len(raws):
                    _next_raw = raws[ci + 1]
                    if _next_raw and len(_next_raw) >= 4:
                        _below_card_y1 = float(_next_raw[1])
                        _below_card_y2 = float(_next_raw[3])

                matched_nickname: Optional[str] = None
                best_y: float = float('inf')
                for i, b in enumerate(blocks):
                    bb = b.bbox_xyxy
                    _by1, _by2 = float(bb[1]), float(bb[3])

                    # -- Rule (0): the block must overlap the current speaker band in Y --
                    if not (_by2 > _band_y0 and _by1 < _band_y1):
                        continue

                    # -- PRD §8 intersection rules (1)(2)(3) --
                    _inter_below_band = False
                    if _below_band_y0 is not None:
                        _inter_below_band = _by2 > _below_band_y0 and _by1 < _below_band_y1

                    _inter_below_card = False
                    if _below_card_y1 is not None:
                        _inter_below_card = _by2 > _below_card_y1 and _by1 < _below_card_y2

                    # Rule (1): intersects the band below but not the card below -> skip
                    # (the block is inside the band below but does not touch that
                    # band's card; it does not belong to the current card).
                    if _inter_below_band and not _inter_below_card:
                        continue

                    # Rule (2): intersects both the band below and the card below -> skip
                    # (the block belongs to the band / card below).
                    # NOTE(review): rules (1) and (2) together skip every block
                    # that intersects the band below, regardless of
                    # ``_inter_below_card``.
                    if _inter_below_band and _inter_below_card:
                        continue

                    # Rule (3): does not intersect the band below but intersects the
                    # card below -> belongs to the current card (implicit: no continue).

                    if _should_skip_block(b):
                        continue

                    is_fully_inside = _completely_inside(bb, tbb)
                    # Exclude text that lies entirely inside the resume-card bbox (card body).
                    if is_fully_inside:
                        continue

                    # -- Keep the candidate with the smallest bottom edge (y2) --
                    if _by2 < best_y:
                        best_y = _by2
                        matched_nickname = (b.text or "").strip()

        # -- Step C: look upward for the previous text line (may cross bands) --
        if matched_nickname is None:
            best_i: Optional[int] = None
            best_bottom: float = -1.0
            card_top = float(tbb[1])
            for i, b in enumerate(blocks):
                bcx = float(b.bbox_xyxy[3])  # block bottom
                # Only blocks whose bottom edge is strictly above the card top qualify.
                if bcx >= card_top:
                    continue
                if _should_skip_block(b):
                    continue
                # Keep the qualifying block with the largest bottom edge.
                if bcx > best_bottom:
                    best_bottom = bcx
                    best_i = i

            if best_i is not None:
                # PRD §B.1 C: distance check -- from the text bottom to the top of
                # the card's speaker band; beyond the threshold
                # (50 px baseline x scale_w) the line is not used as a nickname.
                _use_as_nickname = True
                if band_idx is not None and band_idx >= 0:
                    _sb_list = getattr(layout_opt, "speaker_bands", ())
                    if band_idx < len(_sb_list):
                        _band_y0 = float(_sb_list[band_idx][0])
                        _gap = _band_y0 - best_bottom  # positive → block above band
                        _threshold = 50.0 * float(scale_w)
                        if _gap > _threshold:
                            _use_as_nickname = False
                if _use_as_nickname:
                    matched_nickname = (blocks[best_i].text or "").strip()
                    # PRD §7(2): "remove" -- this line is taken out of the content
                    # of the region it was in.
                    excluded_indices.add(best_i)

        # An empty matched string counts as an orphan as well.
        if matched_nickname:
            bindings.append(ResumeThumbBinding(thumb_bbox=tbb, nickname=matched_nickname))
        else:
            orphan += 1

    return tuple(bindings), orphan, excluded_indices
|
|
1077
|
+
|
|
1078
|
+
|
|
1079
|
+
def _merge_attrib_line_from_seed(
    blocks: list["TextBlock"],
    seed_idx: int,
    *,
    original_width: int,
    cfg: NicknameOcrConfig,
    median_height: float,
) -> Optional[Tuple[str, tuple[int, int, int, int]]]:
    """Merge the nickname row that starts at ``seed_idx``, in reading order.

    Walks forward from the seed and collects blocks until one fails any row
    criterion: empty text, confidence below ``cfg.min_confidence``, not on the
    left half, vertical centre more than 30 px from the seed's, time / digits /
    timestamp-divider text, favorite-card label, an attribution system prefix,
    or (for blocks after the seed, when ``median_height > 2.0``) a height above
    ``median_height * 1.12``.

    Returns:
        ``(text, bbox)`` where ``text`` is the stripped texts concatenated
        without a separator and ``bbox`` is the integer union box, or ``None``
        when nothing was collected.
    """
    row_tolerance = 30
    seed_center = _vertical_center(blocks[seed_idx])
    row: list["TextBlock"] = []
    for pos in range(seed_idx, len(blocks)):
        candidate = blocks[pos]
        content = candidate.text.strip()
        ends_row = (
            not content
            or candidate.confidence < cfg.min_confidence
            or not _is_on_left_half(candidate, original_width, cfg.screen_midline_ratio)
            or abs(_vertical_center(candidate) - seed_center) > row_tolerance
            or _is_pure_time(content)
            or _is_pure_digits(content)
            or _is_chat_timestamp_or_divider(content)
            or _is_favorite_card_label(content)
            or _starts_with_system_prefix(content, _ATTRIBUTION_PREFIX_DROPS)
            or (
                pos > seed_idx
                and median_height > 2.0
                and _block_height(candidate) > median_height * 1.12
            )
        )
        if ends_row:
            break
        row.append(candidate)

    if not row:
        return None

    union_box = (
        int(min(b.bbox_xyxy[0] for b in row)),
        int(min(b.bbox_xyxy[1] for b in row)),
        int(max(b.bbox_xyxy[2] for b in row)),
        int(max(b.bbox_xyxy[3] for b in row)),
    )
    merged_text = "".join(b.text.strip() for b in row)
    if not merged_text:
        return None
    return merged_text, union_box
|
|
1120
|
+
|
|
1121
|
+
|
|
1122
|
+
def _iter_first_attrib_seed_indices(
|
|
1123
|
+
blocks: list["TextBlock"],
|
|
1124
|
+
*,
|
|
1125
|
+
original_width: int,
|
|
1126
|
+
cfg: NicknameOcrConfig,
|
|
1127
|
+
median_height: float,
|
|
1128
|
+
avatar_layout: Any = None,
|
|
1129
|
+
) -> list[int]:
|
|
1130
|
+
"""阅读序下所有通过几何启发式的首归因 seed 索引(供过滤择优)。"""
|
|
1131
|
+
seeds: list[int] = []
|
|
1132
|
+
for idx, b in enumerate(blocks):
|
|
1133
|
+
text = b.text.strip()
|
|
1134
|
+
if not text or b.confidence < cfg.min_confidence:
|
|
1135
|
+
continue
|
|
1136
|
+
if _is_pure_time(text) or _is_pure_digits(text) or _is_chat_timestamp_or_divider(text):
|
|
1137
|
+
continue
|
|
1138
|
+
if _is_favorite_card_label(text):
|
|
1139
|
+
continue
|
|
1140
|
+
if _starts_with_system_prefix(text, _ATTRIBUTION_PREFIX_DROPS):
|
|
1141
|
+
continue
|
|
1142
|
+
if not _is_on_left_half(b, original_width, cfg.screen_midline_ratio):
|
|
1143
|
+
continue
|
|
1144
|
+
if not _is_avatar_column_nickname_row(b, original_width, cfg, avatar_layout):
|
|
1145
|
+
continue
|
|
1146
|
+
seeds.append(idx)
|
|
1147
|
+
return seeds
|
|
1148
|
+
|
|
1149
|
+
|
|
1150
|
+
def first_attrib_verbatim_display_line_and_bbox(
    ocr_result: "OcrPageResult",
    screenshot: "Screenshot",
    *,
    config: Optional[NicknameOcrConfig] = None,
    png_path: Optional["Path"] = None,
) -> Optional[Tuple[str, tuple[int, int, int, int]]]:
    """First qualifying left-column nickname line and its bounding box.

    Geometric seed candidates are visited in reading order; each is merged
    into a row and the **first** one whose seed block passes
    ``_nickname_passes_avatar_guard`` is returned as ``(text, bbox)``.
    Returns ``None`` when the page has no text blocks or no candidate passes.

    Raises:
        AssertionError: if ``screenshot.type`` is not ``"chat_message"`` or the
            screenshot ids of ``ocr_result`` and ``screenshot`` differ.
    """
    cfg = config or NicknameOcrConfig()
    assert screenshot.type == "chat_message", (
        "first_attrib_verbatim_display_line expects chat_message type; got "
        f"{screenshot.type!r}"
    )
    assert ocr_result.screenshot_id == screenshot.screenshot_id, (
        f"screenshot_id mismatch ocr_result={ocr_result.screenshot_id!r} vs "
        f"screenshot={screenshot.screenshot_id!r}"
    )

    blocks = list(ocr_result.text_blocks)
    if not blocks:
        return None

    original_width = screenshot.original_resolution.width
    positive_heights = [h for h in map(_block_height, blocks) if h > 0]
    median_height = float(median(positive_heights)) if positive_heights else 1.0

    avatar_layout = _load_left_avatar_layout(png_path, screenshot)
    # Width scale relative to a 1080-px-wide reference.
    scale_w = float(original_width) / 1080.0

    # The pixel data for the avatar guard is optional: no path or a missing
    # ``processor`` package simply leaves it as None.
    bgr_guard = None
    if png_path is not None:
        try:
            from processor.nickname_avatar_guard import load_png_bgr

            bgr_guard = load_png_bgr(str(png_path))
        except ImportError:
            bgr_guard = None

    seed_indices = _iter_first_attrib_seed_indices(
        blocks,
        original_width=original_width,
        cfg=cfg,
        median_height=median_height,
        avatar_layout=avatar_layout,
    )
    for seed_idx in seed_indices:
        merged = _merge_attrib_line_from_seed(
            blocks,
            seed_idx,
            original_width=original_width,
            cfg=cfg,
            median_height=median_height,
        )
        if merged is None:
            continue
        merged_text, bbox = merged
        guard_ok = _nickname_passes_avatar_guard(
            bgr_guard=bgr_guard,
            nick_block=blocks[seed_idx],
            avatar_layout=avatar_layout,
            scale_w=scale_w,
            original_width=original_width,
            config=cfg,
        )
        if guard_ok:
            return merged_text, bbox

    return None
|
|
1216
|
+
|
|
1217
|
+
|
|
1218
|
+
def first_attrib_verbatim_display_line(
    ocr_result: "OcrPageResult",
    screenshot: "Screenshot",
    *,
    config: Optional[NicknameOcrConfig] = None,
    png_path: Optional["Path"] = None,
) -> Optional[str]:
    """Text of the first "attribution display nickname" line in the left column.

    Thin wrapper over ``first_attrib_verbatim_display_line_and_bbox`` that
    returns only the text element, or ``None`` when that call yields nothing.

    NOTE(review): the original note says this runs alongside
    ``extract_nicknames`` and that adoption of the nickname should also be
    validated against the left avatar ROI at the image layer -- confirm
    against the callers.
    """
    located = first_attrib_verbatim_display_line_and_bbox(
        ocr_result, screenshot, config=config, png_path=png_path
    )
    if not located:
        return None
    return located[0]
|
|
1234
|
+
|
|
1235
|
+
|
|
1236
|
+
def _classify(
|
|
1237
|
+
block: "TextBlock",
|
|
1238
|
+
*,
|
|
1239
|
+
original_width: int,
|
|
1240
|
+
config: NicknameOcrConfig,
|
|
1241
|
+
median_height: float,
|
|
1242
|
+
avatar_layout: Any = None,
|
|
1243
|
+
) -> str:
|
|
1244
|
+
"""Return ``"nickname_candidate"`` / ``"bubble_text"`` / ``"drop"``."""
|
|
1245
|
+
text = block.text.strip()
|
|
1246
|
+
if not text:
|
|
1247
|
+
return "drop"
|
|
1248
|
+
if block.confidence < config.min_confidence:
|
|
1249
|
+
return "drop"
|
|
1250
|
+
if (
|
|
1251
|
+
_is_pure_time(text)
|
|
1252
|
+
or _is_pure_digits(text)
|
|
1253
|
+
or _is_chat_timestamp_or_divider(text)
|
|
1254
|
+
or _is_favorite_card_label(text)
|
|
1255
|
+
):
|
|
1256
|
+
return "drop"
|
|
1257
|
+
|
|
1258
|
+
is_left = _is_on_left_half(block, original_width, config.screen_midline_ratio)
|
|
1259
|
+
|
|
1260
|
+
if (
|
|
1261
|
+
len(text) <= config.nickname_max_chars
|
|
1262
|
+
and not _starts_with_system_prefix(text, config.system_prefix_drops)
|
|
1263
|
+
and is_left
|
|
1264
|
+
and _is_avatar_column_nickname_row(
|
|
1265
|
+
block, original_width, config, avatar_layout
|
|
1266
|
+
)
|
|
1267
|
+
):
|
|
1268
|
+
return "nickname_candidate"
|
|
1269
|
+
|
|
1270
|
+
return "bubble_text"
|
|
1271
|
+
|
|
1272
|
+
|
|
1273
|
+
def _merge_two_text_blocks(a: TextBlock, b: TextBlock, *, joiner: str) -> TextBlock:
    """Combine two OCR blocks into one ``TextBlock``.

    The result covers the union of both boxes (integer coordinates), carries
    the lower of the two confidences, and joins the stripped texts with a
    newline when ``joiner == "\\n"`` and with nothing for any other joiner.
    When ``a`` has no text, only ``b``'s text is used.

    NOTE(review): the read indices are chosen with ``or``, so an index of
    ``0`` on ``a`` falls through to ``b``'s value -- confirm whether ``0`` is
    a valid index.
    """
    a_left, a_top, a_right, a_bottom = a.bbox_xyxy
    b_left, b_top, b_right, b_bottom = b.bbox_xyxy
    union = (
        min(a_left, b_left),
        min(a_top, b_top),
        max(a_right, b_right),
        max(a_bottom, b_bottom),
    )

    first_text = a.text.strip()
    second_text = b.text.strip()
    separator = "\n" if joiner == "\n" else ""
    if first_text:
        merged_text = f"{first_text}{separator}{second_text}"
    else:
        merged_text = second_text

    return TextBlock(
        text=merged_text,
        bbox_xyxy=tuple(int(coord) for coord in union),
        confidence=min(a.confidence, b.confidence),
        line_read_index=a.line_read_index or b.line_read_index,
        paragraph_read_index=a.paragraph_read_index or b.paragraph_read_index,
    )
|
|
1291
|
+
|
|
1292
|
+
|
|
1293
|
+
def _horizontal_gap_left_to_right(a_xyxy: tuple[int, int, int, int], b_xyxy: tuple[int, int, int, int]) -> int:
|
|
1294
|
+
"""Non-negative gap when ``a`` is left of ``b`` (``a.x2 <= b.x1``); else ``0``."""
|
|
1295
|
+
ax2 = a_xyxy[2]
|
|
1296
|
+
bx1 = b_xyxy[0]
|
|
1297
|
+
if ax2 <= bx1:
|
|
1298
|
+
return bx1 - ax2
|
|
1299
|
+
return 0
|
|
1300
|
+
|
|
1301
|
+
|
|
1302
|
+
def _vertical_axis_overlap_ratio(a_xyxy: tuple[int, int, int, int], b_xyxy: tuple[int, int, int, int]) -> float:
|
|
1303
|
+
ay1, ay2 = a_xyxy[1], a_xyxy[3]
|
|
1304
|
+
by1, by2 = b_xyxy[1], b_xyxy[3]
|
|
1305
|
+
inter = min(ay2, by2) - max(ay1, by1)
|
|
1306
|
+
if inter <= 0:
|
|
1307
|
+
return 0.0
|
|
1308
|
+
ha = max(1, ay2 - ay1)
|
|
1309
|
+
hb = max(1, by2 - by1)
|
|
1310
|
+
return float(inter) / float(min(ha, hb))
|
|
1311
|
+
|
|
1312
|
+
|
|
1313
|
+
def _mergeable_bubble_fragments(
    a: TextBlock,
    b: TextBlock,
    *,
    original_width: int,
    median_h: float,
    config: NicknameOcrConfig,
) -> Optional[str]:
    """PRD §7(3): decide whether two blocks are fragments of the same bubble line.

    Returns ``""`` (concatenate horizontally) when both blocks are on the same
    screen half, both reach ``config.min_confidence``, their vertical centres
    differ by at most ``config.bubble_fragment_same_line_y_tol_px``, the
    left-to-right gap is within
    ``max(config.bubble_fragment_horizontal_gap_max_px, median_h * 1.15)``,
    and either the vertical overlap ratio is at least 0.22 or the gap is 0.
    Returns ``None`` otherwise.  No branch returns a newline joiner.
    """
    a_is_left = _is_on_left_half(a, original_width, config.screen_midline_ratio)
    b_is_left = _is_on_left_half(b, original_width, config.screen_midline_ratio)
    if a_is_left != b_is_left:
        return None
    if a.confidence < config.min_confidence or b.confidence < config.min_confidence:
        return None

    same_line_tol = float(config.bubble_fragment_same_line_y_tol_px)
    center_distance = abs(_vertical_center(a) - _vertical_center(b))
    if not (center_distance <= same_line_tol):
        return None

    gap = _horizontal_gap_left_to_right(a.bbox_xyxy, b.bbox_xyxy)
    gap_limit = max(
        float(config.bubble_fragment_horizontal_gap_max_px),
        median_h * 1.15,
    )
    if not (gap <= gap_limit):
        return None
    if _vertical_axis_overlap_ratio(a.bbox_xyxy, b.bbox_xyxy) >= 0.22 or gap == 0:
        return ""
    return None
|
|
1346
|
+
def _prd_merge_adjacent_bubble_text_blocks(
|
|
1347
|
+
blocks: list[TextBlock],
|
|
1348
|
+
*,
|
|
1349
|
+
original_width: int,
|
|
1350
|
+
config: NicknameOcrConfig,
|
|
1351
|
+
) -> list[TextBlock]:
|
|
1352
|
+
"""PRD §7(3):在进入 STEP1 分类主循环前,合并同一气泡的 Paddle 碎片(仅 ``bubble_text`` 之间)。"""
|
|
1353
|
+
if len(blocks) < 2:
|
|
1354
|
+
return blocks
|
|
1355
|
+
heights = [_block_height(b) for b in blocks if _block_height(b) > 0]
|
|
1356
|
+
median_h = float(median(heights)) if heights else 1.0
|
|
1357
|
+
classes = [
|
|
1358
|
+
_classify(
|
|
1359
|
+
b,
|
|
1360
|
+
original_width=original_width,
|
|
1361
|
+
config=config,
|
|
1362
|
+
median_height=median_h,
|
|
1363
|
+
)
|
|
1364
|
+
for b in blocks
|
|
1365
|
+
]
|
|
1366
|
+
out: list[TextBlock] = []
|
|
1367
|
+
i = 0
|
|
1368
|
+
n = len(blocks)
|
|
1369
|
+
while i < n:
|
|
1370
|
+
if classes[i] != "bubble_text":
|
|
1371
|
+
out.append(blocks[i])
|
|
1372
|
+
i += 1
|
|
1373
|
+
continue
|
|
1374
|
+
cur = blocks[i]
|
|
1375
|
+
j = i + 1
|
|
1376
|
+
while j < n:
|
|
1377
|
+
if classes[j] != "bubble_text":
|
|
1378
|
+
break
|
|
1379
|
+
joiner = _mergeable_bubble_fragments(
|
|
1380
|
+
cur,
|
|
1381
|
+
blocks[j],
|
|
1382
|
+
original_width=original_width,
|
|
1383
|
+
median_h=median_h,
|
|
1384
|
+
config=config,
|
|
1385
|
+
)
|
|
1386
|
+
if joiner is None:
|
|
1387
|
+
break
|
|
1388
|
+
cur = _merge_two_text_blocks(cur, blocks[j], joiner=joiner)
|
|
1389
|
+
j += 1
|
|
1390
|
+
out.append(cur)
|
|
1391
|
+
i = j
|
|
1392
|
+
return out
|
|
1393
|
+
|
|
1394
|
+
|
|
1395
|
+
def _join_segment_body_lines(blocks: list["TextBlock"], indices: tuple[int, ...]) -> str:
|
|
1396
|
+
parts = [blocks[i].text.strip() for i in indices]
|
|
1397
|
+
return "\n".join(p for p in parts if p)
|
|
1398
|
+
|
|
1399
|
+
|
|
1400
|
+
def _speaker_body_segments_from_layout(
|
|
1401
|
+
blocks: list["TextBlock"],
|
|
1402
|
+
classes: list[str],
|
|
1403
|
+
nicknames: tuple[NicknameExtraction, ...],
|
|
1404
|
+
claimed_indices: set[int],
|
|
1405
|
+
*,
|
|
1406
|
+
original_width: int,
|
|
1407
|
+
config: NicknameOcrConfig,
|
|
1408
|
+
) -> tuple[SpeakerBodySegment, ...]:
|
|
1409
|
+
"""按阅读序构造本页 ``SpeakerBodySegment``(昵称块本身不占正文)。"""
|
|
1410
|
+
min_len = max(0, int(config.min_bubble_body_chars))
|
|
1411
|
+
segments: list[tuple[int, SpeakerBodySegment]] = []
|
|
1412
|
+
|
|
1413
|
+
for n in nicknames:
|
|
1414
|
+
body = _join_segment_body_lines(blocks, n.following_block_indices)
|
|
1415
|
+
stripped = body.strip()
|
|
1416
|
+
if not stripped:
|
|
1417
|
+
continue
|
|
1418
|
+
if len(stripped) < min_len:
|
|
1419
|
+
continue
|
|
1420
|
+
idx = int(n.nickname_block_index)
|
|
1421
|
+
if idx < 0:
|
|
1422
|
+
continue
|
|
1423
|
+
segments.append(
|
|
1424
|
+
(
|
|
1425
|
+
idx,
|
|
1426
|
+
SpeakerBodySegment(
|
|
1427
|
+
speaker_display=n.nickname.strip(),
|
|
1428
|
+
body_text=body,
|
|
1429
|
+
orphan=False,
|
|
1430
|
+
block_index=idx,
|
|
1431
|
+
),
|
|
1432
|
+
)
|
|
1433
|
+
)
|
|
1434
|
+
|
|
1435
|
+
for idx, klass in enumerate(classes):
|
|
1436
|
+
if klass != "bubble_text":
|
|
1437
|
+
continue
|
|
1438
|
+
if idx in claimed_indices:
|
|
1439
|
+
continue
|
|
1440
|
+
text = blocks[idx].text.strip()
|
|
1441
|
+
if len(text) < min_len:
|
|
1442
|
+
continue
|
|
1443
|
+
if _is_on_left_half(blocks[idx], original_width, config.screen_midline_ratio):
|
|
1444
|
+
segments.append(
|
|
1445
|
+
(
|
|
1446
|
+
idx,
|
|
1447
|
+
SpeakerBodySegment(
|
|
1448
|
+
speaker_display="",
|
|
1449
|
+
body_text=text,
|
|
1450
|
+
orphan=True,
|
|
1451
|
+
block_index=idx,
|
|
1452
|
+
),
|
|
1453
|
+
)
|
|
1454
|
+
)
|
|
1455
|
+
else:
|
|
1456
|
+
segments.append(
|
|
1457
|
+
(
|
|
1458
|
+
idx,
|
|
1459
|
+
SpeakerBodySegment(
|
|
1460
|
+
speaker_display=_OWNER_SPEAKER_LABEL,
|
|
1461
|
+
body_text=text,
|
|
1462
|
+
orphan=False,
|
|
1463
|
+
block_index=idx,
|
|
1464
|
+
),
|
|
1465
|
+
)
|
|
1466
|
+
)
|
|
1467
|
+
|
|
1468
|
+
segments.sort(key=lambda t: t[0])
|
|
1469
|
+
return tuple(s for _, s in segments)
|
|
1470
|
+
|
|
1471
|
+
|
|
1472
|
+
# ============================================================================
|
|
1473
|
+
# Public API
|
|
1474
|
+
# ============================================================================
|
|
1475
|
+
|
|
1476
|
+
|
|
1477
|
+
def _build_ocr_debug_dict(
    screenshot: "Screenshot",
    blocks: list["TextBlock"],
    classes: list[str],
    layout_opt: Any,
    scale_w: float,
    image_size: tuple[int, int],
) -> dict[str, Any]:
    """Build the per-frame OCR attribution debug payload.

    The payload is intended to be persisted in ``debug_session_derived.json``.
    Every coordinate is expressed in the *actual OCR image space*
    (``image_size``), which may differ from the phone-native resolution
    recorded in ``metadata.json`` (``screenshot.original_resolution``) because
    the capture side may rescale its output (per ``renxin.yaml``).

    Args:
        screenshot: Frame metadata. ``original_resolution.width/height`` are
            read; ``click_context.click_coords`` and ``resume_thumb_bboxes``
            are read when present.
        blocks: OCR text blocks; ``bbox_xyxy``, ``text`` and ``confidence``
            are read from each.
        classes: Per-block class labels, indexed like ``blocks``. Blocks
            without a matching entry are reported as ``"unknown"``.
        layout_opt: Optional layout result. When not ``None`` its
            ``speaker_bands``, ``orphan_top_band``, ``content_bounds`` and
            ``no_avatars_all_orphan`` are exported and each block is tagged
            with the first speaker band it vertically overlaps.
        scale_w: Unused; kept for signature compatibility. The reported
            ``"scale_w"`` is derived from the OCR image width instead.
        image_size: ``(width, height)`` of the image the OCR actually ran on.

    Returns:
        A dict of plain ints/floats/strings/lists/dicts with the keys
        ``screen_size``, ``scale_w``, ``user_click_area_scaling``,
        ``resume_thumb_bboxes_scaling``, ``orphan_top_band``,
        ``speaker_bands``, ``content_bounds``, ``no_avatars_all_orphan`` and
        ``blocks``.
    """
    image_w, image_h = int(image_size[0]), int(image_size[1])
    native_w = float(screenshot.original_resolution.width)
    native_h = float(screenshot.original_resolution.height)
    sx = image_w / native_w if native_w > 0 else 1.0
    sy = image_h / native_h if native_h > 0 else 1.0
    # Coordinates coming from metadata (phone-native space) are rescaled into
    # OCR image space only when the two spaces differ measurably.
    needs_rescale = abs(sx - 1.0) > 0.0001 or abs(sy - 1.0) > 0.0001

    def _remap_bbox(raw: Any) -> list[int]:
        """Map a native-space bbox to exactly four OCR-space ints."""
        if needs_rescale:
            return [
                int(raw[0] * sx),
                int(raw[1] * sy),
                int(raw[2] * sx),
                int(raw[3] * sy),
            ]
        # Only the first four values form the bbox; trailing extras are
        # dropped so both paths return the same shape.
        return [int(value) for value in raw[:4]]

    user_click_area: Any = None
    click_ctx = getattr(screenshot, "click_context", None)
    if click_ctx is not None:
        coords = getattr(click_ctx, "click_coords", None)
        if coords and len(coords) >= 2:
            click_x = int(coords[0] * sx)
            click_y = int(coords[1] * sy)
            # The click point is exported as a 2x2 px box.
            user_click_area = [click_x, click_y, click_x + 2, click_y + 2]

    debug: dict[str, Any] = {
        "screen_size": {"width": image_w, "height": image_h},
        "scale_w": round(float(image_w) / 1080.0, 4),
        "user_click_area_scaling": user_click_area,
        "resume_thumb_bboxes_scaling": [
            _remap_bbox(raw)
            for raw in (getattr(screenshot, "resume_thumb_bboxes", None) or [])
            if raw and len(raw) >= 4
        ],
        # NOTE (carried over from the original source): empirically, a resume
        # card's normal height is 260-279 px (87% of samples, median 273).
    }

    has_layout = layout_opt is not None
    if has_layout:
        # ``or ()`` guards against a layout whose ``speaker_bands`` is None.
        speaker_bands = getattr(layout_opt, "speaker_bands", ()) or ()
        orphan_top = layout_opt.orphan_top_band
        debug["orphan_top_band"] = (
            None if orphan_top is None
            else [int(orphan_top[0]), int(orphan_top[1])]
        )
        debug["speaker_bands"] = [
            [int(band_top), int(band_bottom)]
            for band_top, band_bottom in speaker_bands
        ]
        bounds = layout_opt.content_bounds
        debug["content_bounds"] = {
            "y_top": int(bounds.y_top),
            "y_bottom_excl": int(bounds.y_bottom_excl),
        }
        debug["no_avatars_all_orphan"] = bool(
            getattr(layout_opt, "no_avatars_all_orphan", False)
        )
    else:
        # Without a layout only the block information is exported (no bands).
        speaker_bands = ()
        debug["orphan_top_band"] = None
        debug["speaker_bands"] = []
        debug["content_bounds"] = None
        debug["no_avatars_all_orphan"] = True

    def _band_index(bbox: Any) -> Optional[int]:
        """Return the index of the first band the bbox overlaps vertically."""
        y_top, y_bottom = float(bbox[1]), float(bbox[3])
        for index, (band_top, band_bottom) in enumerate(speaker_bands):
            if y_bottom > float(band_top) and y_top < float(band_bottom):
                return index
        return None

    block_items: list[dict[str, Any]] = []
    for position, block in enumerate(blocks):
        bbox = block.bbox_xyxy
        block_items.append({
            "bbox_xyxy": list(map(int, bbox)),
            "text": (block.text or "").strip(),
            "confidence": round(float(block.confidence), 4),
            "class": classes[position] if position < len(classes) else "unknown",
            "band": _band_index(bbox) if has_layout else None,
        })
    debug["blocks"] = block_items
    return debug
|
|
1575
|
+
|
|
1576
|
+
|
|
1577
|
+
def extract_nicknames(
    ocr_result: "OcrPageResult",
    screenshot: "Screenshot",
    *,
    config: Optional[NicknameOcrConfig] = None,
    png_path: Optional[Path] = None,
) -> ChatSpeakerAttribution:
    """Simplest viable nickname extraction from one chat-frame OCR result.

    Preconditions:
        - ``screenshot.type == "chat_message"`` (caller must enforce; this
          function checks it as well).
        - ``ocr_result.screenshot_id == screenshot.screenshot_id``
          (defensive check: catches accidental list mismatches in the
          caller's loop).

    Args:
        ocr_result: OCR output for the frame; ``text_blocks``,
            ``screenshot_id`` and ``original_size`` are read.
        screenshot: Frame metadata; ``type``, ``screenshot_id`` and
            ``original_resolution.width`` are read here, and the object is
            forwarded to the layout / resume-thumbnail helpers.
        config: Tuning parameters; a default ``NicknameOcrConfig()`` is used
            when omitted.
        png_path: Optional path to the frame PNG, forwarded to the layout
            loaders and used to load pixels for the avatar guard.

    Returns:
        A frozen :class:`ChatSpeakerAttribution` (counts and segments are
        post-processed per PRD section 7, see
        :func:`_finalize_prd_attribution`).

    Raises:
        AssertionError: If either precondition above is violated. The check
            is an explicit ``raise`` so it still runs under ``python -O``.

    Side effects / I/O:
        - When ``NicknameOcrConfig.use_prd_chat_vertical_bands`` is true and
          ``png_path`` is provided, the PNG is read to perform the vertical
          band segmentation (glossary section 5); otherwise the function
          falls back to an OCR gap heuristic.
        - Intended to keep single-frame pure-function semantics: no writes to
          external storage and no logging.
    """
    if screenshot.type != "chat_message":
        raise AssertionError(
            f"extract_nicknames expects chat_message type; got "
            f"{screenshot.type!r} for screenshot_id={screenshot.screenshot_id!r}"
        )
    if ocr_result.screenshot_id != screenshot.screenshot_id:
        raise AssertionError(
            f"screenshot_id mismatch: ocr_result={ocr_result.screenshot_id!r} "
            f"vs screenshot={screenshot.screenshot_id!r}"
        )

    def _empty_attribution() -> ChatSpeakerAttribution:
        """Result returned whenever no usable OCR block remains."""
        return ChatSpeakerAttribution(
            screenshot_id=screenshot.screenshot_id,
            resume_thumb_orphans=0,
            speaker_body_segments=(),
        )

    def _overlaps_band(block: Any, band: Any) -> bool:
        """True when the block's vertical extent intersects the band."""
        band_top, band_bottom = band
        block_top = float(block.bbox_xyxy[1])
        block_bottom = float(block.bbox_xyxy[3])
        return block_bottom > float(band_top) and block_top < float(band_bottom)

    cfg = config or NicknameOcrConfig()
    blocks = list(ocr_result.text_blocks)
    if not blocks:
        return _empty_attribution()

    original_width = screenshot.original_resolution.width
    scale_w = float(original_width) / 1080.0
    layout_opt = _load_prd_layout_optional(png_path, screenshot, config=cfg)
    avatar_layout = _load_left_avatar_layout(png_path, screenshot)

    # Y-position gate: discard OCR blocks whose vertical centre lies above the
    # top or at/below the bottom of the chat content area.
    if layout_opt is not None:
        bounds = layout_opt.content_bounds
        blocks = [
            block
            for block in blocks
            if bounds.y_top <= _vertical_center(block) < bounds.y_bottom_excl
        ]
        if not blocks:
            return _empty_attribution()

    blocks = _prd_merge_adjacent_bubble_text_blocks(
        blocks,
        original_width=original_width,
        config=cfg,
    )
    if not blocks:
        return _empty_attribution()

    heights = [height for height in map(_block_height, blocks) if height > 0]
    median_height = float(median(heights)) if heights else 1.0

    classes = [
        _classify(
            block,
            original_width=original_width,
            config=cfg,
            median_height=median_height,
            avatar_layout=avatar_layout,
        )
        for block in blocks
    ]
    # Both helpers are called for their side effects only; their return
    # values are not used here.
    _apply_resume_thumb_block_mask(
        blocks,
        classes,
        screenshot,
        original_width=original_width,
        screen_midline_ratio=cfg.screen_midline_ratio,
    )
    _apply_prd_nickname_bubble_separation(
        blocks,
        classes,
        original_width=original_width,
        config=cfg,
        median_height=median_height,
    )

    if layout_opt is not None and getattr(layout_opt, "no_avatars_all_orphan", False):
        # No avatars detected: every surviving block is merged into a single
        # unattributed (orphan) segment.
        bodies: list[str] = []
        for block, klass in zip(blocks, classes):
            if klass == "drop" or block.confidence < cfg.min_confidence:
                continue
            cleaned = _strip_placeholder_lines_from_body(block.text.strip())
            if cleaned:
                bodies.append(cleaned)
        merged = "\n".join(bodies).strip()

        claimed_indices: set[int] = set()
        thumb_bindings, thumb_orphans, thumb_excluded = _resume_thumb_bindings_and_orphans(
            screenshot,
            blocks,
            classes,
            claimed_indices,
            (),
            original_width=original_width,
            config=cfg,
            layout_opt=layout_opt,
            scale_w=scale_w,
            image_size=ocr_result.original_size,
        )
        # NOTE(review): ``merged`` was assembled before these blocks were
        # marked as drop, so their text can still appear in it - confirm this
        # ordering is intended.
        for excluded_index in thumb_excluded:
            classes[excluded_index] = "drop"

        debug_d = _build_ocr_debug_dict(
            screenshot, blocks, classes, layout_opt, scale_w,
            ocr_result.original_size,
        )
        if not merged:
            return ChatSpeakerAttribution(
                screenshot_id=screenshot.screenshot_id,
                resume_thumb_orphans=thumb_orphans,
                resume_thumb_bindings=thumb_bindings,
                speaker_body_segments=(),
                ocr_debug=debug_d,
            )
        unattributed_seg = SpeakerBodySegment(
            speaker_display=SPEAKER_DISPLAY_UNATTRIBUTED,
            body_text=merged,
            orphan=True,
        )
        no_avatar_attr = ChatSpeakerAttribution(
            screenshot_id=screenshot.screenshot_id,
            nicknames=(),
            total_bubbles=0,
            bubbles_with_nickname=0,
            bubbles_owner=0,
            bubbles_unknown=0,
            resume_thumb_orphans=thumb_orphans,
            resume_thumb_bindings=thumb_bindings,
            speaker_body_segments=(unattributed_seg,),
            ocr_debug=debug_d,
        )
        result = _finalize_prd_attribution(no_avatar_attr, config=cfg)
        # _finalize_prd_attribution builds a new instance internally, so the
        # debug payload has to be carried over explicitly.
        return dataclass_replace(result, ocr_debug=debug_d)

    layout_active = (
        layout_opt is not None
        and not getattr(layout_opt, "no_avatars_all_orphan", False)
    )
    # ``or ()`` guards against a layout whose ``speaker_bands`` is None.
    speaker_bands = (
        (getattr(layout_opt, "speaker_bands", ()) or ()) if layout_active else ()
    )

    nicknames_list: list[NicknameExtraction] = []
    claimed_indices: set[int] = set()

    bgr_guard = None
    if png_path is not None:
        try:
            # NOTE(review): the wheel's file listing shows the guard module at
            # ``screenshot_vision_algorithm/android/wechat/ocr/avatar_guard.py``
            # while this import targets ``processor``; when the import fails
            # the pixel guard is silently skipped - confirm the path.
            from processor.nickname_avatar_guard import load_png_bgr

            bgr_guard = load_png_bgr(str(png_path))
        except ImportError:
            bgr_guard = None

    for idx, klass in enumerate(classes):
        if klass != "nickname_candidate":
            continue
        nick_block = blocks[idx]
        if not _nickname_passes_avatar_guard(
            bgr_guard=bgr_guard,
            nick_block=nick_block,
            avatar_layout=avatar_layout,
            scale_w=scale_w,
            original_width=original_width,
            config=cfg,
        ):
            continue

        # First speaker band that the nickname overlaps (None when the layout
        # is inactive or nothing overlaps).
        band_index: Optional[int] = next(
            (
                position
                for position, band in enumerate(speaker_bands)
                if _overlaps_band(nick_block, band)
            ),
            None,
        )

        # Collect the bubble-text blocks below the nickname; the chain ends at
        # the next nickname candidate, or at the first bubble that leaves the
        # band, sits on the right half, or is too far below (or above) the
        # previously accepted block.
        chain_bottom = _block_bottom(nick_block)
        followers: list[int] = []
        for j in range(idx + 1, len(blocks)):
            if j in claimed_indices:
                continue
            follower_class = classes[j]
            if follower_class == "nickname_candidate":
                break
            if follower_class != "bubble_text":
                continue
            candidate = blocks[j]
            if band_index is not None and not _overlaps_band(
                candidate, speaker_bands[band_index]
            ):
                break
            if not _is_on_left_half(candidate, original_width, cfg.screen_midline_ratio):
                break
            gap = _block_top(candidate) - chain_bottom
            if gap < 0 or gap > cfg.bubble_vertical_gap_max_px:
                break
            followers.append(j)
            claimed_indices.add(j)
            chain_bottom = _block_bottom(candidate)

        nicknames_list.append(
            NicknameExtraction(
                nickname=nick_block.text.strip(),
                bbox_xyxy=nick_block.bbox_xyxy,
                confidence=nick_block.confidence,
                following_block_indices=tuple(followers),
                nickname_block_index=idx,
                band_index=band_index,
            ),
        )

    thumb_bindings, thumb_orphans, thumb_excluded = _resume_thumb_bindings_and_orphans(
        screenshot,
        blocks,
        classes,
        claimed_indices,
        nicknames_list,
        original_width=original_width,
        config=cfg,
        layout_opt=layout_opt,
        scale_w=scale_w,
        image_size=ocr_result.original_size,
    )
    # PRD section 7(2): blocks "removed" in Step C are marked as drop.
    for excluded_index in thumb_excluded:
        classes[excluded_index] = "drop"

    body_segments = _speaker_body_segments_from_layout(
        blocks,
        classes,
        tuple(nicknames_list),
        claimed_indices,
        original_width=original_width,
        config=cfg,
    )
    bubbles_with_nickname = sum(
        1
        for segment in body_segments
        if (not segment.orphan)
        and (segment.speaker_display or "").strip()
        and segment.speaker_display != _OWNER_SPEAKER_LABEL
    )
    bubbles_owner = sum(
        1
        for segment in body_segments
        if segment.speaker_display == _OWNER_SPEAKER_LABEL
    )
    bubbles_unknown = sum(1 for segment in body_segments if segment.orphan)

    base_attr = ChatSpeakerAttribution(
        screenshot_id=screenshot.screenshot_id,
        nicknames=tuple(nicknames_list),
        total_bubbles=len(body_segments),
        bubbles_with_nickname=bubbles_with_nickname,
        bubbles_owner=bubbles_owner,
        bubbles_unknown=bubbles_unknown,
        resume_thumb_orphans=thumb_orphans,
        resume_thumb_bindings=thumb_bindings,
        speaker_body_segments=body_segments,
    )
    debug_d = _build_ocr_debug_dict(
        screenshot, blocks, classes, layout_opt, scale_w,
        ocr_result.original_size,
    )
    result = _finalize_prd_attribution(
        base_attr,
        config=cfg,
        blocks=blocks,
        layout_opt=layout_opt if layout_active else None,
        scale_w=scale_w,
    )
    # The finalizer returns a fresh instance; attach the debug payload to it.
    return dataclass_replace(result, ocr_debug=debug_d)
|
|
1876
|
+
|
|
1877
|
+
|
|
1878
|
+
# Public names of this module: this list controls what
# ``from <module> import *`` exports.
__all__ = [
    "NicknameOcrConfig",
    "NicknameExtraction",
    "ResumeThumbBinding",
    "SpeakerBodySegment",
    "ChatSpeakerAttribution",
    "SPEAKER_DISPLAY_UNATTRIBUTED",
    "extract_nicknames",
    "first_attrib_verbatim_display_line",
    "first_attrib_verbatim_display_line_and_bbox",
]
|