screenforge 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. cli/__init__.py +0 -0
  2. cli/_version.py +1 -0
  3. cli/dispatch.py +266 -0
  4. cli/doctor.py +487 -0
  5. cli/modes/__init__.py +0 -0
  6. cli/modes/action.py +262 -0
  7. cli/modes/default.py +248 -0
  8. cli/modes/demo.py +162 -0
  9. cli/modes/dry_run.py +237 -0
  10. cli/modes/init.py +133 -0
  11. cli/modes/plan.py +148 -0
  12. cli/modes/workflow.py +354 -0
  13. cli/parser.py +305 -0
  14. cli/reporter.py +207 -0
  15. cli/session.py +146 -0
  16. cli/shared.py +427 -0
  17. cli/shorthand.py +90 -0
  18. cli/tool_protocol_handlers.py +446 -0
  19. common/__init__.py +0 -0
  20. common/adapters/__init__.py +21 -0
  21. common/adapters/android_adapter.py +273 -0
  22. common/adapters/base_adapter.py +24 -0
  23. common/adapters/ios_adapter.py +278 -0
  24. common/adapters/web_adapter.py +271 -0
  25. common/ai.py +277 -0
  26. common/ai_autonomous.py +273 -0
  27. common/ai_heal.py +222 -0
  28. common/cache/__init__.py +15 -0
  29. common/cache/cache_hash.py +57 -0
  30. common/cache/cache_manager.py +300 -0
  31. common/cache/cache_stats.py +133 -0
  32. common/cache/cache_storage.py +79 -0
  33. common/cache/embedding_loader.py +150 -0
  34. common/capabilities.py +121 -0
  35. common/case_memory.py +327 -0
  36. common/error_codes.py +61 -0
  37. common/exceptions.py +18 -0
  38. common/executor.py +1504 -0
  39. common/failure_diagnosis.py +138 -0
  40. common/history_manager.py +75 -0
  41. common/logs.py +168 -0
  42. common/mcp_server.py +467 -0
  43. common/preflight.py +496 -0
  44. common/progress.py +37 -0
  45. common/run_reporter.py +415 -0
  46. common/run_resume.py +149 -0
  47. common/runtime_modes.py +35 -0
  48. common/tool_protocol.py +196 -0
  49. common/visual_fallback.py +71 -0
  50. common/workflow_schema.py +150 -0
  51. config/__init__.py +0 -0
  52. config/config.py +167 -0
  53. config/env_loader.py +76 -0
  54. screenforge-0.4.0.dist-info/METADATA +43 -0
  55. screenforge-0.4.0.dist-info/RECORD +64 -0
  56. screenforge-0.4.0.dist-info/WHEEL +5 -0
  57. screenforge-0.4.0.dist-info/entry_points.txt +2 -0
  58. screenforge-0.4.0.dist-info/licenses/LICENSE +21 -0
  59. screenforge-0.4.0.dist-info/top_level.txt +4 -0
  60. utils/__init__.py +0 -0
  61. utils/screenshot_annotator.py +60 -0
  62. utils/utils_ios.py +195 -0
  63. utils/utils_web.py +304 -0
  64. utils/utils_xml.py +218 -0
utils/utils_web.py ADDED
@@ -0,0 +1,304 @@
1
+ from urllib.parse import urlsplit, urlunsplit
2
+
3
+
4
def normalize_loopback_url(url: str) -> str:
    """Rewrite a ``localhost`` URL so that its host is ``127.0.0.1``.

    Only the host component is touched: scheme, userinfo, port, path, query
    and fragment are carried over as parsed. URLs whose host is anything
    other than ``localhost`` are returned unchanged (as ``str(url)``).

    Args:
        url: The URL to normalize. Non-string values are converted with
            ``str()`` first.

    Returns:
        The URL with ``localhost`` replaced by ``127.0.0.1``, or the
        original string when the host is not ``localhost``.

    Raises:
        ValueError: Propagated from ``urlsplit`` for a malformed netloc.
    """
    raw = str(url)
    parsed = urlsplit(raw)
    # ``hostname`` is lower-cased by urlsplit, so this also matches
    # "LOCALHOST" / "LocalHost".
    if parsed.hostname != "localhost":
        return raw

    # Split off any "user:password@" prefix so that only the host is
    # rewritten. A plain str.replace on the whole netloc would (a) miss an
    # upper-case host and (b) hit a literal "localhost" inside the userinfo
    # before the real host.
    userinfo, at_sign, host_port = parsed.netloc.rpartition("@")
    # host_port begins with the host (case-insensitively "localhost");
    # whatever follows it is the optional ":port" suffix.
    port_suffix = host_port[len("localhost"):]
    normalized_netloc = f"{userinfo}{at_sign}127.0.0.1{port_suffix}"
    return urlunsplit(
        (
            parsed.scheme,
            normalized_netloc,
            parsed.path,
            parsed.query,
            parsed.fragment,
        )
    )
19
+
20
def compress_web_dom(page) -> str:
    """
    Extract the visible, interaction-relevant elements of the current page.

    A JavaScript function is injected through ``page.evaluate`` (``page`` is
    expected to be a Playwright page). The script walks the document,
    including open shadow roots and same-origin iframes, applies a physical
    visibility check and layout-noise filtering, de-duplicates the result and
    annotates ambiguous same-named clickable controls with ``dup_index`` and,
    when one can be found, ``scope``.

    According to the original author, this removes more than 95% of the noise
    in HTML documents of tens of thousands of lines and produces semantically
    rich JSON shaped like the Android XML compressor's output.

    Returns:
        The JSON string produced by the script, of the form
        ``{"ui_elements": [...]}``. If evaluation raises, a warning is
        printed and ``'{"ui_elements": []}'`` is returned instead.
    """
    # NOTE: everything inside this literal (including its inline comments) is
    # runtime text sent to the browser. The literal is not a raw string, so the
    # doubled backslashes below ('\\u0000', /\\s+/) reach JavaScript as single
    # backslashes.
    js_script = """
    () => {
        const elements = [];
        let refIndex = 0;
        // nodeData -> 原始 DOM el,供「重复同名控件消歧」后处理沿 DOM 上爬找所在行。
        // 只在内存里用,绝不参与序列化(DOM 节点有环,JSON.stringify 会炸)。
        const rawElOf = new Map();

        // Web 端具有明确交互语义的 role 集合
        const interactiveRoles = new Set(['button', 'link', 'menuitem', 'option', 'tab', 'switch', 'checkbox', 'radio', 'combobox']);
        // 纯结构或绝对无用的标签。注意 iframe 不在此列:我们要递归进入其内容
        // 文档(见 walk)而不是把 iframe 框本身当作元素。
        const ignoreTags = new Set(['script', 'style', 'noscript', 'head', 'meta', 'title', 'br', 'hr', 'svg', 'path', 'g', 'img', 'video', 'audio', 'iframe']);

        // 判定元素是否处于 inert 子树。closest('[inert]') 只在同一棵树内向上找,
        // 看不到 shadow host / iframe 之外的祖先 —— 故由 walk 把跨边界继承来的
        // inert 状态(inherited)一并算入,与 offX/offY 跨边界传递的方式一致。
        function isInertEl(el, inherited) {
            if (inherited) return true;
            try { return el.closest('[inert]') !== null; } catch (e) { return false; }
        }

        // 处理单个元素:offX/offY 是从内层文档(iframe)坐标系到顶层视口坐标系
        // 的偏移量,保证 shadow/iframe 内元素的 bbox 仍然是顶层坐标,ref 点击不会错位。
        // inheritedInert:宿主/iframe 跨边界继承来的 inert 状态(见 walk)。
        function processEl(el, offX, offY, inheritedInert) {
            const tag = el.tagName.toLowerCase();
            if (ignoreTags.has(tag)) return;

            // 1. 物理可见性校验 (过滤幽灵节点)
            let rect;
            try { rect = el.getBoundingClientRect(); } catch (e) { return; }
            if (rect.width === 0 || rect.height === 0) return;
            let style;
            try { style = (el.ownerDocument.defaultView || window).getComputedStyle(el); } catch (e) { return; }
            if (style.visibility === 'hidden' || style.opacity === '0' || style.display === 'none') return;

            // 2. 交互意图判定。disabled / aria-disabled 的控件不可点:标 clickable=false
            // (仍然收录,便于断言其存在/禁用),否则 LLM 会去点禁用按钮并卡超时。
            // 用 :disabled 伪类而非 el.disabled —— 后者只反映元素自身的 disabled
            // 属性,看不到「<fieldset disabled> 传播给后代控件」这一规范行为
            // (含首个 <legend> 内控件豁免、嵌套 fieldset 由外层继续禁用)。
            // :disabled 正是浏览器对「actually disabled」的实现,一次到位且权威。
            const ariaDisabled = el.getAttribute('aria-disabled') === 'true';
            let nativeDisabled;
            try { nativeDisabled = el.matches(':disabled'); }
            catch (e) { nativeDisabled = el.disabled === true; }
            const isDisabled = nativeDisabled || ariaDisabled;
            // inert 子树(开 <dialog> 时背景标 inert 的标准模式)会吞掉点击 ——
            // 这类控件仍可见会被收录,但若仍标 clickable,LLM 会去点模态背后的死
            // 按钮然后 no-op/超时,故同样判为不可点。但 inert ≠ disabled(前者是"被
            // 遮挡/暂不可交互",后者是"控件本身被禁用"),分开上报:让 LLM 能推断
            // "有模态需先关掉"而非"表单被禁用",也避免 assert disabled 误判通过。
            const isInert = isInertEl(el, inheritedInert);
            const isInteractive = !isDisabled && !isInert && (
                ['a', 'button', 'input', 'select', 'textarea'].includes(tag) ||
                el.hasAttribute('onclick') ||
                interactiveRoles.has(el.getAttribute('role')) ||
                style.cursor === 'pointer');

            // 3. 智能文本提取 (防止父容器吞噬子节点文本造成大量重复)
            const directText = Array.from(el.childNodes)
                .filter(node => node.nodeType === Node.TEXT_NODE)
                .map(node => node.nodeValue.trim())
                .join(' ').trim();

            let fullText = el.innerText ? el.innerText.trim() : '';
            if (tag === 'input' || tag === 'textarea') fullText = el.value || '';
            if (fullText.length > 100) fullText = fullText.substring(0, 100) + '...';

            const ariaLabel = el.getAttribute('aria-label') || el.getAttribute('title') || el.getAttribute('alt') || '';

            // 4. 噪音与垃圾数据剔除策略
            // 收录条件用「语义可交互」(标签/role 本身可交互,忽略 disabled),
            // 这样禁用按钮也会被收录(clickable=false),而纯排版 wrapper 仍被丢弃。
            const isSemanticControl = ['a', 'button', 'input', 'select', 'textarea'].includes(tag) ||
                el.hasAttribute('onclick') ||
                interactiveRoles.has(el.getAttribute('role'));
            const keepForLayout = isInteractive || isSemanticControl;
            if (!keepForLayout && !directText && !ariaLabel) return;

            // For a kept (interactive/semantic) element, prefer innerText but
            // fall back to its own directText — otherwise a clickable shadow
            // HOST whose light text isn't slotted (innerText==='') would be
            // dropped by the empty-shell guard below, leaving it invisible to
            // the LLM (the exact blind spot this change targets).
            const displayText = keepForLayout ? (fullText || directText) : (directText.length > 0 ? fullText : directText);

            const placeholder = el.getAttribute('placeholder') || '';
            const type = el.getAttribute('type') || '';
            const name = el.getAttribute('name') || '';

            if (!displayText && !ariaLabel && !placeholder && !['input', 'select', 'textarea'].includes(tag)) return;

            // 5. 构建低 Token 结构体
            refIndex++;
            const nodeData = { "ref": "@" + refIndex, "class": tag, "clickable": isInteractive };
            if (el.id) nodeData.id = el.id;
            if (name) nodeData.name = name;
            if (type) nodeData.type = type;
            if (placeholder) nodeData.placeholder = placeholder;
            if (ariaLabel) nodeData.desc = ariaLabel;
            if (displayText) nodeData.text = displayText;
            if (isDisabled) nodeData.disabled = true;
            if (isInert) nodeData.inert = true;
            nodeData.x = Math.round(rect.x + offX);
            nodeData.y = Math.round(rect.y + offY);
            nodeData.w = Math.round(rect.width);
            nodeData.h = Math.round(rect.height);

            elements.push(nodeData);
            rawElOf.set(nodeData, el);
        }

        // 递归遍历:普通子树 + shadow DOM + 同源 iframe。这是修复"压缩器对
        // shadow DOM / iframe 失明"的核心:querySelectorAll('*') 不穿透 shadow
        // root,也不进入 iframe 文档,导致整类应用对 LLM 不可见。
        function walk(root, offX, offY, depth, inheritedInert) {
            // Depth cap: static DOM can't form true cycles (an iframe's
            // contentDocument is always a fresh document; a shadow root can't
            // contain its own host), so this is insurance against pathologically
            // deep generated pages approaching the JS recursion limit.
            if (depth > 50) return;
            let nodes;
            try { nodes = root.querySelectorAll('*'); } catch (e) { return; }
            nodes.forEach(el => {
                const tag = el.tagName ? el.tagName.toLowerCase() : '';

                // 同源 iframe:进入其内容文档,并按 iframe 在顶层的位置做坐标偏移。
                // 跨域 iframe 访问 contentDocument 会抛异常 —— 那是浏览器安全边界,
                // 无法穿透,静默跳过(诚实:我们不假装能看到跨域内容)。
                if (tag === 'iframe') {
                    // inert 跨 iframe 边界继承:frame 内文档看不到父文档的 inert 祖先,
                    // 故在此判定 iframe 自身或其祖先是否 inert,向内传递。
                    const frameInert = inheritedInert || isInertEl(el, false);
                    let doc = null, frameRect = null, insetX = 0, insetY = 0;
                    try {
                        frameRect = el.getBoundingClientRect();
                        // getBoundingClientRect gives the iframe's BORDER-box origin,
                        // but the content document starts inside the border+padding.
                        // Without this inset every child is reported too far up-left
                        // (Chromium's default 2px iframe border alone shifts a ref
                        // click off-target; thick-bordered embed/payment frames more).
                        const cs = (el.ownerDocument.defaultView || window).getComputedStyle(el);
                        insetX = (parseFloat(cs.borderLeftWidth) || 0) + (parseFloat(cs.paddingLeft) || 0);
                        insetY = (parseFloat(cs.borderTopWidth) || 0) + (parseFloat(cs.paddingTop) || 0);
                        doc = el.contentDocument;
                    } catch (e) { doc = null; }
                    if (doc && doc.documentElement) {
                        walk(doc.documentElement, offX + frameRect.x + insetX, offY + frameRect.y + insetY, depth + 1, frameInert);
                    }
                    return;
                }

                processEl(el, offX, offY, inheritedInert);

                // shadow root(open 模式):递归进入。坐标系与宿主一致,偏移不变。
                // inert 同样跨 shadow 边界继承:closest 不穿透 shadow root,故把
                // 宿主自身/继承来的 inert 状态算好后传入。
                if (el.shadowRoot) {
                    walk(el.shadowRoot, offX, offY, depth + 1, isInertEl(el, inheritedInert));
                }
            });
        }

        walk(document.documentElement, 0, 0, 0, false);

        // 6. 最终去重 (防止某些前端库生成多个不可见的克隆 DOM)
        const uniqueElements = [];
        const seen = new Set();
        elements.forEach(el => {
            const dedupKeys = Object.keys(el).filter(k => k !== 'ref').sort();
            const key = JSON.stringify(el, dedupKeys);
            if (!seen.has(key)) {
                seen.add(key);
                uniqueElements.push(el);
            }
        });

        // 7. 重复同名控件消歧。N 行同名按钮(每行一个 "Delete")压缩后 text 全相同,
        // codegen 只能 get_by_text('Delete').first —— 永远点第一行,持久化测试说谎。
        // 这里给「有歧义」的控件补两样东西:scope(所在行的标识文本,如 "Bob Jones")
        // 与 dup_index(DOM 序里的第几个),让 codegen 生成作用域定位器。仅对真正
        // 碰撞(≥2)的控件补,非歧义页 0 额外 token。
        // 碰撞键 = (role, accessible-name),与 codegen 的 _fallback_strategy 同口径。
        // 角色推断必须与 codegen 的 _infer_web_role 同口径,否则分组会漏判:
        // 比如 <input type=submit value=X> 与 <button>X</button> 都渲染成
        // get_by_role('button', name='X'),分组键若只看 tag 会把它们分到两组而漏掉碰撞。
        function roleOf(nd) {
            const tag = nd.class;
            const t = (nd.type || '').toLowerCase();
            if (tag === 'a') return 'link';
            if (tag === 'button' || t === 'submit' || t === 'button' || t === 'reset') return 'button';
            if (t === 'checkbox') return 'checkbox';
            if (t === 'radio') return 'radio';
            if (tag === 'select') return 'combobox';
            if (tag === 'textarea' || (tag === 'input' &&
                ['', 'text', 'email', 'search', 'url', 'tel', 'password'].includes(t))) return 'textbox';
            return tag;
        }
        function nameKeyOf(nd) {
            const accName = (nd.desc || nd.text || '').trim();
            if (!accName) return null;
            return roleOf(nd) + '\\u0000' + accName;
        }
        // 收集每个碰撞键的成员(仅可点击控件——不可点文本不会被 LLM 当点击目标)。
        const groups = new Map();
        uniqueElements.forEach(nd => {
            if (!nd.clickable) return;
            const k = nameKeyOf(nd);
            if (!k) return;
            if (!groups.has(k)) groups.set(k, []);
            groups.get(k).push(nd);
        });
        groups.forEach(members => {
            if (members.length < 2) return; // 唯一 → 不补,省 token
            const groupEls = members.map(m => rawElOf.get(m)).filter(Boolean);
            // 先各自算候选 scope(所在行的唯一标识 label)。
            members.forEach((nd, i) => {
                nd.dup_index = i; // DOM 序(uniqueElements 保持遍历顺序)
                const el = rawElOf.get(nd);
                nd._scopeCand = el ? computeScope(el, groupEls, nd) : '';
            });
            // 组内唯一性校验:若某 scope 被多行共用(行标识相同)或为空,则它无法消歧,
            // 不写 scope(该成员只保留 dup_index → codegen 走诚实 skip,绝不持久化会必然
            // 失败的定位器)。只有「组内唯一且非空」的 scope 才采纳。
            const counts = {};
            members.forEach(nd => { const s = nd._scopeCand; if (s) counts[s] = (counts[s] || 0) + 1; });
            members.forEach(nd => {
                if (nd._scopeCand && counts[nd._scopeCand] === 1) nd.scope = nd._scopeCand;
                delete nd._scopeCand;
            });
        });

        // 找「行根」(子树只含本组这一个成员的最高祖先),再在行内取一个干净的叶子 label
        // 作为 scope —— 叶子的 textContent 可被 get_by_text(exact=True) 精确命中,避免
        // 子串误匹配("Bob" 命中 "Bob Jones")。取最长的叶子文本(最具体的行标识)。
        function computeScope(el, groupEls, nd) {
            const ownName = (nd.desc || nd.text || '').trim();
            let rowRoot = null;
            let cur = el.parentElement;
            while (cur && cur.tagName && cur.tagName.toLowerCase() !== 'body') {
                let containsOther = false;
                for (const other of groupEls) {
                    if (other !== el && cur.contains(other)) { containsOther = true; break; }
                }
                if (containsOther) break; // 再往上就跨行了
                rowRoot = cur;
                cur = cur.parentElement;
            }
            if (!rowRoot) return '';
            let best = '';
            let nodes;
            try { nodes = rowRoot.querySelectorAll('*'); } catch (e) { return ''; }
            nodes.forEach(d => {
                if (el.contains(d)) return; // 跳过控件自身及其内部(它的 label)
                if (d.childElementCount > 0) return; // 只取叶子 → 文本可被 exact 命中
                let t = '';
                try { t = (d.textContent || '').trim().replace(/\\s+/g, ' '); } catch (e) { return; }
                if (!t || t === ownName) return;
                // 上限 80:scope 要用 get_by_text(exact=True) 精确命中,绝不能截断
                //(截断后 exact 永远匹配不上)。超长叶子直接不作为候选 —— 该行宁可
                // 走诚实 skip,也不持久化一个脆弱/必失败的定位器。
                if (t.length > 80) return;
                if (t.length > best.length) best = t; // 最长叶子 = 最具体的行标识
            });
            return best;
        }

        return JSON.stringify({"ui_elements": uniqueElements});
    }
    """
    try:
        # Run the injected script inside the Playwright browser context and
        # return its result (the script itself returns a JSON string).
        ui_json_str = page.evaluate(js_script)
        return ui_json_str
    except Exception as e:
        # Best-effort: any failure degrades to an empty element list rather
        # than propagating to the caller.
        print(f"[Warning] Failed to extract Web DOM: {e}")
        return '{"ui_elements": []}'
utils/utils_xml.py ADDED
@@ -0,0 +1,218 @@
1
+ try:
2
+ import defusedxml.ElementTree as ET
3
+ except ModuleNotFoundError:
4
+ import xml.etree.ElementTree as ET
5
+ import json
6
+ import re
7
+
8
# Matches a string consisting only of currency symbols, digits and numeric
# punctuation (. , + - %), e.g. a bare price or percentage.
_PATTERN_NOISE = re.compile(r'^[\$\¥\€\£\d\.\,\+\-\%]+$')
# Matches a trailing "_" followed by exactly eight lowercase hex characters.
_PATTERN_HASH_SUFFIX = re.compile(r'_[a-f0-9]{8}$')
10
+
11
def _should_filter_by_text(text: str, clickable: bool) -> bool:
    """Return True when ``text`` is short numeric/currency noise to drop.

    Clickable nodes are never filtered. Otherwise the text is dropped only
    when it is at most five characters long and matches ``_PATTERN_NOISE``.
    """
    is_short_noise = len(text) <= 5 and bool(_PATTERN_NOISE.match(text))
    return (not clickable) and is_short_noise
17
+
18
+ def _should_filter_by_id(res_id: str) -> bool:
19
+ if not res_id:
20
+ return False
21
+ return "com.android.systemui" in res_id
22
+
23
+ def _should_filter_by_desc(desc: str) -> bool:
24
+ if not desc:
25
+ return False
26
+ if "OpenVPN" in desc or "VoLTE" in desc:
27
+ return True
28
+ if len(desc) > 30 and "0, 1, 2" in desc:
29
+ return True
30
+ return False
31
+
32
def _short_resource_id(res_id: str) -> str:
    """Return the bare id name: no package prefix, no trailing hash suffix.

    Intended for display / token economy only. Do NOT use the result as a
    locator value: uiautomator2's resourceId selector matches the FULL
    ``pkg:id/name``, which is why compress_android_xml emits the full id and
    offers this value only as the optional ``id_short`` hint.
    """
    # Everything after the last "/" (the whole string when there is none).
    _, _, bare_name = res_id.rpartition("/")
    return _PATTERN_HASH_SUFFIX.sub('', bare_name)
43
+
44
+ def _node_label(node) -> str:
45
+ """A node's own label (text, falling back to content-desc), stripped."""
46
+ return node.attrib.get("text", "").strip() or node.attrib.get("content-desc", "").strip()
47
+
48
+
49
def _is_filtered_node(node) -> bool:
    """Return True if the emit loop will drop this node via its id/desc filters.

    A clickable or promoted node is exempt from the numeric-noise *text*
    filter, so only the resource-id and content-desc filters are consulted
    here; both drop the node regardless of its label. Promotion relies on
    this so that it never suppresses a row container in favour of a label
    that would then disappear.
    """
    attrs = node.attrib
    if _should_filter_by_id(attrs.get("resource-id", "").strip()):
        return True
    return _should_filter_by_desc(attrs.get("content-desc", "").strip())
60
+
61
+
62
def _emittable_own_label(node) -> bool:
    """Return True if the node has its own label AND survives emission.

    Such a node is already a locatable control (a Button, a labelled
    clickable), not a headless container that needs promotion. A container
    whose only label is itself filtered (e.g. a clickable wrapper with
    content-desc='VoLTE') counts as label-less, so its real child label can
    still be promoted.
    """
    if not _node_label(node):
        return False
    return not _is_filtered_node(node)
69
+
70
+
71
def _scope_label_descendants(container) -> list:
    """Collect surviving labelled descendants of ``container`` in document order.

    The search stops at any nested clickable node, because an inner card owns
    the labels in its own subtree. Labels that the emit loop would drop
    (filtered id/desc) are skipped, so promotion never targets a node that
    would vanish, nor suppresses a container in favour of one.
    """
    def _walk(parent):
        for kid in parent:
            if kid.attrib.get("clickable") == "true":
                # Nested clickable boundary: skip the node and its subtree.
                continue
            if _node_label(kid) and not _is_filtered_node(kid):
                yield kid
            yield from _walk(kid)

    return list(_walk(container))
84
+
85
+
86
def _promotable_label(container):
    """Pick the label node to promote for a headless clickable container.

    The first candidate whose resource-id ends in ``/title`` wins. Otherwise
    the first surviving label in document order is used. This keeps a
    summary/status line rendered before the title (e.g. "Connected" above
    "Bluetooth") from becoming the row's tap label.

    Returns None for an icon-only container, or one whose labels are all
    filtered; such a container stays an honest headless clickable.
    """
    candidates = _scope_label_descendants(container)
    titled = (
        cand for cand in candidates
        if cand.attrib.get("resource-id", "").strip().endswith("/title")
    )
    fallback = candidates[0] if candidates else None
    return next(titled, fallback)
101
+
102
+
103
def _compute_row_promotions(root):
    """Work out list-row label promotions (RecyclerView / Preference rows).

    The common Android list row is a CLICKABLE container with no label of its
    own; its text sits in a non-clickable child. A flat walk would emit a
    headless clickable plus a non-clickable text node, so nothing would be
    both clickable and labelled. For each such container this selects its
    title (or first surviving) label descendant for promotion to clickable
    and marks the now-redundant container for suppression.

    Returns:
        ``(promote_ids, suppress_ids)``: two sets of ``id(node)`` values for
        the emit pass to apply. Identity keys are only valid while ``root``
        and its elements stay alive, so the tree must not be re-parsed
        between the two passes.

    Boundaries:
        - A disabled container (enabled="false") is skipped: no promotion.
        - A container with no promotable, surviving label is left
          un-suppressed as a headless clickable; no label is fabricated.
        - The label search does not cross into a nested clickable, so an
          outer wrapper cannot take an inner card's label.
    """
    promoted: set[int] = set()
    suppressed: set[int] = set()

    for candidate in root.iter():
        attrs = candidate.attrib
        effectively_clickable = (
            attrs.get("clickable") == "true" and attrs.get("enabled") != "false"
        )
        # Skip non-clickable/disabled nodes and controls that already carry
        # an emittable label of their own (nothing to lift).
        if not effectively_clickable or _emittable_own_label(candidate):
            continue

        label = _promotable_label(candidate)
        if label is not None:
            promoted.add(id(label))
            suppressed.add(id(candidate))

    # Defensive: a promoted node is never also suppressed.
    return promoted, suppressed - promoted
153
+
154
+
155
def compress_android_xml(raw_xml: str) -> str:
    """Compress an Android UI hierarchy dump into a compact JSON element list.

    Args:
        raw_xml: The UI hierarchy XML as a string.

    Returns:
        A JSON string ``{"ui_elements": [...]}``. Each element has ``class``
        and, when applicable, ``text``, ``desc``, ``clickable``, ``disabled``,
        ``id`` (the full resource-id) and ``id_short``. If the XML cannot be
        parsed, a warning is printed and an empty list is returned.
    """
    try:
        root = ET.fromstring(raw_xml)
    except ET.ParseError as e:
        raw_preview = raw_xml[:200] if raw_xml else "(empty)"
        print(f"[Warning] XML parse failed: {e}, first 200 chars: {raw_preview}")
        return '{"ui_elements": []}'

    # Pass 1: decide which row labels become clickable and which empty row
    # containers are dropped in their favour.
    promote_ids, suppress_ids = _compute_row_promotions(root)

    # Pass 2: emit surviving nodes.
    elements = []
    for node in root.iter():
        node_key = id(node)
        if node_key in suppress_ids:
            # Redundant empty row container; its label child carries the row.
            continue

        attrs = node.attrib
        text = attrs.get("text", "").strip()
        desc = attrs.get("content-desc", "").strip()
        res_id = attrs.get("resource-id", "").strip()
        node_class = attrs.get("class", "").split(".")[-1]

        # Only an explicit enabled="false" disables a node. A disabled node
        # is never reported clickable, but is still emitted so its state
        # stays assertable.
        disabled = attrs.get("enabled") == "false"
        natively_clickable = attrs.get("clickable") == "true"
        clickable = (natively_clickable or node_key in promote_ids) and not disabled

        # The text filter gets `clickable or disabled` so that a disabled
        # control is not dropped as numeric noise before it can be emitted.
        if (
            _should_filter_by_id(res_id)
            or _should_filter_by_desc(desc)
            or _should_filter_by_text(text, clickable or disabled)
        ):
            continue

        if not (text or desc or clickable or disabled):
            continue

        el_info = {"class": node_class}
        if text:
            el_info["text"] = text
        if desc:
            el_info["desc"] = desc
        if clickable:
            el_info["clickable"] = True
        if disabled:
            el_info["disabled"] = True
        if res_id:
            # Emit the FULL resource-id (pkg:id/name): that is what
            # uiautomator2's resourceId selector matches. The short form is
            # only an extra hint.
            el_info["id"] = res_id
            short = _short_resource_id(res_id)
            if short and short != res_id:
                el_info["id_short"] = short

        elements.append(el_info)

    return json.dumps({"ui_elements": elements}, ensure_ascii=False)