validpilot-oss 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. package/CHANGELOG.md +111 -0
  2. package/README.md +196 -0
  3. package/bin/validpilot.js +173 -0
  4. package/brain/error_aggregator.js +203 -0
  5. package/core/artifacts.js +44 -0
  6. package/core/config.js +37 -0
  7. package/core/redaction.js +39 -0
  8. package/core/report.js +42 -0
  9. package/core/result.js +29 -0
  10. package/core/security.js +57 -0
  11. package/engines/chrome_mcp_adapter.js +319 -0
  12. package/engines/playwright_adapter.js +421 -0
  13. package/examples/demo/README.md +58 -0
  14. package/examples/demo/diagnostic-error-flow.json +22 -0
  15. package/examples/demo/diagnostic-error.html +29 -0
  16. package/examples/demo/flow.json +27 -0
  17. package/examples/demo/index.html +29 -0
  18. package/hands/browser_operator.js +67 -0
  19. package/hands/evidence_collector.js +97 -0
  20. package/package.json +55 -0
  21. package/rules/suggested-rules.json +237 -0
  22. package/server.js +5376 -0
  23. package/standalone-start.js +43 -0
  24. package/start-http.js +45 -0
  25. package/tools/ai_debug_investigate.json +30 -0
  26. package/tools/benchmark_run.json +37 -0
  27. package/tools/browser_a11y_check.json +21 -0
  28. package/tools/browser_artifacts.json +8 -0
  29. package/tools/browser_artifacts_clear.json +11 -0
  30. package/tools/browser_assert.json +16 -0
  31. package/tools/browser_batch.json +61 -0
  32. package/tools/browser_click.json +11 -0
  33. package/tools/browser_console.json +26 -0
  34. package/tools/browser_cookies.json +38 -0
  35. package/tools/browser_debug_report.json +11 -0
  36. package/tools/browser_diagnose.json +23 -0
  37. package/tools/browser_dom.json +11 -0
  38. package/tools/browser_element_status.json +26 -0
  39. package/tools/browser_errors.json +17 -0
  40. package/tools/browser_errors_aggregate.json +12 -0
  41. package/tools/browser_errors_clear.json +8 -0
  42. package/tools/browser_eval.json +11 -0
  43. package/tools/browser_events.json +15 -0
  44. package/tools/browser_events_clear.json +8 -0
  45. package/tools/browser_find_element.json +30 -0
  46. package/tools/browser_find_page.json +22 -0
  47. package/tools/browser_flow.json +38 -0
  48. package/tools/browser_har_export.json +17 -0
  49. package/tools/browser_highlight.json +18 -0
  50. package/tools/browser_hover.json +14 -0
  51. package/tools/browser_instrument.json +10 -0
  52. package/tools/browser_links.json +21 -0
  53. package/tools/browser_locator_suggest.json +16 -0
  54. package/tools/browser_locator_validate.json +12 -0
  55. package/tools/browser_network.json +16 -0
  56. package/tools/browser_network_detail.json +17 -0
  57. package/tools/browser_open.json +12 -0
  58. package/tools/browser_performance_check.json +25 -0
  59. package/tools/browser_press_key.json +18 -0
  60. package/tools/browser_quick_fix.json +29 -0
  61. package/tools/browser_screenshot.json +15 -0
  62. package/tools/browser_scroll.json +31 -0
  63. package/tools/browser_select.json +26 -0
  64. package/tools/browser_session_close.json +12 -0
  65. package/tools/browser_session_create.json +17 -0
  66. package/tools/browser_session_switch.json +12 -0
  67. package/tools/browser_sessions.json +8 -0
  68. package/tools/browser_snapshot.json +8 -0
  69. package/tools/browser_step.json +18 -0
  70. package/tools/browser_storage.json +10 -0
  71. package/tools/browser_trace_start.json +14 -0
  72. package/tools/browser_trace_stop.json +10 -0
  73. package/tools/browser_traverse_menu.json +25 -0
  74. package/tools/browser_type.json +12 -0
  75. package/tools/browser_verify_fix.json +39 -0
  76. package/tools/browser_visual_baseline.json +19 -0
  77. package/tools/browser_visual_compare.json +20 -0
  78. package/tools/browser_visual_report.json +8 -0
  79. package/tools/browser_wait.json +18 -0
  80. package/tools/debug_investigate.json +17 -0
  81. package/tools/error_fix_suggestion.json +13 -0
  82. package/tools/error_summary_md.json +11 -0
  83. package/tools/fix_verify.json +13 -0
  84. package/tools/mcp_health_check.json +8 -0
  85. package/tools/mcp_self_test.json +12 -0
  86. package/tools/screenshot_diff.json +16 -0
  87. package/tools/validation_check.json +20 -0
  88. package/tools/validation_decision.json +24 -0
  89. package/tools/validation_element.json +13 -0
  90. package/tools/validation_flow.json +12 -0
  91. package/tools/validation_matrix.json +27 -0
  92. package/tools/validation_quick_run.json +13 -0
  93. package/tools/validation_report.json +10 -0
  94. package/tools/validation_report_export.json +8 -0
  95. package/tools/validation_run.json +35 -0
  96. package/tools/validation_start.json +12 -0
  97. package/tools/validation_suite_run.json +17 -0
package/CHANGELOG.md ADDED
@@ -0,0 +1,111 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ ## [1.1.0] - 2026-06-28
6
+
7
+ ### Added
8
+
9
+ #### New Tools (11)
10
+
11
+ - **browser_find_element** - 智能元素定位基础版
12
+ - 7种匹配策略:精确文本、模糊文本、placeholder、aria-label、title/alt、role
13
+ - 置信度评分(1.0→0.6)
14
+ - 自动生成稳定CSS选择器
15
+ - 可见性过滤
16
+
17
+ - **browser_diagnose** - 错误自动诊断
18
+ - 自动分析错误根因和置信度
19
+ - 支持 TypeError/500/404/CORS 等模式识别
20
+ - 返回 suggestedFixes 修复建议
21
+
22
+ - **browser_element_status** - 元素状态诊断
23
+ - 可见性检查(isFullyVisible)
24
+ - 可交互性检查(isDisabled/isHidden)
25
+ - 遮挡检查(obscuredBy)
26
+ - 事件绑定检查
27
+
28
+ - **browser_quick_fix** - 快速修复
29
+ - 8种修复策略:wait/scroll/force_visible/remove_obscuring/remove_disabled/force_click/inject_click_listener/trigger_event
30
+ - 自动尝试直到成功
31
+
32
+ - **browser_verify_fix** - 修复验证闭环
33
+ - 记录 before/after 状态
34
+ - 验证修复效果
35
+ - 返回 FIXED/NOT_FIXED 状态
36
+
37
+ - **browser_cookies** - Cookie管理
38
+ - 获取/设置/清除 Cookie
39
+ - 支持域名筛选
40
+ - 敏感值自动脱敏
41
+
42
+ - **browser_traverse_menu** - 菜单遍历
43
+ - 自动遍历多级菜单发现功能
44
+ - 支持 maxDepth/maxItems 控制
45
+ - 每步错误检测
46
+
47
+ - **validation_element** - 元素状态验证
48
+ - 8种断言:exists/visible/enabled/textContains/hasAttribute/valueEquals/countEquals
49
+ - 失败自动截图
50
+ - 与 validation_check 格式一致
51
+
52
+ - **validation_flow** - 流程验证
53
+ - 7种步骤类型:click/type/wait/assert/goto/scroll
54
+ - 支持 continueOnFailure
55
+ - 返回每步结果和总通过率
56
+
57
+ - **validation_quick_run** - 一键快速验证
58
+ - 7项自动检查:load_time/no_js_errors/no_5xx/no_404/not_blank/has_title/has_content
59
+ - 自动截图证据
60
+ - 快速返回 pass/fail
61
+
62
+ - **error_fix_suggestion** - 错误修复建议
63
+ - 11种错误模式:404/401/403/500/TypeError/CORS/超时/元素未找到/不可见/disabled/网络失败
64
+ - 每条建议含 severity/confidence/verifyAction/relatedTool
65
+ - 与 browser_diagnose 形成闭环
66
+
67
+ ### Enhanced
68
+
69
+ #### Enhanced Tools
70
+
71
+ - **analyzeScreenshotForErrors** - 截图错误分析
72
+ - 30+ UI框架选择器(Ant Design/Element UI/Vuetify/PrimeNG等)
73
+ - RGB/HSL/颜色名红色检测
74
+ - toastLike 检测
75
+ - domErrorStats DOM统计
76
+
77
+ - **postActionErrorCheck** - 操作后错误检查
78
+ - checkpoint时间戳过滤
79
+ - 300ms延迟等待
80
+
81
+ - **browser_assert** - 断言验证
82
+ - autoScreenshot 参数
83
+ - 失败自动截图 + 错误分析
84
+
85
+ - **buildDebugReport** - 调试报告
86
+ - 新增 domStats(11字段)
87
+ - totalElements/inputs/buttons/links/images/forms/tables/iframes/navs
88
+
89
+ - **browser_snapshot** - 页面快照
90
+ - 新增 pageInfo/navElements/imageCount/tableCount/frameworks
91
+
92
+ - **browser_click/type/press_key** - 交互工具
93
+ - 新增 suggestions 调试建议
94
+ - 按错误类型提供具体提示
95
+
96
+ ### Documentation
97
+
98
+ - **README.md** - 全面更新
99
+ - 能力清单扩展为8分类
100
+ - 新增72工具总数说明
101
+ - 新增智能诊断与修复建议核心卖点
102
+ - 新增进阶使用示例
103
+
104
+ ## [1.0.0] - 2026-06-27
105
+
106
+ ### Added
107
+
108
+ - Initial release
109
+ - 73 MCP tools covering browser automation, validation, and debugging
110
+ - MCP protocol native support (HTTP mode)
111
+ - Playwright-based browser automation
package/README.md ADDED
@@ -0,0 +1,196 @@
1
+ # ValidPilot Open Source 🚀
2
+
3
+ > 浏览器自动化与验证框架,MCP 协议原生支持
4
+
5
+ ---
6
+
7
+ ## ✨ 核心卖点
8
+
9
+ ### 🤖 MCP 协议原生支持
10
+ AI Agent 直接调用,无需写代码,让你的智能体瞬间拥有浏览器操作能力
11
+
12
+ ### 💬 自然语言操作浏览器
13
+ 描述你想做什么,AI 帮你操作。点击、输入、滚动、截图... 一句话搞定
14
+
15
+ ### 🎯 智能元素定位
16
+ 用自然语言描述元素,自动查找。支持文本匹配、占位符、ARIA 标签、角色等多策略 fallback,不用再写复杂的 CSS 选择器
17
+
18
+ ### 🔍 智能诊断与修复建议
19
+ 自动诊断错误根因,给出具体修复建议。从发现问题到解决问题,形成完整闭环
20
+
21
+ ### ✅ 基础验证框架
22
+ 元素验证、流程验证、快速验证、视觉对比,开箱即用
23
+
24
+ ### 📊 自动错误收集
25
+ Console 错误、网络错误、页面异常... 自动聚合,一目了然
26
+
27
+ ---
28
+
29
+ ## 📋 能力清单
30
+
31
+ | 分类 | 能力 | 工具 | 说明 |
32
+ |------|------|------|------|
33
+ | 🌐 **浏览器操作** | 页面导航 | browser_open / browser_navigate | 打开 URL、前进、后退、刷新 |
34
+ | | 元素交互 | browser_click / browser_type / browser_hover / browser_select_option | 点击、输入、悬停、选择 |
35
+ | | 键盘操作 | browser_press_key | 按键、组合键、文本输入 |
36
+ | | 页面滚动 | browser_scroll | 滚动到指定位置、元素、底部/顶部 |
37
+ | | 截图功能 | browser_screenshot / browser_screenshot_element | 全屏截图、元素截图、可视区域截图 |
38
+ | | Cookie 管理 | browser_cookies | 获取、设置、清除 Cookie |
39
+ | | 存储操作 | browser_storage | localStorage、sessionStorage 读写 |
40
+ | | 网络控制 | browser_network | 网络请求诊断、失败分析 |
41
+ | | JavaScript 执行 | browser_eval | 注入并执行自定义 JS 代码 |
42
+ | | 等待机制 | browser_wait | 等待元素、等待导航、等待网络空闲 |
43
+ | | 内容提取 | browser_dom / browser_snapshot | 获取 DOM、页面快照、元素属性 |
44
+ | | 批量操作 | browser_batch | 批量执行操作序列 |
45
+ | | 表单操作 | browser_type / browser_select_option | 填充表单、选择选项 |
46
+ | | 窗口管理 | browser_sessions / browser_session_create | 多标签页、会话管理 |
47
+ | | 事件监听 | browser_events | 捕获页面事件 |
48
+ | 🎯 **智能定位** | 按文本查找元素 | **browser_find_element** | 用描述找到页面元素(新增) |
49
+ | | 选择器建议 | browser_locator_suggest | 生成更稳定的选择器 |
50
+ | | 选择器验证 | browser_locator_validate | 验证选择器稳定性和匹配数 |
51
+ | | 多策略 fallback | — | 文本、占位符、ARIA、role 智能匹配 |
52
+ | 🔍 **调试诊断** | 错误自动诊断 | **browser_diagnose** | 自动分析错误根因和置信度 |
53
+ | | 元素状态诊断 | **browser_element_status** | 可见性、可交互性、遮挡、事件检查 |
54
+ | | 快速修复 | **browser_quick_fix** | 8种策略自动尝试修复 |
55
+ | | 修复验证闭环 | **browser_verify_fix** | 记录前后状态,验证修复效果 |
56
+ | | 控制台日志 | browser_console | 获取浏览器控制台日志 |
57
+ | | 错误聚合 | browser_errors_aggregate | 按类型/严重度聚合错误 |
58
+ | | 调试报告 | browser_debug_report | 生成完整调试报告 |
59
+ | ✅ **基础验证** | 元素验证 | **validation_element** | 元素存在/可见/启用/文本等断言(增强) |
60
+ | | 流程验证 | **validation_flow** | 多步骤流程验证(增强) |
61
+ | | 快速验证 | **validation_quick_run** | 一键URL健康检查(增强) |
62
+ | | 检查点验证 | validation_check | 灵活的自定义验证 |
63
+ | | 验证运行 | validation_run | 执行验证计划 |
64
+ | | 断言验证 | browser_assert | 页面级断言 |
65
+ | | 视觉回归 | screenshot_diff | 截图对比,发现 UI 变化 |
66
+ | | 元素验证工具 | validation_element | 元素状态验证 |
67
+ | 🔍 **错误收集** | Console 错误 | browser_errors | 自动捕获 JS 错误和警告 |
68
+ | | 网络错误 | browser_network | 失败请求自动记录 |
69
+ | | 页面错误 | — | 资源加载、运行时异常聚合 |
70
+ | | 错误分类 | browser_errors_aggregate | 按类型、严重程度自动归类 |
71
+ | | 修复建议 | **error_fix_suggestion** | 基于规则的修复建议(增强) |
72
+ | 📝 **验证报告** | 基础报告 | validation_report | HTML/JSON/Markdown 格式报告 |
73
+ | | 报告导出 | validation_report_export | 导出验证报告 |
74
+ | | 步骤记录 | — | 操作步骤自动留存 |
75
+ | | 截图证据 | browser_screenshot | 关键节点自动截图 |
76
+ | | 工件管理 | browser_artifacts | 截图、日志等工件管理 |
77
+ | 🧪 **快速修复能力** | 截图错误分析 | — | 截图后自动分析页面错误 |
78
+ | | 断言证据链 | — | 断言失败自动截图+错误分析 |
79
+ | | 交互错误提示 | — | 点击/输入失败给出调试建议 |
80
+ | | 菜单遍历 | browser_traverse_menu | 自动遍历多级菜单发现功能 |
81
+ | | 页面发现 | browser_find_page | 自动识别登录页/首页等页面类型 |
82
+
83
+ ---
84
+
85
+ ## 🧰 工具总数
86
+
87
+ **72 个工具** — 覆盖浏览器操作、智能定位、调试诊断、验证框架、错误分析、报告导出全流程
88
+
89
+ ---
90
+
91
+ ## 🚀 快速开始
92
+
93
+ ### 第 1 步:安装
94
+
95
+ ```bash
96
+ npm install -g validpilot-oss
97
+ ```
98
+
99
+ ### 第 2 步:配置 MCP Server
100
+
101
+ 在你的 MCP 配置文件中添加:
102
+
103
+ ```json
104
+ {
105
+ "mcpServers": {
106
+ "validpilot": {
107
+ "command": "npx",
108
+ "args": ["-y", "validpilot", "mcp"]
109
+ }
110
+ }
111
+ }
112
+ ```
113
+
114
+ ### 第 3 步:第一个操作
115
+
116
+ 重启你的 AI 客户端,然后试试说:
117
+
118
+ > "打开百度,搜索 'MCP 协议',然后截图"
119
+
120
+ 就这么简单!🎉
121
+
122
+ ### 💡 进阶:智能定位试试这个
123
+
124
+ > "找到页面上的登录按钮并点击"
125
+
126
+ 无需写选择器,ValidPilot 会自动帮你找到元素。
127
+
128
+ ### 💡 进阶:快速验证试试这个
129
+
130
+ > "用 validation_quick_run 检查 https://example.com 是否正常"
131
+
132
+ 一键检查页面加载、JS错误、白屏等问题。
133
+
134
+ ---
135
+
136
+ ## 💎 开源版 vs 付费版
137
+
138
+ | 功能 | 开源版 | 付费版 |
139
+ |------|--------|--------|
140
+ | 🌐 浏览器操作(20+工具) | ✅ | ✅ |
141
+ | 🎯 智能元素定位(基础版) | ✅ 文本/属性匹配 | ✅ 多模态视觉定位 |
142
+ | 🔍 错误诊断与修复建议 | ✅ 基于规则 | ✅ AI 智能分析 |
143
+ | ✅ 基础验证框架 | ✅ | ✅ |
144
+ | 🔍 错误自动收集 | ✅ | ✅ |
145
+ | 📝 基础验证报告 | ✅ | ✅ |
146
+ | 🤖 AI 自动探索验证 | ❌ | ✅ |
147
+ | 🔬 AI 深度根因分析 | ❌ | ✅ |
148
+ | 💡 AI 智能修复建议 | ❌ | ✅ |
149
+ | 📦 批量验证执行 | ❌ | ✅ |
150
+ | 🔒 安全扫描 | ❌ | ✅ |
151
+ | 👥 团队协作 | ❌ | ✅ |
152
+ | 📈 高级分析仪表盘 | ❌ | ✅ |
153
+ | 🎧 优先技术支持 | ❌ | ✅ |
154
+
155
+ > 💡 想体验更强大的 AI 能力?了解 [ValidPilot Pro](https://validpilot.com/pro)
156
+
157
+ ---
158
+
159
+ ## 🌟 社区与贡献
160
+
161
+ 我们欢迎各种形式的贡献!
162
+
163
+ - 🐛 [提交 Bug](https://github.com/validpilot/validpilot/issues)
164
+ - 💡 [功能建议](https://github.com/validpilot/validpilot/discussions)
165
+ - 📖 [改进文档](https://github.com/validpilot/validpilot/pulls)
166
+ - 🔧 [提交代码](https://github.com/validpilot/validpilot/blob/main/CONTRIBUTING.md)
167
+
168
+ ### 开发环境
169
+
170
+ ```bash
171
+ # 克隆项目
172
+ git clone https://github.com/validpilot/validpilot.git
173
+
174
+ # 安装依赖
175
+ cd validpilot
176
+ npm install
177
+
178
+ # 启动开发模式
179
+ npm run dev
180
+ ```
181
+
182
+ ---
183
+
184
+ ## 📄 License
185
+
186
+ [MIT](LICENSE) © ValidPilot Team
187
+
188
+ ---
189
+
190
+ <div align="center">
191
+
192
+ 如果这个项目对你有帮助,别忘了给个 ⭐ Star!
193
+
194
+ [官网](https://validpilot.com) · [文档](https://docs.validpilot.com) · [社区](https://github.com/validpilot/validpilot/discussions)
195
+
196
+ </div>
package/bin/validpilot.js ADDED
@@ -0,0 +1,173 @@
1
+ #!/usr/bin/env node
2
+ 'use strict';
3
+
4
+ const fs = require('fs');
5
+ const path = require('path');
6
+ const { validationQuickRun } = require('../hands/verification_runner');
7
+ const browserOperator = require('../hands/browser_operator');
8
+
9
+ const PKG = require('../package.json');
10
+
11
/**
 * Prints the CLI usage text (banner, commands, AI options, examples) to stdout.
 *
 * The banner shows the version from package.json, or '0.1.0' when the
 * manifest has no version field.
 */
function printHelp() {
  const cmd = 'validpilot';
  const version = PKG.version || '0.1.0';

  const helpLines = [
    `${cmd} ${version} — AI 编程的最后一公里验证平台`,
    '',
    'Usage:',
    `${cmd} health 检查 Playwright 可用性 (exit 0=ok / exit 1=unavailable)`,
    `${cmd} run --flow <file> 执行轻量 flow JSON,逐 action 输出结果`,
    `${cmd} validate --url <url> 快速验证 URL,输出 pass/fail / Top errors / artifact 路径`,
    `${cmd} --version 输出版本号`,
    `${cmd} --help 显示此帮助`,
    '',
    'AI 配置选项:',
    '--ai-provider <provider> 设置 AI 提供商 (openai/deepseek/qwen)',
    '--ai-api-key <key> 设置 AI API Key',
    '',
    'Examples:',
    `${cmd} health`,
    `${cmd} validate --url examples/demo/index.html`,
    `${cmd} run --flow examples/demo/flow.json`,
    `${cmd} validate --url examples/demo/index.html --ai-provider openai --ai-api-key sk-xxx`,
    // The text ends with a newline, so console.log leaves a blank line after it.
    ''
  ];

  console.log(helpLines.join('\n'));
}
34
+
35
/**
 * Minimal argv parser for the CLI.
 *
 * Supported forms:
 *   --key value   -> result.key = 'value' (value may start with a single '-')
 *   --key=value   -> result.key = 'value' (split on the first '=', so values
 *                    such as URLs with query strings stay intact)
 *   --key         -> result.key = true
 *   -k value      -> result.k = 'value' (value must not start with '-')
 *   -k            -> result.k = true
 *   anything else -> appended to result._ (positionals, including tokens
 *                    like '-abc' that are not exactly two characters long)
 *
 * @param {string[]} argv - Arguments after the node binary and script path.
 * @returns {Object} Parsed options keyed by flag name, plus `_` holding the
 *   positional arguments in order.
 */
function parseArgs(argv) {
  const result = { _: [] };

  for (let i = 0; i < argv.length; i += 1) {
    const item = argv[i];
    const isLong = item.startsWith('--');
    const isShort = !isLong && item.startsWith('-') && item.length === 2;

    if (!isLong && !isShort) {
      result._.push(item);
      continue;
    }

    if (isLong) {
      // `--key=value`: without this the whole "key=value" text became the key
      // and the value was lost (e.g. `run --flow=flow.json` reported that
      // --flow was missing).
      const body = item.slice(2);
      const eq = body.indexOf('=');
      if (eq > 0) {
        result[body.slice(0, eq)] = body.slice(eq + 1);
        continue;
      }
    }

    const key = isLong ? item.slice(2) : item.slice(1);
    const next = argv[i + 1];
    // Long flags only refuse another long flag as their value; short flags
    // refuse anything that starts with a dash.
    const flagPrefix = isLong ? '--' : '-';
    if (next !== undefined && !next.startsWith(flagPrefix)) {
      result[key] = next;
      i += 1;
    } else {
      result[key] = true;
    }
  }

  return result;
}
63
+
64
/**
 * Prints a compact JSON view of a validation result to stdout.
 *
 * Only `pass`, `mode`, `summary`, at most the first five `topErrors`, and
 * `artifacts` are emitted; missing lists are printed as empty arrays.
 *
 * @param {Object} result - Validation result to summarise.
 */
function printLowToken(result) {
  const { pass, mode, summary } = result;
  const topErrors = (result.topErrors || []).slice(0, 5);
  const artifacts = result.artifacts || [];

  const compact = { pass, mode, summary, topErrors, artifacts };
  console.log(JSON.stringify(compact, null, 2));
}
73
+
74
/**
 * `health` command: checks that the Playwright module loads and that a
 * headless Chromium can be launched and closed.
 *
 * Prints one JSON object to stdout ({ ok, name, version, message | error })
 * and then terminates the process: exit code 0 on success, 1 when the module
 * cannot be required or when launching/closing the browser throws.
 *
 * @returns {Promise<void>} Every path ends in process.exit().
 */
async function cmdHealth() {
  // Shared envelope so all three outcomes carry the same identifying fields.
  const emit = (ok, detail) => {
    const payload = {
      ok,
      name: PKG.name || 'validpilot',
      version: PKG.version || '0.1.0',
      ...detail
    };
    console.log(JSON.stringify(payload, null, 2));
  };

  let chromium;
  try {
    chromium = require('playwright').chromium;
  } catch (error) {
    emit(false, { error: `Playwright module unavailable: ${error.message}` });
    process.exit(1);
  }

  try {
    const browser = await chromium.launch({ headless: true, timeout: 15000 });
    await browser.close();
    emit(true, { message: 'Playwright browser is available' });
    process.exit(0);
  } catch (error) {
    emit(false, { error: `Playwright browser failed to launch: ${error.message}` });
    process.exit(1);
  }
}
94
+
95
/**
 * `run` command: loads a flow JSON file, executes it through
 * browserOperator.batch and prints a compact JSON result to stdout.
 *
 * Two file shapes are accepted:
 *   - array:  [{ action, args: {...} }, ...] — each item's `args` are merged
 *             into the step next to `action`; the goal is the default label
 *             and the flow stops on the first error;
 *   - object: { steps, goal, stopOnError } — `steps` is used only when it is
 *             an array, `goal` falls back to the default label, and
 *             `stopOnError` is true unless it is exactly `false`.
 *
 * @param {string} flowPath - Path to the flow file (resolved against the cwd).
 * @returns {Promise<void>}
 * @throws {Error} When `flowPath` is missing; file-read and JSON parse errors
 *   propagate unchanged.
 */
async function cmdRun(flowPath) {
  if (!flowPath) throw new Error('run requires --flow <file>');

  const flowFile = path.resolve(flowPath);
  const flow = JSON.parse(fs.readFileSync(flowFile, 'utf8'));
  const defaultGoal = 'CLI low-token flow';

  let steps;
  let goal;
  let stopOnError;
  if (Array.isArray(flow)) {
    steps = flow.map(item => {
      const extra = item.args && typeof item.args === 'object' ? item.args : undefined;
      // Object.assign skips an undefined source, leaving just { action }.
      return Object.assign({ action: item.action }, extra);
    });
    goal = defaultGoal;
    stopOnError = true;
  } else {
    steps = Array.isArray(flow.steps) ? flow.steps : [];
    goal = flow.goal || defaultGoal;
    stopOnError = flow.stopOnError !== false;
  }

  const outcome = await browserOperator.batch({ goal, steps, stopOnError });
  const artifacts = (outcome.results || [])
    .map(entry => entry.artifactPath)
    .filter(Boolean);

  const report = {
    pass: outcome.ok,
    summary: `flow steps=${steps.length}`,
    results: outcome.results,
    topErrors: [],
    artifacts
  };
  console.log(JSON.stringify(report, null, 2));
}
129
+
130
/**
 * CLI entry point: parses process.argv and dispatches to the requested command.
 *
 * Order of handling:
 *   1. `--ai-provider` / `--ai-api-key` are copied into the AI_PROVIDER /
 *      AI_API_KEY environment variables.
 *   2. `--version` / `-v` prints the package version.
 *   3. `--help` / `-h`, or no command at all, prints the usage text.
 *   4. `health`, `validate --url <url>`, `run --flow <file>`.
 *
 * parseArgs stores short flags under their single-letter key and never puts a
 * dash-prefixed flag into `_`, so `-v` / `-h` have to be read from `args.v` /
 * `args.h`. Comparing the positional command against '-v' or '--help' can
 * never match; before this fix `-v` printed the help text and `run -h`
 * executed `run`.
 *
 * NOTE(review): `validationQuickRun` is required from
 * ../hands/verification_runner, which is not among the files listed for this
 * package release — confirm the module ships, otherwise the top-level require
 * fails before main() runs.
 *
 * @returns {Promise<void>}
 * @throws {Error} For an unknown command or a missing required option; the
 *   caller reports it and sets a non-zero exit code.
 */
async function main() {
  const args = parseArgs(process.argv.slice(2));
  const command = args._[0];

  if (args['ai-provider']) {
    process.env.AI_PROVIDER = args['ai-provider'];
  }
  if (args['ai-api-key']) {
    process.env.AI_API_KEY = args['ai-api-key'];
  }

  if (args.version || args.v) {
    console.log(PKG.version || '0.1.0');
    return;
  }
  if (args.help || args.h || !command) {
    printHelp();
    return;
  }

  if (command === 'health') {
    await cmdHealth();
    return;
  }

  if (command === 'validate') {
    if (!args.url) throw new Error('validate requires --url <url>');
    const result = await validationQuickRun({ url: args.url, headless: true });
    printLowToken(result);
    return;
  }

  if (command === 'run') {
    await cmdRun(args.flow);
    return;
  }

  throw new Error(`Unknown command: ${command}`);
}
169
+
170
// Script entry: run the CLI. Any rejection from main() is reported on stderr
// as a JSON object ({ pass: false, error }).
main().catch(error => {
  console.error(JSON.stringify({ pass: false, error: error.message }, null, 2));
  // Only the exit code is set here; the process is not terminated explicitly.
  process.exitCode = 1;
});
package/brain/error_aggregator.js ADDED
@@ -0,0 +1,203 @@
1
+ 'use strict';
2
+
3
+ const { truncate } = require('../engines/playwright_adapter');
4
+
5
/**
 * Flattens the error records found in an evidence-like object into one list,
 * tagging each record with the channel it came from.
 *
 * For every channel the first truthy candidate is used, in this order:
 *   console   : input.console.recent, consoleErrors, consoleLogs, console
 *   network   : input.network.recent, networkErrors, networkLogs, network
 *   pageerror : input.pageerror.recent, pageErrors, pageerror
 *   mcp       : mcpErrors
 * A candidate that is not an array contributes nothing. Each record is copied
 * as `{ source, ...record }`, so a record's own `source` field wins over the
 * channel label. `input.evidence` and `input.errors` are processed
 * recursively and their records appended after the direct ones.
 *
 * @param {Object} [input={}] - Evidence bundle.
 * @returns {Object[]} Shallow copies of all records, in channel order.
 */
function collectRawErrors(input = {}) {
  const channels = [
    ['console', input.console?.recent || input.consoleErrors || input.consoleLogs || input.console || []],
    ['network', input.network?.recent || input.networkErrors || input.networkLogs || input.network || []],
    ['pageerror', input.pageerror?.recent || input.pageErrors || input.pageerror || []],
    ['mcp', input.mcpErrors || []]
  ];

  const collected = [];
  for (const [source, records] of channels) {
    if (!Array.isArray(records)) continue;
    for (const record of records) {
      collected.push({ source, ...record });
    }
  }

  for (const nestedKey of ['evidence', 'errors']) {
    const nested = input[nestedKey];
    if (nested) collected.push(...collectRawErrors(nested));
  }

  return collected;
}
21
+
22
/**
 * Maps an error record to a numeric severity (higher is worse).
 *
 *   4 - record whose source is 'pageerror'
 *   3 - record flagged `failed`, or HTTP status >= 500
 *   2 - HTTP status >= 400 on a .js/.css/.jsx/.tsx/.wasm URL,
 *       or record type 'error' / 'assert' (case-insensitive)
 *   1 - any other HTTP status >= 400, or record type 'warning' / 'warn'
 *   0 - everything else
 *
 * Checks run in that order, so a status-based result takes precedence over
 * the record type.
 *
 * @param {Object} [item={}] - Error record.
 * @returns {number} Severity from 0 to 4.
 */
function severityOf(item = {}) {
  if (item.source === 'pageerror') return 4;
  if (item.failed) return 3;

  const status = Number(item.status || 0);
  if (status >= 500) return 3;
  if (status >= 400) {
    // Script, style and wasm assets rank higher than other resources.
    const isCoreAsset = (item.url || '').match(/\.(js|css|jsx|tsx|wasm)($|\?)/i);
    return isCoreAsset ? 2 : 1;
  }

  switch (String(item.type || '').toLowerCase()) {
    case 'error':
    case 'assert':
      return 2;
    case 'warning':
    case 'warn':
      return 1;
    default:
      return 0;
  }
}
40
+
41
/**
 * Classifies overall page health from the collected error records.
 *
 * Counted signals:
 *   - page errors: records whose source is 'pageerror';
 *   - failed JS / CSS loads: console records whose text contains
 *     'Failed to load resource' and whose URL ends in .js / .css;
 *   - noisy loads: the same kind of console record for image and font URLs.
 *
 * healthScore starts at 100 and loses 30 per page error, 3 per failed JS load
 * and 2 per failed CSS load, clamped to 0..100.
 *
 * Status, first match wins:
 *   'blocked'    - at least one page error               (canTestBusiness false)
 *   'degraded'   - more than 10 failed JS or CSS loads   (canTestBusiness false)
 *   'noisy'      - more than 20 failed image/font loads  (canTestBusiness true)
 *   'functional' - otherwise                             (canTestBusiness true)
 *
 * @param {Object} [input={}] - Evidence bundle, passed to collectRawErrors.
 * @returns {{status: string, message: string, canTestBusiness: boolean,
 *   recommendation: string, healthScore: number, criticalErrorCount: number,
 *   details: Object}} `details.noisy404s` is present only for the 'noisy' and
 *   'functional' results.
 */
function pageFunctionalStatus(input = {}) {
  const raw = collectRawErrors(input);

  // Console "Failed to load resource" records whose URL matches the pattern.
  const failedLoads = (urlPattern) => raw.filter(entry =>
    entry.source === 'console' &&
    (entry.url || '').match(urlPattern) &&
    entry.text && entry.text.includes('Failed to load resource')
  );

  const pageErrors = raw.filter(entry => entry.source === 'pageerror');
  const criticalJsErrors = failedLoads(/\.js($|\?)/i);
  const criticalCssErrors = failedLoads(/\.css($|\?)/i);

  const criticalErrorCount = pageErrors.length + criticalJsErrors.length + criticalCssErrors.length;
  const penalty = pageErrors.length * 30 + criticalJsErrors.length * 3 + criticalCssErrors.length * 2;
  const healthScore = Math.max(0, Math.min(100, 100 - penalty));

  const coreDetails = {
    pageErrorCount: pageErrors.length,
    criticalJsErrors: criticalJsErrors.length,
    criticalCssErrors: criticalCssErrors.length
  };
  const verdict = (status, message, canTestBusiness, recommendation, details) => ({
    status,
    message,
    canTestBusiness,
    recommendation,
    healthScore,
    criticalErrorCount,
    details
  });

  if (pageErrors.length > 0) {
    return verdict(
      'blocked',
      `检测到 ${pageErrors.length} 个页面运行时错误,页面功能被阻塞,不能执行真实业务验证。`,
      false,
      '优先修复页面运行时错误,然后重新验证业务闭环。',
      { ...coreDetails }
    );
  }

  if (criticalJsErrors.length > 10 || criticalCssErrors.length > 10) {
    return verdict(
      'degraded',
      `检测到 ${criticalJsErrors.length} 个关键 JS 资源和 ${criticalCssErrors.length} 个 CSS 资源 404 错误,页面功能降级,业务验证结果可能不可靠。`,
      false,
      '修复关键资源加载问题后重新验证。',
      { ...coreDetails }
    );
  }

  const noisy404s = failedLoads(/\.(png|jpg|jpeg|gif|svg|ico|woff|woff2|ttf|eot)($|\?)/i);
  const fullDetails = { ...coreDetails, noisy404s: noisy404s.length };

  if (noisy404s.length > 20) {
    return verdict(
      'noisy',
      `检测到 ${noisy404s.length} 个图片/字体资源 404 错误,页面可运行但用户体验可能受影响。`,
      true,
      '建议修复资源 404 以提升用户体验,但不影响核心业务验证。',
      fullDetails
    );
  }

  return verdict(
    'functional',
    `页面功能正常 (健康度 ${healthScore}/100),可执行真实业务验证。`,
    true,
    healthScore >= 90 ? '继续业务动作验证。' : '建议修复部分资源 404 后验证以提升结果可信度。',
    fullDetails
  );
}
138
+
139
/**
 * Builds the grouping key used to de-duplicate error records.
 *
 * The key is "<source> <status> <url> <text>", where
 *   - source defaults to 'unknown';
 *   - status and url are omitted when falsy;
 *   - the url is cut at the first '?' or '#';
 *   - text is the first truthy of text / message / errorText / stack, with
 *     every run of three or more digits replaced by '#' and then limited to
 *     180 characters, so records differing only in ids or timestamps
 *     collapse into one key.
 * The result is trimmed.
 *
 * @param {Object} [item={}] - Error record.
 * @returns {string} Signature string.
 */
function signatureOf(item = {}) {
  const origin = item.source || 'unknown';
  const statusPart = item.status ? ` ${item.status}` : '';

  let urlPart = '';
  if (item.url) {
    const bareUrl = String(item.url).replace(/[?#].*$/, '');
    urlPart = ` ${bareUrl}`;
  }

  const rawText = item.text || item.message || item.errorText || item.stack || '';
  const normalizedText = String(rawText).replace(/\d{3,}/g, '#').slice(0, 180);

  return `${origin}${statusPart}${urlPart} ${normalizedText}`.trim();
}
145
+
146
/**
 * Groups the error records of an evidence bundle by signature and returns
 * the most important groups.
 *
 * A record is kept when its severity is above 0, it is flagged `failed`, or
 * its status is >= 400. Each group tracks how many records share the
 * signature, the highest severity seen, and up to two example records (text
 * limited to 260 characters via truncate). Groups are ordered by severity,
 * then by count, both descending.
 *
 * @param {Object} [input={}] - Evidence bundle, passed to collectRawErrors.
 * @param {Object} [options={}]
 * @param {number} [options.limit] - Maximum number of groups returned; a
 *   missing or falsy value means 5.
 * @returns {{topErrors: Object[], summary: string, uniqueCount: number,
 *   totalCount: number}} `uniqueCount` counts all groups and `totalCount`
 *   all kept records, regardless of the limit.
 */
function aggregateErrors(input = {}, options = {}) {
  const isReportable = (entry) => severityOf(entry) > 0 || entry.failed || entry.status >= 400;
  const toExample = (entry) => ({
    source: entry.source,
    type: entry.type,
    status: entry.status,
    method: entry.method,
    url: entry.url,
    text: truncate(entry.text || entry.message || entry.errorText || entry.stack || '', 260),
    timestamp: entry.timestamp
  });

  const reportable = collectRawErrors(input).filter(isReportable);

  const buckets = new Map();
  for (const entry of reportable) {
    const key = signatureOf(entry);
    let bucket = buckets.get(key);
    if (!bucket) {
      bucket = { signature: key, count: 0, severity: 0, examples: [] };
      buckets.set(key, bucket);
    }
    bucket.count += 1;
    bucket.severity = Math.max(bucket.severity, severityOf(entry));
    if (bucket.examples.length < 2) bucket.examples.push(toExample(entry));
  }

  const ranked = [...buckets.values()].sort(
    (left, right) => (right.severity - left.severity) || (right.count - left.count)
  );
  const topErrors = ranked.slice(0, options.limit || 5);

  const uniqueCount = buckets.size;
  const totalCount = reportable.length;
  return {
    topErrors,
    summary: buildSummary(topErrors, uniqueCount, totalCount),
    uniqueCount,
    totalCount
  };
}
176
+
177
/**
 * Renders a short Markdown summary of aggregated errors.
 *
 * Layout: a heading, a pass/fail status line ('fail' when any top error
 * exists), the total/unique counts, then one bullet per top error (at most
 * five) formatted as "[<count>x/S<severity>] <signature>", followed by the
 * first example's URL when it has one. The whole text is capped at 500
 * characters; longer output is cut to 497 characters plus '...'.
 *
 * @param {Object[]} topErrors - Groups produced by aggregateErrors.
 * @param {number} uniqueCount - Number of distinct signatures.
 * @param {number} totalCount - Number of error records.
 * @returns {string} Markdown text.
 */
function buildSummary(topErrors, uniqueCount, totalCount) {
  const header = [
    '## Error Summary',
    `- Status: ${topErrors.length ? 'fail' : 'pass'}`,
    `- Errors: total=${totalCount}, unique=${uniqueCount}`,
    '- Top errors:'
  ];

  const bullets = topErrors.length
    ? topErrors.slice(0, 5).flatMap(group => {
      const firstExample = group.examples?.[0] || {};
      const headline = ` - [${group.count}x/S${group.severity}] ${truncate(group.signature, 160)}`;
      return firstExample.url
        ? [headline, ` - url: ${truncate(firstExample.url, 120)}`]
        : [headline];
    })
    : [' - none'];

  const markdown = [...header, ...bullets].join('\n');
  if (markdown.length <= 500) return markdown;
  return `${markdown.slice(0, 497)}...`;
}
193
+
194
/**
 * Returns the Markdown error summary for either raw evidence or an
 * already-aggregated result.
 *
 * Input that has a truthy `topErrors` is treated as aggregated and used
 * as-is; anything else is aggregated first. An existing non-empty `summary`
 * is returned unchanged, otherwise one is built from the aggregate's fields
 * (missing fields default to an empty list / 0).
 *
 * @param {Object} [input={}] - Evidence bundle or aggregateErrors() result.
 * @param {Object} [options={}] - Passed to aggregateErrors when aggregating.
 * @returns {string} Markdown summary.
 */
function errorSummaryMd(input = {}, options = {}) {
  const report = input.topErrors ? input : aggregateErrors(input, options);
  if (report.summary) return report.summary;

  const { topErrors, uniqueCount, totalCount } = report;
  return buildSummary(topErrors || [], uniqueCount || 0, totalCount || 0);
}
198
+
199
// Public API of the error aggregator.
// NOTE(review): pageFunctionalStatus is defined in this file but is neither
// exported here nor called by the exported functions — confirm whether it
// should be part of the public API. severityOf, signatureOf and buildSummary
// are likewise internal.
module.exports = {
  aggregateErrors,
  errorSummaryMd,
  collectRawErrors
};