jarvis-ai-assistant 0.3.30__py3-none-any.whl → 0.7.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181)
  1. jarvis/__init__.py +1 -1
  2. jarvis/jarvis_agent/__init__.py +458 -152
  3. jarvis/jarvis_agent/agent_manager.py +17 -13
  4. jarvis/jarvis_agent/builtin_input_handler.py +2 -6
  5. jarvis/jarvis_agent/config_editor.py +2 -7
  6. jarvis/jarvis_agent/event_bus.py +82 -12
  7. jarvis/jarvis_agent/file_context_handler.py +329 -0
  8. jarvis/jarvis_agent/file_methodology_manager.py +3 -4
  9. jarvis/jarvis_agent/jarvis.py +628 -55
  10. jarvis/jarvis_agent/language_extractors/__init__.py +57 -0
  11. jarvis/jarvis_agent/language_extractors/c_extractor.py +21 -0
  12. jarvis/jarvis_agent/language_extractors/cpp_extractor.py +21 -0
  13. jarvis/jarvis_agent/language_extractors/go_extractor.py +21 -0
  14. jarvis/jarvis_agent/language_extractors/java_extractor.py +84 -0
  15. jarvis/jarvis_agent/language_extractors/javascript_extractor.py +79 -0
  16. jarvis/jarvis_agent/language_extractors/python_extractor.py +21 -0
  17. jarvis/jarvis_agent/language_extractors/rust_extractor.py +21 -0
  18. jarvis/jarvis_agent/language_extractors/typescript_extractor.py +84 -0
  19. jarvis/jarvis_agent/language_support_info.py +486 -0
  20. jarvis/jarvis_agent/main.py +34 -10
  21. jarvis/jarvis_agent/memory_manager.py +7 -16
  22. jarvis/jarvis_agent/methodology_share_manager.py +10 -16
  23. jarvis/jarvis_agent/prompt_manager.py +1 -1
  24. jarvis/jarvis_agent/prompts.py +193 -171
  25. jarvis/jarvis_agent/protocols.py +8 -12
  26. jarvis/jarvis_agent/run_loop.py +105 -9
  27. jarvis/jarvis_agent/session_manager.py +2 -3
  28. jarvis/jarvis_agent/share_manager.py +20 -22
  29. jarvis/jarvis_agent/shell_input_handler.py +1 -2
  30. jarvis/jarvis_agent/stdio_redirect.py +295 -0
  31. jarvis/jarvis_agent/task_analyzer.py +31 -6
  32. jarvis/jarvis_agent/task_manager.py +11 -27
  33. jarvis/jarvis_agent/tool_executor.py +2 -3
  34. jarvis/jarvis_agent/tool_share_manager.py +12 -24
  35. jarvis/jarvis_agent/utils.py +5 -1
  36. jarvis/jarvis_agent/web_bridge.py +189 -0
  37. jarvis/jarvis_agent/web_output_sink.py +53 -0
  38. jarvis/jarvis_agent/web_server.py +786 -0
  39. jarvis/jarvis_c2rust/__init__.py +26 -0
  40. jarvis/jarvis_c2rust/cli.py +575 -0
  41. jarvis/jarvis_c2rust/collector.py +250 -0
  42. jarvis/jarvis_c2rust/constants.py +26 -0
  43. jarvis/jarvis_c2rust/library_replacer.py +1254 -0
  44. jarvis/jarvis_c2rust/llm_module_agent.py +1272 -0
  45. jarvis/jarvis_c2rust/loaders.py +207 -0
  46. jarvis/jarvis_c2rust/models.py +28 -0
  47. jarvis/jarvis_c2rust/optimizer.py +2157 -0
  48. jarvis/jarvis_c2rust/scanner.py +1681 -0
  49. jarvis/jarvis_c2rust/transpiler.py +2983 -0
  50. jarvis/jarvis_c2rust/utils.py +385 -0
  51. jarvis/jarvis_code_agent/build_validation_config.py +132 -0
  52. jarvis/jarvis_code_agent/code_agent.py +1371 -220
  53. jarvis/jarvis_code_agent/code_analyzer/__init__.py +65 -0
  54. jarvis/jarvis_code_agent/code_analyzer/base_language.py +74 -0
  55. jarvis/jarvis_code_agent/code_analyzer/build_validator/__init__.py +44 -0
  56. jarvis/jarvis_code_agent/code_analyzer/build_validator/base.py +106 -0
  57. jarvis/jarvis_code_agent/code_analyzer/build_validator/cmake.py +74 -0
  58. jarvis/jarvis_code_agent/code_analyzer/build_validator/detector.py +125 -0
  59. jarvis/jarvis_code_agent/code_analyzer/build_validator/fallback.py +72 -0
  60. jarvis/jarvis_code_agent/code_analyzer/build_validator/go.py +70 -0
  61. jarvis/jarvis_code_agent/code_analyzer/build_validator/java_gradle.py +53 -0
  62. jarvis/jarvis_code_agent/code_analyzer/build_validator/java_maven.py +47 -0
  63. jarvis/jarvis_code_agent/code_analyzer/build_validator/makefile.py +61 -0
  64. jarvis/jarvis_code_agent/code_analyzer/build_validator/nodejs.py +110 -0
  65. jarvis/jarvis_code_agent/code_analyzer/build_validator/python.py +154 -0
  66. jarvis/jarvis_code_agent/code_analyzer/build_validator/rust.py +110 -0
  67. jarvis/jarvis_code_agent/code_analyzer/build_validator/validator.py +153 -0
  68. jarvis/jarvis_code_agent/code_analyzer/build_validator.py +43 -0
  69. jarvis/jarvis_code_agent/code_analyzer/context_manager.py +648 -0
  70. jarvis/jarvis_code_agent/code_analyzer/context_recommender.py +18 -0
  71. jarvis/jarvis_code_agent/code_analyzer/dependency_analyzer.py +132 -0
  72. jarvis/jarvis_code_agent/code_analyzer/file_ignore.py +330 -0
  73. jarvis/jarvis_code_agent/code_analyzer/impact_analyzer.py +781 -0
  74. jarvis/jarvis_code_agent/code_analyzer/language_registry.py +185 -0
  75. jarvis/jarvis_code_agent/code_analyzer/language_support.py +110 -0
  76. jarvis/jarvis_code_agent/code_analyzer/languages/__init__.py +49 -0
  77. jarvis/jarvis_code_agent/code_analyzer/languages/c_cpp_language.py +299 -0
  78. jarvis/jarvis_code_agent/code_analyzer/languages/go_language.py +215 -0
  79. jarvis/jarvis_code_agent/code_analyzer/languages/java_language.py +212 -0
  80. jarvis/jarvis_code_agent/code_analyzer/languages/javascript_language.py +254 -0
  81. jarvis/jarvis_code_agent/code_analyzer/languages/python_language.py +269 -0
  82. jarvis/jarvis_code_agent/code_analyzer/languages/rust_language.py +281 -0
  83. jarvis/jarvis_code_agent/code_analyzer/languages/typescript_language.py +280 -0
  84. jarvis/jarvis_code_agent/code_analyzer/llm_context_recommender.py +605 -0
  85. jarvis/jarvis_code_agent/code_analyzer/structured_code.py +556 -0
  86. jarvis/jarvis_code_agent/code_analyzer/symbol_extractor.py +252 -0
  87. jarvis/jarvis_code_agent/code_analyzer/tree_sitter_extractor.py +58 -0
  88. jarvis/jarvis_code_agent/lint.py +501 -8
  89. jarvis/jarvis_code_agent/utils.py +141 -0
  90. jarvis/jarvis_code_analysis/code_review.py +493 -584
  91. jarvis/jarvis_data/config_schema.json +128 -12
  92. jarvis/jarvis_git_squash/main.py +4 -5
  93. jarvis/jarvis_git_utils/git_commiter.py +82 -75
  94. jarvis/jarvis_mcp/sse_mcp_client.py +22 -29
  95. jarvis/jarvis_mcp/stdio_mcp_client.py +12 -13
  96. jarvis/jarvis_mcp/streamable_mcp_client.py +15 -14
  97. jarvis/jarvis_memory_organizer/memory_organizer.py +55 -74
  98. jarvis/jarvis_methodology/main.py +32 -48
  99. jarvis/jarvis_multi_agent/__init__.py +287 -55
  100. jarvis/jarvis_multi_agent/main.py +36 -4
  101. jarvis/jarvis_platform/base.py +524 -202
  102. jarvis/jarvis_platform/human.py +7 -8
  103. jarvis/jarvis_platform/kimi.py +30 -36
  104. jarvis/jarvis_platform/openai.py +88 -25
  105. jarvis/jarvis_platform/registry.py +26 -10
  106. jarvis/jarvis_platform/tongyi.py +24 -25
  107. jarvis/jarvis_platform/yuanbao.py +32 -43
  108. jarvis/jarvis_platform_manager/main.py +66 -77
  109. jarvis/jarvis_platform_manager/service.py +8 -13
  110. jarvis/jarvis_rag/cli.py +53 -55
  111. jarvis/jarvis_rag/embedding_manager.py +13 -18
  112. jarvis/jarvis_rag/llm_interface.py +8 -9
  113. jarvis/jarvis_rag/query_rewriter.py +10 -21
  114. jarvis/jarvis_rag/rag_pipeline.py +24 -27
  115. jarvis/jarvis_rag/reranker.py +4 -5
  116. jarvis/jarvis_rag/retriever.py +28 -30
  117. jarvis/jarvis_sec/__init__.py +305 -0
  118. jarvis/jarvis_sec/agents.py +143 -0
  119. jarvis/jarvis_sec/analysis.py +276 -0
  120. jarvis/jarvis_sec/checkers/__init__.py +32 -0
  121. jarvis/jarvis_sec/checkers/c_checker.py +2680 -0
  122. jarvis/jarvis_sec/checkers/rust_checker.py +1108 -0
  123. jarvis/jarvis_sec/cli.py +139 -0
  124. jarvis/jarvis_sec/clustering.py +1439 -0
  125. jarvis/jarvis_sec/file_manager.py +427 -0
  126. jarvis/jarvis_sec/parsers.py +73 -0
  127. jarvis/jarvis_sec/prompts.py +268 -0
  128. jarvis/jarvis_sec/report.py +336 -0
  129. jarvis/jarvis_sec/review.py +453 -0
  130. jarvis/jarvis_sec/status.py +264 -0
  131. jarvis/jarvis_sec/types.py +20 -0
  132. jarvis/jarvis_sec/utils.py +499 -0
  133. jarvis/jarvis_sec/verification.py +848 -0
  134. jarvis/jarvis_sec/workflow.py +226 -0
  135. jarvis/jarvis_smart_shell/main.py +38 -87
  136. jarvis/jarvis_stats/cli.py +2 -2
  137. jarvis/jarvis_stats/stats.py +8 -8
  138. jarvis/jarvis_stats/storage.py +15 -21
  139. jarvis/jarvis_stats/visualizer.py +1 -1
  140. jarvis/jarvis_tools/clear_memory.py +3 -20
  141. jarvis/jarvis_tools/cli/main.py +21 -23
  142. jarvis/jarvis_tools/edit_file.py +1019 -132
  143. jarvis/jarvis_tools/execute_script.py +83 -25
  144. jarvis/jarvis_tools/file_analyzer.py +6 -9
  145. jarvis/jarvis_tools/generate_new_tool.py +14 -21
  146. jarvis/jarvis_tools/lsp_client.py +1552 -0
  147. jarvis/jarvis_tools/methodology.py +2 -3
  148. jarvis/jarvis_tools/read_code.py +1736 -35
  149. jarvis/jarvis_tools/read_symbols.py +140 -0
  150. jarvis/jarvis_tools/read_webpage.py +12 -13
  151. jarvis/jarvis_tools/registry.py +427 -200
  152. jarvis/jarvis_tools/retrieve_memory.py +20 -19
  153. jarvis/jarvis_tools/rewrite_file.py +72 -158
  154. jarvis/jarvis_tools/save_memory.py +3 -15
  155. jarvis/jarvis_tools/search_web.py +18 -18
  156. jarvis/jarvis_tools/sub_agent.py +36 -43
  157. jarvis/jarvis_tools/sub_code_agent.py +25 -26
  158. jarvis/jarvis_tools/virtual_tty.py +55 -33
  159. jarvis/jarvis_utils/clipboard.py +7 -10
  160. jarvis/jarvis_utils/config.py +232 -45
  161. jarvis/jarvis_utils/embedding.py +8 -5
  162. jarvis/jarvis_utils/fzf.py +8 -8
  163. jarvis/jarvis_utils/git_utils.py +225 -36
  164. jarvis/jarvis_utils/globals.py +3 -3
  165. jarvis/jarvis_utils/http.py +1 -1
  166. jarvis/jarvis_utils/input.py +99 -48
  167. jarvis/jarvis_utils/jsonnet_compat.py +465 -0
  168. jarvis/jarvis_utils/methodology.py +52 -48
  169. jarvis/jarvis_utils/utils.py +819 -491
  170. jarvis_ai_assistant-0.7.6.dist-info/METADATA +600 -0
  171. jarvis_ai_assistant-0.7.6.dist-info/RECORD +218 -0
  172. {jarvis_ai_assistant-0.3.30.dist-info → jarvis_ai_assistant-0.7.6.dist-info}/entry_points.txt +4 -0
  173. jarvis/jarvis_agent/config.py +0 -92
  174. jarvis/jarvis_agent/edit_file_handler.py +0 -296
  175. jarvis/jarvis_platform/ai8.py +0 -332
  176. jarvis/jarvis_tools/ask_user.py +0 -54
  177. jarvis_ai_assistant-0.3.30.dist-info/METADATA +0 -381
  178. jarvis_ai_assistant-0.3.30.dist-info/RECORD +0 -137
  179. {jarvis_ai_assistant-0.3.30.dist-info → jarvis_ai_assistant-0.7.6.dist-info}/WHEEL +0 -0
  180. {jarvis_ai_assistant-0.3.30.dist-info → jarvis_ai_assistant-0.7.6.dist-info}/licenses/LICENSE +0 -0
  181. {jarvis_ai_assistant-0.3.30.dist-info → jarvis_ai_assistant-0.7.6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,848 @@
1
+ # -*- coding: utf-8 -*-
2
+ """验证相关模块"""
3
+
4
+ from typing import Dict, List, Optional
5
+ import typer
6
+
7
+ from jarvis.jarvis_agent import Agent
8
+ from jarvis.jarvis_tools.registry import ToolRegistry
9
+ from jarvis.jarvis_sec.prompts import build_verification_summary_prompt
10
+ from jarvis.jarvis_sec.parsers import try_parse_summary_report
11
+ from jarvis.jarvis_sec.agents import create_analysis_agent, subscribe_summary_event
12
+ from jarvis.jarvis_sec.utils import git_restore_if_dirty, sig_of, count_issues_from_file
13
+ from jarvis.jarvis_sec.analysis import (
14
+ build_analysis_task_context,
15
+ run_analysis_agent_with_retry,
16
+ expand_and_filter_analysis_results,
17
+ )
18
+
19
+
20
def build_gid_to_verification_mapping(verification_results: List[Dict]) -> Dict[int, Dict]:
    """Build a mapping from gid to the verdict reported for it.

    Each entry of ``verification_results`` is expected to be a dict carrying
    either a ``gids`` list or a single ``gid``, together with ``is_valid`` and
    an optional ``verification_notes``. Non-dict entries are skipped. Only gid
    values that convert to an integer >= 1 are recorded; every other value
    produces a best-effort console warning. When a gid appears more than once,
    the last entry wins.

    Args:
        verification_results: Parsed result items to index.

    Returns:
        A dict keyed by integer gid. Each value holds ``is_valid`` (copied
        unchanged from the entry) and ``verification_notes`` (converted to
        ``str`` and stripped).
    """

    def _warn(message: str) -> None:
        # Console output is best-effort: a failure to print must never abort the mapping.
        try:
            typer.secho(message, fg=typer.colors.YELLOW)
        except Exception:
            pass

    gid_to_verification: Dict[int, Dict] = {}
    for vr in verification_results:
        if not isinstance(vr, dict):
            continue

        gids_to_process: List[int] = []
        if isinstance(vr.get("gids"), list):
            for gid_val in vr["gids"]:
                try:
                    gid_int = int(gid_val)
                except (TypeError, ValueError, OverflowError) as e:
                    _warn(f"[jarvis-sec] 警告:验证结果中 gids 数组元素格式错误: {gid_val}, 错误: {e}")
                    continue
                if gid_int >= 1:
                    gids_to_process.append(gid_int)
                else:
                    # Same diagnostic as the single-gid branch, so out-of-range
                    # entries in a list are no longer dropped silently.
                    _warn(f"[jarvis-sec] 警告:验证结果中 gid 值无效: {gid_val} (必须 >= 1)")
        elif "gid" in vr:
            gid_val = vr.get("gid", 0)
            try:
                gid_int = int(gid_val)
            except (TypeError, ValueError, OverflowError) as e:
                _warn(f"[jarvis-sec] 警告:验证结果中 gid 格式错误: {vr.get('gid')}, 错误: {e}")
            else:
                if gid_int >= 1:
                    gids_to_process.append(gid_int)
                else:
                    _warn(f"[jarvis-sec] 警告:验证结果中 gid 值无效: {gid_val} (必须 >= 1)")
        else:
            _warn(f"[jarvis-sec] 警告:验证结果项缺少 gid 或 gids 字段: {vr}")

        is_valid = vr.get("is_valid")
        verification_notes = str(vr.get("verification_notes", "")).strip()
        for gid in gids_to_process:
            gid_to_verification[gid] = {
                "is_valid": is_valid,
                "verification_notes": verification_notes,
            }
    return gid_to_verification
68
+
69
+
70
def merge_verified_items(
    items_with_risk: List[Dict],
    batch: List[Dict],
    gid_to_verification: Dict[int, Dict],
) -> List[Dict]:
    """Merge the alerts whose verification verdict is ``is_valid is True``.

    For every item in ``items_with_risk`` the verdict is looked up by gid in
    ``gid_to_verification``. Accepted items are merged with the matching
    candidate from ``batch`` (candidate fields first, analysis fields on top)
    and receive a stripped ``verification_notes``. Items judged
    ``is_valid is False`` and items with no usable verdict are dropped, each
    with a best-effort console message.

    A gid that cannot be converted to an integer is treated as having no
    verdict instead of raising, so one malformed item cannot abort the batch.

    Args:
        items_with_risk: Analysis results to filter.
        batch: Original candidates; entries whose gid is not an integer >= 1
            (or that are not dicts) are ignored when indexing.
        gid_to_verification: Mapping from integer gid to a dict with
            ``is_valid`` and ``verification_notes``.

    Returns:
        The merged items that passed verification, in input order.
    """

    def _parse_gid(value) -> Optional[int]:
        try:
            return int(value)
        except (TypeError, ValueError, OverflowError):
            return None

    def _say(message: str, color_name: str) -> None:
        # Console output is best-effort; the colour is resolved inside the
        # try block so that a printing problem can never abort the merge.
        try:
            typer.secho(message, fg=getattr(typer.colors, color_name))
        except Exception:
            pass

    gid_to_candidate: Dict[int, Dict] = {}
    for c in batch:
        if not isinstance(c, dict):
            continue
        c_gid = _parse_gid(c.get("gid", 0))
        if c_gid is not None and c_gid >= 1:
            gid_to_candidate[c_gid] = c

    verified_items: List[Dict] = []
    for item in items_with_risk:
        raw_gid = item.get("gid", 0)
        item_gid = _parse_gid(raw_gid)
        # Show the raw value in messages when it could not be parsed.
        gid_label = raw_gid if item_gid is None else item_gid

        verification = gid_to_verification.get(item_gid) if item_gid is not None else None
        verdict = verification.get("is_valid") if verification else None

        if verdict is True:
            # Candidate fields (file, line, pattern, ...) first; the analysis
            # result overrides them on key collisions.
            candidate = gid_to_candidate.get(item_gid, {})
            verified_items.append(
                {
                    **candidate,
                    **item,
                    "verification_notes": str(verification.get("verification_notes", "")).strip(),
                }
            )
        elif verdict is False:
            _say(
                f"[jarvis-sec] 验证 Agent 判定 gid={gid_label} 为误报: {verification.get('verification_notes', '')}",
                "BLUE",
            )
        else:
            _say(
                f"[jarvis-sec] 警告:验证结果中未找到 gid={gid_label},视为验证不通过",
                "YELLOW",
            )
    return verified_items
109
+
110
+
111
def merge_verified_items_without_verification(
    items_with_risk: List[Dict],
    batch: List[Dict],
) -> List[Dict]:
    """Merge analysis results with their candidates, skipping second-pass verification.

    Every item is merged with the matching candidate from ``batch`` (candidate
    fields first, analysis fields on top) and tagged with a fixed
    ``verification_notes`` marker. An item carrying a ``gids`` collection is
    expanded into one output item per gid, with ``gids`` removed and ``gid``
    set to the individual value.

    Gid values are normalised to ``int`` before the candidate lookup, because
    the candidate index is keyed by integers; a value such as ``"1"`` therefore
    still finds candidate 1. Values that cannot be converted are kept as-is and
    merged without candidate data rather than raising.

    Args:
        items_with_risk: Analysis results to merge.
        batch: Original candidates; entries whose gid is not an integer >= 1
            (or that are not dicts) are ignored when indexing.

    Returns:
        The merged items, in input order (expanded gids keep their list order).
    """
    note = "未进行二次验证(--no-verification)"

    def _parse_gid(value) -> Optional[int]:
        try:
            return int(value)
        except (TypeError, ValueError, OverflowError):
            return None

    gid_to_candidate: Dict[int, Dict] = {}
    for c in batch:
        if not isinstance(c, dict):
            continue
        c_gid = _parse_gid(c.get("gid", 0))
        if c_gid is not None and c_gid >= 1:
            gid_to_candidate[c_gid] = c

    verified_items: List[Dict] = []
    for item in items_with_risk:
        if "gids" in item:
            raw_gids = item.get("gids")
            if raw_gids is None:
                raw_gids = []
            elif not isinstance(raw_gids, (list, tuple)):
                # A scalar is treated as a single gid instead of being iterated.
                raw_gids = [raw_gids]
            for raw_gid in raw_gids:
                gid = _parse_gid(raw_gid)
                candidate = gid_to_candidate.get(gid, {})
                merged_item = {
                    **candidate,
                    **item,
                    # One output item per gid; fall back to the raw value when
                    # it cannot be normalised so no result is dropped.
                    "gid": raw_gid if gid is None else gid,
                    "verification_notes": note,
                }
                merged_item.pop("gids", None)
                verified_items.append(merged_item)
        else:
            candidate = gid_to_candidate.get(_parse_gid(item.get("gid", 0)), {})
            verified_items.append(
                {
                    **candidate,
                    **item,
                    "verification_notes": note,
                }
            )
    return verified_items
152
+
153
+
154
def is_valid_verification_item(item: Dict) -> bool:
    """Check that a verification result item has the expected shape.

    An item is accepted when all of the following hold:

    - it is a dict whose ``is_valid`` value is a real boolean (the code that
      consumes these items compares the verdict with ``is True`` /
      ``is False``, so any other type could never be acted upon);
    - it carries exactly one of ``gid`` or ``gids``;
    - ``gid`` converts to an integer >= 1, or ``gids`` is a non-empty list in
      which every element converts to an integer >= 1.

    Args:
        item: The candidate result item; any type is tolerated.

    Returns:
        ``True`` if the item satisfies the rules above, ``False`` otherwise.
    """
    if not isinstance(item, dict) or not isinstance(item.get("is_valid"), bool):
        return False

    has_gid = "gid" in item
    has_gids = "gids" in item
    if has_gid == has_gids:
        # Either both are missing or both are present; exactly one is required.
        return False

    try:
        if has_gid:
            return int(item["gid"]) >= 1
        gids = item["gids"]
        if not isinstance(gids, list) or not gids:
            return False
        return all(int(gid_val) >= 1 for gid_val in gids)
    except (TypeError, ValueError, OverflowError):
        return False
177
+
178
+
179
def run_verification_agent_with_retry(
    verification_agent,
    verification_task: str,
    verification_summary_prompt: str,
    entry_path: str,
    verification_summary_container: Dict[str, str],
    bidx: int,
) -> tuple[Optional[List[Dict]], Optional[str]]:
    """Run the verification agent, retrying until its report has a valid format.

    The first attempt calls ``verification_agent.run``. Every later attempt
    calls the agent's model directly with the task, the previous parse error
    (if any) and the summary prompt; if that direct call raises, the attempt
    falls back to ``verification_agent.run``. After each attempt
    ``git_restore_if_dirty(entry_path)`` is invoked on a best-effort basis and
    the text found in ``verification_summary_container["text"]`` is parsed.

    The loop has no attempt limit: it only ends when the parsed report is a
    non-empty list in which every element passes
    ``is_valid_verification_item``.

    Args:
        verification_agent: Agent exposing ``run(task)`` and
            ``model.chat_until_success(prompt)``.
        verification_task: Task text handed to the agent / model.
        verification_summary_prompt: Summary prompt appended to direct model
            calls. When empty, ``build_verification_summary_prompt()`` is used.
        entry_path: Path passed to ``git_restore_if_dirty``.
        verification_summary_container: Dict whose ``"text"`` entry receives the
            summary; it is reset to ``""`` at the start of every attempt.
            NOTE(review): on ``run()`` attempts this is presumably filled by an
            event subscription set up by the caller — confirm.
        bidx: Batch index, used only in console messages.

    Returns:
        ``(results, None)`` — the function only returns on success, so the
        second element is always ``None``.
    """

    def _say(message: str, color_name: str) -> None:
        # Console output is best-effort; the colour is resolved inside the
        # try block so that a printing problem can never abort the loop.
        try:
            typer.secho(message, fg=getattr(typer.colors, color_name))
        except Exception:
            pass

    use_direct_model_verify = False
    prev_parse_error_verify: Optional[str] = None
    verify_attempt = 0

    while True:
        verify_attempt += 1
        verification_summary_container["text"] = ""

        if use_direct_model_verify:
            # Honour the prompt supplied by the caller; rebuild it only when
            # the caller passed nothing usable.
            verification_summary_prompt_text = (
                verification_summary_prompt or build_verification_summary_prompt()
            )
            error_guidance = ""
            if prev_parse_error_verify:
                error_guidance = f"\n\n**格式错误详情(请根据以下错误修复输出格式):**\n- JSON解析失败: {prev_parse_error_verify}\n\n请确保输出的JSON格式正确,包括正确的引号、逗号、大括号等。仅输出一个 <REPORT> 块,块内直接包含 JSON 数组(不需要额外的标签)。支持jsonnet语法(如尾随逗号、注释、||| 或 ``` 分隔符多行字符串等)。"

            full_verify_prompt = f"{verification_task}{error_guidance}\n\n{verification_summary_prompt_text}"
            try:
                verify_response = verification_agent.model.chat_until_success(full_verify_prompt)  # type: ignore
                verification_summary_container["text"] = verify_response
            except Exception as e:
                _say(f"[jarvis-sec] 验证阶段直接模型调用失败: {e},回退到 run()", "YELLOW")
                verification_agent.run(verification_task)
        else:
            verification_agent.run(verification_task)

        # Workspace protection: failures here are deliberately ignored.
        try:
            _changed_verify = git_restore_if_dirty(entry_path)
        except Exception:
            _changed_verify = None
        if _changed_verify:
            _say(f"[jarvis-sec] 验证 Agent 工作区已恢复 ({_changed_verify} 个文件)", "BLUE")

        # Parse the verification report.
        verification_summary_text = verification_summary_container.get("text", "")
        parse_error_verify = None
        if verification_summary_text:
            verification_parsed, parse_error_verify = try_parse_summary_report(verification_summary_text)
            if parse_error_verify:
                prev_parse_error_verify = parse_error_verify
                _say(f"[jarvis-sec] 验证结果JSON解析失败: {parse_error_verify}", "YELLOW")
            else:
                prev_parse_error_verify = None
                if (
                    isinstance(verification_parsed, list)
                    and verification_parsed
                    and all(is_valid_verification_item(item) for item in verification_parsed)
                ):
                    return verification_parsed, None

        # Format check failed: every following attempt calls the model directly.
        use_direct_model_verify = True
        if parse_error_verify:
            _say(
                f"[jarvis-sec] 验证结果JSON解析失败 -> 重试第 {verify_attempt} 次 (批次={bidx},使用直接模型调用,将反馈解析错误)",
                "YELLOW",
            )
        else:
            _say(
                f"[jarvis-sec] 验证结果格式无效 -> 重试第 {verify_attempt} 次 (批次={bidx},使用直接模型调用)",
                "YELLOW",
            )
255
+
256
+
257
+ def process_verification_batch(
258
+ batch: List[Dict],
259
+ bidx: int,
260
+ total_batches: int,
261
+ entry_path: str,
262
+ langs: List[str],
263
+ llm_group: Optional[str],
264
+ status_mgr,
265
+ _progress_append,
266
+ _append_report,
267
+ meta_records: List[Dict],
268
+ gid_counts: Dict[int, int],
269
+ sec_dir,
270
+ enable_verification: bool = True,
271
+ force_save_memory: bool = False,
272
+ ) -> None:
273
+ """
274
+ 处理单个验证批次。
275
+
276
+ 参数:
277
+ - batch: 当前批次的候选列表
278
+ - bidx: 批次索引
279
+ - total_batches: 总批次数
280
+ - 其他参数用于状态管理和结果收集
281
+ """
282
+ task_id = f"JARVIS-SEC-Batch-{bidx}"
283
+ batch_file = batch[0].get("file") if batch else None
284
+
285
+ # 进度:批次开始
286
+ _progress_append(
287
+ {
288
+ "event": "batch_status",
289
+ "status": "running",
290
+ "batch_id": task_id,
291
+ "batch_index": bidx,
292
+ "total_batches": total_batches,
293
+ "batch_size": len(batch),
294
+ "file": batch_file,
295
+ }
296
+ )
297
+ # 更新验证阶段进度
298
+ status_mgr.update_verification(
299
+ current_batch=bidx,
300
+ total_batches=total_batches,
301
+ batch_id=task_id,
302
+ file_name=batch_file,
303
+ message=f"正在验证批次 {bidx}/{total_batches}"
304
+ )
305
+
306
+ # 显示进度(提取批次中的所有 gid,用于后续打印)
307
+ batch_gids_all = []
308
+ try:
309
+ for item in batch:
310
+ try:
311
+ gid_val = item.get("gid", 0)
312
+ gid_int = int(gid_val) if gid_val else 0
313
+ if gid_int >= 1:
314
+ batch_gids_all.append(gid_int)
315
+ except Exception:
316
+ pass
317
+ batch_gids_all_sorted = sorted(batch_gids_all)
318
+ except Exception:
319
+ batch_gids_all_sorted = []
320
+
321
+ # 显示进度
322
+ try:
323
+ if batch_gids_all_sorted:
324
+ gids_str = str(batch_gids_all_sorted)
325
+ typer.secho(f"\n[jarvis-sec] 分析批次 {bidx}/{total_batches}: 大小={len(batch)} 文件='{batch_file}' gids={gids_str}", fg=typer.colors.CYAN)
326
+ else:
327
+ typer.secho(f"\n[jarvis-sec] 分析批次 {bidx}/{total_batches}: 大小={len(batch)} 文件='{batch_file}' (无有效gid)", fg=typer.colors.CYAN)
328
+ except Exception:
329
+ pass
330
+
331
+ # 创建分析Agent
332
+ agent = create_analysis_agent(task_id, llm_group, force_save_memory=force_save_memory)
333
+
334
+ # 构建任务上下文
335
+ per_task = build_analysis_task_context(batch, entry_path, langs)
336
+
337
+ # 订阅摘要事件
338
+ summary_container = subscribe_summary_event(agent)
339
+
340
+ # 运行分析Agent并重试
341
+ summary_items, workspace_restore_info = run_analysis_agent_with_retry(
342
+ agent, per_task, summary_container, entry_path, task_id, bidx, meta_records
343
+ )
344
+
345
+ # 处理分析结果
346
+ parse_fail = summary_items is None
347
+ verified_items: List[Dict] = []
348
+
349
+ # 处理空数组情况:分析 Agent 返回 [] 表示所有候选都被判定为无风险
350
+ if summary_items is not None and len(summary_items) == 0:
351
+ # 空数组表示所有候选都被判定为无风险,需要保存到 analysis.jsonl
352
+ try:
353
+ batch_gids = sorted([int(item.get("gid", 0)) for item in batch if int(item.get("gid", 0)) >= 1])
354
+ typer.secho(f"[jarvis-sec] 批次 {bidx}/{total_batches} 分析 Agent 返回空数组,判定所有候选为无风险: 有风险gid=[], 无风险gid={batch_gids}", fg=typer.colors.BLUE)
355
+
356
+ # 构建无风险项(将批次中的所有候选标记为无风险)
357
+ no_risk_items = []
358
+ for item in batch:
359
+ try:
360
+ gid = int(item.get("gid", 0))
361
+ if gid >= 1:
362
+ no_risk_item = {
363
+ **item,
364
+ "has_risk": False,
365
+ "verification_notes": "分析 Agent 返回空数组,判定为无风险",
366
+ }
367
+ no_risk_items.append(no_risk_item)
368
+ except Exception:
369
+ pass
370
+
371
+ # 保存到 analysis.jsonl
372
+ if no_risk_items:
373
+ merged_no_risk_items = merge_verified_items_without_verification(no_risk_items, batch)
374
+ if merged_no_risk_items:
375
+ _append_report(merged_no_risk_items, "analysis_only", task_id, {"batch": True, "candidates": batch})
376
+ typer.secho(f"[jarvis-sec] 批次 {bidx}/{total_batches} 已将所有无风险候选保存到 analysis.jsonl: gids={batch_gids}", fg=typer.colors.GREEN)
377
+ except Exception as e:
378
+ try:
379
+ typer.secho(f"[jarvis-sec] 警告:处理空数组结果失败: {e}", fg=typer.colors.YELLOW)
380
+ except Exception:
381
+ pass
382
+
383
+ elif summary_items:
384
+ # 展开并过滤分析结果
385
+ items_with_risk, items_without_risk = expand_and_filter_analysis_results(summary_items)
386
+
387
+ # 记录分析结论(分别显示有风险和无风险的gid)
388
+ risk_gids = sorted([int(item.get("gid", 0)) for item in items_with_risk if int(item.get("gid", 0)) >= 1]) if items_with_risk else []
389
+ no_risk_gids = sorted([int(item.get("gid", 0)) for item in items_without_risk if int(item.get("gid", 0)) >= 1]) if items_without_risk else []
390
+
391
+ if items_with_risk or items_without_risk:
392
+ try:
393
+ typer.secho(f"[jarvis-sec] 批次 {bidx}/{total_batches} 分析 Agent 判定结果: 有风险gid={risk_gids}, 无风险gid={no_risk_gids}", fg=typer.colors.YELLOW if items_with_risk else typer.colors.BLUE)
394
+ except Exception:
395
+ pass
396
+
397
+ # 如果所有 gid 都被判定为无风险,也需要保存到 analysis.jsonl
398
+ if not items_with_risk and items_without_risk:
399
+ try:
400
+ # 将所有无风险的 gid 保存到 analysis.jsonl,确保它们被标记为已分析
401
+ no_risk_items = merge_verified_items_without_verification(items_without_risk, batch)
402
+ if no_risk_items:
403
+ _append_report(no_risk_items, "analysis_only", task_id, {"batch": True, "candidates": batch})
404
+ try:
405
+ typer.secho(f"[jarvis-sec] 批次 {bidx}/{total_batches} 所有候选均为无风险,已保存到 analysis.jsonl", fg=typer.colors.BLUE)
406
+ except Exception:
407
+ pass
408
+ except Exception as e:
409
+ try:
410
+ typer.secho(f"[jarvis-sec] 警告:保存无风险 gid 失败: {e}", fg=typer.colors.YELLOW)
411
+ except Exception:
412
+ pass
413
+
414
+ # 运行验证Agent(仅当分析Agent发现有风险的问题时,且启用二次验证)
415
+ if items_with_risk:
416
+ if not enable_verification:
417
+ # 如果关闭二次验证,直接将分析Agent确认的问题作为已验证的问题
418
+ verified_items = merge_verified_items_without_verification(items_with_risk, batch)
419
+ if verified_items:
420
+ verified_gids = sorted([int(item.get("gid", 0)) for item in verified_items if int(item.get("gid", 0)) >= 1])
421
+ for item in verified_items:
422
+ gid = int(item.get("gid", 0))
423
+ if gid >= 1:
424
+ gid_counts[gid] = gid_counts.get(gid, 0) + 1
425
+ # 计算无风险的gid(批次中不在verified_gids中的gid)
426
+ no_risk_gids_in_batch = sorted([gid for gid in batch_gids_all_sorted if gid not in verified_gids])
427
+ typer.secho(f"[jarvis-sec] 批次 {bidx}/{total_batches} 跳过验证,直接写入: 数量={len(verified_items)} 有风险gid={verified_gids}, 无风险gid={no_risk_gids_in_batch}", fg=typer.colors.GREEN)
428
+ _append_report(verified_items, "analysis_only", task_id, {"batch": True, "candidates": batch})
429
+ current_count = count_issues_from_file(sec_dir)
430
+ status_mgr.update_verification(
431
+ current_batch=bidx,
432
+ total_batches=total_batches,
433
+ issues_found=current_count,
434
+ message=f"已处理 {bidx}/{total_batches} 批次,发现 {current_count} 个问题(未验证)"
435
+ )
436
+ else:
437
+ # 启用二次验证,运行验证Agent
438
+ # 创建验证 Agent 来验证分析 Agent 的结论
439
+ verification_system_prompt = """
440
+ # 验证 Agent 约束
441
+ - 你的核心任务是验证分析 Agent 给出的安全结论是否正确。
442
+ - 你需要仔细检查分析 Agent 给出的前置条件、触发路径、后果和建议是否合理、准确。
443
+ - 工具优先:使用 read_code 读取目标文件附近源码(行号前后各 ~50 行),必要时用 execute_script 辅助检索。
444
+ - 必要时需向上追溯调用者,查看完整的调用路径,以确认分析 Agent 的结论是否成立。
445
+ - 禁止修改任何文件或执行写操作命令;仅进行只读分析与读取。
446
+ - 每次仅执行一个操作;等待工具结果后再进行下一步。
447
+ - **记忆使用**:
448
+ - 在验证过程中,充分利用 retrieve_memory 工具检索已有的记忆,特别是分析 Agent 保存的与当前验证函数相关的记忆。
449
+ - 这些记忆可能包含函数的分析要点、指针判空情况、输入校验情况、调用路径分析结果等,可以帮助你更准确地验证分析结论。
450
+ - 如果发现分析 Agent 的结论与记忆中的信息不一致,需要仔细核实。
451
+ - 完成验证后,主输出仅打印结束符 <!!!COMPLETE!!!> ,不需要汇总结果。
452
+ """.strip()
453
+
454
+ verification_task_id = f"JARVIS-SEC-Verify-Batch-{bidx}"
455
+ verification_agent_kwargs: Dict = dict(
456
+ system_prompt=verification_system_prompt,
457
+ name=verification_task_id,
458
+ auto_complete=True,
459
+ need_summary=True,
460
+ summary_prompt=build_verification_summary_prompt(),
461
+ non_interactive=True,
462
+ in_multi_agent=False,
463
+ use_methodology=False,
464
+ use_analysis=False,
465
+ output_handler=[ToolRegistry()],
466
+ use_tools=["read_code", "execute_script", "retrieve_memory"],
467
+ )
468
+ if llm_group:
469
+ verification_agent_kwargs["model_group"] = llm_group
470
+ verification_agent = Agent(**verification_agent_kwargs)
471
+
472
+ # 构造验证任务上下文
473
+ import json as _json3
474
+ verification_task = f"""
475
+ # 验证分析结论任务
476
+ 上下文参数:
477
+ - entry_path: {entry_path}
478
+ - languages: {langs}
479
+
480
+ 分析 Agent 给出的结论(需要验证,仅包含 has_risk: true 的项目):
481
+ {_json3.dumps(items_with_risk, ensure_ascii=False, indent=2)}
482
+
483
+ 请验证上述分析结论是否正确,包括:
484
+ 1. 前置条件(preconditions)是否合理
485
+ 2. 触发路径(trigger_path)是否成立
486
+ 3. 后果(consequences)评估是否准确
487
+ 4. 建议(suggestions)是否合适
488
+
489
+ 对于每个 gid,请判断分析结论是否正确(is_valid: true/false),并给出验证说明。
490
+ """.strip()
491
+
492
+ # 订阅验证 Agent 的摘要
493
+ verification_summary_container = subscribe_summary_event(verification_agent)
494
+
495
+ verification_results, verification_parse_error = run_verification_agent_with_retry(
496
+ verification_agent,
497
+ verification_task,
498
+ build_verification_summary_prompt(),
499
+ entry_path,
500
+ verification_summary_container,
501
+ bidx,
502
+ )
503
+
504
+ # 调试日志:显示验证结果
505
+ if verification_results is None:
506
+ try:
507
+ typer.secho("[jarvis-sec] 警告:验证 Agent 返回 None,可能解析失败", fg=typer.colors.YELLOW)
508
+ except Exception:
509
+ pass
510
+ elif not isinstance(verification_results, list):
511
+ try:
512
+ typer.secho(f"[jarvis-sec] 警告:验证 Agent 返回类型错误,期望 list,实际: {type(verification_results)}", fg=typer.colors.YELLOW)
513
+ except Exception:
514
+ pass
515
+ elif len(verification_results) == 0:
516
+ try:
517
+ typer.secho("[jarvis-sec] 警告:验证 Agent 返回空列表", fg=typer.colors.YELLOW)
518
+ except Exception:
519
+ pass
520
+ else:
521
+ try:
522
+ typer.secho(f"[jarvis-sec] 验证 Agent 返回 {len(verification_results)} 个结果项", fg=typer.colors.BLUE)
523
+ except Exception:
524
+ pass
525
+
526
+ # 根据验证结果筛选:只保留验证通过(is_valid: true)的告警
527
+ if verification_results:
528
+ gid_to_verification = build_gid_to_verification_mapping(verification_results)
529
+
530
+ # 调试日志:显示提取到的验证结果(包含gid列表)
531
+ if gid_to_verification:
532
+ try:
533
+ # 分类显示验证结果:通过和不通过的gid
534
+ valid_gids = sorted([gid for gid, v in gid_to_verification.items() if v.get("is_valid") is True])
535
+ invalid_gids = sorted([gid for gid, v in gid_to_verification.items() if v.get("is_valid") is False])
536
+ all_verified_gids = sorted(gid_to_verification.keys())
537
+
538
+ # 计算未验证的gid(批次中不在验证结果中的gid,视为无风险)
539
+ unverified_gids = sorted([gid for gid in batch_gids_all_sorted if gid not in all_verified_gids])
540
+ # 合并所有无风险的gid(验证不通过的 + 未验证的)
541
+ all_no_risk_gids = sorted(list(set(invalid_gids + unverified_gids)))
542
+ typer.secho(f"[jarvis-sec] 验证 Agent 返回 {len(gid_to_verification)} 个验证结果: 有风险gid={valid_gids}, 无风险gid={all_no_risk_gids}", fg=typer.colors.BLUE)
543
+ if valid_gids:
544
+ typer.secho(f"[jarvis-sec] 验证 Agent 判定 {len(valid_gids)} 个候选验证通过(is_valid: true): 有风险gid={valid_gids}", fg=typer.colors.GREEN)
545
+ if invalid_gids:
546
+ typer.secho(f"[jarvis-sec] 验证 Agent 判定 {len(invalid_gids)} 个候选验证不通过(is_valid: false): 无风险gid={invalid_gids}", fg=typer.colors.RED)
547
+ if unverified_gids:
548
+ typer.secho(f"[jarvis-sec] 验证 Agent 未验证的候选(不在验证结果中,视为无风险): 无风险gid={unverified_gids}", fg=typer.colors.BLUE)
549
+ except Exception:
550
+ pass
551
+ else:
552
+ try:
553
+ typer.secho(f"[jarvis-sec] 警告:验证结果解析成功,但未提取到任何有效的 gid。验证结果: {verification_results}", fg=typer.colors.YELLOW)
554
+ except Exception:
555
+ pass
556
+
557
+ # 合并验证通过的告警
558
+ verified_items = merge_verified_items(items_with_risk, batch, gid_to_verification)
559
+ else:
560
+ typer.secho("[jarvis-sec] 警告:验证 Agent 结果解析失败,不保留任何告警(保守策略)", fg=typer.colors.YELLOW)
561
+
562
+ # 只有验证通过的告警才写入文件
563
+ if verified_items:
564
+ verified_gids = sorted([int(item.get("gid", 0)) for item in verified_items if int(item.get("gid", 0)) >= 1])
565
+ for item in verified_items:
566
+ gid = int(item.get("gid", 0))
567
+ if gid >= 1:
568
+ gid_counts[gid] = gid_counts.get(gid, 0) + 1
569
+ # 计算无风险的gid(批次中不在verified_gids中的gid)
570
+ no_risk_gids_in_batch = sorted([gid for gid in batch_gids_all_sorted if gid not in verified_gids])
571
+ typer.secho(f"[jarvis-sec] 批次 {bidx}/{total_batches} 验证通过: 数量={len(verified_items)}/{len(items_with_risk)} -> 写入文件 有风险gid={verified_gids}, 无风险gid={no_risk_gids_in_batch}", fg=typer.colors.GREEN)
572
+ _append_report(verified_items, "verified", task_id, {"batch": True, "candidates": batch})
573
+ # 从文件读取当前总数(用于状态显示)
574
+ current_count = count_issues_from_file(sec_dir)
575
+ status_mgr.update_verification(
576
+ current_batch=bidx,
577
+ total_batches=total_batches,
578
+ issues_found=current_count,
579
+ message=f"已验证 {bidx}/{total_batches} 批次,发现 {current_count} 个问题(验证通过)"
580
+ )
581
+ else:
582
+ # 验证后无有效告警时也要打印gid列表(所有都视为无风险)
583
+ try:
584
+ risk_gids = sorted([int(item.get("gid", 0)) for item in items_with_risk if int(item.get("gid", 0)) >= 1])
585
+ # 验证后全部不通过,所以所有gid都是无风险
586
+ typer.secho(f"[jarvis-sec] 批次 {bidx}/{total_batches} 验证后无有效告警: 分析 Agent 发现 {len(items_with_risk)} 个有风险的问题,验证后全部不通过 有风险gid=[], 无风险gid={batch_gids_all_sorted}", fg=typer.colors.BLUE)
587
+ except Exception:
588
+ typer.secho(f"[jarvis-sec] 批次 {bidx}/{total_batches} 验证后无有效告警: 分析 Agent 发现 {len(items_with_risk)} 个有风险的问题,验证后全部不通过 有风险gid=[], 无风险gid={batch_gids_all_sorted}", fg=typer.colors.BLUE)
589
+ current_count = count_issues_from_file(sec_dir)
590
+ status_mgr.update_verification(
591
+ current_batch=bidx,
592
+ total_batches=total_batches,
593
+ issues_found=current_count,
594
+ message=f"已验证 {bidx}/{total_batches} 批次,验证后无有效告警"
595
+ )
596
+ elif parse_fail:
597
+ # 解析失败时也要打印gid列表(无法判断风险,但显示所有gid)
598
+ try:
599
+ typer.secho(f"[jarvis-sec] 批次 {bidx}/{total_batches} 解析失败 (摘要中无 <REPORT> 或字段无效): 有风险gid=?, 无风险gid=? (无法判断,gids={batch_gids_all_sorted})", fg=typer.colors.YELLOW)
600
+ except Exception:
601
+ typer.secho(f"[jarvis-sec] 批次 {bidx}/{total_batches} 解析失败 (摘要中无 <REPORT> 或字段无效)", fg=typer.colors.YELLOW)
602
+ else:
603
+ # 未发现问题时也要打印gid列表(所有都视为无风险)
604
+ try:
605
+ typer.secho(f"[jarvis-sec] 批次 {bidx}/{total_batches} 未发现问题: 有风险gid=[], 无风险gid={batch_gids_all_sorted}", fg=typer.colors.BLUE)
606
+ except Exception:
607
+ typer.secho(f"[jarvis-sec] 批次 {bidx}/{total_batches} 未发现问题", fg=typer.colors.BLUE)
608
+ current_count = count_issues_from_file(sec_dir)
609
+ status_mgr.update_verification(
610
+ current_batch=bidx,
611
+ total_batches=total_batches,
612
+ issues_found=current_count,
613
+ message=f"已验证 {bidx}/{total_batches} 批次"
614
+ )
615
+
616
+ # 为本批次所有候选写入 done 记录
617
+ for c in batch:
618
+ sig = sig_of(c)
619
+ try:
620
+ c_gid = int(c.get("gid", 0))
621
+ except Exception:
622
+ c_gid = 0
623
+ cnt = gid_counts.get(c_gid, 0)
624
+ _progress_append({
625
+ "event": "task_status",
626
+ "status": "done",
627
+ "task_id": task_id,
628
+ "candidate_signature": sig,
629
+ "candidate": c,
630
+ "issues_count": int(cnt),
631
+ "parse_fail": parse_fail,
632
+ "workspace_restore": workspace_restore_info,
633
+ "batch_index": bidx,
634
+ })
635
+
636
+ # 批次结束记录
637
+ _progress_append({
638
+ "event": "batch_status",
639
+ "status": "done",
640
+ "batch_id": task_id,
641
+ "batch_index": bidx,
642
+ "total_batches": total_batches,
643
+ "issues_count": len(verified_items),
644
+ "parse_fail": parse_fail,
645
+ })
646
+
647
+
648
def _normalize_cluster_path(p: str) -> str:
    """Return *p* using forward slashes only and without trailing slashes."""
    if not p:
        return ""
    return p.replace("\\", "/").rstrip("/")


def _collect_batch_gids(batch: List[Dict]) -> set:
    """Return the set of gids (as ints >= 1) found on the items of *batch*.

    Items whose ``gid`` is missing, falsy, or not convertible to ``int`` are
    ignored.
    """
    gids = set()
    for item in batch:
        try:
            raw_gid = item.get("gid", 0)
            gid = int(raw_gid) if raw_gid else 0
            if gid >= 1:
                gids.add(gid)
        except Exception:
            # Best-effort: a malformed candidate must not abort the resume scan.
            pass
    return gids


def _format_gid_preview(gids, limit: int = 5) -> str:
    """Render the first *limit* sorted gids, plus '...' when more exist."""
    return f"{sorted(gids)[:limit]}{'...' if len(gids) > limit else ''}"


def process_verification_phase(
    cluster_batches: List[List[Dict]],
    entry_path: str,
    langs: List[str],
    llm_group: Optional[str],
    sec_dir,
    status_mgr,
    _progress_append,
    _append_report,
    enable_verification: bool = True,
    force_save_memory: bool = False,
) -> List[Dict]:
    """Run the verification phase over all batches and return the saved issues.

    Batches that were already handled in a previous run are skipped
    (checkpoint resume). A batch counts as completed when either

    1. a cluster with the same normalized file path and exactly the same gid
       set has a ``cluster_id`` present in the loaded analysis results, or
    2. every gid of the batch is contained in the analyzed gid set.

    Batches without any valid gid (int >= 1) are skipped as well. The
    remaining batches are passed to ``process_verification_batch`` and are
    numbered 1..N over the pending batches only.

    Args:
        cluster_batches: Candidate batches; each item is a dict that may carry
            ``gid`` and ``file`` keys.
        entry_path, langs, llm_group, status_mgr, _progress_append,
        _append_report, enable_verification, force_save_memory: Forwarded
            unchanged to ``process_verification_batch``; ``status_mgr`` is also
            used here to publish the number of pending batches.
        sec_dir: Directory handed to the ``file_manager`` loaders.

    Returns:
        A flat list with the entries of every ``issues`` list found in the
        analysis results.
    """
    from jarvis.jarvis_sec.file_manager import (
        get_all_analyzed_gids,
        load_analysis_results,
        load_clusters,
    )

    batches: List[List[Dict]] = cluster_batches
    total_batches = len(batches)

    # Results persisted by earlier runs drive the checkpoint-resume logic.
    analysis_results = load_analysis_results(sec_dir)
    analyzed_gids = get_all_analyzed_gids(sec_dir)

    completed_cluster_ids = {
        cluster_id
        for cluster_id in (result.get("cluster_id", "") for result in analysis_results)
        if cluster_id
    }

    if completed_cluster_ids:
        try:
            typer.secho(f"[jarvis-sec] 断点恢复：从 analysis.jsonl 读取到 {len(completed_cluster_ids)} 个已完成的聚类", fg=typer.colors.BLUE)
        except Exception:
            pass

    if analyzed_gids:
        try:
            typer.secho(f"[jarvis-sec] 断点恢复：从 analysis.jsonl 读取到 {len(analyzed_gids)} 个已分析的 gids", fg=typer.colors.BLUE)
        except Exception:
            pass

    meta_records: List[Dict] = []
    gid_counts: Dict[int, int] = {}

    clusters = load_clusters(sec_dir)

    # Debug output: show a sample of the gids that were already analyzed.
    if analyzed_gids:
        try:
            typer.secho(f"[jarvis-sec] 断点恢复：已分析的 gid 示例: {_format_gid_preview(analyzed_gids, 10)} (共 {len(analyzed_gids)} 个)", fg=typer.colors.BLUE)
        except Exception:
            pass

    # Index the completed clusters once by (normalized file, gid set) so each
    # batch needs a single lookup instead of re-parsing every cluster.
    # setdefault keeps the first matching cluster, like a first-hit scan would.
    completed_cluster_index: Dict[tuple, str] = {}
    for cluster in clusters:
        cluster_id = cluster.get("cluster_id", "")
        if not cluster_id or cluster_id not in completed_cluster_ids:
            continue
        cluster_gids = set()
        for gid_val in cluster.get("gids", []):
            try:
                cluster_gids.add(int(gid_val))
            except Exception:
                pass
        cluster_key = (
            _normalize_cluster_path(str(cluster.get("file", ""))),
            frozenset(cluster_gids),
        )
        completed_cluster_index.setdefault(cluster_key, cluster_id)

    pending_batches: List[List[Dict]] = []
    skipped_count = 0

    for bidx, batch in enumerate(batches, start=1):
        batch_file = batch[0].get("file") if batch else None

        batch_gids = _collect_batch_gids(batch)
        if not batch_gids:
            # Nothing verifiable in this batch.
            skipped_count += 1
            continue

        is_batch_completed = False
        completion_reason = ""

        # Method 1: exact cluster match (same file, identical gid set).
        if completed_cluster_index:
            batch_key = (
                _normalize_cluster_path(batch_file or ""),
                frozenset(batch_gids),
            )
            matched_cluster_id = completed_cluster_index.get(batch_key)
            if matched_cluster_id:
                is_batch_completed = True
                completion_reason = f"通过 cluster_id 匹配: {matched_cluster_id}"

        # Method 2: every gid of the batch has already been analyzed.
        if not is_batch_completed and analyzed_gids:
            missing_gids = batch_gids - analyzed_gids
            if not missing_gids:
                is_batch_completed = True
                completion_reason = f"所有 gid 已分析 (批次 gids: {_format_gid_preview(batch_gids)})"
            elif bidx <= 3:  # debug output for the first three batches only
                try:
                    typer.secho(f"[jarvis-sec] 批次 {bidx} 部分 gid 未分析: 缺失={_format_gid_preview(missing_gids)}, 已分析={_format_gid_preview(batch_gids & analyzed_gids)}", fg=typer.colors.YELLOW)
                except Exception:
                    pass

        if is_batch_completed:
            skipped_count += 1
            try:
                typer.secho(f"[jarvis-sec] 跳过批次 {bidx}/{total_batches} (文件={batch_file}, gids={_format_gid_preview(batch_gids)}): {completion_reason}", fg=typer.colors.GREEN)
            except Exception:
                pass
            continue

        if bidx <= 3:  # debug output for the first three batches only
            try:
                missing_gids = batch_gids - analyzed_gids if analyzed_gids else batch_gids
                typer.secho(f"[jarvis-sec] 待处理批次 {bidx}/{total_batches} (文件={batch_file}, gids={_format_gid_preview(batch_gids)}, 未分析={_format_gid_preview(missing_gids)})", fg=typer.colors.YELLOW)
            except Exception:
                pass
        pending_batches.append(batch)

    actual_total_batches = len(pending_batches)

    if skipped_count > 0:
        try:
            typer.secho(f"[jarvis-sec] 断点恢复：跳过 {skipped_count} 个已完成的批次，剩余 {actual_total_batches} 个批次待处理", fg=typer.colors.GREEN)
        except Exception:
            pass

    # Progress is reported relative to the batches that still need work.
    if actual_total_batches > 0:
        status_mgr.update_verification(
            current_batch=0,
            total_batches=actual_total_batches,
            message=f"开始安全验证...（共 {actual_total_batches} 个批次待处理）"
        )

    for processed_count, batch in enumerate(pending_batches, start=1):
        process_verification_batch(
            batch,
            processed_count,  # index among pending batches, not the original index
            actual_total_batches,
            entry_path,
            langs,
            llm_group,
            status_mgr,
            _progress_append,
            _append_report,
            meta_records,
            gid_counts,
            sec_dir,
            enable_verification=enable_verification,
            force_save_memory=force_save_memory,
        )

    # Re-read the results after processing so the returned list also covers
    # the batches verified in this call, not only the pre-run snapshot.
    # NOTE(review): assumes process_verification_batch persists its results
    # where load_analysis_results reads them (analysis.jsonl) - confirm.
    if pending_batches:
        analysis_results = load_analysis_results(sec_dir)

    all_issues: List[Dict] = []
    for result in analysis_results:
        issues = result.get("issues", [])
        if isinstance(issues, list):
            all_issues.extend(issues)

    return all_issues