jarvis-ai-assistant 0.3.30__py3-none-any.whl → 0.7.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181) hide show
  1. jarvis/__init__.py +1 -1
  2. jarvis/jarvis_agent/__init__.py +458 -152
  3. jarvis/jarvis_agent/agent_manager.py +17 -13
  4. jarvis/jarvis_agent/builtin_input_handler.py +2 -6
  5. jarvis/jarvis_agent/config_editor.py +2 -7
  6. jarvis/jarvis_agent/event_bus.py +82 -12
  7. jarvis/jarvis_agent/file_context_handler.py +329 -0
  8. jarvis/jarvis_agent/file_methodology_manager.py +3 -4
  9. jarvis/jarvis_agent/jarvis.py +628 -55
  10. jarvis/jarvis_agent/language_extractors/__init__.py +57 -0
  11. jarvis/jarvis_agent/language_extractors/c_extractor.py +21 -0
  12. jarvis/jarvis_agent/language_extractors/cpp_extractor.py +21 -0
  13. jarvis/jarvis_agent/language_extractors/go_extractor.py +21 -0
  14. jarvis/jarvis_agent/language_extractors/java_extractor.py +84 -0
  15. jarvis/jarvis_agent/language_extractors/javascript_extractor.py +79 -0
  16. jarvis/jarvis_agent/language_extractors/python_extractor.py +21 -0
  17. jarvis/jarvis_agent/language_extractors/rust_extractor.py +21 -0
  18. jarvis/jarvis_agent/language_extractors/typescript_extractor.py +84 -0
  19. jarvis/jarvis_agent/language_support_info.py +486 -0
  20. jarvis/jarvis_agent/main.py +34 -10
  21. jarvis/jarvis_agent/memory_manager.py +7 -16
  22. jarvis/jarvis_agent/methodology_share_manager.py +10 -16
  23. jarvis/jarvis_agent/prompt_manager.py +1 -1
  24. jarvis/jarvis_agent/prompts.py +193 -171
  25. jarvis/jarvis_agent/protocols.py +8 -12
  26. jarvis/jarvis_agent/run_loop.py +105 -9
  27. jarvis/jarvis_agent/session_manager.py +2 -3
  28. jarvis/jarvis_agent/share_manager.py +20 -22
  29. jarvis/jarvis_agent/shell_input_handler.py +1 -2
  30. jarvis/jarvis_agent/stdio_redirect.py +295 -0
  31. jarvis/jarvis_agent/task_analyzer.py +31 -6
  32. jarvis/jarvis_agent/task_manager.py +11 -27
  33. jarvis/jarvis_agent/tool_executor.py +2 -3
  34. jarvis/jarvis_agent/tool_share_manager.py +12 -24
  35. jarvis/jarvis_agent/utils.py +5 -1
  36. jarvis/jarvis_agent/web_bridge.py +189 -0
  37. jarvis/jarvis_agent/web_output_sink.py +53 -0
  38. jarvis/jarvis_agent/web_server.py +786 -0
  39. jarvis/jarvis_c2rust/__init__.py +26 -0
  40. jarvis/jarvis_c2rust/cli.py +575 -0
  41. jarvis/jarvis_c2rust/collector.py +250 -0
  42. jarvis/jarvis_c2rust/constants.py +26 -0
  43. jarvis/jarvis_c2rust/library_replacer.py +1254 -0
  44. jarvis/jarvis_c2rust/llm_module_agent.py +1272 -0
  45. jarvis/jarvis_c2rust/loaders.py +207 -0
  46. jarvis/jarvis_c2rust/models.py +28 -0
  47. jarvis/jarvis_c2rust/optimizer.py +2157 -0
  48. jarvis/jarvis_c2rust/scanner.py +1681 -0
  49. jarvis/jarvis_c2rust/transpiler.py +2983 -0
  50. jarvis/jarvis_c2rust/utils.py +385 -0
  51. jarvis/jarvis_code_agent/build_validation_config.py +132 -0
  52. jarvis/jarvis_code_agent/code_agent.py +1371 -220
  53. jarvis/jarvis_code_agent/code_analyzer/__init__.py +65 -0
  54. jarvis/jarvis_code_agent/code_analyzer/base_language.py +74 -0
  55. jarvis/jarvis_code_agent/code_analyzer/build_validator/__init__.py +44 -0
  56. jarvis/jarvis_code_agent/code_analyzer/build_validator/base.py +106 -0
  57. jarvis/jarvis_code_agent/code_analyzer/build_validator/cmake.py +74 -0
  58. jarvis/jarvis_code_agent/code_analyzer/build_validator/detector.py +125 -0
  59. jarvis/jarvis_code_agent/code_analyzer/build_validator/fallback.py +72 -0
  60. jarvis/jarvis_code_agent/code_analyzer/build_validator/go.py +70 -0
  61. jarvis/jarvis_code_agent/code_analyzer/build_validator/java_gradle.py +53 -0
  62. jarvis/jarvis_code_agent/code_analyzer/build_validator/java_maven.py +47 -0
  63. jarvis/jarvis_code_agent/code_analyzer/build_validator/makefile.py +61 -0
  64. jarvis/jarvis_code_agent/code_analyzer/build_validator/nodejs.py +110 -0
  65. jarvis/jarvis_code_agent/code_analyzer/build_validator/python.py +154 -0
  66. jarvis/jarvis_code_agent/code_analyzer/build_validator/rust.py +110 -0
  67. jarvis/jarvis_code_agent/code_analyzer/build_validator/validator.py +153 -0
  68. jarvis/jarvis_code_agent/code_analyzer/build_validator.py +43 -0
  69. jarvis/jarvis_code_agent/code_analyzer/context_manager.py +648 -0
  70. jarvis/jarvis_code_agent/code_analyzer/context_recommender.py +18 -0
  71. jarvis/jarvis_code_agent/code_analyzer/dependency_analyzer.py +132 -0
  72. jarvis/jarvis_code_agent/code_analyzer/file_ignore.py +330 -0
  73. jarvis/jarvis_code_agent/code_analyzer/impact_analyzer.py +781 -0
  74. jarvis/jarvis_code_agent/code_analyzer/language_registry.py +185 -0
  75. jarvis/jarvis_code_agent/code_analyzer/language_support.py +110 -0
  76. jarvis/jarvis_code_agent/code_analyzer/languages/__init__.py +49 -0
  77. jarvis/jarvis_code_agent/code_analyzer/languages/c_cpp_language.py +299 -0
  78. jarvis/jarvis_code_agent/code_analyzer/languages/go_language.py +215 -0
  79. jarvis/jarvis_code_agent/code_analyzer/languages/java_language.py +212 -0
  80. jarvis/jarvis_code_agent/code_analyzer/languages/javascript_language.py +254 -0
  81. jarvis/jarvis_code_agent/code_analyzer/languages/python_language.py +269 -0
  82. jarvis/jarvis_code_agent/code_analyzer/languages/rust_language.py +281 -0
  83. jarvis/jarvis_code_agent/code_analyzer/languages/typescript_language.py +280 -0
  84. jarvis/jarvis_code_agent/code_analyzer/llm_context_recommender.py +605 -0
  85. jarvis/jarvis_code_agent/code_analyzer/structured_code.py +556 -0
  86. jarvis/jarvis_code_agent/code_analyzer/symbol_extractor.py +252 -0
  87. jarvis/jarvis_code_agent/code_analyzer/tree_sitter_extractor.py +58 -0
  88. jarvis/jarvis_code_agent/lint.py +501 -8
  89. jarvis/jarvis_code_agent/utils.py +141 -0
  90. jarvis/jarvis_code_analysis/code_review.py +493 -584
  91. jarvis/jarvis_data/config_schema.json +128 -12
  92. jarvis/jarvis_git_squash/main.py +4 -5
  93. jarvis/jarvis_git_utils/git_commiter.py +82 -75
  94. jarvis/jarvis_mcp/sse_mcp_client.py +22 -29
  95. jarvis/jarvis_mcp/stdio_mcp_client.py +12 -13
  96. jarvis/jarvis_mcp/streamable_mcp_client.py +15 -14
  97. jarvis/jarvis_memory_organizer/memory_organizer.py +55 -74
  98. jarvis/jarvis_methodology/main.py +32 -48
  99. jarvis/jarvis_multi_agent/__init__.py +287 -55
  100. jarvis/jarvis_multi_agent/main.py +36 -4
  101. jarvis/jarvis_platform/base.py +524 -202
  102. jarvis/jarvis_platform/human.py +7 -8
  103. jarvis/jarvis_platform/kimi.py +30 -36
  104. jarvis/jarvis_platform/openai.py +88 -25
  105. jarvis/jarvis_platform/registry.py +26 -10
  106. jarvis/jarvis_platform/tongyi.py +24 -25
  107. jarvis/jarvis_platform/yuanbao.py +32 -43
  108. jarvis/jarvis_platform_manager/main.py +66 -77
  109. jarvis/jarvis_platform_manager/service.py +8 -13
  110. jarvis/jarvis_rag/cli.py +53 -55
  111. jarvis/jarvis_rag/embedding_manager.py +13 -18
  112. jarvis/jarvis_rag/llm_interface.py +8 -9
  113. jarvis/jarvis_rag/query_rewriter.py +10 -21
  114. jarvis/jarvis_rag/rag_pipeline.py +24 -27
  115. jarvis/jarvis_rag/reranker.py +4 -5
  116. jarvis/jarvis_rag/retriever.py +28 -30
  117. jarvis/jarvis_sec/__init__.py +305 -0
  118. jarvis/jarvis_sec/agents.py +143 -0
  119. jarvis/jarvis_sec/analysis.py +276 -0
  120. jarvis/jarvis_sec/checkers/__init__.py +32 -0
  121. jarvis/jarvis_sec/checkers/c_checker.py +2680 -0
  122. jarvis/jarvis_sec/checkers/rust_checker.py +1108 -0
  123. jarvis/jarvis_sec/cli.py +139 -0
  124. jarvis/jarvis_sec/clustering.py +1439 -0
  125. jarvis/jarvis_sec/file_manager.py +427 -0
  126. jarvis/jarvis_sec/parsers.py +73 -0
  127. jarvis/jarvis_sec/prompts.py +268 -0
  128. jarvis/jarvis_sec/report.py +336 -0
  129. jarvis/jarvis_sec/review.py +453 -0
  130. jarvis/jarvis_sec/status.py +264 -0
  131. jarvis/jarvis_sec/types.py +20 -0
  132. jarvis/jarvis_sec/utils.py +499 -0
  133. jarvis/jarvis_sec/verification.py +848 -0
  134. jarvis/jarvis_sec/workflow.py +226 -0
  135. jarvis/jarvis_smart_shell/main.py +38 -87
  136. jarvis/jarvis_stats/cli.py +2 -2
  137. jarvis/jarvis_stats/stats.py +8 -8
  138. jarvis/jarvis_stats/storage.py +15 -21
  139. jarvis/jarvis_stats/visualizer.py +1 -1
  140. jarvis/jarvis_tools/clear_memory.py +3 -20
  141. jarvis/jarvis_tools/cli/main.py +21 -23
  142. jarvis/jarvis_tools/edit_file.py +1019 -132
  143. jarvis/jarvis_tools/execute_script.py +83 -25
  144. jarvis/jarvis_tools/file_analyzer.py +6 -9
  145. jarvis/jarvis_tools/generate_new_tool.py +14 -21
  146. jarvis/jarvis_tools/lsp_client.py +1552 -0
  147. jarvis/jarvis_tools/methodology.py +2 -3
  148. jarvis/jarvis_tools/read_code.py +1736 -35
  149. jarvis/jarvis_tools/read_symbols.py +140 -0
  150. jarvis/jarvis_tools/read_webpage.py +12 -13
  151. jarvis/jarvis_tools/registry.py +427 -200
  152. jarvis/jarvis_tools/retrieve_memory.py +20 -19
  153. jarvis/jarvis_tools/rewrite_file.py +72 -158
  154. jarvis/jarvis_tools/save_memory.py +3 -15
  155. jarvis/jarvis_tools/search_web.py +18 -18
  156. jarvis/jarvis_tools/sub_agent.py +36 -43
  157. jarvis/jarvis_tools/sub_code_agent.py +25 -26
  158. jarvis/jarvis_tools/virtual_tty.py +55 -33
  159. jarvis/jarvis_utils/clipboard.py +7 -10
  160. jarvis/jarvis_utils/config.py +232 -45
  161. jarvis/jarvis_utils/embedding.py +8 -5
  162. jarvis/jarvis_utils/fzf.py +8 -8
  163. jarvis/jarvis_utils/git_utils.py +225 -36
  164. jarvis/jarvis_utils/globals.py +3 -3
  165. jarvis/jarvis_utils/http.py +1 -1
  166. jarvis/jarvis_utils/input.py +99 -48
  167. jarvis/jarvis_utils/jsonnet_compat.py +465 -0
  168. jarvis/jarvis_utils/methodology.py +52 -48
  169. jarvis/jarvis_utils/utils.py +819 -491
  170. jarvis_ai_assistant-0.7.6.dist-info/METADATA +600 -0
  171. jarvis_ai_assistant-0.7.6.dist-info/RECORD +218 -0
  172. {jarvis_ai_assistant-0.3.30.dist-info → jarvis_ai_assistant-0.7.6.dist-info}/entry_points.txt +4 -0
  173. jarvis/jarvis_agent/config.py +0 -92
  174. jarvis/jarvis_agent/edit_file_handler.py +0 -296
  175. jarvis/jarvis_platform/ai8.py +0 -332
  176. jarvis/jarvis_tools/ask_user.py +0 -54
  177. jarvis_ai_assistant-0.3.30.dist-info/METADATA +0 -381
  178. jarvis_ai_assistant-0.3.30.dist-info/RECORD +0 -137
  179. {jarvis_ai_assistant-0.3.30.dist-info → jarvis_ai_assistant-0.7.6.dist-info}/WHEEL +0 -0
  180. {jarvis_ai_assistant-0.3.30.dist-info → jarvis_ai_assistant-0.7.6.dist-info}/licenses/LICENSE +0 -0
  181. {jarvis_ai_assistant-0.3.30.dist-info → jarvis_ai_assistant-0.7.6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,427 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ 状态文件管理模块
4
+
5
+ 重构后的3个配置文件:
6
+ 1. candidates.jsonl - 只扫结果文件:保存每个原始告警的信息,包括gid
7
+ 2. clusters.jsonl - 聚类信息文件:所有聚类(包括无效聚类),每个聚类包括的gids
8
+ 3. analysis.jsonl - 分析结果文件:包括所有聚类,聚类中哪些问题是问题,哪些问题是误报
9
+ """
10
+
11
import json
from pathlib import Path
from typing import Any, Dict, List, Set, Tuple

import typer
15
+
16
+
17
+ # ==================== 文件路径定义 ====================
18
+
19
def get_candidates_file(sec_dir: Path) -> Path:
    """Return the path of the raw scan results file inside ``sec_dir``."""
    filename = "candidates.jsonl"
    return sec_dir / filename
22
+
23
+
24
def get_clusters_file(sec_dir: Path) -> Path:
    """Return the path of the clustering records file inside ``sec_dir``."""
    filename = "clusters.jsonl"
    return sec_dir / filename
27
+
28
+
29
def get_analysis_file(sec_dir: Path) -> Path:
    """Return the path of the analysis results file inside ``sec_dir``."""
    filename = "analysis.jsonl"
    return sec_dir / filename
32
+
33
+
34
+ # ==================== 只扫结果文件 (candidates.jsonl) ====================
35
+
36
def save_candidates(sec_dir: Path, candidates: List[Dict]) -> None:
    """
    Write the raw scan results to candidates.jsonl, replacing previous content.

    One JSON object is written per line. Each object carries the original
    finding together with its gid, for example:
    {
        "gid": 1,
        "language": "c",
        "category": "buffer_overflow",
        "pattern": "strcpy",
        "file": "src/main.c",
        "line": 42,
        "evidence": "...",
        "confidence": 0.8,
        "severity": "high"
    }
    """
    target = get_candidates_file(sec_dir)
    target.parent.mkdir(parents=True, exist_ok=True)

    # Opening with "w" truncates, so the file always mirrors the latest list.
    with target.open("w", encoding="utf-8") as out:
        for entry in candidates:
            serialized = json.dumps(entry, ensure_ascii=False)
            out.write(serialized + "\n")

    # Console feedback is best-effort; any failure while printing is ignored.
    try:
        typer.secho(f"[jarvis-sec] 已保存 {len(candidates)} 个候选到 {target}", fg=typer.colors.GREEN)
    except Exception:
        pass
65
+
66
+
67
def load_candidates(sec_dir: Path) -> List[Dict]:
    """
    Read the raw scan results back from candidates.jsonl.

    Returns the parsed records in file order (each is expected to carry a
    gid). Blank lines and lines that fail to parse as JSON are skipped. A
    missing file yields an empty list. If reading fails part-way, a warning
    is printed and the records parsed so far are returned.
    """
    source = get_candidates_file(sec_dir)
    loaded = []

    if not source.exists():
        return loaded

    try:
        with source.open("r", encoding="utf-8", errors="ignore") as handle:
            for raw_line in handle:
                stripped = raw_line.strip()
                if not stripped:
                    continue
                try:
                    loaded.append(json.loads(stripped))
                except Exception:
                    # A malformed line must not abort the whole load.
                    continue
    except Exception as e:
        # Printing the warning is itself best-effort.
        try:
            typer.secho(f"[jarvis-sec] 警告:加载 candidates.jsonl 失败: {e}", fg=typer.colors.YELLOW)
        except Exception:
            pass

    return loaded
95
+
96
+
97
def get_all_candidate_gids(sec_dir: Path) -> Set[int]:
    """Return the set of gids (integers >= 1) found in candidates.jsonl."""
    collected = set()
    for entry in load_candidates(sec_dir):
        # The lookup stays inside the try: a record may not be a dict at all.
        try:
            number = int(entry.get("gid", 0))
        except Exception:
            continue
        if number >= 1:
            collected.add(number)
    return collected
109
+
110
+
111
+ # ==================== 聚类信息文件 (clusters.jsonl) ====================
112
+
113
def save_cluster(sec_dir: Path, cluster: Dict) -> None:
    """
    Append a single cluster record to clusters.jsonl.

    One JSON object is written per line, for example:
    {
        "cluster_id": "file_path|batch_index|index",  # unique identifier
        "file": "src/main.c",
        "batch_index": 1,
        "cluster_index": 0,                # index of the cluster within its batch
        "gids": [1, 2, 3],                 # gids that belong to this cluster
        "verification": "...",             # description of what to verify
        "is_invalid": false,               # whether the cluster is invalid
        "invalid_reason": "",              # reason, when is_invalid is true
        "created_at": "2024-01-01T00:00:00"  # creation time (optional)
    }
    """
    target = get_clusters_file(sec_dir)
    target.parent.mkdir(parents=True, exist_ok=True)

    # Append so that earlier records are preserved.
    with target.open("a", encoding="utf-8") as out:
        serialized = json.dumps(cluster, ensure_ascii=False)
        out.write(serialized + "\n")
136
+
137
+
138
def load_clusters(sec_dir: Path) -> List[Dict]:
    """
    Read every cluster from clusters.jsonl, merging records by cluster_id.

    Records sharing a cluster_id are folded into one: their gid lists are
    unioned and sorted, and every other field takes the value from the most
    recent record. Records without a cluster_id, blank lines and lines that
    cannot be processed are skipped. A missing file yields an empty list.
    """
    source = get_clusters_file(sec_dir)
    clusters = []

    if not source.exists():
        return clusters

    try:
        # Insertion-ordered map: cluster_id -> merged record.
        merged_by_id: Dict[str, Dict] = {}
        with source.open("r", encoding="utf-8", errors="ignore") as handle:
            for raw_line in handle:
                stripped = raw_line.strip()
                if not stripped:
                    continue
                try:
                    record = json.loads(stripped)
                    key = record.get("cluster_id", "")
                    if not key:
                        continue
                    if key not in merged_by_id:
                        # First sighting of this cluster_id: keep as-is.
                        merged_by_id[key] = record
                        continue

                    previous = merged_by_id[key]
                    # Union the gid lists (deduplicated, sorted).
                    combined = set(previous.get("gids", [])) | set(record.get("gids", []))
                    previous["gids"] = sorted(combined)
                    # Latest record wins for all remaining fields.
                    for name, value in record.items():
                        if name != "gids" and name != "cluster_id":
                            previous[name] = value
                except Exception:
                    # A bad line must not abort the whole load.
                    continue

        clusters = list(merged_by_id.values())
    except Exception as e:
        # Printing the warning is itself best-effort.
        try:
            typer.secho(f"[jarvis-sec] 警告:加载 clusters.jsonl 失败: {e}", fg=typer.colors.YELLOW)
        except Exception:
            pass

    return clusters
185
+
186
+
187
def get_all_clustered_gids(sec_dir: Path) -> Set[int]:
    """Return the set of gids (integers >= 1) that appear in any cluster."""
    collected = set()
    for record in load_clusters(sec_dir):
        members = record.get("gids", [])
        if not isinstance(members, list):
            continue
        for value in members:
            try:
                number = int(value)
            except Exception:
                # Entries that cannot be read as integers are ignored.
                continue
            if number >= 1:
                collected.add(number)
    return collected
202
+
203
+
204
def validate_clustering_completeness(sec_dir: Path) -> Tuple[bool, Set[int]]:
    """
    Check that every candidate gid has been assigned to some cluster.

    Returns: (is_complete, missing_gids), where missing_gids holds the
    candidate gids that do not appear in any cluster.
    """
    candidate_gids = get_all_candidate_gids(sec_dir)
    clustered_gids = get_all_clustered_gids(sec_dir)
    missing = candidate_gids - clustered_gids
    is_complete = not missing
    return is_complete, missing
215
+
216
+
217
+ # ==================== 分析结果文件 (analysis.jsonl) ====================
218
+
219
def save_analysis_result(sec_dir: Path, analysis: Dict) -> None:
    """
    Append a single analysis result to analysis.jsonl.

    One JSON object is written per line, for example:
    {
        "cluster_id": "file_path|batch_index|index",  # id of the matching cluster
        "file": "src/main.c",
        "batch_index": 1,
        "cluster_index": 0,
        "gids": [1, 2, 3],              # every gid in the cluster
        "verified_gids": [1, 2],        # gids confirmed as issues (has_risk: true)
        "false_positive_gids": [3],     # gids judged false positives (has_risk: false)
        "issues": [                     # details, only for verified_gids
            {
                "gid": 1,
                "has_risk": true,
                "verification_notes": "...",
                "severity": "high",
                ...
            },
            ...
        ],
        "analyzed_at": "2024-01-01T00:00:00"  # analysis time (optional)
    }
    """
    target = get_analysis_file(sec_dir)
    target.parent.mkdir(parents=True, exist_ok=True)

    # Append so that earlier results are preserved.
    with target.open("a", encoding="utf-8") as out:
        serialized = json.dumps(analysis, ensure_ascii=False)
        out.write(serialized + "\n")
251
+
252
+
253
def load_analysis_results(sec_dir: Path) -> List[Dict]:
    """
    Read every analysis result from analysis.jsonl, merging by cluster_id.

    For records sharing a cluster_id, the gids, verified_gids and
    false_positive_gids lists are unioned and sorted, issues are merged by
    gid (a later issue replaces an earlier one with the same gid), and every
    other field takes the value from the most recent record. Records without
    a cluster_id, blank lines and lines that cannot be processed are skipped.
    A missing file yields an empty list.
    """
    source = get_analysis_file(sec_dir)
    results = []

    if not source.exists():
        return results

    # Fields merged as deduplicated, sorted gid lists (processed in this order).
    gid_fields = ("gids", "verified_gids", "false_positive_gids")
    # Fields that are never overwritten wholesale by a later record.
    merged_fields = gid_fields + ("issues", "cluster_id")

    try:
        # Insertion-ordered map: cluster_id -> merged record.
        merged_by_id: Dict[str, Dict] = {}
        with source.open("r", encoding="utf-8", errors="ignore") as handle:
            for raw_line in handle:
                stripped = raw_line.strip()
                if not stripped:
                    continue
                try:
                    record = json.loads(stripped)
                    key = record.get("cluster_id", "")
                    if not key:
                        continue
                    if key not in merged_by_id:
                        # First sighting of this cluster_id: keep as-is.
                        merged_by_id[key] = record
                        continue

                    previous = merged_by_id[key]

                    # Union each gid list with its counterpart.
                    for field in gid_fields:
                        combined = set(previous.get(field, [])) | set(record.get(field, []))
                        previous[field] = sorted(combined)

                    # Merge issues keyed by gid; the newest issue wins.
                    issues_by_gid = {}
                    for old_issue in previous.get("issues", []):
                        issues_by_gid[old_issue.get("gid")] = old_issue
                    for new_issue in record.get("issues", []):
                        issue_gid = new_issue.get("gid")
                        if issue_gid:
                            issues_by_gid[issue_gid] = new_issue
                    previous["issues"] = list(issues_by_gid.values())

                    # Latest record wins for all remaining fields.
                    for name, value in record.items():
                        if name not in merged_fields:
                            previous[name] = value
                except Exception:
                    # A bad line must not abort the whole load.
                    continue

        results = list(merged_by_id.values())
    except Exception as e:
        # Printing the warning is itself best-effort.
        try:
            typer.secho(f"[jarvis-sec] 警告:加载 analysis.jsonl 失败: {e}", fg=typer.colors.YELLOW)
        except Exception:
            pass

    return results
321
+
322
+
323
def get_all_analyzed_gids(sec_dir: Path) -> Set[int]:
    """Return every analyzed gid (issues and false positives alike), as integers >= 1."""
    collected = set()
    for record in load_analysis_results(sec_dir):
        members = record.get("gids", [])
        if not isinstance(members, list):
            continue
        for value in members:
            try:
                number = int(value)
            except Exception:
                # Entries that cannot be read as integers are ignored.
                continue
            if number >= 1:
                collected.add(number)
    return collected
338
+
339
+
340
def get_verified_issue_gids(sec_dir: Path) -> Set[int]:
    """Return the gids (integers >= 1) listed under verified_gids in the analysis results."""
    collected = set()
    for record in load_analysis_results(sec_dir):
        members = record.get("verified_gids", [])
        if not isinstance(members, list):
            continue
        for value in members:
            try:
                number = int(value)
            except Exception:
                # Entries that cannot be read as integers are ignored.
                continue
            if number >= 1:
                collected.add(number)
    return collected
355
+
356
+
357
def get_false_positive_gids(sec_dir: Path) -> Set[int]:
    """Return the gids (integers >= 1) listed under false_positive_gids in the analysis results."""
    collected = set()
    for record in load_analysis_results(sec_dir):
        members = record.get("false_positive_gids", [])
        if not isinstance(members, list):
            continue
        for value in members:
            try:
                number = int(value)
            except Exception:
                # Entries that cannot be read as integers are ignored.
                continue
            if number >= 1:
                collected.add(number)
    return collected
372
+
373
+
374
+ # ==================== 断点恢复状态检查 ====================
375
+
376
def get_resume_status(sec_dir: Path) -> Dict[str, Any]:
    """
    Derive the resume (checkpoint) state from the three state files.

    Returns: {
        "has_candidates": bool,       # candidates.jsonl yielded at least one record
        "has_clusters": bool,         # clusters.jsonl yielded at least one cluster
        "has_analysis": bool,         # analysis.jsonl yielded at least one result
        "candidates_count": int,      # number of candidate records
        "clusters_count": int,        # number of clusters (after merging by cluster_id)
        "analysis_count": int,        # number of analysis results (after merging)
        "clustering_complete": bool,  # every candidate gid appears in a cluster
        "missing_gids": Set[int],     # candidate gids not yet clustered
    }

    When there are no candidates, "clustering_complete" stays False and
    "missing_gids" stays empty, because the completeness check is skipped.
    """
    status: Dict[str, Any] = {
        "has_candidates": False,
        "has_clusters": False,
        "has_analysis": False,
        "candidates_count": 0,
        "clusters_count": 0,
        "analysis_count": 0,
        "clustering_complete": False,
        "missing_gids": set(),
    }

    # Raw scan results.
    candidates = load_candidates(sec_dir)
    if candidates:
        status["has_candidates"] = True
        status["candidates_count"] = len(candidates)

    # Clustering records.
    clusters = load_clusters(sec_dir)
    if clusters:
        status["has_clusters"] = True
        status["clusters_count"] = len(clusters)

    # Analysis results.
    results = load_analysis_results(sec_dir)
    if results:
        status["has_analysis"] = True
        status["analysis_count"] = len(results)

    # Completeness is only meaningful once there are candidates to cluster.
    if status["has_candidates"]:
        is_complete, missing_gids = validate_clustering_completeness(sec_dir)
        status["clustering_complete"] = is_complete
        status["missing_gids"] = missing_gids

    return status
427
+
@@ -0,0 +1,73 @@
1
+ # -*- coding: utf-8 -*-
2
+ """解析模块 - 用于解析Agent返回的JSON格式摘要"""
3
+
4
+ from typing import List, Optional
5
+ from jarvis.jarvis_utils.jsonnet_compat import loads as json_loads
6
+
7
+
8
def parse_clusters_from_text(text: str) -> tuple[Optional[List], Optional[str]]:
    """
    Extract the <CLUSTERS>...</CLUSTERS> payload from ``text`` and parse it.

    Tag matching is case-insensitive and uses the first opening tag together
    with the nearest closing tag after it.

    Returns (data, None) when the payload parses to a list, otherwise
    (None, error_message). This function does not raise; any unexpected
    error is reported through the error message.
    """
    try:
        import re as _re

        # The case-insensitive search already covers every input an exact
        # case-sensitive lookup could accept, so no separate fallback is needed.
        match = _re.search(r"<CLUSTERS>([\s\S]*?)</CLUSTERS>", text, flags=_re.IGNORECASE)
        if match is None:
            return None, "未找到 <CLUSTERS> 或 </CLUSTERS> 标签,或标签顺序错误"

        content = match.group(1).strip()
        if not content:
            return None, "JSON 内容为空"

        try:
            data = json_loads(content)
        except Exception as json_err:
            return None, f"JSON 解析失败: {str(json_err)}"

        if isinstance(data, list):
            return data, None
        return None, f"JSON 解析结果不是数组,而是 {type(data).__name__}"
    except Exception as e:
        return None, f"解析过程发生异常: {str(e)}"
37
+
38
+
39
def try_parse_summary_report(text: str) -> tuple[Optional[object], Optional[str]]:
    """
    Extract the <REPORT>...</REPORT> payload from ``text`` and parse it.

    Tag matching is case-insensitive and uses the first opening tag together
    with the nearest closing tag after it.

    Returns (data, None) when the payload parses to a dict or a list,
    otherwise (None, error_message). This function does not raise; any
    unexpected error is reported through the error message.
    """
    try:
        import re as _re

        # The case-insensitive search already covers every input an exact
        # case-sensitive lookup could accept, so no separate fallback is needed.
        match = _re.search(r"<REPORT>([\s\S]*?)</REPORT>", text, flags=_re.IGNORECASE)
        if match is None:
            return None, "未找到 <REPORT> 或 </REPORT> 标签,或标签顺序错误"

        content = match.group(1).strip()
        if not content:
            return None, "JSON 内容为空"

        try:
            data = json_loads(content)
        except Exception as json_err:
            return None, f"JSON 解析失败: {str(json_err)}"

        if isinstance(data, (dict, list)):
            return data, None
        return None, f"JSON 解析结果不是字典或数组,而是 {type(data).__name__}"
    except Exception as e:
        return None, f"解析过程发生异常: {str(e)}"
73
+