jarvis-ai-assistant 0.3.30__py3-none-any.whl → 0.7.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jarvis/__init__.py +1 -1
- jarvis/jarvis_agent/__init__.py +458 -152
- jarvis/jarvis_agent/agent_manager.py +17 -13
- jarvis/jarvis_agent/builtin_input_handler.py +2 -6
- jarvis/jarvis_agent/config_editor.py +2 -7
- jarvis/jarvis_agent/event_bus.py +82 -12
- jarvis/jarvis_agent/file_context_handler.py +329 -0
- jarvis/jarvis_agent/file_methodology_manager.py +3 -4
- jarvis/jarvis_agent/jarvis.py +628 -55
- jarvis/jarvis_agent/language_extractors/__init__.py +57 -0
- jarvis/jarvis_agent/language_extractors/c_extractor.py +21 -0
- jarvis/jarvis_agent/language_extractors/cpp_extractor.py +21 -0
- jarvis/jarvis_agent/language_extractors/go_extractor.py +21 -0
- jarvis/jarvis_agent/language_extractors/java_extractor.py +84 -0
- jarvis/jarvis_agent/language_extractors/javascript_extractor.py +79 -0
- jarvis/jarvis_agent/language_extractors/python_extractor.py +21 -0
- jarvis/jarvis_agent/language_extractors/rust_extractor.py +21 -0
- jarvis/jarvis_agent/language_extractors/typescript_extractor.py +84 -0
- jarvis/jarvis_agent/language_support_info.py +486 -0
- jarvis/jarvis_agent/main.py +34 -10
- jarvis/jarvis_agent/memory_manager.py +7 -16
- jarvis/jarvis_agent/methodology_share_manager.py +10 -16
- jarvis/jarvis_agent/prompt_manager.py +1 -1
- jarvis/jarvis_agent/prompts.py +193 -171
- jarvis/jarvis_agent/protocols.py +8 -12
- jarvis/jarvis_agent/run_loop.py +105 -9
- jarvis/jarvis_agent/session_manager.py +2 -3
- jarvis/jarvis_agent/share_manager.py +20 -22
- jarvis/jarvis_agent/shell_input_handler.py +1 -2
- jarvis/jarvis_agent/stdio_redirect.py +295 -0
- jarvis/jarvis_agent/task_analyzer.py +31 -6
- jarvis/jarvis_agent/task_manager.py +11 -27
- jarvis/jarvis_agent/tool_executor.py +2 -3
- jarvis/jarvis_agent/tool_share_manager.py +12 -24
- jarvis/jarvis_agent/utils.py +5 -1
- jarvis/jarvis_agent/web_bridge.py +189 -0
- jarvis/jarvis_agent/web_output_sink.py +53 -0
- jarvis/jarvis_agent/web_server.py +786 -0
- jarvis/jarvis_c2rust/__init__.py +26 -0
- jarvis/jarvis_c2rust/cli.py +575 -0
- jarvis/jarvis_c2rust/collector.py +250 -0
- jarvis/jarvis_c2rust/constants.py +26 -0
- jarvis/jarvis_c2rust/library_replacer.py +1254 -0
- jarvis/jarvis_c2rust/llm_module_agent.py +1272 -0
- jarvis/jarvis_c2rust/loaders.py +207 -0
- jarvis/jarvis_c2rust/models.py +28 -0
- jarvis/jarvis_c2rust/optimizer.py +2157 -0
- jarvis/jarvis_c2rust/scanner.py +1681 -0
- jarvis/jarvis_c2rust/transpiler.py +2983 -0
- jarvis/jarvis_c2rust/utils.py +385 -0
- jarvis/jarvis_code_agent/build_validation_config.py +132 -0
- jarvis/jarvis_code_agent/code_agent.py +1371 -220
- jarvis/jarvis_code_agent/code_analyzer/__init__.py +65 -0
- jarvis/jarvis_code_agent/code_analyzer/base_language.py +74 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/__init__.py +44 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/base.py +106 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/cmake.py +74 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/detector.py +125 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/fallback.py +72 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/go.py +70 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/java_gradle.py +53 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/java_maven.py +47 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/makefile.py +61 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/nodejs.py +110 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/python.py +154 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/rust.py +110 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator/validator.py +153 -0
- jarvis/jarvis_code_agent/code_analyzer/build_validator.py +43 -0
- jarvis/jarvis_code_agent/code_analyzer/context_manager.py +648 -0
- jarvis/jarvis_code_agent/code_analyzer/context_recommender.py +18 -0
- jarvis/jarvis_code_agent/code_analyzer/dependency_analyzer.py +132 -0
- jarvis/jarvis_code_agent/code_analyzer/file_ignore.py +330 -0
- jarvis/jarvis_code_agent/code_analyzer/impact_analyzer.py +781 -0
- jarvis/jarvis_code_agent/code_analyzer/language_registry.py +185 -0
- jarvis/jarvis_code_agent/code_analyzer/language_support.py +110 -0
- jarvis/jarvis_code_agent/code_analyzer/languages/__init__.py +49 -0
- jarvis/jarvis_code_agent/code_analyzer/languages/c_cpp_language.py +299 -0
- jarvis/jarvis_code_agent/code_analyzer/languages/go_language.py +215 -0
- jarvis/jarvis_code_agent/code_analyzer/languages/java_language.py +212 -0
- jarvis/jarvis_code_agent/code_analyzer/languages/javascript_language.py +254 -0
- jarvis/jarvis_code_agent/code_analyzer/languages/python_language.py +269 -0
- jarvis/jarvis_code_agent/code_analyzer/languages/rust_language.py +281 -0
- jarvis/jarvis_code_agent/code_analyzer/languages/typescript_language.py +280 -0
- jarvis/jarvis_code_agent/code_analyzer/llm_context_recommender.py +605 -0
- jarvis/jarvis_code_agent/code_analyzer/structured_code.py +556 -0
- jarvis/jarvis_code_agent/code_analyzer/symbol_extractor.py +252 -0
- jarvis/jarvis_code_agent/code_analyzer/tree_sitter_extractor.py +58 -0
- jarvis/jarvis_code_agent/lint.py +501 -8
- jarvis/jarvis_code_agent/utils.py +141 -0
- jarvis/jarvis_code_analysis/code_review.py +493 -584
- jarvis/jarvis_data/config_schema.json +128 -12
- jarvis/jarvis_git_squash/main.py +4 -5
- jarvis/jarvis_git_utils/git_commiter.py +82 -75
- jarvis/jarvis_mcp/sse_mcp_client.py +22 -29
- jarvis/jarvis_mcp/stdio_mcp_client.py +12 -13
- jarvis/jarvis_mcp/streamable_mcp_client.py +15 -14
- jarvis/jarvis_memory_organizer/memory_organizer.py +55 -74
- jarvis/jarvis_methodology/main.py +32 -48
- jarvis/jarvis_multi_agent/__init__.py +287 -55
- jarvis/jarvis_multi_agent/main.py +36 -4
- jarvis/jarvis_platform/base.py +524 -202
- jarvis/jarvis_platform/human.py +7 -8
- jarvis/jarvis_platform/kimi.py +30 -36
- jarvis/jarvis_platform/openai.py +88 -25
- jarvis/jarvis_platform/registry.py +26 -10
- jarvis/jarvis_platform/tongyi.py +24 -25
- jarvis/jarvis_platform/yuanbao.py +32 -43
- jarvis/jarvis_platform_manager/main.py +66 -77
- jarvis/jarvis_platform_manager/service.py +8 -13
- jarvis/jarvis_rag/cli.py +53 -55
- jarvis/jarvis_rag/embedding_manager.py +13 -18
- jarvis/jarvis_rag/llm_interface.py +8 -9
- jarvis/jarvis_rag/query_rewriter.py +10 -21
- jarvis/jarvis_rag/rag_pipeline.py +24 -27
- jarvis/jarvis_rag/reranker.py +4 -5
- jarvis/jarvis_rag/retriever.py +28 -30
- jarvis/jarvis_sec/__init__.py +305 -0
- jarvis/jarvis_sec/agents.py +143 -0
- jarvis/jarvis_sec/analysis.py +276 -0
- jarvis/jarvis_sec/checkers/__init__.py +32 -0
- jarvis/jarvis_sec/checkers/c_checker.py +2680 -0
- jarvis/jarvis_sec/checkers/rust_checker.py +1108 -0
- jarvis/jarvis_sec/cli.py +139 -0
- jarvis/jarvis_sec/clustering.py +1439 -0
- jarvis/jarvis_sec/file_manager.py +427 -0
- jarvis/jarvis_sec/parsers.py +73 -0
- jarvis/jarvis_sec/prompts.py +268 -0
- jarvis/jarvis_sec/report.py +336 -0
- jarvis/jarvis_sec/review.py +453 -0
- jarvis/jarvis_sec/status.py +264 -0
- jarvis/jarvis_sec/types.py +20 -0
- jarvis/jarvis_sec/utils.py +499 -0
- jarvis/jarvis_sec/verification.py +848 -0
- jarvis/jarvis_sec/workflow.py +226 -0
- jarvis/jarvis_smart_shell/main.py +38 -87
- jarvis/jarvis_stats/cli.py +2 -2
- jarvis/jarvis_stats/stats.py +8 -8
- jarvis/jarvis_stats/storage.py +15 -21
- jarvis/jarvis_stats/visualizer.py +1 -1
- jarvis/jarvis_tools/clear_memory.py +3 -20
- jarvis/jarvis_tools/cli/main.py +21 -23
- jarvis/jarvis_tools/edit_file.py +1019 -132
- jarvis/jarvis_tools/execute_script.py +83 -25
- jarvis/jarvis_tools/file_analyzer.py +6 -9
- jarvis/jarvis_tools/generate_new_tool.py +14 -21
- jarvis/jarvis_tools/lsp_client.py +1552 -0
- jarvis/jarvis_tools/methodology.py +2 -3
- jarvis/jarvis_tools/read_code.py +1736 -35
- jarvis/jarvis_tools/read_symbols.py +140 -0
- jarvis/jarvis_tools/read_webpage.py +12 -13
- jarvis/jarvis_tools/registry.py +427 -200
- jarvis/jarvis_tools/retrieve_memory.py +20 -19
- jarvis/jarvis_tools/rewrite_file.py +72 -158
- jarvis/jarvis_tools/save_memory.py +3 -15
- jarvis/jarvis_tools/search_web.py +18 -18
- jarvis/jarvis_tools/sub_agent.py +36 -43
- jarvis/jarvis_tools/sub_code_agent.py +25 -26
- jarvis/jarvis_tools/virtual_tty.py +55 -33
- jarvis/jarvis_utils/clipboard.py +7 -10
- jarvis/jarvis_utils/config.py +232 -45
- jarvis/jarvis_utils/embedding.py +8 -5
- jarvis/jarvis_utils/fzf.py +8 -8
- jarvis/jarvis_utils/git_utils.py +225 -36
- jarvis/jarvis_utils/globals.py +3 -3
- jarvis/jarvis_utils/http.py +1 -1
- jarvis/jarvis_utils/input.py +99 -48
- jarvis/jarvis_utils/jsonnet_compat.py +465 -0
- jarvis/jarvis_utils/methodology.py +52 -48
- jarvis/jarvis_utils/utils.py +819 -491
- jarvis_ai_assistant-0.7.6.dist-info/METADATA +600 -0
- jarvis_ai_assistant-0.7.6.dist-info/RECORD +218 -0
- {jarvis_ai_assistant-0.3.30.dist-info → jarvis_ai_assistant-0.7.6.dist-info}/entry_points.txt +4 -0
- jarvis/jarvis_agent/config.py +0 -92
- jarvis/jarvis_agent/edit_file_handler.py +0 -296
- jarvis/jarvis_platform/ai8.py +0 -332
- jarvis/jarvis_tools/ask_user.py +0 -54
- jarvis_ai_assistant-0.3.30.dist-info/METADATA +0 -381
- jarvis_ai_assistant-0.3.30.dist-info/RECORD +0 -137
- {jarvis_ai_assistant-0.3.30.dist-info → jarvis_ai_assistant-0.7.6.dist-info}/WHEEL +0 -0
- {jarvis_ai_assistant-0.3.30.dist-info → jarvis_ai_assistant-0.7.6.dist-info}/licenses/LICENSE +0 -0
- {jarvis_ai_assistant-0.3.30.dist-info → jarvis_ai_assistant-0.7.6.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,427 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
状态文件管理模块
|
|
4
|
+
|
|
5
|
+
重构后的3个配置文件:
|
|
6
|
+
1. candidates.jsonl - 只扫结果文件:保存每个原始告警的信息,包括gid
|
|
7
|
+
2. clusters.jsonl - 聚类信息文件:所有聚类(包括无效聚类),每个聚类包括的gids
|
|
8
|
+
3. analysis.jsonl - 分析结果文件:包括所有聚类,聚类中哪些问题是问题,哪些问题是误报
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from typing import Any, Dict, List, Set, Tuple
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
import json
|
|
14
|
+
import typer
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# ==================== 文件路径定义 ====================
|
|
18
|
+
|
|
19
|
+
def get_candidates_file(sec_dir: Path) -> Path:
    """Return the path of the scan-only results file (candidates.jsonl) under ``sec_dir``."""
    return sec_dir.joinpath("candidates.jsonl")
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def get_clusters_file(sec_dir: Path) -> Path:
    """Return the path of the cluster information file (clusters.jsonl) under ``sec_dir``."""
    return sec_dir.joinpath("clusters.jsonl")
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def get_analysis_file(sec_dir: Path) -> Path:
    """Return the path of the analysis results file (analysis.jsonl) under ``sec_dir``."""
    return sec_dir.joinpath("analysis.jsonl")
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# ==================== 只扫结果文件 (candidates.jsonl) ====================
|
|
35
|
+
|
|
36
|
+
def save_candidates(sec_dir: Path, candidates: List[Dict]) -> None:
    """
    Write the scan-only results to candidates.jsonl, replacing previous content.

    Format: one candidate per line, holding the original finding data plus its gid:
    {
        "gid": 1,
        "language": "c",
        "category": "buffer_overflow",
        "pattern": "strcpy",
        "file": "src/main.c",
        "line": 42,
        "evidence": "...",
        "confidence": 0.8,
        "severity": "high"
    }
    """
    path = get_candidates_file(sec_dir)
    path.parent.mkdir(parents=True, exist_ok=True)

    # "w" mode truncates the file so it always mirrors the latest candidate list.
    with path.open("w", encoding="utf-8") as out:
        out.writelines(
            json.dumps(candidate, ensure_ascii=False) + "\n"
            for candidate in candidates
        )

    # The status message is best-effort: a console failure must not fail the save.
    try:
        typer.secho(f"[jarvis-sec] 已保存 {len(candidates)} 个候选到 {path}", fg=typer.colors.GREEN)
    except Exception:
        pass
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def load_candidates(sec_dir: Path) -> List[Dict]:
    """
    Load the scan-only results from candidates.jsonl.

    Returns: the list of parsed candidate records (one per non-empty line that
    parses as JSON); an empty list when the file does not exist.
    """
    path = get_candidates_file(sec_dir)
    candidates = []

    if not path.exists():
        return candidates

    try:
        with path.open("r", encoding="utf-8", errors="ignore") as f:
            for raw_line in f:
                text = raw_line.strip()
                if text:
                    # Lines that fail to parse are skipped without aborting the load.
                    try:
                        candidates.append(json.loads(text))
                    except Exception:
                        pass
    except Exception as e:
        # Report the failure if possible; records read so far are still returned.
        try:
            typer.secho(f"[jarvis-sec] 警告:加载 candidates.jsonl 失败: {e}", fg=typer.colors.YELLOW)
        except Exception:
            pass

    return candidates
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def get_all_candidate_gids(sec_dir: Path) -> Set[int]:
    """Return the set of gids (integers >= 1) found in the loaded candidates."""
    collected = set()
    for entry in load_candidates(sec_dir):
        # Records whose gid is missing or not convertible to int are ignored.
        try:
            value = int(entry.get("gid", 0))
        except Exception:
            continue
        if value >= 1:
            collected.add(value)
    return collected
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
# ==================== 聚类信息文件 (clusters.jsonl) ====================
|
|
112
|
+
|
|
113
|
+
def save_cluster(sec_dir: Path, cluster: Dict) -> None:
    """
    Append a single cluster record to clusters.jsonl.

    Format: one cluster record per line:
    {
        "cluster_id": "file_path|batch_index|index",  # unique identifier
        "file": "src/main.c",
        "batch_index": 1,
        "cluster_index": 0,  # index of the cluster within its batch
        "gids": [1, 2, 3],  # gids contained in this cluster
        "verification": "verify the security risk of the candidates",  # verification description
        "is_invalid": false,  # whether the cluster is invalid
        "invalid_reason": "",  # reason (when is_invalid is true)
        "created_at": "2024-01-01T00:00:00"  # creation time (optional)
    }
    """
    path = get_clusters_file(sec_dir)
    path.parent.mkdir(parents=True, exist_ok=True)

    # Append mode: earlier records are kept, the new one goes on its own line.
    with path.open("a", encoding="utf-8") as out:
        record_line = json.dumps(cluster, ensure_ascii=False) + "\n"
        out.write(record_line)
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def load_clusters(sec_dir: Path) -> List[Dict]:
    """
    Load all clusters from clusters.jsonl.

    Records sharing a cluster_id are merged: their gid lists are unioned and
    sorted, and every other field takes the value from the latest record.
    Records without a cluster_id are skipped.

    Returns: the list of merged cluster records (empty when the file is missing
    or cannot be read).
    """
    path = get_clusters_file(sec_dir)
    if not path.exists():
        return []

    by_cluster_id: Dict[str, Dict] = {}
    try:
        with path.open("r", encoding="utf-8", errors="ignore") as f:
            for raw_line in f:
                text = raw_line.strip()
                if not text:
                    continue
                # A line that cannot be parsed or merged is skipped.
                try:
                    record = json.loads(text)
                    key = record.get("cluster_id", "")
                    if not key:
                        continue
                    if key not in by_cluster_id:
                        # First record for this cluster_id: keep it as the base.
                        by_cluster_id[key] = record
                        continue

                    base = by_cluster_id[key]
                    # Union the gid lists (deduplicated, sorted).
                    combined = set(base.get("gids", [])) | set(record.get("gids", []))
                    base["gids"] = sorted(combined)
                    # Latest values win for the remaining fields.
                    for field, value in record.items():
                        if field not in ("gids", "cluster_id"):
                            base[field] = value
                except Exception:
                    pass
    except Exception as e:
        try:
            typer.secho(f"[jarvis-sec] 警告:加载 clusters.jsonl 失败: {e}", fg=typer.colors.YELLOW)
        except Exception:
            pass
        return []

    return list(by_cluster_id.values())
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def get_all_clustered_gids(sec_dir: Path) -> Set[int]:
    """Return the set of gids (integers >= 1) that appear in any loaded cluster."""
    collected = set()
    for record in load_clusters(sec_dir):
        members = record.get("gids", [])
        if not isinstance(members, list):
            continue
        for raw_gid in members:
            # Values that cannot be converted to int are ignored.
            try:
                value = int(raw_gid)
            except Exception:
                continue
            if value >= 1:
                collected.add(value)
    return collected
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def validate_clustering_completeness(sec_dir: Path) -> Tuple[bool, Set[int]]:
    """
    Check that every candidate gid has been assigned to a cluster.

    Returns: (is_complete, missing_gids), where missing_gids are the candidate
    gids that appear in no cluster.
    """
    candidate_gids = get_all_candidate_gids(sec_dir)
    clustered_gids = get_all_clustered_gids(sec_dir)
    missing_gids = candidate_gids.difference(clustered_gids)
    return not missing_gids, missing_gids
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
# ==================== 分析结果文件 (analysis.jsonl) ====================
|
|
218
|
+
|
|
219
|
+
def save_analysis_result(sec_dir: Path, analysis: Dict) -> None:
    """
    Append a single analysis result to analysis.jsonl.

    Format: one analysis record per line:
    {
        "cluster_id": "file_path|batch_index|index",  # id of the matching cluster
        "file": "src/main.c",
        "batch_index": 1,
        "cluster_index": 0,
        "gids": [1, 2, 3],  # all gids contained in the cluster
        "verified_gids": [1, 2],  # gids confirmed as issues (has_risk: true)
        "false_positive_gids": [3],  # gids judged false positives (has_risk: false)
        "issues": [  # detailed issue list (only for verified_gids)
            {
                "gid": 1,
                "has_risk": true,
                "verification_notes": "...",
                "severity": "high",
                ...
            },
            ...
        ],
        "analyzed_at": "2024-01-01T00:00:00"  # analysis time (optional)
    }
    """
    path = get_analysis_file(sec_dir)
    path.parent.mkdir(parents=True, exist_ok=True)

    # Append mode: earlier records are kept, the new one goes on its own line.
    with path.open("a", encoding="utf-8") as out:
        record_line = json.dumps(analysis, ensure_ascii=False) + "\n"
        out.write(record_line)
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
def load_analysis_results(sec_dir: Path) -> List[Dict]:
    """
    Load all analysis results from analysis.jsonl.

    Records sharing a cluster_id are merged: gids, verified_gids and
    false_positive_gids are unioned and sorted, issues are deduplicated by gid
    (the latest issue wins), and every other field takes the latest value.
    Records without a cluster_id are skipped.

    Returns: the list of merged analysis records (empty when the file is
    missing or cannot be read).
    """
    path = get_analysis_file(sec_dir)
    if not path.exists():
        return []

    gid_fields = ("gids", "verified_gids", "false_positive_gids")
    specially_merged = gid_fields + ("issues", "cluster_id")
    by_cluster_id: Dict[str, Dict] = {}
    try:
        with path.open("r", encoding="utf-8", errors="ignore") as f:
            for raw_line in f:
                text = raw_line.strip()
                if not text:
                    continue
                # A line that cannot be parsed or merged is skipped.
                try:
                    record = json.loads(text)
                    key = record.get("cluster_id", "")
                    if not key:
                        continue
                    if key not in by_cluster_id:
                        # First record for this cluster_id: keep it as the base.
                        by_cluster_id[key] = record
                        continue

                    base = by_cluster_id[key]

                    # Union each gid list in turn (deduplicated, sorted).
                    for field in gid_fields:
                        combined = set(base.get(field, [])) | set(record.get(field, []))
                        base[field] = sorted(combined)

                    # Deduplicate issues by gid; newer issues replace older ones.
                    issues_by_gid = {item.get("gid"): item for item in base.get("issues", [])}
                    for item in record.get("issues", []):
                        issue_gid = item.get("gid")
                        if issue_gid:
                            issues_by_gid[issue_gid] = item
                    base["issues"] = list(issues_by_gid.values())

                    # Latest values win for the remaining fields.
                    for field, value in record.items():
                        if field not in specially_merged:
                            base[field] = value
                except Exception:
                    pass
    except Exception as e:
        try:
            typer.secho(f"[jarvis-sec] 警告:加载 analysis.jsonl 失败: {e}", fg=typer.colors.YELLOW)
        except Exception:
            pass
        return []

    return list(by_cluster_id.values())
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
def get_all_analyzed_gids(sec_dir: Path) -> Set[int]:
    """Return the set of gids (integers >= 1) listed under "gids" in any analysis result."""
    collected = set()
    for record in load_analysis_results(sec_dir):
        members = record.get("gids", [])
        if not isinstance(members, list):
            continue
        for raw_gid in members:
            # Values that cannot be converted to int are ignored.
            try:
                value = int(raw_gid)
            except Exception:
                continue
            if value >= 1:
                collected.add(value)
    return collected
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
def get_verified_issue_gids(sec_dir: Path) -> Set[int]:
    """Return the set of gids (integers >= 1) listed under "verified_gids" in any analysis result."""
    collected = set()
    for record in load_analysis_results(sec_dir):
        members = record.get("verified_gids", [])
        if not isinstance(members, list):
            continue
        for raw_gid in members:
            # Values that cannot be converted to int are ignored.
            try:
                value = int(raw_gid)
            except Exception:
                continue
            if value >= 1:
                collected.add(value)
    return collected
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
def get_false_positive_gids(sec_dir: Path) -> Set[int]:
    """Return the set of gids (integers >= 1) listed under "false_positive_gids" in any analysis result."""
    collected = set()
    for record in load_analysis_results(sec_dir):
        members = record.get("false_positive_gids", [])
        if not isinstance(members, list):
            continue
        for raw_gid in members:
            # Values that cannot be converted to int are ignored.
            try:
                value = int(raw_gid)
            except Exception:
                continue
            if value >= 1:
                collected.add(value)
    return collected
|
|
372
|
+
|
|
373
|
+
|
|
374
|
+
# ==================== 断点恢复状态检查 ====================
|
|
375
|
+
|
|
376
|
+
def get_resume_status(sec_dir: Path) -> Dict[str, Any]:
    """
    Derive the resume (checkpoint) status from the three state files.

    Returns: {
        "has_candidates": bool,       # whether scan-only results exist
        "has_clusters": bool,         # whether cluster records exist
        "has_analysis": bool,         # whether analysis results exist
        "candidates_count": int,      # number of candidates
        "clusters_count": int,        # number of (merged) clusters
        "analysis_count": int,        # number of (merged) analysis results
        "clustering_complete": bool,  # whether every candidate gid is clustered
        "missing_gids": Set[int],     # candidate gids not in any cluster
    }

    "clustering_complete" and "missing_gids" are only computed when candidates
    exist; otherwise they keep their defaults (False and an empty set).
    """
    # The values are heterogeneous (bool / int / set), hence Dict[str, Any].
    status: Dict[str, Any] = {
        "has_candidates": False,
        "has_clusters": False,
        "has_analysis": False,
        "candidates_count": 0,
        "clusters_count": 0,
        "analysis_count": 0,
        "clustering_complete": False,
        "missing_gids": set(),
    }

    # Scan-only results.
    candidates = load_candidates(sec_dir)
    if candidates:
        status["has_candidates"] = True
        status["candidates_count"] = len(candidates)

    # Cluster records.
    clusters = load_clusters(sec_dir)
    if clusters:
        status["has_clusters"] = True
        status["clusters_count"] = len(clusters)

    # Analysis results.
    results = load_analysis_results(sec_dir)
    if results:
        status["has_analysis"] = True
        status["analysis_count"] = len(results)

    # Clustering completeness is only meaningful when there are candidates.
    if status["has_candidates"]:
        is_complete, missing_gids = validate_clustering_completeness(sec_dir)
        status["clustering_complete"] = is_complete
        status["missing_gids"] = missing_gids

    return status
|
|
427
|
+
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""解析模块 - 用于解析Agent返回的JSON格式摘要"""
|
|
3
|
+
|
|
4
|
+
from typing import List, Optional
|
|
5
|
+
from jarvis.jarvis_utils.jsonnet_compat import loads as json_loads
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def parse_clusters_from_text(text: str) -> tuple[Optional[List], Optional[str]]:
    """
    Extract the <CLUSTERS>...</CLUSTERS> payload from ``text`` and parse it as a JSON array.

    Tag matching is case-insensitive and the first tag pair found is used.

    Returns: (parsed list, None) on success, or (None, error message) when the
    tags are missing or out of order, the payload is empty, parsing fails, or
    the parsed value is not a list. Exceptions are reported through the error
    message rather than raised.
    """
    try:
        import re as _re

        # The case-insensitive, non-greedy search already covers every input in
        # which an opening tag is followed by a closing tag, so no separate
        # case-sensitive lookup is needed.
        match = _re.search(r"<CLUSTERS>([\s\S]*?)</CLUSTERS>", text, flags=_re.IGNORECASE)
        if not match:
            return None, "未找到 <CLUSTERS> 或 </CLUSTERS> 标签,或标签顺序错误"

        content = match.group(1).strip()
        if not content:
            return None, "JSON 内容为空"

        try:
            data = json_loads(content)
        except Exception as json_err:
            error_msg = f"JSON 解析失败: {str(json_err)}"
            return None, error_msg

        if isinstance(data, list):
            return data, None
        return None, f"JSON 解析结果不是数组,而是 {type(data).__name__}"
    except Exception as e:
        return None, f"解析过程发生异常: {str(e)}"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def try_parse_summary_report(text: str) -> tuple[Optional[object], Optional[str]]:
    """
    Extract the <REPORT>...</REPORT> payload from ``text`` and parse it as JSON (dict or list).

    Tag matching is case-insensitive and the first tag pair found is used.

    Returns: (data, None) on success, or (None, error message) when the tags
    are missing or out of order, the payload is empty, parsing fails, or the
    parsed value is neither a dict nor a list. Exceptions are reported through
    the error message rather than raised.
    """
    try:
        import re as _re

        # The case-insensitive, non-greedy search already covers every input in
        # which an opening tag is followed by a closing tag, so no separate
        # case-sensitive lookup is needed.
        match = _re.search(r"<REPORT>([\s\S]*?)</REPORT>", text, flags=_re.IGNORECASE)
        if not match:
            return None, "未找到 <REPORT> 或 </REPORT> 标签,或标签顺序错误"

        content = match.group(1).strip()
        if not content:
            return None, "JSON 内容为空"

        try:
            data = json_loads(content)
        except Exception as json_err:
            error_msg = f"JSON 解析失败: {str(json_err)}"
            return None, error_msg

        if isinstance(data, (dict, list)):
            return data, None
        return None, f"JSON 解析结果不是字典或数组,而是 {type(data).__name__}"
    except Exception as e:
        return None, f"解析过程发生异常: {str(e)}"
|
|
73
|
+
|