jarvis-ai-assistant 0.4.2__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jarvis/__init__.py +1 -1
- jarvis/jarvis_agent/__init__.py +117 -6
- jarvis/jarvis_agent/jarvis.py +6 -0
- jarvis/jarvis_agent/share_manager.py +8 -1
- jarvis/jarvis_agent/task_planner.py +218 -0
- jarvis/jarvis_code_agent/code_agent.py +99 -3
- jarvis/jarvis_code_analysis/code_review.py +483 -568
- jarvis/jarvis_data/config_schema.json +8 -3
- jarvis/jarvis_sec/README.md +180 -0
- jarvis/jarvis_sec/__init__.py +674 -0
- jarvis/jarvis_sec/checkers/__init__.py +33 -0
- jarvis/jarvis_sec/checkers/c_checker.py +1269 -0
- jarvis/jarvis_sec/checkers/rust_checker.py +367 -0
- jarvis/jarvis_sec/cli.py +110 -0
- jarvis/jarvis_sec/prompts.py +324 -0
- jarvis/jarvis_sec/report.py +260 -0
- jarvis/jarvis_sec/types.py +20 -0
- jarvis/jarvis_sec/workflow.py +513 -0
- jarvis/jarvis_tools/registry.py +20 -14
- jarvis/jarvis_tools/sub_agent.py +4 -3
- jarvis/jarvis_tools/sub_code_agent.py +3 -3
- jarvis/jarvis_utils/config.py +14 -2
- jarvis/jarvis_utils/methodology.py +25 -19
- jarvis/jarvis_utils/utils.py +193 -2
- {jarvis_ai_assistant-0.4.2.dist-info → jarvis_ai_assistant-0.5.1.dist-info}/METADATA +1 -1
- {jarvis_ai_assistant-0.4.2.dist-info → jarvis_ai_assistant-0.5.1.dist-info}/RECORD +30 -19
- {jarvis_ai_assistant-0.4.2.dist-info → jarvis_ai_assistant-0.5.1.dist-info}/entry_points.txt +2 -0
- {jarvis_ai_assistant-0.4.2.dist-info → jarvis_ai_assistant-0.5.1.dist-info}/WHEEL +0 -0
- {jarvis_ai_assistant-0.4.2.dist-info → jarvis_ai_assistant-0.5.1.dist-info}/licenses/LICENSE +0 -0
- {jarvis_ai_assistant-0.4.2.dist-info → jarvis_ai_assistant-0.5.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,513 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
OpenHarmony 安全演进多Agent套件 —— Workflow(阶段一,含可复现直扫基线)
|
|
4
|
+
|
|
5
|
+
目标(阶段一):
|
|
6
|
+
- 识别指定模块的安全问题(内存管理、缓冲区操作、错误处理等),检出率≥60% 为目标。
|
|
7
|
+
- 在不依赖外部服务的前提下,提供一个“可复现、可离线”的直扫基线(direct scan)。
|
|
8
|
+
- 当前阶段采用“先直扫拆分子任务,再由单Agent逐条分析”的模式;保留接口便于后续切换。
|
|
9
|
+
|
|
10
|
+
本模块提供:
|
|
11
|
+
- direct_scan(entry_path, languages=None, exclude_dirs=None) -> Dict:纯Python+正则/命令行辅助扫描,生成结构化结果
|
|
12
|
+
- format_markdown_report(result_json: Dict) -> str:将结构化结果转为可读的 Markdown
|
|
13
|
+
- run_security_analysis_fast(entry_path, languages=None, exclude_dirs=None) -> str:一键运行直扫并输出(JSON + Markdown)
|
|
14
|
+
- run_with_multi_agent(entry_path, languages=None) -> str:使用单Agent逐条子任务分析模式(复用 jarvis.jarvis_sec.__init__ 的实现)
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
import json
|
|
18
|
+
import os
|
|
19
|
+
import re
|
|
20
|
+
import shutil
|
|
21
|
+
import subprocess
|
|
22
|
+
from dataclasses import asdict
|
|
23
|
+
from pathlib import Path
|
|
24
|
+
from typing import Dict, Iterable, List, Optional, Tuple
|
|
25
|
+
from jarvis.jarvis_sec.checkers import analyze_c_files, analyze_rust_files
|
|
26
|
+
from jarvis.jarvis_sec.report import build_json_and_markdown
|
|
27
|
+
from jarvis.jarvis_sec.types import Issue
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# ---------------------------
|
|
31
|
+
# 数据结构
|
|
32
|
+
# ---------------------------
|
|
33
|
+
|
|
34
|
+
# Issue dataclass is provided by jarvis.jarvis_sec.types to avoid circular imports
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# ---------------------------
|
|
38
|
+
# 工具函数
|
|
39
|
+
# ---------------------------
|
|
40
|
+
|
|
41
|
+
def _rg_available() -> bool:
|
|
42
|
+
return shutil.which("rg") is not None
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _iter_source_files(
|
|
46
|
+
entry_path: str,
|
|
47
|
+
languages: Optional[List[str]] = None,
|
|
48
|
+
exclude_dirs: Optional[List[str]] = None,
|
|
49
|
+
) -> Iterable[Path]:
|
|
50
|
+
"""
|
|
51
|
+
递归枚举源文件,支持按扩展名过滤与目录排除。
|
|
52
|
+
默认语言扩展:c, cpp, h, hpp, rs
|
|
53
|
+
"""
|
|
54
|
+
entry = Path(entry_path)
|
|
55
|
+
if not entry.exists():
|
|
56
|
+
return []
|
|
57
|
+
|
|
58
|
+
exts = set((languages or ["c", "cpp", "h", "hpp", "rs"]))
|
|
59
|
+
excludes = set(exclude_dirs or [".git", "build", "out", "target", "third_party", "vendor"])
|
|
60
|
+
|
|
61
|
+
for p in entry.rglob("*"):
|
|
62
|
+
if not p.is_file():
|
|
63
|
+
continue
|
|
64
|
+
# 目录排除(任意祖先包含即排除)
|
|
65
|
+
skip = False
|
|
66
|
+
for parent in p.parents:
|
|
67
|
+
if parent.name in excludes:
|
|
68
|
+
skip = True
|
|
69
|
+
break
|
|
70
|
+
if skip:
|
|
71
|
+
continue
|
|
72
|
+
|
|
73
|
+
suf = p.suffix.lstrip(".").lower()
|
|
74
|
+
if suf in exts:
|
|
75
|
+
yield p.relative_to(entry)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _read_file_lines(base: Path, relpath: Path) -> List[str]:
|
|
79
|
+
try:
|
|
80
|
+
return (base / relpath).read_text(errors="ignore").splitlines()
|
|
81
|
+
except Exception:
|
|
82
|
+
return []
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _safe_evidence(line: str, max_len: int = 200) -> str:
|
|
86
|
+
s = line.strip().replace("\t", " ")
|
|
87
|
+
if len(s) > max_len:
|
|
88
|
+
return s[: max_len - 3] + "..."
|
|
89
|
+
return s
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _try_rg_search(pattern: str, files: List[Path], cwd: Path) -> List[Tuple[Path, int, str]]:
|
|
93
|
+
"""
|
|
94
|
+
使用 rg -n PATTERN file1 file2 ... 搜索,返回 (file, line, text)。
|
|
95
|
+
若 rg 不可用或失败,返回空列表。
|
|
96
|
+
"""
|
|
97
|
+
if not files:
|
|
98
|
+
return []
|
|
99
|
+
if not _rg_available():
|
|
100
|
+
return []
|
|
101
|
+
|
|
102
|
+
# rg 命令长度有限,分批执行
|
|
103
|
+
results: List[Tuple[Path, int, str]] = []
|
|
104
|
+
batch_size = 200
|
|
105
|
+
for i in range(0, len(files), batch_size):
|
|
106
|
+
batch = files[i : i + batch_size]
|
|
107
|
+
cmd = ["rg", "-n", pattern] + [str(cwd / f) for f in batch]
|
|
108
|
+
try:
|
|
109
|
+
proc = subprocess.run(
|
|
110
|
+
cmd, cwd=str(cwd), capture_output=True, text=True, check=False
|
|
111
|
+
)
|
|
112
|
+
if proc.returncode in (0, 1): # 0: 有匹配;1: 无匹配
|
|
113
|
+
out = proc.stdout.splitlines()
|
|
114
|
+
for line in out:
|
|
115
|
+
# 解析: path:lineno:content
|
|
116
|
+
# 注意: Windows 路径中可能含冒号,这里采用从右侧第一次冒号分割两次的方案
|
|
117
|
+
parts = line.split(":", 2)
|
|
118
|
+
if len(parts) < 3:
|
|
119
|
+
continue
|
|
120
|
+
fpath = Path(parts[0])
|
|
121
|
+
try:
|
|
122
|
+
lineno = int(parts[1])
|
|
123
|
+
except ValueError:
|
|
124
|
+
continue
|
|
125
|
+
text = parts[2]
|
|
126
|
+
try:
|
|
127
|
+
rel = fpath.relative_to(cwd)
|
|
128
|
+
except Exception:
|
|
129
|
+
# 回退:将绝对路径转相对路径
|
|
130
|
+
try:
|
|
131
|
+
rel = Path(os.path.relpath(fpath, cwd))
|
|
132
|
+
except Exception:
|
|
133
|
+
rel = fpath
|
|
134
|
+
results.append((rel, lineno, text))
|
|
135
|
+
except Exception:
|
|
136
|
+
# 忽略 rg 错误,交由纯Python扫描兜底
|
|
137
|
+
return []
|
|
138
|
+
return results
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
# ---------------------------
|
|
142
|
+
# 规则库(阶段一)
|
|
143
|
+
# ---------------------------
|
|
144
|
+
|
|
145
|
+
# C/C++ rules. Each pattern captures the matched API name in group 1.
# Unbounded string/format APIs.
C_UNSAFE_API = re.compile(r"\b(strcpy|strcat|gets|sprintf|vsprintf|scanf)\s*\(", re.IGNORECASE)
# Length-taking copy APIs whose size argument must be verified.
C_BOUNDARY_FUNCS = re.compile(r"\b(memcpy|memmove|strncpy|strncat)\s*\(", re.IGNORECASE)
# Allocation / release primitives (C and C++).
C_MEM_MGMT = re.compile(r"\b(malloc|calloc|realloc|free|new\s+|delete\b)", re.IGNORECASE)
# I/O and system calls whose return value should be checked.
C_IO_API = re.compile(r"\b(fopen|fclose|fread|fwrite|read|write|open|close)\s*\(", re.IGNORECASE)

# Rust rules.
R_UNSAFE = re.compile(r"\bunsafe\b")
R_RAW_PTR = re.compile(r"\*(mut|const)\s+[A-Za-z_]\w*")
R_FORGET = re.compile(r"\bmem::forget\b")
R_UNWRAP = re.compile(r"\bunwrap\s*\(|\bexpect\s*\(", re.IGNORECASE)
R_EXTERN_C = re.compile(r'extern\s+"C"')
# Matches "unsafe impl Send for T" as well as the generic form
# "unsafe impl<T: Bound<U>> Sync for Wrapper<T>"; the optional generic
# parameter list is skipped lazily up to the ">" that precedes the trait name.
# The second alternative is kept unchanged for backward compatibility.
R_UNSAFE_IMPL = re.compile(
    r"\bunsafe\s+impl\s*(?:<[^{;]*?>\s*)?(Send|Sync)\b|\bimpl\s+unsafe\s+(Send|Sync)\b"
)
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
# ---------------------------
|
|
159
|
+
# 扫描实现
|
|
160
|
+
# ---------------------------
|
|
161
|
+
|
|
162
|
+
def _scan_c_cpp(
    base: Path, relpath: Path, issues: List[Issue]
) -> None:
    """
    Heuristically scan one C/C++ file and append findings to ``issues``.

    Args:
        base: Scan root directory.
        relpath: Path of the file relative to ``base``; stored in each issue.
        issues: Output list, extended in place.

    Two passes are made over the file content:
    1. Line rules: every line is matched against the C_* patterns and one
       issue is emitted per matching rule, in rule order.
    2. A very rough use-after-free hint: for each variable passed to
       ``free(var);`` that is mentioned again later in the file, one issue is
       emitted on the first line containing that ``free`` call. This is only
       a hint and needs manual confirmation.
    """
    lines = _read_file_lines(base, relpath)
    if not lines:
        return

    # (pattern, category, description, suggestion, confidence, severity)
    line_rules = (
        (
            C_UNSAFE_API,
            "unsafe_api",
            "使用不安全/高风险字符串API,可能导致缓冲区溢出或格式化风险。",
            "替换为带边界的安全API(如 snprintf/strlcpy 等)或加入显式长度检查。",
            0.9,
            "high",
        ),
        (
            C_BOUNDARY_FUNCS,
            "buffer_overflow",
            "缓冲区操作涉及长度/边界,需确认长度来源是否可靠,避免越界。",
            "核对目标缓冲区大小与拷贝长度;对外部输入进行校验;优先使用安全封装。",
            0.7,
            "medium",
        ),
        (
            C_MEM_MGMT,
            "memory_mgmt",
            "涉及内存管理API,需确认分配/释放匹配,realloc 的返回值处理,以及空指针检查。",
            "确保 new/delete 与 malloc/free 匹配;realloc 先用临时变量接收;所有返回值做 NULL 检查。",
            0.65,
            "medium",
        ),
        (
            C_IO_API,
            "error_handling",
            "I/O/系统调用返回值可能未检查,存在错误处理缺失风险。",
            "检查返回值/errno;在错误路径上释放资源(句柄/内存/锁)。",
            0.6,
            "low",
        ),
    )

    for idx, line in enumerate(lines, start=1):
        for regex, category, description, suggestion, confidence, severity in line_rules:
            # One search per rule; group 1 is the matched API name.
            m = regex.search(line)
            if m is None:
                continue
            issues.append(
                Issue(
                    language="c/cpp",
                    category=category,
                    pattern=m.group(1),
                    file=str(relpath),
                    line=idx,
                    evidence=_safe_evidence(line),
                    description=description,
                    suggestion=suggestion,
                    confidence=confidence,
                    severity=severity,
                )
            )

    # Rough UAF hint: free(var); ... followed by another mention of var.
    text = "\n".join(lines)
    free_vars = re.findall(r"free\s*\(\s*([A-Za-z_]\w*)\s*\)\s*;", text)
    # dict.fromkeys de-duplicates while keeping first-seen order, so the order
    # of reported issues is the same on every run (a set would not be).
    for v in dict.fromkeys(free_vars):
        free_stmt = rf"free\s*\(\s*{re.escape(v)}\s*\)\s*;"
        # DOTALL lets "." span newlines between the free and the later mention.
        reuse = re.compile(rf"{free_stmt}.+?\b{re.escape(v)}\b", re.DOTALL)
        if not reuse.search(text):
            continue
        # Report the first line holding the free call as evidence.
        free_line = re.compile(free_stmt)
        for idx, line in enumerate(lines, start=1):
            if free_line.search(line):
                issues.append(
                    Issue(
                        language="c/cpp",
                        category="memory_mgmt",
                        pattern="use_after_free_suspect",
                        file=str(relpath),
                        line=idx,
                        evidence=_safe_evidence(line),
                        description=f"变量 {v} 在 free 后可能仍被使用(UAF线索,需人工确认)。",
                        suggestion="free 后将指针置 NULL;为变量生命周期建立清晰约束;添加动态/静态检测。",
                        confidence=0.55,
                        severity="high",
                    )
                )
                break
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
def _scan_rust(
    base: Path, relpath: Path, issues: List[Issue]
) -> None:
    """
    Heuristically scan one Rust file and append findings to ``issues``.

    Every line is matched against the R_* patterns; each matching rule adds
    one issue for that line, in the rule order listed below.

    Args:
        base: Scan root directory.
        relpath: Path of the file relative to ``base``; stored in each issue.
        issues: Output list, extended in place.
    """
    source_lines = _read_file_lines(base, relpath)
    if not source_lines:
        return

    # (pattern, category, reported pattern name, description, suggestion,
    #  confidence, severity)
    rules = (
        (
            R_UNSAFE,
            "unsafe_usage",
            "unsafe",
            "存在 unsafe 代码块/标识,需证明内存/别名/生命周期安全性。",
            "将不安全操作封装在最小作用域内,补充不变式与前置条件,优先使用安全抽象。",
            0.8,
            "high",
        ),
        (
            R_RAW_PTR,
            "unsafe_usage",
            "raw_pointer",
            "出现原始指针(*mut/*const),可能绕过借用检查器。",
            "使用引用/智能指针;必须使用时,谨慎证明无别名、对齐、生命周期安全。",
            0.75,
            "medium",
        ),
        (
            R_FORGET,
            "unsafe_usage",
            "mem::forget",
            "使用 mem::forget 可能导致资源泄漏或生命周期不匹配。",
            "评估必要性;可使用 ManuallyDrop 等更安全模式;确保不破坏析构语义。",
            0.7,
            "medium",
        ),
        (
            R_UNWRAP,
            "error_handling",
            "unwrap/expect",
            "直接 unwrap/expect 可能在错误条件下 panic,缺少健壮的错误处理。",
            "使用 ? 传播错误或 match 显式处理,返回 Result。",
            0.65,
            "low",
        ),
        (
            R_EXTERN_C,
            "ffi",
            'extern "C"',
            "FFI 边界需检查指针有效性、长度与生命周期,防止未定义行为。",
            "在FFI边界进行严格的参数校验与安全封装;记录安全不变式。",
            0.7,
            "medium",
        ),
        (
            R_UNSAFE_IMPL,
            "concurrency",
            "unsafe_impl_Send_or_Sync",
            "手写 unsafe impl Send/Sync 可能破坏并发安全保证。",
            "避免手写 unsafe impl;必要时严格证明线程安全前置条件。",
            0.7,
            "high",
        ),
    )

    for lineno, content in enumerate(source_lines, start=1):
        for regex, category, label, description, suggestion, confidence, severity in rules:
            if not regex.search(content):
                continue
            issues.append(
                Issue(
                    language="rust",
                    category=category,
                    pattern=label,
                    file=str(relpath),
                    line=lineno,
                    evidence=_safe_evidence(content),
                    description=description,
                    suggestion=suggestion,
                    confidence=confidence,
                    severity=severity,
                )
            )
|
|
372
|
+
|
|
373
|
+
|
|
374
|
+
# ---------------------------
|
|
375
|
+
# 汇总与报告
|
|
376
|
+
# ---------------------------
|
|
377
|
+
|
|
378
|
+
def direct_scan(
    entry_path: str,
    languages: Optional[List[str]] = None,
    exclude_dirs: Optional[List[str]] = None,
) -> Dict:
    """
    Direct-scan baseline: heuristic scan of C/C++/Rust sources.

    Source files are enumerated with ``_iter_source_files``, grouped by
    extension and handed to ``analyze_c_files`` / ``analyze_rust_files``.

    Returns:
        A dict with two keys:
        - "summary": total issue count, per-language and per-category counts,
          up to ten files with the most issues, the number of enumerated
          files and the resolved scan root.
        - "issues": every issue converted to a plain dict via ``asdict``.
    """
    root = Path(entry_path).resolve()
    source_files = list(_iter_source_files(entry_path, languages, exclude_dirs))

    # Group by language using the file suffix.
    c_suffixes = {".c", ".cpp", ".h", ".hpp"}
    rust_suffixes = {".rs"}
    c_sources: List[Path] = []
    rust_sources: List[Path] = []
    for src in source_files:
        suffix = src.suffix.lower()
        if suffix in c_suffixes:
            c_sources.append(src)
        if suffix in rust_suffixes:
            rust_sources.append(src)

    # Checkers receive the resolved root plus paths relative to it.
    c_findings = []
    if c_sources:
        c_findings = analyze_c_files(str(root), [str(src) for src in c_sources])
    rust_findings = []
    if rust_sources:
        rust_findings = analyze_rust_files(str(root), [str(src) for src in rust_sources])
    found: List[Issue] = c_findings + rust_findings

    # Aggregate counters.
    per_language: Dict[str, int] = {"c/cpp": 0, "rust": 0}
    per_category: Dict[str, int] = {}
    per_file: Dict[str, int] = {}
    for issue in found:
        per_language[issue.language] = per_language.get(issue.language, 0) + 1
        per_category[issue.category] = per_category.get(issue.category, 0) + 1
        per_file[issue.file] = per_file.get(issue.file, 0) + 1

    # Files with the most issues first, at most ten of them.
    ranked = sorted(per_file.items(), key=lambda pair: pair[1], reverse=True)
    riskiest = [name for name, _ in ranked[:10]]

    return {
        "summary": {
            "total": len(found),
            "by_language": per_language,
            "by_category": per_category,
            "top_risk_files": riskiest,
            "scanned_files": len(source_files),
            "scanned_root": str(root),
        },
        "issues": [asdict(issue) for issue in found],
    }
|
|
424
|
+
|
|
425
|
+
|
|
426
|
+
def format_markdown_report(result_json: Dict) -> str:
    """
    Render a structured scan result as a human-readable Markdown report.

    Args:
        result_json: Dict with an optional "summary" dict and an optional
            "issues" list of dicts; missing entries fall back to empty values.

    Returns:
        The Markdown text, lines joined with newlines.
    """
    summary = result_json.get("summary", {})
    findings: List[Dict] = result_json.get("issues", [])
    lang_stats = summary.get("by_language", {})

    # Header and overview section.
    out: List[str] = [
        "# OpenHarmony 安全问题分析报告(阶段一直扫基线)",
        "",
        f"- 扫描根目录: {summary.get('scanned_root', '')}",
        f"- 扫描文件数: {summary.get('scanned_files', 0)}",
        f"- 检出问题总数: {summary.get('total', 0)}",
        "",
        "## 统计概览",
        f"- 按语言: c/cpp={lang_stats.get('c/cpp', 0)}, rust={lang_stats.get('rust', 0)}",
        "- 按类别:",
    ]
    out.extend(
        f" - {category}: {count}"
        for category, count in summary.get("by_category", {}).items()
    )
    if summary.get("top_risk_files"):
        out.append("- Top 风险文件:")
        out.extend(f" - {path}" for path in summary["top_risk_files"])
    out += ["", "## 详细问题"]

    # One subsection per issue, numbered from 1.
    for number, item in enumerate(findings, start=1):
        out += [
            f"### [{number}] {item.get('file')}:{item.get('line')} ({item.get('language')}, {item.get('category')})",
            f"- 模式: {item.get('pattern')}",
            f"- 证据: `{item.get('evidence')}`",
            f"- 描述: {item.get('description')}",
            f"- 建议: {item.get('suggestion')}",
            f"- 置信度: {item.get('confidence')}, 严重性: {item.get('severity')}",
            "",
        ]
    return "\n".join(out)
|
|
460
|
+
|
|
461
|
+
|
|
462
|
+
def run_security_analysis_fast(
    entry_path: str,
    languages: Optional[List[str]] = None,
    exclude_dirs: Optional[List[str]] = None,
) -> str:
    """
    Run the direct-scan baseline in one call and return JSON + Markdown text.

    The scan result from ``direct_scan`` is passed to
    ``build_json_and_markdown`` together with the scanned root and the number
    of scanned files taken from the scan summary.
    """
    scan = direct_scan(entry_path, languages=languages, exclude_dirs=exclude_dirs)
    stats = scan.get("summary", {})
    return build_json_and_markdown(
        scan.get("issues", []),
        scanned_root=stats.get("scanned_root"),
        scanned_files=stats.get("scanned_files"),
    )
|
|
480
|
+
|
|
481
|
+
|
|
482
|
+
def run_with_multi_agent(
    entry_path: str,
    languages: Optional[List[str]] = None,
    llm_group: Optional[str] = None,
    report_file: Optional[str] = None,
) -> str:
    """
    Run the analysis by delegating to ``jarvis.jarvis_sec.run_security_analysis``.

    All arguments are forwarded unchanged:
    - entry_path: path to analyse.
    - languages: optional language filter.
    - llm_group: model group to use for this run (passed through only).
    - report_file: optional report file path (passed through only).

    Returns:
        Whatever ``run_security_analysis`` returns.
    """
    # Imported lazily to avoid a circular import with jarvis.jarvis_sec.
    from jarvis.jarvis_sec import run_security_analysis

    forwarded = {
        "languages": languages,
        "llm_group": llm_group,
        "report_file": report_file,
    }
    return run_security_analysis(entry_path, **forwarded)
|
|
505
|
+
|
|
506
|
+
|
|
507
|
+
# Public names exported by this module. ``Issue`` is imported from
# jarvis.jarvis_sec.types and re-exported here.
__all__ = [
    "Issue",
    "direct_scan",
    "format_markdown_report",
    "run_security_analysis_fast",
    "run_with_multi_agent",
]
|
jarvis/jarvis_tools/registry.py
CHANGED
|
@@ -350,22 +350,28 @@ class ToolRegistry(OutputHandlerProtocol):
|
|
|
350
350
|
# 如果配置了中心工具仓库,将其添加到加载路径
|
|
351
351
|
central_repo = get_central_tool_repo()
|
|
352
352
|
if central_repo:
|
|
353
|
-
#
|
|
354
|
-
|
|
355
|
-
|
|
353
|
+
# 支持本地目录路径或Git仓库URL
|
|
354
|
+
expanded = os.path.expanduser(os.path.expandvars(central_repo))
|
|
355
|
+
if os.path.isdir(expanded):
|
|
356
|
+
# 直接使用本地目录(支持Git仓库的子目录)
|
|
357
|
+
tool_dirs.append(expanded)
|
|
358
|
+
else:
|
|
359
|
+
# 中心工具仓库存储在数据目录下的特定位置
|
|
360
|
+
central_repo_path = os.path.join(get_data_dir(), "central_tool_repo")
|
|
361
|
+
tool_dirs.append(central_repo_path)
|
|
356
362
|
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
363
|
+
# 确保中心工具仓库被克隆/更新
|
|
364
|
+
if not os.path.exists(central_repo_path):
|
|
365
|
+
try:
|
|
366
|
+
import subprocess
|
|
361
367
|
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
368
|
+
subprocess.run(
|
|
369
|
+
["git", "clone", central_repo, central_repo_path], check=True
|
|
370
|
+
)
|
|
371
|
+
except Exception as e:
|
|
372
|
+
PrettyOutput.print(
|
|
373
|
+
f"克隆中心工具仓库失败: {str(e)}", OutputType.ERROR
|
|
374
|
+
)
|
|
369
375
|
|
|
370
376
|
# --- 全局每日更新检查 ---
|
|
371
377
|
daily_check_git_updates(tool_dirs, "tools")
|
jarvis/jarvis_tools/sub_agent.py
CHANGED
|
@@ -4,7 +4,9 @@ sub_agent 工具
|
|
|
4
4
|
将子任务交给通用 Agent 执行,并返回执行结果。
|
|
5
5
|
|
|
6
6
|
约定:
|
|
7
|
-
- 必填参数:task, name,
|
|
7
|
+
- 必填参数:task, name, system_prompt, summary_prompt
|
|
8
|
+
- 可选参数:background
|
|
9
|
+
- 工具集:默认使用系统工具集(无需传入 use_tools)
|
|
8
10
|
- 继承父 Agent 的部分配置:model_group、input_handler、execute_tool_confirm、multiline_inputer、non_interactive、use_methodology、use_analysis;其他参数需显式提供
|
|
9
11
|
- 子Agent必须自动完成(auto_complete=True)且需要summary(need_summary=True)
|
|
10
12
|
"""
|
|
@@ -41,7 +43,7 @@ class SubAgentTool:
|
|
|
41
43
|
},
|
|
42
44
|
"background": {
|
|
43
45
|
"type": "string",
|
|
44
|
-
"description": "
|
|
46
|
+
"description": "任务背景与已知信息(可选,将与任务一并提供给子Agent)",
|
|
45
47
|
},
|
|
46
48
|
"system_prompt": {
|
|
47
49
|
"type": "string",
|
|
@@ -59,7 +61,6 @@ class SubAgentTool:
|
|
|
59
61
|
"required": [
|
|
60
62
|
"task",
|
|
61
63
|
"name",
|
|
62
|
-
"background",
|
|
63
64
|
"system_prompt",
|
|
64
65
|
"summary_prompt",
|
|
65
66
|
],
|
|
@@ -4,7 +4,8 @@ sub_code_agent 工具
|
|
|
4
4
|
将子任务交给 CodeAgent 执行,并返回执行结果。
|
|
5
5
|
|
|
6
6
|
约定:
|
|
7
|
-
-
|
|
7
|
+
- 必填参数:task
|
|
8
|
+
- 可选参数:background
|
|
8
9
|
- 不依赖父 Agent,所有配置使用系统默认与全局变量
|
|
9
10
|
- 子Agent必须自动完成(auto_complete=True)且需要summary(need_summary=True)
|
|
10
11
|
"""
|
|
@@ -196,7 +197,6 @@ class SubCodeAgentTool:
|
|
|
196
197
|
|
|
197
198
|
# 执行子任务(无提交信息前后缀)
|
|
198
199
|
ret = code_agent.run(enhanced_task, prefix="", suffix="")
|
|
199
|
-
stdout = ret if isinstance(ret, str) and ret else "任务执行完成"
|
|
200
200
|
|
|
201
201
|
# 主动清理内部 Agent,避免污染父Agent的全局状态
|
|
202
202
|
try:
|
|
@@ -207,7 +207,7 @@ class SubCodeAgentTool:
|
|
|
207
207
|
|
|
208
208
|
return {
|
|
209
209
|
"success": True,
|
|
210
|
-
"stdout":
|
|
210
|
+
"stdout": ret,
|
|
211
211
|
"stderr": "",
|
|
212
212
|
}
|
|
213
213
|
except Exception as e:
|
jarvis/jarvis_utils/config.py
CHANGED
|
@@ -700,6 +700,18 @@ def get_tool_filter_threshold() -> int:
|
|
|
700
700
|
"""
|
|
701
701
|
return int(GLOBAL_CONFIG_DATA.get("JARVIS_TOOL_FILTER_THRESHOLD", 30))
|
|
702
702
|
|
|
703
|
+
def get_plan_max_depth() -> int:
    """
    Get the maximum number of task-planning levels.

    Returns:
        int: Maximum planning depth. Read from
        GLOBAL_CONFIG_DATA['JARVIS_PLAN_MAX_DEPTH']; 2 when the key is absent
        or the value cannot be converted to an integer.
    """
    try:
        configured = GLOBAL_CONFIG_DATA.get("JARVIS_PLAN_MAX_DEPTH", 2)
        depth = int(configured)
    except Exception:
        return 2
    return depth
|
|
714
|
+
|
|
703
715
|
|
|
704
716
|
def get_auto_summary_rounds() -> int:
|
|
705
717
|
"""
|
|
@@ -709,9 +721,9 @@ def get_auto_summary_rounds() -> int:
|
|
|
709
721
|
int: 轮次阈值,默认20
|
|
710
722
|
"""
|
|
711
723
|
try:
|
|
712
|
-
return int(GLOBAL_CONFIG_DATA.get("JARVIS_AUTO_SUMMARY_ROUNDS",
|
|
724
|
+
return int(GLOBAL_CONFIG_DATA.get("JARVIS_AUTO_SUMMARY_ROUNDS", 50))
|
|
713
725
|
except Exception:
|
|
714
|
-
return
|
|
726
|
+
return 50
|
|
715
727
|
|
|
716
728
|
|
|
717
729
|
|