jarvis-ai-assistant 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jarvis/__init__.py +1 -1
- jarvis/jarvis_agent/__init__.py +102 -5
- jarvis/jarvis_agent/jarvis.py +6 -0
- jarvis/jarvis_agent/task_planner.py +218 -0
- jarvis/jarvis_code_agent/code_agent.py +8 -1
- jarvis/jarvis_data/config_schema.json +6 -1
- jarvis/jarvis_sec/README.md +180 -0
- jarvis/jarvis_sec/__init__.py +674 -0
- jarvis/jarvis_sec/checkers/__init__.py +33 -0
- jarvis/jarvis_sec/checkers/c_checker.py +1269 -0
- jarvis/jarvis_sec/checkers/rust_checker.py +367 -0
- jarvis/jarvis_sec/cli.py +110 -0
- jarvis/jarvis_sec/prompts.py +324 -0
- jarvis/jarvis_sec/report.py +260 -0
- jarvis/jarvis_sec/types.py +20 -0
- jarvis/jarvis_sec/workflow.py +513 -0
- jarvis/jarvis_tools/sub_agent.py +4 -3
- jarvis/jarvis_tools/sub_code_agent.py +3 -3
- jarvis/jarvis_utils/config.py +14 -2
- jarvis/jarvis_utils/utils.py +137 -2
- {jarvis_ai_assistant-0.5.0.dist-info → jarvis_ai_assistant-0.5.1.dist-info}/METADATA +1 -1
- {jarvis_ai_assistant-0.5.0.dist-info → jarvis_ai_assistant-0.5.1.dist-info}/RECORD +26 -15
- {jarvis_ai_assistant-0.5.0.dist-info → jarvis_ai_assistant-0.5.1.dist-info}/entry_points.txt +2 -0
- {jarvis_ai_assistant-0.5.0.dist-info → jarvis_ai_assistant-0.5.1.dist-info}/WHEEL +0 -0
- {jarvis_ai_assistant-0.5.0.dist-info → jarvis_ai_assistant-0.5.1.dist-info}/licenses/LICENSE +0 -0
- {jarvis_ai_assistant-0.5.0.dist-info → jarvis_ai_assistant-0.5.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,324 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
OpenHarmony 安全演进多Agent套件 —— 提示词库(阶段一)
|
|
4
|
+
|
|
5
|
+
说明:
|
|
6
|
+
- 本文件集中维护多Agent的系统提示词,便于统一管理与后续调优。
|
|
7
|
+
- 本阶段聚焦:内存管理、缓冲区操作、错误处理等基础安全问题识别(目标检出率≥60%)。
|
|
8
|
+
- 约束:严格遵循单步操作与工具优先原则(一次只调用一个工具或发送一条消息)。
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from typing import List
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def COMMON_SYSTEM_PROMPT() -> str:
    """Return the collaboration rules shared by every agent in the suite.

    The text is returned verbatim, including its leading and trailing
    newline (unlike the per-agent prompts, it is not stripped).
    """
    shared_rules = """
# 通用协作约束(强化“充分阅读上下文”)
- 单步操作:每轮仅执行一个操作(工具调用或发送消息)
- 严格格式:发送消息时使用 SEND_MESSAGE 包裹,包含 to 与 content 字段
- 上下文优先:行动前先通读最近输入与上游消息,提取并“复述”关键参数(入口路径、语言范围、排除目录、批大小、已处理/待处理文件、输出期望)
- 工具优先:使用 execute_script(命令行)与 read_code(读取源码)完成检索与分析;不要凭空假设代码内容
- 结果导向:给出可复现步骤(命令、文件路径、行号、证据片段)
- 状态传递:发送消息时在 content 顶部加入 ContextDigest 段(关键信息的摘要与状态传递,见各Agent说明)
- 自检清单:行动前快速列出“当前目标/输入/约束/产出格式/下一步”,避免遗漏
- 语言要求:所有输出中的描述(description)与建议(suggestion)以及Markdown报告正文必须使用中文;如需引用英文术语,请在括号内给出中文解释
- 工作区保护:每次工具调用后在同一 execute_script 中使用 git status --porcelain 检测变更;如有变更,执行 git checkout -- . 恢复(若非 git 仓库则跳过;不清理未跟踪文件)
- 评测目标:阶段一聚焦以下安全问题识别(≥60% 检出率为目标):
1) 内存管理:malloc/free/realloc/new/delete 不匹配、双重释放、NULL 检查缺失、UAF
2) 缓冲区操作:strcpy/strcat/sprintf/gets 等不安全API;memcpy/memmove/strncpy 长度计算风险
3) 错误处理:系统/库函数返回值未检查;错误路径资源泄漏;errno 未处理
"""
    return shared_rules
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def PLANNER_PROMPT() -> str:
    """Return the system prompt for the Planner agent.

    The prompt tells the planner to send exactly one SEND_MESSAGE to
    SourceCollector and includes a filled-in message template. Surrounding
    whitespace is stripped from the returned text.
    """
    template = """
你是安全分析任务的规划与协调Agent(Planner)。

上下文阅读策略:
- 在行动前,通读用户的最新输入与系统约束,优先使用用户显式提供的 path/languages/exclude_dirs/batch_size;缺省则采用推荐默认值
- 在 content 顶部输出 ContextDigest,复述关键信息(便于下游Agent直接消费),并简述决策依据

目标:
- 基于用户提供的 entry_path 与语言范围(可选)制定行动计划
- 首轮:向 SourceCollector 发送一条消息,明确收集文件清单的参数

要求:
- 仅发送一条 SEND_MESSAGE(不调用工具)
- content 必须包含:
- ContextDigest: { path, languages, exclude_dirs, output_format, batch_size, rationale }
- path: 用户传入的路径(或推断的相对路径)
- languages: 需要扫描的扩展名(默认 [c, cpp, h, hpp, rs])
- exclude_dirs: 建议排除的目录 [build, out, target, .git]
- output_format: line_paths
- batch_size: 建议批量大小(例如 30)

模板(将 path 替换为用户路径):
<SEND_MESSAGE>
to: SourceCollector
content: |2
ContextDigest:
path: ./target_project
languages: [c, cpp, h, hpp, rs]
exclude_dirs: [build, out, target, .git]
output_format: line_paths
batch_size: 30
rationale: 使用默认语言与排除目录,按30的批大小分发任务
# 任务:收集源码清单
path: ./target_project
languages: [c, cpp, h, hpp, rs]
exclude_dirs: [build, out, target, .git]
output_format: line_paths
batch_size: 30
</SEND_MESSAGE>
"""
    return template.strip()
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def SOURCE_COLLECTOR_PROMPT() -> str:
    """Return the system prompt for the SourceCollector agent.

    The prompt describes how to enumerate C/C++/Rust source files, how to
    batch them, and which analyzer agent to forward the batch to.
    Surrounding whitespace is stripped from the returned text.

    The suggested ripgrep command uses ``rg --files`` so that the trailing
    ``.`` is a search path. With ``-l`` and no explicit pattern, ripgrep
    would take ``.`` as the regex and only list files that contain at least
    one matching character, silently omitting empty files.
    """
    template = """
你是源码采集Agent(SourceCollector)。

上下文阅读策略:
- 读取上游 Planner 的 ContextDigest 并复述关键参数(path/languages/exclude_dirs/batch_size)
- 若路径不可用或无文件命中,应返回明确的提示信息而不是继续

任务:
- 使用 execute_script 在指定 path 下枚举目标语言文件(c, cpp, h, hpp, rs)
- 支持排除目录(.git、build、out、target 等)
- 按 batch_size 进行分批;若文件数量超过一批,仅发送首批并在 ContextDigest 标注 has_more/remaining_count
- 根据是否存在 C/C++ 或 Rust 文件,选择一个目标发送一条 SEND_MESSAGE
- 若存在 C/C++:to: CAnalyzer
- 若存在 Rust:to: RustAnalyzer

工具建议(仅供生成命令时参考):
- 优先 rg(ripgrep):
rg --files --glob "*.c" --glob "*.h" --glob "*.cpp" --glob "*.hpp" --glob "*.rs" \\
-g "!build" -g "!out" -g "!target" -g "!.git" .
- 或 find:
find . \\( -name "*.c" -o -name "*.h" -o -name "*.cpp" -o -name "*.hpp" -o -name "*.rs" \\) \\
-not -path "*/build/*" -not -path "*/out/*" -not -path "*/target/*" -not -path "*/.git/*"

Git 工作区保护(建议在同一 execute_script 末尾加入):
- 示例(与枚举命令合并执行):
set -e
if [ -d .git ]; then
CHANGES="$(git status --porcelain || true)"
if [ -n "$CHANGES" ]; then
git checkout -- .
echo "[SourceCollector] workspace restored via: git checkout -- ."
fi
fi

输出格式建议(发送给分析器时):
<SEND_MESSAGE>
to: CAnalyzer
content: |2
ContextDigest:
path: ./target_project
batch_size: 30
total_files: 123
sent: 30
has_more: true
remaining_count: 93
languages_detected: [c, h, cpp, hpp]
# C/C++ 文件清单(示例,实际请替换)
batch_size: 30
files:
- src/foo.c
- include/foo.h
- lib/bar.cpp
</SEND_MESSAGE>
"""
    return template.strip()
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def C_ANALYZER_PROMPT() -> str:
    """Return the system prompt for the C/C++ analyzer agent (CAnalyzer).

    The prompt lists the phase-one detection rules, example ripgrep
    screening commands and the JSON structure expected in the message sent
    to the Aggregator. Surrounding whitespace is stripped from the result.
    """
    template = """
你是 C/C++ 安全问题分析Agent(CAnalyzer)。

上下文阅读策略:
- 读取并复述输入 content 中的 ContextDigest 与文件清单(batch_size、文件数、是否有剩余)
- 行动顺序:先 execute_script 初筛,再针对命中文件 read_code 精读,避免对未命中文件进行大范围读取
- 分批处理:控制每轮 read_code 数量;如命中点过多,优先处理高风险模式并在 ContextDigest 标注分页信息
- 状态传递:在输出中提供 processed_files、remaining_files、hit_files_count、next_action,避免重复扫描

总体策略:
- 先用 execute_script 在文件列表上做关键字初筛(提高检出覆盖面)
- 对命中危险API或可疑片段的文件,再用 read_code 读取具体内容(控制每轮文件数,避免上下文过长)
- 对每个命中点给出:文件、行号、证据片段、问题类型、原因说明、修复建议、置信度(0~1)

关键检测规则(阶段一):
- 不安全/高风险API:
- strcpy, strcat, gets, sprintf, vsprintf, scanf 家族(未限制长度)
- strncpy/strncat 使用不当导致未终止
- sprintf -> snprintf 替换建议(含边界)
- 缓冲区与长度:
- memcpy/memmove 长度来源可疑(如来自 strlen/sizeof 指针/未校验长度)
- 数组/指针越界可能(常见固定大小缓冲写入)
- 内存管理:
- malloc/calloc/realloc/new 与 free/delete 不匹配
- realloc 返回值直接覆盖原指针导致泄漏
- free 之后使用(use-after-free)
- NULL 返回未检查
- 错误处理:
- 系统/库调用返回值未检查(如 fopen/fread/fwrite/read/write/malloc 等)
- 错误路径未释放资源(文件句柄/内存/锁)

初筛命令示例(可按需组合与分批执行):
- rg -n "strcpy|strcat|gets\\(|sprintf\\(|vsprintf\\(|scanf\\(" {files}
- rg -n "memcpy\\(|memmove\\(|strncpy\\(|strncat\\(" {files}
- rg -n "malloc\\(|calloc\\(|realloc\\(|free\\(|new |delete\\b" {files}
- rg -n "fopen\\(|read\\(|write\\(|open\\(|close\\(" {files}

输出要求:
- Git 工作区保护:在本轮 execute_script 末尾执行 git status --porcelain;如有变更,执行 git checkout -- . 恢复;可在 ContextDigest 中记录 restore_performed 与 changed_files_count
- 一次仅执行一个操作(先工具检索,后读取源码,再汇总发送)
- 向 Aggregator 发送一条 SEND_MESSAGE,content 顶部包含 ContextDigest:
- processed_files, remaining_files, hit_files_count, has_more
- content 中给出 JSON(见下方 schema)或 YAML 格式的结构化问题列表
- 语言要求:issues 列表中的 description 与 suggestion 必须使用中文;如需英文术语请在括号内提供中文解释,避免出现英文整句

建议结构(JSON):
{
"ContextDigest": {
"processed_files": 30,
"remaining_files": 93,
"hit_files_count": 18,
"has_more": true,
"next_action": "继续处理下一批"
},
"language": "c/cpp",
"issues": [
{
"category": "buffer_overflow | unsafe_api | memory_mgmt | error_handling",
"pattern": "strcpy",
"file": "src/foo.c",
"line": 123,
"evidence": "strcpy(dst, src);",
"description": "使用不安全API,缺少长度检查,可能导致缓冲区溢出。",
"suggestion": "使用 strncpy/snprintf 或加入显式边界检查;验证源长度。",
"confidence": 0.85
}
]
}
"""
    return template.strip()
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def RUST_ANALYZER_PROMPT() -> str:
    """Return the system prompt for the Rust analyzer agent (RustAnalyzer).

    The prompt lists the phase-one detection rules, example ripgrep
    screening commands and a JSON example of the expected output.
    Surrounding whitespace is stripped from the returned text.

    The Send/Sync screening command is written as
    ``unsafe\\s+impl\\b.*\\b(Send|Sync)\\s+for\\b`` so that it also matches
    generic implementations such as ``unsafe impl<T> Send for Wrapper<T>``;
    a pattern requiring whitespace directly after ``impl`` misses those.
    """
    template = """
你是 Rust 安全性分析Agent(RustAnalyzer)。

上下文阅读策略:
- 读取并复述上游 SourceCollector 的 ContextDigest 与本批文件清单
- 先初筛、后精读;控制 read_code 文件数量,避免上下文超长
- 在输出中提供 ContextDigest(processed_files/remaining_files/hit_files_count/next_action)

总体策略:
- 先用 execute_script 对 .rs 文件进行关键字初筛
- 对命中的文件用 read_code 读取相关片段
- 输出结构化问题列表,包含文件、行号/片段、问题类型、说明、建议、置信度

关键检测规则(阶段一):
- unsafe 与原始指针:
- 关键字:unsafe, *mut, *const, std::mem::forget
- 错误处理:
- unwrap()/expect() 滥用(尤其在 I/O、解析等容易失败的路径)
- Result 未使用(下划线忽略、未传播)
- 并发与跨线程:
- 手写 unsafe impl Send/Sync
- FFI 边界:
- extern "C" 指针/长度/生命周期未明确定义或未检查

初筛命令示例:
- rg -n "unsafe\\b|\\*mut |\\*const |mem::forget|unwrap\\(|expect\\(" {files}
- rg -n "extern\\s+\\\"C\\\"" {files}
- rg -n "unsafe\\s+impl\\b.*\\b(Send|Sync)\\s+for\\b" {files}

输出要求(JSON示例):
- 语言要求:issues 列表中的 description 与 suggestion 必须使用中文;如需英文术语请在括号内提供中文解释,避免出现英文整句
- Git 工作区保护:在本轮 execute_script 末尾执行 git status --porcelain;如有变更,执行 git checkout -- . 恢复;可在 ContextDigest 中记录 restore_performed 与 changed_files_count
{
"ContextDigest": {
"processed_files": 30,
"remaining_files": 93,
"hit_files_count": 8,
"has_more": true,
"next_action": "继续处理下一批"
},
"language": "rust",
"issues": [
{
"category": "unsafe_usage | error_handling | concurrency | ffi",
"pattern": "unsafe",
"file": "src/lib.rs",
"line": 42,
"evidence": "unsafe { ptr::read(p) }",
"description": "存在 unsafe 块,需证明内存/别名/生命周期安全性。",
"suggestion": "考虑使用安全抽象封装;提供前置条件与边界检查;优先使用安全API。",
"confidence": 0.8
}
]
}
"""
    return template.strip()
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def AGGREGATOR_PROMPT() -> str:
    """Return the system prompt for the report Aggregator agent.

    The prompt asks the agent to merge and de-duplicate analyzer results,
    then emit a structured JSON report followed by a Markdown report.
    Surrounding whitespace is stripped from the returned text.
    """
    template = """
你是报告聚合Agent(Aggregator)。

上下文阅读策略:
- 读取并合并来自多轮 CAnalyzer/RustAnalyzer 的输入;如有多批结果,需去重与统计汇总
- 去重建议:按 (language, file, line, pattern, evidence) 进行去重;保留置信度较高/证据更完整的一项
- 在输出 JSON 的 summary 中给出:total/by_language/by_category/top_risk_files,并标注批次数与来源

输入:
- 来自 CAnalyzer 或 RustAnalyzer 的结构化问题清单(JSON 或 YAML)

输出:
- 语言要求:所有聚合后的 issues 中的 description 与 suggestion 必须使用中文;Markdown 报告正文必须使用中文。若上游含英文描述/建议,需在聚合时转换为中文,并对关键英文术语在括号内提供中文解释。
- 先输出结构化 JSON(便于自动评分/解析):
{
"summary": {
"total": 0,
"batches": 1,
"by_language": {"c/cpp": 0, "rust": 0},
"by_category": {"buffer_overflow": 0, "unsafe_api": 0, "memory_mgmt": 0, "error_handling": 0, "unsafe_usage": 0, "concurrency": 0, "ffi": 0, "crypto": 0, "insecure_permissions": 0, "network_api": 0, "thread_safety": 0, "resource_leak": 0, "input_validation": 0},
"top_risk_files": ["path1", "path2"]
},
"issues": [
{
"id": "C001",
"language": "c/cpp",
"category": "unsafe_api",
"pattern": "strcpy",
"file": "src/foo.c",
"line": 123,
"evidence": "strcpy(dst, src);",
"description": "使用不安全API,缺少长度检查。",
"suggestion": "替换为安全API或增加长度验证。",
"confidence": 0.85,
"severity": "high | medium | low"
}
]
}

- 再输出 Markdown 报告(可读性强):
# OpenHarmony 安全问题分析报告(阶段一)
- 扫描范围与时间
- 统计概览(总数/语言/类别/Top文件)
- 详细问题列表(按文件/类别分组,含证据与建议)
- 建议与后续计划(可迁移至Rust、加固、测试用例补充)

规则:
- 本Agent不再调用工具/不再发送消息,输出即为最终结果。
- 保证JSON合法,Markdown清晰;避免重复与遗漏。
"""
    return template.strip()
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
def ALL_PROMPTS() -> List[str]:
    """Return every prompt of the suite as a list.

    The order is: common rules, Planner, SourceCollector, CAnalyzer,
    RustAnalyzer, Aggregator. Each prompt is rebuilt on every call.
    """
    builders = (
        COMMON_SYSTEM_PROMPT,
        PLANNER_PROMPT,
        SOURCE_COLLECTOR_PROMPT,
        C_ANALYZER_PROMPT,
        RUST_ANALYZER_PROMPT,
        AGGREGATOR_PROMPT,
    )
    return [build() for build in builders]
|
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
OpenHarmony 安全演进多Agent套件 —— 报告聚合与评分模块(阶段一)
|
|
4
|
+
|
|
5
|
+
目标:
|
|
6
|
+
- 将启发式检查器输出的结构化问题列表进行聚合与评分,生成统一的 JSON 与 Markdown 报告。
|
|
7
|
+
- 与 workflow.direct_scan / 多Agent Aggregator 保持输出结构一致,便于评测解析与专家审阅。
|
|
8
|
+
|
|
9
|
+
输出结构(JSON示例):
|
|
10
|
+
{
|
|
11
|
+
"summary": {
|
|
12
|
+
"total": 0,
|
|
13
|
+
"by_language": {"c/cpp": 0, "rust": 0},
|
|
14
|
+
"by_category": {
|
|
15
|
+
"buffer_overflow": 0, "unsafe_api": 0, "memory_mgmt": 0, "error_handling": 0,
|
|
16
|
+
"unsafe_usage": 0, "concurrency": 0, "ffi": 0
|
|
17
|
+
},
|
|
18
|
+
"top_risk_files": ["path1", "path2"]
|
|
19
|
+
},
|
|
20
|
+
"issues": [
|
|
21
|
+
{
|
|
22
|
+
"id": "C001",
|
|
23
|
+
"language": "c/cpp",
|
|
24
|
+
"category": "unsafe_api",
|
|
25
|
+
"pattern": "strcpy",
|
|
26
|
+
"file": "src/foo.c",
|
|
27
|
+
"line": 123,
|
|
28
|
+
"evidence": "strcpy(dst, src);",
|
|
29
|
+
"description": "使用不安全API,缺少长度检查。",
|
|
30
|
+
"suggestion": "替换为安全API或增加长度验证。",
|
|
31
|
+
"confidence": 0.85,
|
|
32
|
+
"severity": "high | medium | low",
|
|
33
|
+
"score": 2.55
|
|
34
|
+
}
|
|
35
|
+
]
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
提供的函数:
|
|
39
|
+
- aggregate_issues(issues: List[Union[Issue, Dict]], scanned_root: Optional[str] = None, scanned_files: Optional[int] = None) -> Dict
|
|
40
|
+
- format_markdown_report(report_json: Dict) -> str
|
|
41
|
+
- build_json_and_markdown(issues: List[Union[Issue, Dict]], scanned_root: Optional[str] = None, scanned_files: Optional[int] = None, meta: Optional[List[Dict]] = None) -> str
|
|
42
|
+
"""
|
|
43
|
+
|
|
44
|
+
from __future__ import annotations
|
|
45
|
+
|
|
46
|
+
import hashlib
|
|
47
|
+
from typing import Dict, Iterable, List, Optional, Union
|
|
48
|
+
|
|
49
|
+
# 依赖 Issue 结构,但本模块不直接导入 dataclass,接受 dict/Issue 两种形态
|
|
50
|
+
try:
|
|
51
|
+
from jarvis.jarvis_sec.types import Issue # 类型提示用,避免循环依赖
|
|
52
|
+
except Exception:
|
|
53
|
+
Issue = dict # type: ignore
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
# ---------------------------
|
|
57
|
+
# 内部工具
|
|
58
|
+
# ---------------------------
|
|
59
|
+
|
|
60
|
+
# Known issue categories, in the order they are listed in report summaries.
_CATEGORY_ORDER = [
    "unsafe_api", "buffer_overflow", "memory_mgmt", "error_handling",
    "unsafe_usage", "concurrency", "ffi",
]

# Multiplier applied to an issue's confidence to obtain its score.
_SEVERITY_WEIGHT = dict(high=3.0, medium=2.0, low=1.0)
|
|
75
|
+
|
|
76
|
+
def _as_dict(item: Union[Issue, Dict]) -> Dict:
    """Convert an issue given as a mapping or an attribute object to a dict.

    Args:
        item: A ``dict`` (returned as-is, not copied), any other mapping
            (copied into a new ``dict``), or an object exposing the issue
            fields as attributes.

    Returns:
        A dict. For attribute objects it contains only the known issue
        fields whose value is not ``None``.
    """
    # Local import so this helper does not depend on the file's import block.
    from collections.abc import Mapping

    if isinstance(item, dict):
        return item
    if isinstance(item, Mapping):
        # Read-only or custom mappings have no issue attributes; without this
        # branch they would be reduced to an empty dict by the getattr path.
        return dict(item)

    field_names = (
        "language",
        "category",
        "pattern",
        "file",
        "line",
        "evidence",
        "description",
        "suggestion",
        "confidence",
        "severity",
    )
    collected: Dict = {}
    for name in field_names:
        value = getattr(item, name, None)
        if value is not None:
            collected[name] = value
    return collected
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _normalize_issue(i: Dict) -> Dict:
    """Normalize one raw issue record and attach its ``id`` and ``score``.

    Missing fields, and fields explicitly set to ``None``, receive defaults:
    category ``"error_handling"``, confidence ``0.6``, severity ``"medium"``,
    line ``0`` and empty strings for the text fields. A line or confidence
    that cannot be converted to a number also falls back to its default
    instead of raising.

    When no language is given it is inferred from the file extension
    (case-insensitive): C/C++ extensions map to ``"c/cpp"``, anything else
    to ``"rust"``. Severity is lower-cased so that e.g. ``"High"`` gets the
    high weight.

    Args:
        i: Raw issue mapping.

    Returns:
        A new dict with the normalized fields plus ``id`` (derived from
        file, line, category and pattern) and ``score`` (confidence times
        the severity weight, rounded to two decimals; unknown severities
        use weight 1.0).
    """
    c_like_suffixes = (".c", ".cpp", ".h", ".hpp", ".cc", ".cxx", ".hh", ".hxx")

    def field(key: str, default):
        # Treat an explicit None the same as a missing key.
        value = i.get(key)
        return default if value is None else value

    file_path = field("file", "")
    inferred = "c/cpp" if str(file_path).lower().endswith(c_like_suffixes) else "rust"
    # An empty or None language would later break the ID prefix selection.
    language = str(i.get("language") or inferred)

    try:
        line = int(i.get("line") or 0)
    except (TypeError, ValueError, OverflowError):
        line = 0

    try:
        confidence = float(field("confidence", 0.6))
    except (TypeError, ValueError):
        confidence = 0.6

    severity = str(i.get("severity") or "medium").strip().lower()

    j = {
        "language": language,
        "category": field("category", "error_handling"),
        "pattern": field("pattern", ""),
        "file": file_path,
        "line": line,
        "evidence": field("evidence", ""),
        "description": field("description", ""),
        "suggestion": field("suggestion", ""),
        "confidence": confidence,
        "severity": severity,
    }
    # Stable ID: hash of file/line/category/pattern, prefixed by language.
    base = f"{j['file']}:{j['line']}:{j['category']}:{j['pattern']}"
    j["id"] = _make_issue_id(base, j["language"])
    j["score"] = round(j["confidence"] * _SEVERITY_WEIGHT.get(j["severity"], 1.0), 2)
    return j
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def _make_issue_id(base: str, lang: str) -> str:
    """Build a short issue ID from a base string and a language name.

    The ID is a one-letter prefix ("C" when ``lang`` starts with "c",
    otherwise "R") followed by the first six hex digits, upper-cased, of
    the SHA-1 of ``base`` encoded as UTF-8.
    """
    digest = hashlib.sha1(base.encode("utf-8")).hexdigest()
    short_hash = digest[:6].upper()
    if lang.startswith("c"):
        return "C" + short_hash
    return "R" + short_hash
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
# ---------------------------
|
|
133
|
+
# 聚合与评分
|
|
134
|
+
# ---------------------------
|
|
135
|
+
|
|
136
|
+
def aggregate_issues(
    issues: List[Union[Issue, Dict]],
    scanned_root: Optional[str] = None,
    scanned_files: Optional[int] = None,
) -> Dict:
    """Aggregate a list of issues into a JSON-serializable report dict.

    Args:
        issues: Issues as dicts or attribute objects; each one is converted
            and normalized before counting.
        scanned_root: Optional scan root, copied into the summary when given.
        scanned_files: Optional scanned-file count, copied into the summary
            when given.

    Returns:
        ``{"summary": ..., "issues": ...}`` where the summary holds the
        total, per-language and per-category counts, and up to ten files
        ranked by their accumulated issue score (highest first).
    """
    normalized = [_normalize_issue(_as_dict(raw)) for raw in issues]

    language_counts: Dict[str, int] = {"c/cpp": 0, "rust": 0}
    category_counts: Dict[str, int] = dict.fromkeys(_CATEGORY_ORDER, 0)
    risk_by_file: Dict[str, float] = {}
    for entry in normalized:
        language = entry["language"]
        category = entry["category"]
        path = entry["file"]
        language_counts[language] = language_counts.get(language, 0) + 1
        category_counts[category] = category_counts.get(category, 0) + 1
        risk_by_file[path] = risk_by_file.get(path, 0.0) + entry["score"]

    # Rank by accumulated score; ties keep first-seen order (stable sort).
    ranked_files = sorted(risk_by_file, key=risk_by_file.get, reverse=True)

    summary: Dict = {
        "total": len(normalized),
        "by_language": language_counts,
        "by_category": category_counts,
        "top_risk_files": ranked_files[:10],
    }
    if scanned_root is not None:
        summary["scanned_root"] = scanned_root
    if scanned_files is not None:
        summary["scanned_files"] = scanned_files

    return {"summary": summary, "issues": normalized}
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
# ---------------------------
|
|
178
|
+
# Markdown 渲染
|
|
179
|
+
# ---------------------------
|
|
180
|
+
|
|
181
|
+
def format_markdown_report(report_json: Dict) -> str:
    """Render an aggregated report dict as a Markdown document.

    Args:
        report_json: Dict with a ``summary`` dict and an ``issues`` list.
            Missing or ``None`` entries are treated as empty.

    Returns:
        The Markdown text, with lines joined by newline characters and no
        trailing newline.

    Languages and categories that are present in the summary but not in
    the fixed lists are appended after the known ones, so every counted
    issue is visible in the overview. Sub-items are indented by two spaces
    so Markdown renders them as nested list items.
    """
    s = report_json.get("summary") or {}
    issues: List[Dict] = report_json.get("issues") or []
    lines: List[str] = []

    lines.append("# OpenHarmony 安全问题分析报告(阶段一聚合)")
    lines.append("")
    if "scanned_root" in s:
        lines.append(f"- 扫描根目录: {s.get('scanned_root')}")
    if "scanned_files" in s:
        lines.append(f"- 扫描文件数: {s.get('scanned_files')}")
    lines.append(f"- 检出问题总数: {s.get('total', 0)}")
    lines.append("")

    # Overview
    lines.append("## 统计概览")
    by_lang = s.get("by_language") or {}
    known_langs = ("c/cpp", "rust")
    lang_parts = [f"{name}={by_lang.get(name, 0)}" for name in known_langs]
    lang_parts.extend(f"{name}={count}" for name, count in by_lang.items() if name not in known_langs)
    lines.append("- 按语言: " + ", ".join(lang_parts))

    lines.append("- 按类别:")
    by_cat = s.get("by_category") or {}
    extra_categories = [name for name in by_cat if name not in _CATEGORY_ORDER]
    for k in list(_CATEGORY_ORDER) + extra_categories:
        lines.append(f"  - {k}: {by_cat.get(k, 0)}")

    if s.get("top_risk_files"):
        lines.append("- Top 风险文件:")
        for f in s["top_risk_files"]:
            lines.append(f"  - {f}")
    lines.append("")

    # Per-issue details
    lines.append("## 详细问题")
    for i, it in enumerate(issues, start=1):
        lines.append(f"### [{i}] {it.get('file')}:{it.get('line')} ({it.get('language')}, {it.get('category')})")
        lines.append(f"- 模式: {it.get('pattern')}")
        lines.append(f"- 证据: `{it.get('evidence')}`")
        lines.append(f"- 描述: {it.get('description')}")
        lines.append(f"- 建议: {it.get('suggestion')}")
        lines.append(f"- 置信度: {it.get('confidence')}, 严重性: {it.get('severity')}, 评分: {it.get('score')}")
        lines.append("")

    # Fixed recommendations section
    lines.append("## 建议与后续计划")
    lines.append("- 对高风险文件优先进行加固与测试覆盖提升(边界检查、错误处理路径)。")
    lines.append("- 对不安全API统一替换/封装,审计 sprintf/scanf 等使用场景。")
    lines.append("- 对内存管理路径进行生命周期审查,避免 realloc 覆盖与 UAF。")
    lines.append("- 将关键模块迁移至 Rust(内存安全优先),对 FFI 边界进行条件约束与安全封装。")

    return "\n".join(lines)
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def build_json_and_markdown(
    issues: List[Union[Issue, Dict]],
    scanned_root: Optional[str] = None,
    scanned_files: Optional[int] = None,
    meta: Optional[List[Dict]] = None,
) -> str:
    """Produce the JSON report and the Markdown report as one text.

    Args:
        issues: Issues to aggregate (dicts or attribute objects).
        scanned_root: Optional scan root forwarded to the aggregation.
        scanned_files: Optional scanned-file count forwarded to the
            aggregation.
        meta: Optional audit information; when not ``None`` it is stored
            under the top-level ``"meta"`` key of the report.

    Returns:
        The indented JSON text, a blank line, then the Markdown text.
    """
    import json

    report = aggregate_issues(issues, scanned_root=scanned_root, scanned_files=scanned_files)
    if meta is not None:
        report["meta"] = meta
    json_part = json.dumps(report, ensure_ascii=False, indent=2)
    markdown_part = format_markdown_report(report)
    return "\n\n".join((json_part, markdown_part))
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
__all__ = [
|
|
257
|
+
"aggregate_issues",
|
|
258
|
+
"format_markdown_report",
|
|
259
|
+
"build_json_and_markdown",
|
|
260
|
+
]
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
Shared types for jarvis.jarvis_sec to avoid circular imports.
|
|
4
|
+
"""
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
|
|
7
|
+
@dataclass
class Issue:
    """One security finding produced by a checker or analyzer."""

    # Language label of the finding.
    language: str
    # Issue category name.
    category: str
    # Pattern or API name that triggered the finding.
    pattern: str
    # Path of the file containing the finding.
    file: str
    # Line number of the finding.
    line: int
    # Source snippet supporting the finding.
    evidence: str
    # Explanation of the problem.
    description: str
    # Suggested remediation.
    suggestion: str
    # Confidence value of the finding.
    confidence: float
    # Severity label; defaults to "medium".
    severity: str = "medium"
|
|
19
|
+
|
|
20
|
+
__all__ = ["Issue"]
|