jarvis-ai-assistant 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jarvis/__init__.py +1 -1
- jarvis/jarvis_agent/__init__.py +102 -5
- jarvis/jarvis_agent/jarvis.py +6 -0
- jarvis/jarvis_agent/task_planner.py +218 -0
- jarvis/jarvis_code_agent/code_agent.py +8 -1
- jarvis/jarvis_data/config_schema.json +6 -1
- jarvis/jarvis_sec/README.md +180 -0
- jarvis/jarvis_sec/__init__.py +674 -0
- jarvis/jarvis_sec/checkers/__init__.py +33 -0
- jarvis/jarvis_sec/checkers/c_checker.py +1269 -0
- jarvis/jarvis_sec/checkers/rust_checker.py +367 -0
- jarvis/jarvis_sec/cli.py +110 -0
- jarvis/jarvis_sec/prompts.py +324 -0
- jarvis/jarvis_sec/report.py +260 -0
- jarvis/jarvis_sec/types.py +20 -0
- jarvis/jarvis_sec/workflow.py +513 -0
- jarvis/jarvis_tools/sub_agent.py +4 -3
- jarvis/jarvis_tools/sub_code_agent.py +3 -3
- jarvis/jarvis_utils/config.py +14 -2
- jarvis/jarvis_utils/utils.py +137 -2
- {jarvis_ai_assistant-0.5.0.dist-info → jarvis_ai_assistant-0.5.1.dist-info}/METADATA +1 -1
- {jarvis_ai_assistant-0.5.0.dist-info → jarvis_ai_assistant-0.5.1.dist-info}/RECORD +26 -15
- {jarvis_ai_assistant-0.5.0.dist-info → jarvis_ai_assistant-0.5.1.dist-info}/entry_points.txt +2 -0
- {jarvis_ai_assistant-0.5.0.dist-info → jarvis_ai_assistant-0.5.1.dist-info}/WHEEL +0 -0
- {jarvis_ai_assistant-0.5.0.dist-info → jarvis_ai_assistant-0.5.1.dist-info}/licenses/LICENSE +0 -0
- {jarvis_ai_assistant-0.5.0.dist-info → jarvis_ai_assistant-0.5.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,674 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
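"""
OpenHarmony security-evolution multi-agent suite (phase-one skeleton).

Goal (phase one):
- Without modifying the existing core framework files, build on the existing
  Agent and MultiAgent capabilities to add a multi-agent entry point that
  focuses on finding security problems in a given module (mainly memory
  management, buffer operations, error handling, etc.).

Integration:
- Reuses the capabilities of jarvis.jarvis_agent and jarvis.jarvis_multi_agent;
  the module is added non-invasively.
- Provides two entry points: create_security_multi_agent() and
  run_security_analysis(entry_path).

Follow-up:
- Earlier revisions planned to add prompts.py, checkers/, report.py and
  workflow.py and to move the default prompts out of this file; this module
  now imports its prompts from jarvis.jarvis_sec.prompts and its workflow
  helpers from jarvis.jarvis_sec.workflow.
"""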
|
|
17
|
+
|
|
18
|
+
from typing import Dict, List, Optional
|
|
19
|
+
|
|
20
|
+
from jarvis.jarvis_multi_agent import MultiAgent
|
|
21
|
+
from jarvis.jarvis_agent import Agent, output_handler
|
|
22
|
+
from jarvis.jarvis_sec.prompts import (
|
|
23
|
+
COMMON_SYSTEM_PROMPT,
|
|
24
|
+
PLANNER_PROMPT,
|
|
25
|
+
SOURCE_COLLECTOR_PROMPT,
|
|
26
|
+
C_ANALYZER_PROMPT,
|
|
27
|
+
RUST_ANALYZER_PROMPT,
|
|
28
|
+
AGGREGATOR_PROMPT,
|
|
29
|
+
)
|
|
30
|
+
from jarvis.jarvis_sec.workflow import run_security_analysis_fast, direct_scan, run_with_multi_agent
|
|
31
|
+
from jarvis.jarvis_tools.registry import ToolRegistry
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _try_parse_issues_from_text(text: str) -> Optional[List[Dict]]:
|
|
35
|
+
"""
|
|
36
|
+
尝试从模型输出中解析出 {"issues": [...]},宽松容错:
|
|
37
|
+
1) 直接作为完整JSON解析
|
|
38
|
+
2) 从 ```json ... ``` 或 ``` ... ``` 代码块中提取JSON解析
|
|
39
|
+
3) 从首个 { 开始进行大括号配对截取后解析
|
|
40
|
+
|
|
41
|
+
返回:
|
|
42
|
+
- 成功解析到 issues 列表则返回该列表(可为空列表)
|
|
43
|
+
- 未能解析则返回 None
|
|
44
|
+
"""
|
|
45
|
+
import json
|
|
46
|
+
import re
|
|
47
|
+
|
|
48
|
+
# 尝试直接解析
|
|
49
|
+
try:
|
|
50
|
+
data = json.loads(text)
|
|
51
|
+
items = data.get("issues", [])
|
|
52
|
+
if isinstance(items, list):
|
|
53
|
+
return items
|
|
54
|
+
except Exception:
|
|
55
|
+
pass
|
|
56
|
+
|
|
57
|
+
# 尝试从代码块提取
|
|
58
|
+
try:
|
|
59
|
+
m = re.search(r"```(?:json)?\s*(\{.*?\})\s*```", text, re.DOTALL | re.IGNORECASE)
|
|
60
|
+
if m:
|
|
61
|
+
data = json.loads(m.group(1))
|
|
62
|
+
items = data.get("issues", [])
|
|
63
|
+
if isinstance(items, list):
|
|
64
|
+
return items
|
|
65
|
+
except Exception:
|
|
66
|
+
pass
|
|
67
|
+
|
|
68
|
+
# 尝试基于大括号配对截取首个JSON对象
|
|
69
|
+
try:
|
|
70
|
+
start = text.find("{")
|
|
71
|
+
if start != -1:
|
|
72
|
+
stack = 0
|
|
73
|
+
end = None
|
|
74
|
+
for i, ch in enumerate(text[start:], start=start):
|
|
75
|
+
if ch == "{":
|
|
76
|
+
stack += 1
|
|
77
|
+
elif ch == "}":
|
|
78
|
+
stack -= 1
|
|
79
|
+
if stack == 0:
|
|
80
|
+
end = i + 1
|
|
81
|
+
break
|
|
82
|
+
if end:
|
|
83
|
+
snippet = text[start:end]
|
|
84
|
+
data = json.loads(snippet)
|
|
85
|
+
items = data.get("issues", [])
|
|
86
|
+
if isinstance(items, list):
|
|
87
|
+
return items
|
|
88
|
+
except Exception:
|
|
89
|
+
pass
|
|
90
|
+
|
|
91
|
+
return None
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _try_parse_summary_json(text: str) -> Optional[Dict]:
|
|
95
|
+
"""
|
|
96
|
+
从模型摘要文本中尽力提取严格 JSON 对象(非仅 issues 列表)。
|
|
97
|
+
解析顺序:
|
|
98
|
+
1) 直接 JSON
|
|
99
|
+
2) ```json ...``` 或 ```...``` 代码块中的 JSON
|
|
100
|
+
3) 基于首个花括号的配对截取 JSON 对象
|
|
101
|
+
成功时返回解析后的 dict;失败返回 None
|
|
102
|
+
"""
|
|
103
|
+
import json
|
|
104
|
+
import re
|
|
105
|
+
|
|
106
|
+
# 直接解析
|
|
107
|
+
try:
|
|
108
|
+
data = json.loads(text)
|
|
109
|
+
if isinstance(data, dict):
|
|
110
|
+
return data
|
|
111
|
+
except Exception:
|
|
112
|
+
pass
|
|
113
|
+
|
|
114
|
+
# 代码块提取
|
|
115
|
+
try:
|
|
116
|
+
m = re.search(r"```(?:json)?\s*(\{.*?\})\s*```", text, re.DOTALL | re.IGNORECASE)
|
|
117
|
+
if m:
|
|
118
|
+
data = json.loads(m.group(1))
|
|
119
|
+
if isinstance(data, dict):
|
|
120
|
+
return data
|
|
121
|
+
except Exception:
|
|
122
|
+
pass
|
|
123
|
+
|
|
124
|
+
# 花括号配对截取
|
|
125
|
+
try:
|
|
126
|
+
start = text.find("{")
|
|
127
|
+
if start != -1:
|
|
128
|
+
stack = 0
|
|
129
|
+
end = None
|
|
130
|
+
for i, ch in enumerate(text[start:], start=start):
|
|
131
|
+
if ch == "{":
|
|
132
|
+
stack += 1
|
|
133
|
+
elif ch == "}":
|
|
134
|
+
stack -= 1
|
|
135
|
+
if stack == 0:
|
|
136
|
+
end = i + 1
|
|
137
|
+
break
|
|
138
|
+
if end:
|
|
139
|
+
snippet = text[start:end]
|
|
140
|
+
data = json.loads(snippet)
|
|
141
|
+
if isinstance(data, dict):
|
|
142
|
+
return data
|
|
143
|
+
except Exception:
|
|
144
|
+
pass
|
|
145
|
+
|
|
146
|
+
return None
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def _build_summary_prompt(task_id: str, entry_path: str, languages: List[str], candidate: Dict) -> str:
|
|
150
|
+
"""
|
|
151
|
+
构建摘要提示词:要求以 <REPORT>...</REPORT> 包裹的 JSON 或 YAML 输出。
|
|
152
|
+
系统提示词不强制规定主对话输出格式,仅在摘要中给出结构化结果。
|
|
153
|
+
"""
|
|
154
|
+
import json as _json
|
|
155
|
+
cand_json = _json.dumps(candidate, ensure_ascii=False, indent=2)
|
|
156
|
+
langs_json = _json.dumps(languages, ensure_ascii=False)
|
|
157
|
+
return f"""
|
|
158
|
+
请将本轮“安全子任务(单点验证)”的结构化结果仅放入以下标记中(允许 JSON 或 YAML):
|
|
159
|
+
<REPORT>
|
|
160
|
+
# 推荐 JSON;如果使用 YAML 亦可
|
|
161
|
+
issues:
|
|
162
|
+
- language: "c/cpp|rust"
|
|
163
|
+
category: "unsafe_api|buffer_overflow|memory_mgmt|error_handling|unsafe_usage|concurrency|ffi"
|
|
164
|
+
pattern: "命中的模式/关键字"
|
|
165
|
+
file: "相对或绝对路径"
|
|
166
|
+
line: 0
|
|
167
|
+
evidence: "证据代码片段(单行简化)"
|
|
168
|
+
description: "问题说明"
|
|
169
|
+
suggestion: "修复建议"
|
|
170
|
+
confidence: 0.0
|
|
171
|
+
severity: "high|medium|low"
|
|
172
|
+
meta:
|
|
173
|
+
task_id: "{task_id}"
|
|
174
|
+
entry_path: "{entry_path}"
|
|
175
|
+
languages: {langs_json}
|
|
176
|
+
candidate: {cand_json}
|
|
177
|
+
</REPORT>
|
|
178
|
+
要求:
|
|
179
|
+
- 报告只能出现在 <REPORT> 与 </REPORT> 中,且不得出现其他文本。
|
|
180
|
+
- 若确认误报,请返回空列表 issues: []。
|
|
181
|
+
- 值需与实际分析一致;未调用工具时可省略 used_tools 等非必要字段。
|
|
182
|
+
""".strip()
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def _default_common_system_prompt() -> str:
    """
    Return the shared (lightweight) system prompt used when the caller does
    not supply one; the text itself comes from COMMON_SYSTEM_PROMPT().
    """
    prompt = COMMON_SYSTEM_PROMPT()
    return prompt
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def _default_agents_config() -> List[Dict]:
    """
    Build the default multi-agent configuration (phase-one skeleton).

    Five agents are returned, in this order, each with the system prompt
    supplied by the matching factory in jarvis.jarvis_sec.prompts:

    - Planner: planning and coordination (task decomposition and routing).
    - SourceCollector: lists C/C++/Rust source files and dispatches them.
    - CAnalyzer: heuristic analysis of C/C++ security problems.
    - RustAnalyzer: Rust safety analysis (unsafe / pointers / error handling / FFI).
    - Aggregator: aggregates the results into the final report (JSON + Markdown).
    """
    agents: List[Dict] = []

    # Main agent: auto-completion policy is left to MultiAgent, so it is not
    # forced on here.
    agents.append(
        {
            "name": "Planner",
            "description": "规划与协调(任务分解与路由)",
            "system_prompt": PLANNER_PROMPT(),
        }
    )

    # The collection step does not force a hand-off summary, to save model
    # calls.
    agents.append(
        {
            "name": "SourceCollector",
            "description": "源码采集(列出C/C++/Rust文件清单并分派)",
            "system_prompt": SOURCE_COLLECTOR_PROMPT(),
            "summary_on_send": False,
        }
    )

    agents.append(
        {
            "name": "CAnalyzer",
            "description": "C/C++ 安全问题启发式分析",
            "system_prompt": C_ANALYZER_PROMPT(),
            "summary_on_send": False,
        }
    )

    agents.append(
        {
            "name": "RustAnalyzer",
            "description": "Rust 安全性分析(unsafe/指针/错误处理/FFI)",
            "system_prompt": RUST_ANALYZER_PROMPT(),
            "summary_on_send": False,
        }
    )

    # Terminal agent: its report string is what the multi-agent run hands
    # back to the user.
    agents.append(
        {
            "name": "Aggregator",
            "description": "聚合并生成最终报告(JSON + Markdown)",
            "system_prompt": AGGREGATOR_PROMPT(),
        }
    )

    return agents
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def create_security_multi_agent(
    agents_config: Optional[List[Dict]] = None,
    main_agent_name: str = "Planner",
    common_system_prompt: Optional[str] = None,
) -> MultiAgent:
    """
    Create the "OpenHarmony security evolution" multi-agent instance.

    Args:
        agents_config: Per-agent configuration dicts. When empty or ``None``
            the default phase-one configuration is used. The dicts passed in
            are not modified.
        main_agent_name: Name of the agent that acts as the main agent.
        common_system_prompt: Shared system prompt; when ``None`` the
            built-in lightweight prompt is used.

    Returns:
        The configured ``MultiAgent``.
    """
    # Work on shallow copies so the defaults added below never leak into the
    # caller's configuration objects.
    config = [dict(c) for c in (agents_config or _default_agents_config())]
    # Methodology and analysis default to disabled for agents created here
    # (an explicit value in the supplied config is respected); this keeps the
    # change local to this module instead of altering global behaviour.
    for c in config:
        c.setdefault("use_methodology", False)
        c.setdefault("use_analysis", False)
    common_sp = common_system_prompt if common_system_prompt is not None else _default_common_system_prompt()
    return MultiAgent(config, main_agent_name=main_agent_name, common_system_prompt=common_sp)
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
def _git_restore_if_dirty(repo_root: str) -> int:
|
|
266
|
+
"""
|
|
267
|
+
若 repo_root 为 git 仓库:检测工作区是否有变更;如有则使用 'git checkout -- .' 恢复。
|
|
268
|
+
返回估算的变更文件数(基于 git status --porcelain 的行数)。
|
|
269
|
+
"""
|
|
270
|
+
try:
|
|
271
|
+
from pathlib import Path as _Path
|
|
272
|
+
import subprocess as _sub
|
|
273
|
+
root = _Path(repo_root)
|
|
274
|
+
if not (root / ".git").exists():
|
|
275
|
+
return 0
|
|
276
|
+
proc = _sub.run(["git", "status", "--porcelain"], cwd=str(root), capture_output=True, text=True)
|
|
277
|
+
if proc.returncode != 0:
|
|
278
|
+
return 0
|
|
279
|
+
lines = [l for l in proc.stdout.splitlines() if l.strip()]
|
|
280
|
+
if lines:
|
|
281
|
+
_sub.run(["git", "checkout", "--", "."], cwd=str(root), capture_output=True, text=True)
|
|
282
|
+
return len(lines)
|
|
283
|
+
except Exception:
|
|
284
|
+
pass
|
|
285
|
+
return 0
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
def run_security_analysis(
    entry_path: str,
    languages: Optional[List[str]] = None,
    llm_group: Optional[str] = None,
    report_file: Optional[str] = None,
    resume: bool = True,
) -> str:
    """
    Run the security-analysis workflow (phase-one skeleton, hybrid mode).

    Steps:

    1. Run the local regex/heuristic ``direct_scan`` to produce candidate
       findings (discovery is not left to the agent).
    2. Reduce each candidate to a compact record; at most 200 candidates are
       verified to keep the run bounded.
    3. Verify every candidate with its own single ``Agent`` whose summary
       must carry a ``<REPORT>`` block; issues confirmed by the summary are
       collected. After each agent run the git work tree under
       ``entry_path`` is restored if it was modified.
    4. Build the final report with ``build_json_and_markdown``.

    Args:
        entry_path: Root directory to analyse.
        languages: Language extensions to scan (e.g.
            ``["c", "cpp", "h", "hpp", "rs"]``); that list is the default
            when empty or ``None``.
        llm_group: Model group name, passed to each Agent as ``model_group``
            for this call chain only.
        report_file: Incremental JSONL report path. Whenever a sub-task
            yields issues one record is appended. Defaults to
            ``entry_path/.jarvis/sec/agent_issues.jsonl``.
        resume: When true, candidates already recorded as ``done`` in
            ``entry_path/.jarvis/sec/progress.jsonl`` are skipped. The
            progress file receives a record when each sub-task starts
            (``running``), finishes (``done``) or is skipped (``skipped``).

    Returns:
        The final report string produced by ``build_json_and_markdown``.
    """
    import json
    from datetime import datetime as _dt, timezone as _tz
    from pathlib import Path as _Path

    MAX_ITEMS = 200  # upper bound on verified candidates

    langs = languages or ["c", "cpp", "h", "hpp", "rs"]
    progress_path = _Path(entry_path) / ".jarvis/sec" / "progress.jsonl"

    def _utc_stamp() -> str:
        """UTC timestamp in ISO format with a trailing "Z"."""
        return _dt.now(_tz.utc).isoformat().replace("+00:00", "Z")

    def _log(message: str) -> None:
        """Best-effort console output; a failing print must not stop the scan."""
        try:
            print(message)
        except Exception:
            pass

    def _progress_append(rec: Dict) -> None:
        """Append one record to the progress JSONL file (best-effort)."""
        try:
            progress_path.parent.mkdir(parents=True, exist_ok=True)
            rec = dict(rec)
            rec.setdefault("timestamp", _utc_stamp())
            with progress_path.open("a", encoding="utf-8") as f:
                f.write(json.dumps(rec, ensure_ascii=False) + "\n")
        except Exception:
            # A broken progress file must not affect the main flow.
            pass

    # Signatures of candidates that a previous run already finished.
    done_sigs: set = set()
    if resume and progress_path.exists():
        try:
            for raw in progress_path.read_text(encoding="utf-8", errors="ignore").splitlines():
                raw = raw.strip()
                if not raw:
                    continue
                try:
                    obj = json.loads(raw)
                except Exception:
                    continue
                # Skip lines that are valid JSON but not a record object, so
                # one odd line cannot discard the remaining resume data.
                if not isinstance(obj, dict):
                    continue
                if obj.get("event") == "task_status" and obj.get("status") == "done":
                    sig = obj.get("candidate_signature")
                    if sig and isinstance(sig, str):
                        done_sigs.add(sig)
        except Exception:
            pass

    # 1) Local direct scan produces the initial candidates.
    _progress_append({"event": "pre_scan_start", "entry_path": entry_path, "languages": langs})
    pre_scan = direct_scan(entry_path, languages=langs)
    candidates = pre_scan.get("issues", [])
    summary = pre_scan.get("summary", {})
    _progress_append({
        "event": "pre_scan_done",
        "entry_path": entry_path,
        "languages": langs,
        "scanned_files": summary.get("scanned_files"),
        "issues_found": len(candidates),
    })

    # 2) Reduce candidates to compact sub-task records to limit context size.
    def _compact(it: Dict) -> Dict:
        return {
            "language": it.get("language"),
            "category": it.get("category"),
            "pattern": it.get("pattern"),
            "file": it.get("file"),
            "line": it.get("line"),
            "evidence": it.get("evidence"),
            "confidence": it.get("confidence"),
            "severity": it.get("severity", "medium"),
        }

    compact_candidates = [_compact(it) for it in candidates][:MAX_ITEMS]
    total = len(compact_candidates)
    if len(candidates) > total:
        _log(f"[JARVIS-SEC] candidate list truncated: verifying first {total} of {len(candidates)}")

    def _append_report(items, source: str, task_id: str, cand: Dict):
        """
        Append the issues of one sub-task to the JSONL report file (one JSON
        object per line). Nothing is written when ``items`` is empty.
        source: "summary" | "output_fallback"
        """
        if not items:
            return
        try:
            path = _Path(report_file) if report_file else _Path(entry_path) / ".jarvis/sec" / "agent_issues.jsonl"
            path.parent.mkdir(parents=True, exist_ok=True)
            rec = {
                "task_id": task_id,
                "candidate": cand,
                "issues": items,
                "meta": {
                    "entry_path": entry_path,
                    "languages": langs,
                    "source": source,
                    "timestamp": _utc_stamp(),
                },
            }
            with path.open("a", encoding="utf-8") as f:
                f.write(json.dumps(rec, ensure_ascii=False) + "\n")
            _log(f"[JARVIS-SEC] write {len(items)} issue(s) to {path}")
        except Exception:
            # A failed report write must not affect the main flow.
            pass

    # The system prompt is identical for every candidate, so build it once.
    # NOTE(review): "{file}" below is sent literally (this is not an
    # f-string) - confirm whether interpolating the candidate file was intended.
    system_prompt = (
        COMMON_SYSTEM_PROMPT()
        + "\n"
        + """
# 单Agent安全分析约束
- 仅围绕输入候选的位置进行验证与细化;避免无关扩展与大范围遍历。
- 工具优先:使用 read_code 读取 {file} 附近源码(行号前后各 ~50 行),必要时用 execute_script 辅助检索。
- 禁止修改任何文件或执行写操作命令(rm/mv/cp/echo >、sed -i、git、patch、chmod、chown 等);仅进行只读分析与读取。
- 每次仅执行一个操作;等待工具结果后再进行下一步。
""".strip()
    )

    # The AFTER_SUMMARY event lets us capture the summary the Agent generates
    # internally instead of calling the model a second time.
    try:
        from jarvis.jarvis_agent.events import AFTER_SUMMARY as _AFTER_SUMMARY  # type: ignore
    except Exception:
        _AFTER_SUMMARY = None  # type: ignore

    # 3) Verify each candidate with its own single Agent and collect results.
    all_issues: List[Dict] = []
    meta_records: List[Dict] = []
    for idx, cand in enumerate(compact_candidates, start=1):
        task_id = f"JARVIS-SEC-Analyzer-{idx}"
        where = f"{cand.get('file')}:{cand.get('line')} ({cand.get('language')})"
        # Candidate signature used for resuming: language|file|line|pattern
        cand_sig = f"{cand.get('language','')}|{cand.get('file','')}|{cand.get('line','')}|{cand.get('pattern','')}"
        status_base = {
            "event": "task_status",
            "task_id": task_id,
            "idx": idx,
            "total": total,
            "candidate_signature": cand_sig,
            "candidate": cand,
        }

        def _status_record(status: str) -> Dict:
            """Progress record for this candidate with the original key order."""
            rec = {"event": status_base["event"], "status": status}
            rec.update({k: v for k, v in status_base.items() if k != "event"})
            return rec

        if resume and cand_sig in done_sigs:
            _log(f"[JARVIS-SEC] resume-skip {idx}/{total}: {where}")
            _progress_append(_status_record("skipped"))
            continue

        _log(f"[JARVIS-SEC] Progress {idx}/{total}: {where}")

        # One Agent per candidate avoids multi-agent complexity and context
        # pollution between candidates.
        agent_kwargs: Dict = dict(
            system_prompt=system_prompt,
            name=task_id,
            auto_complete=True,
            # The summary is what carries the structured output.
            need_summary=True,
            summary_prompt=_build_summary_prompt(task_id, entry_path, langs, cand),
            non_interactive=True,
            in_multi_agent=False,
            # Explicitly disable methodology and analysis.
            use_methodology=False,
            use_analysis=False,
            output_handler=[ToolRegistry()],
            use_tools=["read_code", "execute_script"],
        )
        # llm_group applies to this Agent only; global configuration is untouched.
        if llm_group:
            agent_kwargs["model_group"] = llm_group
        agent = Agent(**agent_kwargs)
        per_task = f"""
# 安全子任务(单点验证)
目标:针对候选问题进行证据核实、风险评估与修复建议补充;若确认误报,issues 应为空。
上下文参数:
- entry_path: {entry_path}
- languages: {langs}

候选(JSON):
{json.dumps(cand, ensure_ascii=False, indent=2)}

操作建议:
- 使用 read_code 读取目标文件(尽量提供绝对路径或以 entry_path 拼接),围绕候选行号上下各约50行。
- 若需搜索更多线索,可使用 execute_script 调用 rg/find 对目标文件进行局部检索。
""".strip()

        _progress_append(_status_record("running"))

        summary_container: Dict[str, str] = {"text": ""}
        if _AFTER_SUMMARY:
            def _on_after_summary(**kwargs):
                try:
                    summary_container["text"] = str(kwargs.get("summary", "") or "")
                except Exception:
                    summary_container["text"] = ""
            try:
                agent.event_bus.subscribe(_AFTER_SUMMARY, _on_after_summary)
            except Exception:
                pass

        # The return value is not used: results are read from the summary.
        agent.run(per_task)

        # Workspace protection: if files were modified during the Agent run,
        # restore them with "git checkout -- .".
        workspace_restore_info: Optional[Dict] = None
        try:
            _changed = _git_restore_if_dirty(entry_path)
            workspace_restore_info = {
                "performed": bool(_changed),
                "changed_files_count": int(_changed or 0),
                "action": "git checkout -- .",
            }
            # Audit trail: the restore outcome of every Agent run goes into
            # the final report's meta.
            meta_records.append(
                {
                    "task_id": task_id,
                    "candidate": cand,
                    "workspace_restore": workspace_restore_info,
                }
            )
            if _changed:
                _log(f"[JARVIS-SEC] workspace restored ({_changed} file(s)) via: git checkout -- .")
        except Exception:
            # Failing to collect audit information must not stop the flow.
            pass

        # Parse the <REPORT> block (JSON/YAML) of the summary; a summary that
        # is plain JSON is accepted too. The Agent's main output is never
        # parsed as a fallback.
        summary_items: Optional[List[Dict]] = None
        summary_text = summary_container.get("text", "")
        if summary_text:
            rep = _try_parse_summary_report(summary_text)
            if rep is None:
                rep = _try_parse_summary_json(summary_text)
            if isinstance(rep, dict):
                items = rep.get("issues")
                if isinstance(items, list):
                    summary_items = items

        parsed = isinstance(summary_items, list)
        if parsed:
            # Model output is untrusted: keep only object entries so a stray
            # string/number in the list cannot crash the whole scan.
            valid_items = [it for it in summary_items if isinstance(it, dict)]
            if len(valid_items) != len(summary_items):
                _log(f"[JARVIS-SEC] ignored {len(summary_items) - len(valid_items)} malformed issue entr(y/ies) {idx}/{total}")
            summary_items = valid_items
            for it in summary_items:
                it.setdefault("language", cand.get("language"))
                it.setdefault("file", cand.get("file"))
                it.setdefault("line", cand.get("line"))
            if not summary_items:
                _log(f"[JARVIS-SEC] no-issue {idx}/{total}: {where}")
            else:
                all_issues.extend(summary_items)
                _log(f"[JARVIS-SEC] issues-found {idx}/{total}: count={len(summary_items)} -> append report (summary)")
                _append_report(summary_items, "summary", task_id, cand)
        else:
            _log(f"[JARVIS-SEC] parse-fail {idx}/{total} (no <REPORT> in summary): {where}")

        # Task finished. An unparsable summary counts as zero issues and is
        # flagged with parse_fail.
        done_record = _status_record("done")
        done_record["issues_count"] = len(summary_items) if parsed else 0
        if not parsed:
            done_record["parse_fail"] = True
        done_record["workspace_restore"] = workspace_restore_info
        _progress_append(done_record)

    # 4) Build the final report (JSON + Markdown) with the shared aggregator.
    from jarvis.jarvis_sec.report import build_json_and_markdown
    return build_json_and_markdown(
        all_issues,
        scanned_root=summary.get("scanned_root"),
        scanned_files=summary.get("scanned_files"),
        meta=meta_records or None,
    )
|
|
635
|
+
|
|
636
|
+
|
|
637
|
+
def _try_parse_summary_report(text: str) -> Optional[Dict]:
|
|
638
|
+
"""
|
|
639
|
+
从摘要文本中提取 <REPORT>...</REPORT> 内容,并解析为 dict(支持 JSON 或 YAML)。
|
|
640
|
+
- 若提取/解析失败返回 None
|
|
641
|
+
- YAML 解析采用安全模式,若环境无 PyYAML 则忽略
|
|
642
|
+
"""
|
|
643
|
+
import re
|
|
644
|
+
import json as _json
|
|
645
|
+
start = text.find("<REPORT>")
|
|
646
|
+
end = text.find("</REPORT>")
|
|
647
|
+
if start == -1 or end == -1 or end <= start:
|
|
648
|
+
return None
|
|
649
|
+
content = text[start + len("<REPORT>"):end].strip()
|
|
650
|
+
# 优先 JSON
|
|
651
|
+
try:
|
|
652
|
+
data = _json.loads(content)
|
|
653
|
+
if isinstance(data, dict):
|
|
654
|
+
return data
|
|
655
|
+
except Exception:
|
|
656
|
+
pass
|
|
657
|
+
# 回退 YAML
|
|
658
|
+
try:
|
|
659
|
+
import yaml as _yaml # type: ignore
|
|
660
|
+
data = _yaml.safe_load(content)
|
|
661
|
+
if isinstance(data, dict):
|
|
662
|
+
return data
|
|
663
|
+
except Exception:
|
|
664
|
+
pass
|
|
665
|
+
return None
|
|
666
|
+
|
|
667
|
+
|
|
668
|
+
# Names exported by the package: the two entry points defined in this module
# plus the workflow helpers imported from jarvis.jarvis_sec.workflow.
__all__ = [
    "create_security_multi_agent",
    "run_security_analysis",
    "run_security_analysis_fast",
    "direct_scan",
    "run_with_multi_agent",
]
|