jarvis-ai-assistant 0.5.0__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. jarvis/__init__.py +1 -1
  2. jarvis/jarvis_agent/__init__.py +114 -6
  3. jarvis/jarvis_agent/agent_manager.py +3 -0
  4. jarvis/jarvis_agent/jarvis.py +45 -9
  5. jarvis/jarvis_agent/run_loop.py +6 -1
  6. jarvis/jarvis_agent/task_planner.py +219 -0
  7. jarvis/jarvis_c2rust/__init__.py +13 -0
  8. jarvis/jarvis_c2rust/cli.py +405 -0
  9. jarvis/jarvis_c2rust/collector.py +209 -0
  10. jarvis/jarvis_c2rust/library_replacer.py +933 -0
  11. jarvis/jarvis_c2rust/llm_module_agent.py +1265 -0
  12. jarvis/jarvis_c2rust/scanner.py +1671 -0
  13. jarvis/jarvis_c2rust/transpiler.py +1236 -0
  14. jarvis/jarvis_code_agent/code_agent.py +151 -18
  15. jarvis/jarvis_data/config_schema.json +13 -3
  16. jarvis/jarvis_sec/README.md +180 -0
  17. jarvis/jarvis_sec/__init__.py +674 -0
  18. jarvis/jarvis_sec/checkers/__init__.py +33 -0
  19. jarvis/jarvis_sec/checkers/c_checker.py +1269 -0
  20. jarvis/jarvis_sec/checkers/rust_checker.py +367 -0
  21. jarvis/jarvis_sec/cli.py +110 -0
  22. jarvis/jarvis_sec/prompts.py +324 -0
  23. jarvis/jarvis_sec/report.py +260 -0
  24. jarvis/jarvis_sec/types.py +20 -0
  25. jarvis/jarvis_sec/workflow.py +513 -0
  26. jarvis/jarvis_tools/cli/main.py +1 -0
  27. jarvis/jarvis_tools/execute_script.py +1 -1
  28. jarvis/jarvis_tools/read_code.py +11 -1
  29. jarvis/jarvis_tools/read_symbols.py +129 -0
  30. jarvis/jarvis_tools/registry.py +9 -1
  31. jarvis/jarvis_tools/sub_agent.py +4 -3
  32. jarvis/jarvis_tools/sub_code_agent.py +3 -3
  33. jarvis/jarvis_utils/config.py +28 -6
  34. jarvis/jarvis_utils/git_utils.py +39 -0
  35. jarvis/jarvis_utils/utils.py +150 -7
  36. {jarvis_ai_assistant-0.5.0.dist-info → jarvis_ai_assistant-0.6.0.dist-info}/METADATA +13 -1
  37. {jarvis_ai_assistant-0.5.0.dist-info → jarvis_ai_assistant-0.6.0.dist-info}/RECORD +41 -22
  38. {jarvis_ai_assistant-0.5.0.dist-info → jarvis_ai_assistant-0.6.0.dist-info}/entry_points.txt +4 -0
  39. {jarvis_ai_assistant-0.5.0.dist-info → jarvis_ai_assistant-0.6.0.dist-info}/WHEEL +0 -0
  40. {jarvis_ai_assistant-0.5.0.dist-info → jarvis_ai_assistant-0.6.0.dist-info}/licenses/LICENSE +0 -0
  41. {jarvis_ai_assistant-0.5.0.dist-info → jarvis_ai_assistant-0.6.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,674 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ OpenHarmony 安全演进多Agent套件(阶段一骨架)
4
+
5
+ 目标(阶段一):
6
+ - 在不修改现有核心框架文件的前提下,基于现有 Agent 与 MultiAgent 能力,
7
+ 新增一个多Agent系统入口,专注于识别指定模块的安全问题(重点:内存管理、
8
+ 缓冲区操作、错误处理等)。
9
+
10
+ 集成方式:
11
+ - 复用 jarvis.jarvis_agent 与 jarvis.jarvis_multi_agent 提供的能力,不侵入式新增模块。
12
+ - 提供 create_security_multi_agent() 与 run_security_analysis(entry) 两个入口。
13
+
14
+ 后续扩展:
15
+ - 在后续提交中会新增 prompts.py、checkers/ 与 report.py、workflow.py 等模块,并将本文件中的默认提示词迁移到专门文件。
16
+ """
17
+
18
+ from typing import Dict, List, Optional
19
+
20
+ from jarvis.jarvis_multi_agent import MultiAgent
21
+ from jarvis.jarvis_agent import Agent, output_handler
22
+ from jarvis.jarvis_sec.prompts import (
23
+ COMMON_SYSTEM_PROMPT,
24
+ PLANNER_PROMPT,
25
+ SOURCE_COLLECTOR_PROMPT,
26
+ C_ANALYZER_PROMPT,
27
+ RUST_ANALYZER_PROMPT,
28
+ AGGREGATOR_PROMPT,
29
+ )
30
+ from jarvis.jarvis_sec.workflow import run_security_analysis_fast, direct_scan, run_with_multi_agent
31
+ from jarvis.jarvis_tools.registry import ToolRegistry
32
+
33
+
34
+ def _try_parse_issues_from_text(text: str) -> Optional[List[Dict]]:
35
+ """
36
+ 尝试从模型输出中解析出 {"issues": [...]},宽松容错:
37
+ 1) 直接作为完整JSON解析
38
+ 2) 从 ```json ... ``` 或 ``` ... ``` 代码块中提取JSON解析
39
+ 3) 从首个 { 开始进行大括号配对截取后解析
40
+
41
+ 返回:
42
+ - 成功解析到 issues 列表则返回该列表(可为空列表)
43
+ - 未能解析则返回 None
44
+ """
45
+ import json
46
+ import re
47
+
48
+ # 尝试直接解析
49
+ try:
50
+ data = json.loads(text)
51
+ items = data.get("issues", [])
52
+ if isinstance(items, list):
53
+ return items
54
+ except Exception:
55
+ pass
56
+
57
+ # 尝试从代码块提取
58
+ try:
59
+ m = re.search(r"```(?:json)?\s*(\{.*?\})\s*```", text, re.DOTALL | re.IGNORECASE)
60
+ if m:
61
+ data = json.loads(m.group(1))
62
+ items = data.get("issues", [])
63
+ if isinstance(items, list):
64
+ return items
65
+ except Exception:
66
+ pass
67
+
68
+ # 尝试基于大括号配对截取首个JSON对象
69
+ try:
70
+ start = text.find("{")
71
+ if start != -1:
72
+ stack = 0
73
+ end = None
74
+ for i, ch in enumerate(text[start:], start=start):
75
+ if ch == "{":
76
+ stack += 1
77
+ elif ch == "}":
78
+ stack -= 1
79
+ if stack == 0:
80
+ end = i + 1
81
+ break
82
+ if end:
83
+ snippet = text[start:end]
84
+ data = json.loads(snippet)
85
+ items = data.get("issues", [])
86
+ if isinstance(items, list):
87
+ return items
88
+ except Exception:
89
+ pass
90
+
91
+ return None
92
+
93
+
94
+ def _try_parse_summary_json(text: str) -> Optional[Dict]:
95
+ """
96
+ 从模型摘要文本中尽力提取严格 JSON 对象(非仅 issues 列表)。
97
+ 解析顺序:
98
+ 1) 直接 JSON
99
+ 2) ```json ...``` 或 ```...``` 代码块中的 JSON
100
+ 3) 基于首个花括号的配对截取 JSON 对象
101
+ 成功时返回解析后的 dict;失败返回 None
102
+ """
103
+ import json
104
+ import re
105
+
106
+ # 直接解析
107
+ try:
108
+ data = json.loads(text)
109
+ if isinstance(data, dict):
110
+ return data
111
+ except Exception:
112
+ pass
113
+
114
+ # 代码块提取
115
+ try:
116
+ m = re.search(r"```(?:json)?\s*(\{.*?\})\s*```", text, re.DOTALL | re.IGNORECASE)
117
+ if m:
118
+ data = json.loads(m.group(1))
119
+ if isinstance(data, dict):
120
+ return data
121
+ except Exception:
122
+ pass
123
+
124
+ # 花括号配对截取
125
+ try:
126
+ start = text.find("{")
127
+ if start != -1:
128
+ stack = 0
129
+ end = None
130
+ for i, ch in enumerate(text[start:], start=start):
131
+ if ch == "{":
132
+ stack += 1
133
+ elif ch == "}":
134
+ stack -= 1
135
+ if stack == 0:
136
+ end = i + 1
137
+ break
138
+ if end:
139
+ snippet = text[start:end]
140
+ data = json.loads(snippet)
141
+ if isinstance(data, dict):
142
+ return data
143
+ except Exception:
144
+ pass
145
+
146
+ return None
147
+
148
+
149
+ def _build_summary_prompt(task_id: str, entry_path: str, languages: List[str], candidate: Dict) -> str:
150
+ """
151
+ 构建摘要提示词:要求以 <REPORT>...</REPORT> 包裹的 JSON 或 YAML 输出。
152
+ 系统提示词不强制规定主对话输出格式,仅在摘要中给出结构化结果。
153
+ """
154
+ import json as _json
155
+ cand_json = _json.dumps(candidate, ensure_ascii=False, indent=2)
156
+ langs_json = _json.dumps(languages, ensure_ascii=False)
157
+ return f"""
158
+ 请将本轮“安全子任务(单点验证)”的结构化结果仅放入以下标记中(允许 JSON 或 YAML):
159
+ <REPORT>
160
+ # 推荐 JSON;如果使用 YAML 亦可
161
+ issues:
162
+ - language: "c/cpp|rust"
163
+ category: "unsafe_api|buffer_overflow|memory_mgmt|error_handling|unsafe_usage|concurrency|ffi"
164
+ pattern: "命中的模式/关键字"
165
+ file: "相对或绝对路径"
166
+ line: 0
167
+ evidence: "证据代码片段(单行简化)"
168
+ description: "问题说明"
169
+ suggestion: "修复建议"
170
+ confidence: 0.0
171
+ severity: "high|medium|low"
172
+ meta:
173
+ task_id: "{task_id}"
174
+ entry_path: "{entry_path}"
175
+ languages: {langs_json}
176
+ candidate: {cand_json}
177
+ </REPORT>
178
+ 要求:
179
+ - 报告只能出现在 <REPORT> 与 </REPORT> 中,且不得出现其他文本。
180
+ - 若确认误报,请返回空列表 issues: []。
181
+ - 值需与实际分析一致;未调用工具时可省略 used_tools 等非必要字段。
182
+ """.strip()
183
+
184
+
185
def _default_common_system_prompt() -> str:
    """
    Return the lightweight system prompt shared by every agent, obtained by
    calling COMMON_SYSTEM_PROMPT from jarvis.jarvis_sec.prompts.
    """
    shared_prompt = COMMON_SYSTEM_PROMPT()
    return shared_prompt
190
+
191
+
192
def _default_agents_config() -> List[Dict]:
    """
    Build the default multi-agent roster (phase-one skeleton).

    Roles, in pipeline order:
      - Planner: task decomposition and routing for the user request
      - SourceCollector: source inventory and language classification (C/C++/Rust first)
      - CAnalyzer: heuristic security checks for C/C++
      - RustAnalyzer: Rust safety scan (unsafe, raw pointers, unhandled Result, ...)
      - Aggregator: merges the other agents' output into the structured report
    """
    # Resolve every prompt up front, in pipeline order.
    prompts = {
        "Planner": PLANNER_PROMPT(),
        "SourceCollector": SOURCE_COLLECTOR_PROMPT(),
        "CAnalyzer": C_ANALYZER_PROMPT(),
        "RustAnalyzer": RUST_ANALYZER_PROMPT(),
        "Aggregator": AGGREGATOR_PROMPT(),
    }
    descriptions = {
        "Planner": "规划与协调(任务分解与路由)",
        "SourceCollector": "源码采集(列出C/C++/Rust文件清单并分派)",
        "CAnalyzer": "C/C++ 安全问题启发式分析",
        "RustAnalyzer": "Rust 安全性分析(unsafe/指针/错误处理/FFI)",
        "Aggregator": "聚合并生成最终报告(JSON + Markdown)",
    }
    # Intermediate stages skip the hand-off summary to save model calls.
    # Planner (main agent) and Aggregator (terminal agent) leave the setting
    # unset so MultiAgent decides.
    without_handoff_summary = ("SourceCollector", "CAnalyzer", "RustAnalyzer")

    roster: List[Dict] = []
    for agent_name, prompt in prompts.items():
        entry: Dict = {
            "name": agent_name,
            "description": descriptions[agent_name],
            "system_prompt": prompt,
        }
        if agent_name in without_handoff_summary:
            entry["summary_on_send"] = False
        roster.append(entry)
    return roster
244
+
245
+
246
def create_security_multi_agent(
    agents_config: Optional[List[Dict]] = None,
    main_agent_name: str = "Planner",
    common_system_prompt: Optional[str] = None,
) -> MultiAgent:
    """
    Create the "OpenHarmony security evolution" MultiAgent instance.

    Args:
        agents_config: per-agent configuration dicts; when empty or None the
            default phase-one roster from _default_agents_config() is used.
        main_agent_name: name of the agent that receives the user request.
        common_system_prompt: prompt shared by all agents; when None the
            built-in lightweight prompt is used.

    Returns:
        The configured MultiAgent.
    """
    source_config = agents_config or _default_agents_config()
    # Work on shallow copies so the caller's dicts are never mutated.
    config = [dict(agent_cfg) for agent_cfg in source_config]
    # Methodology and analysis default to off for agents created here (an
    # explicit value in the config still wins), without touching global settings.
    for agent_cfg in config:
        agent_cfg.setdefault("use_methodology", False)
        agent_cfg.setdefault("use_analysis", False)
    if common_system_prompt is not None:
        common_sp = common_system_prompt
    else:
        common_sp = _default_common_system_prompt()
    return MultiAgent(config, main_agent_name=main_agent_name, common_system_prompt=common_sp)
263
+
264
+
265
+ def _git_restore_if_dirty(repo_root: str) -> int:
266
+ """
267
+ 若 repo_root 为 git 仓库:检测工作区是否有变更;如有则使用 'git checkout -- .' 恢复。
268
+ 返回估算的变更文件数(基于 git status --porcelain 的行数)。
269
+ """
270
+ try:
271
+ from pathlib import Path as _Path
272
+ import subprocess as _sub
273
+ root = _Path(repo_root)
274
+ if not (root / ".git").exists():
275
+ return 0
276
+ proc = _sub.run(["git", "status", "--porcelain"], cwd=str(root), capture_output=True, text=True)
277
+ if proc.returncode != 0:
278
+ return 0
279
+ lines = [l for l in proc.stdout.splitlines() if l.strip()]
280
+ if lines:
281
+ _sub.run(["git", "checkout", "--", "."], cwd=str(root), capture_output=True, text=True)
282
+ return len(lines)
283
+ except Exception:
284
+ pass
285
+ return 0
286
+
287
+
288
def run_security_analysis(
    entry_path: str,
    languages: Optional[List[str]] = None,
    llm_group: Optional[str] = None,
    report_file: Optional[str] = None,
    resume: bool = True,
) -> str:
    """
    Run the hybrid security-analysis workflow (phase-one skeleton).

    A local regex/heuristic scan (direct_scan) produces candidate findings.
    Each candidate is then verified by its own single Agent run, and the
    confirmed issues are aggregated into the final report.

    Args:
        entry_path: root directory to analyse.
        languages: file extensions to scan (e.g. ["c", "cpp", "h", "hpp", "rs"]);
            that list is the default when empty or None.
        llm_group: model group name passed to each Agent as ``model_group``;
            it only affects this call chain, not the global configuration.
        report_file: JSONL file that receives one record per sub-task with
            confirmed issues; defaults to entry_path/.jarvis/sec/agent_issues.jsonl.
        resume: when True, candidates already marked "done" in
            entry_path/.jarvis/sec/progress.jsonl are skipped. The progress
            file gets a "running" record when a task starts and a "done"
            record when it ends.

    Returns:
        The final report string produced by build_json_and_markdown.

    Notes:
        - At most 200 candidates are verified per run; the rest are dropped.
        - Issues from tasks skipped on resume are not re-added to the returned
          report; they remain only in the incremental JSONL report file.
        - NOTE(review): after every Agent run the work tree is restored with
          ``git checkout -- .``, which also discards uncommitted changes that
          existed before the scan started. Confirm callers run on a clean tree.
    """
    import json
    from datetime import datetime as _dt, timezone as _tz
    from pathlib import Path as _Path

    langs = languages or ["c", "cpp", "h", "hpp", "rs"]

    # Progress file (JSONL) used for resumable scans.
    progress_path = _Path(entry_path) / ".jarvis/sec" / "progress.jsonl"

    def _utc_timestamp() -> str:
        # datetime.utcnow() is deprecated since Python 3.12; this keeps the
        # same naive-ISO + "Z" format.
        return _dt.now(_tz.utc).replace(tzinfo=None).isoformat() + "Z"

    def _log(message: str) -> None:
        # Console output is informational only; a failing print (e.g. console
        # encoding problems) must not stop the scan.
        try:
            print(message)
        except Exception:
            pass

    def _progress_append(rec: Dict) -> None:
        # Best effort: failing to persist progress must not break the main flow.
        try:
            progress_path.parent.mkdir(parents=True, exist_ok=True)
            rec = dict(rec)
            rec.setdefault("timestamp", _utc_timestamp())
            with progress_path.open("a", encoding="utf-8") as f:
                f.write(json.dumps(rec, ensure_ascii=False) + "\n")
        except Exception:
            pass

    # Signatures of candidates that a previous run already finished.
    done_sigs: set = set()
    if resume and progress_path.exists():
        try:
            for line in progress_path.read_text(encoding="utf-8", errors="ignore").splitlines():
                line = line.strip()
                if not line:
                    continue
                try:
                    obj = json.loads(line)
                except Exception:
                    continue
                # Skip valid-JSON lines that are not objects instead of
                # aborting the whole load on the first one.
                if not isinstance(obj, dict):
                    continue
                if obj.get("event") == "task_status" and obj.get("status") == "done":
                    sig = obj.get("candidate_signature")
                    if sig:
                        done_sigs.add(sig)
        except Exception:
            pass

    # 1) Local direct scan: discovery must not depend on the Agent alone.
    _progress_append({"event": "pre_scan_start", "entry_path": entry_path, "languages": langs})
    pre_scan = direct_scan(entry_path, languages=langs)
    candidates = pre_scan.get("issues", [])
    summary = pre_scan.get("summary", {})
    _progress_append({
        "event": "pre_scan_done",
        "entry_path": entry_path,
        "languages": langs,
        "scanned_files": summary.get("scanned_files"),
        "issues_found": len(candidates),
    })

    # 2) Reduce every candidate to the fields the Agent needs, and cap the
    #    number of sub-tasks to keep prompts and runtime bounded.
    def _compact(it: Dict) -> Dict:
        return {
            "language": it.get("language"),
            "category": it.get("category"),
            "pattern": it.get("pattern"),
            "file": it.get("file"),
            "line": it.get("line"),
            "evidence": it.get("evidence"),
            "confidence": it.get("confidence"),
            "severity": it.get("severity", "medium"),
        }

    MAX_ITEMS = 200
    compact_candidates = [_compact(it) for it in candidates][:MAX_ITEMS]
    total = len(compact_candidates)

    def _task_record(status: str, task_id: str, idx: int, cand_sig: str, cand: Dict) -> Dict:
        # Common shape of every "task_status" progress record.
        return {
            "event": "task_status",
            "status": status,
            "task_id": task_id,
            "idx": idx,
            "total": total,
            "candidate_signature": cand_sig,
            "candidate": cand,
        }

    def _append_report(items, source: str, task_id: str, cand: Dict):
        """
        Append the confirmed issues of one sub-task to the JSONL report file
        (one JSON object per line). Nothing is written when ``items`` is empty.
        source: "summary" | "output_fallback"
        """
        if not items:
            return
        try:
            path = _Path(report_file) if report_file else _Path(entry_path) / ".jarvis/sec" / "agent_issues.jsonl"
            path.parent.mkdir(parents=True, exist_ok=True)
            rec = {
                "task_id": task_id,
                "candidate": cand,
                "issues": items,
                "meta": {
                    "entry_path": entry_path,
                    "languages": langs,
                    "source": source,
                    "timestamp": _utc_timestamp(),
                },
            }
            with path.open("a", encoding="utf-8") as f:
                f.write(json.dumps(rec, ensure_ascii=False) + "\n")
            _log(f"[JARVIS-SEC] write {len(items)} issue(s) to {path}")
        except Exception:
            # Best effort: a report write failure must not break the main flow.
            pass

    # The AFTER_SUMMARY event lets us capture the summary the Agent generates
    # itself, avoiding a second model call. If the event is unavailable no
    # summary is captured and every task is recorded as a parse failure.
    try:
        from jarvis.jarvis_agent.events import AFTER_SUMMARY as _AFTER_SUMMARY  # type: ignore
    except Exception:
        _AFTER_SUMMARY = None  # type: ignore

    # 3) Verify each candidate with its own single-Agent run.
    all_issues: List[Dict] = []
    meta_records: List[Dict] = []
    for idx, cand in enumerate(compact_candidates, start=1):
        task_id = f"JARVIS-SEC-Analyzer-{idx}"
        location = f"{cand.get('file')}:{cand.get('line')} ({cand.get('language')})"
        # Candidate signature for resume: language|file|line|pattern.
        cand_sig = f"{cand.get('language','')}|{cand.get('file','')}|{cand.get('line','')}|{cand.get('pattern','')}"

        if resume and cand_sig in done_sigs:
            _log(f"[JARVIS-SEC] resume-skip {idx}/{total}: {location}")
            _progress_append(_task_record("skipped", task_id, idx, cand_sig, cand))
            continue

        # One Agent per candidate avoids multi-agent complexity and context
        # pollution. The candidate's file is substituted into the constraints
        # so the model is pointed at a concrete path.
        target_file = cand.get("file") or "目标文件"
        system_prompt = (
            COMMON_SYSTEM_PROMPT()
            + "\n"
            + f"""
# 单Agent安全分析约束
- 仅围绕输入候选的位置进行验证与细化;避免无关扩展与大范围遍历。
- 工具优先:使用 read_code 读取 {target_file} 附近源码(行号前后各 ~50 行),必要时用 execute_script 辅助检索。
- 禁止修改任何文件或执行写操作命令(rm/mv/cp/echo >、sed -i、git、patch、chmod、chown 等);仅进行只读分析与读取。
- 每次仅执行一个操作;等待工具结果后再进行下一步。
""".strip()
        )

        _log(f"[JARVIS-SEC] Progress {idx}/{total}: {location}")

        agent_kwargs: Dict = dict(
            system_prompt=system_prompt,
            name=task_id,
            auto_complete=True,
            # The structured result is delivered through the summary.
            need_summary=True,
            summary_prompt=_build_summary_prompt(task_id, entry_path, langs, cand),
            non_interactive=True,
            in_multi_agent=False,
            # Methodology/analysis are disabled so the Agent follows the instructions as given.
            use_methodology=False,
            use_analysis=False,
            output_handler=[ToolRegistry()],
            use_tools=["read_code", "execute_script"],
        )
        # llm_group applies to this Agent only; global configuration is untouched.
        if llm_group:
            agent_kwargs["model_group"] = llm_group
        agent = Agent(**agent_kwargs)

        per_task = f"""
# 安全子任务(单点验证)
目标:针对候选问题进行证据核实、风险评估与修复建议补充;若确认误报,issues 应为空。
上下文参数:
- entry_path: {entry_path}
- languages: {langs}

候选(JSON):
{json.dumps(cand, ensure_ascii=False, indent=2)}

操作建议:
- 使用 read_code 读取目标文件(尽量提供绝对路径或以 entry_path 拼接),围绕候选行号上下各约50行。
- 若需搜索更多线索,可使用 execute_script 调用 rg/find 对目标文件进行局部检索。
""".strip()

        _progress_append(_task_record("running", task_id, idx, cand_sig, cand))

        # Capture the summary text through the event bus.
        summary_container: Dict[str, str] = {"text": ""}
        if _AFTER_SUMMARY:
            def _on_after_summary(**kwargs):
                try:
                    summary_container["text"] = str(kwargs.get("summary", "") or "")
                except Exception:
                    summary_container["text"] = ""
            try:
                agent.event_bus.subscribe(_AFTER_SUMMARY, _on_after_summary)
            except Exception:
                pass

        # The main output is intentionally ignored; only the summary is parsed.
        agent.run(per_task)

        # Workspace protection: undo any file modification the Agent made.
        workspace_restore_info: Optional[Dict] = None
        try:
            changed = _git_restore_if_dirty(entry_path)
            workspace_restore_info = {
                "performed": bool(changed),
                "changed_files_count": int(changed or 0),
                "action": "git checkout -- .",
            }
            # Audit trail that ends up in the final report's meta section.
            meta_records.append(
                {
                    "task_id": task_id,
                    "candidate": cand,
                    "workspace_restore": workspace_restore_info,
                }
            )
            if changed:
                _log(f"[JARVIS-SEC] workspace restored ({changed} file(s)) via: git checkout -- .")
        except Exception:
            # Audit bookkeeping failures must not affect the remaining flow.
            pass

        # Parse the <REPORT> section of the summary (JSON/YAML); fall back to
        # a bare JSON summary. The main output is never used as a fallback.
        summary_items: Optional[List[Dict]] = None
        summary_text = summary_container.get("text", "")
        if summary_text:
            rep = _try_parse_summary_report(summary_text)
            if rep is None:
                rep = _try_parse_summary_json(summary_text)
            if isinstance(rep, dict):
                items = rep.get("issues")
                if isinstance(items, list):
                    # Drop malformed (non-object) entries rather than crash on them.
                    summary_items = [it for it in items if isinstance(it, dict)]

        if summary_items is None:
            _log(f"[JARVIS-SEC] parse-fail {idx}/{total} (no <REPORT> in summary): {location}")
        else:
            # Back-fill location fields the model left out.
            for it in summary_items:
                it.setdefault("language", cand.get("language"))
                it.setdefault("file", cand.get("file"))
                it.setdefault("line", cand.get("line"))
            if not summary_items:
                _log(f"[JARVIS-SEC] no-issue {idx}/{total}: {location}")
            else:
                all_issues.extend(summary_items)
                _log(f"[JARVIS-SEC] issues-found {idx}/{total}: count={len(summary_items)} -> append report (summary)")
                _append_report(summary_items, "summary", task_id, cand)

        # Mark the task done. A parse failure counts as zero issues and is
        # flagged, so it is also skipped on resume.
        done_rec = _task_record("done", task_id, idx, cand_sig, cand)
        done_rec["issues_count"] = len(summary_items) if summary_items is not None else 0
        if summary_items is None:
            done_rec["parse_fail"] = True
        done_rec["workspace_restore"] = workspace_restore_info
        _progress_append(done_rec)

    # 4) Build the final report (JSON + Markdown) with the shared aggregator.
    from jarvis.jarvis_sec.report import build_json_and_markdown
    return build_json_and_markdown(
        all_issues,
        scanned_root=summary.get("scanned_root"),
        scanned_files=summary.get("scanned_files"),
        meta=meta_records or None,
    )
635
+
636
+
637
+ def _try_parse_summary_report(text: str) -> Optional[Dict]:
638
+ """
639
+ 从摘要文本中提取 <REPORT>...</REPORT> 内容,并解析为 dict(支持 JSON 或 YAML)。
640
+ - 若提取/解析失败返回 None
641
+ - YAML 解析采用安全模式,若环境无 PyYAML 则忽略
642
+ """
643
+ import re
644
+ import json as _json
645
+ start = text.find("<REPORT>")
646
+ end = text.find("</REPORT>")
647
+ if start == -1 or end == -1 or end <= start:
648
+ return None
649
+ content = text[start + len("<REPORT>"):end].strip()
650
+ # 优先 JSON
651
+ try:
652
+ data = _json.loads(content)
653
+ if isinstance(data, dict):
654
+ return data
655
+ except Exception:
656
+ pass
657
+ # 回退 YAML
658
+ try:
659
+ import yaml as _yaml # type: ignore
660
+ data = _yaml.safe_load(content)
661
+ if isinstance(data, dict):
662
+ return data
663
+ except Exception:
664
+ pass
665
+ return None
666
+
667
+
668
# Public API of this package. The first two names are defined in this module;
# the last three are re-exported from jarvis.jarvis_sec.workflow.
__all__ = [
    "create_security_multi_agent",
    "run_security_analysis",
    "run_security_analysis_fast",
    "direct_scan",
    "run_with_multi_agent",
]