jarvis-ai-assistant 0.3.30__py3-none-any.whl → 0.7.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181) hide show
  1. jarvis/__init__.py +1 -1
  2. jarvis/jarvis_agent/__init__.py +458 -152
  3. jarvis/jarvis_agent/agent_manager.py +17 -13
  4. jarvis/jarvis_agent/builtin_input_handler.py +2 -6
  5. jarvis/jarvis_agent/config_editor.py +2 -7
  6. jarvis/jarvis_agent/event_bus.py +82 -12
  7. jarvis/jarvis_agent/file_context_handler.py +329 -0
  8. jarvis/jarvis_agent/file_methodology_manager.py +3 -4
  9. jarvis/jarvis_agent/jarvis.py +628 -55
  10. jarvis/jarvis_agent/language_extractors/__init__.py +57 -0
  11. jarvis/jarvis_agent/language_extractors/c_extractor.py +21 -0
  12. jarvis/jarvis_agent/language_extractors/cpp_extractor.py +21 -0
  13. jarvis/jarvis_agent/language_extractors/go_extractor.py +21 -0
  14. jarvis/jarvis_agent/language_extractors/java_extractor.py +84 -0
  15. jarvis/jarvis_agent/language_extractors/javascript_extractor.py +79 -0
  16. jarvis/jarvis_agent/language_extractors/python_extractor.py +21 -0
  17. jarvis/jarvis_agent/language_extractors/rust_extractor.py +21 -0
  18. jarvis/jarvis_agent/language_extractors/typescript_extractor.py +84 -0
  19. jarvis/jarvis_agent/language_support_info.py +486 -0
  20. jarvis/jarvis_agent/main.py +34 -10
  21. jarvis/jarvis_agent/memory_manager.py +7 -16
  22. jarvis/jarvis_agent/methodology_share_manager.py +10 -16
  23. jarvis/jarvis_agent/prompt_manager.py +1 -1
  24. jarvis/jarvis_agent/prompts.py +193 -171
  25. jarvis/jarvis_agent/protocols.py +8 -12
  26. jarvis/jarvis_agent/run_loop.py +105 -9
  27. jarvis/jarvis_agent/session_manager.py +2 -3
  28. jarvis/jarvis_agent/share_manager.py +20 -22
  29. jarvis/jarvis_agent/shell_input_handler.py +1 -2
  30. jarvis/jarvis_agent/stdio_redirect.py +295 -0
  31. jarvis/jarvis_agent/task_analyzer.py +31 -6
  32. jarvis/jarvis_agent/task_manager.py +11 -27
  33. jarvis/jarvis_agent/tool_executor.py +2 -3
  34. jarvis/jarvis_agent/tool_share_manager.py +12 -24
  35. jarvis/jarvis_agent/utils.py +5 -1
  36. jarvis/jarvis_agent/web_bridge.py +189 -0
  37. jarvis/jarvis_agent/web_output_sink.py +53 -0
  38. jarvis/jarvis_agent/web_server.py +786 -0
  39. jarvis/jarvis_c2rust/__init__.py +26 -0
  40. jarvis/jarvis_c2rust/cli.py +575 -0
  41. jarvis/jarvis_c2rust/collector.py +250 -0
  42. jarvis/jarvis_c2rust/constants.py +26 -0
  43. jarvis/jarvis_c2rust/library_replacer.py +1254 -0
  44. jarvis/jarvis_c2rust/llm_module_agent.py +1272 -0
  45. jarvis/jarvis_c2rust/loaders.py +207 -0
  46. jarvis/jarvis_c2rust/models.py +28 -0
  47. jarvis/jarvis_c2rust/optimizer.py +2157 -0
  48. jarvis/jarvis_c2rust/scanner.py +1681 -0
  49. jarvis/jarvis_c2rust/transpiler.py +2983 -0
  50. jarvis/jarvis_c2rust/utils.py +385 -0
  51. jarvis/jarvis_code_agent/build_validation_config.py +132 -0
  52. jarvis/jarvis_code_agent/code_agent.py +1371 -220
  53. jarvis/jarvis_code_agent/code_analyzer/__init__.py +65 -0
  54. jarvis/jarvis_code_agent/code_analyzer/base_language.py +74 -0
  55. jarvis/jarvis_code_agent/code_analyzer/build_validator/__init__.py +44 -0
  56. jarvis/jarvis_code_agent/code_analyzer/build_validator/base.py +106 -0
  57. jarvis/jarvis_code_agent/code_analyzer/build_validator/cmake.py +74 -0
  58. jarvis/jarvis_code_agent/code_analyzer/build_validator/detector.py +125 -0
  59. jarvis/jarvis_code_agent/code_analyzer/build_validator/fallback.py +72 -0
  60. jarvis/jarvis_code_agent/code_analyzer/build_validator/go.py +70 -0
  61. jarvis/jarvis_code_agent/code_analyzer/build_validator/java_gradle.py +53 -0
  62. jarvis/jarvis_code_agent/code_analyzer/build_validator/java_maven.py +47 -0
  63. jarvis/jarvis_code_agent/code_analyzer/build_validator/makefile.py +61 -0
  64. jarvis/jarvis_code_agent/code_analyzer/build_validator/nodejs.py +110 -0
  65. jarvis/jarvis_code_agent/code_analyzer/build_validator/python.py +154 -0
  66. jarvis/jarvis_code_agent/code_analyzer/build_validator/rust.py +110 -0
  67. jarvis/jarvis_code_agent/code_analyzer/build_validator/validator.py +153 -0
  68. jarvis/jarvis_code_agent/code_analyzer/build_validator.py +43 -0
  69. jarvis/jarvis_code_agent/code_analyzer/context_manager.py +648 -0
  70. jarvis/jarvis_code_agent/code_analyzer/context_recommender.py +18 -0
  71. jarvis/jarvis_code_agent/code_analyzer/dependency_analyzer.py +132 -0
  72. jarvis/jarvis_code_agent/code_analyzer/file_ignore.py +330 -0
  73. jarvis/jarvis_code_agent/code_analyzer/impact_analyzer.py +781 -0
  74. jarvis/jarvis_code_agent/code_analyzer/language_registry.py +185 -0
  75. jarvis/jarvis_code_agent/code_analyzer/language_support.py +110 -0
  76. jarvis/jarvis_code_agent/code_analyzer/languages/__init__.py +49 -0
  77. jarvis/jarvis_code_agent/code_analyzer/languages/c_cpp_language.py +299 -0
  78. jarvis/jarvis_code_agent/code_analyzer/languages/go_language.py +215 -0
  79. jarvis/jarvis_code_agent/code_analyzer/languages/java_language.py +212 -0
  80. jarvis/jarvis_code_agent/code_analyzer/languages/javascript_language.py +254 -0
  81. jarvis/jarvis_code_agent/code_analyzer/languages/python_language.py +269 -0
  82. jarvis/jarvis_code_agent/code_analyzer/languages/rust_language.py +281 -0
  83. jarvis/jarvis_code_agent/code_analyzer/languages/typescript_language.py +280 -0
  84. jarvis/jarvis_code_agent/code_analyzer/llm_context_recommender.py +605 -0
  85. jarvis/jarvis_code_agent/code_analyzer/structured_code.py +556 -0
  86. jarvis/jarvis_code_agent/code_analyzer/symbol_extractor.py +252 -0
  87. jarvis/jarvis_code_agent/code_analyzer/tree_sitter_extractor.py +58 -0
  88. jarvis/jarvis_code_agent/lint.py +501 -8
  89. jarvis/jarvis_code_agent/utils.py +141 -0
  90. jarvis/jarvis_code_analysis/code_review.py +493 -584
  91. jarvis/jarvis_data/config_schema.json +128 -12
  92. jarvis/jarvis_git_squash/main.py +4 -5
  93. jarvis/jarvis_git_utils/git_commiter.py +82 -75
  94. jarvis/jarvis_mcp/sse_mcp_client.py +22 -29
  95. jarvis/jarvis_mcp/stdio_mcp_client.py +12 -13
  96. jarvis/jarvis_mcp/streamable_mcp_client.py +15 -14
  97. jarvis/jarvis_memory_organizer/memory_organizer.py +55 -74
  98. jarvis/jarvis_methodology/main.py +32 -48
  99. jarvis/jarvis_multi_agent/__init__.py +287 -55
  100. jarvis/jarvis_multi_agent/main.py +36 -4
  101. jarvis/jarvis_platform/base.py +524 -202
  102. jarvis/jarvis_platform/human.py +7 -8
  103. jarvis/jarvis_platform/kimi.py +30 -36
  104. jarvis/jarvis_platform/openai.py +88 -25
  105. jarvis/jarvis_platform/registry.py +26 -10
  106. jarvis/jarvis_platform/tongyi.py +24 -25
  107. jarvis/jarvis_platform/yuanbao.py +32 -43
  108. jarvis/jarvis_platform_manager/main.py +66 -77
  109. jarvis/jarvis_platform_manager/service.py +8 -13
  110. jarvis/jarvis_rag/cli.py +53 -55
  111. jarvis/jarvis_rag/embedding_manager.py +13 -18
  112. jarvis/jarvis_rag/llm_interface.py +8 -9
  113. jarvis/jarvis_rag/query_rewriter.py +10 -21
  114. jarvis/jarvis_rag/rag_pipeline.py +24 -27
  115. jarvis/jarvis_rag/reranker.py +4 -5
  116. jarvis/jarvis_rag/retriever.py +28 -30
  117. jarvis/jarvis_sec/__init__.py +305 -0
  118. jarvis/jarvis_sec/agents.py +143 -0
  119. jarvis/jarvis_sec/analysis.py +276 -0
  120. jarvis/jarvis_sec/checkers/__init__.py +32 -0
  121. jarvis/jarvis_sec/checkers/c_checker.py +2680 -0
  122. jarvis/jarvis_sec/checkers/rust_checker.py +1108 -0
  123. jarvis/jarvis_sec/cli.py +139 -0
  124. jarvis/jarvis_sec/clustering.py +1439 -0
  125. jarvis/jarvis_sec/file_manager.py +427 -0
  126. jarvis/jarvis_sec/parsers.py +73 -0
  127. jarvis/jarvis_sec/prompts.py +268 -0
  128. jarvis/jarvis_sec/report.py +336 -0
  129. jarvis/jarvis_sec/review.py +453 -0
  130. jarvis/jarvis_sec/status.py +264 -0
  131. jarvis/jarvis_sec/types.py +20 -0
  132. jarvis/jarvis_sec/utils.py +499 -0
  133. jarvis/jarvis_sec/verification.py +848 -0
  134. jarvis/jarvis_sec/workflow.py +226 -0
  135. jarvis/jarvis_smart_shell/main.py +38 -87
  136. jarvis/jarvis_stats/cli.py +2 -2
  137. jarvis/jarvis_stats/stats.py +8 -8
  138. jarvis/jarvis_stats/storage.py +15 -21
  139. jarvis/jarvis_stats/visualizer.py +1 -1
  140. jarvis/jarvis_tools/clear_memory.py +3 -20
  141. jarvis/jarvis_tools/cli/main.py +21 -23
  142. jarvis/jarvis_tools/edit_file.py +1019 -132
  143. jarvis/jarvis_tools/execute_script.py +83 -25
  144. jarvis/jarvis_tools/file_analyzer.py +6 -9
  145. jarvis/jarvis_tools/generate_new_tool.py +14 -21
  146. jarvis/jarvis_tools/lsp_client.py +1552 -0
  147. jarvis/jarvis_tools/methodology.py +2 -3
  148. jarvis/jarvis_tools/read_code.py +1736 -35
  149. jarvis/jarvis_tools/read_symbols.py +140 -0
  150. jarvis/jarvis_tools/read_webpage.py +12 -13
  151. jarvis/jarvis_tools/registry.py +427 -200
  152. jarvis/jarvis_tools/retrieve_memory.py +20 -19
  153. jarvis/jarvis_tools/rewrite_file.py +72 -158
  154. jarvis/jarvis_tools/save_memory.py +3 -15
  155. jarvis/jarvis_tools/search_web.py +18 -18
  156. jarvis/jarvis_tools/sub_agent.py +36 -43
  157. jarvis/jarvis_tools/sub_code_agent.py +25 -26
  158. jarvis/jarvis_tools/virtual_tty.py +55 -33
  159. jarvis/jarvis_utils/clipboard.py +7 -10
  160. jarvis/jarvis_utils/config.py +232 -45
  161. jarvis/jarvis_utils/embedding.py +8 -5
  162. jarvis/jarvis_utils/fzf.py +8 -8
  163. jarvis/jarvis_utils/git_utils.py +225 -36
  164. jarvis/jarvis_utils/globals.py +3 -3
  165. jarvis/jarvis_utils/http.py +1 -1
  166. jarvis/jarvis_utils/input.py +99 -48
  167. jarvis/jarvis_utils/jsonnet_compat.py +465 -0
  168. jarvis/jarvis_utils/methodology.py +52 -48
  169. jarvis/jarvis_utils/utils.py +819 -491
  170. jarvis_ai_assistant-0.7.6.dist-info/METADATA +600 -0
  171. jarvis_ai_assistant-0.7.6.dist-info/RECORD +218 -0
  172. {jarvis_ai_assistant-0.3.30.dist-info → jarvis_ai_assistant-0.7.6.dist-info}/entry_points.txt +4 -0
  173. jarvis/jarvis_agent/config.py +0 -92
  174. jarvis/jarvis_agent/edit_file_handler.py +0 -296
  175. jarvis/jarvis_platform/ai8.py +0 -332
  176. jarvis/jarvis_tools/ask_user.py +0 -54
  177. jarvis_ai_assistant-0.3.30.dist-info/METADATA +0 -381
  178. jarvis_ai_assistant-0.3.30.dist-info/RECORD +0 -137
  179. {jarvis_ai_assistant-0.3.30.dist-info → jarvis_ai_assistant-0.7.6.dist-info}/WHEEL +0 -0
  180. {jarvis_ai_assistant-0.3.30.dist-info → jarvis_ai_assistant-0.7.6.dist-info}/licenses/LICENSE +0 -0
  181. {jarvis_ai_assistant-0.3.30.dist-info → jarvis_ai_assistant-0.7.6.dist-info}/top_level.txt +0 -0
@@ -10,7 +10,6 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
10
10
  from rank_bm25 import BM25Okapi # type: ignore
11
11
 
12
12
  from .embedding_manager import EmbeddingManager
13
- from jarvis.jarvis_utils.output import OutputType, PrettyOutput
14
13
 
15
14
 
16
15
  class ChromaRetriever:
@@ -42,9 +41,8 @@ class ChromaRetriever:
42
41
  self.collection = self.client.get_or_create_collection(
43
42
  name=self.collection_name
44
43
  )
45
- PrettyOutput.print(
46
- f"ChromaDB 客户端已在 '{db_path}' 初始化,集合为 '{collection_name}'。",
47
- OutputType.SUCCESS,
44
+ print(
45
+ f"ChromaDB 客户端已在 '{db_path}' 初始化,集合为 '{collection_name}'。"
48
46
  )
49
47
 
50
48
  # BM25索引设置
@@ -58,15 +56,15 @@ class ChromaRetriever:
58
56
  def _load_or_initialize_bm25(self):
59
57
  """从磁盘加载BM25索引或初始化一个新索引。"""
60
58
  if os.path.exists(self.bm25_index_path):
61
- PrettyOutput.print("正在加载现有的 BM25 索引...", OutputType.INFO)
59
+ print("ℹ️ 正在加载现有的 BM25 索引...")
62
60
  with open(self.bm25_index_path, "rb") as f:
63
61
  data = pickle.load(f)
64
62
  self.bm25_corpus = data["corpus"]
65
63
  self.bm25_index = BM25Okapi(self.bm25_corpus)
66
- PrettyOutput.print("BM25 索引加载成功。", OutputType.SUCCESS)
64
+ print("BM25 索引加载成功。")
67
65
  else:
68
- PrettyOutput.print(
69
- "未找到 BM25 索引,将初始化一个新的。", OutputType.WARNING
66
+ print(
67
+ "⚠️ 未找到 BM25 索引,将初始化一个新的。"
70
68
  )
71
69
  self.bm25_corpus = []
72
70
  self.bm25_index = None
@@ -74,10 +72,10 @@ class ChromaRetriever:
74
72
  def _save_bm25_index(self):
75
73
  """将BM25索引保存到磁盘。"""
76
74
  if self.bm25_index:
77
- PrettyOutput.print("正在保存 BM25 索引...", OutputType.INFO)
75
+ print("ℹ️ 正在保存 BM25 索引...")
78
76
  with open(self.bm25_index_path, "wb") as f:
79
77
  pickle.dump({"corpus": self.bm25_corpus, "index": self.bm25_index}, f)
80
- PrettyOutput.print("BM25 索引保存成功。", OutputType.SUCCESS)
78
+ print("BM25 索引保存成功。")
81
79
 
82
80
  def _load_manifest(self) -> Dict[str, Dict[str, Any]]:
83
81
  """加载已索引文件清单,用于变更检测。"""
@@ -97,7 +95,7 @@ class ChromaRetriever:
97
95
  with open(self.manifest_path, "w", encoding="utf-8") as f:
98
96
  json.dump(manifest, f, ensure_ascii=False, indent=2)
99
97
  except Exception as e:
100
- PrettyOutput.print(f"保存索引清单失败: {e}", OutputType.WARNING)
98
+ print(f"⚠️ 保存索引清单失败: {e}")
101
99
 
102
100
  def _compute_md5(
103
101
  self, file_path: str, chunk_size: int = 1024 * 1024
@@ -136,8 +134,8 @@ class ChromaRetriever:
136
134
  continue
137
135
  if updated > 0:
138
136
  self._save_manifest(manifest)
139
- PrettyOutput.print(
140
- f"已更新索引清单,记录 {updated} 个源文件状态。", OutputType.INFO
137
+ print(
138
+ f"ℹ️ 已更新索引清单,记录 {updated} 个源文件状态。"
141
139
  )
142
140
 
143
141
  def _detect_changed_or_deleted(self) -> Dict[str, List[str]]:
@@ -203,7 +201,8 @@ class ChromaRetriever:
203
201
  lines.append(
204
202
  "提示:请使用 'jarvis-rag add <路径>' 重新索引相关文件,以更新向量库与BM25索引。"
205
203
  )
206
- PrettyOutput.print("\n".join(lines), OutputType.WARNING)
204
+ joined_lines = '\n'.join(lines)
205
+ print(f"⚠️ {joined_lines}")
207
206
 
208
207
  def detect_index_changes(self) -> Dict[str, List[str]]:
209
208
  """
@@ -225,8 +224,8 @@ class ChromaRetriever:
225
224
  removed += 1
226
225
  if removed > 0:
227
226
  self._save_manifest(manifest)
228
- PrettyOutput.print(
229
- f"已从索引清单中移除 {removed} 个已删除的源文件记录。", OutputType.INFO
227
+ print(
228
+ f"ℹ️ 已从索引清单中移除 {removed} 个已删除的源文件记录。"
230
229
  )
231
230
 
232
231
  def update_index_for_changes(self, changed: List[str], deleted: List[str]) -> None:
@@ -254,7 +253,8 @@ class ChromaRetriever:
254
253
  except Exception as e:
255
254
  delete_errors.append(f"删除源 '{src}' 时出错: {e}")
256
255
  if delete_errors:
257
- PrettyOutput.print("\n".join(delete_errors), OutputType.WARNING)
256
+ joined_errors = '\n'.join(delete_errors)
257
+ print(f"⚠️ {joined_errors}")
258
258
 
259
259
  # 再处理变更(重建)
260
260
  docs_to_add: List[Document] = []
@@ -275,14 +275,15 @@ class ChromaRetriever:
275
275
  except Exception as e:
276
276
  rebuild_errors.append(f"重建源 '{src}' 内容时出错: {e}")
277
277
  if rebuild_errors:
278
- PrettyOutput.print("\n".join(rebuild_errors), OutputType.WARNING)
278
+ joined_errors = '\n'.join(rebuild_errors)
279
+ print(f"⚠️ {joined_errors}")
279
280
 
280
281
  if docs_to_add:
281
282
  try:
282
283
  # 复用现有拆分与嵌入逻辑
283
284
  self.add_documents(docs_to_add)
284
285
  except Exception as e:
285
- PrettyOutput.print(f"添加变更文档到索引时出错: {e}", OutputType.ERROR)
286
+ print(f"添加变更文档到索引时出错: {e}")
286
287
 
287
288
  # 重建BM25索引,确保删除后的语料被清理
288
289
  try:
@@ -292,7 +293,7 @@ class ChromaRetriever:
292
293
  self.bm25_index = BM25Okapi(self.bm25_corpus) if self.bm25_corpus else None
293
294
  self._save_bm25_index()
294
295
  except Exception as e:
295
- PrettyOutput.print(f"重建BM25索引失败: {e}", OutputType.WARNING)
296
+ print(f"⚠️ 重建BM25索引失败: {e}")
296
297
 
297
298
  # 更新manifest:变更文件更新状态;删除文件从清单中移除
298
299
  try:
@@ -301,11 +302,10 @@ class ChromaRetriever:
301
302
  if deleted:
302
303
  self._remove_sources_from_manifest(deleted)
303
304
  except Exception as e:
304
- PrettyOutput.print(f"更新索引清单时出错: {e}", OutputType.WARNING)
305
+ print(f"⚠️ 更新索引清单时出错: {e}")
305
306
 
306
- PrettyOutput.print(
307
- f"索引已更新:变更 {len(changed)} 个,删除 {len(deleted)} 个。",
308
- OutputType.SUCCESS,
307
+ print(
308
+ f"索引已更新:变更 {len(changed)} 个,删除 {len(deleted)} 个。"
309
309
  )
310
310
 
311
311
  def add_documents(
@@ -319,9 +319,8 @@ class ChromaRetriever:
319
319
  )
320
320
  chunks = text_splitter.split_documents(documents)
321
321
 
322
- PrettyOutput.print(
323
- f"已将 {len(documents)} 个文档拆分为 {len(chunks)} 个块。",
324
- OutputType.INFO,
322
+ print(
323
+ f"ℹ️ 已将 {len(documents)} 个文档拆分为 {len(chunks)} 个块。"
325
324
  )
326
325
 
327
326
  if not chunks:
@@ -341,9 +340,8 @@ class ChromaRetriever:
341
340
  documents=chunk_texts,
342
341
  metadatas=cast(Any, metadatas),
343
342
  )
344
- PrettyOutput.print(
345
- f"成功将 {len(chunks)} 个块添加到 ChromaDB 集合中。",
346
- OutputType.SUCCESS,
343
+ print(
344
+ f"成功将 {len(chunks)} 个块添加到 ChromaDB 集合中。"
347
345
  )
348
346
 
349
347
  # 更新并保存BM25索引
@@ -0,0 +1,305 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Jarvis 安全分析套件
4
+
5
+ 当前版本概述:
6
+ - 关键路径:直扫(direct_scan)→ 单Agent逐条验证(只读工具:read_code/execute_script)→ 聚合输出(JSON + Markdown)
7
+ - 目标范围:内存管理、缓冲区操作、错误处理等基础安全问题识别
8
+ - 约束:不修改核心框架文件,保持最小侵入;严格只读分析
9
+
10
+ 集成方式:
11
+ - 复用 jarvis.jarvis_agent.Agent 与工具注册系统(jarvis.jarvis_tools.registry.ToolRegistry)
12
+ - 提供入口:
13
+ - run_security_analysis(entry_path, ...):直扫 + 单Agent逐条验证 + 聚合
14
+
15
+ - workflow.direct_scan(entry_path, ...):仅启发式直扫
16
+
17
+ 说明:
18
+ - 已移除 MultiAgent 编排与相关提示词;不存在"阶段一"等表述
19
+ - 模块化重构:将功能拆分为多个模块(prompts, parsers, utils, agents, clustering, analysis, verification, review)
20
+ """
21
+
22
+ from typing import Dict, List, Optional
23
+
24
+ import typer
25
+
26
+ from jarvis.jarvis_agent import Agent # noqa: F401
27
+ from jarvis.jarvis_sec.workflow import direct_scan, run_with_agent
28
+
29
+ # 导入模块化后的函数(用于触发模块加载)
30
+ from jarvis.jarvis_sec.prompts import ( # noqa: F401
31
+ build_summary_prompt as _build_summary_prompt,
32
+ build_verification_summary_prompt as _build_verification_summary_prompt,
33
+ get_review_system_prompt as _get_review_system_prompt,
34
+ get_review_summary_prompt as _get_review_summary_prompt,
35
+ get_cluster_system_prompt as _get_cluster_system_prompt,
36
+ get_cluster_summary_prompt as _get_cluster_summary_prompt,
37
+ )
38
+ from jarvis.jarvis_sec.parsers import ( # noqa: F401
39
+ parse_clusters_from_text as _parse_clusters_from_text,
40
+ try_parse_summary_report as _try_parse_summary_report,
41
+ )
42
+ from jarvis.jarvis_sec.utils import ( # noqa: F401
43
+ git_restore_if_dirty as _git_restore_if_dirty,
44
+ get_sec_dir as _get_sec_dir,
45
+ initialize_analysis_context as _initialize_analysis_context,
46
+ load_or_run_heuristic_scan as _load_or_run_heuristic_scan,
47
+ compact_candidate as _compact_candidate,
48
+ prepare_candidates as _prepare_candidates,
49
+ group_candidates_by_file as _group_candidates_by_file,
50
+ create_report_writer as _create_report_writer,
51
+ sig_of as _sig_of,
52
+ load_processed_gids_from_issues as _load_processed_gids_from_issues,
53
+ count_issues_from_file as _count_issues_from_file,
54
+ load_all_issues_from_file as _load_all_issues_from_file,
55
+ load_processed_gids_from_agent_issues as _load_processed_gids_from_agent_issues,
56
+ )
57
+ from jarvis.jarvis_sec.agents import ( # noqa: F401
58
+ subscribe_summary_event as _subscribe_summary_event,
59
+ create_analysis_agent as _create_analysis_agent,
60
+ create_review_agent as _create_review_agent,
61
+ create_cluster_agent as _create_cluster_agent,
62
+ )
63
+ from jarvis.jarvis_sec.clustering import ( # noqa: F401
64
+ load_existing_clusters as _load_existing_clusters,
65
+ restore_clusters_from_checkpoint as _restore_clusters_from_checkpoint,
66
+ create_cluster_snapshot_writer as _create_cluster_snapshot_writer,
67
+ collect_candidate_gids as _collect_candidate_gids,
68
+ collect_clustered_gids as _collect_clustered_gids,
69
+ # supplement_missing_gids_for_clustering已移除,不再需要
70
+ handle_single_alert_file as _handle_single_alert_file,
71
+ validate_cluster_format as _validate_cluster_format,
72
+ extract_classified_gids as _extract_classified_gids,
73
+ build_cluster_retry_task as _build_cluster_retry_task,
74
+ build_cluster_error_guidance as _build_cluster_error_guidance,
75
+ run_cluster_agent_direct_model as _run_cluster_agent_direct_model,
76
+ validate_cluster_result as _validate_cluster_result,
77
+ check_cluster_completeness as _check_cluster_completeness,
78
+ run_cluster_agent_with_retry as _run_cluster_agent_with_retry,
79
+ process_cluster_results as _process_cluster_results,
80
+ supplement_missing_gids as _supplement_missing_gids,
81
+ build_cluster_task as _build_cluster_task,
82
+ extract_input_gids as _extract_input_gids,
83
+ build_gid_to_item_mapping as _build_gid_to_item_mapping,
84
+ process_cluster_chunk as _process_cluster_chunk,
85
+ filter_pending_items as _filter_pending_items,
86
+ process_file_clustering as _process_file_clustering,
87
+ # check_and_supplement_missing_gids已移除,完整性检查已移至process_clustering_phase中
88
+ initialize_clustering_context as _initialize_clustering_context,
89
+ check_unclustered_gids as _check_unclustered_gids,
90
+ execute_clustering_for_files as _execute_clustering_for_files,
91
+ record_clustering_completion as _record_clustering_completion,
92
+ fallback_to_file_based_batches as _fallback_to_file_based_batches,
93
+ process_clustering_phase as _process_clustering_phase,
94
+ )
95
+ from jarvis.jarvis_sec.review import ( # noqa: F401
96
+ build_review_task as _build_review_task,
97
+ process_review_batch_items as _process_review_batch_items,
98
+ reinstated_candidates_to_cluster_batches as _reinstated_candidates_to_cluster_batches,
99
+ process_review_phase as _process_review_phase,
100
+ build_gid_to_review_mapping as _build_gid_to_review_mapping,
101
+ process_review_batch as _process_review_batch,
102
+ run_review_agent_with_retry as _run_review_agent_with_retry,
103
+ is_valid_review_item as _is_valid_review_item,
104
+ )
105
+ from jarvis.jarvis_sec.analysis import ( # noqa: F401
106
+ build_analysis_task_context as _build_analysis_task_context,
107
+ build_validation_error_guidance as _build_validation_error_guidance,
108
+ run_analysis_agent_with_retry as _run_analysis_agent_with_retry,
109
+ expand_and_filter_analysis_results as _expand_and_filter_analysis_results,
110
+ valid_items as _valid_items,
111
+ )
112
+ from jarvis.jarvis_sec.verification import ( # noqa: F401
113
+ build_gid_to_verification_mapping as _build_gid_to_verification_mapping,
114
+ merge_verified_items as _merge_verified_items,
115
+ merge_verified_items_without_verification as _merge_verified_items_without_verification,
116
+ process_verification_batch as _process_verification_batch,
117
+ is_valid_verification_item as _is_valid_verification_item,
118
+ run_verification_agent_with_retry as _run_verification_agent_with_retry,
119
+ process_verification_phase as _process_verification_phase,
120
+ )
121
+
122
+
123
+ # 注:当前版本不使用 MultiAgent 编排,已移除默认多智能体配置与创建函数。
124
+ # 请使用 run_security_analysis(单Agent逐条验证)或 workflow.direct_scan + format_markdown_report(直扫基线)。
125
+ # 注意:部分函数已迁移到模块化文件中(prompts.py, parsers.py, utils.py, agents.py, clustering.py, analysis.py, verification.py, review.py),
126
+ # 本文件中保留了这些函数的别名导入,以便向后兼容。
127
+
128
+
129
+
130
+
131
+
132
+
133
+
134
+
135
+
136
+ def run_security_analysis(
137
+ entry_path: str,
138
+ languages: Optional[List[str]] = None,
139
+ llm_group: Optional[str] = None,
140
+ report_file: Optional[str] = None,
141
+ cluster_limit: int = 50,
142
+ exclude_dirs: Optional[List[str]] = None,
143
+ enable_verification: bool = True,
144
+ force_save_memory: bool = False,
145
+ output_file: Optional[str] = None,
146
+ ) -> str:
147
+ """
148
+ 运行安全分析工作流(混合模式)。
149
+
150
+ 改进:
151
+ - 即使在 agent 模式下,也先进行本地正则/启发式直扫,生成候选问题;
152
+ 然后将候选问题拆分为子任务,交由多Agent进行深入分析与聚合。
153
+
154
+ 注意:此函数会在发生异常时更新状态文件为 error 状态。
155
+
156
+ 参数:
157
+ - entry_path: 待分析的根目录路径
158
+ - languages: 限定扫描的语言扩展(例如 ["c", "cpp", "h", "hpp", "rs"]),为空则使用默认
159
+
160
+ 返回:
161
+ - 最终报告(字符串),由 Aggregator 生成(JSON + Markdown)
162
+
163
+ 其他:
164
+ - llm_group: 模型组名称(仅在当前调用链内生效,不覆盖全局配置),将直接传入 Agent 用于选择模型
165
+ - report_file: 增量报告文件路径(JSONL)。当每个子任务检测到 issues 时,立即将一条记录追加到该文件;
166
+ 若未指定,则默认写入 entry_path/.jarvis/sec/agent_issues.jsonl
167
+ - cluster_limit: 聚类时每批次最多处理的告警数(默认 50),当单个文件告警过多时按批次进行聚类
168
+ - exclude_dirs: 要排除的目录列表(可选),默认已包含测试目录(test, tests, __tests__, spec, testsuite, testdata)
169
+ - enable_verification: 是否启用二次验证(默认 True),关闭后分析Agent确认的问题将直接写入报告
170
+ - 断点续扫: 默认开启。会基于 .jarvis/sec/candidates.jsonl、clusters.jsonl 和 analysis.jsonl 文件进行状态恢复。
171
+ """
172
+
173
+ langs = languages or ["c", "cpp", "h", "hpp", "rs"]
174
+
175
+ # 状态管理器(不再使用 status.json,使用空对象)
176
+ class DummyStatusManager:
177
+ def update_pre_scan(self, **kwargs): pass
178
+ def update_clustering(self, **kwargs): pass
179
+ def update_review(self, **kwargs): pass
180
+ def update_verification(self, **kwargs): pass
181
+ def mark_completed(self, **kwargs): pass
182
+ def mark_error(self, **kwargs): pass
183
+
184
+ status_mgr = DummyStatusManager()
185
+
186
+ # 初始化分析上下文
187
+ sec_dir, progress_path, _progress_append = _initialize_analysis_context(
188
+ entry_path, status_mgr
189
+ )
190
+
191
+ # 1) 启发式扫描(支持断点续扫)
192
+ candidates, summary = _load_or_run_heuristic_scan(
193
+ entry_path, langs, exclude_dirs, sec_dir, status_mgr, _progress_append
194
+ )
195
+
196
+ # 2) 将候选问题精简为子任务清单,控制上下文长度
197
+ compact_candidates = _prepare_candidates(candidates)
198
+
199
+ # 3) 保存候选到新的 candidates.jsonl 文件(包含gid)
200
+ from jarvis.jarvis_sec.file_manager import save_candidates, get_candidates_file
201
+ try:
202
+ save_candidates(sec_dir, compact_candidates)
203
+ _progress_append({
204
+ "event": "candidates_saved",
205
+ "path": str(get_candidates_file(sec_dir)),
206
+ "issues_count": len(compact_candidates),
207
+ })
208
+ except Exception:
209
+ pass
210
+
211
+ # 记录批次选择信息(可选,用于日志)
212
+ try:
213
+ groups = _group_candidates_by_file(compact_candidates)
214
+ if groups:
215
+ selected_file, items = max(groups.items(), key=lambda kv: len(kv[1]))
216
+ try:
217
+ typer.secho(f"[jarvis-sec] 批次选择: 文件={selected_file} 数量={len(items)}", fg=typer.colors.BLUE)
218
+ except Exception:
219
+ pass
220
+ _progress_append({
221
+ "event": "batch_selection",
222
+ "selected_file": selected_file,
223
+ "selected_count": len(items),
224
+ "total_in_file": len(items),
225
+ })
226
+ except Exception:
227
+ pass
228
+
229
+ # 创建报告写入函数
230
+ _append_report = _create_report_writer(sec_dir, report_file)
231
+
232
+ # 3) 处理聚类阶段
233
+ cluster_batches, invalid_clusters_for_review = _process_clustering_phase(
234
+ compact_candidates,
235
+ entry_path,
236
+ langs,
237
+ cluster_limit,
238
+ llm_group,
239
+ sec_dir,
240
+ status_mgr,
241
+ _progress_append,
242
+ force_save_memory=force_save_memory,
243
+ )
244
+
245
+ # 4) 处理验证阶段
246
+ meta_records: List[Dict] = []
247
+ all_issues = _process_verification_phase(
248
+ cluster_batches,
249
+ entry_path,
250
+ langs,
251
+ llm_group,
252
+ sec_dir,
253
+ status_mgr,
254
+ _progress_append,
255
+ _append_report,
256
+ enable_verification=enable_verification,
257
+ force_save_memory=force_save_memory,
258
+ )
259
+
260
+ # 5) 使用统一聚合器生成最终报告(JSON + Markdown)
261
+ try:
262
+ from jarvis.jarvis_sec.report import build_json_and_markdown
263
+ result = build_json_and_markdown(
264
+ all_issues,
265
+ scanned_root=summary.get("scanned_root"),
266
+ scanned_files=summary.get("scanned_files"),
267
+ meta=meta_records or None,
268
+ output_file=output_file,
269
+ )
270
+ # 标记分析完成
271
+ status_mgr.mark_completed(
272
+ total_issues=len(all_issues),
273
+ message=f"安全分析完成,共发现 {len(all_issues)} 个问题"
274
+ )
275
+ return result
276
+ except Exception as e:
277
+ # 发生错误时更新状态
278
+ error_msg = str(e)
279
+ status_mgr.mark_error(
280
+ error_message=error_msg,
281
+ error_type=type(e).__name__
282
+ )
283
+ raise
284
+ finally:
285
+ # 清理LSP客户端资源,防止文件句柄泄露
286
+ try:
287
+ from jarvis.jarvis_tools.lsp_client import LSPClientTool
288
+ LSPClientTool.cleanup_all_clients()
289
+ except Exception:
290
+ pass # 清理失败不影响主流程
291
+
292
+
293
+
294
+
295
+
296
+
297
+
298
+
299
+ __all__ = [
300
+
301
+ "run_security_analysis",
302
+
303
+ "direct_scan",
304
+ "run_with_agent",
305
+ ]
@@ -0,0 +1,143 @@
1
+ # -*- coding: utf-8 -*-
2
+ """Agent创建和订阅模块"""
3
+
4
+ from typing import Dict, Optional
5
+ from jarvis.jarvis_agent import Agent
6
+ from jarvis.jarvis_tools.registry import ToolRegistry
7
+ from jarvis.jarvis_sec.prompts import (
8
+ build_summary_prompt,
9
+ get_review_system_prompt,
10
+ get_review_summary_prompt,
11
+ get_cluster_system_prompt,
12
+ get_cluster_summary_prompt,
13
+ )
14
+
15
+
16
+ def subscribe_summary_event(agent: Agent) -> Dict[str, str]:
17
+ """订阅Agent摘要事件"""
18
+ summary_container: Dict[str, str] = {"text": ""}
19
+ try:
20
+ from jarvis.jarvis_agent.events import AFTER_SUMMARY as _AFTER_SUMMARY
21
+ except Exception:
22
+ _AFTER_SUMMARY = None
23
+
24
+ if _AFTER_SUMMARY:
25
+ def _on_after_summary(**kwargs):
26
+ try:
27
+ summary_container["text"] = str(kwargs.get("summary", "") or "")
28
+ except Exception:
29
+ summary_container["text"] = ""
30
+ try:
31
+ agent.event_bus.subscribe(_AFTER_SUMMARY, _on_after_summary)
32
+ except Exception:
33
+ pass
34
+ return summary_container
35
+
36
+
37
+ def create_analysis_agent(task_id: str, llm_group: Optional[str], force_save_memory: bool = False) -> Agent:
38
+ """创建分析Agent"""
39
+ system_prompt = """
40
+ # 单Agent安全分析约束
41
+ - 你的核心任务是评估代码的安全问题,目标:针对本候选问题进行证据核实、风险评估与修复建议补充,查找漏洞触发路径,确认在某些条件下会触发;以此来判断是否是漏洞。
42
+ - **必须进行调用路径推导**:
43
+ - 对于每个候选问题,必须明确推导从可控输入到缺陷代码的完整调用路径。
44
+ - 调用路径推导必须包括:
45
+ 1. 识别可控输入的来源(例如:用户输入、网络数据、文件读取、命令行参数等)
46
+ 2. 追踪数据流:从输入源开始,逐步追踪数据如何传递到缺陷代码位置
47
+ 3. 识别调用链:明确列出从入口函数到缺陷代码的所有函数调用序列(例如:main() -> parse_input() -> process_data() -> vulnerable_function())
48
+ 4. 分析每个调用点的数据校验情况:检查每个函数是否对输入进行了校验、边界检查或安全检查
49
+ 5. 确认触发条件:明确说明在什么条件下,未校验或恶意输入能够到达缺陷代码位置
50
+ - 如果无法推导出完整的调用路径,或者所有调用路径都有充分的保护措施,则应该判定为误报。
51
+ - 调用路径推导必须在分析过程中明确展示,不能省略或假设。
52
+ - 工具优先:使用 read_code 读取目标文件附近源码(行号前后各 ~50 行),必要时用 execute_script 辅助检索。
53
+ - **调用路径追溯要求**:
54
+ - 必须向上追溯所有可能的调用者,查看完整的调用路径,以确认风险是否真实存在。
55
+ - 使用 read_code 和 execute_script 工具查找函数的调用者(例如:使用 grep 搜索函数名,查找所有调用该函数的位置)。
56
+ - 对于每个调用者,必须检查其是否对输入进行了校验。
57
+ - 如果发现任何调用路径未做校验,必须明确记录该路径。
58
+ - 例如:一个函数存在空指针解引用风险,必须检查所有调用者。如果所有调用者均能确保传入的指针非空,则该风险在当前代码库中可能不会实际触发;但如果存在任何调用者未做校验,则风险真实存在。
59
+ - 若多条告警位于同一文件且行号相距不远,可一次性读取共享上下文,对这些相邻告警进行联合分析与判断;但仍需避免无关扩展与大范围遍历。
60
+ - 禁止修改任何文件或执行写操作命令(rm/mv/cp/echo >、sed -i、git、patch、chmod、chown 等);仅进行只读分析与读取。
61
+ - 每次仅执行一个操作;等待工具结果后再进行下一步。
62
+ - **记忆使用**:
63
+ - 在分析过程中,充分利用 retrieve_memory 工具检索已有的记忆,特别是与当前分析函数相关的记忆。
64
+ - 如果有必要,使用 save_memory 工具保存每个函数的分析要点,使用函数名作为 tag(例如:函数名、文件名等)。
65
+ - 记忆内容示例:某个函数的指针已经判空、某个函数已有输入校验、某个函数的调用路径分析结果等。
66
+ - 这样可以避免重复分析,提高效率,并保持分析的一致性。
67
+ - 完成对本批次候选问题的判断后,主输出仅打印结束符 <!!!COMPLETE!!!> ,不需要汇总结果。
68
+ """.strip()
69
+
70
+ agent_kwargs: Dict = dict(
71
+ system_prompt=system_prompt,
72
+ name=task_id,
73
+ auto_complete=True,
74
+ need_summary=True,
75
+ summary_prompt=build_summary_prompt(),
76
+ non_interactive=True,
77
+ in_multi_agent=False,
78
+ use_methodology=False,
79
+ use_analysis=False,
80
+ output_handler=[ToolRegistry()],
81
+ force_save_memory=force_save_memory,
82
+ use_tools=["read_code", "execute_script", "save_memory", "retrieve_memory"],
83
+ )
84
+ if llm_group:
85
+ agent_kwargs["model_group"] = llm_group
86
+ return Agent(**agent_kwargs)
87
+
88
+
89
+ def create_review_agent(
90
+ current_review_num: int,
91
+ llm_group: Optional[str],
92
+ ) -> Agent:
93
+ """创建复核Agent"""
94
+ review_system_prompt = get_review_system_prompt()
95
+ review_summary_prompt = get_review_summary_prompt()
96
+
97
+ review_task_id = f"JARVIS-SEC-Review-Batch-{current_review_num}"
98
+ review_agent_kwargs: Dict = dict(
99
+ system_prompt=review_system_prompt,
100
+ name=review_task_id,
101
+ auto_complete=True,
102
+ need_summary=True,
103
+ summary_prompt=review_summary_prompt,
104
+ non_interactive=True,
105
+ in_multi_agent=False,
106
+ use_methodology=False,
107
+ use_analysis=False,
108
+ output_handler=[ToolRegistry()],
109
+ use_tools=["read_code", "execute_script", "retrieve_memory", "save_memory"],
110
+ )
111
+ if llm_group:
112
+ review_agent_kwargs["model_group"] = llm_group
113
+ return Agent(**review_agent_kwargs)
114
+
115
+
116
+ def create_cluster_agent(
117
+ file: str,
118
+ chunk_idx: int,
119
+ llm_group: Optional[str],
120
+ force_save_memory: bool = False,
121
+ ) -> Agent:
122
+ """创建聚类Agent"""
123
+ cluster_system_prompt = get_cluster_system_prompt()
124
+ cluster_summary_prompt = get_cluster_summary_prompt()
125
+
126
+ agent_kwargs_cluster: Dict = dict(
127
+ system_prompt=cluster_system_prompt,
128
+ name=f"JARVIS-SEC-Cluster::{file}::batch{chunk_idx}",
129
+ auto_complete=True,
130
+ need_summary=True,
131
+ summary_prompt=cluster_summary_prompt,
132
+ non_interactive=True,
133
+ in_multi_agent=False,
134
+ use_methodology=False,
135
+ use_analysis=False,
136
+ output_handler=[ToolRegistry()],
137
+ force_save_memory=force_save_memory,
138
+ use_tools=["read_code", "execute_script", "save_memory", "retrieve_memory"],
139
+ )
140
+ if llm_group:
141
+ agent_kwargs_cluster["model_group"] = llm_group
142
+ return Agent(**agent_kwargs_cluster)
143
+