jarvis-ai-assistant 0.7.8__py3-none-any.whl → 1.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jarvis/__init__.py +1 -1
- jarvis/jarvis_agent/__init__.py +567 -222
- jarvis/jarvis_agent/agent_manager.py +19 -12
- jarvis/jarvis_agent/builtin_input_handler.py +79 -11
- jarvis/jarvis_agent/config_editor.py +7 -2
- jarvis/jarvis_agent/event_bus.py +24 -13
- jarvis/jarvis_agent/events.py +19 -1
- jarvis/jarvis_agent/file_context_handler.py +67 -64
- jarvis/jarvis_agent/file_methodology_manager.py +38 -24
- jarvis/jarvis_agent/jarvis.py +186 -114
- jarvis/jarvis_agent/language_extractors/__init__.py +8 -1
- jarvis/jarvis_agent/language_extractors/c_extractor.py +7 -4
- jarvis/jarvis_agent/language_extractors/cpp_extractor.py +9 -4
- jarvis/jarvis_agent/language_extractors/go_extractor.py +7 -4
- jarvis/jarvis_agent/language_extractors/java_extractor.py +27 -20
- jarvis/jarvis_agent/language_extractors/javascript_extractor.py +22 -17
- jarvis/jarvis_agent/language_extractors/python_extractor.py +7 -4
- jarvis/jarvis_agent/language_extractors/rust_extractor.py +7 -4
- jarvis/jarvis_agent/language_extractors/typescript_extractor.py +22 -17
- jarvis/jarvis_agent/language_support_info.py +250 -219
- jarvis/jarvis_agent/main.py +19 -23
- jarvis/jarvis_agent/memory_manager.py +9 -6
- jarvis/jarvis_agent/methodology_share_manager.py +21 -15
- jarvis/jarvis_agent/output_handler.py +4 -2
- jarvis/jarvis_agent/prompt_builder.py +7 -6
- jarvis/jarvis_agent/prompt_manager.py +113 -8
- jarvis/jarvis_agent/prompts.py +317 -85
- jarvis/jarvis_agent/protocols.py +5 -2
- jarvis/jarvis_agent/run_loop.py +192 -32
- jarvis/jarvis_agent/session_manager.py +7 -3
- jarvis/jarvis_agent/share_manager.py +23 -13
- jarvis/jarvis_agent/shell_input_handler.py +12 -8
- jarvis/jarvis_agent/stdio_redirect.py +25 -26
- jarvis/jarvis_agent/task_analyzer.py +29 -23
- jarvis/jarvis_agent/task_list.py +869 -0
- jarvis/jarvis_agent/task_manager.py +26 -23
- jarvis/jarvis_agent/tool_executor.py +6 -5
- jarvis/jarvis_agent/tool_share_manager.py +24 -14
- jarvis/jarvis_agent/user_interaction.py +3 -3
- jarvis/jarvis_agent/utils.py +9 -1
- jarvis/jarvis_agent/web_bridge.py +37 -17
- jarvis/jarvis_agent/web_output_sink.py +5 -2
- jarvis/jarvis_agent/web_server.py +165 -36
- jarvis/jarvis_c2rust/__init__.py +1 -1
- jarvis/jarvis_c2rust/cli.py +260 -141
- jarvis/jarvis_c2rust/collector.py +37 -18
- jarvis/jarvis_c2rust/constants.py +60 -0
- jarvis/jarvis_c2rust/library_replacer.py +242 -1010
- jarvis/jarvis_c2rust/library_replacer_checkpoint.py +133 -0
- jarvis/jarvis_c2rust/library_replacer_llm.py +287 -0
- jarvis/jarvis_c2rust/library_replacer_loader.py +191 -0
- jarvis/jarvis_c2rust/library_replacer_output.py +134 -0
- jarvis/jarvis_c2rust/library_replacer_prompts.py +124 -0
- jarvis/jarvis_c2rust/library_replacer_utils.py +188 -0
- jarvis/jarvis_c2rust/llm_module_agent.py +98 -1044
- jarvis/jarvis_c2rust/llm_module_agent_apply.py +170 -0
- jarvis/jarvis_c2rust/llm_module_agent_executor.py +288 -0
- jarvis/jarvis_c2rust/llm_module_agent_loader.py +170 -0
- jarvis/jarvis_c2rust/llm_module_agent_prompts.py +268 -0
- jarvis/jarvis_c2rust/llm_module_agent_types.py +57 -0
- jarvis/jarvis_c2rust/llm_module_agent_utils.py +150 -0
- jarvis/jarvis_c2rust/llm_module_agent_validator.py +119 -0
- jarvis/jarvis_c2rust/loaders.py +28 -10
- jarvis/jarvis_c2rust/models.py +5 -2
- jarvis/jarvis_c2rust/optimizer.py +192 -1974
- jarvis/jarvis_c2rust/optimizer_build_fix.py +286 -0
- jarvis/jarvis_c2rust/optimizer_clippy.py +766 -0
- jarvis/jarvis_c2rust/optimizer_config.py +49 -0
- jarvis/jarvis_c2rust/optimizer_docs.py +183 -0
- jarvis/jarvis_c2rust/optimizer_options.py +48 -0
- jarvis/jarvis_c2rust/optimizer_progress.py +469 -0
- jarvis/jarvis_c2rust/optimizer_report.py +52 -0
- jarvis/jarvis_c2rust/optimizer_unsafe.py +309 -0
- jarvis/jarvis_c2rust/optimizer_utils.py +469 -0
- jarvis/jarvis_c2rust/optimizer_visibility.py +185 -0
- jarvis/jarvis_c2rust/scanner.py +229 -166
- jarvis/jarvis_c2rust/transpiler.py +531 -2732
- jarvis/jarvis_c2rust/transpiler_agents.py +503 -0
- jarvis/jarvis_c2rust/transpiler_build.py +1294 -0
- jarvis/jarvis_c2rust/transpiler_codegen.py +204 -0
- jarvis/jarvis_c2rust/transpiler_compile.py +146 -0
- jarvis/jarvis_c2rust/transpiler_config.py +178 -0
- jarvis/jarvis_c2rust/transpiler_context.py +122 -0
- jarvis/jarvis_c2rust/transpiler_executor.py +516 -0
- jarvis/jarvis_c2rust/transpiler_generation.py +278 -0
- jarvis/jarvis_c2rust/transpiler_git.py +163 -0
- jarvis/jarvis_c2rust/transpiler_mod_utils.py +225 -0
- jarvis/jarvis_c2rust/transpiler_modules.py +336 -0
- jarvis/jarvis_c2rust/transpiler_planning.py +394 -0
- jarvis/jarvis_c2rust/transpiler_review.py +1196 -0
- jarvis/jarvis_c2rust/transpiler_symbols.py +176 -0
- jarvis/jarvis_c2rust/utils.py +269 -79
- jarvis/jarvis_code_agent/after_change.py +233 -0
- jarvis/jarvis_code_agent/build_validation_config.py +37 -30
- jarvis/jarvis_code_agent/builtin_rules.py +68 -0
- jarvis/jarvis_code_agent/code_agent.py +976 -1517
- jarvis/jarvis_code_agent/code_agent_build.py +227 -0
- jarvis/jarvis_code_agent/code_agent_diff.py +246 -0
- jarvis/jarvis_code_agent/code_agent_git.py +525 -0
- jarvis/jarvis_code_agent/code_agent_impact.py +177 -0
- jarvis/jarvis_code_agent/code_agent_lint.py +283 -0
- jarvis/jarvis_code_agent/code_agent_llm.py +159 -0
- jarvis/jarvis_code_agent/code_agent_postprocess.py +105 -0
- jarvis/jarvis_code_agent/code_agent_prompts.py +46 -0
- jarvis/jarvis_code_agent/code_agent_rules.py +305 -0
- jarvis/jarvis_code_agent/code_analyzer/__init__.py +52 -48
- jarvis/jarvis_code_agent/code_analyzer/base_language.py +12 -10
- jarvis/jarvis_code_agent/code_analyzer/build_validator/__init__.py +12 -11
- jarvis/jarvis_code_agent/code_analyzer/build_validator/base.py +16 -12
- jarvis/jarvis_code_agent/code_analyzer/build_validator/cmake.py +26 -17
- jarvis/jarvis_code_agent/code_analyzer/build_validator/detector.py +558 -104
- jarvis/jarvis_code_agent/code_analyzer/build_validator/fallback.py +27 -16
- jarvis/jarvis_code_agent/code_analyzer/build_validator/go.py +22 -18
- jarvis/jarvis_code_agent/code_analyzer/build_validator/java_gradle.py +21 -16
- jarvis/jarvis_code_agent/code_analyzer/build_validator/java_maven.py +20 -16
- jarvis/jarvis_code_agent/code_analyzer/build_validator/makefile.py +27 -16
- jarvis/jarvis_code_agent/code_analyzer/build_validator/nodejs.py +47 -23
- jarvis/jarvis_code_agent/code_analyzer/build_validator/python.py +71 -37
- jarvis/jarvis_code_agent/code_analyzer/build_validator/rust.py +162 -35
- jarvis/jarvis_code_agent/code_analyzer/build_validator/validator.py +111 -57
- jarvis/jarvis_code_agent/code_analyzer/build_validator.py +18 -12
- jarvis/jarvis_code_agent/code_analyzer/context_manager.py +185 -183
- jarvis/jarvis_code_agent/code_analyzer/context_recommender.py +2 -1
- jarvis/jarvis_code_agent/code_analyzer/dependency_analyzer.py +24 -15
- jarvis/jarvis_code_agent/code_analyzer/file_ignore.py +227 -141
- jarvis/jarvis_code_agent/code_analyzer/impact_analyzer.py +321 -247
- jarvis/jarvis_code_agent/code_analyzer/language_registry.py +37 -29
- jarvis/jarvis_code_agent/code_analyzer/language_support.py +21 -13
- jarvis/jarvis_code_agent/code_analyzer/languages/__init__.py +15 -9
- jarvis/jarvis_code_agent/code_analyzer/languages/c_cpp_language.py +75 -45
- jarvis/jarvis_code_agent/code_analyzer/languages/go_language.py +87 -52
- jarvis/jarvis_code_agent/code_analyzer/languages/java_language.py +84 -51
- jarvis/jarvis_code_agent/code_analyzer/languages/javascript_language.py +94 -64
- jarvis/jarvis_code_agent/code_analyzer/languages/python_language.py +109 -71
- jarvis/jarvis_code_agent/code_analyzer/languages/rust_language.py +97 -63
- jarvis/jarvis_code_agent/code_analyzer/languages/typescript_language.py +103 -69
- jarvis/jarvis_code_agent/code_analyzer/llm_context_recommender.py +271 -268
- jarvis/jarvis_code_agent/code_analyzer/symbol_extractor.py +76 -64
- jarvis/jarvis_code_agent/code_analyzer/tree_sitter_extractor.py +92 -19
- jarvis/jarvis_code_agent/diff_visualizer.py +998 -0
- jarvis/jarvis_code_agent/lint.py +223 -524
- jarvis/jarvis_code_agent/rule_share_manager.py +158 -0
- jarvis/jarvis_code_agent/rules/clean_code.md +144 -0
- jarvis/jarvis_code_agent/rules/code_review.md +115 -0
- jarvis/jarvis_code_agent/rules/documentation.md +165 -0
- jarvis/jarvis_code_agent/rules/generate_rules.md +52 -0
- jarvis/jarvis_code_agent/rules/performance.md +158 -0
- jarvis/jarvis_code_agent/rules/refactoring.md +139 -0
- jarvis/jarvis_code_agent/rules/security.md +160 -0
- jarvis/jarvis_code_agent/rules/tdd.md +78 -0
- jarvis/jarvis_code_agent/test_rules/cpp_test.md +118 -0
- jarvis/jarvis_code_agent/test_rules/go_test.md +98 -0
- jarvis/jarvis_code_agent/test_rules/java_test.md +99 -0
- jarvis/jarvis_code_agent/test_rules/javascript_test.md +113 -0
- jarvis/jarvis_code_agent/test_rules/php_test.md +117 -0
- jarvis/jarvis_code_agent/test_rules/python_test.md +91 -0
- jarvis/jarvis_code_agent/test_rules/ruby_test.md +102 -0
- jarvis/jarvis_code_agent/test_rules/rust_test.md +86 -0
- jarvis/jarvis_code_agent/utils.py +36 -26
- jarvis/jarvis_code_analysis/checklists/loader.py +21 -21
- jarvis/jarvis_code_analysis/code_review.py +64 -33
- jarvis/jarvis_data/config_schema.json +285 -192
- jarvis/jarvis_git_squash/main.py +8 -6
- jarvis/jarvis_git_utils/git_commiter.py +53 -76
- jarvis/jarvis_mcp/__init__.py +5 -2
- jarvis/jarvis_mcp/sse_mcp_client.py +40 -30
- jarvis/jarvis_mcp/stdio_mcp_client.py +27 -19
- jarvis/jarvis_mcp/streamable_mcp_client.py +35 -26
- jarvis/jarvis_memory_organizer/memory_organizer.py +78 -55
- jarvis/jarvis_methodology/main.py +48 -39
- jarvis/jarvis_multi_agent/__init__.py +56 -23
- jarvis/jarvis_multi_agent/main.py +15 -18
- jarvis/jarvis_platform/base.py +179 -111
- jarvis/jarvis_platform/human.py +27 -16
- jarvis/jarvis_platform/kimi.py +52 -45
- jarvis/jarvis_platform/openai.py +101 -40
- jarvis/jarvis_platform/registry.py +51 -33
- jarvis/jarvis_platform/tongyi.py +68 -38
- jarvis/jarvis_platform/yuanbao.py +59 -43
- jarvis/jarvis_platform_manager/main.py +68 -76
- jarvis/jarvis_platform_manager/service.py +24 -14
- jarvis/jarvis_rag/README_CONFIG.md +314 -0
- jarvis/jarvis_rag/README_DYNAMIC_LOADING.md +311 -0
- jarvis/jarvis_rag/README_ONLINE_MODELS.md +230 -0
- jarvis/jarvis_rag/__init__.py +57 -4
- jarvis/jarvis_rag/cache.py +3 -1
- jarvis/jarvis_rag/cli.py +48 -68
- jarvis/jarvis_rag/embedding_interface.py +39 -0
- jarvis/jarvis_rag/embedding_manager.py +7 -230
- jarvis/jarvis_rag/embeddings/__init__.py +41 -0
- jarvis/jarvis_rag/embeddings/base.py +114 -0
- jarvis/jarvis_rag/embeddings/cohere.py +66 -0
- jarvis/jarvis_rag/embeddings/edgefn.py +117 -0
- jarvis/jarvis_rag/embeddings/local.py +260 -0
- jarvis/jarvis_rag/embeddings/openai.py +62 -0
- jarvis/jarvis_rag/embeddings/registry.py +293 -0
- jarvis/jarvis_rag/llm_interface.py +8 -6
- jarvis/jarvis_rag/query_rewriter.py +8 -9
- jarvis/jarvis_rag/rag_pipeline.py +61 -52
- jarvis/jarvis_rag/reranker.py +7 -75
- jarvis/jarvis_rag/reranker_interface.py +32 -0
- jarvis/jarvis_rag/rerankers/__init__.py +41 -0
- jarvis/jarvis_rag/rerankers/base.py +109 -0
- jarvis/jarvis_rag/rerankers/cohere.py +67 -0
- jarvis/jarvis_rag/rerankers/edgefn.py +140 -0
- jarvis/jarvis_rag/rerankers/jina.py +79 -0
- jarvis/jarvis_rag/rerankers/local.py +89 -0
- jarvis/jarvis_rag/rerankers/registry.py +293 -0
- jarvis/jarvis_rag/retriever.py +58 -43
- jarvis/jarvis_sec/__init__.py +66 -141
- jarvis/jarvis_sec/agents.py +21 -17
- jarvis/jarvis_sec/analysis.py +80 -33
- jarvis/jarvis_sec/checkers/__init__.py +7 -13
- jarvis/jarvis_sec/checkers/c_checker.py +356 -164
- jarvis/jarvis_sec/checkers/rust_checker.py +47 -29
- jarvis/jarvis_sec/cli.py +43 -21
- jarvis/jarvis_sec/clustering.py +430 -272
- jarvis/jarvis_sec/file_manager.py +99 -55
- jarvis/jarvis_sec/parsers.py +9 -6
- jarvis/jarvis_sec/prompts.py +4 -3
- jarvis/jarvis_sec/report.py +44 -22
- jarvis/jarvis_sec/review.py +180 -107
- jarvis/jarvis_sec/status.py +50 -41
- jarvis/jarvis_sec/types.py +3 -0
- jarvis/jarvis_sec/utils.py +160 -83
- jarvis/jarvis_sec/verification.py +411 -181
- jarvis/jarvis_sec/workflow.py +132 -21
- jarvis/jarvis_smart_shell/main.py +28 -41
- jarvis/jarvis_stats/cli.py +14 -12
- jarvis/jarvis_stats/stats.py +28 -19
- jarvis/jarvis_stats/storage.py +14 -8
- jarvis/jarvis_stats/visualizer.py +12 -7
- jarvis/jarvis_tools/base.py +5 -2
- jarvis/jarvis_tools/clear_memory.py +13 -9
- jarvis/jarvis_tools/cli/main.py +23 -18
- jarvis/jarvis_tools/edit_file.py +572 -873
- jarvis/jarvis_tools/execute_script.py +10 -7
- jarvis/jarvis_tools/file_analyzer.py +7 -8
- jarvis/jarvis_tools/meta_agent.py +287 -0
- jarvis/jarvis_tools/methodology.py +5 -3
- jarvis/jarvis_tools/read_code.py +305 -1438
- jarvis/jarvis_tools/read_symbols.py +50 -17
- jarvis/jarvis_tools/read_webpage.py +19 -18
- jarvis/jarvis_tools/registry.py +435 -156
- jarvis/jarvis_tools/retrieve_memory.py +16 -11
- jarvis/jarvis_tools/save_memory.py +8 -6
- jarvis/jarvis_tools/search_web.py +31 -31
- jarvis/jarvis_tools/sub_agent.py +32 -28
- jarvis/jarvis_tools/sub_code_agent.py +44 -60
- jarvis/jarvis_tools/task_list_manager.py +1811 -0
- jarvis/jarvis_tools/virtual_tty.py +29 -19
- jarvis/jarvis_utils/__init__.py +4 -0
- jarvis/jarvis_utils/builtin_replace_map.py +2 -1
- jarvis/jarvis_utils/clipboard.py +9 -8
- jarvis/jarvis_utils/collections.py +331 -0
- jarvis/jarvis_utils/config.py +699 -194
- jarvis/jarvis_utils/dialogue_recorder.py +294 -0
- jarvis/jarvis_utils/embedding.py +6 -3
- jarvis/jarvis_utils/file_processors.py +7 -1
- jarvis/jarvis_utils/fzf.py +9 -3
- jarvis/jarvis_utils/git_utils.py +71 -42
- jarvis/jarvis_utils/globals.py +116 -32
- jarvis/jarvis_utils/http.py +6 -2
- jarvis/jarvis_utils/input.py +318 -83
- jarvis/jarvis_utils/jsonnet_compat.py +119 -104
- jarvis/jarvis_utils/methodology.py +37 -28
- jarvis/jarvis_utils/output.py +201 -44
- jarvis/jarvis_utils/utils.py +986 -628
- {jarvis_ai_assistant-0.7.8.dist-info → jarvis_ai_assistant-1.0.2.dist-info}/METADATA +49 -33
- jarvis_ai_assistant-1.0.2.dist-info/RECORD +304 -0
- jarvis/jarvis_code_agent/code_analyzer/structured_code.py +0 -556
- jarvis/jarvis_tools/generate_new_tool.py +0 -205
- jarvis/jarvis_tools/lsp_client.py +0 -1552
- jarvis/jarvis_tools/rewrite_file.py +0 -105
- jarvis_ai_assistant-0.7.8.dist-info/RECORD +0 -218
- {jarvis_ai_assistant-0.7.8.dist-info → jarvis_ai_assistant-1.0.2.dist-info}/WHEEL +0 -0
- {jarvis_ai_assistant-0.7.8.dist-info → jarvis_ai_assistant-1.0.2.dist-info}/entry_points.txt +0 -0
- {jarvis_ai_assistant-0.7.8.dist-info → jarvis_ai_assistant-1.0.2.dist-info}/licenses/LICENSE +0 -0
- {jarvis_ai_assistant-0.7.8.dist-info → jarvis_ai_assistant-1.0.2.dist-info}/top_level.txt +0 -0
jarvis/jarvis_rag/retriever.py
CHANGED
|
@@ -1,15 +1,21 @@
|
|
|
1
|
+
import hashlib
|
|
2
|
+
import json
|
|
1
3
|
import os
|
|
2
4
|
import pickle
|
|
3
|
-
import
|
|
4
|
-
import
|
|
5
|
-
from typing import
|
|
5
|
+
from typing import Any
|
|
6
|
+
from typing import Dict
|
|
7
|
+
from typing import List
|
|
8
|
+
from typing import Optional
|
|
9
|
+
from typing import cast
|
|
6
10
|
|
|
7
11
|
import chromadb
|
|
8
12
|
from langchain.docstore.document import Document
|
|
9
13
|
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
|
10
|
-
from rank_bm25 import BM25Okapi
|
|
14
|
+
from rank_bm25 import BM25Okapi
|
|
15
|
+
|
|
16
|
+
from jarvis.jarvis_utils.output import PrettyOutput
|
|
11
17
|
|
|
12
|
-
from .
|
|
18
|
+
from .embedding_interface import EmbeddingInterface
|
|
13
19
|
|
|
14
20
|
|
|
15
21
|
class ChromaRetriever:
|
|
@@ -20,7 +26,7 @@ class ChromaRetriever:
|
|
|
20
26
|
|
|
21
27
|
def __init__(
|
|
22
28
|
self,
|
|
23
|
-
embedding_manager:
|
|
29
|
+
embedding_manager: EmbeddingInterface,
|
|
24
30
|
db_path: str,
|
|
25
31
|
collection_name: str = "jarvis_rag_collection",
|
|
26
32
|
):
|
|
@@ -28,7 +34,7 @@ class ChromaRetriever:
|
|
|
28
34
|
初始化ChromaRetriever。
|
|
29
35
|
|
|
30
36
|
参数:
|
|
31
|
-
embedding_manager:
|
|
37
|
+
embedding_manager: EmbeddingInterface的实例(可以是本地或在线模型)。
|
|
32
38
|
db_path: ChromaDB持久化存储的文件路径。
|
|
33
39
|
collection_name: ChromaDB中集合的名称。
|
|
34
40
|
"""
|
|
@@ -41,7 +47,7 @@ class ChromaRetriever:
|
|
|
41
47
|
self.collection = self.client.get_or_create_collection(
|
|
42
48
|
name=self.collection_name
|
|
43
49
|
)
|
|
44
|
-
|
|
50
|
+
PrettyOutput.auto_print(
|
|
45
51
|
f"✅ ChromaDB 客户端已在 '{db_path}' 初始化,集合为 '{collection_name}'。"
|
|
46
52
|
)
|
|
47
53
|
|
|
@@ -56,26 +62,24 @@ class ChromaRetriever:
|
|
|
56
62
|
def _load_or_initialize_bm25(self):
|
|
57
63
|
"""从磁盘加载BM25索引或初始化一个新索引。"""
|
|
58
64
|
if os.path.exists(self.bm25_index_path):
|
|
59
|
-
|
|
65
|
+
PrettyOutput.auto_print("ℹ️ 正在加载现有的 BM25 索引...")
|
|
60
66
|
with open(self.bm25_index_path, "rb") as f:
|
|
61
67
|
data = pickle.load(f)
|
|
62
68
|
self.bm25_corpus = data["corpus"]
|
|
63
69
|
self.bm25_index = BM25Okapi(self.bm25_corpus)
|
|
64
|
-
|
|
70
|
+
PrettyOutput.auto_print("✅ BM25 索引加载成功。")
|
|
65
71
|
else:
|
|
66
|
-
|
|
67
|
-
"⚠️ 未找到 BM25 索引,将初始化一个新的。"
|
|
68
|
-
)
|
|
72
|
+
PrettyOutput.auto_print("⚠️ 未找到 BM25 索引,将初始化一个新的。")
|
|
69
73
|
self.bm25_corpus = []
|
|
70
74
|
self.bm25_index = None
|
|
71
75
|
|
|
72
76
|
def _save_bm25_index(self):
|
|
73
77
|
"""将BM25索引保存到磁盘。"""
|
|
74
78
|
if self.bm25_index:
|
|
75
|
-
|
|
79
|
+
PrettyOutput.auto_print("ℹ️ 正在保存 BM25 索引...")
|
|
76
80
|
with open(self.bm25_index_path, "wb") as f:
|
|
77
81
|
pickle.dump({"corpus": self.bm25_corpus, "index": self.bm25_index}, f)
|
|
78
|
-
|
|
82
|
+
PrettyOutput.auto_print("✅ BM25 索引保存成功。")
|
|
79
83
|
|
|
80
84
|
def _load_manifest(self) -> Dict[str, Dict[str, Any]]:
|
|
81
85
|
"""加载已索引文件清单,用于变更检测。"""
|
|
@@ -84,7 +88,7 @@ class ChromaRetriever:
|
|
|
84
88
|
with open(self.manifest_path, "r", encoding="utf-8") as f:
|
|
85
89
|
data = json.load(f)
|
|
86
90
|
if isinstance(data, dict):
|
|
87
|
-
return data
|
|
91
|
+
return data
|
|
88
92
|
except Exception:
|
|
89
93
|
pass
|
|
90
94
|
return {}
|
|
@@ -95,7 +99,7 @@ class ChromaRetriever:
|
|
|
95
99
|
with open(self.manifest_path, "w", encoding="utf-8") as f:
|
|
96
100
|
json.dump(manifest, f, ensure_ascii=False, indent=2)
|
|
97
101
|
except Exception as e:
|
|
98
|
-
|
|
102
|
+
PrettyOutput.auto_print(f"⚠️ 保存索引清单失败: {e}")
|
|
99
103
|
|
|
100
104
|
def _compute_md5(
|
|
101
105
|
self, file_path: str, chunk_size: int = 1024 * 1024
|
|
@@ -128,15 +132,13 @@ class ChromaRetriever:
|
|
|
128
132
|
md5sum = self._compute_md5(src)
|
|
129
133
|
if md5sum:
|
|
130
134
|
entry["md5"] = md5sum
|
|
131
|
-
manifest[src] = entry
|
|
135
|
+
manifest[src] = entry
|
|
132
136
|
updated += 1
|
|
133
137
|
except Exception:
|
|
134
138
|
continue
|
|
135
139
|
if updated > 0:
|
|
136
140
|
self._save_manifest(manifest)
|
|
137
|
-
|
|
138
|
-
f"ℹ️ 已更新索引清单,记录 {updated} 个源文件状态。"
|
|
139
|
-
)
|
|
141
|
+
PrettyOutput.auto_print(f"ℹ️ 已更新索引清单,记录 {updated} 个源文件状态。")
|
|
140
142
|
|
|
141
143
|
def _detect_changed_or_deleted(self) -> Dict[str, List[str]]:
|
|
142
144
|
"""检测已记录的源文件是否发生变化或被删除。"""
|
|
@@ -201,8 +203,8 @@ class ChromaRetriever:
|
|
|
201
203
|
lines.append(
|
|
202
204
|
"提示:请使用 'jarvis-rag add <路径>' 重新索引相关文件,以更新向量库与BM25索引。"
|
|
203
205
|
)
|
|
204
|
-
joined_lines =
|
|
205
|
-
|
|
206
|
+
joined_lines = "\n".join(lines)
|
|
207
|
+
PrettyOutput.auto_print(f"⚠️ {joined_lines}")
|
|
206
208
|
|
|
207
209
|
def detect_index_changes(self) -> Dict[str, List[str]]:
|
|
208
210
|
"""
|
|
@@ -224,7 +226,7 @@ class ChromaRetriever:
|
|
|
224
226
|
removed += 1
|
|
225
227
|
if removed > 0:
|
|
226
228
|
self._save_manifest(manifest)
|
|
227
|
-
|
|
229
|
+
PrettyOutput.auto_print(
|
|
228
230
|
f"ℹ️ 已从索引清单中移除 {removed} 个已删除的源文件记录。"
|
|
229
231
|
)
|
|
230
232
|
|
|
@@ -249,12 +251,12 @@ class ChromaRetriever:
|
|
|
249
251
|
delete_errors: List[str] = []
|
|
250
252
|
for src in deleted:
|
|
251
253
|
try:
|
|
252
|
-
self.collection.delete(where={"source": src})
|
|
254
|
+
self.collection.delete(where={"source": src})
|
|
253
255
|
except Exception as e:
|
|
254
256
|
delete_errors.append(f"删除源 '{src}' 时出错: {e}")
|
|
255
257
|
if delete_errors:
|
|
256
|
-
joined_errors =
|
|
257
|
-
|
|
258
|
+
joined_errors = "\n".join(delete_errors)
|
|
259
|
+
PrettyOutput.auto_print(f"⚠️ {joined_errors}")
|
|
258
260
|
|
|
259
261
|
# 再处理变更(重建)
|
|
260
262
|
docs_to_add: List[Document] = []
|
|
@@ -263,7 +265,7 @@ class ChromaRetriever:
|
|
|
263
265
|
try:
|
|
264
266
|
# 删除旧条目
|
|
265
267
|
try:
|
|
266
|
-
self.collection.delete(where={"source": src})
|
|
268
|
+
self.collection.delete(where={"source": src})
|
|
267
269
|
except Exception:
|
|
268
270
|
pass
|
|
269
271
|
# 读取源文件内容(作为单文档载入,由 add_documents 进行拆分与嵌入)
|
|
@@ -275,15 +277,15 @@ class ChromaRetriever:
|
|
|
275
277
|
except Exception as e:
|
|
276
278
|
rebuild_errors.append(f"重建源 '{src}' 内容时出错: {e}")
|
|
277
279
|
if rebuild_errors:
|
|
278
|
-
joined_errors =
|
|
279
|
-
|
|
280
|
+
joined_errors = "\n".join(rebuild_errors)
|
|
281
|
+
PrettyOutput.auto_print(f"⚠️ {joined_errors}")
|
|
280
282
|
|
|
281
283
|
if docs_to_add:
|
|
282
284
|
try:
|
|
283
285
|
# 复用现有拆分与嵌入逻辑
|
|
284
286
|
self.add_documents(docs_to_add)
|
|
285
287
|
except Exception as e:
|
|
286
|
-
|
|
288
|
+
PrettyOutput.auto_print(f"❌ 添加变更文档到索引时出错: {e}")
|
|
287
289
|
|
|
288
290
|
# 重建BM25索引,确保删除后的语料被清理
|
|
289
291
|
try:
|
|
@@ -293,7 +295,7 @@ class ChromaRetriever:
|
|
|
293
295
|
self.bm25_index = BM25Okapi(self.bm25_corpus) if self.bm25_corpus else None
|
|
294
296
|
self._save_bm25_index()
|
|
295
297
|
except Exception as e:
|
|
296
|
-
|
|
298
|
+
PrettyOutput.auto_print(f"⚠️ 重建BM25索引失败: {e}")
|
|
297
299
|
|
|
298
300
|
# 更新manifest:变更文件更新状态;删除文件从清单中移除
|
|
299
301
|
try:
|
|
@@ -302,24 +304,41 @@ class ChromaRetriever:
|
|
|
302
304
|
if deleted:
|
|
303
305
|
self._remove_sources_from_manifest(deleted)
|
|
304
306
|
except Exception as e:
|
|
305
|
-
|
|
307
|
+
PrettyOutput.auto_print(f"⚠️ 更新索引清单时出错: {e}")
|
|
306
308
|
|
|
307
|
-
|
|
309
|
+
PrettyOutput.auto_print(
|
|
308
310
|
f"✅ 索引已更新:变更 {len(changed)} 个,删除 {len(deleted)} 个。"
|
|
309
311
|
)
|
|
310
312
|
|
|
311
313
|
def add_documents(
|
|
312
|
-
self, documents: List[Document], chunk_size=
|
|
314
|
+
self, documents: List[Document], chunk_size=None, chunk_overlap=100
|
|
313
315
|
):
|
|
314
316
|
"""
|
|
315
317
|
将文档拆分、嵌入,并添加到ChromaDB和BM25索引中。
|
|
318
|
+
|
|
319
|
+
参数:
|
|
320
|
+
documents: 要添加的文档列表
|
|
321
|
+
chunk_size: 块大小(字符数)。如果为None,将从embedding_manager的max_length自动计算。
|
|
322
|
+
chunk_overlap: 块之间的重叠大小(字符数)
|
|
316
323
|
"""
|
|
324
|
+
# 如果没有指定chunk_size,尝试从embedding_manager获取max_length
|
|
325
|
+
if chunk_size is None:
|
|
326
|
+
# 尝试从embedding_manager获取max_length
|
|
327
|
+
max_length = getattr(self.embedding_manager, "max_length", None)
|
|
328
|
+
if max_length:
|
|
329
|
+
# 将token数转换为字符数(粗略估算:1 token ≈ 4 字符)
|
|
330
|
+
# 留一些余量,使用80%的max_length
|
|
331
|
+
chunk_size = int(max_length * 4 * 0.8)
|
|
332
|
+
else:
|
|
333
|
+
# 默认值
|
|
334
|
+
chunk_size = 1000
|
|
335
|
+
|
|
317
336
|
text_splitter = RecursiveCharacterTextSplitter(
|
|
318
337
|
chunk_size=chunk_size, chunk_overlap=chunk_overlap
|
|
319
338
|
)
|
|
320
339
|
chunks = text_splitter.split_documents(documents)
|
|
321
340
|
|
|
322
|
-
|
|
341
|
+
PrettyOutput.auto_print(
|
|
323
342
|
f"ℹ️ 已将 {len(documents)} 个文档拆分为 {len(chunks)} 个块。"
|
|
324
343
|
)
|
|
325
344
|
|
|
@@ -340,9 +359,7 @@ class ChromaRetriever:
|
|
|
340
359
|
documents=chunk_texts,
|
|
341
360
|
metadatas=cast(Any, metadatas),
|
|
342
361
|
)
|
|
343
|
-
|
|
344
|
-
f"✅ 成功将 {len(chunks)} 个块添加到 ChromaDB 集合中。"
|
|
345
|
-
)
|
|
362
|
+
PrettyOutput.auto_print(f"✅ 成功将 {len(chunks)} 个块添加到 ChromaDB 集合中。")
|
|
346
363
|
|
|
347
364
|
# 更新并保存BM25索引
|
|
348
365
|
tokenized_chunks = [doc.split() for doc in chunk_texts]
|
|
@@ -398,7 +415,7 @@ class ChromaRetriever:
|
|
|
398
415
|
]
|
|
399
416
|
|
|
400
417
|
# 按分数排序并取最高结果
|
|
401
|
-
bm25_results_with_docs.sort(key=lambda x: x[2], reverse=True)
|
|
418
|
+
bm25_results_with_docs.sort(key=lambda x: x[2], reverse=True)
|
|
402
419
|
|
|
403
420
|
for doc_text, metadata, _ in bm25_results_with_docs[: n_results * 2]:
|
|
404
421
|
bm25_docs.append(Document(page_content=doc_text, metadata=metadata))
|
|
@@ -447,9 +464,7 @@ class ChromaRetriever:
|
|
|
447
464
|
for doc_text, metadata in zip(final_documents, final_metadatas):
|
|
448
465
|
if doc_text is not None and metadata is not None:
|
|
449
466
|
retrieved_docs.append(
|
|
450
|
-
Document(
|
|
451
|
-
page_content=cast(str, doc_text), metadata=metadata
|
|
452
|
-
)
|
|
467
|
+
Document(page_content=doc_text, metadata=metadata)
|
|
453
468
|
)
|
|
454
469
|
|
|
455
470
|
return retrieved_docs
|
jarvis/jarvis_sec/__init__.py
CHANGED
|
@@ -19,106 +19,33 @@ Jarvis 安全分析套件
|
|
|
19
19
|
- 模块化重构:将功能拆分为多个模块(prompts, parsers, utils, agents, clustering, analysis, verification, review)
|
|
20
20
|
"""
|
|
21
21
|
|
|
22
|
-
from typing import Dict
|
|
22
|
+
from typing import Dict
|
|
23
|
+
from typing import List
|
|
24
|
+
from typing import Optional
|
|
23
25
|
|
|
24
|
-
import
|
|
26
|
+
from jarvis.jarvis_utils.output import PrettyOutput
|
|
25
27
|
|
|
26
28
|
from jarvis.jarvis_agent import Agent # noqa: F401
|
|
27
|
-
from jarvis.jarvis_sec.
|
|
28
|
-
|
|
29
|
-
# 导入模块化后的函数(用于触发模块加载)
|
|
30
|
-
from jarvis.jarvis_sec.prompts import ( # noqa: F401
|
|
31
|
-
build_summary_prompt as _build_summary_prompt,
|
|
32
|
-
build_verification_summary_prompt as _build_verification_summary_prompt,
|
|
33
|
-
get_review_system_prompt as _get_review_system_prompt,
|
|
34
|
-
get_review_summary_prompt as _get_review_summary_prompt,
|
|
35
|
-
get_cluster_system_prompt as _get_cluster_system_prompt,
|
|
36
|
-
get_cluster_summary_prompt as _get_cluster_summary_prompt,
|
|
37
|
-
)
|
|
38
|
-
from jarvis.jarvis_sec.parsers import ( # noqa: F401
|
|
39
|
-
parse_clusters_from_text as _parse_clusters_from_text,
|
|
40
|
-
try_parse_summary_report as _try_parse_summary_report,
|
|
41
|
-
)
|
|
42
|
-
from jarvis.jarvis_sec.utils import ( # noqa: F401
|
|
43
|
-
git_restore_if_dirty as _git_restore_if_dirty,
|
|
44
|
-
get_sec_dir as _get_sec_dir,
|
|
29
|
+
from jarvis.jarvis_sec.utils import (
|
|
45
30
|
initialize_analysis_context as _initialize_analysis_context,
|
|
31
|
+
)
|
|
32
|
+
from jarvis.jarvis_sec.utils import (
|
|
46
33
|
load_or_run_heuristic_scan as _load_or_run_heuristic_scan,
|
|
47
|
-
compact_candidate as _compact_candidate,
|
|
48
|
-
prepare_candidates as _prepare_candidates,
|
|
49
|
-
group_candidates_by_file as _group_candidates_by_file,
|
|
50
|
-
create_report_writer as _create_report_writer,
|
|
51
|
-
sig_of as _sig_of,
|
|
52
|
-
load_processed_gids_from_issues as _load_processed_gids_from_issues,
|
|
53
|
-
count_issues_from_file as _count_issues_from_file,
|
|
54
|
-
load_all_issues_from_file as _load_all_issues_from_file,
|
|
55
|
-
load_processed_gids_from_agent_issues as _load_processed_gids_from_agent_issues,
|
|
56
34
|
)
|
|
57
|
-
from jarvis.jarvis_sec.
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
create_review_agent as _create_review_agent,
|
|
61
|
-
create_cluster_agent as _create_cluster_agent,
|
|
35
|
+
from jarvis.jarvis_sec.utils import prepare_candidates as _prepare_candidates
|
|
36
|
+
from jarvis.jarvis_sec.utils import (
|
|
37
|
+
group_candidates_by_file as _group_candidates_by_file,
|
|
62
38
|
)
|
|
63
|
-
from jarvis.jarvis_sec.
|
|
64
|
-
|
|
65
|
-
restore_clusters_from_checkpoint as _restore_clusters_from_checkpoint,
|
|
66
|
-
create_cluster_snapshot_writer as _create_cluster_snapshot_writer,
|
|
67
|
-
collect_candidate_gids as _collect_candidate_gids,
|
|
68
|
-
collect_clustered_gids as _collect_clustered_gids,
|
|
69
|
-
# supplement_missing_gids_for_clustering已移除,不再需要
|
|
70
|
-
handle_single_alert_file as _handle_single_alert_file,
|
|
71
|
-
validate_cluster_format as _validate_cluster_format,
|
|
72
|
-
extract_classified_gids as _extract_classified_gids,
|
|
73
|
-
build_cluster_retry_task as _build_cluster_retry_task,
|
|
74
|
-
build_cluster_error_guidance as _build_cluster_error_guidance,
|
|
75
|
-
run_cluster_agent_direct_model as _run_cluster_agent_direct_model,
|
|
76
|
-
validate_cluster_result as _validate_cluster_result,
|
|
77
|
-
check_cluster_completeness as _check_cluster_completeness,
|
|
78
|
-
run_cluster_agent_with_retry as _run_cluster_agent_with_retry,
|
|
79
|
-
process_cluster_results as _process_cluster_results,
|
|
80
|
-
supplement_missing_gids as _supplement_missing_gids,
|
|
81
|
-
build_cluster_task as _build_cluster_task,
|
|
82
|
-
extract_input_gids as _extract_input_gids,
|
|
83
|
-
build_gid_to_item_mapping as _build_gid_to_item_mapping,
|
|
84
|
-
process_cluster_chunk as _process_cluster_chunk,
|
|
85
|
-
filter_pending_items as _filter_pending_items,
|
|
86
|
-
process_file_clustering as _process_file_clustering,
|
|
87
|
-
# check_and_supplement_missing_gids已移除,完整性检查已移至process_clustering_phase中
|
|
88
|
-
initialize_clustering_context as _initialize_clustering_context,
|
|
89
|
-
check_unclustered_gids as _check_unclustered_gids,
|
|
90
|
-
execute_clustering_for_files as _execute_clustering_for_files,
|
|
91
|
-
record_clustering_completion as _record_clustering_completion,
|
|
92
|
-
fallback_to_file_based_batches as _fallback_to_file_based_batches,
|
|
39
|
+
from jarvis.jarvis_sec.utils import create_report_writer as _create_report_writer
|
|
40
|
+
from jarvis.jarvis_sec.clustering import (
|
|
93
41
|
process_clustering_phase as _process_clustering_phase,
|
|
94
42
|
)
|
|
95
|
-
from jarvis.jarvis_sec.
|
|
96
|
-
build_review_task as _build_review_task,
|
|
97
|
-
process_review_batch_items as _process_review_batch_items,
|
|
98
|
-
reinstated_candidates_to_cluster_batches as _reinstated_candidates_to_cluster_batches,
|
|
99
|
-
process_review_phase as _process_review_phase,
|
|
100
|
-
build_gid_to_review_mapping as _build_gid_to_review_mapping,
|
|
101
|
-
process_review_batch as _process_review_batch,
|
|
102
|
-
run_review_agent_with_retry as _run_review_agent_with_retry,
|
|
103
|
-
is_valid_review_item as _is_valid_review_item,
|
|
104
|
-
)
|
|
105
|
-
from jarvis.jarvis_sec.analysis import ( # noqa: F401
|
|
106
|
-
build_analysis_task_context as _build_analysis_task_context,
|
|
107
|
-
build_validation_error_guidance as _build_validation_error_guidance,
|
|
108
|
-
run_analysis_agent_with_retry as _run_analysis_agent_with_retry,
|
|
109
|
-
expand_and_filter_analysis_results as _expand_and_filter_analysis_results,
|
|
110
|
-
valid_items as _valid_items,
|
|
111
|
-
)
|
|
112
|
-
from jarvis.jarvis_sec.verification import ( # noqa: F401
|
|
113
|
-
build_gid_to_verification_mapping as _build_gid_to_verification_mapping,
|
|
114
|
-
merge_verified_items as _merge_verified_items,
|
|
115
|
-
merge_verified_items_without_verification as _merge_verified_items_without_verification,
|
|
116
|
-
process_verification_batch as _process_verification_batch,
|
|
117
|
-
is_valid_verification_item as _is_valid_verification_item,
|
|
118
|
-
run_verification_agent_with_retry as _run_verification_agent_with_retry,
|
|
43
|
+
from jarvis.jarvis_sec.verification import (
|
|
119
44
|
process_verification_phase as _process_verification_phase,
|
|
120
45
|
)
|
|
121
46
|
|
|
47
|
+
from jarvis.jarvis_sec.workflow import direct_scan
|
|
48
|
+
from jarvis.jarvis_sec.workflow import run_with_agent
|
|
122
49
|
|
|
123
50
|
# 注:当前版本不使用 MultiAgent 编排,已移除默认多智能体配置与创建函数。
|
|
124
51
|
# 请使用 run_security_analysis(单Agent逐条验证)或 workflow.direct_scan + format_markdown_report(直扫基线)。
|
|
@@ -126,13 +53,6 @@ from jarvis.jarvis_sec.verification import ( # noqa: F401
|
|
|
126
53
|
# 本文件中保留了这些函数的别名导入,以便向后兼容。
|
|
127
54
|
|
|
128
55
|
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
56
|
def run_security_analysis(
|
|
137
57
|
entry_path: str,
|
|
138
58
|
languages: Optional[List[str]] = None,
|
|
@@ -150,7 +70,7 @@ def run_security_analysis(
|
|
|
150
70
|
改进:
|
|
151
71
|
- 即使在 agent 模式下,也先进行本地正则/启发式直扫,生成候选问题;
|
|
152
72
|
然后将候选问题拆分为子任务,交由多Agent进行深入分析与聚合。
|
|
153
|
-
|
|
73
|
+
|
|
154
74
|
注意:此函数会在发生异常时更新状态文件为 error 状态。
|
|
155
75
|
|
|
156
76
|
参数:
|
|
@@ -174,13 +94,24 @@ def run_security_analysis(
|
|
|
174
94
|
|
|
175
95
|
# 状态管理器(不再使用 status.json,使用空对象)
|
|
176
96
|
class DummyStatusManager:
|
|
177
|
-
def update_pre_scan(self, **kwargs):
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
def
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
97
|
+
def update_pre_scan(self, **kwargs):
|
|
98
|
+
pass
|
|
99
|
+
|
|
100
|
+
def update_clustering(self, **kwargs):
|
|
101
|
+
pass
|
|
102
|
+
|
|
103
|
+
def update_review(self, **kwargs):
|
|
104
|
+
pass
|
|
105
|
+
|
|
106
|
+
def update_verification(self, **kwargs):
|
|
107
|
+
pass
|
|
108
|
+
|
|
109
|
+
def mark_completed(self, **kwargs):
|
|
110
|
+
pass
|
|
111
|
+
|
|
112
|
+
def mark_error(self, **kwargs):
|
|
113
|
+
pass
|
|
114
|
+
|
|
184
115
|
status_mgr = DummyStatusManager()
|
|
185
116
|
|
|
186
117
|
# 初始化分析上下文
|
|
@@ -195,37 +126,45 @@ def run_security_analysis(
|
|
|
195
126
|
|
|
196
127
|
# 2) 将候选问题精简为子任务清单,控制上下文长度
|
|
197
128
|
compact_candidates = _prepare_candidates(candidates)
|
|
198
|
-
|
|
129
|
+
|
|
199
130
|
# 3) 保存候选到新的 candidates.jsonl 文件(包含gid)
|
|
200
|
-
from jarvis.jarvis_sec.file_manager import
|
|
131
|
+
from jarvis.jarvis_sec.file_manager import get_candidates_file
|
|
132
|
+
from jarvis.jarvis_sec.file_manager import save_candidates
|
|
133
|
+
|
|
201
134
|
try:
|
|
202
135
|
save_candidates(sec_dir, compact_candidates)
|
|
203
|
-
_progress_append(
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
136
|
+
_progress_append(
|
|
137
|
+
{
|
|
138
|
+
"event": "candidates_saved",
|
|
139
|
+
"path": str(get_candidates_file(sec_dir)),
|
|
140
|
+
"issues_count": len(compact_candidates),
|
|
141
|
+
}
|
|
142
|
+
)
|
|
208
143
|
except Exception:
|
|
209
144
|
pass
|
|
210
|
-
|
|
145
|
+
|
|
211
146
|
# 记录批次选择信息(可选,用于日志)
|
|
212
147
|
try:
|
|
213
148
|
groups = _group_candidates_by_file(compact_candidates)
|
|
214
149
|
if groups:
|
|
215
150
|
selected_file, items = max(groups.items(), key=lambda kv: len(kv[1]))
|
|
216
151
|
try:
|
|
217
|
-
|
|
152
|
+
PrettyOutput.auto_print(
|
|
153
|
+
f"[jarvis-sec] 批次选择: 文件={selected_file} 数量={len(items)}"
|
|
154
|
+
)
|
|
218
155
|
except Exception:
|
|
219
156
|
pass
|
|
220
|
-
_progress_append(
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
157
|
+
_progress_append(
|
|
158
|
+
{
|
|
159
|
+
"event": "batch_selection",
|
|
160
|
+
"selected_file": selected_file,
|
|
161
|
+
"selected_count": len(items),
|
|
162
|
+
"total_in_file": len(items),
|
|
163
|
+
}
|
|
164
|
+
)
|
|
226
165
|
except Exception:
|
|
227
166
|
pass
|
|
228
|
-
|
|
167
|
+
|
|
229
168
|
# 创建报告写入函数
|
|
230
169
|
_append_report = _create_report_writer(sec_dir, report_file)
|
|
231
170
|
|
|
@@ -256,12 +195,13 @@ def run_security_analysis(
|
|
|
256
195
|
enable_verification=enable_verification,
|
|
257
196
|
force_save_memory=force_save_memory,
|
|
258
197
|
)
|
|
259
|
-
|
|
198
|
+
|
|
260
199
|
# 5) 使用统一聚合器生成最终报告(JSON + Markdown)
|
|
261
200
|
try:
|
|
262
201
|
from jarvis.jarvis_sec.report import build_json_and_markdown
|
|
202
|
+
|
|
263
203
|
result = build_json_and_markdown(
|
|
264
|
-
all_issues,
|
|
204
|
+
all_issues, # type: ignore[arg-type]
|
|
265
205
|
scanned_root=summary.get("scanned_root"),
|
|
266
206
|
scanned_files=summary.get("scanned_files"),
|
|
267
207
|
meta=meta_records or None,
|
|
@@ -270,36 +210,21 @@ def run_security_analysis(
|
|
|
270
210
|
# 标记分析完成
|
|
271
211
|
status_mgr.mark_completed(
|
|
272
212
|
total_issues=len(all_issues),
|
|
273
|
-
message=f"安全分析完成,共发现 {len(all_issues)} 个问题"
|
|
213
|
+
message=f"安全分析完成,共发现 {len(all_issues)} 个问题",
|
|
274
214
|
)
|
|
275
215
|
return result
|
|
276
216
|
except Exception as e:
|
|
277
217
|
# 发生错误时更新状态
|
|
278
218
|
error_msg = str(e)
|
|
279
|
-
status_mgr.mark_error(
|
|
280
|
-
error_message=error_msg,
|
|
281
|
-
error_type=type(e).__name__
|
|
282
|
-
)
|
|
219
|
+
status_mgr.mark_error(error_message=error_msg, error_type=type(e).__name__)
|
|
283
220
|
raise
|
|
284
221
|
finally:
|
|
285
|
-
#
|
|
286
|
-
|
|
287
|
-
from jarvis.jarvis_tools.lsp_client import LSPClientTool
|
|
288
|
-
LSPClientTool.cleanup_all_clients()
|
|
289
|
-
except Exception:
|
|
290
|
-
pass # 清理失败不影响主流程
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
222
|
+
# 清理资源
|
|
223
|
+
pass
|
|
297
224
|
|
|
298
225
|
|
|
299
226
|
__all__ = [
|
|
300
|
-
|
|
301
227
|
"run_security_analysis",
|
|
302
|
-
|
|
303
228
|
"direct_scan",
|
|
304
229
|
"run_with_agent",
|
|
305
|
-
]
|
|
230
|
+
]
|
jarvis/jarvis_sec/agents.py
CHANGED
|
@@ -1,16 +1,17 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
2
|
"""Agent创建和订阅模块"""
|
|
3
3
|
|
|
4
|
-
from typing import Dict
|
|
4
|
+
from typing import Dict
|
|
5
|
+
from typing import Optional
|
|
6
|
+
|
|
5
7
|
from jarvis.jarvis_agent import Agent
|
|
8
|
+
from jarvis.jarvis_sec.prompts import build_summary_prompt
|
|
9
|
+
from jarvis.jarvis_sec.prompts import get_cluster_summary_prompt
|
|
10
|
+
from jarvis.jarvis_sec.prompts import get_cluster_system_prompt
|
|
11
|
+
from jarvis.jarvis_sec.prompts import get_review_summary_prompt
|
|
12
|
+
from jarvis.jarvis_sec.prompts import get_review_system_prompt
|
|
6
13
|
from jarvis.jarvis_tools.registry import ToolRegistry
|
|
7
|
-
from jarvis.
|
|
8
|
-
build_summary_prompt,
|
|
9
|
-
get_review_system_prompt,
|
|
10
|
-
get_review_summary_prompt,
|
|
11
|
-
get_cluster_system_prompt,
|
|
12
|
-
get_cluster_summary_prompt,
|
|
13
|
-
)
|
|
14
|
+
from jarvis.jarvis_utils.tag import ot
|
|
14
15
|
|
|
15
16
|
|
|
16
17
|
def subscribe_summary_event(agent: Agent) -> Dict[str, str]:
|
|
@@ -19,14 +20,16 @@ def subscribe_summary_event(agent: Agent) -> Dict[str, str]:
|
|
|
19
20
|
try:
|
|
20
21
|
from jarvis.jarvis_agent.events import AFTER_SUMMARY as _AFTER_SUMMARY
|
|
21
22
|
except Exception:
|
|
22
|
-
_AFTER_SUMMARY = None
|
|
23
|
-
|
|
23
|
+
_AFTER_SUMMARY = None # type: ignore
|
|
24
|
+
|
|
24
25
|
if _AFTER_SUMMARY:
|
|
26
|
+
|
|
25
27
|
def _on_after_summary(**kwargs):
|
|
26
28
|
try:
|
|
27
29
|
summary_container["text"] = str(kwargs.get("summary", "") or "")
|
|
28
30
|
except Exception:
|
|
29
31
|
summary_container["text"] = ""
|
|
32
|
+
|
|
30
33
|
try:
|
|
31
34
|
agent.event_bus.subscribe(_AFTER_SUMMARY, _on_after_summary)
|
|
32
35
|
except Exception:
|
|
@@ -34,9 +37,11 @@ def subscribe_summary_event(agent: Agent) -> Dict[str, str]:
|
|
|
34
37
|
return summary_container
|
|
35
38
|
|
|
36
39
|
|
|
37
|
-
def create_analysis_agent(
|
|
40
|
+
def create_analysis_agent(
|
|
41
|
+
task_id: str, llm_group: Optional[str], force_save_memory: bool = False
|
|
42
|
+
) -> Agent:
|
|
38
43
|
"""创建分析Agent"""
|
|
39
|
-
system_prompt = """
|
|
44
|
+
system_prompt = f"""
|
|
40
45
|
# 单Agent安全分析约束
|
|
41
46
|
- 你的核心任务是评估代码的安全问题,目标:针对本候选问题进行证据核实、风险评估与修复建议补充,查找漏洞触发路径,确认在某些条件下会触发;以此来判断是否是漏洞。
|
|
42
47
|
- **必须进行调用路径推导**:
|
|
@@ -64,9 +69,9 @@ def create_analysis_agent(task_id: str, llm_group: Optional[str], force_save_mem
|
|
|
64
69
|
- 如果有必要,使用 save_memory 工具保存每个函数的分析要点,使用函数名作为 tag(例如:函数名、文件名等)。
|
|
65
70
|
- 记忆内容示例:某个函数的指针已经判空、某个函数已有输入校验、某个函数的调用路径分析结果等。
|
|
66
71
|
- 这样可以避免重复分析,提高效率,并保持分析的一致性。
|
|
67
|
-
- 完成对本批次候选问题的判断后,主输出仅打印结束符
|
|
72
|
+
- 完成对本批次候选问题的判断后,主输出仅打印结束符 {ot("!!!COMPLETE!!!")},不要输出其他任何内容。任务总结将会在后面的交互中被询问。
|
|
68
73
|
""".strip()
|
|
69
|
-
|
|
74
|
+
|
|
70
75
|
agent_kwargs: Dict = dict(
|
|
71
76
|
system_prompt=system_prompt,
|
|
72
77
|
name=task_id,
|
|
@@ -93,7 +98,7 @@ def create_review_agent(
|
|
|
93
98
|
"""创建复核Agent"""
|
|
94
99
|
review_system_prompt = get_review_system_prompt()
|
|
95
100
|
review_summary_prompt = get_review_summary_prompt()
|
|
96
|
-
|
|
101
|
+
|
|
97
102
|
review_task_id = f"JARVIS-SEC-Review-Batch-{current_review_num}"
|
|
98
103
|
review_agent_kwargs: Dict = dict(
|
|
99
104
|
system_prompt=review_system_prompt,
|
|
@@ -122,7 +127,7 @@ def create_cluster_agent(
|
|
|
122
127
|
"""创建聚类Agent"""
|
|
123
128
|
cluster_system_prompt = get_cluster_system_prompt()
|
|
124
129
|
cluster_summary_prompt = get_cluster_summary_prompt()
|
|
125
|
-
|
|
130
|
+
|
|
126
131
|
agent_kwargs_cluster: Dict = dict(
|
|
127
132
|
system_prompt=cluster_system_prompt,
|
|
128
133
|
name=f"JARVIS-SEC-Cluster::{file}::batch{chunk_idx}",
|
|
@@ -140,4 +145,3 @@ def create_cluster_agent(
|
|
|
140
145
|
if llm_group:
|
|
141
146
|
agent_kwargs_cluster["model_group"] = llm_group
|
|
142
147
|
return Agent(**agent_kwargs_cluster)
|
|
143
|
-
|