jarvis-ai-assistant 0.1.220__py3-none-any.whl → 0.1.222__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. jarvis/__init__.py +1 -1
  2. jarvis/jarvis_agent/__init__.py +110 -395
  3. jarvis/jarvis_agent/edit_file_handler.py +32 -185
  4. jarvis/jarvis_agent/jarvis.py +14 -9
  5. jarvis/jarvis_agent/main.py +13 -6
  6. jarvis/jarvis_agent/prompt_builder.py +57 -0
  7. jarvis/jarvis_agent/prompts.py +188 -0
  8. jarvis/jarvis_agent/protocols.py +30 -0
  9. jarvis/jarvis_agent/session_manager.py +84 -0
  10. jarvis/jarvis_agent/tool_executor.py +49 -0
  11. jarvis/jarvis_code_agent/code_agent.py +14 -23
  12. jarvis/jarvis_code_analysis/code_review.py +1 -1
  13. jarvis/jarvis_data/config_schema.json +13 -18
  14. jarvis/jarvis_git_details/main.py +1 -1
  15. jarvis/jarvis_platform/kimi.py +4 -2
  16. jarvis/jarvis_rag/__init__.py +2 -2
  17. jarvis/jarvis_rag/cache.py +28 -30
  18. jarvis/jarvis_rag/cli.py +141 -52
  19. jarvis/jarvis_rag/embedding_manager.py +32 -46
  20. jarvis/jarvis_rag/llm_interface.py +32 -34
  21. jarvis/jarvis_rag/query_rewriter.py +11 -12
  22. jarvis/jarvis_rag/rag_pipeline.py +40 -43
  23. jarvis/jarvis_rag/reranker.py +18 -18
  24. jarvis/jarvis_rag/retriever.py +29 -29
  25. jarvis/jarvis_tools/edit_file.py +11 -36
  26. jarvis/jarvis_utils/config.py +20 -25
  27. {jarvis_ai_assistant-0.1.220.dist-info → jarvis_ai_assistant-0.1.222.dist-info}/METADATA +25 -20
  28. {jarvis_ai_assistant-0.1.220.dist-info → jarvis_ai_assistant-0.1.222.dist-info}/RECORD +32 -27
  29. {jarvis_ai_assistant-0.1.220.dist-info → jarvis_ai_assistant-0.1.222.dist-info}/entry_points.txt +9 -0
  30. {jarvis_ai_assistant-0.1.220.dist-info → jarvis_ai_assistant-0.1.222.dist-info}/WHEEL +0 -0
  31. {jarvis_ai_assistant-0.1.220.dist-info → jarvis_ai_assistant-0.1.222.dist-info}/licenses/LICENSE +0 -0
  32. {jarvis_ai_assistant-0.1.220.dist-info → jarvis_ai_assistant-0.1.222.dist-info}/top_level.txt +0 -0

jarvis/jarvis_rag/query_rewriter.py

@@ -4,22 +4,21 @@ from .llm_interface import LLMInterface
 
 class QueryRewriter:
     """
-    Uses an LLM to rewrite a user's query into multiple, diverse search
-    queries to enhance retrieval recall.
+    使用LLM将用户的查询重写为多个不同的搜索查询,以提高检索召回率。
     """
 
     def __init__(self, llm: LLMInterface):
         """
-        Initializes the QueryRewriter.
+        初始化QueryRewriter
 
-        Args:
-            llm: An instance of a class implementing LLMInterface.
+        参数:
+            llm: 实现LLMInterface接口的类的实例。
         """
         self.llm = llm
         self.rewrite_prompt_template = self._create_prompt_template()
 
     def _create_prompt_template(self) -> str:
-        """Creates the prompt template for the multi-query rewriting task."""
+        """为多查询重写任务创建提示模板。"""
         return """
        你是一个精通检索的AI助手。你的任务是将以下这个单一的用户问题,从不同角度改写成 3 个不同的、但语义上相关的搜索查询。这有助于在知识库中进行更全面的搜索。
 
@@ -39,13 +38,13 @@ class QueryRewriter:
 
     def rewrite(self, query: str) -> List[str]:
         """
-        Rewrites the user query into multiple queries using the LLM.
+        使用LLM将用户查询重写为多个查询。
 
-        Args:
-            query: The original user query.
+        参数:
+            query: 原始用户查询。
 
-        Returns:
-            A list of rewritten, search-optimized queries.
+        返回:
+            一个经过重写、搜索优化的查询列表。
         """
         prompt = self.rewrite_prompt_template.format(query=query)
         print(f"✍️ 正在将原始查询重写为多个搜索查询...")
@@ -55,7 +54,7 @@
             line.strip() for line in response_text.strip().split("\n") if line.strip()
         ]
 
-        # Also include the original query for robustness
+        # 同时包含原始查询以保证鲁棒性
         if query not in rewritten_queries:
             rewritten_queries.insert(0, query)
 
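For orientation, the post-processing in `rewrite()` amounts to the following standalone sketch, reconstructed from the hunks above (`postprocess` is a hypothetical name, not a function in the package):

```python
from typing import List

def postprocess(response_text: str, query: str) -> List[str]:
    # One query variant per non-empty line of the LLM response.
    rewritten = [
        line.strip() for line in response_text.strip().split("\n") if line.strip()
    ]
    # Keep the original query as the first entry for robustness.
    if query not in rewritten:
        rewritten.insert(0, query)
    return rewritten

# postprocess("查询A\n查询B\n查询C", "原始查询")
# -> ["原始查询", "查询A", "查询B", "查询C"]
```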

jarvis/jarvis_rag/rag_pipeline.py

@@ -1,5 +1,5 @@
 import os
-from typing import List, Literal, Optional, cast
+from typing import List, Optional
 
 from langchain.docstore.document import Document
 
@@ -9,57 +9,55 @@ from .query_rewriter import QueryRewriter
 from .reranker import Reranker
 from .retriever import ChromaRetriever
 from jarvis.jarvis_utils.config import (
-    get_rag_embedding_mode,
+    get_rag_embedding_model,
+    get_rag_rerank_model,
     get_rag_vector_db_path,
     get_rag_embedding_cache_path,
-    get_rag_embedding_models,
 )
 
 
 class JarvisRAGPipeline:
     """
-    The main orchestrator for the RAG pipeline.
+    RAG管道的主要协调器。
 
-    This class integrates the embedding manager, retriever, and LLM to provide
-    a complete pipeline for adding documents and querying them.
+    该类集成了嵌入管理器、检索器和LLM,为添加文档和查询
+    提供了一个完整的管道。
     """
 
     def __init__(
         self,
         llm: Optional[LLMInterface] = None,
-        embedding_mode: Optional[Literal["performance", "accuracy"]] = None,
+        embedding_model: Optional[str] = None,
         db_path: Optional[str] = None,
         collection_name: str = "jarvis_rag_collection",
     ):
         """
-        Initializes the RAG pipeline.
-
-        Args:
-            llm: An instance of a class implementing LLMInterface.
-                 If None, defaults to the ToolAgent_LLM.
-            embedding_mode: The mode for the local embedding model. If None, uses config value.
-            db_path: Path to the persistent vector database. If None, uses config value.
-            collection_name: Name of the collection in the vector database.
+        初始化RAG管道。
+
+        参数:
+            llm: 实现LLMInterface接口的类的实例。
+                 如果为None,则默认为ToolAgent_LLM
+            embedding_model: 嵌入模型的名称。如果为None,则使用配置值。
+            db_path: 持久化向量数据库的路径。如果为None,则使用配置值。
+            collection_name: 向量数据库中集合的名称。
         """
-        # Determine the embedding model to isolate data paths
-        _embedding_mode = embedding_mode or get_rag_embedding_mode()
-        embedding_models = get_rag_embedding_models()
-        model_name = embedding_models[_embedding_mode]["model_name"]
+        # 确定嵌入模型以隔离数据路径
+        model_name = embedding_model or get_rag_embedding_model()
         sanitized_model_name = model_name.replace("/", "_").replace("\\", "_")
 
-        # If a specific db_path is given, use it. Otherwise, create a model-specific path.
+        # 如果给定了特定的db_path,则使用它。否则,创建一个特定于模型的路径。
         _final_db_path = (
             str(db_path)
             if db_path
             else os.path.join(get_rag_vector_db_path(), sanitized_model_name)
         )
-        # Always create a model-specific cache path.
+        # 始终创建一个特定于模型的缓存路径。
         _final_cache_path = os.path.join(
             get_rag_embedding_cache_path(), sanitized_model_name
         )
 
         self.embedding_manager = EmbeddingManager(
-            mode=cast(Literal["performance", "accuracy"], _embedding_mode),
+            model_name=model_name,
            cache_dir=_final_cache_path,
        )
        self.retriever = ChromaRetriever(
@@ -67,27 +65,27 @@ class JarvisRAGPipeline:
             db_path=_final_db_path,
             collection_name=collection_name,
         )
-        # Default to the ToolAgent_LLM unless a specific LLM is provided
+        # 除非提供了特定的LLM,否则默认为ToolAgent_LLM
         self.llm = llm if llm is not None else ToolAgent_LLM()
-        self.reranker = Reranker()
-        # Use a standard LLM for the query rewriting task, not the agent
+        self.reranker = Reranker(model_name=get_rag_rerank_model())
+        # 使用标准LLM执行查询重写任务,而不是代理
         self.query_rewriter = QueryRewriter(JarvisPlatform_LLM())
 
         print("✅ JarvisRAGPipeline 初始化成功。")
 
     def add_documents(self, documents: List[Document]):
         """
-        Adds documents to the vector knowledge base.
+        将文档添加到向量知识库。
 
-        Args:
-            documents: A list of LangChain Document objects to add.
+        参数:
+            documents: 要添加的LangChain文档对象列表。
         """
         self.retriever.add_documents(documents)
 
     def _create_prompt(
         self, query: str, context_docs: List[Document], source_files: List[str]
     ) -> str:
-        """Creates the final prompt for the LLM or Agent."""
+        """为LLM或代理创建最终的提示。"""
         context = "\n\n".join([doc.page_content for doc in context_docs])
         sources_text = "\n".join([f"- {source}" for source in source_files])
 
@@ -114,34 +112,33 @@
 
     def query(self, query_text: str, n_results: int = 5) -> str:
         """
-        Performs a query against the knowledge base using a multi-query
-        retrieval and reranking pipeline.
+        使用多查询检索和重排管道对知识库执行查询。
 
-        Args:
-            query_text: The user's original question.
-            n_results: The number of final relevant chunks to retrieve.
+        参数:
+            query_text: 用户的原始问题。
+            n_results: 要检索的最终相关块的数量。
 
-        Returns:
-            The answer generated by the LLM.
+        返回:
+            LLM生成的答案。
         """
-        # 1. Rewrite the original query into multiple queries
+        # 1. 将原始查询重写为多个查询
         rewritten_queries = self.query_rewriter.rewrite(query_text)
 
-        # 2. Retrieve initial candidates for each rewritten query
+        # 2. 为每个重写的查询检索初始候选文档
         all_candidate_docs = []
         for q in rewritten_queries:
             print(f"🔍 正在为查询变体 '{q}' 进行混合检索...")
             candidates = self.retriever.retrieve(q, n_results=n_results * 2)
             all_candidate_docs.extend(candidates)
 
-        # De-duplicate the candidate documents
+        # 对候选文档进行去重
         unique_docs_dict = {doc.page_content: doc for doc in all_candidate_docs}
         unique_candidate_docs = list(unique_docs_dict.values())
 
         if not unique_candidate_docs:
             return "我在提供的文档中找不到任何相关信息来回答您的问题。"
 
-        # 3. Rerank the unified candidate pool against the *original* query
+        # 3. 根据*原始*查询对统一的候选池进行重排
         print(
             f"🔍 正在对 {len(unique_candidate_docs)} 个候选文档进行重排(基于原始问题)..."
         )
@@ -152,7 +149,7 @@
         if not retrieved_docs:
             return "我在提供的文档中找不到任何相关信息来回答您的问题。"
 
-        # Print the sources of the final retrieved documents
+        # 打印最终检索到的文档的来源
         sources = sorted(
             list(
                 {
@@ -167,8 +164,8 @@
         for source in sources:
             print(f" - {source}")
 
-        # 4. Create the final prompt and generate the answer
-        # We use the original query_text for the final prompt to the LLM
+        # 4. 创建最终提示并生成答案
+        # 我们使用原始的query_text作为给LLM的最终提示
         prompt = self._create_prompt(query_text, retrieved_docs, sources)
 
         print("🤖 正在从LLM生成答案...")

jarvis/jarvis_rag/reranker.py

@@ -8,16 +8,16 @@ from sentence_transformers.cross_encoder import ( # type: ignore
 
 class Reranker:
     """
-    A reranker class that uses a Cross-Encoder model to re-score and sort
-    documents based on their relevance to a given query.
+    一个重排器类,使用Cross-Encoder模型根据文档与给定查询的相关性
+    对文档进行重新评分和排序。
     """
 
-    def __init__(self, model_name: str = "BAAI/bge-reranker-base"):
+    def __init__(self, model_name: str):
         """
-        Initializes the Reranker.
+        初始化重排器。
 
-        Args:
-            model_name (str): The name of the Cross-Encoder model to use.
+        参数:
+            model_name (str): 要使用的Cross-Encoder模型的名称。
         """
         print(f"🔍 正在初始化重排模型: {model_name}...")
         self.model = CrossEncoder(model_name)
@@ -27,30 +27,30 @@ class Reranker:
         self, query: str, documents: List[Document], top_n: int = 5
     ) -> List[Document]:
         """
-        Reranks a list of documents based on their relevance to the query.
+        根据文档与查询的相关性对文档列表进行重排。
 
-        Args:
-            query (str): The user's query.
-            documents (List[Document]): The list of documents retrieved from the initial search.
-            top_n (int): The number of top documents to return after reranking.
+        参数:
+            query (str): 用户的查询。
+            documents (List[Document]): 从初始搜索中检索到的文档列表。
+            top_n (int): 重排后要返回的顶部文档数。
 
-        Returns:
-            List[Document]: A sorted list of the most relevant documents.
+        返回:
+            List[Document]: 一个已排序的最相关文档列表。
         """
         if not documents:
             return []
 
-        # Create pairs of [query, document_content] for scoring
+        # 创建 [查询, 文档内容] 对用于评分
         pairs = [[query, doc.page_content] for doc in documents]
 
-        # Get scores from the Cross-Encoder model
+        # 从Cross-Encoder模型获取分数
         scores = self.model.predict(pairs)
 
-        # Combine documents with their scores and sort
+        # 将文档与它们的分数结合并排序
         doc_with_scores = list(zip(documents, scores))
-        doc_with_scores.sort(key=lambda x: x[1], reverse=True)
+        doc_with_scores.sort(key=lambda x: x[1], reverse=True)  # type: ignore
 
-        # Return the top N documents
+        # 返回前N个文档
         reranked_docs = [doc for doc, score in doc_with_scores[:top_n]]
 
         return reranked_docs
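What `Reranker.rerank()` does with the injected model can be reproduced directly with the sentence-transformers Cross-Encoder API; the model name below is the configuration default, no longer a hard-coded constructor default:

```python
from sentence_transformers.cross_encoder import CrossEncoder

model = CrossEncoder("BAAI/bge-reranker-base")  # now injected via get_rag_rerank_model()
pairs = [
    ["什么是RRF?", "倒数排序融合(RRF)是一种结果融合方法。"],
    ["什么是RRF?", "BM25是一种稀疏检索算法。"],
]
scores = model.predict(pairs)  # one relevance score per [query, passage] pair
ranked = sorted(zip(pairs, scores), key=lambda x: x[1], reverse=True)
```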

jarvis/jarvis_rag/retriever.py

@@ -12,8 +12,8 @@ from .embedding_manager import EmbeddingManager
 
 class ChromaRetriever:
     """
-    A retriever class that combines dense vector search (ChromaDB) and
-    sparse keyword search (BM25) for hybrid retrieval.
+    一个检索器类,它结合了密集向量搜索(ChromaDB)和稀疏关键字搜索(BM25)
+    以实现混合检索。
     """
 
     def __init__(
@@ -23,18 +23,18 @@
         collection_name: str = "jarvis_rag_collection",
     ):
         """
-        Initializes the ChromaRetriever.
+        初始化ChromaRetriever
 
-        Args:
-            embedding_manager: An instance of EmbeddingManager.
-            db_path: The file path for ChromaDB's persistent storage.
-            collection_name: The name of the collection within ChromaDB.
+        参数:
+            embedding_manager: EmbeddingManager的实例。
+            db_path: ChromaDB持久化存储的文件路径。
+            collection_name: ChromaDB中集合的名称。
         """
         self.embedding_manager = embedding_manager
         self.db_path = db_path
         self.collection_name = collection_name
 
-        # Initialize ChromaDB client
+        # 初始化ChromaDB客户端
         self.client = chromadb.PersistentClient(path=self.db_path)
         self.collection = self.client.get_or_create_collection(
             name=self.collection_name
@@ -43,12 +43,12 @@
             f"✅ ChromaDB 客户端已在 '{db_path}' 初始化,集合为 '{collection_name}'。"
         )
 
-        # BM25 Index setup
+        # BM25索引设置
         self.bm25_index_path = os.path.join(self.db_path, f"{collection_name}_bm25.pkl")
         self._load_or_initialize_bm25()
 
     def _load_or_initialize_bm25(self):
-        """Loads the BM25 index from disk or initializes a new one."""
+        """从磁盘加载BM25索引或初始化一个新索引。"""
         if os.path.exists(self.bm25_index_path):
             print("🔍 正在加载现有的 BM25 索引...")
             with open(self.bm25_index_path, "rb") as f:
@@ -62,7 +62,7 @@
             self.bm25_index = None
 
     def _save_bm25_index(self):
-        """Saves the BM25 index to disk."""
+        """将BM25索引保存到磁盘。"""
         if self.bm25_index:
             print("💾 正在保存 BM25 索引...")
             with open(self.bm25_index_path, "wb") as f:
@@ -73,7 +73,7 @@
         self, documents: List[Document], chunk_size=1000, chunk_overlap=100
     ):
         """
-        Splits, embeds, and adds documents to both ChromaDB and the BM25 index.
+        将文档拆分、嵌入,并添加到ChromaDB和BM25索引中。
         """
         text_splitter = RecursiveCharacterTextSplitter(
             chunk_size=chunk_size, chunk_overlap=chunk_overlap
@@ -85,13 +85,13 @@
         if not chunks:
             return
 
-        # Extract content, metadata, and generate IDs
+        # 提取内容、元数据并生成ID
         chunk_texts = [chunk.page_content for chunk in chunks]
         metadatas = [chunk.metadata for chunk in chunks]
         start_id = self.collection.count()
         ids = [f"doc_{i}" for i in range(start_id, start_id + len(chunks))]
 
-        # Add to ChromaDB
+        # 添加到ChromaDB
         embeddings = self.embedding_manager.embed_documents(chunk_texts)
         self.collection.add(
             ids=ids,
@@ -101,7 +101,7 @@
         )
         print(f"✅ 成功将 {len(chunks)} 个块添加到 ChromaDB 集合中。")
 
-        # Update and save BM25 index
+        # 更新并保存BM25索引
         tokenized_chunks = [doc.split() for doc in chunk_texts]
         self.bm25_corpus.extend(tokenized_chunks)
         self.bm25_index = BM25Okapi(self.bm25_corpus)
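The BM25 side of the hybrid index uses plain whitespace tokenization, as the hunk above shows. A self-contained sketch with rank_bm25 (the sample corpus strings are illustrative):

```python
from rank_bm25 import BM25Okapi

corpus = [
    "hybrid retrieval combines dense and sparse search",
    "BM25 is a sparse keyword ranking function",
]
bm25 = BM25Okapi([doc.split() for doc in corpus])          # tokenized_chunks above
scores = bm25.get_scores("sparse keyword search".split())  # one score per document
```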
@@ -109,30 +109,30 @@
 
     def retrieve(self, query: str, n_results: int = 5) -> List[Document]:
         """
-        Performs hybrid retrieval using both vector search and BM25,
-        then fuses the results using Reciprocal Rank Fusion (RRF).
+        使用向量搜索和BM25执行混合检索,然后使用倒数排序融合(RRF)
+        对结果进行融合。
         """
-        # 1. Vector Search (ChromaDB)
+        # 1. 向量搜索 (ChromaDB)
         query_embedding = self.embedding_manager.embed_query(query)
         vector_results = self.collection.query(
             query_embeddings=cast(Any, [query_embedding]),
-            n_results=n_results * 2,  # Retrieve more results for fusion
+            n_results=n_results * 2,  # 检索更多结果用于融合
         )
 
-        # 2. Keyword Search (BM25)
+        # 2. 关键字搜索 (BM25)
         bm25_docs = []
         if self.bm25_index:
             tokenized_query = query.split()
             doc_scores = self.bm25_index.get_scores(tokenized_query)
 
-            # Get all documents from Chroma to match with BM25 scores
+            # 从Chroma获取所有文档以匹配BM25分数
             all_docs_in_collection = self.collection.get()
             all_documents = all_docs_in_collection.get("documents")
             all_metadatas = all_docs_in_collection.get("metadatas")
 
             bm25_results_with_docs = []
             if all_documents and all_metadatas:
-                # Create a mapping from index to document
+                # 创建从索引到文档的映射
                 bm25_results_with_docs = [
                     (
                         all_documents[i],
@@ -143,17 +143,17 @@
                     if score > 0
                 ]
 
-                # Sort by score and take top results
+                # 按分数排序并取最高结果
                 bm25_results_with_docs.sort(key=lambda x: x[2], reverse=True)
 
             for doc_text, metadata, _ in bm25_results_with_docs[: n_results * 2]:
                 bm25_docs.append(Document(page_content=doc_text, metadata=metadata))
 
-        # 3. Reciprocal Rank Fusion (RRF)
+        # 3. 倒数排序融合 (RRF)
         fused_scores: Dict[str, float] = {}
-        k = 60  # RRF ranking constant
+        k = 60  # RRF排名常数
 
-        # Process vector results
+        # 处理向量结果
         if vector_results and vector_results["ids"] and vector_results["documents"]:
             vec_ids = vector_results["ids"][0]
             vec_texts = vector_results["documents"][0]
@@ -161,7 +161,7 @@
             for rank, doc_id in enumerate(vec_ids):
                 fused_scores[doc_id] = fused_scores.get(doc_id, 0) + 1 / (k + rank)
 
-            # Create a map from document text to its ID for BM25 fusion
+            # 为BM25融合创建从文档文本到其ID的映射
             doc_text_to_id = {text: doc_id for text, doc_id in zip(vec_texts, vec_ids)}
 
         for rank, doc in enumerate(bm25_docs):
@@ -171,12 +171,12 @@
                     k + rank
                 )
 
-        # Sort fused results
+        # 对融合结果进行排序
         sorted_fused_results = sorted(
             fused_scores.items(), key=lambda x: x[1], reverse=True
         )
 
-        # Get the final documents from ChromaDB based on fused ranking
+        # 根据融合排名从ChromaDB获取最终文档
         final_doc_ids = [item[0] for item in sorted_fused_results[:n_results]]
 
         if not final_doc_ids:
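For reference, the Reciprocal Rank Fusion step in `retrieve()` reduces to the following sketch (same k = 60 constant; the string IDs stand in for ChromaDB document IDs):

```python
from typing import Dict, List

def rrf(rankings: List[List[str]], k: int = 60) -> Dict[str, float]:
    # Each ranked list contributes 1 / (k + rank) per document ID;
    # documents ranked highly in several lists accumulate the largest totals.
    fused: Dict[str, float] = {}
    for ranking in rankings:
        for rank, doc_id in enumerate(ranking):
            fused[doc_id] = fused.get(doc_id, 0.0) + 1.0 / (k + rank)
    return fused

# rrf([["a", "b", "c"], ["b", "a"]]) ranks "b" and "a" above "c",
# since both appear near the top of both lists.
```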

jarvis/jarvis_tools/edit_file.py

@@ -7,15 +7,12 @@
 2. 支持单个文件的编辑操作,包括创建新文件
 3. 实现原子操作:所有修改要么全部成功,要么全部回滚
 4. 严格匹配控制:每个搜索文本必须且只能匹配一次
-5. 支持两种编辑模式:快速编辑(fast_edit)和AI辅助编辑(slow_edit)
 
 核心特性:
 - 支持不存在的文件和空文件处理
 - 自动创建所需目录结构
 - 完善的错误处理和回滚机制
 - 严格的格式保持要求
-- 支持大文件处理(自动上传到模型平台)
-- 提供3次重试机制确保操作可靠性
 """
 from typing import Any, Dict
 
@@ -134,7 +131,6 @@ class FileSearchReplaceTool:
         for file_info in args["files"]:
             file_path = os.path.abspath(file_info["path"])
             changes = file_info["changes"]
-            agent = args.get("agent", None)
 
             # 创建已处理文件变量,用于失败时回滚
             original_content = None
@@ -152,44 +148,23 @@
                     content = f.read()
                 original_content = content
 
-            if file_exists and agent:
-                files = agent.get_user_data("files")
-                if not files or file_path not in files:
-                    file_results.append(
-                        {
-                            "file": file_path,
-                            "success": False,
-                            "stdout": "",
-                            "stderr": f"请先读取文件 {file_path} 的内容后再编辑",
-                        }
-                    )
-                    continue
-
             print(f"⚙️ 正在处理文件 {file_path}...")
-            # 首先尝试fast_edit模式
             success, temp_content = EditFileHandler._fast_edit(
                 file_path, changes
             )
             if not success:
-                # 如果fast_edit失败,尝试slow_edit模式
-                success, temp_content = EditFileHandler._slow_edit(
-                    file_path, changes, agent
+                print(f"❌ 文件 {file_path} 处理失败")
+                file_results.append(
+                    {
+                        "file": file_path,
+                        "success": False,
+                        "stdout": "",
+                        "stderr": temp_content,
+                    }
                 )
-                if not success:
-                    print(f"❌ 文件 {file_path} 处理失败")
-                    file_results.append(
-                        {
-                            "file": file_path,
-                            "success": False,
-                            "stdout": "",
-                            "stderr": temp_content,
-                        }
-                    )
-                    continue
-                else:
-                    print(f"✅ 文件 {file_path} 内容生成完成")
-            else:
-                print(f"✅ 文件 {file_path} 内容生成完成")
+                continue
+
+            print(f"✅ 文件 {file_path} 内容生成完成")
 
             # 只有当所有替换操作都成功时,才写回文件
             if success and (
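With slow_edit gone, the tool's behavior collapses to the strict-match contract stated in the module docstring: each search text must match exactly once, or the batch fails and rolls back. A hypothetical illustration of that contract (not the package's implementation; `apply_change` is an invented name):

```python
def apply_change(content: str, search: str, replace: str) -> str:
    # Strict matching: exactly one occurrence, otherwise fail so the
    # caller can roll back every file touched in this batch.
    count = content.count(search)
    if count != 1:
        raise ValueError(f"搜索文本必须且只能匹配一次,实际匹配 {count} 次")
    return content.replace(search, replace, 1)
```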

jarvis/jarvis_utils/config.py

@@ -241,6 +241,16 @@ def is_print_prompt() -> bool:
     return GLOBAL_CONFIG_DATA.get("JARVIS_PRINT_PROMPT", False) == True
 
 
+def is_enable_static_analysis() -> bool:
+    """
+    获取是否启用静态代码分析。
+
+    返回:
+        bool: 如果启用静态代码分析则返回True,默认为True
+    """
+    return GLOBAL_CONFIG_DATA.get("JARVIS_ENABLE_STATIC_ANALYSIS", True) is True
+
+
 def get_mcp_config() -> List[Dict[str, Any]]:
     """
     获取MCP配置列表。
@@ -255,21 +265,6 @@ def get_mcp_config() -> List[Dict[str, Any]]:
 # RAG Framework Configuration
 # ==============================================================================
 
-EMBEDDING_MODELS = {
-    "performance": {
-        "model_name": "BAAI/bge-base-zh-v1.5",
-        "model_kwargs": {"device": "cuda" if torch.cuda.is_available() else "cpu"},
-        "encode_kwargs": {"normalize_embeddings": True},
-        "show_progress": True,
-    },
-    "accuracy": {
-        "model_name": "BAAI/bge-large-zh-v1.5",
-        "model_kwargs": {"device": "cuda" if torch.cuda.is_available() else "cpu"},
-        "encode_kwargs": {"normalize_embeddings": True},
-        "show_progress": True,
-    },
-}
-
 
 def get_rag_config() -> Dict[str, Any]:
     """
@@ -281,24 +276,24 @@ def get_rag_config() -> Dict[str, Any]:
     return GLOBAL_CONFIG_DATA.get("JARVIS_RAG", {})
 
 
-def get_rag_embedding_models() -> Dict[str, Any]:
+def get_rag_embedding_model() -> str:
     """
-    获取RAG嵌入模型的定义。
+    获取RAG嵌入模型的名称。
 
     返回:
-        Dict[str, Any]: 嵌入模型配置字典
+        str: 嵌入模型的名称
     """
-    return EMBEDDING_MODELS
+    return get_rag_config().get("embedding_model", "BAAI/bge-base-zh-v1.5")
 
 
-def get_rag_embedding_mode() -> str:
+def get_rag_rerank_model() -> str:
     """
-    获取RAG嵌入模型的模式。
+    获取RAG rerank模型的名称。
 
     返回:
-        str: 'performance' 或 'accuracy'
+        str: rerank模型的名称
     """
-    return get_rag_config().get("embedding_mode", "performance")
+    return get_rag_config().get("rerank_model", "BAAI/bge-reranker-base")
 
 
 def get_rag_embedding_cache_path() -> str:
@@ -308,7 +303,7 @@ def get_rag_embedding_cache_path() -> str:
     返回:
         str: 缓存路径
     """
-    return get_rag_config().get("embedding_cache_path", ".jarvis/rag/embeddings")
+    return ".jarvis/rag/embeddings"
 
 
 def get_rag_vector_db_path() -> str:
@@ -318,4 +313,4 @@
     返回:
         str: 数据库路径
     """
-    return get_rag_config().get("vector_db_path", ".jarvis/rag/vectordb")
+    return ".jarvis/rag/vectordb"