PyPI - jarvis-ai-assistant - Versions diffs - 0.1.91__py3-none-any.whl → 0.1.93__py3-none-any.whl - Mend

jarvis-ai-assistant 0.1.91__py3-none-any.whl → 0.1.93__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of jarvis-ai-assistant might be problematic. Click here for more details.

Files changed (27) hide show

jarvis/__init__.py +1 -1
jarvis/agent.py +6 -4
jarvis/jarvis_codebase/main.py +274 -188
jarvis/jarvis_coder/__init__.py +0 -0
jarvis/jarvis_coder/git_utils.py +64 -0
jarvis/jarvis_coder/main.py +630 -0
jarvis/jarvis_coder/patch_handler.py +493 -0
jarvis/jarvis_coder/plan_generator.py +75 -0
jarvis/jarvis_platform/main.py +13 -2
jarvis/jarvis_rag/main.py +185 -49
jarvis/jarvis_smart_shell/main.py +16 -9
jarvis/main.py +9 -0
jarvis/models/ai8.py +4 -3
jarvis/models/ollama.py +3 -3
jarvis/models/openai.py +2 -2
jarvis/models/oyi.py +13 -13
jarvis/tools/ask_user.py +1 -2
jarvis/tools/coder.py +69 -0
jarvis/tools/thinker.py +25 -79
jarvis/utils.py +30 -2
{jarvis_ai_assistant-0.1.91.dist-info → jarvis_ai_assistant-0.1.93.dist-info}/METADATA +3 -1
jarvis_ai_assistant-0.1.93.dist-info/RECORD +47 -0
{jarvis_ai_assistant-0.1.91.dist-info → jarvis_ai_assistant-0.1.93.dist-info}/entry_points.txt +1 -0
jarvis_ai_assistant-0.1.91.dist-info/RECORD +0 -41
{jarvis_ai_assistant-0.1.91.dist-info → jarvis_ai_assistant-0.1.93.dist-info}/LICENSE +0 -0
{jarvis_ai_assistant-0.1.91.dist-info → jarvis_ai_assistant-0.1.93.dist-info}/WHEEL +0 -0
{jarvis_ai_assistant-0.1.91.dist-info → jarvis_ai_assistant-0.1.93.dist-info}/top_level.txt +0 -0

jarvis/__init__.py CHANGED Viewed

@@ -1,3 +1,3 @@
 """Jarvis AI Assistant"""
-__version__ = "0.1.91"
+__version__ = "0.1.93"

jarvis/agent.py CHANGED Viewed

@@ -140,7 +140,7 @@ class Agent:
     def _load_methodology(self, user_input: str) -> Dict[str, str]:
         """加载方法论并构建向量索引"""
-        PrettyOutput.print("加载方法论...", OutputType.PLANNING)
+        PrettyOutput.print("加载方法论...", OutputType.PROGRESS)
         user_jarvis_methodology = os.path.expanduser("~/.jarvis_methodology")
         if not os.path.exists(user_jarvis_methodology):
             return {}
@@ -165,13 +165,13 @@ class Agent:
             if vectors:
                 vectors_array = np.vstack(vectors)
-                self.methodology_index.add_with_ids(vectors_array, np.array(ids))
+                self.methodology_index.add_with_ids(vectors_array, np.array(ids)) # type: ignore
                 query_embedding = self._create_methodology_embedding(user_input)
                 k = min(5, len(self.methodology_data))
                 PrettyOutput.print(f"检索方法论...", OutputType.INFO)
                 distances, indices = self.methodology_index.search(
                     query_embedding.reshape(1, -1), k
-                )
+                ) # type: ignore
                 relevant_methodologies = {}
                 for dist, idx in zip(distances[0], indices[0]):
@@ -208,7 +208,7 @@ class Agent:
         """
         # 创建一个新的模型实例来做总结，避免影响主对话
-        PrettyOutput.print("总结对话历史，准备生成总结，开始新的对话...", OutputType.PLANNING)
+        PrettyOutput.print("总结对话历史，准备生成总结，开始新的对话...", OutputType.PROGRESS)
         prompt = """请总结之前对话中的关键信息，包括：
 1. 当前任务目标
@@ -259,6 +259,8 @@ class Agent:
                 analysis_prompt = """本次任务已结束，请分析是否需要生成方法论。
 如果认为需要生成方法论，请先判断是创建新的方法论还是更新已有方法论。如果是更新已有方法论，使用update，否则使用add。
 如果认为不需要生成方法论，请说明原因。
+方法论应该适应普遍场景，不要出现本次任务特定的信息，如代码的commit信息等。
+方法论中应该包含：问题重述、最优解决方案、注意事项（按需），除此外不要出现任何其他的信息。
 仅输出方法论工具的调用指令，或者是不需要生成方法论的说明，除此之外不要输出任何内容。
 """
                 self.prompt = analysis_prompt

jarvis/jarvis_codebase/main.py CHANGED Viewed

@@ -2,16 +2,18 @@ import hashlib
 import os
 import numpy as np
 import faiss
-from typing import List, Tuple, Optional
+from typing import List, Tuple, Optional, Dict
 from jarvis.models.registry import PlatformRegistry
 import concurrent.futures
 from threading import Lock
 from concurrent.futures import ThreadPoolExecutor
-from jarvis.utils import OutputType, PrettyOutput, find_git_root, get_max_context_length, get_thread_count, load_embedding_model, load_rerank_model
+from jarvis.utils import OutputType, PrettyOutput, find_git_root, get_file_md5, get_max_context_length, get_thread_count, load_embedding_model, load_rerank_model
 from jarvis.utils import load_env_from_file
 import argparse
 from sentence_transformers import SentenceTransformer
 import pickle
+import lzma  # 添加 lzma 导入
+from tqdm import tqdm
 class CodeBase:
     def __init__(self, root_dir: str):
@@ -58,7 +60,7 @@ class CodeBase:
         # 加载缓存
         if os.path.exists(self.cache_path):
             try:
-                with open(self.cache_path, 'rb') as f:
+                with lzma.open(self.cache_path, 'rb') as f:
                     cache_data = pickle.load(f)
                     self.vector_cache = cache_data["vectors"]
                     self.file_paths = cache_data["file_paths"]
@@ -88,19 +90,13 @@ class CodeBase:
                 return False
     def make_description(self, file_path: str, content: str) -> str:
-        model = PlatformRegistry.get_global_platform_registry().get_codegen_platform()
+        model = PlatformRegistry.get_global_platform_registry().get_cheap_platform()
         model.set_suppress_output(True)
         prompt = f"""请分析以下代码文件，并生成一个详细的描述。描述应该包含以下要点：
+1. 整个文件的功能描述，不超过100个字
+2. 每个全局变量的函数、类型定义、类、方法等代码元素的一句话描述，不超过50字
-1. 主要功能和用途
-2. 关键类和方法的作用
-3. 重要的依赖和技术特征（如使用了什么框架、算法、设计模式等）
-4. 代码处理的主要数据类型和数据结构
-5. 关键业务逻辑和处理流程
-6. 特殊功能点和亮点特性
-请用简洁专业的语言描述，突出代码的技术特征和功能特点，以便后续进行关联代码检索。
+请用简洁专业的语言描述，突出代码的技术功能，以便后续进行关联代码检索。
 文件路径：{file_path}
 代码内容：
 {content}
@@ -108,20 +104,24 @@ class CodeBase:
         response = model.chat(prompt)
         return response
-    def save_cache(self):
+    def _save_cache(self):
         """保存缓存数据"""
         try:
+            # 创建缓存数据的副本
             cache_data = {
-                "vectors": self.vector_cache,
-                "file_paths": self.file_paths
+                "vectors": dict(self.vector_cache),  # 创建字典的副本
+                "file_paths": list(self.file_paths)  # 创建列表的副本
             }
-            with open(self.cache_path, 'wb') as f:
-                pickle.dump(cache_data, f)
+            # 使用 lzma 压缩存储
+            with lzma.open(self.cache_path, 'wb') as f:
+                pickle.dump(cache_data, f, protocol=pickle.HIGHEST_PROTOCOL)
             PrettyOutput.print(f"保存了 {len(self.vector_cache)} 个向量缓存",
                              output_type=OutputType.INFO)
         except Exception as e:
             PrettyOutput.print(f"保存缓存失败: {str(e)}",
                              output_type=OutputType.ERROR)
+            raise  # 抛出异常以便上层处理
     def get_cached_vector(self, file_path: str, description: str) -> Optional[np.ndarray]:
         """从缓存获取文件的向量表示"""
@@ -157,24 +157,13 @@ class CodeBase:
                               output_type=OutputType.ERROR)
             file_md5 = ""
+        # 只更新内存中的缓存
         self.vector_cache[file_path] = {
             "path": file_path,  # 保存文件路径
             "md5": file_md5,    # 保存文件MD5
             "description": description,  # 保存文件描述
             "vector": vector    # 保存向量
         }
-        # 保存缓存到文件
-        try:
-            with open(self.cache_path, 'wb') as f:
-                cache_data = {
-                    "vectors": self.vector_cache,
-                    "file_paths": self.file_paths
-                }
-                pickle.dump(cache_data, f)
-        except Exception as e:
-            PrettyOutput.print(f"保存向量缓存失败: {str(e)}",
-                              output_type=OutputType.ERROR)
     def get_embedding(self, text: str) -> np.ndarray:
         """使用 transformers 模型获取文本的向量表示"""
@@ -215,22 +204,34 @@ class CodeBase:
         except Exception as e:
             PrettyOutput.print(f"Error vectorizing file {file_path}: {str(e)}",
                              output_type=OutputType.ERROR)
-            return np.zeros(self.vector_dim, dtype=np.float32)
+            return np.zeros(self.vector_dim, dtype=np.float32) # type: ignore
     def clean_cache(self) -> bool:
         """清理过期的缓存记录，返回是否有文件被删除"""
-        files_to_delete = []
-        for file_path in list(self.vector_cache.keys()):
-            if file_path not in self.git_file_list:
-                del self.vector_cache[file_path]
-                files_to_delete.append(file_path)
-        if files_to_delete:
-            self.save_cache()
-            PrettyOutput.print(f"清理了 {len(files_to_delete)} 个文件的缓存",
-                             output_type=OutputType.INFO)
-            return True
-        return False
+        try:
+            files_to_delete = []
+            for file_path in list(self.vector_cache.keys()):
+                if file_path not in self.git_file_list:
+                    del self.vector_cache[file_path]
+                    files_to_delete.append(file_path)
+            if files_to_delete:
+                # 只在有文件被删除时保存缓存
+                self._save_cache()
+                PrettyOutput.print(f"清理了 {len(files_to_delete)} 个文件的缓存",
+                                output_type=OutputType.INFO)
+                return True
+            return False
+        except Exception as e:
+            PrettyOutput.print(f"清理缓存失败: {str(e)}",
+                            output_type=OutputType.ERROR)
+            # 发生异常时尝试保存当前状态
+            try:
+                self._save_cache()
+            except:
+                pass
+            return False
     def process_file(self, file_path: str):
         """处理单个文件"""
@@ -241,16 +242,10 @@ class CodeBase:
             if not self.is_text_file(file_path):
                 return None
-            # 读取文件内容，限制长度
-            with open(file_path, "r", encoding="utf-8") as f:
-                content = f.read()
-                if len(content) > self.max_context_length:
-                    PrettyOutput.print(f"文件 {file_path} 内容超出长度限制，将截取前 {self.max_context_length} 个字符",
-                                     output_type=OutputType.WARNING)
-                    content = content[:self.max_context_length]
-            md5 = hashlib.md5(content.encode('utf-8')).hexdigest()
+            md5 = get_file_md5(file_path)
+            content = open(file_path, "r", encoding="utf-8").read()
             # 检查文件是否已经处理过且内容未变
             if file_path in self.vector_cache:
@@ -295,14 +290,14 @@ class CodeBase:
         if vectors:
             vectors = np.vstack(vectors)
-            self.index.add_with_ids(vectors, np.array(ids))
+            self.index.add_with_ids(vectors, np.array(ids)) # type: ignore
         else:
             self.index = None
     def gen_vector_db_from_cache(self):
         """从缓存生成向量数据库"""
         self.build_index()
-        self.save_cache()
+        self._save_cache()
     def generate_codebase(self, force: bool = False):
@@ -310,100 +305,152 @@ class CodeBase:
         Args:
             force: 是否强制重建索引，不询问用户
         """
-        # 更新 git 文件列表
-        self.git_file_list = self.get_git_file_list()
-        # 检查文件变化
-        changes_detected = False
-        new_files = []
-        modified_files = []
-        deleted_files = []
-        # 检查删除的文件
-        files_to_delete = []
-        for file_path in list(self.vector_cache.keys()):
-            if file_path not in self.git_file_list:
-                deleted_files.append(file_path)
-                files_to_delete.append(file_path)
-                changes_detected = True
-        # 检查新增和修改的文件
-        for file_path in self.git_file_list:
-            if not os.path.exists(file_path) or not self.is_text_file(file_path):
-                continue
+        try:
+            # 更新 git 文件列表
+            self.git_file_list = self.get_git_file_list()
-            try:
-                current_md5 = hashlib.md5(open(file_path, "rb").read()).hexdigest()
-                if file_path not in self.vector_cache:
-                    new_files.append(file_path)
-                    changes_detected = True
-                elif self.vector_cache[file_path].get("md5") != current_md5:
-                    modified_files.append(file_path)
+            # 检查文件变化
+            PrettyOutput.print("\n检查文件变化...", output_type=OutputType.INFO)
+            changes_detected = False
+            new_files = []
+            modified_files = []
+            deleted_files = []
+            # 检查删除的文件
+            files_to_delete = []
+            for file_path in list(self.vector_cache.keys()):
+                if file_path not in self.git_file_list:
+                    deleted_files.append(file_path)
+                    files_to_delete.append(file_path)
                     changes_detected = True
-            except Exception as e:
-                PrettyOutput.print(f"检查文件失败 {file_path}: {str(e)}",
-                                 output_type=OutputType.ERROR)
-                continue
+            # 检查新增和修改的文件
+            with tqdm(total=len(self.git_file_list), desc="检查文件状态") as pbar:
+                for file_path in self.git_file_list:
+                    if not os.path.exists(file_path) or not self.is_text_file(file_path):
+                        pbar.update(1)
+                        continue
+                    try:
+                        current_md5 = get_file_md5(file_path)
+                        if file_path not in self.vector_cache:
+                            new_files.append(file_path)
+                            changes_detected = True
+                        elif self.vector_cache[file_path].get("md5") != current_md5:
+                            modified_files.append(file_path)
+                            changes_detected = True
+                    except Exception as e:
+                        PrettyOutput.print(f"检查文件失败 {file_path}: {str(e)}",
+                                         output_type=OutputType.ERROR)
+                    pbar.update(1)
+            # 如果检测到变化，显示变化并询问用户
+            if changes_detected:
+                PrettyOutput.print("\n检测到以下变化:", output_type=OutputType.WARNING)
+                if new_files:
+                    PrettyOutput.print("\n新增文件:", output_type=OutputType.INFO)
+                    for f in new_files:
+                        PrettyOutput.print(f"  {f}", output_type=OutputType.INFO)
+                if modified_files:
+                    PrettyOutput.print("\n修改的文件:", output_type=OutputType.INFO)
+                    for f in modified_files:
+                        PrettyOutput.print(f"  {f}", output_type=OutputType.INFO)
+                if deleted_files:
+                    PrettyOutput.print("\n删除的文件:", output_type=OutputType.INFO)
+                    for f in deleted_files:
+                        PrettyOutput.print(f"  {f}", output_type=OutputType.INFO)
+                # 如果force为True，直接继续
+                if not force:
+                    # 询问用户是否继续
+                    while True:
+                        response = input("\n是否重建索引？[y/N] ").lower().strip()
+                        if response in ['y', 'yes']:
+                            break
+                        elif response in ['', 'n', 'no']:
+                            PrettyOutput.print("取消重建索引", output_type=OutputType.INFO)
+                            return
+                        else:
+                            PrettyOutput.print("请输入 y 或 n", output_type=OutputType.WARNING)
+                # 清理已删除的文件
+                for file_path in files_to_delete:
+                    del self.vector_cache[file_path]
+                if files_to_delete:
+                    PrettyOutput.print(f"清理了 {len(files_to_delete)} 个文件的缓存",
+                                     output_type=OutputType.INFO)
+                # 处理新文件和修改的文件
+                files_to_process = new_files + modified_files
+                processed_files = []
+                with tqdm(total=len(files_to_process), desc="处理文件") as pbar:
+                    # 使用线程池处理文件
+                    with ThreadPoolExecutor(max_workers=self.thread_count) as executor:
+                        # 提交所有任务
+                        future_to_file = {
+                            executor.submit(self.process_file, file): file
+                            for file in files_to_process
+                        }
+                        # 处理完成的任务
+                        for future in concurrent.futures.as_completed(future_to_file):
+                            file = future_to_file[future]
+                            try:
+                                result = future.result()
+                                if result:
+                                    processed_files.append(result)
+                            except Exception as e:
+                                PrettyOutput.print(f"处理文件失败 {file}: {str(e)}",
+                                                output_type=OutputType.ERROR)
+                            pbar.update(1)
+                if processed_files:
+                    PrettyOutput.print("\n重新生成向量数据库...", output_type=OutputType.INFO)
+                    self.gen_vector_db_from_cache()
+                    PrettyOutput.print(f"成功为 {len(processed_files)} 个文件生成索引",
+                                    output_type=OutputType.SUCCESS)
+            else:
+                PrettyOutput.print("没有检测到文件变更，无需重建索引", output_type=OutputType.INFO)
+        except Exception as e:
+            # 发生异常时尝试保存缓存
+            try:
+                self._save_cache()
+            except Exception as save_error:
+                PrettyOutput.print(f"保存缓存失败: {str(save_error)}",
+                                output_type=OutputType.ERROR)
+            raise e  # 重新抛出原始异常
+    def _text_search_score(self, content: str, keywords: List[str]) -> float:
+        """计算文本内容与关键词的匹配分数
-        # 如果检测到变化，显示变化并询问用户
-        if changes_detected:
-            PrettyOutput.print("\n检测到以下变化:", output_type=OutputType.WARNING)
-            if new_files:
-                PrettyOutput.print("\n新增文件:", output_type=OutputType.INFO)
-                for f in new_files:
-                    PrettyOutput.print(f"  {f}", output_type=OutputType.INFO)
-            if modified_files:
-                PrettyOutput.print("\n修改的文件:", output_type=OutputType.INFO)
-                for f in modified_files:
-                    PrettyOutput.print(f"  {f}", output_type=OutputType.INFO)
-            if deleted_files:
-                PrettyOutput.print("\n删除的文件:", output_type=OutputType.INFO)
-                for f in deleted_files:
-                    PrettyOutput.print(f"  {f}", output_type=OutputType.INFO)
-            # 如果force为True，直接继续
-            if not force:
-                # 询问用户是否继续
-                while True:
-                    response = input("\n是否重建索引？[y/N] ").lower().strip()
-                    if response in ['y', 'yes']:
-                        break
-                    elif response in ['', 'n', 'no']:
-                        PrettyOutput.print("取消重建索引", output_type=OutputType.INFO)
-                        return
-                    else:
-                        PrettyOutput.print("请输入 y 或 n", output_type=OutputType.WARNING)
-            # 清理已删除的文件
-            for file_path in files_to_delete:
-                del self.vector_cache[file_path]
-            if files_to_delete:
-                PrettyOutput.print(f"清理了 {len(files_to_delete)} 个文件的缓存",
-                                 output_type=OutputType.INFO)
+        Args:
+            content: 文本内容
+            keywords: 关键词列表
-            # 处理新文件和修改的文件
-            processed_files = []
-            files_to_process = new_files + modified_files
-            # 使用线程池处理文件
-            with ThreadPoolExecutor(max_workers=self.thread_count) as executor:
-                futures = [executor.submit(self.process_file, file) for file in files_to_process]
-                for future in concurrent.futures.as_completed(futures):
-                    result = future.result()
-                    if result:
-                        processed_files.append(result)
-                        PrettyOutput.print(f"索引文件: {result}", output_type=OutputType.INFO)
-            PrettyOutput.print("重新生成向量数据库", output_type=OutputType.INFO)
-            self.gen_vector_db_from_cache()
-            PrettyOutput.print(f"成功为 {len(processed_files)} 个文件生成索引", output_type=OutputType.INFO)
-        else:
-            PrettyOutput.print("没有检测到文件变更，无需重建索引", output_type=OutputType.INFO)
+        Returns:
+            float: 匹配分数 (0-1)
+        """
+        if not keywords:
+            return 0.0
+        content = content.lower()
+        matched_keywords = set()
+        for keyword in keywords:
+            keyword = keyword.lower()
+            if keyword in content:
+                matched_keywords.add(keyword)
+        # 计算匹配分数
+        score = len(matched_keywords) / len(keywords)
+        return score
     def rerank_results(self, query: str, initial_results: List[Tuple[str, float, str]]) -> List[Tuple[str, float, str]]:
-        """使用 BAAI/bge-reranker-v2-m3 对搜索结果重新排序"""
+        """使用多种策略对搜索结果重新排序"""
         if not initial_results:
             return []
@@ -413,13 +460,15 @@ class CodeBase:
             # 加载模型和分词器
             model, tokenizer = load_rerank_model()
-            # 准备数据 - 加入文件内容进行更准确的重排序
+            # 准备数据
             pairs = []
             for path, _, desc in initial_results:
                 try:
                     with open(path, "r", encoding="utf-8") as f:
                         content = f.read()[:512]  # 限制内容长度
-                    # 组合文件路径、描述和内容
+                    # 组合文件信息
                     doc_content = f"文件: {path}\n描述: {desc}\n内容: {content}"
                     pairs.append([query, doc_content])
                 except Exception as e:
@@ -430,6 +479,7 @@ class CodeBase:
             # 使用更大的batch size提高处理速度
             batch_size = 16  # 根据GPU显存调整
+            batch_scores = []
             with torch.no_grad():
                 for i in range(0, len(pairs), batch_size):
@@ -446,8 +496,7 @@ class CodeBase:
                         encoded = {k: v.cuda() for k, v in encoded.items()}
                     outputs = model(**encoded)
-                    # 修改这里：直接使用 outputs.logits 作为分数
-                    batch_scores = outputs.logits.squeeze(-1).cpu().numpy()
+                    batch_scores.extend(outputs.logits.squeeze(-1).cpu().numpy())
             # 归一化分数到 0-1 范围
             if batch_scores:
@@ -456,61 +505,98 @@ class CodeBase:
                 if max_score > min_score:
                     batch_scores = [(s - min_score) / (max_score - min_score) for s in batch_scores]
-            # 将分数与原始结果组合并排序
+            # 将重排序分数与原始分数结合
             scored_results = []
-            for (path, _, desc), score in zip(initial_results, batch_scores):
-                if score >= 0.5:  # 只保留相关度大于 0.5 的结果
-                    scored_results.append((path, float(score), desc))
+            for (path, orig_score, desc), rerank_score in zip(initial_results, batch_scores):
+                # 综合分数 = 0.3 * 原始分数 + 0.7 * 重排序分数
+                combined_score = 0.3 * float(orig_score) + 0.7 * float(rerank_score)
+                if combined_score >= 0.5:  # 只保留相关度较高的结果
+                    scored_results.append((path, combined_score, desc))
-            # 按分数降序排序
+            # 按综合分数降序排序
             scored_results.sort(key=lambda x: x[1], reverse=True)
             return scored_results
         except Exception as e:
-            PrettyOutput.print(f"重排序失败，使用原始排序: {str(e)}", output_type=OutputType.WARNING)
-            return initial_results
+            PrettyOutput.print(f"重排序失败: {str(e)}",
+                            output_type=OutputType.ERROR)
+            return initial_results  # 发生错误时返回原始结果
+    def _generate_query_variants(self, query: str) -> List[str]:
+        """生成查询的不同表述变体
+        Args:
+            query: 原始查询
+        Returns:
+            List[str]: 查询变体列表
+        """
+        model = PlatformRegistry.get_global_platform_registry().get_normal_platform()
+        prompt = f"""请根据以下查询，生成3个不同的表述，每个表述都要完整表达原始查询的意思。这些表述将用于代码搜索，要保持专业性和准确性。
+原始查询: {query}
+请直接输出3个表述，用换行分隔，不要有编号或其他标记。
+"""
+        variants = model.chat(prompt).strip().split('\n')
+        variants.append(query)  # 添加原始查询
+        return variants
+    def _vector_search(self, query_variants: List[str], top_k: int) -> Dict[str, Tuple[str, float, str]]:
+        """使用向量搜索查找相关文件
+        Args:
+            query_variants: 查询变体列表
+            top_k: 返回结果数量
+        Returns:
+            Dict[str, Tuple[str, float, str]]: 文件路径到(路径,分数,描述)的映射
+        """
+        results = {}
+        for query in query_variants:
+            query_vector = self.get_embedding(query)
+            query_vector = query_vector.reshape(1, -1)
+            distances, indices = self.index.search(query_vector, top_k) # type: ignore
+            for i, distance in zip(indices[0], distances[0]):
+                if i == -1:
+                    continue
+                similarity = 1.0 / (1.0 + float(distance))
+                if similarity >= 0.5:
+                    file_path = self.file_paths[i]
+                    # 使用最高的相似度分数
+                    if file_path not in results or similarity > results[file_path][1]:
+                        data = self.vector_cache[file_path]
+                        results[file_path] = (file_path, similarity, data["description"])
+        return results
     def search_similar(self, query: str, top_k: int = 30) -> List[Tuple[str, float, str]]:
         """搜索关联文件"""
         try:
             if self.index is None:
-                return []
-            # 生成多个查询变体以提高召回率
-            model = PlatformRegistry.get_global_platform_registry().get_normal_platform()
-            prompt = f"""请根据以下查询，生成3个不同的表述，每个表述都要完整表达原始查询的意思。这些表述将用于代码搜索，要保持专业性和准确性。
-原始查询: {query}
+                return []
+            # 生成查询变体
+            query_variants = self._generate_query_variants(query)
+            # 进行向量搜索
+            vector_results = self._vector_search(query_variants, top_k)
-请直接输出3个表述，用换行分隔，不要有编号或其他标记。
-"""
-            query_variants = model.chat(prompt).strip().split('\n')
-            query_variants.append(query)  # 添加原始查询
-            # 对每个查询变体进行搜索
-            all_results = {}
-            for q in query_variants:
-                q_vector = self.get_embedding(q)
-                q_vector = q_vector.reshape(1, -1)
-                distances, indices = self.index.search(q_vector, top_k)
-                for i, distance in zip(indices[0], distances[0]):
-                    if i == -1:
-                        continue
-                    similarity = 1.0 / (1.0 + float(distance))
-                    if similarity >= 0.5:
-                        file_path = self.file_paths[i]
-                        # 使用最高的相似度分数
-                        if file_path not in all_results or similarity > all_results[file_path][1]:
-                            data = self.vector_cache[file_path]
-                            all_results[file_path] = (file_path, similarity, data["description"])
-            # 转换为列表并排序
-            results = list(all_results.values())
+            results = list(vector_results.values())
             results.sort(key=lambda x: x[1], reverse=True)
+            # 取前 top_k 个结果进行重排序
+            initial_results = results[:top_k]
-            return results[:top_k]
+            # 如果没有找到结果，直接返回
+            if not initial_results:
+                return []
+            # 对初步结果进行重排序
+            return self.rerank_results(query, initial_results)
         except Exception as e:
             PrettyOutput.print(f"搜索失败: {str(e)}", output_type=OutputType.ERROR)
@@ -564,7 +650,7 @@ class CodeBase:
         # 检查缓存是否有效
         try:
-            with open(self.cache_path, 'rb') as f:
+            with lzma.open(self.cache_path, 'rb') as f:
                 cache_data = pickle.load(f)
                 if not cache_data.get("vectors") or not cache_data.get("file_paths"):
                     return False
@@ -625,4 +711,4 @@ def main():
 if __name__ == "__main__":
-    exit(main())
+    exit(main())

jarvis/jarvis_coder/__init__.py ADDED Viewed

File without changes

jarvis-ai-assistant 0.1.91__py3-none-any.whl → 0.1.93__py3-none-any.whl

Potentially problematic release.

jarvis-ai-assistant 0.1.91__py3-none-any.whl → 0.1.93__py3-none-any.whl