PyPI - jarvis-ai-assistant - Versions diffs - 0.1.220__py3-none-any.whl → 0.1.221__py3-none-any.whl - Mend

jarvis-ai-assistant 0.1.220__py3-none-any.whl → 0.1.221__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

jarvis/__init__.py +1 -1
jarvis/jarvis_agent/__init__.py +93 -382
jarvis/jarvis_agent/edit_file_handler.py +32 -185
jarvis/jarvis_agent/prompt_builder.py +57 -0
jarvis/jarvis_agent/prompts.py +188 -0
jarvis/jarvis_agent/protocols.py +30 -0
jarvis/jarvis_agent/session_manager.py +84 -0
jarvis/jarvis_agent/tool_executor.py +49 -0
jarvis/jarvis_code_agent/code_agent.py +4 -4
jarvis/jarvis_data/config_schema.json +8 -18
jarvis/jarvis_rag/__init__.py +2 -2
jarvis/jarvis_rag/cache.py +28 -30
jarvis/jarvis_rag/cli.py +141 -52
jarvis/jarvis_rag/embedding_manager.py +32 -46
jarvis/jarvis_rag/llm_interface.py +32 -34
jarvis/jarvis_rag/query_rewriter.py +11 -12
jarvis/jarvis_rag/rag_pipeline.py +40 -43
jarvis/jarvis_rag/reranker.py +18 -18
jarvis/jarvis_rag/retriever.py +29 -29
jarvis/jarvis_tools/edit_file.py +11 -36
jarvis/jarvis_utils/config.py +10 -25
{jarvis_ai_assistant-0.1.220.dist-info → jarvis_ai_assistant-0.1.221.dist-info}/METADATA +15 -12
{jarvis_ai_assistant-0.1.220.dist-info → jarvis_ai_assistant-0.1.221.dist-info}/RECORD +27 -22
{jarvis_ai_assistant-0.1.220.dist-info → jarvis_ai_assistant-0.1.221.dist-info}/WHEEL +0 -0
{jarvis_ai_assistant-0.1.220.dist-info → jarvis_ai_assistant-0.1.221.dist-info}/entry_points.txt +0 -0
{jarvis_ai_assistant-0.1.220.dist-info → jarvis_ai_assistant-0.1.221.dist-info}/licenses/LICENSE +0 -0
{jarvis_ai_assistant-0.1.220.dist-info → jarvis_ai_assistant-0.1.221.dist-info}/top_level.txt +0 -0

jarvis/jarvis_agent/tool_executor.py ADDED Viewed

@@ -0,0 +1,49 @@
+# -*- coding: utf-8 -*-
+from typing import Any, Tuple, TYPE_CHECKING
+from jarvis.jarvis_utils.input import user_confirm
+from jarvis.jarvis_utils.output import OutputType, PrettyOutput
+if TYPE_CHECKING:
+    from jarvis.jarvis_agent import Agent
+def execute_tool_call(response: str, agent: "Agent") -> Tuple[bool, Any]:
+    """
+    Dispatch a model response to the single output handler that accepts it.
+    Every handler in ``agent.output_handler`` is asked via ``can_handle``
+    whether it accepts ``response``; exactly one match is required before
+    anything is executed.
+    Args:
+        response: The response string from the model, potentially containing a tool call.
+        agent: The agent instance. Its ``output_handler`` collection and
+            ``execute_tool_confirm`` flag are read here, and it is passed
+            through to the chosen handler's ``handle``.
+    Returns:
+        - ``(False, "")`` when no handler matches, or when confirmation is
+          required and the user declines.
+        - ``(False, error_message)`` when more than one handler matches.
+        - Otherwise, whatever the chosen handler's ``handle`` returns
+          (annotated as a ``(bool, Any)`` tuple).
+    """
+    matching = [h for h in agent.output_handler if h.can_handle(response)]
+    if not matching:
+        return False, ""
+    if len(matching) > 1:
+        # Ambiguous response: refuse to run anything and report the candidates.
+        attempted = ", ".join(h.name() for h in matching)
+        error_message = (
+            f"操作失败：检测到多个操作。一次只能执行一个操作。"
+            f"尝试执行的操作：{attempted}"
+        )
+        PrettyOutput.print(error_message, OutputType.WARNING)
+        return False, error_message
+    chosen = matching[0]
+    # The prompt is shown only when the agent asks for confirmation;
+    # a declined prompt skips execution entirely.
+    if agent.execute_tool_confirm and not user_confirm(
+        f"需要执行{chosen.name()}确认执行？", True
+    ):
+        return False, ""
+    print(f"🔧 正在执行{chosen.name()}...")
+    outcome = chosen.handle(response, agent)
+    print(f"✅ {chosen.name()}执行完成")
+    return outcome

jarvis/jarvis_code_agent/code_agent.py CHANGED Viewed

@@ -392,19 +392,19 @@ class CodeAgent:
             return
         # 用户确认最终结果
         if commited:
-            agent.prompt += final_ret
+            agent.session.prompt += final_ret
             return
         PrettyOutput.print(final_ret, OutputType.USER, lang="markdown")
         if not is_confirm_before_apply_patch() or user_confirm(
             "是否使用此回复？", default=True
         ):
-            agent.prompt += final_ret
+            agent.session.prompt += final_ret
             return
-        agent.prompt += final_ret
+        agent.session.prompt += final_ret
         custom_reply = get_multiline_input("请输入自定义回复")
         if custom_reply.strip():  # 如果自定义回复为空，返回空字符串
             agent.set_addon_prompt(custom_reply)
-        agent.prompt += final_ret
+        agent.session.prompt += final_ret
 def main() -> None:

jarvis/jarvis_data/config_schema.json CHANGED Viewed

@@ -185,30 +185,20 @@
       "type": "object",
       "description": "RAG框架的配置",
       "properties": {
-        "embedding_mode": {
+        "embedding_model": {
           "type": "string",
-          "enum": [
-            "performance",
-            "accuracy"
-          ],
-          "default": "performance",
-          "description": "嵌入模型的模式, 'performance'表示性能优先, 'accuracy'表示准确度优先"
+          "default": "BAAI/bge-base-zh-v1.5",
+          "description": "用于RAG的嵌入模型的名称, 默认为 'BAAI/bge-base-zh-v1.5'"
         },
-        "embedding_cache_path": {
+        "rerank_model": {
           "type": "string",
-          "default": ".jarvis/rag/embeddings",
-          "description": "嵌入向量缓存的路径, 相对于当前工作目录"
-        },
-        "vector_db_path": {
-          "type": "string",
-          "default": ".jarvis/rag/vectordb",
-          "description": "向量数据库的持久化存储路径, 相对于当前工作目录"
+          "default": "BAAI/bge-reranker-base",
+          "description": "用于RAG的rerank模型的名称, 默认为 'BAAI/bge-reranker-base'"
         }
       },
       "default": {
-        "embedding_mode": "performance",
-        "embedding_cache_path": ".jarvis/rag/embeddings",
-        "vector_db_path": ".jarvis/rag/vectordb"
+        "embedding_model": "BAAI/bge-base-zh-v1.5",
+        "rerank_model": "BAAI/bge-reranker-base"
       }
     },
     "JARVIS_REPLACE_MAP": {

jarvis/jarvis_rag/__init__.py CHANGED Viewed

@@ -1,7 +1,7 @@
 """
-Jarvis RAG Framework
+Jarvis RAG 框架
-A flexible RAG pipeline with pluggable remote LLMs and local, cache-enabled embedding models.
+一个灵活的RAG管道，具有可插拔的远程LLM和本地带缓存的嵌入模型。
 """
 from .rag_pipeline import JarvisRAGPipeline

jarvis/jarvis_rag/cache.py CHANGED Viewed

@@ -6,74 +6,72 @@ from diskcache import Cache
 class EmbeddingCache:
     """
-    A disk-based cache for storing and retrieving text embeddings.
+    一个用于存储和检索文本嵌入的基于磁盘的缓存。
-    This class uses diskcache to create a persistent, local cache. It generates
-    a key for each text content based on its SHA256 hash, making lookups
-    deterministic and efficient.
+    该类使用diskcache创建一个持久化的本地缓存。它根据每个文本内容的
+    SHA256哈希值为其生成一个键，使得查找过程具有确定性和高效性。
     """
     def __init__(self, cache_dir: str, salt: str = ""):
         """
-        Initializes the EmbeddingCache.
+        初始化EmbeddingCache。
-        Args:
-            cache_dir (str): The directory where the cache will be stored.
-            salt (str): A salt to be added to the hash. This is crucial for
-                        ensuring that embeddings generated by different models
-                        do not collide. For example, use the model name as a salt.
+        参数:
+            cache_dir (str): 缓存将要存储的目录。
+            salt (str): 添加到哈希中的盐值。这对于确保由不同模型生成的
+                        嵌入不会发生冲突至关重要。例如，可以使用模型名称作为盐值。
         """
         self.cache = Cache(cache_dir)
         self.salt = salt
     def _get_key(self, text: str) -> str:
-        """Generates a unique cache key for a given text and salt."""
+        """为一个给定的文本和盐值生成一个唯一的缓存键。"""
         hash_object = hashlib.sha256((self.salt + text).encode("utf-8"))
         return hash_object.hexdigest()
     def get(self, text: str) -> Optional[Any]:
         """
-        Retrieves an embedding from the cache.
+        从缓存中检索一个嵌入。
-        Args:
-            text (str): The text to look up.
+        参数:
+            text (str): 要查找的文本。
-        Returns:
-            The cached embedding, or None if it's not in the cache.
+        返回:
+            缓存的嵌入，如果不在缓存中则返回None。
         """
         key = self._get_key(text)
         return self.cache.get(key)
     def set(self, text: str, embedding: Any) -> None:
         """
-        Stores an embedding in the cache.
+        在缓存中存储一个嵌入。
-        Args:
-            text (str): The text corresponding to the embedding.
-            embedding (Any): The embedding vector to store.
+        参数:
+            text (str): 与嵌入相对应的文本。
+            embedding (Any): 要存储的嵌入向量。
         """
         key = self._get_key(text)
         self.cache.set(key, embedding)
     def get_batch(self, texts: List[str]) -> List[Optional[Any]]:
         """
-        Retrieves a batch of embeddings from the cache.
+        从缓存中检索一批嵌入。
-        Args:
-            texts (List[str]): A list of texts to look up.
+        参数:
+            texts (List[str]): 要查找的文本列表。
-        Returns:
-            A list containing cached embeddings or None for cache misses.
+        返回:
+            一个列表，其中包含缓存的嵌入，对于缓存未命中的情况则为None。
         """
         return [self.get(text) for text in texts]
     def set_batch(self, texts: List[str], embeddings: List[Any]) -> None:
         """
-        Stores a batch of embeddings in the cache.
+        在缓存中存储一批嵌入。
-        Args:
-            texts (List[str]): The list of texts.
-            embeddings (List[Any]): The list of corresponding embeddings.
+        参数:
+            texts (List[str]): 文本列表。
+            embeddings (List[Any]): 相应的嵌入列表。
         """
         if len(texts) != len(embeddings):
             raise ValueError("Length of texts and embeddings must be the same.")
@@ -83,5 +81,5 @@ class EmbeddingCache:
                 self.set(text, embedding)
     def close(self):
-        """Closes the cache connection."""
+        """关闭缓存连接。"""
         self.cache.close()

jarvis/jarvis_rag/cli.py CHANGED Viewed

@@ -4,6 +4,7 @@ from pathlib import Path
 from typing import Optional, List, Literal, cast
 import mimetypes
+import pathspec
 import typer
 from langchain.docstore.document import Document
 from langchain_community.document_loaders import (
@@ -18,29 +19,29 @@ from jarvis.jarvis_utils.utils import init_env
 def is_likely_text_file(file_path: Path) -> bool:
     """
-    Checks if a file is likely to be a text file by reading its beginning.
-    Avoids loading large binary files into memory.
+    通过读取文件开头部分，检查文件是否可能为文本文件。
+    此方法可以避免将大型二进制文件加载到内存中。
     """
     try:
-        # Heuristic 1: Check MIME type if available
+        # 启发式方法1：检查MIME类型（如果可用）
         mime_type, _ = mimetypes.guess_type(file_path)
         if mime_type and mime_type.startswith("text/"):
             return True
         if mime_type and any(x in mime_type for x in ["json", "xml", "javascript"]):
             return True
-        # Heuristic 2: Check for null bytes in the first few KB
+        # 启发式方法2：检查文件的前几KB中是否包含空字节
         with open(file_path, "rb") as f:
-            chunk = f.read(4096)  # Read first 4KB
+            chunk = f.read(4096)  # 读取前4KB
             if b"\x00" in chunk:
-                return False  # Null bytes are a strong indicator of a binary file
+                return False  # 空字节是二进制文件的强指示符
         return True
     except Exception:
         return False
-# Ensure the project root is in the Python path to allow absolute imports
-# This makes the script runnable as a module.
+# 确保项目根目录在Python路径中，以允许绝对导入
+# 这使得脚本可以作为模块运行。
 _project_root = os.path.abspath(
     os.path.join(os.path.dirname(__file__), "..", "..", "..")
 )
@@ -54,13 +55,13 @@ from jarvis.jarvis_rag.rag_pipeline import JarvisRAGPipeline
 app = typer.Typer(
     name="jarvis-rag",
-    help="A command-line tool to interact with the Jarvis RAG framework.",
+    help="一个与Jarvis RAG框架交互的命令行工具。",
     add_completion=False,
 )
 class _CustomPlatformLLM(LLMInterface):
-    """A simple wrapper to make a BasePlatform instance compatible with LLMInterface."""
+    """一个简单的包装器，使BasePlatform实例与LLMInterface兼容。"""
     def __init__(self, platform: BasePlatform):
         self.platform = platform
@@ -73,7 +74,7 @@ class _CustomPlatformLLM(LLMInterface):
 def _create_custom_llm(platform_name: str, model_name: str) -> Optional[LLMInterface]:
-    """Creates an LLM interface from a specific platform and model."""
+    """从指定的平台和模型创建LLM接口。"""
     if not platform_name or not model_name:
         return None
     try:
@@ -90,36 +91,70 @@ def _create_custom_llm(platform_name: str, model_name: str) -> Optional[LLMInter
         return None
+def _load_ragignore_spec() -> tuple[Optional[pathspec.PathSpec], Optional[Path]]:
+    """
+    Load ignore patterns from the directory named by ``_project_root``.
+    ``.jarvis/rag/.ragignore`` is preferred; ``.gitignore`` is consulted only
+    when the former is not a regular file.
+    Returns:
+        ``(spec, root)`` where ``spec`` is the ``PathSpec`` built from the
+        chosen file's lines using the "gitwildmatch" syntax and ``root`` is
+        ``Path(_project_root)``; ``(None, None)`` when neither file exists or
+        the chosen file cannot be read or parsed.
+    """
+    root_dir = Path(_project_root)
+    candidates = (
+        root_dir / ".jarvis" / "rag" / ".ragignore",
+        root_dir / ".gitignore",
+    )
+    # Lazy first-match: .gitignore is only probed when .ragignore is absent.
+    chosen = next((c for c in candidates if c.is_file()), None)
+    if chosen is None:
+        return None, None
+    try:
+        with open(chosen, "r", encoding="utf-8") as handle:
+            raw_lines = handle.read().splitlines()
+        ignore_spec = pathspec.PathSpec.from_lines("gitwildmatch", raw_lines)
+        print(f"✅ 加载忽略规则: {chosen}")
+        return ignore_spec, root_dir
+    except Exception as exc:
+        # Best effort: report the failure and fall back to "no ignore rules".
+        print(f"⚠️ 加载 {chosen.name} 文件失败: {exc}")
+        return None, None
 @app.command(
     "add",
-    help="Add documents from files, directories, or glob patterns (e.g., 'src/**/*.py').",
+    help="从文件、目录或glob模式（例如 'src/**/*.py'）添加文档。",
 )
 def add_documents(
     paths: List[Path] = typer.Argument(
         ...,
-        help="File/directory paths or glob patterns. Shell expansion is supported.",
+        help="文件/目录路径或glob模式。支持Shell扩展。",
     ),
     collection_name: str = typer.Option(
         "jarvis_rag_collection",
         "--collection",
         "-c",
-        help="Name of the collection in the vector database.",
+        help="向量数据库中集合的名称。",
     ),
-    embedding_mode: Optional[str] = typer.Option(
+    embedding_model: Optional[str] = typer.Option(
         None,
-        "--embedding-mode",
+        "--embedding-model",
         "-e",
-        help="Embedding mode ('performance' or 'accuracy'). Overrides global config.",
+        help="嵌入模型的名称。覆盖全局配置。",
     ),
     db_path: Optional[Path] = typer.Option(
-        None, "--db-path", help="Path to the vector database. Overrides global config."
+        None, "--db-path", help="向量数据库的路径。覆盖全局配置。"
+    ),
+    batch_size: int = typer.Option(
+        500,
+        "--batch-size",
+        "-b",
+        help="单个批次中要处理的文档数。",
     ),
 ):
-    """Adds documents to the RAG knowledge base from various sources."""
+    """从不同来源向RAG知识库添加文档。"""
     files_to_process = set()
     for path_str in paths:
-        # Typer with List[Path] might not expand globs, so we do it manually
+        # Typer的List[Path]可能不会扩展glob，所以我们手动处理
         from glob import glob
         expanded_paths = glob(str(path_str), recursive=True)
@@ -141,59 +176,96 @@ def add_documents(
                     print(f"⚠️ 跳过可能的二进制文件: {path}")
     if not files_to_process:
-        print(f"⚠️ 在指定路径中未找到任何文本文件。")
+        print("⚠️ 在指定路径中未找到任何文本文件。")
+        return
+    # 使用 .ragignore 过滤文件
+    ragignore_spec, ragignore_root = _load_ragignore_spec()
+    if ragignore_spec and ragignore_root:
+        initial_count = len(files_to_process)
+        retained_files = set()
+        for file_path in files_to_process:
+            try:
+                # 将文件路径解析为绝对路径以确保正确比较
+                resolved_path = file_path.resolve()
+                relative_path = str(resolved_path.relative_to(ragignore_root))
+                if not ragignore_spec.match_file(relative_path):
+                    retained_files.add(file_path)
+            except ValueError:
+                # 文件不在项目根目录下，保留它
+                retained_files.add(file_path)
+        ignored_count = initial_count - len(retained_files)
+        if ignored_count > 0:
+            print(f"ℹ️ 根据 .ragignore 规则过滤掉 {ignored_count} 个文件。")
+        files_to_process = retained_files
+    if not files_to_process:
+        print("⚠️ 所有找到的文本文件都被忽略规则过滤掉了。")
         return
     print(f"✅ 发现 {len(files_to_process)} 个独立文件待处理。")
     try:
         pipeline = JarvisRAGPipeline(
-            embedding_mode=cast(
-                Optional[Literal["performance", "accuracy"]], embedding_mode
-            ),
+            embedding_model=embedding_model,
             db_path=str(db_path) if db_path else None,
             collection_name=collection_name,
         )
-        docs: List[Document] = []
+        docs_batch: List[Document] = []
+        total_docs_added = 0
         loader: BaseLoader
-        for file_path in sorted(list(files_to_process)):
+        sorted_files = sorted(list(files_to_process))
+        total_files = len(sorted_files)
+        for i, file_path in enumerate(sorted_files):
             try:
                 if file_path.suffix.lower() == ".md":
                     loader = UnstructuredMarkdownLoader(str(file_path))
-                else:  # Default to TextLoader for .txt and all code files
+                else:  # 对.txt和所有代码文件默认使用TextLoader
                     loader = TextLoader(str(file_path), encoding="utf-8")
-                docs.extend(loader.load())
-                print(f"✅ 已加载: {file_path}")
+                docs_batch.extend(loader.load())
+                print(f"✅ 已加载: {file_path} (文件 {i + 1}/{total_files})")
             except Exception as e:
                 print(f"⚠️ 加载失败 {file_path}: {e}")
-        if not docs:
+            # 当批处理已满或是最后一个文件时处理批处理
+            if docs_batch and (len(docs_batch) >= batch_size or (i + 1) == total_files):
+                print(f"⚙️ 正在处理批次，包含 {len(docs_batch)} 个文档...")
+                pipeline.add_documents(docs_batch)
+                total_docs_added += len(docs_batch)
+                print(f"✅ 成功添加 {len(docs_batch)} 个文档。")
+                docs_batch = []  # 清空批处理
+        if total_docs_added == 0:
             print("❌ 未能成功加载任何文档。")
             raise typer.Exit(code=1)
-        pipeline.add_documents(docs)
-        print(f"✅ 成功将 {len(docs)} 个文档的内容添加至集合 '{collection_name}'。")
+        print(
+            f"✅ 成功将 {total_docs_added} 个文档的内容添加至集合 '{collection_name}'。"
+        )
     except Exception as e:
         print(f"❌ 发生严重错误: {e}")
         raise typer.Exit(code=1)
-@app.command("list-docs", help="List all unique documents in the knowledge base.")
+@app.command("list-docs", help="列出知识库中所有唯一的文档。")
 def list_documents(
     collection_name: str = typer.Option(
         "jarvis_rag_collection",
         "--collection",
         "-c",
-        help="Name of the collection in the vector database.",
+        help="向量数据库中集合的名称。",
     ),
     db_path: Optional[Path] = typer.Option(
-        None, "--db-path", help="Path to the vector database. Overrides global config."
+        None, "--db-path", help="向量数据库的路径。覆盖全局配置。"
     ),
 ):
-    """Lists all unique documents in the specified collection."""
+    """列出指定集合中的所有唯一文档。"""
     try:
         pipeline = JarvisRAGPipeline(
             db_path=str(db_path) if db_path else None,
@@ -201,13 +273,13 @@ def list_documents(
         )
         collection = pipeline.retriever.collection
-        results = collection.get()  # Get all items in the collection
+        results = collection.get()  # 获取集合中的所有项目
         if not results or not results["metadatas"]:
             print("ℹ️ 知识库中没有找到任何文档。")
             return
-        # Extract unique source file paths from metadata
+        # 从元数据中提取唯一的源文件路径
         sources = set()
         for metadata in results["metadatas"]:
             if metadata:
@@ -228,38 +300,38 @@ def list_documents(
         raise typer.Exit(code=1)
-@app.command("query", help="Ask a question to the knowledge base.")
+@app.command("query", help="向知识库提问。")
 def query(
-    question: str = typer.Argument(..., help="The question to ask."),
+    question: str = typer.Argument(..., help="要提出的问题。"),
     collection_name: str = typer.Option(
         "jarvis_rag_collection",
         "--collection",
         "-c",
-        help="Name of the collection in the vector database.",
+        help="向量数据库中集合的名称。",
     ),
-    embedding_mode: Optional[str] = typer.Option(
+    embedding_model: Optional[str] = typer.Option(
         None,
-        "--embedding-mode",
+        "--embedding-model",
         "-e",
-        help="Embedding mode ('performance' or 'accuracy'). Overrides global config.",
+        help="嵌入模型的名称。覆盖全局配置。",
     ),
     db_path: Optional[Path] = typer.Option(
-        None, "--db-path", help="Path to the vector database. Overrides global config."
+        None, "--db-path", help="向量数据库的路径。覆盖全局配置。"
     ),
     platform: Optional[str] = typer.Option(
         None,
         "--platform",
         "-p",
-        help="Specify a platform name for the LLM. Overrides the default thinking model.",
+        help="为LLM指定平台名称。覆盖默认的思考模型。",
     ),
     model: Optional[str] = typer.Option(
         None,
         "--model",
         "-m",
-        help="Specify a model name for the LLM. Requires --platform.",
+        help="为LLM指定模型名称。需要 --platform。",
     ),
 ):
-    """Queries the RAG knowledge base and prints the answer."""
+    """查询RAG知识库并打印答案。"""
     if model and not platform:
         print("❌ 错误: --model 需要指定 --platform。")
         raise typer.Exit(code=1)
@@ -271,9 +343,7 @@ def query(
         pipeline = JarvisRAGPipeline(
             llm=custom_llm,
-            embedding_mode=cast(
-                Optional[Literal["performance", "accuracy"]], embedding_mode
-            ),
+            embedding_model=embedding_model,
             db_path=str(db_path) if db_path else None,
             collection_name=collection_name,
         )
@@ -282,7 +352,7 @@ def query(
         answer = pipeline.query(question)
         print("💬 答案:")
-        # We can still use rich.markdown.Markdown as PrettyOutput uses rich underneath
+        # 我们仍然可以使用 rich.markdown.Markdown，因为 PrettyOutput 底层使用了 rich
         from jarvis.jarvis_utils.globals import console
         console.print(Markdown(answer))
@@ -292,6 +362,25 @@ def query(
         raise typer.Exit(code=1)
+# Record whether the optional RAG dependency (langchain) can be imported.
+# NOTE(review): this module also imports from langchain at the top of the file
+# (`from langchain.docstore.document import Document`), so a missing install
+# may fail before this probe is ever reached — confirm.
+try:
+    import langchain  # noqa
+except ImportError:
+    _RAG_INSTALLED = False
+else:
+    _RAG_INSTALLED = True
+def _check_rag_dependencies():
+    """
+    Abort with an install hint when ``_RAG_INSTALLED`` is false.
+    Raises:
+        typer.Exit: with ``code=1`` after printing the hint.
+    """
+    if _RAG_INSTALLED:
+        return
+    print(
+        "❌ RAG依赖项未安装。"
+        "请运行 'pip install \"jarvis-ai-assistant[rag]\"' 来使用此命令。"
+    )
+    raise typer.Exit(code=1)
 def main():
+    """Check RAG dependencies, call ``init_env`` with the "Jarvis RAG" welcome string, then run the Typer app."""
+    _check_rag_dependencies()
     init_env(welcome_str="Jarvis RAG")
     app()

jarvis-ai-assistant 0.1.220__py3-none-any.whl → 0.1.221__py3-none-any.whl

jarvis-ai-assistant 0.1.220__py3-none-any.whl → 0.1.221__py3-none-any.whl