PyPI - jarvis-ai-assistant - Versions diffs - 0.2.2__py3-none-any.whl → 0.2.3__py3-none-any.whl - Mend

jarvis-ai-assistant 0.2.2__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

jarvis/__init__.py +1 -1
jarvis/jarvis_agent/prompts.py +26 -4
jarvis/jarvis_data/config_schema.json +67 -12
jarvis/jarvis_platform/tongyi.py +9 -9
jarvis/jarvis_rag/cli.py +79 -23
jarvis/jarvis_rag/query_rewriter.py +61 -12
jarvis/jarvis_rag/rag_pipeline.py +143 -34
jarvis/jarvis_rag/retriever.py +5 -5
jarvis/jarvis_tools/generate_new_tool.py +22 -1
jarvis/jarvis_utils/config.py +92 -11
jarvis/jarvis_utils/globals.py +29 -8
jarvis/jarvis_utils/input.py +114 -121
jarvis/jarvis_utils/utils.py +3 -0
{jarvis_ai_assistant-0.2.2.dist-info → jarvis_ai_assistant-0.2.3.dist-info}/METADATA +82 -9
{jarvis_ai_assistant-0.2.2.dist-info → jarvis_ai_assistant-0.2.3.dist-info}/RECORD +19 -19
{jarvis_ai_assistant-0.2.2.dist-info → jarvis_ai_assistant-0.2.3.dist-info}/WHEEL +0 -0
{jarvis_ai_assistant-0.2.2.dist-info → jarvis_ai_assistant-0.2.3.dist-info}/entry_points.txt +0 -0
{jarvis_ai_assistant-0.2.2.dist-info → jarvis_ai_assistant-0.2.3.dist-info}/licenses/LICENSE +0 -0
{jarvis_ai_assistant-0.2.2.dist-info → jarvis_ai_assistant-0.2.3.dist-info}/top_level.txt +0 -0

jarvis/__init__.py CHANGED Viewed

@@ -1,4 +1,4 @@
 # -*- coding: utf-8 -*-
 """Jarvis AI Assistant"""
-__version__ = "0.2.2"
+__version__ = "0.2.3"

jarvis/jarvis_agent/prompts.py CHANGED Viewed

@@ -113,6 +113,31 @@ TASK_ANALYSIS_PROMPT = f"""<task_analysis>
            "stderr": f"操作失败: {{str(e)}}"
        }}
    ```
+4. **在工具中调用大模型**：如果工具需要调用大模型来完成子任务（例如，生成代码、分析文本等），为了避免干扰主对话流程，建议创建一个独立的大模型实例。
+   ```python
+    # 通过 agent 实例获取模型配置
+    agent = args.get("agent")
+    if not agent:
+        return {{"success": False, "stderr": "Agent not found."}}
+    current_model = agent.model
+    platform_name = current_model.platform_name()
+    model_name = current_model.name()
+    # 创建独立的模型实例
+    from jarvis.jarvis_platform.registry import PlatformRegistry
+    llm = PlatformRegistry().create_platform(platform_name)
+    if not llm:
+        return {{"success": False, "stderr": f"Platform {{platform_name}} not found."}}
+    llm.set_model_name(model_name)
+    llm.set_suppress_output(True) # 工具内的调用通常不需要流式输出
+    # 使用新实例调用大模型
+    PrettyOutput.print("正在执行子任务...", OutputType.INFO)
+    response = llm.chat_until_success("你的提示词")
+    PrettyOutput.print("子任务完成", OutputType.SUCCESS)
+   ```
 </tool_requirements>
 <methodology_requirements>
 方法论格式要求:
@@ -139,10 +164,7 @@ arguments:
     from jarvis.jarvis_utils.output import PrettyOutput, OutputType
     class 工具名称:
         name = "工具名称"
-        description = "Tool for text transformation"
-                Tool description
-        适用场景：1. 格式化文本; 2. 处理标题; 3. 标准化输出
-        \"\"\"
+        description = "Tool description"
         parameters = {{
             "type": "object",
             "properties": {{

jarvis/jarvis_data/config_schema.json CHANGED Viewed

@@ -141,38 +141,47 @@
       "description": "思考操作模型名称",
       "default": "deep_seek"
     },
-    "JARVIS_MODEL_GROUP": {
+    "JARVIS_LLM_GROUP": {
       "type": "string",
-      "description": "选择一个预定义的模型组"
+      "description": "选择一个预定义的模型组",
+      "default": ""
     },
-    "JARVIS_MODEL_GROUPS": {
+    "JARVIS_LLM_GROUPS": {
       "type": "array",
       "description": "预定义的模型配置组",
+      "default": [],
       "items": {
         "type": "object",
         "additionalProperties": {
           "type": "object",
           "properties": {
             "JARVIS_PLATFORM": {
-              "type": "string"
+              "type": "string",
+              "default": "yuanbao"
             },
             "JARVIS_MODEL": {
-              "type": "string"
+              "type": "string",
+              "default": "deep_seek_v3"
             },
             "JARVIS_THINKING_PLATFORM": {
-              "type": "string"
+              "type": "string",
+              "default": "yuanbao"
             },
             "JARVIS_THINKING_MODEL": {
-              "type": "string"
+              "type": "string",
+              "default": "deep_seek"
             },
             "JARVIS_MAX_TOKEN_COUNT": {
-              "type": "number"
+              "type": "number",
+              "default": 960000
             },
             "JARVIS_MAX_INPUT_TOKEN_COUNT": {
-              "type": "number"
+              "type": "number",
+              "default": 32000
             },
             "JARVIS_MAX_BIG_CONTENT_SIZE": {
-              "type": "number"
+              "type": "number",
+              "default": 160000
             }
           },
           "required": [
@@ -235,9 +244,43 @@
       "description": "是否启用静态代码分析",
       "default": true
     },
+    "JARVIS_RAG_GROUP": {
+      "type": "string",
+      "description": "选择一个预定义的RAG配置组",
+      "default": ""
+    },
+    "JARVIS_RAG_GROUPS": {
+      "type": "array",
+      "description": "预定义的RAG配置组",
+      "default": [],
+      "items": {
+        "type": "object",
+        "additionalProperties": {
+          "type": "object",
+          "properties": {
+            "embedding_model": {
+              "type": "string",
+              "default": "BAAI/bge-base-zh-v1.5"
+            },
+            "rerank_model": {
+              "type": "string",
+              "default": "BAAI/bge-reranker-base"
+            },
+            "use_bm25": {
+              "type": "boolean",
+              "default": true
+            },
+            "use_rerank": {
+              "type": "boolean",
+              "default": true
+            }
+          }
+        }
+      }
+    },
     "JARVIS_RAG": {
       "type": "object",
-      "description": "RAG框架的配置",
+      "description": "RAG框架的顶层配置。注意：此处的设置将覆盖任何由JARVIS_RAG_GROUP选择的组配置。",
       "properties": {
         "embedding_model": {
           "type": "string",
@@ -248,11 +291,23 @@
           "type": "string",
           "default": "BAAI/bge-reranker-base",
           "description": "用于RAG的rerank模型的名称, 默认为 'BAAI/bge-reranker-base'"
+        },
+        "use_bm25": {
+          "type": "boolean",
+          "default": true,
+          "description": "是否在RAG中为检索使用BM25, 默认为 true"
+        },
+        "use_rerank": {
+          "type": "boolean",
+          "default": true,
+          "description": "是否在RAG中为检索使用rerank, 默认为 true"
         }
       },
       "default": {
         "embedding_model": "BAAI/bge-base-zh-v1.5",
-        "rerank_model": "BAAI/bge-reranker-base"
+        "rerank_model": "BAAI/bge-reranker-base",
+        "use_bm25": true,
+        "use_rerank": true
       }
     },
     "JARVIS_REPLACE_MAP": {

jarvis/jarvis_platform/tongyi.py CHANGED Viewed

@@ -81,10 +81,10 @@ class TongyiPlatform(BasePlatform):
                 "contentType": "text",
                 "role": "user",
                 "ext": {
-                    "searchType": "",
+                    "searchType": "depth" if self.web else "",
                     "pptGenerate": False,
-                    "deepThink": False,
-                    "deepResearch": False,
+                    "deepThink": self.model_name == "Thinking",
+                    "deepResearch": self.model_name == "Deep-Research",
                 },
             }
         ]
@@ -98,10 +98,10 @@ class TongyiPlatform(BasePlatform):
                     "contentType": "text",
                     "role": "system",
                     "ext": {
-                        "searchType": "",
+                        "searchType": "depth" if self.web else "",
                         "pptGenerate": False,
-                        "deepThink": False,
-                        "deepResearch": False,
+                        "deepThink": self.model_name == "Thinking",
+                        "deepResearch": self.model_name == "Deep-Research",
                     },
                 },
             )
@@ -140,13 +140,13 @@ class TongyiPlatform(BasePlatform):
             "parentMsgId": self.msg_id,
             "params": {
                 "agentId": "",
-                "searchType": "",
+                "searchType": "depth" if self.web else "",
                 "pptGenerate": False,
                 "bizScene": "code_chat" if self.model_name == "Code-Chat" else "",
                 "bizSceneInfo": {},
                 "specifiedModel": "",
-                "deepThink": True if self.model_name == "Thinking" else False,
-                "deepResearch": False,
+                "deepThink": self.model_name == "Thinking",
+                "deepResearch": self.model_name == "Deep-Research",
                 "fileUploadBatchId": (
                     self.uploaded_file_info[0]["batchId"]
                     if self.uploaded_file_info

jarvis/jarvis_rag/cli.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import os
 import sys
 from pathlib import Path
-from typing import Optional, List, Literal, cast
+from typing import Optional, List, Literal, cast, Tuple
 import mimetypes
 import pathspec  # type: ignore
@@ -15,6 +15,11 @@ from langchain_core.document_loaders.base import BaseLoader
 from rich.markdown import Markdown
 from jarvis.jarvis_utils.utils import init_env
+from jarvis.jarvis_utils.config import (
+    get_rag_embedding_model,
+    get_rag_use_bm25,
+    get_rag_use_rerank,
+)
 def is_likely_text_file(file_path: Path) -> bool:
@@ -65,9 +70,7 @@ class _CustomPlatformLLM(LLMInterface):
     def __init__(self, platform: BasePlatform):
         self.platform = platform
-        print(
-            f"✅ 使用自定义LLM: 平台='{platform.platform_name()}', 模型='{platform.name()}'"
-        )
+        print(f"✅ 使用自定义LLM: 平台='{platform.platform_name()}', 模型='{platform.name()}'")
     def generate(self, prompt: str, **kwargs) -> str:
         return self.platform.chat_until_success(prompt)
@@ -91,7 +94,7 @@ def _create_custom_llm(platform_name: str, model_name: str) -> Optional[LLMInter
         return None
-def _load_ragignore_spec() -> tuple[Optional[pathspec.PathSpec], Optional[Path]]:
+def _load_ragignore_spec() -> Tuple[Optional[pathspec.PathSpec], Optional[Path]]:
     """
     从项目根目录加载忽略模式。
     首先查找 `.jarvis/rag/.ragignore`，如果未找到，则回退到 `.gitignore`。
@@ -140,9 +143,7 @@ def add_documents(
         "-e",
         help="嵌入模型的名称。覆盖全局配置。",
     ),
-    db_path: Optional[Path] = typer.Option(
-        None, "--db-path", help="向量数据库的路径。覆盖全局配置。"
-    ),
+    db_path: Optional[Path] = typer.Option(None, "--db-path", help="向量数据库的路径。覆盖全局配置。"),
     batch_size: int = typer.Option(
         500,
         "--batch-size",
@@ -244,9 +245,7 @@ def add_documents(
             print("❌ 未能成功加载任何文档。")
             raise typer.Exit(code=1)
-        print(
-            f"✅ 成功将 {total_docs_added} 个文档的内容添加至集合 '{collection_name}'。"
-        )
+        print(f"✅ 成功将 {total_docs_added} 个文档的内容添加至集合 '{collection_name}'。")
     except Exception as e:
         print(f"❌ 发生严重错误: {e}")
@@ -261,9 +260,7 @@ def list_documents(
         "-c",
         help="向量数据库中集合的名称。",
     ),
-    db_path: Optional[Path] = typer.Option(
-        None, "--db-path", help="向量数据库的路径。覆盖全局配置。"
-    ),
+    db_path: Optional[Path] = typer.Option(None, "--db-path", help="向量数据库的路径。覆盖全局配置。"),
 ):
     """列出指定集合中的所有唯一文档。"""
     try:
@@ -272,7 +269,7 @@ def list_documents(
             collection_name=collection_name,
         )
-        collection = pipeline.retriever.collection
+        collection = pipeline._get_collection()
         results = collection.get()  # 获取集合中的所有项目
         if not results or not results["metadatas"]:
@@ -300,6 +297,63 @@ def list_documents(
         raise typer.Exit(code=1)
+@app.command("retrieve", help="仅从知识库检索相关文档，不生成答案。")
+def retrieve(
+    question: str = typer.Argument(..., help="要提出的问题。"),
+    collection_name: str = typer.Option(
+        "jarvis_rag_collection",
+        "--collection",
+        "-c",
+        help="向量数据库中集合的名称。",
+    ),
+    embedding_model: Optional[str] = typer.Option(
+        None,
+        "--embedding-model",
+        "-e",
+        help="嵌入模型的名称。覆盖全局配置。",
+    ),
+    db_path: Optional[Path] = typer.Option(None, "--db-path", help="向量数据库的路径。覆盖全局配置。"),
+    n_results: int = typer.Option(5, "--top-n", help="要检索的文档数量。"),
+):
+    """仅从RAG知识库检索文档并打印结果。"""
+    try:
+        # 如果未在命令行中指定，则从配置中加载RAG设置
+        final_embedding_model = embedding_model or get_rag_embedding_model()
+        use_bm25 = get_rag_use_bm25()
+        use_rerank = get_rag_use_rerank()
+        pipeline = JarvisRAGPipeline(
+            embedding_model=final_embedding_model,
+            db_path=str(db_path) if db_path else None,
+            collection_name=collection_name,
+            use_bm25=use_bm25,
+            use_rerank=use_rerank,
+        )
+        print(f"🤔 正在为问题检索文档: '{question}'")
+        retrieved_docs = pipeline.retrieve_only(question, n_results=n_results)
+        if not retrieved_docs:
+            print("ℹ️ 未找到相关文档。")
+            return
+        print(f"✅ 成功检索到 {len(retrieved_docs)} 个文档:")
+        from jarvis.jarvis_utils.globals import console
+        for i, doc in enumerate(retrieved_docs, 1):
+            source = doc.metadata.get("source", "未知来源")
+            content = doc.page_content
+            panel_title = f"文档 {i} | 来源: {source}"
+            console.print(
+                f"\n[bold magenta]{panel_title}[/bold magenta]"
+            )
+            console.print(Markdown(f"```\n{content}\n```"))
+    except Exception as e:
+        print(f"❌ 发生错误: {e}")
+        raise typer.Exit(code=1)
 @app.command("query", help="向知识库提问。")
 def query(
     question: str = typer.Argument(..., help="要提出的问题。"),
@@ -315,9 +369,7 @@ def query(
         "-e",
         help="嵌入模型的名称。覆盖全局配置。",
     ),
-    db_path: Optional[Path] = typer.Option(
-        None, "--db-path", help="向量数据库的路径。覆盖全局配置。"
-    ),
+    db_path: Optional[Path] = typer.Option(None, "--db-path", help="向量数据库的路径。覆盖全局配置。"),
     platform: Optional[str] = typer.Option(
         None,
         "--platform",
@@ -341,11 +393,18 @@ def query(
         if (platform or model) and not custom_llm:
             raise typer.Exit(code=1)
+        # 如果未在命令行中指定，则从配置中加载RAG设置
+        final_embedding_model = embedding_model or get_rag_embedding_model()
+        use_bm25 = get_rag_use_bm25()
+        use_rerank = get_rag_use_rerank()
         pipeline = JarvisRAGPipeline(
             llm=custom_llm,
-            embedding_model=embedding_model,
+            embedding_model=final_embedding_model,
             db_path=str(db_path) if db_path else None,
             collection_name=collection_name,
+            use_bm25=use_bm25,
+            use_rerank=use_rerank,
         )
         print(f"🤔 正在查询: '{question}'")
@@ -373,10 +432,7 @@ except ImportError:
 def _check_rag_dependencies():
     if not _RAG_INSTALLED:
-        print(
-            "❌ RAG依赖项未安装。"
-            "请运行 'pip install \"jarvis-ai-assistant[rag]\"' 来使用此命令。"
-        )
+        print("❌ RAG依赖项未安装。" "请运行 'pip install \"jarvis-ai-assistant[rag]\"' 来使用此命令。")
         raise typer.Exit(code=1)

jarvis/jarvis_rag/query_rewriter.py CHANGED Viewed

@@ -1,5 +1,6 @@
 from typing import List
 from .llm_interface import LLMInterface
+from jarvis.jarvis_utils.output import PrettyOutput, OutputType
 class QueryRewriter:
@@ -20,20 +21,29 @@ class QueryRewriter:
     def _create_prompt_template(self) -> str:
         """为多查询重写任务创建提示模板。"""
         return """
-你是一个精通检索的AI助手。你的任务是将以下这个单一的用户问题，从不同角度改写成 3 个不同的、但语义上相关的搜索查询。这有助于在知识库中进行更全面的搜索。
+你是一个精通检索和语言的AI助手。你的任务是将以下这个单一的用户问题，改写为几个语义相关但表达方式不同的搜索查询，并提供英文翻译。这有助于在多语言知识库中进行更全面的搜索。
 请遵循以下原则：
-1.  **多样性**：生成的查询应尝试使用不同的关键词和表述方式。
-2.  **保留核心意图**：所有查询都必须围绕原始问题的核心意图。
-3.  **简洁性**：每个查询都应该是独立的、可以直接用于搜索的短语或问题。
-4.  **格式要求**：请直接输出 3 个查询，每个查询占一行，用换行符分隔。不要添加任何编号、前缀或解释。
+1.  **保留核心意图**: 所有查询都必须围绕原始问题的核心意图。
+2.  **查询类型**:
+    - **同义词/相关术语查询**: 使用原始语言，通过替换同义词或相关术语来生成1-2个新的查询。
+    - **英文翻译查询**: 将原始问题翻译成一个简洁的英文搜索查询。
+3.  **简洁性**: 每个查询都应该是独立的、可以直接用于搜索的短语或问题。
+4.  **严格格式要求**: 你必须将所有重写后的查询放置在 `<REWRITE>` 和 `</REWRITE>` 标签之间。每个查询占一行。不要在标签内外添加任何编号、前缀或解释。
+示例输出格式:
+<REWRITE>
+使用不同表述的中文查询
+另一个中文查询
+English version of the query
+</REWRITE>
 原始问题:
 ---
 {query}
 ---
-3个改写后的查询 (每行一个):
+请将改写后的查询包裹在 `<REWRITE>` 标签内:
 """
     def rewrite(self, query: str) -> List[str]:
@@ -47,16 +57,55 @@ class QueryRewriter:
             一个经过重写、搜索优化的查询列表。
         """
         prompt = self.rewrite_prompt_template.format(query=query)
-        print(f"✍️  正在将原始查询重写为多个搜索查询...")
+        PrettyOutput.print(
+            "正在将原始查询重写为多个搜索查询...", output_type=OutputType.INFO, timestamp=False
+        )
+        import re
+        max_retries = 3
+        attempts = 0
+        rewritten_queries = []
+        response_text = ""
+        while attempts < max_retries:
+            attempts += 1
+            response_text = self.llm.generate(prompt)
+            match = re.search(r"<REWRITE>(.*?)</REWRITE>", response_text, re.DOTALL)
+            if match:
+                content = match.group(1).strip()
+                rewritten_queries = [
+                    line.strip() for line in content.split("\n") if line.strip()
+                ]
+                PrettyOutput.print(
+                    f"成功从LLM响应中提取到内容 (尝试 {attempts}/{max_retries})。",
+                    output_type=OutputType.SUCCESS,
+                    timestamp=False,
+                )
+                break  # 提取成功，退出循环
+            else:
+                PrettyOutput.print(
+                    f"未能从LLM响应中提取内容。正在重试... ({attempts}/{max_retries})",
+                    output_type=OutputType.WARNING,
+                    timestamp=False,
+                )
-        response_text = self.llm.generate(prompt)
-        rewritten_queries = [
-            line.strip() for line in response_text.strip().split("\n") if line.strip()
-        ]
+        # 如果所有重试都失败，则跳过重写步骤
+        if not rewritten_queries:
+            PrettyOutput.print(
+                "所有重试均失败。跳过查询重写，将仅使用原始查询。",
+                output_type=OutputType.ERROR,
+                timestamp=False,
+            )
         # 同时包含原始查询以保证鲁棒性
         if query not in rewritten_queries:
             rewritten_queries.insert(0, query)
-        print(f"✅ 生成了 {len(rewritten_queries)} 个查询变体。")
+        PrettyOutput.print(
+            f"生成了 {len(rewritten_queries)} 个查询变体。",
+            output_type=OutputType.SUCCESS,
+            timestamp=False,
+        )
         return rewritten_queries

jarvis-ai-assistant 0.2.2__py3-none-any.whl → 0.2.3__py3-none-any.whl

jarvis-ai-assistant 0.2.2__py3-none-any.whl → 0.2.3__py3-none-any.whl