PyPI - auto-coder - Versions diffs - 0.1.288__py3-none-any.whl → 0.1.290__py3-none-any.whl - Mend

auto-coder 0.1.288__py3-none-any.whl → 0.1.290__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of auto-coder might be problematic. Click here for more details.

Files changed (25) hide show

{auto_coder-0.1.288.dist-info → auto_coder-0.1.290.dist-info}/METADATA +2 -2
{auto_coder-0.1.288.dist-info → auto_coder-0.1.290.dist-info}/RECORD +25 -21
autocoder/auto_coder_rag.py +10 -0
autocoder/chat_auto_coder_lang.py +16 -16
autocoder/common/__init__.py +4 -0
autocoder/common/auto_coder_lang.py +16 -4
autocoder/common/mcp_hub.py +99 -77
autocoder/common/mcp_server.py +162 -61
autocoder/index/filter/quick_filter.py +373 -3
autocoder/rag/api_server.py +48 -0
autocoder/rag/cache/byzer_storage_cache.py +254 -44
autocoder/rag/cache/cache_result_merge.py +265 -0
autocoder/rag/cache/file_monitor_cache.py +117 -4
autocoder/rag/cache/local_byzer_storage_cache.py +286 -58
autocoder/rag/cache/rag_file_meta.py +494 -0
autocoder/rag/cache/simple_cache.py +67 -3
autocoder/rag/conversation_to_queries.py +139 -0
autocoder/rag/long_context_rag.py +31 -12
autocoder/rag/qa_conversation_strategy.py +21 -10
autocoder/rag/searchable.py +58 -0
autocoder/version.py +1 -1
{auto_coder-0.1.288.dist-info → auto_coder-0.1.290.dist-info}/LICENSE +0 -0
{auto_coder-0.1.288.dist-info → auto_coder-0.1.290.dist-info}/WHEEL +0 -0
{auto_coder-0.1.288.dist-info → auto_coder-0.1.290.dist-info}/entry_points.txt +0 -0
{auto_coder-0.1.288.dist-info → auto_coder-0.1.290.dist-info}/top_level.txt +0 -0

autocoder/rag/conversation_to_queries.py ADDED Viewed

@@ -0,0 +1,139 @@
+from typing import List, Dict, Any, Optional, Union
+import logging
+import byzerllm
+from pydantic import BaseModel
+from autocoder.common import AutoCoderArgs
+logger = logging.getLogger(__name__)
+class SearchQuery(BaseModel):
+    # One search query derived from a conversation, plus the ranking
+    # information attached to it.
+    # NOTE(review): the class docstring below is left verbatim on purpose --
+    # pydantic copies a model's docstring into its JSON schema description,
+    # which may be forwarded to the LLM; confirm before translating it.
+    """搜索查询模型"""
+    query: str
+    importance: int = 5  # Importance of the query on a 1-10 scale; the range is not enforced by validation here
+    purpose: str = ""    # Short explanation of what the query is meant to find
+class ConversationToQueries:
+    """
+    Utility class that turns a conversation history into search queries.
+    """
+    def __init__(self, llm: Union[byzerllm.ByzerLLM, byzerllm.SimpleByzerLLM]):
+        """
+        Store the LLM client used to run the prompt function.
+        Args:
+            llm: LLM instance; extract_queries binds it to the prompt
+                function through with_llm().
+        """
+        self.llm = llm
+    # NOTE: the docstring of generate_search_queries is not documentation. It
+    # contains Jinja placeholders ({{ max_queries }}, {% for msg in
+    # conversations %}) and the function has no body, so it is presumably
+    # rendered by @byzerllm.prompt() as the prompt sent to the model -- confirm
+    # against byzerllm. It is runtime text and must stay byte-identical.
+    # Parameters: `conversations` is a list of dicts read through msg.role and
+    # msg.content; `max_queries` is the upper bound quoted to the model.
+    @byzerllm.prompt()
+    def generate_search_queries(self, conversations: List[Dict[str, Any]], max_queries: int = 3) -> str:
+        """
+        根据历史对话生成搜索查询。
+        参数:
+            conversations: 历史对话列表，每个对话是一个字典，包含 'role' 和 'content' 字段
+            max_queries: 最大生成的查询数量，默认为 3
+        返回:
+            生成的搜索查询列表的 JSON 字符串
+        任务说明:
+        你是一个专业的对话分析助手。你的任务是分析用户与 AI 的对话历史，从中提取关键信息，
+        并生成用于搜索引擎的查询，以便获取与对话相关的知识和信息。
+        具体要求:
+        1. 仔细分析对话历史，特别是最近的几轮对话
+        2. 识别用户可能需要更多信息或知识的关键问题和主题
+        3. 将这些关键问题转化为明确、简洁的搜索查询
+        4. 每个查询应该足够具体，能够通过搜索引擎找到有用的结果
+        5. 为每个查询提供重要性评分（1-10 分）和用途说明
+        6. 最多生成 {{ max_queries }} 个查询，按重要性排序
+        7. 返回符合指定格式的 JSON 数据
+        可能的场景:
+        - 用户询问特定技术或概念，需要进一步的解释或示例
+        - 用户遇到编程问题，需要查找解决方案或最佳实践
+        - 用户讨论的话题涉及多个方面，需要查找不同角度的信息
+        - 用户想了解某个领域的最新发展或趋势
+        ---
+        对话历史:
+        <conversations>
+        {% for msg in conversations %}
+        {{ msg.role }}: {{ msg.content }}
+        {% endfor %}
+        </conversations>
+        请分析上述对话，提取关键问题并生成最多 {{ max_queries }} 个搜索查询。
+        输出格式:
+        ```json
+        [
+          {
+            "query": "搜索查询1",
+            "importance": 评分(1-10),
+            "purpose": "该查询的目的说明"
+          },
+          {
+            "query": "搜索查询2",
+            "importance": 评分(1-10),
+            "purpose": "该查询的目的说明"
+          }
+        ]
+        ```
+        """
+    def extract_queries(self, conversations: List[Dict[str, Any]], max_queries: int = 3) -> List[SearchQuery]:
+        """
+        Extract search queries from a conversation history.
+        Args:
+            conversations: Conversation history passed to the prompt function.
+            max_queries: Maximum number of queries the prompt asks for.
+        Returns:
+            The SearchQuery objects sorted by importance, highest first.
+            Any exception raised while generating or sorting is logged and
+            an empty list is returned instead.
+        """
+        try:
+            # Run the prompt function with the stored LLM and request
+            # SearchQuery-typed results.
+            # NOTE(review): the .sort() below assumes run() returns a list of
+            # SearchQuery -- confirm against byzerllm's with_return_type().
+            queries = self.generate_search_queries.with_llm(self.llm).with_return_type(SearchQuery).run(
+                conversations=conversations,
+                max_queries=max_queries
+            )
+            # Sort in place, most important query first
+            queries.sort(key=lambda x: x.importance, reverse=True)
+            return queries
+        except Exception as e:
+            # Best-effort: any failure yields "no extra queries" rather than an error
+            logger.error(f"Error extracting queries from conversation: {str(e)}")
+            return []
+def extract_search_queries(
+    conversations: List[Dict[str, Any]],
+    args: AutoCoderArgs,
+    llm: Union[byzerllm.ByzerLLM, byzerllm.SimpleByzerLLM],
+    max_queries: int = 3,
+) -> List[SearchQuery]:
+    """
+    Convenience wrapper that extracts search queries from a conversation.
+    Args:
+        conversations: Conversation history, forwarded unchanged to
+            ConversationToQueries.extract_queries.
+        args: AutoCoderArgs of the caller. Not read by this function; it is
+            kept because callers pass it by keyword.
+        llm: LLM instance used to run the query-generation prompt.
+        max_queries: Maximum number of queries to generate. Any value
+            <= 0 disables extraction, so no LLM call is made.
+    Returns:
+        The list produced by ConversationToQueries.extract_queries, or an
+        empty list when extraction is disabled or fails.
+    """
+    # A non-positive limit means "no extra queries". The previous check only
+    # caught exactly 0, so a negative limit still triggered an LLM call with a
+    # nonsensical "at most -1 queries" prompt. None is passed through as
+    # before rather than compared, since `None <= 0` would raise TypeError.
+    if max_queries is not None and max_queries <= 0:
+        return []
+    try:
+        return ConversationToQueries(llm).extract_queries(conversations, max_queries)
+    except Exception as e:
+        # Best-effort: callers fall back to the original query alone
+        logger.error(f"Error extracting search queries from conversation: {str(e)}")
+        return []

autocoder/rag/long_context_rag.py CHANGED Viewed

@@ -38,7 +38,8 @@ from pydantic import BaseModel
 from byzerllm.utils.types import SingleOutputMeta
 from autocoder.rag.lang import get_message_with_format_and_newline
 from autocoder.rag.qa_conversation_strategy import get_qa_strategy
+from autocoder.rag.searchable import SearchableResults
+from autocoder.rag.conversation_to_queries import extract_search_queries
 try:
     from autocoder_pro.rag.llm_compute import LLMComputeEngine
     pro_version = version("auto-coder-pro")
@@ -257,7 +258,7 @@ class LongContextRAG:
         请根据提供的文档内容、用户对话历史以及最后一个问题，提取并总结文档中与问题相关的重要信息。
         如果文档中没有相关信息，请回复"该文档中没有与问题相关的信息"。
         提取的信息尽量保持和原文中的一样，并且只输出这些信息。
-        """
+        """
     def _get_document_retriever_class(self):
         """Get the document retriever class based on configuration."""
@@ -333,7 +334,9 @@ class LongContextRAG:
     def _filter_docs(self, conversations: List[Dict[str, str]]) -> DocFilterResult:
         query = conversations[-1]["content"]
-        documents = self._retrieve_documents(options={"query": query})
+        queries = extract_search_queries(conversations=conversations, args=self.args, llm=self.llm, max_queries=self.args.rag_recall_max_queries)
+        documents = self._retrieve_documents(
+            options={"queries": [query] + [query.query for query in queries]})
         return self.doc_filter.filter_docs(
             conversations=conversations, documents=documents
         )
@@ -500,6 +503,9 @@ class LongContextRAG:
         except json.JSONDecodeError:
             pass
+        if not only_contexts and extra_request_params.get("only_contexts", False):
+            only_contexts = True
         logger.info(f"Query: {query} only_contexts: {only_contexts}")
         start_time = time.time()
@@ -543,7 +549,10 @@ class LongContextRAG:
                 model_name=rag_stat.recall_stat.model_name
             )
             query = conversations[-1]["content"]
-            documents = self._retrieve_documents(options={"query": query})
+            queries = extract_search_queries(
+                conversations=conversations, args=self.args, llm=self.llm, max_queries=self.args.rag_recall_max_queries)
+            documents = self._retrieve_documents(
+                options={"queries": [query] + [query.query for query in queries]})
             # 使用带进度报告的过滤方法
             for progress_update, result in self.doc_filter.filter_docs_with_progress(conversations, documents):
@@ -593,10 +602,19 @@ class LongContextRAG:
             )
             if only_contexts:
-                final_docs = []
-                for doc in relevant_docs:
-                    final_docs.append(doc.model_dump())
-                return [json.dumps(final_docs, ensure_ascii=False)], []
+                try:
+                    searcher = SearchableResults()
+                    result = searcher.reorder(docs=relevant_docs)
+                    yield (json.dumps(result.model_dump(), ensure_ascii=False), SingleOutputMeta(input_tokens_count=rag_stat.recall_stat.total_input_tokens + rag_stat.chunk_stat.total_input_tokens,
+                                                                                                 generated_tokens_count=rag_stat.recall_stat.total_generated_tokens +
+                                                                                                 rag_stat.chunk_stat.total_generated_tokens,
+                                                                                                 ))
+                except Exception as e:
+                    yield (str(e), SingleOutputMeta(input_tokens_count=rag_stat.recall_stat.total_input_tokens + rag_stat.chunk_stat.total_input_tokens,
+                                                    generated_tokens_count=rag_stat.recall_stat.total_generated_tokens +
+                                                    rag_stat.chunk_stat.total_generated_tokens,
+                                                    ))
+                return
             if not relevant_docs:
                 yield ("没有找到可以回答你问题的相关文档", SingleOutputMeta(input_tokens_count=rag_stat.recall_stat.total_input_tokens + rag_stat.chunk_stat.total_input_tokens,
@@ -816,12 +834,13 @@ class LongContextRAG:
                 self._print_rag_stats(rag_stat)
             else:
-                qa_strategy = get_qa_strategy(self.args.rag_qa_conversation_strategy)
+                qa_strategy = get_qa_strategy(
+                    self.args.rag_qa_conversation_strategy)
                 new_conversations = qa_strategy.create_conversation(
                     documents=[doc.source_code for doc in relevant_docs],
-                    conversations=conversations
-                )
+                    conversations=conversations, local_image_host=self.args.local_image_host
+                )
                 chunks = target_llm.stream_chat_oai(
                     conversations=new_conversations,

autocoder/rag/qa_conversation_strategy.py CHANGED Viewed

@@ -8,7 +8,7 @@ class QAConversationStrategy(ABC):
     Different strategies organize documents and conversations differently.
     """
     @abstractmethod
-    def create_conversation(self, documents: List[Any], conversations: List[Dict[str,str]]) -> List[Dict]:
+    def create_conversation(self, documents: List[Any], conversations: List[Dict[str,str]], local_image_host: str) -> List[Dict]:
         """
         Create a conversation structure based on documents and history
@@ -26,10 +26,10 @@ class MultiRoundStrategy(QAConversationStrategy):
     Multi-round strategy: First let the model read documents, then do Q&A.
     Creates multiple conversation turns.
     """
-    def create_conversation(self, documents: List[Any], conversations: List[Dict[str,str]]) -> List[Dict]:
+    def create_conversation(self, documents: List[Any], conversations: List[Dict[str,str]], local_image_host: str) -> List[Dict]:
         messages = []
         messages.extend([
-            {"role": "user", "content": self._read_docs_prompt.prompt(documents)},
+            {"role": "user", "content": self._read_docs_prompt.prompt(documents, local_image_host)},
             {"role": "assistant", "content": "好的"}
         ])
         messages.extend(conversations)
@@ -37,7 +37,7 @@ class MultiRoundStrategy(QAConversationStrategy):
     @byzerllm.prompt()
     def _read_docs_prompt(
-        self, relevant_docs: List[str]
+        self, relevant_docs: List[str], local_image_host: str
     ) -> Generator[str, None, None]:
         """
         请阅读以下：
@@ -53,29 +53,35 @@ class MultiRoundStrategy(QAConversationStrategy):
         - 如果文档提供的信息无法回答问题,请明确回复:"抱歉,文档中没有足够的信息来回答这个问题。"
         - 不要添加、推测或扩展文档未提及的信息
-        2. 格式如 ![image](./path.png) 的 Markdown 图片处理
+        2. 格式如 ![image](/path/to/images/path.png) 的 Markdown 图片处理
         - 根据Markdown 图片前后文本内容推测改图片与问题的相关性，有相关性则在回答中输出该Markdown图片路径
         - 根据相关图片在文档中的位置，自然融入答复内容,保持上下文连贯
         - 完整保留原始图片路径,不省略任何部分
         3. 回答格式要求
         - 使用markdown格式提升可读性
+        {% if local_image_host %}
+        4. 图片路径处理
+        - 图片地址需返回绝对路径,
+        - 为请求图片资源 需增加 http://{{ local_image_host }}/static/ 作为前缀
+        例如：/path/to/images/image.png， 返回 http://{{ local_image_host }}/static/path/to/images/image.png
+        {% endif %}
         """
 class SingleRoundStrategy(QAConversationStrategy):
     """
     Single-round strategy: Put documents and conversation history in a single round.
     """
-    def create_conversation(self, documents: List[Any], conversations: List[Dict[str,str]]) -> List[Dict]:
+    def create_conversation(self, documents: List[Any], conversations: List[Dict[str,str]], local_image_host: str) -> List[Dict]:
         messages = []
         messages.extend([
-            {"role": "user", "content": self._single_round_answer_question.prompt(documents, conversations)}
+            {"role": "user", "content": self._single_round_answer_question.prompt(documents, conversations, local_image_host)}
         ])
         return messages
     @byzerllm.prompt()
     def _single_round_answer_question(
-        self, relevant_docs: List[str], conversations: List[Dict[str, str]]
+        self, relevant_docs: List[str], conversations: List[Dict[str, str]], local_image_host: str
     ) -> Generator[str, None, None]:
         """
         文档：
@@ -98,14 +104,19 @@ class SingleRoundStrategy(QAConversationStrategy):
         - 如果文档提供的信息无法回答问题,请明确回复:"抱歉,文档中没有足够的信息来回答这个问题。"
         - 不要添加、推测或扩展文档未提及的信息
-        2. 格式如 ![image](./path.png) 的 Markdown 图片处理
+        2. 格式如 ![image](/path/to/images/path.png) 的 Markdown 图片处理
         - 根据Markdown 图片前后文本内容推测改图片与问题的相关性，有相关性则在回答中输出该Markdown图片路径
         - 根据相关图片在文档中的位置，自然融入答复内容,保持上下文连贯
         - 完整保留原始图片路径,不省略任何部分
         3. 回答格式要求
         - 使用markdown格式提升可读性
+        {% if local_image_host %}
+        4. 图片路径处理
+        - 图片地址需返回绝对路径,
+        - 为请求图片资源 需增加 http://{{ local_image_host }}/static/ 作为前缀
+        例如：/path/to/images/image.png， 返回 http://{{ local_image_host }}/static/path/to/images/image.png
+        {% endif %}
         """
 def get_qa_strategy(strategy_name: str) -> QAConversationStrategy:

autocoder/rag/searchable.py ADDED Viewed

@@ -0,0 +1,58 @@
+import json
+from collections import Counter
+from typing import Dict, List, Any, Optional, Tuple, Set
+from pydantic import BaseModel
+from autocoder.rag.relevant_utils import FilterDoc
+class FileOccurrence(BaseModel):
+    """Represents a file and its occurrence count in search results"""
+    # File identifier: an entry of a document's metadata["original_docs"], or
+    # the document's module_name when that key is absent.
+    file_path: str
+    # Number of times file_path appears in the collected file list.
+    count: int
+    score: float = 0.0  # Optional relevance score; nothing in this module sets it, so it stays at the default
+class FileResult(BaseModel):
+    # Wrapper model built by SearchableResults.reorder: the files referenced by
+    # the search results, ordered by occurrence count (highest first).
+    files: List[FileOccurrence]
+class SearchableResults:
+    """Rank the files referenced by filtered documents by how often they occur.
+    The class holds no state; reorder() is the main entry point.
+    """
+    def extract_original_docs(self, docs: List[FilterDoc]) -> List[str]:
+        """Collect the file identifiers referenced by each document.
+        Args:
+            docs: Filtered documents; each one is read through
+                doc.source_code.metadata and doc.source_code.module_name.
+        Returns:
+            A flat list with duplicates preserved. A document contributes every
+            entry of metadata["original_docs"] when that key exists, and its
+            module_name otherwise.
+        """
+        all_files = []
+        for doc in docs:
+            metadata = doc.source_code.metadata
+            if "original_docs" in metadata:
+                all_files.extend(metadata["original_docs"])
+            else:
+                # No original_docs recorded: fall back to the document's own module name
+                all_files.append(doc.source_code.module_name)
+        return all_files
+    def count_file_occurrences(self, files: List[str]) -> List[FileOccurrence]:
+        """Count how often each file appears and rank by frequency.
+        Args:
+            files: File identifiers, possibly containing duplicates.
+        Returns:
+            One FileOccurrence per distinct file, sorted by count descending.
+            Files with equal counts keep first-seen order.
+        """
+        # most_common() yields (item, count) pairs sorted by count descending
+        # with a stable tie order, matching a stable sort with reverse=True.
+        return [
+            FileOccurrence(file_path=file_path, count=count)
+            for file_path, count in Counter(files).most_common()
+        ]
+    def reorder(self, docs: List[FilterDoc]) -> FileResult:
+        """Extract the referenced files from docs and rank them by occurrence.
+        Args:
+            docs: Filtered documents to process.
+        Returns:
+            A FileResult whose files list is ordered by occurrence count,
+            highest first. The annotation previously said
+            List[FileOccurrence], but a FileResult has always been returned.
+        """
+        all_files = self.extract_original_docs(docs)
+        return FileResult(files=self.count_file_occurrences(all_files))

autocoder/version.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "0.1.288"
1	+ __version__ = "0.1.290"

{auto_coder-0.1.288.dist-info → auto_coder-0.1.290.dist-info}/LICENSE RENAMED Viewed

File without changes

{auto_coder-0.1.288.dist-info → auto_coder-0.1.290.dist-info}/WHEEL RENAMED Viewed

File without changes

{auto_coder-0.1.288.dist-info → auto_coder-0.1.290.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{auto_coder-0.1.288.dist-info → auto_coder-0.1.290.dist-info}/top_level.txt RENAMED Viewed

File without changes

auto-coder 0.1.288__py3-none-any.whl → 0.1.290__py3-none-any.whl

Potentially problematic release.

auto-coder 0.1.288__py3-none-any.whl → 0.1.290__py3-none-any.whl