PyPI - auto-coder - Versions diffs - 0.1.283__py3-none-any.whl → 0.1.284__py3-none-any.whl - Mend

auto-coder 0.1.283__py3-none-any.whl → 0.1.284__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of auto-coder might be problematic. Click here for more details.

Files changed (11)

{auto_coder-0.1.283.dist-info → auto_coder-0.1.284.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: auto-coder
-Version: 0.1.283
+Version: 0.1.284
 Summary: AutoCoder: AutoCoder
 Author: allwefantasy
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence

{auto_coder-0.1.283.dist-info → auto_coder-0.1.284.dist-info}/RECORD RENAMED Viewed

@@ -12,7 +12,7 @@ autocoder/chat_auto_coder_lang.py,sha256=ShOQVOnMA-WlT-fB9OrOer-xQkbcWxJGl-WMPuZ
 autocoder/command_args.py,sha256=9aYJ-AmPxP1sQh6ciw04FWHjSn31f2W9afXFwo8wgx4,30441
 autocoder/lang.py,sha256=U6AjVV8Rs1uLyjFCZ8sT6WWuNUxMBqkXXIOs4S120uk,14511
 autocoder/models.py,sha256=PlG1tKHSHwB57cKLOl5gTl5yTzFUDzCgeHPJU3N9F6Q,9106
-autocoder/version.py,sha256=gD3sSROI4mWkMlhRoIZLn--lc2LLLHyqeGIDWZ8UCTM,23
+autocoder/version.py,sha256=CdPfaa9UyiMW7CWw6BaV5azX5klLdPvg_B_GcYjlyFk,23
 autocoder/agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autocoder/agent/auto_demand_organizer.py,sha256=NWSAEsEk94vT3lGjfo25kKLMwYdPcpy9e-i21txPasQ,6942
 autocoder/agent/auto_filegroup.py,sha256=CW7bqp0FW1GIEMnl-blyAc2UGT7O9Mom0q66ITz1ckM,6635
@@ -30,7 +30,7 @@ autocoder/commands/auto_command.py,sha256=3ZQvG_JX2oWxTv_xiXQDQwMfTAVK-Tynqo6mC9
 autocoder/commands/tools.py,sha256=lanjoBGR6H8HDJSY3KrM6ibrtHZbgKX6mKJHSSE66dg,20493
 autocoder/common/JupyterClient.py,sha256=O-wi6pXeAEYhAY24kDa0BINrLYvKS6rKyWe98pDClS0,2816
 autocoder/common/ShellClient.py,sha256=fM1q8t_XMSbLBl2zkCNC2J9xuyKN3eXzGm6hHhqL2WY,2286
-autocoder/common/__init__.py,sha256=Z6gvzhzLWYnXGVCnek2UoWic5DRiqWGQh4AiGZL3XVQ,12989
+autocoder/common/__init__.py,sha256=nmvI1UImcPzPMrO1E6_5H7rXFA8bP8i1qGBYYDD5kBc,13182
 autocoder/common/anything2images.py,sha256=0ILBbWzY02M-CiWB-vzuomb_J1hVdxRcenAfIrAXq9M,25283
 autocoder/common/anything2img.py,sha256=4TREa-sOA-iargieUy7MpyCYVUE-9Mmq0wJtwomPqnE,7662
 autocoder/common/audio.py,sha256=Kn9nWKQddWnUrAz0a_ZUgjcu4VUU_IcZBigT7n3N3qc,7439
@@ -110,9 +110,10 @@ autocoder/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autocoder/rag/api_server.py,sha256=xiypCkdbclY0Z3Cmq5FTvtKrfQUV7yKcDaFFUttA2n0,7242
 autocoder/rag/doc_filter.py,sha256=UduVO2mlrngwJICrefjDJTYfdmQ4GcRXrfWDQ7xXksk,14206
 autocoder/rag/document_retriever.py,sha256=MGn6oIPo49BbRC99xmLMFkZrpHfcDfKoGYqWxXF554U,8051
-autocoder/rag/lang.py,sha256=TVNx5m7OtBcdfahzI29tMj9m1yrEm32G1c1zc4ZNIPs,3130
+autocoder/rag/lang.py,sha256=_jmUtxZDG1fmF4b2mhMJbYS1YQDb2ZE8nyAn5_vrvjA,3350
 autocoder/rag/llm_wrapper.py,sha256=Ht5GF5yJtrztoliujsZzx_ooWZmHkd5xLZKcGEiicZw,4303
-autocoder/rag/long_context_rag.py,sha256=mI7X_UT_QgL9uGmX1K5jSiRGC0K5o6m3CgtQESaG6Vk,40581
+autocoder/rag/long_context_rag.py,sha256=Q-kVwfauaLcPtlVlHS5smOG07gyL-8uDg6ewwIfw13A,40121
+autocoder/rag/qa_conversation_strategy.py,sha256=bWFSMcAsacEgvV7nTHtCroia2mstxqhWj8nz7k4HECI,4898
 autocoder/rag/rag_config.py,sha256=8LwFcTd8OJWWwi1_WY4IzjqgtT6RyE2j4PjxS5cCTDE,802
 autocoder/rag/rag_entry.py,sha256=6TKtErZ0Us9XSV6HgRKXA6yR3SiZGPHpynOKSaR1wgE,2463
 autocoder/rag/raw_rag.py,sha256=BOr0YGf3umjqXOIDVO1LXQ0bIHx8hzBdiubND2ezyxc,2946
@@ -168,9 +169,9 @@ autocoder/utils/types.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autocoder/utils/auto_coder_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autocoder/utils/auto_coder_utils/chat_stream_out.py,sha256=lkJ_A-sYU36JMzjFWkk3pR6uos8oZHYt9GPsPe_CPAo,11766
 autocoder/utils/chat_auto_coder_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-auto_coder-0.1.283.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
-auto_coder-0.1.283.dist-info/METADATA,sha256=pLzj-iE-hpBIpDnMabXu-4cpgkQmR3qSrOMruAEY098,2643
-auto_coder-0.1.283.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
-auto_coder-0.1.283.dist-info/entry_points.txt,sha256=0nzHtHH4pNcM7xq4EBA2toS28Qelrvcbrr59GqD_0Ak,350
-auto_coder-0.1.283.dist-info/top_level.txt,sha256=Jqc0_uJSw2GwoFQAa9iJxYns-2mWla-9ok_Y3Gcznjk,10
-auto_coder-0.1.283.dist-info/RECORD,,
+auto_coder-0.1.284.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
+auto_coder-0.1.284.dist-info/METADATA,sha256=NYN7m8jbf2aikPm1nXpD_hTBHMJOmVcpb0-Y8DbhveE,2643
+auto_coder-0.1.284.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+auto_coder-0.1.284.dist-info/entry_points.txt,sha256=0nzHtHH4pNcM7xq4EBA2toS28Qelrvcbrr59GqD_0Ak,350
+auto_coder-0.1.284.dist-info/top_level.txt,sha256=Jqc0_uJSw2GwoFQAa9iJxYns-2mWla-9ok_Y3Gcznjk,10
+auto_coder-0.1.284.dist-info/RECORD,,

autocoder/common/__init__.py CHANGED Viewed

@@ -295,7 +295,13 @@ class AutoCoderArgs(pydantic.BaseModel):
     rag_type: Optional[str] = "storage"
     rag_params_max_tokens: Optional[int] = 4096
     rag_doc_filter_relevance: Optional[int] = 5
-    rag_context_window_limit: Optional[int] = 120000
+    rag_context_window_limit: Optional[int] = 120000
+    # 回答用户问题时，使用哪种对话历史策略
+    # single_round: 单轮对话
+    # multi_round: 多轮对话
+    rag_qa_conversation_strategy: Optional[str] = "multi_round"
     verify_file_relevance_score: int = 6
     enable_rag_search: Optional[Union[bool, str]] = False
     enable_rag_context: Optional[Union[bool, str]] = False

autocoder/rag/lang.py CHANGED Viewed

@@ -13,7 +13,8 @@ MESSAGES = {
         "doc_filter_start": "Document filtering start, total {{total}} documents",
         "doc_filter_progress": "Document filtering progress: {{progress_percent}}% processed {{relevant_count}}/{{total}} documents",
         "doc_filter_error": "Document filtering error: {{error}}",
-        "doc_filter_complete": "Document filtering complete, cost {{total_time}} seconds, found {{relevant_count}} relevant documents"
+        "doc_filter_complete": "Document filtering complete, cost {{total_time}} seconds, found {{relevant_count}} relevant documents",
+        "context_docs_names": "The following are the documents related to the user's question: {{context_docs_names}}",
     },
     "zh": {
         "rag_error_title": "RAG 错误",
@@ -26,7 +27,8 @@ MESSAGES = {
         "doc_filter_start": "开始过滤文档，共 {{total}} 个文档",
         "doc_filter_progress": "文档过滤进度：{{progress_percent}}%，处理了 {{relevant_count}}/{{total}} 个文档",
         "doc_filter_error": "文档过滤错误：{{error}}",
-        "doc_filter_complete": "文档过滤完成，耗时 {{total_time}} 秒，找到 {{relevant_count}} 个相关文档"
+        "doc_filter_complete": "文档过滤完成，耗时 {{total_time}} 秒，找到 {{relevant_count}} 个相关文档",
+        "context_docs_names": "以下是和用户问题相关的文档：{{context_docs_names}}",
     }
 }

autocoder/rag/long_context_rag.py CHANGED Viewed

@@ -37,6 +37,7 @@ from autocoder.rag.relevant_utils import DocFilterResult
 from pydantic import BaseModel
 from byzerllm.utils.types import SingleOutputMeta
 from autocoder.rag.lang import get_message_with_format_and_newline
+from autocoder.rag.qa_conversation_strategy import get_qa_strategy
 try:
     from autocoder_pro.rag.llm_compute import LLMComputeEngine
@@ -173,10 +174,11 @@ class LongContextRAG:
         self.token_limit = self.args.rag_context_window_limit or 120000
         retriever_class = self._get_document_retriever_class()
         if self.args.enable_hybrid_index and not self.on_ray:
             if self.emb_llm is None:
-                raise ValueError("emb_llm is required for local byzer storage cache")
+                raise ValueError(
+                    "emb_llm is required for local byzer storage cache")
         self.document_retriever = retriever_class(
             self.path,
@@ -255,36 +257,7 @@ class LongContextRAG:
         请根据提供的文档内容、用户对话历史以及最后一个问题，提取并总结文档中与问题相关的重要信息。
         如果文档中没有相关信息，请回复"该文档中没有与问题相关的信息"。
         提取的信息尽量保持和原文中的一样，并且只输出这些信息。
-        """
-    @byzerllm.prompt()
-    def _answer_question(
-        self, query: str, relevant_docs: List[str]
-    ) -> Generator[str, None, None]:
-        """
-        文档：
-        <documents>
-        {% for doc in relevant_docs %}
-        {{ doc }}
-        {% endfor %}
-        </documents>
-        使用以上文档来回答用户的问题。回答要求：
-        1. 严格基于文档内容回答
-        - 如果文档提供的信息无法回答问题,请明确回复:"抱歉,文档中没有足够的信息来回答这个问题。"
-        - 不要添加、推测或扩展文档未提及的信息
-        2. 格式如 ![image](./path.png) 的 Markdown 图片处理
-        - 根据Markdown 图片前后文本内容推测改图片与问题的相关性，有相关性则在回答中输出该Markdown图片路径
-        - 根据相关图片在文档中的位置，自然融入答复内容,保持上下文连贯
-        - 完整保留原始图片路径,不省略任何部分
-        3. 回答格式要求
-        - 使用markdown格式提升可读性
-        问题：{{ query }}
-        """
+        """
     def _get_document_retriever_class(self):
         """Get the document retriever class based on configuration."""
@@ -627,13 +600,22 @@ class LongContextRAG:
             if not relevant_docs:
                 yield ("没有找到可以回答你问题的相关文档", SingleOutputMeta(input_tokens_count=rag_stat.recall_stat.total_input_tokens + rag_stat.chunk_stat.total_input_tokens,
-                                        generated_tokens_count=rag_stat.recall_stat.total_generated_tokens +
-                                        rag_stat.chunk_stat.total_generated_tokens,
-                                        ))
+                                                            generated_tokens_count=rag_stat.recall_stat.total_generated_tokens +
+                                                            rag_stat.chunk_stat.total_generated_tokens,
+                                                            ))
                 return
             context = [doc.source_code.module_name for doc in relevant_docs]
+            yield ("", SingleOutputMeta(input_tokens_count=rag_stat.recall_stat.total_input_tokens + rag_stat.chunk_stat.total_input_tokens,
+                                        generated_tokens_count=rag_stat.recall_stat.total_generated_tokens +
+                                        rag_stat.chunk_stat.total_generated_tokens,
+                                        reasoning_content=get_message_with_format_and_newline(
+                                            "context_docs_names",
+                                            context_docs_names=",".join(
+                                                context))
+                                        ))
             # 将 FilterDoc 转化为 SourceCode 方便后续的逻辑继续做处理
             relevant_docs = [doc.source_code for doc in relevant_docs]
@@ -792,7 +774,7 @@ class LongContextRAG:
                                             tokens=request_tokens
                                         )
                                         ))
             yield ("", SingleOutputMeta(input_tokens_count=rag_stat.recall_stat.total_input_tokens + rag_stat.chunk_stat.total_input_tokens,
                                         generated_tokens_count=rag_stat.recall_stat.total_generated_tokens +
                                         rag_stat.chunk_stat.total_generated_tokens,
@@ -834,16 +816,12 @@ class LongContextRAG:
                 self._print_rag_stats(rag_stat)
             else:
-                new_conversations = conversations[:-1] + [
-                    {
-                        "role": "user",
-                        "content": self._answer_question.prompt(
-                            query=query,
-                            relevant_docs=[
-                                doc.source_code for doc in relevant_docs],
-                        ),
-                    }
-                ]
+                qa_strategy = get_qa_strategy(self.args.rag_qa_conversation_strategy)
+                new_conversations = qa_strategy.create_conversation(
+                    documents=[doc.source_code for doc in relevant_docs],
+                    conversations=conversations
+                )
                 chunks = target_llm.stream_chat_oai(
                     conversations=new_conversations,
@@ -864,7 +842,7 @@ class LongContextRAG:
                         chunk[1].generated_tokens_count = rag_stat.recall_stat.total_generated_tokens + \
                             rag_stat.chunk_stat.total_generated_tokens + \
                             rag_stat.answer_stat.total_generated_tokens
                     yield chunk
                 self._print_rag_stats(rag_stat)

autocoder/rag/qa_conversation_strategy.py ADDED Viewed

@@ -0,0 +1,132 @@
+from abc import ABC, abstractmethod
+from typing import List, Dict, Any,Generator
+import byzerllm
+class QAConversationStrategy(ABC):
+    """
+    Abstract base class for conversation strategies.
+    Different strategies organize documents and conversations differently.
+    """
+    @abstractmethod
+    def create_conversation(self, documents: List[Any], conversations: List[Dict[str,str]]) -> List[Dict]:
+        """
+        Create a conversation structure based on documents and history
+        Args:
+            documents: List of retrieved documents
+            conversations: conversation turns
+        Returns:
+            List of message dictionaries representing the conversation to send to the model
+        """
+        pass
+class MultiRoundStrategy(QAConversationStrategy):
+    """
+    Multi-round strategy: First let the model read documents, then do Q&A.
+    Creates multiple conversation turns.
+    """
+    def create_conversation(self, documents: List[Any], conversations: List[Dict[str,str]]) -> List[Dict]:
+        messages = []
+        messages.extend([
+            {"role": "user", "content": self._read_docs_prompt.prompt(documents)},
+            {"role": "assistant", "content": "好的"}
+        ])
+        messages.extend(conversations)
+        return messages
+    @byzerllm.prompt()
+    def _read_docs_prompt(
+        self, relevant_docs: List[str]
+    ) -> Generator[str, None, None]:
+        """
+        请阅读以下：
+        <documents>
+        {% for doc in relevant_docs %}
+        {{ doc }}
+        {% endfor %}
+        </documents>
+        阅读完成后，使用以上文档来回答用户的问题。回答要求：
+        1. 严格基于文档内容回答
+        - 如果文档提供的信息无法回答问题,请明确回复:"抱歉,文档中没有足够的信息来回答这个问题。"
+        - 不要添加、推测或扩展文档未提及的信息
+        2. 格式如 ![image](./path.png) 的 Markdown 图片处理
+        - 根据Markdown 图片前后文本内容推测改图片与问题的相关性，有相关性则在回答中输出该Markdown图片路径
+        - 根据相关图片在文档中的位置，自然融入答复内容,保持上下文连贯
+        - 完整保留原始图片路径,不省略任何部分
+        3. 回答格式要求
+        - 使用markdown格式提升可读性
+        """
+class SingleRoundStrategy(QAConversationStrategy):
+    """
+    Single-round strategy: Put documents and conversation history in a single round.
+    """
+    def create_conversation(self, documents: List[Any], conversations: List[Dict[str,str]]) -> List[Dict]:
+        messages = []
+        messages.extend([
+            {"role": "user", "content": self._single_round_answer_question.prompt(documents, conversations)}
+        ])
+        return messages
+    @byzerllm.prompt()
+    def _single_round_answer_question(
+        self, relevant_docs: List[str], conversations: List[Dict[str, str]]
+    ) -> Generator[str, None, None]:
+        """
+        文档：
+        <documents>
+        {% for doc in relevant_docs %}
+        {{ doc }}
+        {% endfor %}
+        </documents>
+        用户历史对话：
+        <conversations>
+        {% for msg in conversations %}
+        <{{ msg.role }}>: {{ msg.content }}
+        {% endfor %}
+        </conversations>
+        使用以上文档来回答用户最后的问题。回答要求：
+        1. 严格基于文档内容回答
+        - 如果文档提供的信息无法回答问题,请明确回复:"抱歉,文档中没有足够的信息来回答这个问题。"
+        - 不要添加、推测或扩展文档未提及的信息
+        2. 格式如 ![image](./path.png) 的 Markdown 图片处理
+        - 根据Markdown 图片前后文本内容推测改图片与问题的相关性，有相关性则在回答中输出该Markdown图片路径
+        - 根据相关图片在文档中的位置，自然融入答复内容,保持上下文连贯
+        - 完整保留原始图片路径,不省略任何部分
+        3. 回答格式要求
+        - 使用markdown格式提升可读性
+        """
+def get_qa_strategy(strategy_name: str) -> QAConversationStrategy:
+    """
+    Factory method to get the appropriate conversation strategy
+    Args:
+        strategy_name: Name of the strategy to use
+    Returns:
+        An instance of the requested strategy
+    Raises:
+        ValueError: If the requested strategy doesn't exist
+    """
+    strategies = {
+        "multi_round": MultiRoundStrategy,
+        "single_round": SingleRoundStrategy,
+    }
+    if strategy_name not in strategies:
+        raise ValueError(f"Unknown strategy: {strategy_name}. Available strategies: {list(strategies.keys())}")
+    return strategies[strategy_name]()

autocoder/version.py CHANGED Viewed

@@ -1 +1 @@
-__version__ = "0.1.283"
+__version__ = "0.1.284"

{auto_coder-0.1.283.dist-info → auto_coder-0.1.284.dist-info}/LICENSE RENAMED Viewed

File without changes

{auto_coder-0.1.283.dist-info → auto_coder-0.1.284.dist-info}/WHEEL RENAMED Viewed

File without changes

{auto_coder-0.1.283.dist-info → auto_coder-0.1.284.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{auto_coder-0.1.283.dist-info → auto_coder-0.1.284.dist-info}/top_level.txt RENAMED Viewed

File without changes

auto-coder 0.1.283__py3-none-any.whl → 0.1.284__py3-none-any.whl

Potentially problematic release.

auto-coder 0.1.283__py3-none-any.whl → 0.1.284__py3-none-any.whl