PyPI - auto-coder - Versions diffs - 0.1.375__py3-none-any.whl → 0.1.376__py3-none-any.whl - Mend

auto-coder 0.1.375__py3-none-any.whl → 0.1.376__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of auto-coder might be problematic. Click here for more details.

Files changed (51) hide show

{auto_coder-0.1.375.dist-info → auto_coder-0.1.376.dist-info}/METADATA +1 -1
{auto_coder-0.1.375.dist-info → auto_coder-0.1.376.dist-info}/RECORD +17 -51
autocoder/agent/base_agentic/base_agent.py +9 -8
autocoder/auto_coder_rag.py +12 -0
autocoder/models.py +2 -2
autocoder/rag/cache/local_duckdb_storage_cache.py +63 -33
autocoder/rag/conversation_to_queries.py +37 -5
autocoder/rag/long_context_rag.py +161 -41
autocoder/rag/tools/recall_tool.py +2 -1
autocoder/rag/tools/search_tool.py +2 -1
autocoder/rag/types.py +36 -0
autocoder/utils/_markitdown.py +59 -13
autocoder/version.py +1 -1
autocoder/agent/agentic_edit.py +0 -833
autocoder/agent/agentic_edit_tools/__init__.py +0 -28
autocoder/agent/agentic_edit_tools/ask_followup_question_tool_resolver.py +0 -32
autocoder/agent/agentic_edit_tools/attempt_completion_tool_resolver.py +0 -29
autocoder/agent/agentic_edit_tools/base_tool_resolver.py +0 -29
autocoder/agent/agentic_edit_tools/execute_command_tool_resolver.py +0 -84
autocoder/agent/agentic_edit_tools/list_code_definition_names_tool_resolver.py +0 -75
autocoder/agent/agentic_edit_tools/list_files_tool_resolver.py +0 -62
autocoder/agent/agentic_edit_tools/plan_mode_respond_tool_resolver.py +0 -30
autocoder/agent/agentic_edit_tools/read_file_tool_resolver.py +0 -36
autocoder/agent/agentic_edit_tools/replace_in_file_tool_resolver.py +0 -95
autocoder/agent/agentic_edit_tools/search_files_tool_resolver.py +0 -70
autocoder/agent/agentic_edit_tools/use_mcp_tool_resolver.py +0 -55
autocoder/agent/agentic_edit_tools/write_to_file_tool_resolver.py +0 -98
autocoder/agent/agentic_edit_types.py +0 -124
autocoder/auto_coder_lang.py +0 -60
autocoder/auto_coder_rag_client_mcp.py +0 -170
autocoder/auto_coder_rag_mcp.py +0 -193
autocoder/common/llm_rerank.py +0 -84
autocoder/common/model_speed_test.py +0 -392
autocoder/common/v2/agent/agentic_edit_conversation.py +0 -188
autocoder/common/v2/agent/ignore_utils.py +0 -50
autocoder/dispacher/actions/plugins/action_translate.py +0 -214
autocoder/ignorefiles/__init__.py +0 -4
autocoder/ignorefiles/ignore_file_utils.py +0 -63
autocoder/ignorefiles/test_ignore_file_utils.py +0 -91
autocoder/linters/code_linter.py +0 -588
autocoder/rag/loaders/test_image_loader.py +0 -209
autocoder/rag/raw_rag.py +0 -96
autocoder/rag/simple_directory_reader.py +0 -646
autocoder/rag/simple_rag.py +0 -404
autocoder/regex_project/__init__.py +0 -162
autocoder/utils/coder.py +0 -125
autocoder/utils/tests.py +0 -37
{auto_coder-0.1.375.dist-info → auto_coder-0.1.376.dist-info}/LICENSE +0 -0
{auto_coder-0.1.375.dist-info → auto_coder-0.1.376.dist-info}/WHEEL +0 -0
{auto_coder-0.1.375.dist-info → auto_coder-0.1.376.dist-info}/entry_points.txt +0 -0
{auto_coder-0.1.375.dist-info → auto_coder-0.1.376.dist-info}/top_level.txt +0 -0

autocoder/rag/long_context_rag.py CHANGED Viewed

@@ -29,6 +29,9 @@ from autocoder.rag.searchable import SearchableResults
 from autocoder.rag.conversation_to_queries import extract_search_queries
 from autocoder.common import openai_content as OpenAIContentProcessor
 from autocoder.common.save_formatted_log import save_formatted_log
+from autocoder.rag.types import (
+    RecallStat,ChunkStat,AnswerStat,OtherStat,RAGStat
+)
 import json, os
 try:
     from autocoder_pro.rag.llm_compute import LLMComputeEngine
@@ -42,29 +45,6 @@ except ImportError:
     LLMComputeEngine = None
-class RecallStat(BaseModel):
-    total_input_tokens: int
-    total_generated_tokens: int
-    model_name: str = "unknown"
-class ChunkStat(BaseModel):
-    total_input_tokens: int
-    total_generated_tokens: int
-    model_name: str = "unknown"
-class AnswerStat(BaseModel):
-    total_input_tokens: int
-    total_generated_tokens: int
-    model_name: str = "unknown"
-class RAGStat(BaseModel):
-    recall_stat: RecallStat
-    chunk_stat: ChunkStat
-    answer_stat: AnswerStat
 class LongContextRAG:
     def __init__(
@@ -690,7 +670,7 @@ class LongContextRAG:
                             yield gen_item
                         # 打印最终的统计信息
-                        self._print_rag_stats(rag_stat)
+                        self._print_rag_stats(rag_stat, conversations)
                         return
     def _process_document_retrieval(self, conversations,
@@ -716,7 +696,7 @@ class LongContextRAG:
         # 提取查询并检索候选文档
         queries = extract_search_queries(
-            conversations=conversations, args=self.args, llm=self.llm, max_queries=self.args.rag_recall_max_queries)
+            conversations=conversations, args=self.args, llm=self.llm, max_queries=self.args.rag_recall_max_queries,rag_stat=rag_stat)
         documents = self._retrieve_documents(
             options={"queries": [query] + [query.query for query in queries]})
@@ -913,7 +893,7 @@ class LongContextRAG:
                         rag_stat.answer_stat.total_generated_tokens
                 yield chunk
-    def _print_rag_stats(self, rag_stat: RAGStat) -> None:
+    def _print_rag_stats(self, rag_stat: RAGStat, conversations: Optional[List[Dict[str, str]]] = None) -> None:
         """打印RAG执行的详细统计信息"""
         total_input_tokens = (
             rag_stat.recall_stat.total_input_tokens +
@@ -937,12 +917,46 @@ class LongContextRAG:
                              rag_stat.chunk_stat.total_generated_tokens) / total_tokens * 100
             answer_percent = (rag_stat.answer_stat.total_input_tokens +
                               rag_stat.answer_stat.total_generated_tokens) / total_tokens * 100
-        logger.info(
-            f"=== RAG 执行统计信息 ===\n"
+        # 计算其他阶段的令牌占比
+        other_percents = []
+        if total_tokens > 0 and rag_stat.other_stats:
+            for other_stat in rag_stat.other_stats:
+                other_percent = (other_stat.total_input_tokens +
+                                other_stat.total_generated_tokens) / total_tokens * 100
+                other_percents.append(other_percent)
+        # 计算成本分布百分比
+        if rag_stat.cost == 0:
+            recall_cost_percent = chunk_cost_percent = answer_cost_percent = 0
+        else:
+            recall_cost_percent = rag_stat.recall_stat.cost / rag_stat.cost * 100
+            chunk_cost_percent = rag_stat.chunk_stat.cost / rag_stat.cost * 100
+            answer_cost_percent = rag_stat.answer_stat.cost / rag_stat.cost * 100
+        # 计算其他阶段的成本占比
+        other_costs_percent = []
+        if rag_stat.cost > 0 and rag_stat.other_stats:
+            for other_stat in rag_stat.other_stats:
+                other_costs_percent.append(other_stat.cost / rag_stat.cost * 100)
+        ## 这里会计算每个阶段的成本
+        estimated_cost = self._estimate_token_cost(rag_stat)
+        # 构建统计信息字符串
+        query_content = ""
+        if conversations and len(conversations) > 0:
+            query_content = conversations[-1].get("content", "")
+            if len(query_content) > 100:
+                query_content = query_content[:100] + "..."
+            query_content = f"查询内容: {query_content}\n"
+        stats_str = (
+            f"=== (RAG 执行统计信息) ===\n"
+            f"{query_content}"
             f"总令牌使用: {total_tokens} 令牌\n"
             f"  * 输入令牌总数: {total_input_tokens}\n"
             f"  * 生成令牌总数: {total_generated_tokens}\n"
+            f"  * 总成本: {rag_stat.cost:.6f}\n"
             f"\n"
             f"阶段统计:\n"
             f"  1. 文档检索阶段:\n"
@@ -950,40 +964,146 @@ class LongContextRAG:
             f"     - 输入令牌: {rag_stat.recall_stat.total_input_tokens}\n"
             f"     - 生成令牌: {rag_stat.recall_stat.total_generated_tokens}\n"
             f"     - 阶段总计: {rag_stat.recall_stat.total_input_tokens + rag_stat.recall_stat.total_generated_tokens}\n"
+            f"     - 阶段成本: {rag_stat.recall_stat.cost:.6f}\n"
             f"\n"
             f"  2. 文档分块阶段:\n"
             f"     - 模型: {rag_stat.chunk_stat.model_name}\n"
             f"     - 输入令牌: {rag_stat.chunk_stat.total_input_tokens}\n"
             f"     - 生成令牌: {rag_stat.chunk_stat.total_generated_tokens}\n"
             f"     - 阶段总计: {rag_stat.chunk_stat.total_input_tokens + rag_stat.chunk_stat.total_generated_tokens}\n"
+            f"     - 阶段成本: {rag_stat.chunk_stat.cost:.6f}\n"
             f"\n"
             f"  3. 答案生成阶段:\n"
             f"     - 模型: {rag_stat.answer_stat.model_name}\n"
             f"     - 输入令牌: {rag_stat.answer_stat.total_input_tokens}\n"
             f"     - 生成令牌: {rag_stat.answer_stat.total_generated_tokens}\n"
             f"     - 阶段总计: {rag_stat.answer_stat.total_input_tokens + rag_stat.answer_stat.total_generated_tokens}\n"
+            f"     - 阶段成本: {rag_stat.answer_stat.cost:.6f}\n"
             f"\n"
+        )
+        # 如果存在 other_stats，添加其统计信息
+        if rag_stat.other_stats:
+            for i, other_stat in enumerate(rag_stat.other_stats):
+                stats_str += (
+                    f"  {i+4}. 其他阶段 {i+1}:\n"
+                    f"     - 模型: {other_stat.model_name}\n"
+                    f"     - 输入令牌: {other_stat.total_input_tokens}\n"
+                    f"     - 生成令牌: {other_stat.total_generated_tokens}\n"
+                    f"     - 阶段总计: {other_stat.total_input_tokens + other_stat.total_generated_tokens}\n"
+                    f"     - 阶段成本: {other_stat.cost:.6f}\n"
+                    f"\n"
+                )
+        # 添加令牌分布百分比
+        stats_str += (
             f"令牌分布百分比:\n"
             f"  - 文档检索: {recall_percent:.1f}%\n"
             f"  - 文档分块: {chunk_percent:.1f}%\n"
             f"  - 答案生成: {answer_percent:.1f}%\n"
         )
+        # 如果存在 other_stats，添加其令牌占比
+        if rag_stat.other_stats:
+            for i, other_percent in enumerate(other_percents):
+                if other_percent > 0:
+                    stats_str += f"  - 其他阶段 {i+1}: {other_percent:.1f}%\n"
+        # 添加成本分布百分比
+        stats_str += (
+            f"\n"
+            f"成本分布百分比:\n"
+            f"  - 文档检索: {recall_cost_percent:.1f}%\n"
+            f"  - 文档分块: {chunk_cost_percent:.1f}%\n"
+            f"  - 答案生成: {answer_cost_percent:.1f}%\n"
+        )
+        # 如果存在 other_stats，添加其成本占比
+        if rag_stat.other_stats:
+            for i, other_cost_percent in enumerate(other_costs_percent):
+                if other_cost_percent > 0:
+                    stats_str += f"  - 其他阶段 {i+1}: {other_cost_percent:.1f}%\n"
+        # 输出统计信息
+        logger.info(stats_str)
         # 记录原始统计数据，以便调试
         logger.debug(f"RAG Stat 原始数据: {rag_stat}")
-        # 返回成本估算
-        estimated_cost = self._estimate_token_cost(
-            total_input_tokens, total_generated_tokens)
         if estimated_cost > 0:
-            logger.info(f"估计成本: 约 ${estimated_cost:.4f} 人民币")
+            logger.info(f"估计成本: 约 {estimated_cost:.4f} ")
-    def _estimate_token_cost(self, input_tokens: int, output_tokens: int) -> float:
+    def _estimate_token_cost(self, rag_stat: RAGStat) -> float:
         """估算当前请求的令牌成本（人民币）"""
-        # 实际应用中，可以根据不同模型设置不同价格
-        input_cost_per_1m = 2.0/1000000   # 每百万输入令牌的成本
-        output_cost_per_1m = 8.0/100000   # 每百万输出令牌的成本
-        cost = (input_tokens * input_cost_per_1m / 1000000) + \
-            (output_tokens * output_cost_per_1m/1000000)
-        return cost
+        from autocoder.models import get_model_by_name
+        total_cost = 0.0
+        # 计算召回阶段成本
+        if rag_stat.recall_stat.model_name != "unknown":
+            try:
+                recall_model = get_model_by_name(rag_stat.recall_stat.model_name)
+                input_cost = recall_model.get("input_price", 0.0) / 1000000
+                output_cost = recall_model.get("output_price", 0.0) / 1000000
+                recall_cost = (rag_stat.recall_stat.total_input_tokens * input_cost) + \
+                             (rag_stat.recall_stat.total_generated_tokens * output_cost)
+                total_cost += recall_cost
+            except Exception as e:
+                logger.warning(f"计算召回阶段成本时出错: {str(e)}")
+                recall_cost = 0.0
+                total_cost += recall_cost
+            rag_stat.recall_stat.cost = recall_cost
+        # 计算分块阶段成本
+        if rag_stat.chunk_stat.model_name != "unknown":
+            try:
+                chunk_model = get_model_by_name(rag_stat.chunk_stat.model_name)
+                input_cost = chunk_model.get("input_price", 0.0) / 1000000
+                output_cost = chunk_model.get("output_price", 0.0) / 1000000
+                chunk_cost = (rag_stat.chunk_stat.total_input_tokens * input_cost) + \
+                            (rag_stat.chunk_stat.total_generated_tokens * output_cost)
+                total_cost += chunk_cost
+            except Exception as e:
+                logger.warning(f"计算分块阶段成本时出错: {str(e)}")
+                # 使用默认值
+                chunk_cost = 0.0
+                total_cost += chunk_cost
+            rag_stat.chunk_stat.cost = chunk_cost
+        # 计算答案生成阶段成本
+        if rag_stat.answer_stat.model_name != "unknown":
+            try:
+                answer_model = get_model_by_name(rag_stat.answer_stat.model_name)
+                input_cost = answer_model.get("input_price", 0.0) / 1000000
+                output_cost = answer_model.get("output_price", 0.0) / 1000000
+                answer_cost = (rag_stat.answer_stat.total_input_tokens * input_cost) + \
+                             (rag_stat.answer_stat.total_generated_tokens * output_cost)
+                total_cost += answer_cost
+            except Exception as e:
+                logger.warning(f"计算答案生成阶段成本时出错: {str(e)}")
+                # 使用默认值
+                answer_cost = 0.0
+                total_cost += answer_cost
+            rag_stat.answer_stat.cost = answer_cost
+        # 计算其他阶段成本（如果存在）
+        for i, other_stat in enumerate(rag_stat.other_stats):
+            if other_stat.model_name != "unknown":
+                try:
+                    other_model = get_model_by_name(other_stat.model_name)
+                    input_cost = other_model.get("input_price", 0.0) / 1000000
+                    output_cost = other_model.get("output_price", 0.0) / 1000000
+                    other_cost = (other_stat.total_input_tokens * input_cost) + \
+                                (other_stat.total_generated_tokens * output_cost)
+                    total_cost += other_cost
+                except Exception as e:
+                    logger.warning(f"计算其他阶段 {i+1} 成本时出错: {str(e)}")
+                    # 使用默认值
+                    other_cost = 0.0
+                    total_cost += other_cost
+                rag_stat.other_stats[i].cost = other_cost
+        # 将总成本保存到 rag_stat
+        rag_stat.cost = total_cost
+        return total_cost

autocoder/rag/tools/recall_tool.py CHANGED Viewed

@@ -16,7 +16,8 @@ from autocoder.agent.base_agentic.tool_registry import ToolRegistry
 from autocoder.agent.base_agentic.tools.base_tool_resolver import BaseToolResolver
 from autocoder.agent.base_agentic.types import ToolDescription, ToolExample
 from autocoder.common import AutoCoderArgs
-from autocoder.rag.long_context_rag import LongContextRAG, RecallStat, ChunkStat, AnswerStat, RAGStat
+from autocoder.rag.long_context_rag import LongContextRAG
+from autocoder.rag.types import RecallStat, ChunkStat, AnswerStat, RAGStat
 from autocoder.rag.relevant_utils import FilterDoc, DocRelevance, DocFilterResult
 from autocoder.common import SourceCode
 from autocoder.rag.relevant_utils import TaskTiming

autocoder/rag/tools/search_tool.py CHANGED Viewed

@@ -15,7 +15,8 @@ from autocoder.agent.base_agentic.tool_registry import ToolRegistry
 from autocoder.agent.base_agentic.tools.base_tool_resolver import BaseToolResolver
 from autocoder.agent.base_agentic.types import ToolDescription, ToolExample
 from autocoder.common import AutoCoderArgs
-from autocoder.rag.long_context_rag import LongContextRAG, RecallStat, ChunkStat, AnswerStat, RAGStat
+from autocoder.rag.long_context_rag import LongContextRAG
+from autocoder.rag.types import RecallStat, ChunkStat, AnswerStat, RAGStat
 from autocoder.rag.relevant_utils import FilterDoc, DocRelevance, DocFilterResult

autocoder/rag/types.py CHANGED Viewed

@@ -3,10 +3,46 @@ import os
 import json
 import time
 import pydantic
+from pydantic import BaseModel
 from typing import Dict, Any, Optional, List
 import psutil
 import glob
+class RecallStat(BaseModel):
+    total_input_tokens: int
+    total_generated_tokens: int
+    model_name: str = "unknown"
+    cost:float = 0.0
+class ChunkStat(BaseModel):
+    total_input_tokens: int
+    total_generated_tokens: int
+    model_name: str = "unknown"
+    cost:float = 0.0
+class AnswerStat(BaseModel):
+    total_input_tokens: int
+    total_generated_tokens: int
+    model_name: str = "unknown"
+    cost:float = 0.0
+class OtherStat(BaseModel):
+    total_input_tokens: int = 0
+    total_generated_tokens: int = 0
+    model_name: str = "unknown"
+    cost:float = 0.0
+class RAGStat(BaseModel):
+    recall_stat: RecallStat
+    chunk_stat: ChunkStat
+    answer_stat: AnswerStat
+    other_stats: List[OtherStat] = []
+    cost:float = 0.0
 class RAGServiceInfo(pydantic.BaseModel):
     host: str
     port: int

autocoder/utils/_markitdown.py CHANGED Viewed

@@ -151,7 +151,31 @@ class _CustomMarkdownify(markdownify.MarkdownConverter):
         return "![%s](%s%s)" % (alt, src, title_part)
     def convert_soup(self, soup: Any) -> str:
-        return super().convert_soup(soup)  # type: ignore
+        try:
+            # 设置递归深度限制，避免复杂文档导致的递归错误
+            import sys
+            original_limit = sys.getrecursionlimit()
+            try:
+                # 增加递归深度限制
+                sys.setrecursionlimit(10000)  # 设置更高的递归限制
+                return super().convert_soup(soup)  # type: ignore
+            finally:
+                # 恢复原始递归深度限制
+                sys.setrecursionlimit(original_limit)
+        except RecursionError:
+            # 处理递归错误，尝试简化处理
+            logger.warning("RecursionError in convert_soup, falling back to simplified conversion")
+            # 返回简化的文本内容
+            return self._simplified_convert(soup)
+    def _simplified_convert(self, soup: Any) -> str:
+        """简化的转换方法，用于处理复杂文档时的回退方案"""
+        # 提取纯文本内容
+        text = soup.get_text(separator="\n", strip=True)
+        # 基本清理
+        text = re.sub(r'\s+', ' ', text)
+        text = re.sub(r'\n{3,}', '\n\n', text)
+        return text
 class DocumentConverterResult:
@@ -224,20 +248,42 @@ class HtmlConverter(DocumentConverter):
         for script in soup(["script", "style"]):
             script.extract()
-        # Print only the main content
-        body_elm = soup.find("body")
-        webpage_text = ""
-        if body_elm:
-            webpage_text = _CustomMarkdownify().convert_soup(body_elm)
-        else:
-            webpage_text = _CustomMarkdownify().convert_soup(soup)
+        try:
+            # Print only the main content
+            body_elm = soup.find("body")
+            webpage_text = ""
+            if body_elm:
+                webpage_text = _CustomMarkdownify().convert_soup(body_elm)
+            else:
+                webpage_text = _CustomMarkdownify().convert_soup(soup)
-        assert isinstance(webpage_text, str)
+            assert isinstance(webpage_text, str)
-        return DocumentConverterResult(
-            title=None if soup.title is None else soup.title.string,
-            text_content=webpage_text,
-        )
+            return DocumentConverterResult(
+                title=None if soup.title is None else soup.title.string,
+                text_content=webpage_text,
+            )
+        except Exception as e:
+            # 如果转换过程中出现任何错误，尝试使用简化的方法提取文本
+            logger.warning(f"Error in HTML conversion: {str(e)}. Falling back to simplified text extraction.")
+            try:
+                # 简化的文本提取
+                text = soup.get_text(separator="\n", strip=True)
+                # 基本清理
+                text = re.sub(r'\s+', ' ', text)
+                text = re.sub(r'\n{3,}', '\n\n', text)
+                return DocumentConverterResult(
+                    title=None if soup.title is None else soup.title.string,
+                    text_content=text,
+                )
+            except Exception as inner_e:
+                # 如果简化提取也失败，记录错误并返回空结果
+                logger.error(f"Failed to extract text with simplified method: {str(inner_e)}")
+                return DocumentConverterResult(
+                    title=None,
+                    text_content=f"[文档转换失败] 无法提取内容: {str(e)}",
+                )
 class WikipediaConverter(DocumentConverter):

autocoder/version.py CHANGED Viewed

@@ -1,2 +1,2 @@
-__version__ = "0.1.375"
+__version__ = "0.1.376"

auto-coder 0.1.375__py3-none-any.whl → 0.1.376__py3-none-any.whl

Potentially problematic release.

auto-coder 0.1.375__py3-none-any.whl → 0.1.376__py3-none-any.whl