PyPI - auto-coder - Versions diffs - 0.1.268__py3-none-any.whl → 0.1.270__py3-none-any.whl - Mend

auto-coder 0.1.268__py3-none-any.whl → 0.1.270__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of auto-coder might be problematic. Click here for more details.

Files changed (23) hide show

{auto_coder-0.1.268.dist-info → auto_coder-0.1.270.dist-info}/METADATA +2 -2
{auto_coder-0.1.268.dist-info → auto_coder-0.1.270.dist-info}/RECORD +23 -20
autocoder/agent/auto_learn_from_commit.py +209 -0
autocoder/auto_coder.py +4 -0
autocoder/auto_coder_runner.py +2647 -0
autocoder/chat_auto_coder.py +54 -2630
autocoder/commands/auto_command.py +23 -33
autocoder/common/__init__.py +6 -2
autocoder/common/auto_coder_lang.py +21 -4
autocoder/common/auto_configure.py +41 -30
autocoder/common/code_modification_ranker.py +55 -11
autocoder/common/command_templates.py +2 -3
autocoder/common/context_pruner.py +214 -14
autocoder/common/conversation_pruner.py +11 -10
autocoder/index/entry.py +44 -22
autocoder/index/index.py +1 -1
autocoder/utils/auto_project_type.py +120 -0
autocoder/utils/model_provider_selector.py +23 -23
autocoder/version.py +1 -1
{auto_coder-0.1.268.dist-info → auto_coder-0.1.270.dist-info}/LICENSE +0 -0
{auto_coder-0.1.268.dist-info → auto_coder-0.1.270.dist-info}/WHEEL +0 -0
{auto_coder-0.1.268.dist-info → auto_coder-0.1.270.dist-info}/entry_points.txt +0 -0
{auto_coder-0.1.268.dist-info → auto_coder-0.1.270.dist-info}/top_level.txt +0 -0

autocoder/commands/auto_command.py CHANGED Viewed

@@ -21,6 +21,8 @@ from loguru import logger
 from autocoder.utils import llms as llms_utils
 from autocoder.rag.token_counter import count_tokens
 from autocoder.common.global_cancel import global_cancel
+from autocoder.common.auto_configure import config_readme
+from autocoder.utils.auto_project_type import ProjectTypeAnalyzer
 class CommandMessage(BaseModel):
     role: str
@@ -155,7 +157,8 @@ class CommandAutoTuner:
         self.printer = Printer()
         self.memory_config = memory_config
         self.command_config = command_config
-        self.tools = AutoCommandTools(args=args, llm=self.llm)
+        self.tools = AutoCommandTools(args=args, llm=self.llm)
+        self.project_type_analyzer = ProjectTypeAnalyzer(args=args, llm=self.llm)
     def get_conversations(self) -> List[CommandMessage]:
         """Get conversation history from memory file"""
@@ -440,7 +443,7 @@ class CommandAutoTuner:
                         safe_zone=self.args.conversation_prune_safe_zone_tokens
                     )
                     from autocoder.common.conversation_pruner import ConversationPruner
-                    pruner = ConversationPruner(self.llm)
+                    pruner = ConversationPruner(self.args, self.llm)
                     conversations = pruner.prune_conversations(conversations)
                 title = printer.get_message_from_key("auto_command_analyzing")
@@ -646,34 +649,7 @@ class CommandAutoTuner:
         常见的一些配置选项示例：
-        # 配置项说明
-        ## auto_merge: 代码合并方式，可选值为editblock、diff、wholefile.
-        - editblock: 生成 SEARCH/REPLACE 块，然后根据 SEARCH块到对应的源码查找，如果相似度阈值大于 editblock_similarity， 那么则将
-        找到的代码块替换为 REPLACE 块。大部分情况都推荐使用 editblock。
-        - wholefile: 重新生成整个文件，然后替换原来的文件。对于重构场景，推荐使用 wholefile。
-        - diff: 生成标准 git diff 格式，适用于简单的代码修改。
-        ## editblock_similarity: editblock相似度阈值
-        - editblock相似度阈值，取值范围为0-1，默认值为0.9。如果设置的太低，虽然能合并进去，但是会引入错误。推荐不要修改该值。
-        ## generate_times_same_model: 相同模型生成次数,也叫采样数
-        当进行生成代码时，大模型会对同一个需求生成多份代码，然后会使用 generate_rerank_model 模型对多份代码进行重排序，
-        然后选择得分最高的代码。一般次数越多，最终得到正确的代码概率越高。默认值为1，推荐设置为3。但是设置值越多，可能速度就越慢，消耗的token也越多。
-        当用户提到，帮我采样数设置为3， 那么你就设置该参数即可。
-        ## skip_filter_index: 是否跳过索引过滤
-        是否跳过根据用户的query 自动查找上下文。推荐设置为 false
-        ## skip_build_index: 是否跳过索引构建
-        是否自动构建索引。推荐设置为 false。注意，如果该值设置为 true, 那么 skip_filter_index 设置不会生效。
-        ## enable_global_memory: 是否开启全局记忆
-        是否开启全局记忆。
-        ## rank_times_same_model: 相同模型重排序次数
-        默认值为1. 如果 generate_times_same_model 参数设置大于1，那么 coding 函数会自动对多份代码进行重排序。
-        rank_times_same_model 表示重拍的次数，次数越多，选择到最好的代码的可能性越高，但是也会显著增加消耗的token和时间。
-        建议保持默认，要修改也建议不要超过3。
+        {{ config_readme }}
         比如你想开启索引，则可以执行：
@@ -1190,10 +1166,26 @@ class CommandAutoTuner:
          exclude_files(query="/drop regex://.*/package-lock\.json")
         </usage>
         </command>
+        <command>
+        <name>get_project_type</name>
+        <description>获取项目类型。</description>
+        <usage>
+         该命令获取项目类型。
+         使用例子：
+         get_project_type()
+         此时会返回诸如 "ts,py,java,go,js,ts" 这样的字符串，表示项目类型。
+        </usage>
+        </command>
         </commands>
         '''
+        return {
+            "config_readme": config_readme.prompt()
+        }
     def execute_auto_command(self, command: str, parameters: Dict[str, Any]) -> None:
         """
@@ -1232,9 +1224,7 @@ class CommandAutoTuner:
             "get_project_related_files": self.tools.get_project_related_files,
             "ask_user":self.tools.ask_user,
             "read_file_with_keyword_ranges": self.tools.read_file_with_keyword_ranges,
+            "get_project_type": self.project_type_analyzer.analyze,
         }
         if command not in command_map:

autocoder/common/__init__.py CHANGED Viewed

@@ -376,12 +376,16 @@ class AutoCoderArgs(pydantic.BaseModel):
     conversation_prune_group_size: Optional[int] = 4
     conversation_prune_strategy: Optional[str] = "summarize"
-    context_prune_strategy: Optional[str] = "score"
+    context_prune_strategy: Optional[str] = "extract"
     context_prune: Optional[bool] = True
+    context_prune_sliding_window_size: Optional[int] = 1000
+    context_prune_sliding_window_overlap: Optional[int] = 100
     auto_command_max_iterations: Optional[int] = 10
-    skip_commit: Optional[bool] = False
+    skip_commit: Optional[bool] = False
+    enable_beta: Optional[bool] = False
     class Config:
         protected_namespaces = ()

autocoder/common/auto_coder_lang.py CHANGED Viewed

@@ -130,7 +130,7 @@ MESSAGES = {
         "quick_filter_too_long": "⚠️ index file is too large ({{ tokens_len }}/{{ max_tokens }}). The query will be split into {{ split_size }} chunks.",
         "quick_filter_tokens_len": "📊 Current index size: {{ tokens_len }} tokens",
         "estimated_chat_input_tokens": "Estimated chat input tokens: {{ estimated_input_tokens }}",
-        "estimated_input_tokens_in_generate": "Estimated input tokens in generate ({{ generate_mode }}): {{ estimated_input_tokens }}",
+        "estimated_input_tokens_in_generate": "Estimated input tokens in generate ({{ generate_mode }}): {{ estimated_input_tokens }}",
         "model_has_access_restrictions": "{{model_name}} has access restrictions, cannot use the current function",
         "auto_command_not_found": "Auto command not found: {{command}}. Please check your input and try again.",
         "auto_command_failed": "Auto command failed: {{error}}. Please check your input and try again.",
@@ -161,9 +161,22 @@ MESSAGES = {
         "index_import_success": "Index imported successfully: {{path}}",
         "edits_title": "edits",
         "diff_blocks_title":"diff blocks",
-        "index_exclude_files_error": "index filter exclude files fail: {{ error }}"
+        "index_exclude_files_error": "index filter exclude files fail: {{ error }}",
+        "file_sliding_window_processing": "File {{ file_path }} is too large ({{ tokens }} tokens), processing with sliding window...",
+        "file_snippet_processing": "Processing file {{ file_path }} with code snippet extraction...",
+        "context_pruning_start": "⚠️ Context pruning started. Total tokens: {{ total_tokens }} (max allowed: {{ max_tokens }}). Applying strategy: {{ strategy }}.",
+        "context_pruning_reason": "Context length exceeds maximum limit ({{ total_tokens }} > {{ max_tokens }}). Pruning is required to fit within the model's context window.",
+        "rank_code_modification_title": "{{model_name}} ranking codes",
+        "sorted_files_message": "Reordered files:\n{% for file in files %}- {{ file }}\n{% endfor %}",
+        "estimated_input_tokens_in_ranking": "estimate input token {{ estimated_input_tokens }} when ranking",
+        "file_snippet_procesed": "{{ file_path }} processed with tokens: {{ tokens }} => {{ snippet_tokens }}. Current total tokens: {{ total_tokens }}",
     },
     "zh": {
+        "file_sliding_window_processing": "文件 {{ file_path }} 过大 ({{ tokens }} tokens)，正在使用滑动窗口处理...",
+        "file_snippet_processing": "正在对文件 {{ file_path }} 进行代码片段提取...",
+        "context_pruning_start": "⚠️ 开始上下文剪枝。总token数: {{ total_tokens }} (最大允许: {{ max_tokens }})。正在应用策略: {{ strategy }}。",
+        "context_pruning_reason": "上下文长度超过最大限制 ({{ total_tokens }} > {{ max_tokens }})。需要进行剪枝以适配模型的上下文窗口。",
         "file_scored_message": "文件评分: {{file_path}} - 分数: {{score}}",
         "invalid_file_pattern": "无效的文件模式: {{file_pattern}}. 例如: regex://.*/package-lock\\.json",
         "conf_not_found": "未找到配置文件: {{path}}",
@@ -304,7 +317,7 @@ MESSAGES = {
         "quick_filter_title": "{{ model_name }} 正在分析如何筛选上下文...",
         "quick_filter_failed": "❌ 快速过滤器失败: {{ error }}. ",
         "estimated_chat_input_tokens": "对话输入token预估为: {{ estimated_input_tokens }}",
-        "estimated_input_tokens_in_generate": "生成代码({{ generate_mode }})预计输入token数: {{ estimated_input_tokens_in_generate }}",
+        "estimated_input_tokens_in_generate": "生成代码({{ generate_mode }})预计输入token数: {{ estimated_input_tokens }}",
         "model_has_access_restrictions": "{{model_name}} 有访问限制，无法使用当前功能",
         "auto_command_not_found": "未找到自动命令: {{command}}。请检查您的输入并重试。",
         "auto_command_failed": "自动命令执行失败: {{error}}。请检查您的输入并重试。",
@@ -320,7 +333,11 @@ MESSAGES = {
         "index_import_success": "索引导入成功: {{path}}",
         "edits_title": "编辑块",
         "diff_blocks_title": "差异块",
-        "index_exclude_files_error": "索引排除文件时出错: {{error}}"
+        "index_exclude_files_error": "索引排除文件时出错: {{error}}",
+        "rank_code_modification_title": "模型{{model_name}}对代码打分",
+        "sorted_files_message": "重新排序后的文件路径:\n{% for file in files %}- {{ file }}\n{% endfor %}",
+        "estimated_input_tokens_in_ranking": "排序预计输入token数: {{ estimated_input_tokens }}",
+        "file_snippet_procesed": "文件 {{ file_path }} 处理后token数: {{ tokens }} => {{ snippet_tokens }} 当前总token数: {{ total_tokens }}"
     }}

autocoder/common/auto_configure.py CHANGED Viewed

@@ -119,7 +119,45 @@ class AutoConfigRequest(BaseModel):
 class AutoConfigResponse(BaseModel):
     configs: List[Dict[str, Any]] = Field(default_factory=list)
-    reasoning: str = ""
+    reasoning: str = ""
+@byzerllm.prompt()
+def config_readme() -> str:
+    """
+    # 配置项说明
+    ## auto_merge: 代码合并方式，可选值为editblock、diff、wholefile.
+    - editblock: 生成 SEARCH/REPLACE 块，然后根据 SEARCH块到对应的源码查找，如果相似度阈值大于 editblock_similarity， 那么则将
+    找到的代码块替换为 REPLACE 块。大部分情况都推荐使用 editblock。
+    - wholefile: 重新生成整个文件，然后替换原来的文件。对于重构场景，推荐使用 wholefile。
+    - diff: 生成标准 git diff 格式，适用于简单的代码修改。
+    ## editblock_similarity: editblock相似度阈值
+    - editblock相似度阈值，取值范围为0-1，默认值为0.9。如果设置的太低，虽然能合并进去，但是会引入错误。推荐不要修改该值。
+    ## generate_times_same_model: 相同模型生成次数
+    当进行生成代码时，大模型会对同一个需求生成多份代码，然后会使用 generate_rerank_model 模型对多份代码进行重排序，
+    然后选择得分最高的代码。一般次数越多，最终得到正确的代码概率越高。默认值为1，推荐设置为3。但是设置值越多，可能速度就越慢，消耗的token也越多。
+    ## skip_filter_index: 是否跳过索引过滤
+    是否跳过根据用户的query 自动查找上下文。推荐设置为 false
+    ## skip_build_index: 是否跳过索引构建
+    是否自动构建索引。推荐设置为 false。注意，如果该值设置为 true, 那么 skip_filter_index 设置不会生效。
+    ## rank_times_same_model: 相同模型重排序次数
+    默认值为1. 如果 generate_times_same_model 参数设置大于1，那么 coding 函数会自动对多份代码进行重排序。
+    rank_times_same_model 表示重拍的次数，次数越多，选择到最好的代码的可能性越高，但是也会显著增加消耗的token和时间。
+    建议保持默认，要修改也建议不要超过3。
+    ## project_type: 项目类型
+    项目类型通常为如下三种选择：
+    1. ts
+    2. py
+    3. 代码文件后缀名列表（比如.java,.py,.go,.js,.ts），多个按逗号分割
+    推荐使用 3 选项，因为项目类型通常为多种后缀名混合。
+    """
 class ConfigAutoTuner:
     def __init__(self,args: AutoCoderArgs, llm: Union[byzerllm.ByzerLLM, byzerllm.SimpleByzerLLM], memory_config: MemoryConfig):
@@ -135,34 +173,7 @@ class ConfigAutoTuner:
         self.memory_config.configure(conf, skip_print)
-    @byzerllm.prompt()
-    def config_readme(self) -> str:
-        """
-        # 配置项说明
-        ## auto_merge: 代码合并方式，可选值为editblock、diff、wholefile.
-        - editblock: 生成 SEARCH/REPLACE 块，然后根据 SEARCH块到对应的源码查找，如果相似度阈值大于 editblock_similarity， 那么则将
-        找到的代码块替换为 REPLACE 块。大部分情况都推荐使用 editblock。
-        - wholefile: 重新生成整个文件，然后替换原来的文件。对于重构场景，推荐使用 wholefile。
-        - diff: 生成标准 git diff 格式，适用于简单的代码修改。
-        ## editblock_similarity: editblock相似度阈值
-        - editblock相似度阈值，取值范围为0-1，默认值为0.9。如果设置的太低，虽然能合并进去，但是会引入错误。推荐不要修改该值。
-        ## generate_times_same_model: 相同模型生成次数
-        当进行生成代码时，大模型会对同一个需求生成多份代码，然后会使用 generate_rerank_model 模型对多份代码进行重排序，
-        然后选择得分最高的代码。一般次数越多，最终得到正确的代码概率越高。默认值为1，推荐设置为3。但是设置值越多，可能速度就越慢，消耗的token也越多。
-        ## skip_filter_index: 是否跳过索引过滤
-        是否跳过根据用户的query 自动查找上下文。推荐设置为 false
-        ## skip_build_index: 是否跳过索引构建
-        是否自动构建索引。推荐设置为 false。注意，如果该值设置为 true, 那么 skip_filter_index 设置不会生效。
-        ## rank_times_same_model: 相同模型重排序次数
-        默认值为1. 如果 generate_times_same_model 参数设置大于1，那么 coding 函数会自动对多份代码进行重排序。
-        rank_times_same_model 表示重拍的次数，次数越多，选择到最好的代码的可能性越高，但是也会显著增加消耗的token和时间。
-        建议保持默认，要修改也建议不要超过3。
-        """
     def command_readme(self) -> str:
         """
@@ -212,7 +223,7 @@ class ConfigAutoTuner:
             "query": request.query,
             "current_conf": json.dumps(self.memory_config.memory["conf"], indent=2),
             "last_execution_stat": "",
-            "config_readme": self.config_readme.prompt()
+            "config_readme": config_readme.prompt()
         }
     def tune(self, request: AutoConfigRequest) -> 'AutoConfigResponse':

autocoder/common/code_modification_ranker.py CHANGED Viewed

@@ -6,11 +6,13 @@ from pydantic import BaseModel
 from autocoder.common.printer import Printer
 from concurrent.futures import ThreadPoolExecutor, as_completed
 import traceback
-from autocoder.common.utils_code_auto_generate import chat_with_continue
+from autocoder.common.utils_code_auto_generate import chat_with_continue,stream_chat_with_continue
 from byzerllm.utils.str2model import to_model
+from autocoder.utils.auto_coder_utils.chat_stream_out import stream_out
 from autocoder.utils.llms import get_llm_names, get_model_info
 from autocoder.common.types import CodeGenerateResult, MergeCodeWithoutEffect
 import os
+from autocoder.rag.token_counter import count_tokens
 class RankResult(BaseModel):
     rank_result: List[int]
@@ -133,6 +135,15 @@ class CodeModificationRanker:
         else:
             raise Exception(f"Invalid rank strategy: {self.args.rank_strategy}")
+        # 计算 query 的 token 数量
+        token_count = count_tokens(query)
+        # 打印 token 统计信息
+        self.printer.print_in_terminal(
+            "estimated_input_tokens_in_ranking",
+            estimated_input_tokens=token_count
+        )
         input_tokens_count = 0
         generated_tokens_count = 0
         try:
@@ -145,16 +156,25 @@ class CodeModificationRanker:
                     self.printer.print_in_terminal(
                         "ranking_start", style="blue", count=len(generate_result.contents), model_name=model_name)
-                    for _ in range(rank_times):
-                        futures.append(
-                            executor.submit(
-                                chat_with_continue,
-                                llm,
-                                [{"role": "user", "content": query}],
-                                {}
+                    for i in range(rank_times):
+                        if i == 0:
+                            futures.append(
+                                executor.submit(
+                                    stream_chat_with_continue,
+                                    llm,
+                                    [{"role": "user", "content": query}],
+                                    {}
+                                )
+                            )
+                        else:
+                            futures.append(
+                                executor.submit(
+                                    chat_with_continue,
+                                    llm,
+                                    [{"role": "user", "content": query}],
+                                    {}
+                                )
                             )
-                        )
                 # Collect all results
                 results = []
@@ -180,7 +200,31 @@ class CodeModificationRanker:
                 total_input_cost = 0.0
                 total_output_cost = 0.0
-                for future, model_name in zip(futures, model_names):
+                # 第一个future使用流式输出
+                stream_future = futures[0]
+                model_name = model_names[0]
+                stream_generator = stream_future.result()
+                full_response, last_meta = stream_out(
+                        stream_generator,
+                        model_name=model_name,
+                        title=self.printer.get_message_from_key_with_format(
+                            "rank_code_modification_title", model_name=model_name),
+                        args=self.args
+                    )
+                if last_meta:
+                    input_tokens_count += last_meta.input_tokens_count
+                    generated_tokens_count += last_meta.generated_tokens_count
+                    # 计算成本
+                    info = model_info_map.get(model_name, {})
+                    # 计算公式:token数 * 单价 / 1000000
+                    total_input_cost += (last_meta.input_tokens_count * info.get("input_cost", 0.0)) / 1000000
+                    total_output_cost += (last_meta.generated_tokens_count * info.get("output_cost", 0.0)) / 1000000
+                v = to_model(full_response,RankResult)
+                results.append(v.rank_result)
+                for future, model_name in zip(futures[1:], model_names[1:]):
                     try:
                         result = future.result()
                         input_tokens_count += result.input_tokens_count

autocoder/common/command_templates.py CHANGED Viewed

@@ -174,9 +174,8 @@ def base_base(source_dir:str,project_type:str)->str:
     source_dir: {{ source_dir }}
     target_file: {{ target_file }}
-    model: v3_chat
-    model_max_input_length: 100000
-    model_max_input_length: 120000
+    model: v3_chat
+    model_max_input_length: 60000
     enable_multi_round_generate: false
     index_filter_workers: 100
     index_build_workers: 100

auto-coder 0.1.268__py3-none-any.whl → 0.1.270__py3-none-any.whl

Potentially problematic release.

auto-coder 0.1.268__py3-none-any.whl → 0.1.270__py3-none-any.whl