auto-coder 0.1.263__py3-none-any.whl → 0.1.264__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


@@ -20,6 +20,8 @@ from byzerllm import MetaHolder
 
 from autocoder.utils.llms import get_llm_names, get_model_info
 from loguru import logger
+from byzerllm.utils.client.code_utils import extract_code
+import json
 
 
 def get_file_path(file_path):
@@ -32,6 +34,15 @@ class QuickFilterResult(BaseModel):
     files: Dict[str, TargetFile]
     has_error: bool
     error_message: Optional[str] = None
+    file_positions: Optional[Dict[str, int]] = {}
+
+    def get_sorted_file_positions(self) -> List[str]:
+        """
+        Return the file paths sorted by their position value
+        """
+        if not self.file_positions:
+            return []
+        return [file_path for file_path, _ in sorted(self.file_positions.items(), key=lambda x: x[1])]
 
 
 class QuickFilter():
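
The new file_positions field records the rank the filter assigned to each selected file, and get_sorted_file_positions recovers that ranking. A minimal standalone sketch of the behavior, with a simplified model and hypothetical paths:

```python
from typing import Dict, List, Optional
from pydantic import BaseModel

class Result(BaseModel):
    # Simplified stand-in for QuickFilterResult (other fields omitted)
    file_positions: Optional[Dict[str, int]] = {}

    def get_sorted_file_positions(self) -> List[str]:
        if not self.file_positions:
            return []
        # Sort entries by their position value, keep only the paths
        return [p for p, _ in sorted(self.file_positions.items(), key=lambda x: x[1])]

r = Result(file_positions={"b.py": 2, "a.py": 0, "c.py": 1})
assert r.get_sorted_file_positions() == ["a.py", "c.py", "b.py"]
```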
@@ -82,6 +93,7 @@ class QuickFilter():
             self.index_manager.index_filter_llm)
         model_name = ",".join(model_names)
         files: Dict[str, TargetFile] = {}
+        file_positions: Dict[str, int] = {}
 
         # Get the model pricing info
         model_info_map = {}
@@ -166,7 +178,7 @@ class QuickFilter():
                 )
 
             if file_number_list:
-                for file_number in file_number_list.file_list:
+                for index, file_number in enumerate(file_number_list.file_list):
                     if file_number < 0 or file_number >= len(chunk):
                         self.printer.print_in_terminal(
                             "invalid_file_number",
@@ -182,9 +194,11 @@ class QuickFilter():
                         reason=self.printer.get_message_from_key(
                             "quick_filter_reason")
                     )
+                    file_positions[file_path] = index
             return QuickFilterResult(
                 files=files,
-                has_error=False
+                has_error=False,
+                file_positions=file_positions
             )
 
         except Exception as e:
@@ -212,6 +226,7 @@ class QuickFilter():
 
         # Merge all results
         final_files: Dict[str, TargetFile] = {}
+        final_file_positions: Dict[str, int] = {}
         has_error = False
         error_messages: List[str] = []
 
@@ -222,16 +237,57 @@ class QuickFilter():
                 error_messages.append(result.error_message)
             final_files.update(result.files)
 
+
+        for result in results:
+            if result.has_error:
+                has_error = True
+                if result.error_message:
+                    error_messages.append(result.error_message)
+        ## Interleave the file_positions of the individual results
+        # e.g. the first is {file_path_1_0: 0, file_path_1_1: 1, file_path_1_2: 2}
+        # the second is {file_path_2_0: 0, file_path_2_1: 1}
+        # the third is {file_path_3_0: 0, file_path_3_1: 1, file_path_3_2: 2, file_path_3_3: 3}
+        # Grouping rule: all position-0 files form one group, renumbered 0,1,2;
+        # all position-1 files form the next group, renumbered 3,4,5; and so on.
+        # {file_path_1_0: 0, file_path_2_0: 1, file_path_3_0: 2, file_path_1_1: 3, file_path_2_1: 4, file_path_3_1: 5}
+        #
+        # Get the maximum position value across all results
+        max_position = max([max(pos.values()) for pos in [result.file_positions for result in results if result.file_positions]] + [0])
+
+        # Build a map from each position to the file paths that hold it
+        position_map = {}
+        for result in results:
+            if result.file_positions:
+                for file_path, position in result.file_positions.items():
+                    if position not in position_map:
+                        position_map[position] = []
+                    position_map[position].append(file_path)
+
+        # Renumber the file paths
+        new_file_positions = {}
+        current_index = 0
+        for position in range(max_position + 1):
+            if position in position_map:
+                for file_path in position_map[position]:
+                    new_file_positions[file_path] = current_index
+                    current_index += 1
+
+        # Update final_file_positions
+        final_file_positions.update(new_file_positions)
+
         return QuickFilterResult(
             files=final_files,
             has_error=has_error,
             error_message="\n".join(error_messages) if error_messages else None
         )
+
 
     @byzerllm.prompt()
     def quick_filter_files(self, file_meta_list: List[IndexItem], query: str) -> str:
         '''
-        When the user raises a requirement, we need to find the relevant files, read them, and modify some of them.
+        When the user raises a requirement, we need to find two kinds of source files:
+        1. Files that must be modified to satisfy the requirement, which we call edited_files
+        2. Extra files needed as references to complete those modifications, which we call reference_files
+
         Now, given the index files below:
 
         <index>
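
The merge loop above interleaves the per-chunk rankings round-robin style: every chunk's rank-0 pick comes first, then every rank-1 pick, and so on, exactly as the worked comment describes. A standalone sketch of that interleaving, with hypothetical file names:

```python
# Interleave several per-chunk rankings: group by original position,
# then renumber the groups in order (a sketch of the merge rule).
rankings = [
    {"a0.py": 0, "a1.py": 1, "a2.py": 2},
    {"b0.py": 0, "b1.py": 1},
    {"c0.py": 0, "c1.py": 1, "c2.py": 2, "c3.py": 3},
]

position_map = {}
for ranking in rankings:
    for path, pos in ranking.items():
        position_map.setdefault(pos, []).append(path)

merged = {}
for pos in sorted(position_map):
    for path in position_map[pos]:
        merged[path] = len(merged)  # next contiguous index

# merged == {"a0.py": 0, "b0.py": 1, "c0.py": 2,
#            "a1.py": 3, "b1.py": 4, "c1.py": 5, ...}
```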
@@ -258,12 +314,13 @@ class QuickFilter():
             }
             ```
 
-            Special notes
-            1. If the user's query @mentions files or uses @@ symbols, the mentioned files or symbols must be returned; also try to find the files they depend on through index information such as import statements, then decide whether those files are needed for the subsequent coding.
-            2. If the query is a conversation history, file paths mentioned in the conversation must be returned.
-            3. Think: if you had to modify the code to satisfy this requirement, which files would you want to read and which would you modify, according to the index files? Return those files.
-            4. If the user requirement is empty, simply return an empty list.
-            5. The returned JSON data must not contain comments
+            Special notes:
+            1. If the user's query contains @file or @@ symbols, the mentioned files or symbols must be returned.
+            2. Based on the requirement and the files found via @file or @@ symbols, infer the edited_files that need to be modified, then try to find the files they depend on through index information such as import statements; those become the reference_files.
+            3. The file numbers in file_list are ordered as: the @ or @@ files first, then edited_files, then reference_files. Note: judge from the requirement whether each reference_file is really needed and filter out irrelevant ones, so that the number of returned files does not balloon.
+            4. If the query is a conversation history, file paths mentioned in the conversation must be returned.
+            5. If the user requirement is empty, simply return an empty list.
+            6. The returned JSON data must not contain comments
             '''
 
         file_meta_str = "\n".join(
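
Note 3 pins the ordering of file_list. Under that rule, and assuming the response schema implied by the surrounding code (a JSON object whose file_list holds index numbers, read via file_number_list.file_list), a reply might look like this hypothetical example:

```python
# Hypothetical reply for a query that @mentions one file, following note 3:
# the @mentioned file's index first, then edited_files, then the
# reference_files that survived relevance filtering.
reply = {
    "file_list": [12, 3, 7]
    # 12 -> the @mentioned file, 3 -> an edited_file, 7 -> a reference_file
}
```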
@@ -273,9 +330,58 @@ class QuickFilter():
             "query": query
         }
         return context
+
+
+    def _extract_code_snippets_from_overflow_files(self, validated_file_numbers: List[int], index_items: List[IndexItem], conversations: List[Dict[str, str]]):
+        token_count = 0
+        selected_files = []
+        selected_file_contents = []
+        full_file_tokens = int(self.max_tokens * 0.8)
+        for file_number in validated_file_numbers:
+            file_path = get_file_path(index_items[file_number].module_name)
+            with open(file_path, "r", encoding="utf-8") as f:
+                content = f.read()
+            tokens = count_tokens(content)
+            if token_count + tokens <= full_file_tokens:
+                selected_files.append(file_number)
+                selected_file_contents.append(content)
+                token_count += tokens
+            else:
+                # Extract code snippets from the files that overflow the budget
+                try:
+                    extracted_info = (
+                        self.extract_code_snippets_from_files.options(
+                            {"llm_config": {"max_length": 100}}
+                        )
+                        .with_llm(self.index_manager.index_filter_llm)
+                        .run(conversations, [content])
+                    )
+                    json_str = extract_code(extracted_info)[0][1]
+                    json_objs = json.loads(json_str)
+
+                    new_content = ""
+
+                    if json_objs:
+                        for json_obj in json_objs:
+                            start_line = json_obj["start_line"] - 1
+                            end_line = json_obj["end_line"]
+                            chunk = "\n".join(content.split("\n")[start_line:end_line])
+                            new_content += chunk + "\n"
+
+                        token_count += count_tokens(new_content)
+                        if token_count >= self.max_tokens:
+                            break
+                        else:
+                            selected_files.append(file_number)
+                            selected_file_contents.append(new_content)
+                except Exception as e:
+                    logger.error(f"Failed to extract code snippets from {file_path}: {e}")
+        return selected_files
+
 
     def filter(self, index_items: List[IndexItem], query: str) -> QuickFilterResult:
         final_files: Dict[str, TargetFile] = {}
+        final_file_positions: Dict[str, int] = {}
         start_time = time.monotonic()
 
         prompt_str = self.quick_filter_files.prompt(index_items, query)
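
_extract_code_snippets_from_overflow_files keeps whole files until roughly 80% of the token budget is spent, then asks the LLM for line-range snippets of the remainder. A self-contained sketch of that budgeting skeleton, with a toy tokenizer and a plain string slice standing in for the LLM snippet call:

```python
# Token-budgeted selection skeleton (toy stand-ins, no LLM involved):
# keep whole files under ~80% of the budget, summarize the overflow.
MAX_TOKENS = 1000

def count_tokens(text: str) -> int:
    return len(text.split())  # toy stand-in for the real tokenizer

def select(files: dict) -> list:
    budget = int(MAX_TOKENS * 0.8)
    used, picked = 0, []
    for path, content in files.items():
        tokens = count_tokens(content)
        if used + tokens <= budget:
            picked.append((path, content))   # whole file still fits
            used += tokens
        else:
            snippet = content[:200]          # stand-in for LLM snippet extraction
            used += count_tokens(snippet)
            if used >= MAX_TOKENS:
                break                        # hard budget reached
            picked.append((path, snippet))
    return picked

print(select({"a.py": "x " * 700, "b.py": "y " * 700}))
```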
@@ -385,6 +491,7 @@ class QuickFilter():
             )
 
         if file_number_list:
+            validated_file_numbers = []
             for file_number in file_number_list.file_list:
                 if file_number < 0 or file_number >= len(index_items):
                     self.printer.print_in_terminal(
@@ -394,14 +501,21 @@ class QuickFilter():
                         total_files=len(index_items)
                     )
                     continue
-                final_files[get_file_path(index_items[file_number].module_name)] = TargetFile(
+                validated_file_numbers.append(file_number)
+
+            # Add the validated files to final_files
+            for index, file_number in enumerate(validated_file_numbers):
+                file_path = get_file_path(index_items[file_number].module_name)
+                final_files[file_path] = TargetFile(
                     file_path=index_items[file_number].module_name,
-                    reason=self.printer.get_message_from_key(
-                        "quick_filter_reason")
+                    reason=self.printer.get_message_from_key("quick_filter_reason")
                 )
+                final_file_positions[file_path] = index
+
         end_time = time.monotonic()
         self.stats["timings"]["quick_filter"] = end_time - start_time
         return QuickFilterResult(
             files=final_files,
-            has_error=False
+            has_error=False,
+            file_positions=final_file_positions
         )
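
filter() now validates all file numbers first and only then assigns positions, so the ranking stays contiguous even when invalid numbers are skipped. A compact sketch of the two-pass pattern, with hypothetical values:

```python
# Two-pass pattern: validate first, then assign contiguous positions,
# so skipped (invalid) numbers leave no gaps in the ranking.
raw_numbers = [0, 99, 2, -1, 1]   # hypothetical LLM output; 99 and -1 invalid
total = 3                         # hypothetical number of index items

validated = [n for n in raw_numbers if 0 <= n < total]
positions = {n: i for i, n in enumerate(validated)}
assert positions == {0: 0, 2: 1, 1: 2}
```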
autocoder/index/index.py CHANGED
@@ -26,6 +26,7 @@ from autocoder.index.types import (
 )
 from autocoder.common.global_cancel import global_cancel
 from autocoder.utils.llms import get_llm_names
+from autocoder.rag.token_counter import count_tokens
 class IndexManager:
     def __init__(
         self, llm: byzerllm.ByzerLLM, sources: List[SourceCode], args: AutoCoderArgs
@@ -257,13 +258,13 @@
         total_input_cost = 0.0
         total_output_cost = 0.0
 
-        if len(source.source_code) > self.max_input_length:
+        if count_tokens(source.source_code) > self.args.conversation_prune_safe_zone_tokens:
             self.printer.print_in_terminal(
                 "index_file_too_large",
                 style="yellow",
                 file_path=source.module_name,
                 file_size=len(source.source_code),
-                max_length=self.max_input_length
+                max_length=self.args.conversation_prune_safe_zone_tokens
             )
             chunks = self.split_text_into_chunks(
                 source_code, self.max_input_length - 1000
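
The guard now measures tokens instead of raw character length, which tracks what the model context actually costs. A toy illustration of how far the two can diverge, with a whitespace tokenizer standing in for count_tokens:

```python
# Character length vs. token count can diverge sharply;
# gating on tokens reflects the real LLM context cost.
def count_tokens(text: str) -> int:
    return len(text.split())  # toy stand-in for the real tokenizer

source = "x = 1\n" * 5000        # 30000 characters of trivial code
print(len(source))               # 30000 -> would trip a char-based limit
print(count_tokens(source))      # 15000 -> the budget that actually matters
```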
@@ -1,9 +1,264 @@
+from collections import defaultdict
+import os
+import re
+from dataclasses import dataclass
+from typing import List, Pattern, Dict, Any, Set, Union
+from concurrent.futures import ThreadPoolExecutor
+import byzerllm
+from pydantic import BaseModel
+from rich.tree import Tree
+from rich.console import Console
+from loguru import logger
 from autocoder.pyproject import PyProject
 from autocoder.tsproject import TSProject
 from autocoder.suffixproject import SuffixProject
 from autocoder.common import AutoCoderArgs
-import byzerllm
-from typing import Union
+
+@dataclass
+class AnalysisConfig:
+    exclude_dirs: List[str] = None
+    exclude_file_patterns: List[Pattern] = None
+    exclude_extensions: List[str] = None
+    max_depth: int = -1
+    show_hidden: bool = False
+    parallel_processing: bool = True
+
+class ExtentionResult(BaseModel):
+    code: List[str] = []
+    config: List[str] = []
+    data: List[str] = []
+    document: List[str] = []
+    other: List[str] = []
+
+class EnhancedFileAnalyzer:
+    DEFAULT_EXCLUDE_DIRS = [".git", "node_modules", "__pycache__", "venv"]
+    DEFAULT_EXCLUDE_EXTS = [".log", ".tmp", ".bak", ".swp"]
+
+    def __init__(self, args: AutoCoderArgs, llm: Union[byzerllm.ByzerLLM, byzerllm.SimpleByzerLLM], config: AnalysisConfig = None):
+        self.directory = os.path.abspath(args.source_dir)
+        self.config = config or self.default_config()
+        self.llm = llm
+        self.console = Console()
+        self.file_filter = EnhancedFileFilter(self.config)
+
+    @classmethod
+    def default_config(cls) -> AnalysisConfig:
+        return AnalysisConfig(
+            exclude_dirs=cls.DEFAULT_EXCLUDE_DIRS,
+            exclude_file_patterns=[re.compile(r'~$')],  # exclude temp files by default
+            exclude_extensions=cls.DEFAULT_EXCLUDE_EXTS
+        )
+
+    def analyze(self) -> Dict[str, Any]:
+        """Run the full analysis pipeline"""
+        return {
+            "structure": self.get_tree_structure(),
+            "extensions": self.analyze_extensions(),
+            "stats": self.get_directory_stats()
+        }
+
+    def get_tree_structure(self) -> Dict:
+        """Get the optimized tree structure"""
+        tree = {}
+        if self.config.parallel_processing:
+            return self._parallel_tree_build()
+        return self._sequential_tree_build()
+
+    def _sequential_tree_build(self) -> Dict:
+        """Build the directory tree single-threaded"""
+        tree = {}
+        for root, dirs, files in os.walk(self.directory):
+            dirs[:] = [d for d in dirs if not self.file_filter.should_ignore(d, True)]
+            relative_path = os.path.relpath(root, self.directory)
+            current = tree
+            for part in relative_path.split(os.sep):
+                current = current.setdefault(part, {})
+            current.update({f: None for f in files if not self.file_filter.should_ignore(f, False)})
+        return tree
+
+    def _parallel_tree_build(self) -> Dict:
+        """Build the directory tree in parallel"""
+        from concurrent.futures import ThreadPoolExecutor, as_completed
+        import threading
+
+        tree = {}
+        tree_lock = threading.Lock()
+
+        def process_directory(root: str, dirs: List[str], files: List[str]) -> Dict:
+            local_tree = {}
+            relative_path = os.path.relpath(root, self.directory)
+            current = local_tree
+            for part in relative_path.split(os.sep):
+                current = current.setdefault(part, {})
+            current.update({f: None for f in files if not self.file_filter.should_ignore(f, False)})
+            return local_tree
+
+        with ThreadPoolExecutor() as executor:
+            futures = []
+            for root, dirs, files in os.walk(self.directory):
+                dirs[:] = [d for d in dirs if not self.file_filter.should_ignore(d, True)]
+                futures.append(executor.submit(process_directory, root, dirs, files))
+
+            for future in as_completed(futures):
+                try:
+                    local_tree = future.result()
+                    with tree_lock:
+                        self._merge_trees(tree, local_tree)
+                except Exception as e:
+                    logger.error(f"Error processing directory: {e}")
+
+        return tree
+
+    def _merge_trees(self, base_tree: Dict, new_tree: Dict) -> None:
+        """Recursively merge two directory trees"""
+        for key, value in new_tree.items():
+            if key in base_tree:
+                if isinstance(value, dict) and isinstance(base_tree[key], dict):
+                    self._merge_trees(base_tree[key], value)
+            else:
+                base_tree[key] = value
+
+    def analyze_extensions(self) -> Dict:
+        """Enhanced extension analysis"""
+        from collections import defaultdict
+        extensions = self._collect_extensions()
+        if self.llm:
+            return self._llm_enhanced_analysis.with_llm(self.llm).run(extensions)
+        return self._basic_analysis(extensions)
+
+    def _collect_extensions(self) -> Set[str]:
+        """Collect file extensions, with filtering"""
+        extensions = set()
+        for root, dirs, files in os.walk(self.directory):
+            dirs[:] = [d for d in dirs if not self.file_filter.should_ignore(d, True)]
+            for file in files:
+                if self.file_filter.should_ignore(file, False):
+                    continue
+                ext = os.path.splitext(file)[1].lower()
+                if ext:  # skip files without an extension
+                    extensions.add(ext)
+        return extensions
+
+    @byzerllm.prompt()
+    def _llm_enhanced_analysis(self, extensions: List[str]) -> Dict:
+        """LLM-enhanced analysis"""
+        '''
+        Classify the following file extensions according to these rules:
+
+        1. Code files: files containing compilable code with syntactic structure
+        2. Config files: files containing parameter settings or environment configuration
+        3. Data files: files containing structured or unstructured data
+        4. Document files: files containing documentation, notes, or instructions
+        5. Other files: files that cannot be clearly classified
+
+        File extension list:
+        {{ extensions | join(', ') }}
+
+        Return JSON in the following format:
+        {
+            "code": ["ext1", "ext2"],
+            "config": ["ext3", "ext4"],
+            "data": ["ext5", "ext6"],
+            "document": ["ext7", "ext8"],
+            "other": ["ext9", "ext10"]
+        }
+        '''
+        return {
+            "extensions": extensions
+        }
+
+    def _basic_analysis(self, extensions: Set[str]) -> Dict:
+        """Rule-based basic analysis"""
+        CODE_EXTS = {'.py', '.js', '.ts', '.java', '.c', '.cpp'}
+        CONFIG_EXTS = {'.yml', '.yaml', '.json', '.toml', '.ini'}
+
+        return {
+            "code": [ext for ext in extensions if ext in CODE_EXTS],
+            "config": [ext for ext in extensions if ext in CONFIG_EXTS],
+            "unknown": [ext for ext in extensions if ext not in CODE_EXTS | CONFIG_EXTS]
+        }
+
+    def get_directory_stats(self) -> Dict:
+        """Get directory statistics"""
+        stats = {
+            'total_files': 0,
+            'total_dirs': 0,
+            'by_extension': defaultdict(int),
+            'file_types': {
+                'code': 0,
+                'config': 0,
+                'data': 0,
+                'document': 0,
+                'other': 0
+            }
+        }
+        for root, dirs, files in os.walk(self.directory):
+            dirs[:] = [d for d in dirs if not self.file_filter.should_ignore(d, True)]
+            stats['total_dirs'] += len(dirs)
+            for file in files:
+                if self.file_filter.should_ignore(file, False):
+                    continue
+                stats['total_files'] += 1
+                ext = os.path.splitext(file)[1].lower()
+                stats['by_extension'][ext] += 1
+
+                # Classify by extension
+                if ext in ['.py', '.js', '.ts', '.java', '.c', '.cpp']:
+                    stats['file_types']['code'] += 1
+                elif ext in ['.yml', '.yaml', '.json', '.toml', '.ini']:
+                    stats['file_types']['config'] += 1
+                else:
+                    stats['file_types']['other'] += 1
+        return stats
+
+    def interactive_display(self):
+        """Interactive visual display"""
+        tree = self.build_interactive_tree(self.directory, self.config)
+        self.console.print(tree)
+        self.console.print("\n[bold]Statistical Summary:[/]")
+        stats = self.get_directory_stats()
+
+        from rich.table import Table
+        table = Table(title="Directory Statistics", show_header=True, header_style="bold magenta")
+        table.add_column("Metric", style="cyan")
+        table.add_column("Value", style="green")
+
+        table.add_row("Total Files", str(stats['total_files']))
+        table.add_row("Total Directories", str(stats['total_dirs']))
+        table.add_row("Code Files", str(stats['file_types']['code']))
+        table.add_row("Config Files", str(stats['file_types']['config']))
+        self.console.print(table)
+
+class EnhancedFileFilter:
+    """Enhanced file filter"""
+    def __init__(self, config: AnalysisConfig):
+        self.config = config
+
+    def should_ignore(self, path: str, is_dir: bool) -> bool:
+        """Decide whether a path should be ignored"""
+        base_name = os.path.basename(path)
+
+        # Hidden files
+        if not self.config.show_hidden and base_name.startswith('.'):
+            return True
+
+        # Excluded directories
+        if is_dir and base_name in self.config.exclude_dirs:
+            return True
+
+        # Excluded file extensions
+        if not is_dir:
+            ext = os.path.splitext(path)[1].lower()
+            if ext in self.config.exclude_extensions:
+                return True
+
+        # Regex pattern exclusion
+        full_path = os.path.abspath(path)
+        for pattern in self.config.exclude_file_patterns:
+            if pattern.search(full_path):
+                return True
+
+        return False
 
 def get_project_structure(args:AutoCoderArgs, llm:Union[byzerllm.ByzerLLM, byzerllm.SimpleByzerLLM]):
     if args.project_type == "ts":
@@ -12,4 +267,4 @@ def get_project_structure(args:AutoCoderArgs, llm:Union[byzerllm.ByzerLLM, byzer
         pp = PyProject(args=args, llm=llm)
     else:
         pp = SuffixProject(args=args, llm=llm, file_filter=None)
-    return pp.get_tree_like_directory_structure()
+    return pp.get_tree_like_directory_structure()
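
The new EnhancedFileAnalyzer bundles tree building, extension classification, and directory stats behind a single analyze() call. A minimal usage sketch, assuming AutoCoderArgs can be constructed with just source_dir, and passing llm=None so analyze_extensions() falls back to the rule-based _basic_analysis path:

```python
# Minimal usage sketch (assumptions: AutoCoderArgs(source_dir=...) is a valid
# constructor call, and llm=None routes extension analysis to the rule-based path).
from autocoder.common import AutoCoderArgs

args = AutoCoderArgs(source_dir=".")
analyzer = EnhancedFileAnalyzer(args=args, llm=None)

report = analyzer.analyze()
print(report["stats"]["total_files"])     # file count after filtering
print(report["extensions"].get("code"))   # e.g. ['.py', '.ts']
```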
@@ -176,7 +176,12 @@ def run_in_raw_thread():
     exception = []
     def worker():
         try:
-            # global_cancel.reset()
+            # If the cancel flag is already set here, it is likely a leftover from a user interrupt that has not been released
+            # Wait five seconds, then force-release it
+            if global_cancel.requested:
+                time.sleep(5)
+                global_cancel.reset()
+
             ret = func(*args, **kwargs)
             result.append(ret)
             global_cancel.reset()
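
Instead of resetting unconditionally, the worker now gives a stale cancel flag a five-second grace period before force-releasing it. A standalone sketch of the pattern with a toy cancel token that mirrors the requested/reset() interface used above (the real global_cancel API is assumed from this usage):

```python
import time
import threading

class CancelToken:
    # Toy stand-in for global_cancel: a flag with requested and reset()
    def __init__(self):
        self._event = threading.Event()

    @property
    def requested(self) -> bool:
        return self._event.is_set()

    def request(self):
        self._event.set()

    def reset(self):
        self._event.clear()

cancel = CancelToken()
cancel.request()              # stale flag left over from a previous interrupt

def worker(task):
    if cancel.requested:      # grace period before force-releasing
        time.sleep(5)
        cancel.reset()
    return task()

print(worker(lambda: "done"))
```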
autocoder/version.py CHANGED
@@ -1 +1 @@
-__version__ = "0.1.263"
+__version__ = "0.1.264"