PyPI - auto-coder - Version diff - 0.1.232.tar.gz → 0.1.233.tar.gz - Mend

auto-coder 0.1.232.tar.gz → 0.1.233.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of auto-coder might be problematic. Click here for more details.

Files changed (156)

{auto_coder-0.1.232 → auto_coder-0.1.233}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: auto-coder
-Version: 0.1.232
+Version: 0.1.233
 Summary: AutoCoder: AutoCoder
 Author: allwefantasy
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence

{auto_coder-0.1.232 → auto_coder-0.1.233}/src/auto_coder.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: auto-coder
-Version: 0.1.232
+Version: 0.1.233
 Summary: AutoCoder: AutoCoder
 Author: allwefantasy
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence

{auto_coder-0.1.232 → auto_coder-0.1.233}/src/auto_coder.egg-info/SOURCES.txt RENAMED Viewed

@@ -84,10 +84,14 @@ src/autocoder/dispacher/actions/plugins/__init__.py
 src/autocoder/dispacher/actions/plugins/action_regex_project.py
 src/autocoder/dispacher/actions/plugins/action_translate.py
 src/autocoder/index/__init__.py
+src/autocoder/index/entry.py
 src/autocoder/index/for_command.py
 src/autocoder/index/index.py
 src/autocoder/index/symbols_utils.py
 src/autocoder/index/types.py
+src/autocoder/index/filter/__init__.py
+src/autocoder/index/filter/normal_filter.py
+src/autocoder/index/filter/quick_filter.py
 src/autocoder/pyproject/__init__.py
 src/autocoder/rag/__init__.py
 src/autocoder/rag/api_server.py

{auto_coder-0.1.232 → auto_coder-0.1.233}/src/autocoder/auto_coder.py RENAMED Viewed

@@ -42,6 +42,7 @@ from rich.live import Live
 from autocoder.auto_coder_lang import get_message
 from autocoder.common.memory_manager import save_to_memory_file
 from autocoder import models as models_module
+from autocoder.common.utils_code_auto_generate import stream_chat_with_continue
 from autocoder.utils.auto_coder_utils.chat_stream_out import stream_out
 console = Console()
@@ -443,7 +444,24 @@ def main(input_args: Optional[List[str]] = None):
                         "saas.is_reasoning": model_info["is_reasoning"]
                     }
                 )
-                llm.setup_sub_client("inference_model", inference_model)
+                llm.setup_sub_client("inference_model", inference_model)
+            if args.index_filter_model:
+                model_info = models_module.get_model_by_name(args.index_filter_model)
+                model_name = args.index_filter_model
+                index_filter_model = byzerllm.SimpleByzerLLM(default_model_name=model_name)
+                index_filter_model.deploy(
+                    model_path="",
+                    pretrained_model_type=model_info["model_type"],
+                    udf_name=model_name,
+                    infer_params={
+                        "saas.base_url": model_info["base_url"],
+                        "saas.api_key": model_info["api_key"],
+                        "saas.model": model_info["model_name"],
+                        "saas.is_reasoning": model_info["is_reasoning"]
+                    }
+                )
+                llm.setup_sub_client("index_filter_model", index_filter_model)
         if args.product_mode == "pro":
@@ -482,7 +500,12 @@ def main(input_args: Optional[List[str]] = None):
             if args.inference_model:
                 inference_model = byzerllm.ByzerLLM()
                 inference_model.setup_default_model_name(args.inference_model)
-                llm.setup_sub_client("inference_model", inference_model)
+                llm.setup_sub_client("inference_model", inference_model)
+            if args.index_filter_model:
+                index_filter_model = byzerllm.ByzerLLM()
+                index_filter_model.setup_default_model_name(args.index_filter_model)
+                llm.setup_sub_client("index_filter_model", index_filter_model)
         if args.human_as_model:
@@ -1106,7 +1129,8 @@ def main(input_args: Optional[List[str]] = None):
                         {"role": "assistant", "content": "read"})
                     source_count += 1
-            from autocoder.index.index import IndexManager, build_index_and_filter_files
+            from autocoder.index.index import IndexManager
+            from autocoder.index.entry import build_index_and_filter_files
             from autocoder.pyproject import PyProject
             from autocoder.tsproject import TSProject
             from autocoder.suffixproject import SuffixProject
@@ -1272,8 +1296,7 @@ def main(input_args: Optional[List[str]] = None):
                     )
                 )
                 v = [[response.result,None]]
-            else:
-                from autocoder.common.utils_code_auto_generate import stream_chat_with_continue
+            else:
                 v = stream_chat_with_continue(
                     llm=chat_llm,
                     conversations=loaded_conversations,

{auto_coder-0.1.232 → auto_coder-0.1.233}/src/autocoder/chat_auto_coder.py RENAMED Viewed

@@ -52,6 +52,7 @@ from autocoder.common.memory_manager import get_global_memory_file_paths
 from autocoder import models
 import shlex
 from autocoder.utils.llms import get_single_llm
+import pkg_resources
 class SymbolItem(BaseModel):
     symbol_name: str
@@ -2478,6 +2479,17 @@ def lib_command(args: List[str]):
 def main():
+    from autocoder.rag.variable_holder import VariableHolder
+    from tokenizers import Tokenizer
+    try:
+        tokenizer_path = pkg_resources.resource_filename(
+            "autocoder", "data/tokenizer.json"
+        )
+        VariableHolder.TOKENIZER_PATH = tokenizer_path
+        VariableHolder.TOKENIZER_MODEL = Tokenizer.from_file(tokenizer_path)
+    except FileNotFoundError:
+        tokenizer_path = None
     ARGS = parse_arguments()
     if ARGS.lite:

{auto_coder-0.1.232 → auto_coder-0.1.233}/src/autocoder/common/__init__.py RENAMED Viewed

@@ -259,6 +259,7 @@ class AutoCoderArgs(pydantic.BaseModel):
     skip_filter_index: Optional[bool] = False
     index_model: Optional[str] = ""
+    index_filter_model: Optional[str] = ""
     index_model_max_length: Optional[int] = 0
     index_model_max_input_length: Optional[int] = 0
     index_model_anti_quota_limit: Optional[int] = 0

{auto_coder-0.1.232 → auto_coder-0.1.233}/src/autocoder/dispacher/actions/action.py RENAMED Viewed

@@ -9,7 +9,7 @@ from autocoder.common.buildin_tokenizer import BuildinTokenizer
 from autocoder.pyproject import PyProject, Level1PyProject
 from autocoder.tsproject import TSProject
 from autocoder.suffixproject import SuffixProject
-from autocoder.index.index import build_index_and_filter_files
+from autocoder.index.entry import build_index_and_filter_files
 from autocoder.common.code_auto_merge import CodeAutoMerge
 from autocoder.common.code_auto_merge_diff import CodeAutoMergeDiff
 from autocoder.common.code_auto_merge_strict_diff import CodeAutoMergeStrictDiff

{auto_coder-0.1.232 → auto_coder-0.1.233}/src/autocoder/dispacher/actions/copilot.py RENAMED Viewed

@@ -9,7 +9,7 @@ from autocoder.common.JupyterClient import JupyterNotebook
 from autocoder.common.ShellClient import ShellClient
 from autocoder.suffixproject import SuffixProject
 from autocoder.common.search import Search, SearchEngine
-from autocoder.index.index import build_index_and_filter_files
+from autocoder.index.entry import build_index_and_filter_files
 from autocoder.common.image_to_page import ImageToPage, ImageToPageDirectly
 from typing import Optional, Dict, Any, List
 import byzerllm

{auto_coder-0.1.232 → auto_coder-0.1.233}/src/autocoder/dispacher/actions/plugins/action_regex_project.py RENAMED Viewed

@@ -9,7 +9,7 @@ from autocoder.common.code_auto_generate import CodeAutoGenerate
 from autocoder.common.code_auto_generate_diff import CodeAutoGenerateDiff
 from autocoder.common.code_auto_generate_strict_diff import CodeAutoGenerateStrictDiff
 from autocoder.common.code_auto_generate_editblock import CodeAutoGenerateEditBlock
-from autocoder.index.index import build_index_and_filter_files
+from autocoder.index.entry import build_index_and_filter_files
 from autocoder.regexproject import RegexProject
 from autocoder.utils.conversation_store import store_code_model_conversation
 from loguru import logger

auto_coder-0.1.233/src/autocoder/index/entry.py ADDED Viewed

@@ -0,0 +1,286 @@
+import os
+import json
+import time
+from typing import List, Dict, Any, Optional
+from datetime import datetime
+from autocoder.common import SourceCode, AutoCoderArgs
+from rich.console import Console
+from rich.table import Table
+from rich.panel import Panel
+from loguru import logger
+from autocoder.utils.queue_communicate import (
+    queue_communicate,
+    CommunicateEvent,
+    CommunicateEventType,
+)
+from autocoder.index.types import (
+    TargetFile
+)
+from autocoder.index.filter.quick_filter import QuickFilter
+from autocoder.index.filter.normal_filter import NormalFilter
+from autocoder.index.index import IndexManager
+def build_index_and_filter_files(
+    llm, args: AutoCoderArgs, sources: List[SourceCode]
+) -> str:
+    # Initialize timing and statistics
+    total_start_time = time.monotonic()
+    stats = {
+        "total_files": len(sources),
+        "indexed_files": 0,
+        "level1_filtered": 0,
+        "level2_filtered": 0,
+        "verified_files": 0,
+        "final_files": 0,
+        "timings": {
+            "process_tagged_sources": 0.0,
+            "build_index": 0.0,
+            "quick_filter": 0.0,
+            "normal_filter": {
+                "level1_filter": 0.0,
+                "level2_filter": 0.0,
+                "relevance_verification": 0.0,
+            },
+            "file_selection": 0.0,
+            "prepare_output": 0.0,
+            "total": 0.0
+        }
+    }
+    def get_file_path(file_path):
+        if file_path.startswith("##"):
+            return file_path.strip()[2:]
+        return file_path
+    final_files: Dict[str, TargetFile] = {}
+    # Phase 1: Process REST/RAG/Search sources
+    logger.info("Phase 1: Processing REST/RAG/Search sources...")
+    phase_start = time.monotonic()
+    for source in sources:
+        if source.tag in ["REST", "RAG", "SEARCH"]:
+            final_files[get_file_path(source.module_name)] = TargetFile(
+                file_path=source.module_name, reason="Rest/Rag/Search"
+            )
+    phase_end = time.monotonic()
+    stats["timings"]["process_tagged_sources"] = phase_end - phase_start
+    if not args.skip_build_index and llm:
+        # Phase 2: Build index
+        if args.request_id and not args.skip_events:
+            queue_communicate.send_event(
+                request_id=args.request_id,
+                event=CommunicateEvent(
+                    event_type=CommunicateEventType.CODE_INDEX_BUILD_START.value,
+                    data=json.dumps({"total_files": len(sources)})
+                )
+            )
+        logger.info("Phase 2: Building index for all files...")
+        phase_start = time.monotonic()
+        index_manager = IndexManager(llm=llm, sources=sources, args=args)
+        index_data = index_manager.build_index()
+        stats["indexed_files"] = len(index_data) if index_data else 0
+        phase_end = time.monotonic()
+        stats["timings"]["build_index"] = phase_end - phase_start
+        if args.request_id and not args.skip_events:
+            queue_communicate.send_event(
+                request_id=args.request_id,
+                event=CommunicateEvent(
+                    event_type=CommunicateEventType.CODE_INDEX_BUILD_END.value,
+                    data=json.dumps({
+                        "indexed_files": stats["indexed_files"],
+                        "build_index_time": stats["timings"]["build_index"],
+                    })
+                )
+            )
+        quick_filter = QuickFilter(index_manager,stats,sources)
+        final_files = quick_filter.filter(index_manager.read_index(),args.query)
+        if not final_files:
+            normal_filter = NormalFilter(index_manager,stats,sources)
+            final_files = normal_filter.filter(index_manager.read_index(),args.query)
+    def display_table_and_get_selections(data):
+        from prompt_toolkit.shortcuts import checkboxlist_dialog
+        from prompt_toolkit.styles import Style
+        choices = [(file, f"{file} - {reason}") for file, reason in data]
+        selected_files = [file for file, _ in choices]
+        style = Style.from_dict(
+            {
+                "dialog": "bg:#88ff88",
+                "dialog frame.label": "bg:#ffffff #000000",
+                "dialog.body": "bg:#88ff88 #000000",
+                "dialog shadow": "bg:#00aa00",
+            }
+        )
+        result = checkboxlist_dialog(
+            title="Target Files",
+            text="Tab to switch between buttons, and Space/Enter to select/deselect.",
+            values=choices,
+            style=style,
+            default_values=selected_files,
+        ).run()
+        return [file for file in result] if result else []
+    def print_selected(data):
+        console = Console()
+        table = Table(
+            title="Files Used as Context",
+            show_header=True,
+            header_style="bold magenta",
+        )
+        table.add_column("File Path", style="cyan", no_wrap=True)
+        table.add_column("Reason", style="green")
+        for file, reason in data:
+            table.add_row(file, reason)
+        panel = Panel(
+            table,
+            expand=False,
+            border_style="bold blue",
+            padding=(1, 1),
+        )
+        console.print(panel)
+    # Phase 6: File selection and limitation
+    logger.info("Phase 6: Processing file selection and limits...")
+    phase_start = time.monotonic()
+    if args.index_filter_file_num > 0:
+        logger.info(
+            f"Limiting files from {len(final_files)} to {args.index_filter_file_num}")
+    if args.skip_confirm:
+        final_filenames = [file.file_path for file in final_files.values()]
+        if args.index_filter_file_num > 0:
+            final_filenames = final_filenames[: args.index_filter_file_num]
+    else:
+        target_files_data = [
+            (file.file_path, file.reason) for file in final_files.values()
+        ]
+        if not target_files_data:
+            logger.warning(
+                "No target files found, you may need to rewrite the query and try again."
+            )
+            final_filenames = []
+        else:
+            final_filenames = display_table_and_get_selections(
+                target_files_data)
+        if args.index_filter_file_num > 0:
+            final_filenames = final_filenames[: args.index_filter_file_num]
+    phase_end = time.monotonic()
+    stats["timings"]["file_selection"] = phase_end - phase_start
+    # Phase 7: Display results and prepare output
+    logger.info("Phase 7: Preparing final output...")
+    phase_start = time.monotonic()
+    try:
+        print_selected(
+            [
+                (file.file_path, file.reason)
+                for file in final_files.values()
+                if file.file_path in final_filenames
+            ]
+        )
+    except Exception as e:
+        logger.warning(
+            "Failed to display selected files in terminal mode. Falling back to simple print."
+        )
+        print("Target Files Selected:")
+        for file in final_filenames:
+            print(f"{file} - {final_files[file].reason}")
+    source_code = ""
+    depulicated_sources = set()
+    for file in sources:
+        if file.module_name in final_filenames:
+            if file.module_name in depulicated_sources:
+                continue
+            depulicated_sources.add(file.module_name)
+            source_code += f"##File: {file.module_name}\n"
+            source_code += f"{file.source_code}\n\n"
+    if args.request_id and not args.skip_events:
+        queue_communicate.send_event(
+            request_id=args.request_id,
+            event=CommunicateEvent(
+                event_type=CommunicateEventType.CODE_INDEX_FILTER_FILE_SELECTED.value,
+                data=json.dumps([
+                    (file.file_path, file.reason)
+                    for file in final_files.values()
+                    if file.file_path in depulicated_sources
+                ])
+            )
+        )
+    stats["final_files"] = len(depulicated_sources)
+    phase_end = time.monotonic()
+    stats["timings"]["prepare_output"] = phase_end - phase_start
+    # Calculate total time and print summary
+    total_end_time = time.monotonic()
+    total_time = total_end_time - total_start_time
+    stats["timings"]["total"] = total_time
+    # Calculate total filter time
+    total_filter_time = (
+        stats["timings"]["quick_filter"] +
+        stats["timings"]["normal_filter"]["level1_filter"] +
+        stats["timings"]["normal_filter"]["level2_filter"] +
+        stats["timings"]["normal_filter"]["relevance_verification"]
+    )
+    # Print final statistics in a more structured way
+    summary = f"""
+=== Indexing and Filtering Summary ===
+• Total files scanned: {stats['total_files']}
+• Files indexed: {stats['indexed_files']}
+• Files filtered:
+  - Level 1 (query-based): {stats['level1_filtered']}
+  - Level 2 (related files): {stats['level2_filtered']}
+  - Relevance verified: {stats.get('verified_files', 0)}
+• Final files selected: {stats['final_files']}
+=== Time Breakdown ===
+• Index build: {stats['timings'].get('build_index', 0):.2f}s
+• Quick filter: {stats['timings'].get('quick_filter', 0):.2f}s
+• Normal filter:
+    - Level 1 filter: {stats['timings']["normal_filter"].get('level1_filter', 0):.2f}s
+    - Level 2 filter: {stats['timings']["normal_filter"].get('level2_filter', 0):.2f}s
+    - Relevance check: {stats['timings']["normal_filter"].get('relevance_verification', 0):.2f}s
+• File selection: {stats['timings'].get('file_selection', 0):.2f}s
+• Total time: {total_time:.2f}s
+====================================
+"""
+    logger.info(summary)
+    if args.request_id and not args.skip_events:
+        queue_communicate.send_event(
+            request_id=args.request_id,
+            event=CommunicateEvent(
+                event_type=CommunicateEventType.CODE_INDEX_FILTER_END.value,
+                data=json.dumps({
+                    "filtered_files": stats["final_files"],
+                    "filter_time": total_filter_time
+                })
+            )
+        )
+    return source_code

auto_coder-0.1.233/src/autocoder/index/filter/normal_filter.py ADDED Viewed

@@ -0,0 +1,168 @@
+from typing import List, Union,Dict,Any
+from autocoder.index.types import IndexItem
+from autocoder.common import SourceCode, AutoCoderArgs
+import byzerllm
+import time
+from autocoder.index.index import IndexManager
+from autocoder.index.types import (
+    IndexItem,
+    TargetFile,
+    VerifyFileRelevance,
+    FileList,
+    FileNumberList
+)
+from loguru import logger
+from autocoder.utils.queue_communicate import (
+    queue_communicate,
+    CommunicateEvent,
+    CommunicateEventType,
+)
+from concurrent.futures import ThreadPoolExecutor, as_completed
+import json
+def get_file_path(file_path):
+    if file_path.startswith("##"):
+        return file_path.strip()[2:]
+    return file_path
+class NormalFilter():
+    def __init__(self, index_manager: IndexManager,stats:Dict[str,Any],sources:List[SourceCode]):
+        self.index_manager = index_manager
+        self.args = index_manager.args
+        self.stats = stats
+        self.sources = sources
+    def filter(self, index_items: List[IndexItem], query: str) -> Dict[str, TargetFile]:
+        final_files: Dict[str, TargetFile] = {}
+        if not self.args.skip_filter_index:
+            if self.args.request_id and not self.args.skip_events:
+                queue_communicate.send_event(
+                    request_id=self.args.request_id,
+                    event=CommunicateEvent(
+                        event_type=CommunicateEventType.CODE_INDEX_FILTER_START.value,
+                        data=json.dumps({})
+                    )
+                )
+            # Phase 3: Level 1 filtering - Query-based
+            logger.info(
+                "Phase 3: Performing Level 1 filtering (query-based)...")
+            phase_start = time.monotonic()
+            target_files = self.index_manager.get_target_files_by_query(self.args.query)
+            if target_files:
+                for file in target_files.file_list:
+                    file_path = file.file_path.strip()
+                    final_files[get_file_path(file_path)] = file
+                self.stats["level1_filtered"] = len(target_files.file_list)
+            phase_end = time.monotonic()
+            self.stats["timings"]["normal_filter"]["level1_filter"] = phase_end - phase_start
+            # Phase 4: Level 2 filtering - Related files
+            if target_files is not None and self.args.index_filter_level >= 2:
+                logger.info(
+                    "Phase 4: Performing Level 2 filtering (related files)...")
+                if self.args.request_id and not self.args.skip_events:
+                    queue_communicate.send_event(
+                        request_id=self.args.request_id,
+                        event=CommunicateEvent(
+                            event_type=CommunicateEventType.CODE_INDEX_FILTER_START.value,
+                            data=json.dumps({})
+                        )
+                    )
+                phase_start = time.monotonic()
+                related_files = self.index_manager.get_related_files(
+                    [file.file_path for file in target_files.file_list]
+                )
+                if related_files is not None:
+                    for file in related_files.file_list:
+                        file_path = file.file_path.strip()
+                        final_files[get_file_path(file_path)] = file
+                    self.stats["level2_filtered"] = len(related_files.file_list)
+                phase_end = time.monotonic()
+                self.stats["timings"]["normal_filter"]["level2_filter"] = phase_end - phase_start
+            if not final_files:
+                logger.warning("No related files found, using all files")
+                for source in self.sources:
+                    final_files[get_file_path(source.module_name)] = TargetFile(
+                        file_path=source.module_name,
+                        reason="No related files found, use all files",
+                    )
+            # Phase 5: Relevance verification
+            logger.info("Phase 5: Performing relevance verification...")
+            if self.args.index_filter_enable_relevance_verification:
+                phase_start = time.monotonic()
+                verified_files = {}
+                temp_files = list(final_files.values())
+                verification_results = []
+                def print_verification_results(results):
+                    from rich.table import Table
+                    from rich.console import Console
+                    console = Console()
+                    table = Table(title="File Relevance Verification Results", show_header=True, header_style="bold magenta")
+                    table.add_column("File Path", style="cyan", no_wrap=True)
+                    table.add_column("Score", justify="right", style="green")
+                    table.add_column("Status", style="yellow")
+                    table.add_column("Reason/Error")
+                    for file_path, score, status, reason in results:
+                        table.add_row(
+                            file_path,
+                            str(score) if score is not None else "N/A",
+                            status,
+                            reason
+                        )
+                    console.print(table)
+                def verify_single_file(file: TargetFile):
+                    for source in self.sources:
+                        if source.module_name == file.file_path:
+                            file_content = source.source_code
+                            try:
+                                result = self.index_manager.verify_file_relevance.with_llm(llm).with_return_type(VerifyFileRelevance).run(
+                                    file_content=file_content,
+                                    query=self.args.query
+                                )
+                                if result.relevant_score >= self.args.verify_file_relevance_score:
+                                    verified_files[file.file_path] = TargetFile(
+                                        file_path=file.file_path,
+                                        reason=f"Score:{result.relevant_score}, {result.reason}"
+                                    )
+                                    return file.file_path, result.relevant_score, "PASS", result.reason
+                                else:
+                                    return file.file_path, result.relevant_score, "FAIL", result.reason
+                            except Exception as e:
+                                error_msg = str(e)
+                                verified_files[file.file_path] = TargetFile(
+                                    file_path=file.file_path,
+                                    reason=f"Verification failed: {error_msg}"
+                                )
+                                return file.file_path, None, "ERROR", error_msg
+                    return None
+                with ThreadPoolExecutor(max_workers=self.args.index_filter_workers) as executor:
+                    futures = [executor.submit(verify_single_file, file)
+                            for file in temp_files]
+                    for future in as_completed(futures):
+                        result = future.result()
+                        if result:
+                            verification_results.append(result)
+                            time.sleep(self.args.anti_quota_limit)
+                # Print verification results in a table
+                print_verification_results(verification_results)
+                self.stats["verified_files"] = len(verified_files)
+                phase_end = time.monotonic()
+                self.stats["timings"]["normal_filter"]["relevance_verification"] = phase_end - phase_start
+                # Keep all files, not just verified ones
+                final_files = verified_files
+        return final_files

auto-coder 0.1.232.tar.gz → 0.1.233.tar.gz

Potentially problematic release.

auto-coder 0.1.232.tar.gz → 0.1.233.tar.gz