auto-coder 0.1.232__py3-none-any.whl → 0.1.235__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of auto-coder might be problematic.

autocoder/index/index.py CHANGED
@@ -6,37 +6,23 @@ from datetime import datetime
 from autocoder.common import SourceCode, AutoCoderArgs
 from autocoder.index.symbols_utils import (
     extract_symbols,
-    SymbolsInfo,
     SymbolType,
     symbols_info_to_str,
 )
 from concurrent.futures import ThreadPoolExecutor, as_completed
 import threading
 
-import pydantic
 import byzerllm
 import hashlib
-import textwrap
-import tabulate
-from rich.console import Console
-from rich.table import Table
-from rich.panel import Panel
-from rich.text import Text
-
-from loguru import logger
-from autocoder.utils.queue_communicate import (
-    queue_communicate,
-    CommunicateEvent,
-    CommunicateEventType,
-)
+
+from autocoder.common.printer import Printer
+from autocoder.common.auto_coder_lang import get_message
 from autocoder.index.types import (
     IndexItem,
     TargetFile,
-    VerifyFileRelevance,
     FileList,
 )
 
-
 class IndexManager:
     def __init__(
         self, llm: byzerllm.ByzerLLM, sources: List[SourceCode], args: AutoCoderArgs
@@ -53,16 +39,23 @@ class IndexManager:
         else:
             self.index_llm = llm
 
+        if llm and (s := llm.get_sub_client("index_filter_model")):
+            self.index_filter_llm = s
+        else:
+            self.index_filter_llm = llm
+
         self.llm = llm
         self.args = args
         self.max_input_length = (
             args.index_model_max_input_length or args.model_max_input_length
         )
+        self.printer = Printer()
 
         # Create the index directory if it does not exist
         if not os.path.exists(self.index_dir):
             os.makedirs(self.index_dir)
 
+
     @byzerllm.prompt()
     def verify_file_relevance(self, file_content: str, query: str) -> str:
         """
@@ -215,8 +208,12 @@ class IndexManager:
             start_time = time.monotonic()
             source_code = source.source_code
             if len(source.source_code) > self.max_input_length:
-                logger.warning(
-                    f"Warning[Build Index]: The length of source code({source.module_name}) is too long ({len(source.source_code)}) > model_max_input_length({self.max_input_length}), splitting into chunks..."
+                self.printer.print_in_terminal(
+                    "index_file_too_large",
+                    style="yellow",
+                    file_path=source.module_name,
+                    file_size=len(source.source_code),
+                    max_length=self.max_input_length
                 )
                 chunks = self.split_text_into_chunks(
                     source_code, self.max_input_length - 1000
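
When a file exceeds max_input_length, the builder splits it into chunks,
reserving 1000 characters of headroom below the model limit.
split_text_into_chunks itself is not part of this diff; a minimal
fixed-size splitter with that contract could look like:

    # Hypothetical splitter; the packaged split_text_into_chunks
    # implementation is not shown in this diff.
    def split_text_into_chunks(text: str, max_chunk_size: int) -> list:
        return [text[i:i + max_chunk_size]
                for i in range(0, len(text), max_chunk_size)]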
@@ -233,12 +230,23 @@ class IndexManager:
                         self.index_llm).run(source.module_name, source_code)
                     time.sleep(self.anti_quota_limit)
 
-                logger.info(
-                    f"Parse and update index for {file_path} md5: {md5} took {time.monotonic() - start_time:.2f}s"
+                self.printer.print_in_terminal(
+                    "index_update_success",
+                    style="green",
+                    file_path=file_path,
+                    md5=md5,
+                    duration=time.monotonic() - start_time
                 )
 
             except Exception as e:
-                logger.warning(f"Error: {e}")
+                # import traceback
+                # traceback.print_exc()
+                self.printer.print_in_terminal(
+                    "index_build_error",
+                    style="red",
+                    file_path=file_path,
+                    error=str(e)
+                )
                 return None
 
             return {
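
Throughout this release, direct loguru calls give way to
Printer.print_in_terminal, which takes a message key plus keyword
arguments instead of a preformatted string, so output can be localized
via autocoder.common.auto_coder_lang.get_message. The Printer internals
are not included in this diff; a minimal sketch of the interface these
call sites assume:

    # Hypothetical sketch; the packaged autocoder.common.printer
    # implementation is not shown in this diff.
    from rich.console import Console
    from autocoder.common.auto_coder_lang import get_message

    class Printer:
        def __init__(self):
            self.console = Console()

        def print_in_terminal(self, key: str, style: str = "", **kwargs):
            # get_message is assumed to map a key such as
            # "index_update_success" to a localized template with
            # {file_path}-style placeholders.
            self.console.print(get_message(key).format(**kwargs), style=style)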
@@ -264,8 +272,11 @@ class IndexManager:
 
         for item in index_data.keys():
             if not item.startswith(self.source_dir):
-                logger.warning(
-                    error_message(source_dir=self.source_dir, file_path=item)
+                self.printer.print_in_terminal(
+                    "index_source_dir_mismatch",
+                    style="yellow",
+                    source_dir=self.source_dir,
+                    file_path=item
                 )
                 break
 
@@ -300,8 +311,12 @@ class IndexManager:
             counter = 0
             num_files = len(wait_to_build_files)
             total_files = len(self.sources)
-            logger.info(
-                f"Total Files: {total_files}, Need to Build Index: {num_files}")
+            self.printer.print_in_terminal(
+                "index_build_summary",
+                style="bold blue",
+                total_files=total_files,
+                num_files=num_files
+            )
 
             futures = [
                 executor.submit(self.build_index_for_single_source, source)
@@ -311,7 +326,12 @@ class IndexManager:
                 result = future.result()
                 if result is not None:
                     counter += 1
-                    logger.info(f"Building Index:{counter}/{num_files}...")
+                    self.printer.print_in_terminal(
+                        "building_index_progress",
+                        style="blue",
+                        counter=counter,
+                        num_files=num_files
+                    )
                     module_name = result["module_name"]
                     index_data[module_name] = result
                     updated_sources.append(module_name)
@@ -413,8 +433,10 @@ class IndexManager:
                 with lock:
                     all_results.extend(result.file_list)
             else:
-                logger.warning(
-                    f"Fail to find related files for chunk {chunk_count}. This may be caused by the model limit or the query not being suitable for the files."
+                self.printer.print_in_terminal(
+                    "index_related_files_fail",
+                    style="yellow",
+                    chunk_count=chunk_count
                 )
             time.sleep(self.args.anti_quota_limit)
 
@@ -451,8 +473,10 @@ class IndexManager:
                     all_results.extend(result.file_list)
                     completed_threads += 1
             else:
-                logger.warning(
-                    f"Fail to find target files for chunk. This is caused by the model response not being in JSON format or the JSON being empty."
+                self.printer.print_in_terminal(
+                    "index_related_files_fail",
+                    style="yellow",
+                    chunk_count="unknown"
                 )
             time.sleep(self.args.anti_quota_limit)
 
@@ -466,7 +490,12 @@ class IndexManager:
         for future in as_completed(futures):
             future.result()
 
-        logger.info(f"Completed {completed_threads}/{total_threads} threads")
+        self.printer.print_in_terminal(
+            "index_threads_completed",
+            style="green",
+            completed_threads=completed_threads,
+            total_threads=total_threads
+        )
         return all_results, total_threads, completed_threads
 
     def get_target_files_by_query(self, query: str) -> FileList:
@@ -565,381 +594,3 @@ class IndexManager:
         Please ensure the accuracy and completeness of the results, including all potentially relevant files.
         """
 
-
-def build_index_and_filter_files(
-    llm, args: AutoCoderArgs, sources: List[SourceCode]
-) -> str:
-    # Initialize timing and statistics
-    total_start_time = time.monotonic()
-    stats = {
-        "total_files": len(sources),
-        "indexed_files": 0,
-        "level1_filtered": 0,
-        "level2_filtered": 0,
-        "verified_files": 0,
-        "final_files": 0,
-        "timings": {
-            "process_tagged_sources": 0.0,
-            "build_index": 0.0,
-            "level1_filter": 0.0,
-            "level2_filter": 0.0,
-            "relevance_verification": 0.0,
-            "file_selection": 0.0,
-            "prepare_output": 0.0,
-            "total": 0.0
-        }
-    }
-
-    def get_file_path(file_path):
-        if file_path.startswith("##"):
-            return file_path.strip()[2:]
-        return file_path
-
-    final_files: Dict[str, TargetFile] = {}
-
-    # Phase 1: Process REST/RAG/Search sources
-    logger.info("Phase 1: Processing REST/RAG/Search sources...")
-    phase_start = time.monotonic()
-    for source in sources:
-        if source.tag in ["REST", "RAG", "SEARCH"]:
-            final_files[get_file_path(source.module_name)] = TargetFile(
-                file_path=source.module_name, reason="Rest/Rag/Search"
-            )
-    phase_end = time.monotonic()
-    stats["timings"]["process_tagged_sources"] = phase_end - phase_start
-
-    if not args.skip_build_index and llm:
-        # Phase 2: Build index
-        if args.request_id and not args.skip_events:
-            queue_communicate.send_event(
-                request_id=args.request_id,
-                event=CommunicateEvent(
-                    event_type=CommunicateEventType.CODE_INDEX_BUILD_START.value,
-                    data=json.dumps({"total_files": len(sources)})
-                )
-            )
-
-        logger.info("Phase 2: Building index for all files...")
-        phase_start = time.monotonic()
-        index_manager = IndexManager(llm=llm, sources=sources, args=args)
-        index_data = index_manager.build_index()
-        stats["indexed_files"] = len(index_data) if index_data else 0
-        phase_end = time.monotonic()
-        stats["timings"]["build_index"] = phase_end - phase_start
-
-        if args.request_id and not args.skip_events:
-            queue_communicate.send_event(
-                request_id=args.request_id,
-                event=CommunicateEvent(
-                    event_type=CommunicateEventType.CODE_INDEX_BUILD_END.value,
-                    data=json.dumps({
-                        "indexed_files": stats["indexed_files"],
-                        "build_index_time": stats["timings"]["build_index"],
-                    })
-                )
-            )
-
-        if not args.skip_filter_index:
-            if args.request_id and not args.skip_events:
-                queue_communicate.send_event(
-                    request_id=args.request_id,
-                    event=CommunicateEvent(
-                        event_type=CommunicateEventType.CODE_INDEX_FILTER_START.value,
-                        data=json.dumps({})
-                    )
-                )
-            # Phase 3: Level 1 filtering - Query-based
-            logger.info(
-                "Phase 3: Performing Level 1 filtering (query-based)...")
-
-            phase_start = time.monotonic()
-            target_files = index_manager.get_target_files_by_query(args.query)
-
-            if target_files:
-                for file in target_files.file_list:
-                    file_path = file.file_path.strip()
-                    final_files[get_file_path(file_path)] = file
-                stats["level1_filtered"] = len(target_files.file_list)
-            phase_end = time.monotonic()
-            stats["timings"]["level1_filter"] = phase_end - phase_start
-
-            # Phase 4: Level 2 filtering - Related files
-            if target_files is not None and args.index_filter_level >= 2:
-                logger.info(
-                    "Phase 4: Performing Level 2 filtering (related files)...")
-                if args.request_id and not args.skip_events:
-                    queue_communicate.send_event(
-                        request_id=args.request_id,
-                        event=CommunicateEvent(
-                            event_type=CommunicateEventType.CODE_INDEX_FILTER_START.value,
-                            data=json.dumps({})
-                        )
-                    )
-                phase_start = time.monotonic()
-                related_files = index_manager.get_related_files(
-                    [file.file_path for file in target_files.file_list]
-                )
-                if related_files is not None:
-                    for file in related_files.file_list:
-                        file_path = file.file_path.strip()
-                        final_files[get_file_path(file_path)] = file
-                    stats["level2_filtered"] = len(related_files.file_list)
-                phase_end = time.monotonic()
-                stats["timings"]["level2_filter"] = phase_end - phase_start
-
-            if not final_files:
-                logger.warning("No related files found, using all files")
-                for source in sources:
-                    final_files[get_file_path(source.module_name)] = TargetFile(
-                        file_path=source.module_name,
-                        reason="No related files found, use all files",
-                    )
-
-            # Phase 5: Relevance verification
-            logger.info("Phase 5: Performing relevance verification...")
-            if args.index_filter_enable_relevance_verification:
-                phase_start = time.monotonic()
-                verified_files = {}
-                temp_files = list(final_files.values())
-                verification_results = []
-
-                def print_verification_results(results):
-                    from rich.table import Table
-                    from rich.console import Console
-
-                    console = Console()
-                    table = Table(title="File Relevance Verification Results", show_header=True, header_style="bold magenta")
-                    table.add_column("File Path", style="cyan", no_wrap=True)
-                    table.add_column("Score", justify="right", style="green")
-                    table.add_column("Status", style="yellow")
-                    table.add_column("Reason/Error")
-
-                    for file_path, score, status, reason in results:
-                        table.add_row(
-                            file_path,
-                            str(score) if score is not None else "N/A",
-                            status,
-                            reason
-                        )
-
-                    console.print(table)
-
-                def verify_single_file(file: TargetFile):
-                    for source in sources:
-                        if source.module_name == file.file_path:
-                            file_content = source.source_code
-                            try:
-                                result = index_manager.verify_file_relevance.with_llm(llm).with_return_type(VerifyFileRelevance).run(
-                                    file_content=file_content,
-                                    query=args.query
-                                )
-                                if result.relevant_score >= args.verify_file_relevance_score:
-                                    verified_files[file.file_path] = TargetFile(
-                                        file_path=file.file_path,
-                                        reason=f"Score:{result.relevant_score}, {result.reason}"
-                                    )
-                                    return file.file_path, result.relevant_score, "PASS", result.reason
-                                else:
-                                    return file.file_path, result.relevant_score, "FAIL", result.reason
-                            except Exception as e:
-                                error_msg = str(e)
-                                verified_files[file.file_path] = TargetFile(
-                                    file_path=file.file_path,
-                                    reason=f"Verification failed: {error_msg}"
-                                )
-                                return file.file_path, None, "ERROR", error_msg
-                    return None
-
-                with ThreadPoolExecutor(max_workers=args.index_filter_workers) as executor:
-                    futures = [executor.submit(verify_single_file, file)
-                               for file in temp_files]
-                    for future in as_completed(futures):
-                        result = future.result()
-                        if result:
-                            verification_results.append(result)
-                        time.sleep(args.anti_quota_limit)
-
-                # Print verification results in a table
-                print_verification_results(verification_results)
-
-                stats["verified_files"] = len(verified_files)
-                phase_end = time.monotonic()
-                stats["timings"]["relevance_verification"] = phase_end - phase_start
-
-                # Keep all files, not just verified ones
-                final_files = verified_files
-
-    def display_table_and_get_selections(data):
-        from prompt_toolkit.shortcuts import checkboxlist_dialog
-        from prompt_toolkit.styles import Style
-
-        choices = [(file, f"{file} - {reason}") for file, reason in data]
-        selected_files = [file for file, _ in choices]
-
-        style = Style.from_dict(
-            {
-                "dialog": "bg:#88ff88",
-                "dialog frame.label": "bg:#ffffff #000000",
-                "dialog.body": "bg:#88ff88 #000000",
-                "dialog shadow": "bg:#00aa00",
-            }
-        )
-
-        result = checkboxlist_dialog(
-            title="Target Files",
-            text="Tab to switch between buttons, and Space/Enter to select/deselect.",
-            values=choices,
-            style=style,
-            default_values=selected_files,
-        ).run()
-
-        return [file for file in result] if result else []
-
-    def print_selected(data):
-        console = Console()
-
-        table = Table(
-            title="Files Used as Context",
-            show_header=True,
-            header_style="bold magenta",
-        )
-        table.add_column("File Path", style="cyan", no_wrap=True)
-        table.add_column("Reason", style="green")
-
-        for file, reason in data:
-            table.add_row(file, reason)
-
-        panel = Panel(
-            table,
-            expand=False,
-            border_style="bold blue",
-            padding=(1, 1),
-        )
-
-        console.print(panel)
-
-    # Phase 6: File selection and limitation
-    logger.info("Phase 6: Processing file selection and limits...")
-    phase_start = time.monotonic()
-
-    if args.index_filter_file_num > 0:
-        logger.info(
-            f"Limiting files from {len(final_files)} to {args.index_filter_file_num}")
-
-    if args.skip_confirm:
-        final_filenames = [file.file_path for file in final_files.values()]
-        if args.index_filter_file_num > 0:
-            final_filenames = final_filenames[: args.index_filter_file_num]
-    else:
-        target_files_data = [
-            (file.file_path, file.reason) for file in final_files.values()
-        ]
-        if not target_files_data:
-            logger.warning(
-                "No target files found, you may need to rewrite the query and try again."
-            )
-            final_filenames = []
-        else:
-            final_filenames = display_table_and_get_selections(
-                target_files_data)
-
-        if args.index_filter_file_num > 0:
-            final_filenames = final_filenames[: args.index_filter_file_num]
-
-    phase_end = time.monotonic()
-    stats["timings"]["file_selection"] = phase_end - phase_start
-
-    # Phase 7: Display results and prepare output
-    logger.info("Phase 7: Preparing final output...")
-    phase_start = time.monotonic()
-    try:
-        print_selected(
-            [
-                (file.file_path, file.reason)
-                for file in final_files.values()
-                if file.file_path in final_filenames
-            ]
-        )
-    except Exception as e:
-        logger.warning(
-            "Failed to display selected files in terminal mode. Falling back to simple print."
-        )
-        print("Target Files Selected:")
-        for file in final_filenames:
-            print(f"{file} - {final_files[file].reason}")
-
-    source_code = ""
-    depulicated_sources = set()
-
-    for file in sources:
-        if file.module_name in final_filenames:
-            if file.module_name in depulicated_sources:
-                continue
-            depulicated_sources.add(file.module_name)
-            source_code += f"##File: {file.module_name}\n"
-            source_code += f"{file.source_code}\n\n"
-
-    if args.request_id and not args.skip_events:
-        queue_communicate.send_event(
-            request_id=args.request_id,
-            event=CommunicateEvent(
-                event_type=CommunicateEventType.CODE_INDEX_FILTER_FILE_SELECTED.value,
-                data=json.dumps([
-                    (file.file_path, file.reason)
-                    for file in final_files.values()
-                    if file.file_path in depulicated_sources
-                ])
-            )
-        )
-
-    stats["final_files"] = len(depulicated_sources)
-    phase_end = time.monotonic()
-    stats["timings"]["prepare_output"] = phase_end - phase_start
-
-    # Calculate total time and print summary
-    total_end_time = time.monotonic()
-    total_time = total_end_time - total_start_time
-    stats["timings"]["total"] = total_time
-
-    # Calculate total filter time
-    total_filter_time = (
-        stats["timings"]["level1_filter"] +
-        stats["timings"]["level2_filter"] +
-        stats["timings"]["relevance_verification"]
-    )
-
-    # Print final statistics in a more structured way
-    summary = f"""
-=== Indexing and Filtering Summary ===
-• Total files scanned: {stats['total_files']}
-• Files indexed: {stats['indexed_files']}
-• Files filtered:
-  - Level 1 (query-based): {stats['level1_filtered']}
-  - Level 2 (related files): {stats['level2_filtered']}
-  - Relevance verified: {stats.get('verified_files', 0)}
-• Final files selected: {stats['final_files']}
-
-=== Time Breakdown ===
-• Index build: {stats['timings'].get('build_index', 0):.2f}s
-• Level 1 filter: {stats['timings'].get('level1_filter', 0):.2f}s
-• Level 2 filter: {stats['timings'].get('level2_filter', 0):.2f}s
-• Relevance check: {stats['timings'].get('relevance_verification', 0):.2f}s
-• File selection: {stats['timings'].get('file_selection', 0):.2f}s
-• Total time: {total_time:.2f}s
-====================================
-"""
-    logger.info(summary)
-
-    if args.request_id and not args.skip_events:
-        queue_communicate.send_event(
-            request_id=args.request_id,
-            event=CommunicateEvent(
-                event_type=CommunicateEventType.CODE_INDEX_FILTER_END.value,
-                data=json.dumps({
-                    "filtered_files": stats["final_files"],
-                    "filter_time": total_filter_time
-                })
-            )
-        )
-
-    return source_code
autocoder/index/types.py CHANGED
@@ -21,4 +21,7 @@ class VerifyFileRelevance(pydantic.BaseModel):
 
 
 class FileList(pydantic.BaseModel):
-    file_list: List[TargetFile]
+    file_list: List[TargetFile]
+
+class FileNumberList(pydantic.BaseModel):
+    file_list: List[int]
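
FileNumberList mirrors FileList but carries integer indices rather than
TargetFile entries, which fits prompts where the model answers with
numbers from an enumerated file list instead of repeating full paths. A
small usage sketch (the call site is an assumption and does not appear
in this diff):

    # Hypothetical usage: map an LLM reply of indices back to paths.
    import json
    import pydantic
    from typing import List

    class FileNumberList(pydantic.BaseModel):  # as added in types.py above
        file_list: List[int]

    candidates = ["main.py", "utils.py", "index.py", "types.py"]  # illustrative
    reply = json.loads('{"file_list": [0, 2, 3]}')  # e.g. the model's JSON answer
    selected = [candidates[i] for i in FileNumberList(**reply).file_list]
    # selected == ["main.py", "index.py", "types.py"]
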
autocoder/version.py CHANGED
@@ -1 +1 @@
-__version__ = "0.1.232"
+__version__ = "0.1.235"