auto-coder 0.1.183.tar.gz → 0.1.185.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of auto-coder might be problematic.

Files changed (121)
  1. {auto_coder-0.1.183 → auto_coder-0.1.185}/PKG-INFO +2 -2
  2. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/auto_coder.egg-info/PKG-INFO +2 -2
  3. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/auto_coder.egg-info/SOURCES.txt +6 -0
  4. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/auto_coder.egg-info/requires.txt +1 -1
  5. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/auto_coder.py +138 -17
  6. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/auto_coder_lang.py +14 -2
  7. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/auto_coder_rag.py +92 -1
  8. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/chat_auto_coder.py +25 -32
  9. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/common/__init__.py +2 -0
  10. auto_coder-0.1.185/src/autocoder/rag/cache/base_cache.py +14 -0
  11. auto_coder-0.1.185/src/autocoder/rag/cache/byzer_storage_cache.py +394 -0
  12. auto_coder-0.1.185/src/autocoder/rag/cache/file_monitor_cache.py +146 -0
  13. auto_coder-0.1.185/src/autocoder/rag/cache/simple_cache.py +204 -0
  14. auto_coder-0.1.185/src/autocoder/rag/document_retriever.py +238 -0
  15. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/rag/long_context_rag.py +16 -6
  16. auto_coder-0.1.185/src/autocoder/rag/types.py +0 -0
  17. auto_coder-0.1.185/src/autocoder/rag/utils.py +133 -0
  18. auto_coder-0.1.185/src/autocoder/version.py +1 -0
  19. auto_coder-0.1.183/src/autocoder/rag/document_retriever.py +0 -657
  20. auto_coder-0.1.183/src/autocoder/version.py +0 -1
  21. {auto_coder-0.1.183 → auto_coder-0.1.185}/LICENSE +0 -0
  22. {auto_coder-0.1.183 → auto_coder-0.1.185}/README.md +0 -0
  23. {auto_coder-0.1.183 → auto_coder-0.1.185}/setup.cfg +0 -0
  24. {auto_coder-0.1.183 → auto_coder-0.1.185}/setup.py +0 -0
  25. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/auto_coder.egg-info/dependency_links.txt +0 -0
  26. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/auto_coder.egg-info/entry_points.txt +0 -0
  27. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/auto_coder.egg-info/top_level.txt +0 -0
  28. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/__init__.py +0 -0
  29. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/agent/__init__.py +0 -0
  30. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/agent/auto_tool.py +0 -0
  31. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/agent/coder.py +0 -0
  32. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/agent/designer.py +0 -0
  33. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/agent/planner.py +0 -0
  34. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/agent/project_reader.py +0 -0
  35. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/auto_coder_server.py +0 -0
  36. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/chat/__init__.py +0 -0
  37. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/chat_auto_coder_lang.py +0 -0
  38. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/command_args.py +0 -0
  39. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/common/JupyterClient.py +0 -0
  40. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/common/ShellClient.py +0 -0
  41. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/common/anything2images.py +0 -0
  42. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/common/audio.py +0 -0
  43. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/common/cleaner.py +0 -0
  44. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/common/code_auto_execute.py +0 -0
  45. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/common/code_auto_generate.py +0 -0
  46. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/common/code_auto_generate_diff.py +0 -0
  47. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/common/code_auto_generate_editblock.py +0 -0
  48. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/common/code_auto_generate_strict_diff.py +0 -0
  49. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/common/code_auto_merge.py +0 -0
  50. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/common/code_auto_merge_diff.py +0 -0
  51. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/common/code_auto_merge_editblock.py +0 -0
  52. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/common/code_auto_merge_strict_diff.py +0 -0
  53. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/common/command_completer.py +0 -0
  54. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/common/command_generator.py +0 -0
  55. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/common/command_templates.py +0 -0
  56. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/common/const.py +0 -0
  57. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/common/git_utils.py +0 -0
  58. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/common/image_to_page.py +0 -0
  59. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/common/interpreter.py +0 -0
  60. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/common/llm_rerank.py +0 -0
  61. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/common/screenshots.py +0 -0
  62. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/common/search.py +0 -0
  63. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/common/search_replace.py +0 -0
  64. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/common/text.py +0 -0
  65. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/common/types.py +0 -0
  66. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/db/__init__.py +0 -0
  67. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/db/store.py +0 -0
  68. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/dispacher/__init__.py +0 -0
  69. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/dispacher/actions/__init__.py +0 -0
  70. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/dispacher/actions/action.py +0 -0
  71. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/dispacher/actions/copilot.py +0 -0
  72. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/dispacher/actions/plugins/__init__.py +0 -0
  73. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/dispacher/actions/plugins/action_regex_project.py +0 -0
  74. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/dispacher/actions/plugins/action_translate.py +0 -0
  75. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/index/__init__.py +0 -0
  76. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/index/for_command.py +0 -0
  77. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/index/index.py +0 -0
  78. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/index/symbols_utils.py +0 -0
  79. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/lang.py +0 -0
  80. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/pyproject/__init__.py +0 -0
  81. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/rag/__init__.py +0 -0
  82. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/rag/api_server.py +0 -0
  83. /auto_coder-0.1.183/src/autocoder/rag/types.py → /auto_coder-0.1.185/src/autocoder/rag/cache/__init__.py +0 -0
  84. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/rag/doc_filter.py +0 -0
  85. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/rag/llm_wrapper.py +0 -0
  86. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/rag/loaders/__init__.py +0 -0
  87. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/rag/loaders/docx_loader.py +0 -0
  88. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/rag/loaders/excel_loader.py +0 -0
  89. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/rag/loaders/pdf_loader.py +0 -0
  90. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/rag/loaders/ppt_loader.py +0 -0
  91. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/rag/rag_config.py +0 -0
  92. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/rag/rag_entry.py +0 -0
  93. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/rag/raw_rag.py +0 -0
  94. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/rag/relevant_utils.py +0 -0
  95. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/rag/simple_directory_reader.py +0 -0
  96. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/rag/simple_rag.py +0 -0
  97. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/rag/token_checker.py +0 -0
  98. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/rag/token_counter.py +0 -0
  99. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/rag/token_limiter.py +0 -0
  100. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/rag/variable_holder.py +0 -0
  101. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/regexproject/__init__.py +0 -0
  102. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/suffixproject/__init__.py +0 -0
  103. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/tsproject/__init__.py +0 -0
  104. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/utils/__init__.py +0 -0
  105. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/utils/conversation_store.py +0 -0
  106. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/utils/llm_client_interceptors.py +0 -0
  107. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/utils/log_capture.py +0 -0
  108. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/utils/multi_turn.py +0 -0
  109. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/utils/print_table.py +0 -0
  110. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/utils/queue_communicate.py +0 -0
  111. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/utils/request_event_queue.py +0 -0
  112. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/utils/request_queue.py +0 -0
  113. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/utils/rest.py +0 -0
  114. {auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/utils/tests.py +0 -0
  115. {auto_coder-0.1.183 → auto_coder-0.1.185}/tests/test_action_regex_project.py +0 -0
  116. {auto_coder-0.1.183 → auto_coder-0.1.185}/tests/test_chat_auto_coder.py +0 -0
  117. {auto_coder-0.1.183 → auto_coder-0.1.185}/tests/test_code_auto_merge_editblock.py +0 -0
  118. {auto_coder-0.1.183 → auto_coder-0.1.185}/tests/test_command_completer.py +0 -0
  119. {auto_coder-0.1.183 → auto_coder-0.1.185}/tests/test_planner.py +0 -0
  120. {auto_coder-0.1.183 → auto_coder-0.1.185}/tests/test_queue_communicate.py +0 -0
  121. {auto_coder-0.1.183 → auto_coder-0.1.185}/tests/test_symbols_utils.py +0 -0

{auto_coder-0.1.183 → auto_coder-0.1.185}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: auto-coder
-Version: 0.1.183
+Version: 0.1.185
 Summary: AutoCoder: AutoCoder
 Author: allwefantasy
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
@@ -26,7 +26,7 @@ Requires-Dist: tabulate
 Requires-Dist: jupyter_client
 Requires-Dist: prompt-toolkit
 Requires-Dist: tokenizers
-Requires-Dist: byzerllm[saas]>=0.1.136
+Requires-Dist: byzerllm[saas]>=0.1.137
 Requires-Dist: patch
 Requires-Dist: diff_match_patch
 Requires-Dist: GitPython

{auto_coder-0.1.183 → auto_coder-0.1.185}/src/auto_coder.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: auto-coder
-Version: 0.1.183
+Version: 0.1.185
 Summary: AutoCoder: AutoCoder
 Author: allwefantasy
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
@@ -26,7 +26,7 @@ Requires-Dist: tabulate
 Requires-Dist: jupyter_client
 Requires-Dist: prompt-toolkit
 Requires-Dist: tokenizers
-Requires-Dist: byzerllm[saas]>=0.1.136
+Requires-Dist: byzerllm[saas]>=0.1.137
 Requires-Dist: patch
 Requires-Dist: diff_match_patch
 Requires-Dist: GitPython

{auto_coder-0.1.183 → auto_coder-0.1.185}/src/auto_coder.egg-info/SOURCES.txt

@@ -82,7 +82,13 @@ src/autocoder/rag/token_checker.py
 src/autocoder/rag/token_counter.py
 src/autocoder/rag/token_limiter.py
 src/autocoder/rag/types.py
+src/autocoder/rag/utils.py
 src/autocoder/rag/variable_holder.py
+src/autocoder/rag/cache/__init__.py
+src/autocoder/rag/cache/base_cache.py
+src/autocoder/rag/cache/byzer_storage_cache.py
+src/autocoder/rag/cache/file_monitor_cache.py
+src/autocoder/rag/cache/simple_cache.py
 src/autocoder/rag/loaders/__init__.py
 src/autocoder/rag/loaders/docx_loader.py
 src/autocoder/rag/loaders/excel_loader.py

{auto_coder-0.1.183 → auto_coder-0.1.185}/src/auto_coder.egg-info/requires.txt

@@ -16,7 +16,7 @@ tabulate
 jupyter_client
 prompt-toolkit
 tokenizers
-byzerllm[saas]>=0.1.136
+byzerllm[saas]>=0.1.137
 patch
 diff_match_patch
 GitPython

{auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/auto_coder.py

@@ -385,7 +385,7 @@ def main(input_args: Optional[List[str]] = None):
 
     llm.setup_template(model=args.model, template="auto")
     llm.setup_default_model_name(args.model)
-
+
     llm.setup_max_output_length(args.model, args.model_max_length)
     llm.setup_max_input_length(args.model, args.model_max_input_length)
     llm.setup_extra_generation_params(
@@ -395,7 +395,7 @@ def main(input_args: Optional[List[str]] = None):
     if args.chat_model:
         chat_model = byzerllm.ByzerLLM()
         chat_model.setup_default_model_name(args.chat_model)
-        llm.setup_sub_client("chat_model", chat_model)
+        llm.setup_sub_client("chat_model", chat_model)
 
     if args.vl_model:
         vl_model = byzerllm.ByzerLLM()
@@ -642,22 +642,24 @@ def main(input_args: Optional[List[str]] = None):
                 )
             )
         return
-
+
     elif raw_args.agent_command == "designer":
-        from autocoder.agent.designer import SVGDesigner, SDDesigner
+        from autocoder.agent.designer import SVGDesigner, SDDesigner
+
         if args.agent_designer_mode == "svg":
             designer = SVGDesigner(args, llm)
-            designer.run(args.query)
+            designer.run(args.query)
             print("Successfully generated image in output.png")
         elif args.agent_designer_mode == "sd":
             designer = SDDesigner(args, llm)
-            designer.run(args.query)
+            designer.run(args.query)
             print("Successfully generated image in output.jpg")
         if args.request_id:
             request_queue.add_request(
                 args.request_id,
                 RequestValue(
-                    value=DefaultValue(value=response), status=RequestOption.COMPLETED
+                    value=DefaultValue(value="Successfully generated image"),
+                    status=RequestOption.COMPLETED,
                 ),
             )
         return
@@ -693,6 +695,12 @@ def main(input_args: Optional[List[str]] = None):
             {"role": "user", "content": args.query}
         )
 
+        if llm.get_sub_client("chat_model"):
+            chat_llm = llm.get_sub_client("chat_model")
+        else:
+            chat_llm = llm
+
+        source_count = 0
         pre_conversations = []
         if args.context:
             context = json.loads(args.context)
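
The hunk above prefers a dedicated chat sub client when one was registered via llm.setup_sub_client("chat_model", ...) earlier in main(). A minimal sketch of that register-then-fallback pattern, using only the two byzerllm calls visible in this diff (the model names here are hypothetical):

# Sketch only; mirrors the pattern in the hunks above.
import byzerllm

llm = byzerllm.ByzerLLM()
llm.setup_default_model_name("deepseek_chat")  # hypothetical model name

chat_model = byzerllm.ByzerLLM()
chat_model.setup_default_model_name("deepseek_chat")  # hypothetical
llm.setup_sub_client("chat_model", chat_model)

# Prefer the sub client when present, fall back to the main client,
# matching the truthiness check in the hunk above:
chat_llm = llm.get_sub_client("chat_model") or llm
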
@@ -701,25 +709,138 @@ def main(input_args: Optional[List[str]] = None):
             pre_conversations.append(
                 {
                     "role": "user",
-                    "content": f"下面是一些文档和源码,如果用户的问题和他们相关,请参考他们:{file_content}",
+                    "content": f"下面是一些文档和源码,如果用户的问题和他们相关,请参考他们:\n{file_content}",
                 },
             )
             pre_conversations.append({"role": "assistant", "content": "read"})
+            source_count += 1
 
-        loaded_conversations = (
-            pre_conversations + chat_history["ask_conversation"][-31:]
-        )
+        from autocoder.index.index import IndexManager, build_index_and_filter_files
+        from autocoder.pyproject import PyProject
+        from autocoder.tsproject import TSProject
+        from autocoder.suffixproject import SuffixProject
 
-        if llm.get_sub_client("chat_model"):
-            chat_llm = llm.get_sub_client("chat_model")
+        if args.project_type == "ts":
+            pp = TSProject(args=args, llm=llm)
+        elif args.project_type == "py":
+            pp = PyProject(args=args, llm=llm)
         else:
-            chat_llm = llm
+            pp = SuffixProject(args=args, llm=llm, file_filter=None)
+        pp.run()
+        sources = pp.sources
+        s = build_index_and_filter_files(llm=llm, args=args, sources=sources)
+        if s:
+            pre_conversations.append(
+                {
+                    "role": "user",
+                    "content": f"下面是一些文档和源码,如果用户的问题和他们相关,请参考他们:\n{s}",
+                }
+            )
+            pre_conversations.append({"role": "assistant", "content": "read"})
+            source_count += 1
+
+        loaded_conversations = pre_conversations + chat_history["ask_conversation"]
+
+        if args.human_as_model:
+            console = Console()
+
+            @byzerllm.prompt()
+            def chat_with_human_as_model(
+                source_codes, pre_conversations, last_conversation
+            ):
+                """
+                {% if source_codes %}
+                {{ source_codes }}
+                {% endif %}
+
+                {% if pre_conversations %}
+                下面是我们之间的历史对话,假设我是A,你是B。
+                {% for conv in pre_conversations %}
+                {{ "A" if conv.role == "user" else "B" }}: {{ conv.content }}
+                {% endfor %}
+                {% endif %}
+
+
+                参考上面的文件以及对话,回答用户的问题。
+                用户的问题: {{ last_conversation.content }}
+                """
+
+            source_codes_conversations = loaded_conversations[0 : source_count * 2]
+            source_codes = ""
+            for conv in source_codes_conversations:
+                if conv["role"] == "user":
+                    source_codes += conv["content"]
+
+            chat_content = chat_with_human_as_model.prompt(
+                source_codes=source_codes,
+                pre_conversations=loaded_conversations[source_count * 2 : -1],
+                last_conversation=loaded_conversations[-1],
+            )
+            try:
+                import pyperclip
+
+                pyperclip.copy(chat_content)
+                console.print(
+                    Panel(
+                        get_message("chat_human_as_model_instructions"),
+                        title="Instructions",
+                        border_style="blue",
+                        expand=False,
+                    )
+                )
+            except Exception:
+                logger.warning(get_message("clipboard_not_supported"))
+                console.print(
+                    Panel(
+                        get_message("human_as_model_instructions_no_clipboard"),
+                        title="Instructions",
+                        border_style="blue",
+                        expand=False,
+                    )
+                )
+                return
+            # Save chat content to file
+            with open(args.target_file, "w") as f:
+                f.write(chat_content)
+
+            lines = []
+            while True:
+                line = prompt(FormattedText([("#00FF00", "> ")]), multiline=False)
+                line_lower = line.strip().lower()
+                if line_lower in ["eof", "/eof"]:
+                    break
+                elif line_lower in ["/clear"]:
+                    lines = []
+                    print("\033[2J\033[H")  # Clear terminal screen
+                    continue
+                elif line_lower in ["/break"]:
+                    raise Exception("User requested to break the operation.")
+                lines.append(line)
+
+            result = "\n".join(lines)
+
+            # Update chat history with user's response
+            chat_history["ask_conversation"].append(
+                {"role": "assistant", "content": result}
+            )
+
+            with open(memory_file, "w") as f:
+                json.dump(chat_history, f, ensure_ascii=False)
+
+            request_queue.add_request(
+                args.request_id,
+                RequestValue(
+                    value=DefaultValue(value=result), status=RequestOption.COMPLETED
+                ),
+            )
+
+            return {}
 
         if args.enable_rag_search or args.enable_rag_context:
             rag = RAGFactory.get_rag(llm=chat_llm, args=args, path="")
             response = rag.stream_chat_oai(conversations=loaded_conversations)[0]
             v = ([item, None] for item in response)
-        else:
+        else:
             v = chat_llm.stream_chat_oai(
                 conversations=loaded_conversations, delta_mode=True
             )
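
The human-as-model branch added above ends in a small line-oriented paste protocol: lines accumulate until "eof"/"/eof" submits them, "/clear" resets the buffer and screen, and "/break" aborts. A standalone sketch of just that loop, using plain input() instead of prompt_toolkit, so it is illustrative rather than the shipped code:

# Illustrative reimplementation of the paste loop in the hunk above.
def read_pasted_answer() -> str:
    lines = []
    while True:
        line = input("> ")
        line_lower = line.strip().lower()
        if line_lower in ["eof", "/eof"]:  # submit what was pasted
            return "\n".join(lines)
        elif line_lower == "/clear":       # wipe buffer and screen, start over
            lines = []
            print("\033[2J\033[H")
            continue
        elif line_lower == "/break":       # abort the whole operation
            raise Exception("User requested to break the operation.")
        lines.append(line)
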
@@ -813,7 +934,7 @@ def main(input_args: Optional[List[str]] = None):
                 llm, args, code_auto_execute.Mode.SINGLE_ROUND
             )
             executor.run(query=args.query, context=s, source_code="")
-        return
+        return
     elif raw_args.doc_command == "serve":
 
         from autocoder.rag.llm_wrapper import LLWrapper
@@ -838,7 +959,7 @@ def main(input_args: Optional[List[str]] = None):
         llm_wrapper = LLWrapper(llm=llm, rag=rag)
         serve(llm=llm_wrapper, args=server_args)
         return
-
+
     elif raw_args.doc_command == "chat":
         from autocoder.rag.rag_entry import RAGFactory
 

{auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/auto_coder_lang.py

@@ -16,7 +16,13 @@ MESSAGES = {
             "The system is waiting for your input. When finished, enter 'EOF' on a new line to submit.\n"
             "Use '/break' to exit this mode. If you have issues with copy-paste, use '/clear' to clean and paste again."
         ),
-
+        "chat_human_as_model_instructions": (
+            "Chat is now in Human as Model mode.\n"
+            "The question has been copied to your clipboard.\n"
+            "Please use Web version model to get the answer.\n"
+            "Or use /conf human_as_model:false to close this mode and get the answer in terminal directly."
+            "Paste the answer to the input box below, use '/break' to exit, '/clear' to clear the screen, '/eof' to submit."
+        )
     },
     "zh": {
         "human_as_model_instructions": (
@@ -33,7 +39,13 @@ MESSAGES = {
             "系统正在等待您的输入。完成后,在新行输入'EOF'提交。\n"
            "使用'/break'退出此模式。如果复制粘贴有问题,使用'/clear'清理并重新粘贴。"
         ),
-
+        "chat_human_as_model_instructions": (
+            "\n============= Chat 处于 Human as Model 模式 =============\n"
+            "问题已复制到剪贴板\n"
+            "请使用Web版本模型获取答案\n"
+            "或者使用 /conf human_as_model:false 关闭该模式直接在终端获得答案。"
+            "将获得答案黏贴到下面的输入框,换行后,使用 '/break' 退出,'/clear' 清屏,'/eof' 提交。"
+        ),
     }
 }
 
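
Both new MESSAGES entries are fetched at runtime via get_message("chat_human_as_model_instructions") in the auto_coder.py hunk above. As a rough standalone illustration of that dictionary lookup (the package's actual get_message helper and its language selection may differ):

# Illustration only; the real helper lives in autocoder.auto_coder_lang.
MESSAGES = {
    "en": {"chat_human_as_model_instructions": "Chat is now in Human as Model mode. ..."},
    "zh": {"chat_human_as_model_instructions": "Chat 处于 Human as Model 模式 ..."},
}

def get_message(key: str, lang: str = "en") -> str:  # signature is an assumption
    return MESSAGES.get(lang, MESSAGES["en"]).get(key, "")
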

{auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/auto_coder_rag.py

@@ -17,6 +17,7 @@ import shlex
 from rich.console import Console
 from rich.table import Table
 import os
+from loguru import logger
 
 from autocoder.rag.document_retriever import process_file_local
 from autocoder.rag.token_counter import TokenCounter
@@ -144,6 +145,20 @@ def main(input_args: Optional[List[str]] = None):
     parser = argparse.ArgumentParser(description="Auto Coder RAG Server")
     subparsers = parser.add_subparsers(dest="command", help="Available commands")
 
+    # Build hybrid index command
+    build_index_parser = subparsers.add_parser("build_hybrid_index", help="Build hybrid index for RAG")
+    build_index_parser.add_argument("--quick", action="store_true", help="Skip system initialization")
+    build_index_parser.add_argument("--file", default="", help=desc["file"])
+    build_index_parser.add_argument("--model", default="deepseek_chat", help=desc["model"])
+    build_index_parser.add_argument("--index_model", default="", help=desc["index_model"])
+    build_index_parser.add_argument("--emb_model", default="", help=desc["emb_model"])
+    build_index_parser.add_argument("--ray_address", default="auto", help=desc["ray_address"])
+    build_index_parser.add_argument("--required_exts", default="", help=desc["doc_build_parse_required_exts"])
+    build_index_parser.add_argument("--source_dir", default=".", help="Source directory path")
+    build_index_parser.add_argument("--tokenizer_path", default="", help="Path to tokenizer file")
+    build_index_parser.add_argument("--doc_dir", default="", help="Document directory path")
+    build_index_parser.add_argument("--enable_hybrid_index", action="store_true", help="Enable hybrid index")
+
     # Serve command
     serve_parser = subparsers.add_parser("serve", help="Start the RAG server")
     serve_parser.add_argument(
@@ -242,6 +257,18 @@ def main(input_args: Optional[List[str]] = None):
         help="Enable deep thought in inference mode",
     )
 
+    serve_parser.add_argument(
+        "--enable_hybrid_index",
+        action="store_true",
+        help="Enable hybrid index",
+    )
+    serve_parser.add_argument(
+        "--hybrid_index_max_output_tokens",
+        type=int,
+        default=1000000,
+        help="The maximum number of tokens in the output. This is only used when enable_hybrid_index is true.",
+    )
+
     # Tools command
     tools_parser = subparsers.add_parser("tools", help="Various tools")
     tools_subparsers = tools_parser.add_subparsers(dest="tool", help="Available tools")
@@ -275,10 +302,28 @@ def main(input_args: Optional[List[str]] = None):
             }
         )
 
-        byzerllm.connect_cluster(address=args.ray_address)
+        if auto_coder_args.enable_hybrid_index:
+            # 尝试连接storage
+            try:
+                from byzerllm.apps.byzer_storage.simple_api import ByzerStorage
+                storage = ByzerStorage("byzerai_store", "rag", "files")
+                storage.retrieval.cluster_info("byzerai_store")
+            except Exception as e:
+                logger.error("When enable_hybrid_index is true, ByzerStorage must be started")
+                logger.error("Please run 'byzerllm storage start' first")
+                return
+        else:
+            byzerllm.connect_cluster(address=args.ray_address)
         llm = byzerllm.ByzerLLM()
         llm.setup_default_model_name(args.model)
 
+        # 当启用hybrid_index时,检查必要的组件
+        if auto_coder_args.enable_hybrid_index:
+            if not llm.is_model_exist("emb"):
+                logger.error("When enable_hybrid_index is true, an 'emb' model must be deployed")
+                return
+            llm.setup_default_emb_model_name("emb")
+
         if server_args.doc_dir:
             auto_coder_args.rag_type = "simple"
             rag = RAGFactory.get_rag(
@@ -292,6 +337,52 @@ def main(input_args: Optional[List[str]] = None):
 
         llm_wrapper = LLWrapper(llm=llm, rag=rag)
         serve(llm=llm_wrapper, args=server_args)
+    elif args.command == "build_hybrid_index":
+        if not args.quick:
+            initialize_system()
+
+        auto_coder_args = AutoCoderArgs(
+            **{
+                arg: getattr(args, arg)
+                for arg in vars(AutoCoderArgs())
+                if hasattr(args, arg)
+            }
+        )
+
+        auto_coder_args.enable_hybrid_index = True
+        auto_coder_args.rag_type = "simple"
+
+        try:
+            from byzerllm.apps.byzer_storage.simple_api import ByzerStorage
+            storage = ByzerStorage("byzerai_store", "rag", "files")
+            storage.retrieval.cluster_info("byzerai_store")
+        except Exception as e:
+            logger.error("When enable_hybrid_index is true, ByzerStorage must be started")
+            logger.error("Please run 'byzerllm storage start' first")
+            return
+
+        llm = byzerllm.ByzerLLM()
+        llm.setup_default_model_name(args.model)
+
+        # 当启用hybrid_index时,检查必要的组件
+        if auto_coder_args.enable_hybrid_index:
+            if not llm.is_model_exist("emb"):
+                logger.error("When enable_hybrid_index is true, an 'emb' model must be deployed")
+                return
+            llm.setup_default_emb_model_name("emb")
+
+        rag = RAGFactory.get_rag(
+            llm=llm,
+            args=auto_coder_args,
+            path=args.doc_dir,
+            tokenizer_path=args.tokenizer_path,
+        )
+
+        if hasattr(rag.document_retriever, "cacher"):
+            rag.document_retriever.cacher.build_cache()
+        else:
+            logger.error("The document retriever does not support hybrid index building")
+
     elif args.command == "tools" and args.tool == "count":
         # auto-coder.rag tools count --tokenizer_path /Users/allwefantasy/Downloads/tokenizer.json --file /Users/allwefantasy/data/yum/schema/schema.xlsx
         count_tokens(args.tokenizer_path, args.file)
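
Taken together, the error messages above spell out the prerequisites for the new subcommand: a running ByzerStorage and an embedding model deployed under the name "emb". A plausible invocation, with hypothetical paths and flag names taken from the build_index_parser definition earlier:

# Prerequisite per the error messages above, then the build itself:
# byzerllm storage start
# auto-coder.rag build_hybrid_index --doc_dir /path/to/docs --tokenizer_path /path/to/tokenizer.json

The server can afterwards be started with the new --enable_hybrid_index flag added to serve_parser above.
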

{auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/chat_auto_coder.py

@@ -718,8 +718,8 @@ class CommandCompleter(Completer):
 
         parser.coding()
         current_word = parser.current_word()
-
-        if len(new_text.strip()) == 0 or new_text.strip()=="/":
+
+        if len(new_text.strip()) == 0 or new_text.strip() == "/":
             for command in parser.get_sub_commands():
                 if command.startswith(current_word):
                     yield Completion(command, start_position=-len(current_word))
@@ -797,7 +797,7 @@ class CommandCompleter(Completer):
                         start_position=-len(name),
                         display=f"{symbol.symbol_name} ({display_name}/{symbol.symbol_type})",
                     )
-
+
         tags = [tag for tag in parser.tags]
 
         if current_word.startswith("<"):
@@ -810,7 +810,7 @@ class CommandCompleter(Completer):
                     )
                 elif tag.startswith(name):
                     yield Completion(tag, start_position=-len(current_word))
-
+
         if tags and tags[-1].start_tag == "<img>" and tags[-1].end_tag == "":
             raw_file_name = tags[0].content
             file_name = raw_file_name.strip()
@@ -932,7 +932,7 @@ class CommandCompleter(Completer):
                 field_name + ":"
                 for field_name in AutoCoderArgs.model_fields.keys()
                 if field_name.startswith(current_word)
-            ]
+            ]
 
             for completion in completions:
                 yield Completion(completion, start_position=-len(current_word))
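
The last hunk shows where /conf completions come from: pydantic v2 exposes a model's declared fields via model_fields, so the two fields added to AutoCoderArgs in this release surface in completion automatically. A minimal sketch of the same enumeration (the import path is assumed from the files-changed list):

# Sketch: enumerate configurable keys the way the completer above does.
from autocoder.common import AutoCoderArgs  # assumed import path

completions = [
    name + ":"
    for name in AutoCoderArgs.model_fields.keys()
    if name.startswith("enable_")  # would match the new enable_hybrid_index
]
print(completions)
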
@@ -1489,46 +1489,39 @@ def coding(query: str):
 @byzerllm.prompt()
 def code_review(query: str) -> str:
     """
-    对前面的代码进行review,参考如下检查点:
-
+    对代码进行review,参考如下检查点。
     1. 有没有调用不符合方法,类的签名的调用
-    2. 有没有没有未声明直接使用的变量,方法,类
+    2. 有没有未声明直接使用的变量,方法,类
     3. 有没有明显的语法错误
-    4. 用户的额外的检查需求:{{ query }}
-
-    如果用户的需求包含了@一个文件名 或者 @@符号, 那么重点关注这些文件或者符号(函数,类)进行上述的review
+    4. 如果是python代码,检查有没有缩进方面的错误
+    5. 如果是python代码,检查是否 try 后面缺少 except 或者 finally
+    {% if query %}
+    6. 用户的额外的检查需求:{{ query }}
+    {% endif %}
+
+    如果用户的需求包含了@一个文件名 或者 @@符号, 那么重点关注这些文件或者符号(函数,类)进行上述的review。
+    review 过程中严格遵循上述的检查点,不要遗漏,没有发现异常的点直接跳过,只对发现的异常点,给出具体的修改后的代码。
     """
 
 
 def chat(query: str):
     conf = memory.get("conf", {})
-    current_files = memory["current_files"]["files"] + get_llm_friendly_package_docs(
-        return_paths=True
-    )
-
-    file_contents = []
-    for file in current_files:
-        if os.path.exists(file):
-            try:
-                with open(file, "r") as f:
-                    content = f.read()
-                s = f"##File: {file}\n{content}\n\n"
-                file_contents.append(s)
-            except Exception as e:
-                print(f"Failed to read file: {file}. Error: {str(e)}")
-
-    all_file_content = "".join(file_contents)
-
+
     yaml_config = {
         "include_file": ["./base/base.yml"],
-        "include_project_structure": conf.get("include_project_structure", "true")
-        == "true",
+        "include_project_structure": conf.get("include_project_structure", "true") in ["true","True"],
+        "human_as_model": conf.get("human_as_model", "false") == "true",
+        "skip_build_index": conf.get("skip_build_index", "true") == "true",
+        "skip_confirm": conf.get("skip_confirm", "true") == "true",
+        "silence": conf.get("silence", "true") == "true",
     }
 
-    yaml_config["context"] = json.dumps(
-        {"file_content": all_file_content}, ensure_ascii=False
+    current_files = memory["current_files"]["files"] + get_llm_friendly_package_docs(
+        return_paths=True
     )
 
+    yaml_config["urls"] = current_files
+
     if "emb_model" in conf:
         yaml_config["emb_model"] = conf["emb_model"]
 
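
Two things change in chat() above: file contents are no longer inlined into a JSON "context" payload (the file list now travels as yaml_config["urls"]), and several string-valued conf flags are normalized to booleans. The new in ["true", "True"] test fixes a case-sensitivity wrinkle, shown in isolation below with illustrative values:

# Illustration only: conf values arrive as strings from the /conf prompt.
conf = {"include_project_structure": "True"}
print(conf.get("include_project_structure", "true") == "true")            # False
print(conf.get("include_project_structure", "true") in ["true", "True"])  # True
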

{auto_coder-0.1.183 → auto_coder-0.1.185}/src/autocoder/common/__init__.py

@@ -282,8 +282,10 @@ class AutoCoderArgs(pydantic.BaseModel):
     command: Optional[str] = None
     doc_command: Optional[str] = None
     required_exts: Optional[str] = None
+    hybrid_index_max_output_tokens: Optional[int] = 1000000
 
     monitor_mode: bool = False
+    enable_hybrid_index: bool = False
     disable_auto_window: bool = False
     disable_segment_reorder: bool = False
     rag_doc_filter_relevance: int = 5
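
Because AutoCoderArgs is a pydantic model, the two new fields carry defaults that keep the hybrid index off unless explicitly requested. A quick sketch, assuming the import path implied by the files-changed list:

# Defaults come straight from the field declarations in the hunk above.
from autocoder.common import AutoCoderArgs  # assumed import path

args = AutoCoderArgs()
assert args.enable_hybrid_index is False
assert args.hybrid_index_max_output_tokens == 1000000
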

auto_coder-0.1.185/src/autocoder/rag/cache/base_cache.py (new file)

@@ -0,0 +1,14 @@
+from pydantic import BaseModel
+from typing import List, Tuple,Dict,Optional,Any
+from abc import ABC, abstractmethod
+
+class DeleteEvent(BaseModel):
+    file_paths: List[str]
+
+class AddOrUpdateEvent(BaseModel):
+    file_infos: List[Tuple[str, str, float]]
+
+class BaseCacheManager(ABC):
+    @abstractmethod
+    def get_cache(self,options:Optional[Dict[str,Any]]=None) -> Dict[str, Dict]:
+        pass
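
base_cache.py is the contract the other new cache modules (simple_cache.py, file_monitor_cache.py, byzer_storage_cache.py) plug into: a concrete backend implements get_cache() and can describe file changes with DeleteEvent / AddOrUpdateEvent. A hypothetical minimal implementation, not taken from the package:

# Hypothetical subclass, for illustration only.
from typing import Any, Dict, Optional

from autocoder.rag.cache.base_cache import BaseCacheManager


class InMemoryCacheManager(BaseCacheManager):
    """Keeps the file cache in a plain dict; the real backends persist it."""

    def __init__(self) -> None:
        self._cache: Dict[str, Dict] = {}

    def get_cache(self, options: Optional[Dict[str, Any]] = None) -> Dict[str, Dict]:
        # options is accepted for interface compatibility and ignored here.
        return self._cache
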