PyPI - auto-coder - Versions diffs - 0.1.298__py3-none-any.whl → 0.1.300__py3-none-any.whl - Mend

auto-coder 0.1.298__py3-none-any.whl → 0.1.300__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of auto-coder might be problematic. Click here for more details.

Files changed (23)

{auto_coder-0.1.298.dist-info → auto_coder-0.1.300.dist-info}/METADATA +2 -2
{auto_coder-0.1.298.dist-info → auto_coder-0.1.300.dist-info}/RECORD +23 -21
autocoder/agent/auto_learn_from_commit.py +125 -59
autocoder/agent/auto_review_commit.py +106 -16
autocoder/auto_coder.py +65 -66
autocoder/auto_coder_runner.py +23 -40
autocoder/command_parser.py +280 -0
autocoder/commands/auto_command.py +112 -33
autocoder/commands/tools.py +170 -10
autocoder/common/__init__.py +5 -1
autocoder/common/action_yml_file_manager.py +367 -0
autocoder/common/auto_coder_lang.py +8 -2
autocoder/common/auto_configure.py +6 -0
autocoder/common/command_completer.py +8 -1
autocoder/common/memory_manager.py +5 -1
autocoder/index/entry.py +17 -0
autocoder/rag/cache/local_duckdb_storage_cache.py +111 -17
autocoder/utils/__init__.py +13 -9
autocoder/version.py +1 -1
{auto_coder-0.1.298.dist-info → auto_coder-0.1.300.dist-info}/LICENSE +0 -0
{auto_coder-0.1.298.dist-info → auto_coder-0.1.300.dist-info}/WHEEL +0 -0
{auto_coder-0.1.298.dist-info → auto_coder-0.1.300.dist-info}/entry_points.txt +0 -0
{auto_coder-0.1.298.dist-info → auto_coder-0.1.300.dist-info}/top_level.txt +0 -0

autocoder/commands/auto_command.py CHANGED Viewed

@@ -180,7 +180,7 @@ class CommandAutoTuner:
     @byzerllm.prompt()
     def _analyze(self, request: AutoCommandRequest) -> str:
         """
-        当前用户环境信息如下:
+        ## 当前用户环境信息如下:
         <os_info>
         操作系统: {{ env_info.os_name }} {{ env_info.os_version }}
         操作系统发行版: {{ os_distribution }}
@@ -204,7 +204,7 @@ class CommandAutoTuner:
         我们的目标是根据用户输入和当前上下文，组合多个函数来完成用户的需求。
         {% if current_files %}
-        当前活跃区文件列表：
+        ## 当前活跃区文件列表：
         <current_files>
         {% for file in current_files %}
         - {{ file }}
@@ -213,41 +213,22 @@ class CommandAutoTuner:
         {% endif %}
-        当前用户的配置选项如下:
+        ## 当前用户的配置选项如下:
         <current_conf>
         {{ current_conf }}
         </current_conf>
-        可用函数列表:
+        ## 可用函数列表:
         {{ available_commands }}
-        函数组合说明：
-        <function_combination_readme>
-        如果用户是一个编码需求，你可以先简单观察当前活跃区文件列表：
-        0. 关注下当前软件的配置，诸如索引开启关闭。如果有觉得不合理的可以通过 help 函数来修改。
-        1. 如果你觉得这些文件不够满足用户的需求，而当前的索引配置关闭的，那么你可以通过help("将skip_filter_index 和 skip_build_index 设置为 false") 让
-        chat,coding 函数来获取更多文件，或者你也可以自己通过调用 get_project_structure 函数来获取项目结构，然后通过 get_project_map 函数来获取某个文件的用途，符号列表，以及
-        文件大小（tokens数）,最后再通过 read_files/read_file_with_keyword_ranges 函数来读取文件内容, 最后通过 add_files 函数来添加文件到活跃区。
-        确保 chat,coding 函数能够正常使用。
-        2. 对于一个比较复杂的代码需求，你可以先通过 chat 函数来获得一些设计，根据chat返回的结果，你可以选择多次调用chat调整最后的设计。最后，当你满意后，可以通过 coding("/apply 根据历史对话实现代码，请不要有遗漏") 来完成最后的编码。
-        3. 注意，为了防止对话过长，你可以使用 chat("/new") 来创新新的会话。然后接着正常再次调用 chat 函数。 即可
-        4. 当用户询问项目，比如询问什么什么功能在哪里的时候，或者哪个文件实现了什么功能，推荐的工具组合是 get_project_map 和 get_project_structure。可以直通过 get_project_map 查看整个项目文件的索引（该索引包含了文件列表，每个文件的用途和符号列表），也可以
-        通过 get_project_structure 来获取项目结构，然后通过 get_project_map 来获取你想看的某个文件的用途，符号列表，最后再通过 read_files/read_file_with_keyword_ranges 函数来读取文件内容,确认对应的功能是否在相关的文件里。
-        5. 调用 coding 函数的时候，尽可能多的 @文件和@@符号，让需求更加清晰明了，建议多描述具体怎么完成对应的需求。
-        6. 对于代码需求设计，尽可能使用 chat 函数。
-        7. 如果成功执行了 coding 函数，最好再调用一次 chat("/review /commit")
-        8. 我们所有的对话不能超过 {{ conversation_safe_zone_tokens }} 个tokens,当你读取索引文件 (get_project_map) 的时候，你可以看到
-        每个文件的tokens数，你可以根据这个信息来决定如何读取这个文件。比如对于很小的文件，那么可以直接全部读取，
-        而对于分析一个超大文件推荐组合 read_files 带上 line_ranges 参数来读取，或者组合 read_file_withread_file_with_keyword_ranges 等来读取，
-        每个函数你还可以使用多次来获取更多信息。
-        9. 根据操作系统，终端类型，脚本类型等各种信息，在涉及到路径或者脚本的时候，需要考虑平台差异性。
-        10. 使用 read_files 时，一次性读取文件数量不要超过1个,每次只读取200行。如果发现读取的内容不够，则继续读取下面200行。
-        </function_combination_readme>
+        ## 当前大模型窗口安全值
+        {{ conversation_safe_zone_tokens }}
+        ## 函数组合说明：
+        {{ command_combination_readme }}
         {% if conversation_history %}
-        历史对话:
+        ## 历史对话:
         <conversation_history>
         {% for conv in conversation_history %}
         ({{ conv.role }}): {{ conv.content }}
@@ -255,7 +236,7 @@ class CommandAutoTuner:
         </conversation_history>
         {% endif %}
-        用户需求:
+        ## 用户需求:
         <user_input>
         {{ user_input }}
         </user_input>
@@ -299,9 +280,50 @@ class CommandAutoTuner:
             "shell_encoding": shells.get_terminal_encoding(),
             "conversation_safe_zone_tokens": self.args.conversation_prune_safe_zone_tokens,
             "os_distribution": shells.get_os_distribution(),
-            "current_user": shells.get_current_username()
+            "current_user": shells.get_current_username(),
+            "command_combination_readme": self._command_combination_readme.prompt()
         }
+    def _command_combination_readme(self) -> str:
+        """
+        <function_combination_readme>
+        如果用户是一个编码需求，你可以先简单观察当前活跃区文件列表：
+        ### 是否根据需求动态修改auto-coder软件配置
+        关注下当前软件的配置，结合当前用户的需求，如果觉得不合理的地方，可以通过 ask_user 函数来询问用户，是否要通过 help 函数修改一些配置。
+        ### 如何了解当前项目
+        通常可以自己通过调用 get_project_structure 函数来获取项目结构(如果项目结构太大，该函数会拒绝返回，你可以选择 list_files 函数来查看目录)，然后通过 get_project_map 函数来获取某几个文件的用途，符号列表，以及
+        文件大小（tokens数）,最后再通过 read_files/read_file_with_keyword_ranges 函数来读取文件内容,从而更好的结合当前项目理解用户的需求。
+        ### 复杂需求，先做讨论设计
+        对于一个比较复杂的代码需求，你可以先通过 chat 函数来获得一些设计，根据chat返回的结果，你可以选择多次调用chat调整最后的设计。最后，当你满意后，可以通过 coding("/apply") 来完成最后的编码。
+        注意，为了防止对话过长，你可以使用 chat("/new") 来创新新的会话。然后接着正常再次调用 chat 函数。 即可。
+        尽可通过了解项目后，多用 @文件和@@符号，这样 chat 函数可以更清晰的理解你关注的代码，文档和意图。
+        ### 调用 coding 函数应该注意的事项
+        调用 coding 函数的时候，尽可能多的 @文件和@@符号，让需求更加清晰明了，建议多描述具体怎么完成对应的需求。
+        对于代码需求设计，尽可能使用 chat 函数。如果成功执行了 coding 函数， 最好再调用一次 chat("/review /commit")，方便总结这次代码变更。
+        注意，review 完后，需要询问用户是否要做啥调整不，如果用户说不用，那么就停止。否则根据意图进行后续操作。
+        ### 关于对话大小的问题
+        我们对话历史以及查看的内容累计不能超过 {{ conversation_safe_zone_tokens }} 个tokens,当你读取索引文件 (get_project_map) 的时候，你可以看到
+        每个文件的tokens数，你可以根据这个信息来决定如何读取这个文件。如果不确定，使用 count_file_tokens 函数来获取文件的tokens数,再决定如何读取。
+        而对于分析一个超大文件推荐组合 read_files 带上 line_ranges 参数来读取，或者组合 read_file_withread_file_with_keyword_ranges 等来读取，
+        每个函数你还可以使用多次来获取更多信息。
+        ### 善用脚本完成一些基本的操作
+        根据操作系统，终端类型，脚本类型等各种信息，在涉及到路径或者脚本的时候，需要考虑平台差异性。
+        ## 其他一些注意事项
+        1. 使用 read_files 时，一次性读取文件数量不要超过1个,每次只读取200行。如果发现读取的内容不够，则继续读取下面200行。
+        2. 确实有必要才使用 get_project_structure 函数，否则可以多使用 list_files 函数来查看目录。
+        3. 最后，不要局限在我们前面描述的使用说明中，根据各个函数的说明，灵活组合和使用各个函数，发挥自己的想象力，尽可能的完成用户的需求。
+        </function_combination_readme>
+        """
     @byzerllm.prompt()
     def _execute_command_result(self, result:str) -> str:
         '''
@@ -1004,11 +1026,16 @@ class CommandAutoTuner:
         <name>get_project_map</name>
         <description>返回项目中指定文件包括文件用途、导入的包、定义的类、函数、变量等。</description>
         <usage>
-         该命令接受一个参数 file_path，为文件路径（文件名或者文件路径的一部分）
+         该命令接受一个参数 file_paths，路径list,或者是以逗号分割的多个文件路径。
+         路径支持相对路径和绝对路径。
          使用例子：
-         get_project_map(file_path="main.py")
+         get_project_map(file_paths=["full/path/to/main.py","partial/path/to/utils.py"])，
+         或者：
+         get_project_map(file_paths="full/path/to/main.py,partial/path/to/utils.py")
          该函数特别适合你想要了解某个文件的用途，以及该文件的导入的包，定义的类，函数，变量等信息。
          同时，你还能看到文件的大小（tokens数），以及索引的大小（tokens数），以及构建索引花费费用等信息。
@@ -1223,9 +1250,57 @@ class CommandAutoTuner:
          response_user(response="你好，我是 auto-coder")
         </usage>
         </command>
+        <command>
+        <name>count_file_tokens</name>
+        <description>计算指定文件的token数量。</description>
+        <usage>
+         该函数接受一个参数 file_path, 为要计算的文件路径。
+         使用例子：
+         count_file_tokens(file_path="full")
+         注意：
+         - 返回值为int类型，表示文件的token数量。
+        </usage>
+        </command>
+        <command>
+        <name>count_string_tokens</name>
+        <description>计算指定字符串的token数量。</description>
+        <usage>
+         该函数接受一个参数 text, 为要计算的文本。
+         使用例子：
+         count_string_tokens(text="你好，世界")
+         注意：
+         - 返回值为int类型，表示文本的token数量。
+        </usage>
+        </command>
+        <command>
+        <n>find_symbol_definition</n>
+        <description>查找指定符号的定义所在的文件路径。</description>
+        <usage>
+         该函数接受一个参数 symbol, 为要查找的符号名称。
+         使用例子：
+         find_symbol_definition(symbol="MyClass")
+         find_symbol_definition(symbol="process_data")
+         注意：
+         - 返回值为字符串，包含符号定义所在的文件路径列表，以逗号分隔
+         - 支持精确匹配和模糊匹配（不区分大小写）
+         - 如果未找到匹配项，会返回提示信息
+        </usage>
+        </command>
         <command>
-        <name>execute_mcp_server</name>
+        <n>execute_mcp_server</n>
         <description>执行MCP服务器</description>
         <usage>
          该函数接受一个参数 query, 为要执行的MCP服务器查询字符串。
@@ -1276,6 +1351,7 @@ class CommandAutoTuner:
             "get_related_files_by_symbols": self.tools.get_related_files_by_symbols,
             "get_project_map": self.tools.get_project_map,
             "get_project_structure": self.tools.get_project_structure,
+            "list_files": self.tools.list_files,
             "read_files": self.tools.read_files,
             "find_files_by_name": self.tools.find_files_by_name,
             "find_files_by_content": self.tools.find_files_by_content,
@@ -1285,6 +1361,9 @@ class CommandAutoTuner:
             "get_project_type": self.project_type_analyzer.analyze,
             "response_user": self.tools.response_user,
             "execute_mcp_server": self.tools.execute_mcp_server,
+            "count_file_tokens": self.tools.count_file_tokens,
+            "count_string_tokens": self.tools.count_string_tokens,
+            "find_symbol_definition": self.tools.find_symbol_definition,
         }

autocoder/commands/tools.py CHANGED Viewed

@@ -31,6 +31,12 @@ from autocoder.common import files as files_utils
 from autocoder.common.printer import Printer
 from prompt_toolkit import PromptSession
 from prompt_toolkit.styles import Style
+from autocoder.rag.token_counter import count_tokens
+from autocoder.index.symbols_utils import (
+    extract_symbols,
+    SymbolType,
+    symbols_info_to_str,
+)
 @byzerllm.prompt()
@@ -251,14 +257,8 @@ class AutoCommandTools:
         })
         return v
-    def get_project_map(self, file_path: Optional[str] = None) -> str:
-        """
-        该工具会返回项目中所有已经被构建索引的文件以及该文件的信息，诸如该文件的用途，导入的包，定义的类，函数，变量等信息。
-        返回的是json格式文本。
-        注意，这个工具无法返回所有文件的信息，因为有些文件可能没有被索引。
-        尽量避免使用该工具。
-        """
+    def _get_sources(self):
         if self.args.project_type == "ts":
             pp = TSProject(args=self.args, llm=self.llm)
         elif self.args.project_type == "py":
@@ -266,13 +266,36 @@ class AutoCommandTools:
         else:
             pp = SuffixProject(args=self.args, llm=self.llm, file_filter=None)
         pp.run()
-        sources = pp.sources
+        return pp.sources
+    def _get_index(self):
+        sources = self._get_sources()
         index_manager = IndexManager(llm=self.llm, sources=sources, args=self.args)
+        return index_manager
+    def get_project_map(self, file_paths: Optional[str] = None) -> str:
+        """
+        该工具会返回项目中所有已经被构建索引的文件以及该文件的信息，诸如该文件的用途，导入的包，定义的类，函数，变量等信息。
+        返回的是json格式文本。
+        参数说明:
+        file_paths (Optional[str]): 可选参数，以逗号分隔的文件路径列表，用于筛选特定文件。
+                                  例如："main.py,utils.py"或"/path/to/main.py,/path/to/utils.py"
+        注意，这个工具无法返回所有文件的信息，因为有些文件可能没有被索引。
+        尽量避免使用该工具。
+        """
+        index_manager = self._get_index()
         s = index_manager.read_index_as_str()
         index_data = json.loads(s)
         final_result = []
+        # 解析文件路径列表（如果提供了）
+        file_path_list = []
+        if file_paths:
+            file_path_list = [path.strip() for path in file_paths.split(",")]
         for k in index_data.values():
             value = {}
             value["file_name"] = k["module_name"]
@@ -281,12 +304,30 @@ class AutoCommandTools:
             value["index_tokens"] = k.get("generated_tokens_count", -1)
             value["file_tokens_cost"] = k.get("input_tokens_cost", -1)
             value["index_tokens_cost"] = k.get("generated_tokens_cost", -1)
-            if file_path and file_path in k["module_name"]:
+            # 如果提供了文件路径列表，检查当前文件是否匹配任何一个路径
+            if file_path_list:
+                if any(path in k["module_name"] for path in file_path_list):
+                    final_result.append(value)
+            else:
                 final_result.append(value)
         v = json.dumps(final_result, ensure_ascii=False)
+        tokens = count_tokens(v)
+        if tokens > self.args.conversation_prune_safe_zone_tokens/2.0:
+            result = f"The project map is too large to return. (tokens: {tokens}). Try to use another function."
+            self.result_manager.add_result(content=result, meta = {
+                "action": "get_project_map",
+                "input": {
+                    "file_paths": file_paths
+                }
+            })
+            return result
         self.result_manager.add_result(content=v, meta = {
             "action": "get_project_map",
-            "input": {
+            "input": {
+                "file_paths": file_paths
             }
         })
         return v
@@ -469,6 +510,97 @@ class AutoCommandTools:
         })
         return source_code_str
+    def find_symbol_definition(self, symbol: str) -> str:
+        """
+        该工具用于查找指定符号的定义。
+        输入参数 symbol: 要查找的符号
+        返回值是符号的定义所在的文件路径列表，以逗号分隔。
+        """
+        index_manager = self._get_index()
+        result = []
+        index_items = index_manager.read_index()
+        for item in index_items:
+            symbols = extract_symbols(item.symbols)
+            for symbol_info in symbols:
+                # 进行精确匹配和模糊匹配
+                if (symbol_info.name == symbol or
+                    symbol.lower() in symbol_info.name.lower()):
+                    # 检查是否已经添加过该文件路径
+                    if symbol_info.module_name not in result:
+                        result.append(symbol_info.module_name)
+        # 生成以逗号分隔的文件路径列表
+        file_paths = ",".join(result)
+        # 如果没有找到任何匹配项，返回提示信息
+        if not file_paths:
+            file_paths = f"未找到符号 '{symbol}' 的定义"
+        # 记录操作结果
+        self.result_manager.add_result(content=file_paths, meta={
+            "action": "find_symbols_definition",
+            "input": {
+                "symbol": symbol
+            }
+        })
+        return file_paths
+    def list_files(self, path: str) -> str:
+        """
+        该工具用于列出指定目录下的所有文件（不包括子目录中的文件）。
+        输入参数 path: 要列出文件的目录路径
+        返回值是目录下所有文件的列表，以逗号分隔。
+        """
+        # 处理绝对路径和相对路径
+        target_path = path
+        if not os.path.isabs(path):
+            # 如果是相对路径，将其转换为绝对路径
+            target_path = os.path.join(self.args.source_dir, path)
+        # 确保路径存在且是目录
+        if not os.path.exists(target_path):
+            result = f"目录不存在: {target_path}"
+            self.result_manager.add_result(content=result, meta={
+                "action": "list_files",
+                "input": {
+                    "path": path
+                }
+            })
+            return result
+        if not os.path.isdir(target_path):
+            result = f"指定路径不是目录: {target_path}"
+            self.result_manager.add_result(content=result, meta={
+                "action": "list_files",
+                "input": {
+                    "path": path
+                }
+            })
+            return result
+        # 只收集当前目录下的文件，不递归子目录
+        file_list = []
+        for item in os.listdir(target_path):
+            item_path = os.path.join(target_path, item)
+            # 只添加文件，不添加目录
+            if os.path.isfile(item_path):
+                file_list.append(item_path)
+        # 生成以逗号分隔的文件列表
+        result = ",".join(file_list)
+        # 记录结果
+        self.result_manager.add_result(content=result, meta={
+            "action": "list_files",
+            "input": {
+                "path": path
+            }
+        })
+        return result
     def get_project_structure(self) -> str:
         if self.args.project_type == "ts":
             pp = TSProject(args=self.args, llm=self.llm)
@@ -478,6 +610,17 @@ class AutoCommandTools:
             pp = SuffixProject(args=self.args, llm=self.llm, file_filter=None)
         pp.run()
         s = pp.get_tree_like_directory_structure()
+        tokens = count_tokens(s)
+        if tokens > self.args.conversation_prune_safe_zone_tokens / 2.0:
+            result = f"The project structure is too large to return. (tokens: {tokens}). Try to use another function."
+            self.result_manager.add_result(content=result, meta = {
+                "action": "get_project_structure",
+                "input": {
+                }
+            })
+            return result
         self.result_manager.add_result(content=s, meta = {
             "action": "get_project_structure",
             "input": {
@@ -509,6 +652,23 @@ class AutoCommandTools:
             }
         })
         return v
+    def count_file_tokens(self, file_path: str) -> int:
+        """
+        该工具用于计算指定文件的token数量。
+        输入参数 file_path: 文件路径
+        返回值是文件的token数量。
+        """
+        content = files_utils.read_file(file_path)
+        return count_tokens(content)
+    def count_string_tokens(self, text: str) -> int:
+        """
+        该工具用于计算指定字符串的token数量。
+        输入参数 text: 要计算的文本
+        返回值是字符串的token数量。
+        """
+        return count_tokens(text)
     def find_files_by_content(self, keyword: str) -> str:
         """

autocoder/common/__init__.py CHANGED Viewed

@@ -377,7 +377,7 @@ class AutoCoderArgs(pydantic.BaseModel):
     # block:给定每个文件修改的代码块 file: 给定每个文件修改前后内容
     rank_strategy: Optional[str] = "file"
-    action: List[str] = []
+    action: Union[List[str], Dict[str, Any]] = []
     enable_global_memory: Optional[bool] = False
     product_mode: Optional[str] = "lite"
@@ -402,6 +402,10 @@ class AutoCoderArgs(pydantic.BaseModel):
     enable_beta: Optional[bool] = False
+    how_to_reproduce: Optional[str] = None
+    dynamic_urls: List[str] = []
     class Config:
         protected_namespaces = ()

auto-coder 0.1.298__py3-none-any.whl → 0.1.300__py3-none-any.whl

Potentially problematic release.

auto-coder 0.1.298__py3-none-any.whl → 0.1.300__py3-none-any.whl