autocoder-nano 0.1.30__py3-none-any.whl → 0.1.34__py3-none-any.whl
This diff shows the changes between publicly released versions of the package, as they appear in the supported public registries. It is provided for informational purposes only.
- autocoder_nano/agent/agent_base.py +4 -4
- autocoder_nano/agent/agentic_edit.py +1584 -0
- autocoder_nano/agent/agentic_edit_tools/__init__.py +28 -0
- autocoder_nano/agent/agentic_edit_tools/ask_followup_question_tool.py +51 -0
- autocoder_nano/agent/agentic_edit_tools/attempt_completion_tool.py +36 -0
- autocoder_nano/agent/agentic_edit_tools/base_tool_resolver.py +31 -0
- autocoder_nano/agent/agentic_edit_tools/execute_command_tool.py +65 -0
- autocoder_nano/agent/agentic_edit_tools/list_code_definition_names_tool.py +78 -0
- autocoder_nano/agent/agentic_edit_tools/list_files_tool.py +123 -0
- autocoder_nano/agent/agentic_edit_tools/list_package_info_tool.py +42 -0
- autocoder_nano/agent/agentic_edit_tools/plan_mode_respond_tool.py +35 -0
- autocoder_nano/agent/agentic_edit_tools/read_file_tool.py +73 -0
- autocoder_nano/agent/agentic_edit_tools/replace_in_file_tool.py +148 -0
- autocoder_nano/agent/agentic_edit_tools/search_files_tool.py +135 -0
- autocoder_nano/agent/agentic_edit_tools/write_to_file_tool.py +57 -0
- autocoder_nano/agent/agentic_edit_types.py +151 -0
- autocoder_nano/auto_coder_nano.py +159 -700
- autocoder_nano/git_utils.py +63 -1
- autocoder_nano/llm_client.py +170 -3
- autocoder_nano/llm_types.py +72 -16
- autocoder_nano/rules/rules_learn.py +221 -0
- autocoder_nano/templates.py +1 -1
- autocoder_nano/utils/completer_utils.py +616 -0
- autocoder_nano/utils/formatted_log_utils.py +128 -0
- autocoder_nano/utils/printer_utils.py +5 -4
- autocoder_nano/utils/shell_utils.py +85 -0
- autocoder_nano/version.py +1 -1
- {autocoder_nano-0.1.30.dist-info → autocoder_nano-0.1.34.dist-info}/METADATA +3 -2
- {autocoder_nano-0.1.30.dist-info → autocoder_nano-0.1.34.dist-info}/RECORD +34 -16
- autocoder_nano/agent/new/auto_new_project.py +0 -278
- /autocoder_nano/{agent/new → rules}/__init__.py +0 -0
- {autocoder_nano-0.1.30.dist-info → autocoder_nano-0.1.34.dist-info}/LICENSE +0 -0
- {autocoder_nano-0.1.30.dist-info → autocoder_nano-0.1.34.dist-info}/WHEEL +0 -0
- {autocoder_nano-0.1.30.dist-info → autocoder_nano-0.1.34.dist-info}/entry_points.txt +0 -0
- {autocoder_nano-0.1.30.dist-info → autocoder_nano-0.1.34.dist-info}/top_level.txt +0 -0
autocoder_nano/git_utils.py
CHANGED
@@ -1,4 +1,5 @@
 import os
+from typing import Tuple, List, Dict, Optional
 
 from autocoder_nano.llm_prompt import prompt
 from git import Repo, GitCommandError
@@ -540,4 +541,65 @@ def generate_commit_message(changes_report: str) -> str:
     {{ changes_report }}
 
     Output only the commit message; do not output anything else.
-    '''
+    '''
+
+
+def get_commit_changes(
+        repo_path: str, commit_id: str
+) -> Tuple[List[Tuple[str, List[str], Dict[str, Tuple[str, str]]]], Optional[str]]:
+    """ Fetch the changes of a given commit directly from the Git repository """
+    querie_with_urls_and_changes = []
+    try:
+        repo = get_repo(repo_path)
+        commit = repo.commit(commit_id)
+        modified_files = []
+        changes = {}
+
+        # Check whether this is the initial commit (no parent commits)
+        if not commit.parents:
+            # Initial commit: collect every file
+            for item in commit.tree.traverse():
+                if item.type == 'blob':  # Only handle files, not directories
+                    file_path = item.path
+                    modified_files.append(file_path)
+                    # There is no content before the initial commit
+                    before_content = None
+                    # Fetch the content after the commit
+                    after_content = repo.git.show(f"{commit.hexsha}:{file_path}")
+                    changes[file_path] = (before_content, after_content)
+        else:
+            # Get the parent commit
+            parent = commit.parents[0]
+            # Get the list of changed files
+            for diff_item in parent.diff(commit):
+                file_path = diff_item.a_path if diff_item.a_path else diff_item.b_path
+                modified_files.append(file_path)
+
+                # Content before the change
+                before_content = None
+                try:
+                    if diff_item.a_blob:
+                        before_content = repo.git.show(f"{parent.hexsha}:{file_path}")
+                except GitCommandError:
+                    pass  # The file may have been newly added
+
+                # Content after the change
+                after_content = None
+                try:
+                    if diff_item.b_blob:
+                        after_content = repo.git.show(f"{commit.hexsha}:{file_path}")
+                except GitCommandError:
+                    pass  # The file may have been deleted
+
+                changes[file_path] = (before_content, after_content)
+
+        # Use the commit message as the query content
+        query = commit.message
+        querie_with_urls_and_changes.append((query, modified_files, changes))
+
+    except GitCommandError as e:
+        printer.print_text(f"git_command_error: {e}.", style="red")
+    except Exception as e:
+        printer.print_text(f"get_commit_changes_error: {e}.", style="red")
+
+    return querie_with_urls_and_changes, None
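For orientation, here is a minimal sketch of how the new get_commit_changes helper can be consumed. The repository path and the use of "HEAD" as the commit id are illustrative assumptions, not taken from the package:

from autocoder_nano.git_utils import get_commit_changes

# Hypothetical example: inspect the most recent commit of a local repository.
changes, _ = get_commit_changes("/path/to/repo", "HEAD")
for query, files, file_changes in changes:
    print(f"commit message: {query.strip()}")  # query is the commit message
    for path in files:
        before, after = file_changes[path]  # (None, str) for added files
        status = "added" if before is None else "deleted" if after is None else "modified"
        print(f"  {path}: {status}")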
autocoder_nano/llm_client.py
CHANGED
@@ -1,10 +1,10 @@
-from typing import List
+from typing import List, Generator, Any, Optional, Dict, Union
 
 # from loguru import logger
 from openai import OpenAI, Stream
 from openai.types.chat import ChatCompletionChunk, ChatCompletion
 
-from autocoder_nano.llm_types import LLMRequest, LLMResponse
+from autocoder_nano.llm_types import LLMRequest, LLMResponse, AutoCoderArgs, SingleOutputMeta
 from autocoder_nano.utils.printer_utils import Printer
 
 
@@ -53,6 +53,126 @@ class AutoLLM:
         res = self._query(model, request, stream=True)
         return res
 
+    def stream_chat_ai_ex(
+            self, conversations, model: Optional[str] = None, role_mapping=None, delta_mode: bool = False,
+            is_reasoning: bool = False, llm_config: dict | None = None
+    ):
+        if llm_config is None:
+            llm_config = {}
+        if not model:
+            model = self.default_model_name
+
+        client: OpenAI = self.sub_clients[model]["client"]
+        model_name = self.sub_clients[model]["model_name"]
+
+        request = LLMRequest(
+            model=model_name,
+            messages=conversations,
+            stream=True
+        )
+
+        if is_reasoning:
+            response = client.chat.completions.create(
+                messages=request.messages,
+                model=request.model,
+                stream=request.stream,
+                stream_options={"include_usage": True},
+                extra_headers={
+                    "HTTP-Referer": "https://auto-coder.chat",
+                    "X-Title": "auto-coder-nano"
+                },
+                **llm_config
+            )
+        else:
+            response = client.chat.completions.create(
+                messages=conversations,
+                model=model_name,
+                temperature=llm_config.get("temperature", request.temperature),
+                max_tokens=llm_config.get("max_tokens", request.max_tokens),
+                top_p=llm_config.get("top_p", request.top_p),
+                stream=request.stream,
+                stream_options={"include_usage": True},
+                **llm_config
+            )
+
+        last_meta = None
+
+        if delta_mode:
+            for chunk in response:
+                if hasattr(chunk, "usage") and chunk.usage:
+                    input_tokens_count = chunk.usage.prompt_tokens
+                    generated_tokens_count = chunk.usage.completion_tokens
+                else:
+                    input_tokens_count = 0
+                    generated_tokens_count = 0
+
+                if not chunk.choices:
+                    if last_meta:
+                        yield (
+                            "",
+                            SingleOutputMeta(
+                                input_tokens_count=input_tokens_count,
+                                generated_tokens_count=generated_tokens_count,
+                                reasoning_content="",
+                                finish_reason=last_meta.finish_reason,
+                            ),
+                        )
+                    continue
+
+                content = chunk.choices[0].delta.content or ""
+
+                reasoning_text = ""
+                if hasattr(chunk.choices[0].delta, "reasoning_content"):
+                    reasoning_text = chunk.choices[0].delta.reasoning_content or ""
+
+                last_meta = SingleOutputMeta(
+                    input_tokens_count=input_tokens_count,
+                    generated_tokens_count=generated_tokens_count,
+                    reasoning_content=reasoning_text,
+                    finish_reason=chunk.choices[0].finish_reason,
+                )
+                yield content, last_meta
+        else:
+            s = ""
+            all_reasoning_text = ""
+            for chunk in response:
+                if hasattr(chunk, "usage") and chunk.usage:
+                    input_tokens_count = chunk.usage.prompt_tokens
+                    generated_tokens_count = chunk.usage.completion_tokens
+                else:
+                    input_tokens_count = 0
+                    generated_tokens_count = 0
+
+                if not chunk.choices:
+                    if last_meta:
+                        yield (
+                            s,
+                            SingleOutputMeta(
+                                input_tokens_count=input_tokens_count,
+                                generated_tokens_count=generated_tokens_count,
+                                reasoning_content=all_reasoning_text,
+                                finish_reason=last_meta.finish_reason,
+                            ),
+                        )
+                    continue
+
+                content = chunk.choices[0].delta.content or ""
+                reasoning_text = ""
+                if hasattr(chunk.choices[0].delta, "reasoning_content"):
+                    reasoning_text = chunk.choices[0].delta.reasoning_content or ""
+
+                s += content
+                all_reasoning_text += reasoning_text
+                yield (
+                    s,
+                    SingleOutputMeta(
+                        input_tokens_count=input_tokens_count,
+                        generated_tokens_count=generated_tokens_count,
+                        reasoning_content=all_reasoning_text,
+                        finish_reason=chunk.choices[0].finish_reason,
+                    ),
+                )
+
     def chat_ai(self, conversations, model=None) -> LLMResponse:
         # conversations = [{"role": "user", "content": prompt_str}] deepseek-chat
         if not model and not self.default_model_name:
@@ -129,4 +249,51 @@ class AutoLLM:
                 "model": res.model,
                 "created": res.created
             }
-        )
+        )
+
+
+def stream_chat_with_continue(
+        llm: AutoLLM, conversations: List[dict], llm_config: dict, args: AutoCoderArgs
+) -> Generator[Any, None, None]:
+    """ Stream the output and keep generating until completion """
+    count = 0
+    temp_conversations = [] + conversations
+    current_metadata = None
+    metadatas = {}
+    while True:
+        # Fetch generated content through the streaming interface
+        stream_generator = llm.stream_chat_ai_ex(
+            conversations=temp_conversations,
+            model=args.chat_model,
+            delta_mode=True,
+            llm_config={**llm_config}
+        )
+
+        current_content = ""
+
+        for res in stream_generator:
+            content = res[0]
+            current_content += content
+            if current_metadata is None:
+                current_metadata = res[1]
+                metadatas[count] = res[1]
+            else:
+                metadatas[count] = res[1]
+                current_metadata.finish_reason = res[1].finish_reason
+                current_metadata.reasoning_content = res[1].reasoning_content
+
+            # Yield the current StreamChatWithContinueResult
+            current_metadata.generated_tokens_count = sum([v.generated_tokens_count for _, v in metadatas.items()])
+            current_metadata.input_tokens_count = sum([v.input_tokens_count for _, v in metadatas.items()])
+            yield content, current_metadata
+
+        # Update the conversation history
+        temp_conversations.append({"role": "assistant", "content": current_content})
+
+        # Check whether generation needs to continue
+        if current_metadata.finish_reason != "length" or count >= args.generate_max_rounds:
+            if count >= args.generate_max_rounds:
+                printer.print_text(f"LLM generation reached the maximum number of rounds; current round: {count}, max rounds: {args.generate_max_rounds}, "
+                                   f"Tokens: {current_metadata.generated_tokens_count}", style="yellow")
+            break
+        count += 1
autocoder_nano/llm_types.py
CHANGED
@@ -1,8 +1,8 @@
 import dataclasses
 from enum import Enum
-from typing import List, Dict, Any, Optional, Union, Tuple, Set
+from typing import List, Dict, Any, Optional, Union, Tuple, Set, Callable
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, SkipValidation
 
 
 class AutoCoderArgs(BaseModel):
@@ -18,6 +18,7 @@ class AutoCoderArgs(BaseModel):
     index_filter_level: Optional[int] = 0  # Filter level used to locate relevant files
     index_filter_file_num: Optional[int] = -1  #
     index_filter_workers: Optional[int] = 1  # Number of worker threads for filtering files
+    index_model_max_input_length: Optional[int] = 6000  # Maximum input length for the model
     filter_batch_size: Optional[int] = 5  #
     anti_quota_limit: Optional[int] = 1  # Interval (in seconds) between model requests
     skip_build_index: Optional[bool] = False  # Whether to skip building the index (the index helps locate relevant files from a query)
@@ -46,7 +47,7 @@ class AutoCoderArgs(BaseModel):
     full_text_ratio: Optional[float] = 0.7
     segment_ratio: Optional[float] = 0.2
     buff_ratio: Optional[float] = 0.1
-    required_exts: Optional[str] = None
+    required_exts: Optional[str] = None  # File extensions to process, e.g. .pdf,.doc
     monitor_mode: bool = False  # Monitor mode: watch the doc_dir directory for file changes
     enable_hybrid_index: bool = False  # Enable the hybrid index
     disable_auto_window: bool = False
@@ -57,26 +58,32 @@ class AutoCoderArgs(BaseModel):
     enable_rag_context: Optional[Union[bool, str]] = False
     disable_segment_reorder: bool = False
     disable_inference_enhance: bool = False
-    duckdb_vector_dim: Optional[int] = 1024
+    duckdb_vector_dim: Optional[int] = 1024  # Vector dimension for DuckDB vector storage
     duckdb_query_similarity: Optional[float] = 0.7  # Similarity threshold for DuckDB vector retrieval
-    duckdb_query_top_k: Optional[int] = 50
+    duckdb_query_top_k: Optional[int] = 50  # Return the top-K results above the similarity threshold
 
     # Git-related parameters
     skip_commit: Optional[bool] = False
 
+    # Rules-related parameters
+    enable_rules: Optional[bool] = False
+
+    # Agent-related parameters
+    generate_max_rounds: Optional[int] = 5
+
     # Model-related parameters
     current_chat_model: Optional[str] = ""
     current_code_model: Optional[str] = ""
-    model: Optional[str] = ""
-    chat_model: Optional[str] = ""
-    index_model: Optional[str] = ""
-    code_model: Optional[str] = ""
-    commit_model: Optional[str] = ""
-    emb_model: Optional[str] = ""
-    recall_model: Optional[str] = ""
-    chunk_model: Optional[str] = ""
-    qa_model: Optional[str] = ""
-    vl_model: Optional[str] = ""
+    model: Optional[str] = ""  # Default model
+    chat_model: Optional[str] = ""  # AI chat model
+    index_model: Optional[str] = ""  # Code index generation model
+    code_model: Optional[str] = ""  # Coding model
+    commit_model: Optional[str] = ""  # Git commit model
+    emb_model: Optional[str] = ""  # RAG embedding model
+    recall_model: Optional[str] = ""  # RAG recall-stage model
+    chunk_model: Optional[str] = ""  # Paragraph re-ranking model
+    qa_model: Optional[str] = ""  # RAG question-answering model
+    vl_model: Optional[str] = ""  # Multimodal model
 
     class Config:
         protected_namespaces = ()
@@ -117,6 +124,14 @@ class SourceCode(BaseModel):
     metadata: Dict[str, Any] = Field(default_factory=dict)
 
 
+class SourceCodeList:
+    def __init__(self, sources: List[SourceCode]):
+        self.sources = sources
+
+    def to_str(self):
+        return "\n".join([f"##File: {source.module_name}\n{source.source_code}\n" for source in self.sources])
+
+
 class LLMRequest(BaseModel):
     model: str  # Name of the language model to use
     messages: List[Dict[str, str]]  # List of conversation messages; each message is a dict with "role" and "content"
@@ -138,6 +153,21 @@ class LLMResponse(BaseModel):
     )
 
 
+class SingleOutputMeta:
+    def __init__(self, input_tokens_count: int = 0,
+                 generated_tokens_count: int = 0,
+                 reasoning_content: str = "",
+                 finish_reason: str = "",
+                 first_token_time: float = 0.0,
+                 extra_info: Dict[str, Any] = {}):
+        self.input_tokens_count = input_tokens_count
+        self.generated_tokens_count = generated_tokens_count
+        self.reasoning_content = reasoning_content
+        self.finish_reason = finish_reason
+        self.first_token_time = first_token_time
+        self.extra_info = extra_info
+
+
 class IndexItem(BaseModel):
     module_name: str
     symbols: str
@@ -211,6 +241,23 @@ class Tag(BaseModel):
     end_tag: str
 
 
+class FileSystemModel(BaseModel):
+    project_root: str
+    get_all_file_names_in_project: SkipValidation[Callable]
+    get_all_file_in_project: SkipValidation[Callable]
+    get_all_dir_names_in_project: SkipValidation[Callable]
+    get_all_file_in_project_with_dot: SkipValidation[Callable]
+    get_symbol_list: SkipValidation[Callable]
+
+
+class MemoryConfig(BaseModel):
+    get_memory_func: SkipValidation[Callable]
+    save_memory_func: SkipValidation[Callable]
+
+    class Config:
+        arbitrary_types_allowed = True
+
+
 class SymbolItem(BaseModel):
     symbol_name: str
     symbol_type: SymbolType
@@ -269,4 +316,13 @@ class FileInfo(BaseModel):
     file_path: str
     relative_path: str
     modify_time: float
-    file_md5: str
+    file_md5: str
+
+
+class RuleFile(BaseModel):
+    """Pydantic model for a rules file"""
+    description: str = Field(default="", description="Description of the rule")
+    globs: List[str] = Field(default_factory=list, description="List of file-matching patterns")
+    always_apply: bool = Field(default=False, description="Whether the rule is always applied")
+    content: str = Field(default="", description="Body content of the rule file")
+    file_path: str = Field(default="", description="Path to the rule file")
autocoder_nano/rules/rules_learn.py
ADDED
@@ -0,0 +1,221 @@
+import os
+from typing import List, Tuple, Dict, Optional, Generator
+
+from autocoder_nano.git_utils import get_commit_changes
+from autocoder_nano.llm_client import AutoLLM
+from autocoder_nano.llm_prompt import prompt
+from autocoder_nano.llm_types import AutoCoderArgs, SourceCodeList
+from autocoder_nano.utils.printer_utils import Printer
+
+
+printer = Printer()
+
+
+class AutoRulesLearn:
+
+    def __init__(self, args: AutoCoderArgs, llm: AutoLLM):
+        self.args = args
+        self.llm = llm
+
+    @prompt()
+    def _analyze_commit_changes(
+            self, querie_with_urls_and_changes: List[Tuple[str, List[str], Dict[str, Tuple[str, str]]]]
+    ):
+        """
+        Below are the code changes from one of the user's commits:
+        <changes>
+        {% for query,urls,changes in querie_with_urls_and_changes %}
+        ## Original task requirement
+        {{ query }}
+
+        Modified files:
+        {% for url in urls %}
+        - {{ url }}
+        {% endfor %}
+
+        Code changes:
+        {% for file_path, (before, after) in changes.items() %}
+        ##File: {{ file_path }}
+        ##Before the change:
+
+        {{ before or "New file" }}
+
+        ##File: {{ file_path }}
+        ##After the change:
+
+        {{ after or "File deleted" }}
+
+        {% endfor %}
+        {% endfor %}
+        </changes>
+
+        Please analyze the code changes above in depth, extract functionality and design patterns of general value, and turn them into code rules (rules) that can be reused in other projects.
+
+        - Identify features and patterns in the code changes that are broadly applicable
+        - Distill these features into structured rules that can be quickly reused in other projects
+        - Produce clear usage examples with complete dependencies and invocation details
+
+        Finally, the newly generated file must take this form:
+
+        <example_rules>
+        ---
+        description: [concise description of what the rule does, max 20 characters]
+        globs: [file paths the rule applies to, e.g. "src/services/*.py"]
+        alwaysApply: [whether to always apply the rule, usually false]
+        ---
+
+        # [Main rule title]
+
+        ## Summary
+        [What the rule does, where it applies and its value, max 100 characters]
+
+        ## Typical usage
+        ```python
+        # A complete code example, including:
+        # 1. Required import statements
+        # 2. Class/function definitions
+        # 3. Parameter descriptions
+        # 4. How to invoke it
+        # 5. Key comments
+        ```
+
+        ## Dependencies
+        - [Required libraries and versions]
+        - [Environment requirements]
+        - [Initialization steps (if any)]
+
+        ## Learned from
+        [Which part of which commit's changes this feature was extracted from]
+        </example_rules>
+        """
+
+    @prompt()
+    def _analyze_modules(self, sources: SourceCodeList):
+        """
+        Below is the code the user provided for rule extraction:
+        <files>
+        {% for source in sources.sources %}
+        ##File: {{ source.module_name }}
+        {{ source.source_code }}
+        {% endfor %}
+        </files>
+
+        Please analyze the code above in depth, extract functionality and design patterns of general value, and turn them into code rules (rules) that can be reused in other projects.
+
+        - Identify features and patterns in the code that are broadly applicable
+        - Distill these features into structured rules that can be quickly reused in other projects
+        - Produce clear usage examples with complete dependencies and invocation details
+
+        Finally, the newly generated file must take this form:
+
+        <example_rules>
+        ---
+        description: [concise description of what the rule does, max 20 characters]
+        globs: [file paths the rule applies to, e.g. "src/services/*.py"]
+        alwaysApply: [whether to always apply the rule, usually false]
+        ---
+
+        # [Main rule title]
+
+        ## Summary
+        [What the rule does, where it applies and its value, max 100 characters]
+
+        ## Typical usage
+        ```python
+        # A complete code example, including:
+        # 1. Required import statements
+        # 2. Class/function definitions
+        # 3. Parameter descriptions
+        # 4. How to invoke it
+        # 5. Key comments
+        ```
+
+        ## Dependencies
+        - [Required libraries and versions]
+        - [Environment requirements]
+        - [Initialization steps (if any)]
+
+        ## Learned from
+        [Which part of which commit's changes this feature was extracted from]
+        </example_rules>
+        """
+
+    def analyze_commit_changes(
+            self, commit_id: str, conversations=None
+    ) -> str:
+        """ Analyze the code changes of the specified commit """
+        if conversations is None:
+            conversations = []
+
+        changes, _ = get_commit_changes(self.args.source_dir, commit_id)
+
+        if not changes:
+            printer.print_text("No code changes found (commit)", style="yellow")
+            return ""
+
+        try:
+            # Build the prompt content
+            prompt_content = self._analyze_commit_changes.prompt(
+                querie_with_urls_and_changes=changes
+            )
+
+            # Prepare the conversation history
+            if conversations:
+                new_conversations = conversations[:-1]
+            else:
+                new_conversations = []
+            new_conversations.append({"role": "user", "content": prompt_content})
+
+            self.llm.setup_default_model_name(self.args.chat_model)
+            v = self.llm.chat_ai(new_conversations, self.args.chat_model)
+            return v.output
+        except Exception as e:
+            printer.print_text(f"Commit change analysis failed: {e}", style="red")
+            return ""
+
+    def analyze_modules(
+            self, sources: SourceCodeList, conversations=None
+    ) -> str:
+        """ Analyze the given module files and summarize reusable features according to the user's needs. """
+
+        if conversations is None:
+            conversations = []
+
+        if not sources or not sources.sources:
+            printer.print_text("No valid module files were provided for analysis.", style="red")
+            return ""
+
+        try:
+            # Prepare the prompt
+            prompt_content = self._analyze_modules.prompt(
+                sources=sources
+            )
+
+            # Prepare the conversation history
+            # If conversations are provided, assume the last one is the user's original query and replace it
+            if conversations:
+                new_conversations = conversations[:-1]
+            else:
+                new_conversations = []
+            new_conversations.append({"role": "user", "content": prompt_content})
+
+            self.llm.setup_default_model_name(self.args.chat_model)
+            v = self.llm.chat_ai(new_conversations, self.args.chat_model)
+            return v.output
+        except Exception as e:
+            printer.print_text(f"Module analysis failed: {e}", style="red")
+            return ""
+
+    def _get_index_file_content(self) -> str:
+        """ Get the contents of the index file """
+        index_file_path = os.path.join(os.path.abspath(self.args.source_dir), ".autocoderrules", "index.md")
+        index_file_content = ""
+
+        try:
+            if os.path.exists(index_file_path):
+                with open(index_file_path, 'r', encoding='utf-8') as f:
+                    index_file_content = f.read()
+        except Exception as e:
+            printer.print_text(f"Error reading index file: {str(e)}", style="yellow")
+
+        return index_file_content
autocoder_nano/templates.py
CHANGED