auto-coder 0.1.353__py3-none-any.whl → 0.1.355__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of auto-coder might be problematic.
- {auto_coder-0.1.353.dist-info → auto_coder-0.1.355.dist-info}/METADATA +1 -1
- {auto_coder-0.1.353.dist-info → auto_coder-0.1.355.dist-info}/RECORD +60 -45
- autocoder/agent/agentic_filter.py +1 -1
- autocoder/auto_coder.py +8 -0
- autocoder/auto_coder_rag.py +37 -1
- autocoder/auto_coder_runner.py +58 -77
- autocoder/chat/conf_command.py +270 -0
- autocoder/chat/models_command.py +485 -0
- autocoder/chat_auto_coder.py +29 -24
- autocoder/chat_auto_coder_lang.py +26 -2
- autocoder/commands/auto_command.py +60 -132
- autocoder/commands/auto_web.py +1 -1
- autocoder/commands/tools.py +1 -1
- autocoder/common/__init__.py +3 -1
- autocoder/common/command_completer.py +58 -12
- autocoder/common/command_completer_v2.py +576 -0
- autocoder/common/conversations/__init__.py +52 -0
- autocoder/common/conversations/compatibility.py +303 -0
- autocoder/common/conversations/conversation_manager.py +502 -0
- autocoder/common/conversations/example.py +152 -0
- autocoder/common/file_monitor/__init__.py +5 -0
- autocoder/common/file_monitor/monitor.py +383 -0
- autocoder/common/global_cancel.py +53 -16
- autocoder/common/ignorefiles/__init__.py +4 -0
- autocoder/common/ignorefiles/ignore_file_utils.py +103 -0
- autocoder/common/ignorefiles/test_ignore_file_utils.py +91 -0
- autocoder/common/rulefiles/__init__.py +15 -0
- autocoder/common/rulefiles/autocoderrules_utils.py +173 -0
- autocoder/common/save_formatted_log.py +54 -0
- autocoder/common/v2/agent/agentic_edit.py +10 -39
- autocoder/common/v2/agent/agentic_edit_tools/list_files_tool_resolver.py +1 -1
- autocoder/common/v2/agent/agentic_edit_tools/search_files_tool_resolver.py +73 -43
- autocoder/common/v2/code_agentic_editblock_manager.py +9 -9
- autocoder/common/v2/code_diff_manager.py +2 -2
- autocoder/common/v2/code_editblock_manager.py +31 -18
- autocoder/common/v2/code_strict_diff_manager.py +3 -2
- autocoder/dispacher/actions/action.py +6 -6
- autocoder/dispacher/actions/plugins/action_regex_project.py +2 -2
- autocoder/events/event_manager_singleton.py +1 -1
- autocoder/index/index.py +3 -3
- autocoder/models.py +22 -9
- autocoder/rag/api_server.py +14 -2
- autocoder/rag/cache/local_byzer_storage_cache.py +1 -1
- autocoder/rag/cache/local_duckdb_storage_cache.py +8 -0
- autocoder/rag/cache/simple_cache.py +63 -33
- autocoder/rag/loaders/docx_loader.py +1 -1
- autocoder/rag/loaders/filter_utils.py +133 -76
- autocoder/rag/loaders/image_loader.py +15 -3
- autocoder/rag/loaders/pdf_loader.py +2 -2
- autocoder/rag/long_context_rag.py +11 -0
- autocoder/rag/qa_conversation_strategy.py +5 -31
- autocoder/rag/utils.py +21 -2
- autocoder/utils/_markitdown.py +66 -25
- autocoder/utils/auto_coder_utils/chat_stream_out.py +4 -4
- autocoder/utils/thread_utils.py +9 -27
- autocoder/version.py +1 -1
- {auto_coder-0.1.353.dist-info → auto_coder-0.1.355.dist-info}/LICENSE +0 -0
- {auto_coder-0.1.353.dist-info → auto_coder-0.1.355.dist-info}/WHEEL +0 -0
- {auto_coder-0.1.353.dist-info → auto_coder-0.1.355.dist-info}/entry_points.txt +0 -0
- {auto_coder-0.1.353.dist-info → auto_coder-0.1.355.dist-info}/top_level.txt +0 -0
autocoder/rag/cache/simple_cache.py CHANGED

```diff
@@ -24,6 +24,7 @@ from .failed_files_utils import load_failed_files, save_failed_files
 from autocoder.common import AutoCoderArgs
 from byzerllm import SimpleByzerLLM, ByzerLLM
 from autocoder.utils.llms import get_llm_names
+from autocoder.common.file_monitor.monitor import get_file_monitor, Change
 
 
 default_ignore_dirs = [
@@ -50,7 +51,7 @@ def generate_content_md5(content: Union[str, bytes]) -> str:
 
 
 class AutoCoderRAGAsyncUpdateQueue(BaseCacheManager):
-    def __init__(self, path: str, ignore_spec, required_exts: list,
+    def __init__(self, path: str, ignore_spec, required_exts: list, args: Optional[AutoCoderArgs] = None, llm: Optional[Union[ByzerLLM, SimpleByzerLLM, str]] = None):
         """
         Initialize the asynchronous update queue that manages the code-file cache.
 
@@ -58,7 +59,8 @@ class AutoCoderRAGAsyncUpdateQueue(BaseCacheManager):
         path: root directory of the codebase to index
         ignore_spec: rules describing which files/directories should be ignored
         required_exts: list of file extensions to process
-
+        args: AutoCoderArgs object carrying the configuration
+        llm: LLM instance used for code analysis
 
         Cache structure (self.cache):
         self.cache is a dictionary structured as follows:
@@ -99,7 +101,6 @@ class AutoCoderRAGAsyncUpdateQueue(BaseCacheManager):
         self.args = args
         self.llm = llm
         self.product_mode = args.product_mode or "lite"
-        self.update_interval = update_interval
         self.queue = []
         self.cache = {}  # starts empty; populated later via read_cache()
         self.lock = threading.Lock()
@@ -115,10 +116,16 @@ class AutoCoderRAGAsyncUpdateQueue(BaseCacheManager):
         self.queue_thread.daemon = True
         self.queue_thread.start()
 
-        #
-        self.
-
-        self.
+        # Register the file-monitor callback.
+        self.file_monitor = get_file_monitor(self.path)
+        # Register on the root directory so changes to all subdirectories and files are captured.
+        self.file_monitor.register(self.path, self._on_file_change)
+        # Make sure the monitor is running.
+        if not self.file_monitor.is_running():
+            self.file_monitor.start()
+            logger.info(f"Started file monitor for {self.path}")
+        else:
+            logger.info(f"File monitor already running for {self.path}")
 
         self.cache = self.read_cache()
 
@@ -130,37 +137,57 @@ class AutoCoderRAGAsyncUpdateQueue(BaseCacheManager):
                 logger.error(f"Error in process_queue: {e}")
                 time.sleep(1)  # avoid checking too frequently
 
-    def
-        """
-
-
-
-
-
-
-
-
-
-
-
+    def _on_file_change(self, change_type: Change, file_path: str):
+        """
+        File-monitor callback; triggers an update when a file changes.
+
+        Args:
+            change_type: type of change (Change.added, Change.modified, Change.deleted)
+            file_path: path of the file that changed
+        """
+        try:
+            # Skip triggering if the cache has not been initialized yet.
+            if not self.cache:
+                return
+
+            # Skip files whose extension is not in the required list.
+            if self.required_exts and not any(file_path.endswith(ext) for ext in self.required_exts):
+                return
+
+            # Skip files matched by the ignore rules.
+            if self.ignore_spec and self.ignore_spec.match_file(os.path.relpath(file_path, self.path)):
+                return
+
+            logger.info(f"File change detected: {change_type} - {file_path}")
+            self.trigger_update()
+        except Exception as e:
+            logger.error(f"Error in file change handler: {e}")
+            logger.exception(e)
 
     def stop(self):
         self.stop_event.set()
-
-
+        # Unregister the file-monitor callback.
+        try:
+            self.file_monitor.unregister(self.path, self._on_file_change)
+            logger.info(f"Unregistered file monitor callback for {self.path}")
+        except Exception as e:
+            logger.error(f"Error unregistering file monitor callback: {e}")
+        # Only wait for the queue-processing thread to finish.
+        if hasattr(self, 'queue_thread') and self.queue_thread.is_alive():
+            self.queue_thread.join(timeout=2.0)
 
     def fileinfo_to_tuple(self, file_info: FileInfo) -> Tuple[str, str, float, str]:
         return (file_info.file_path, file_info.relative_path, file_info.modify_time, file_info.file_md5)
 
     def __del__(self):
+        # Make sure monitoring is stopped and resources are cleaned up when the object is destroyed.
         self.stop()
 
     def load_first(self):
         with self.lock:
             if self.cache:
                 return
-            files_to_process = []
+            files_to_process = []
             for file_info in self.get_all_files():
                 file_path, _, modify_time, file_md5 = file_info
                 if (
@@ -175,7 +202,7 @@ class AutoCoderRAGAsyncUpdateQueue(BaseCacheManager):
             #     [process_file.remote(file_info) for file_info in files_to_process]
             # )
             from autocoder.rag.token_counter import initialize_tokenizer
-            llm_name = get_llm_names(self.llm)[0] if self.llm else None
+            llm_name = get_llm_names(self.llm)[0] if self.llm else None
             with Pool(
                 processes=os.cpu_count(),
                 initializer=initialize_tokenizer,
@@ -184,8 +211,8 @@ class AutoCoderRAGAsyncUpdateQueue(BaseCacheManager):
 
                 worker_func = functools.partial(
                     process_file_in_multi_process, llm=llm_name, product_mode=self.product_mode)
-                results = pool.map(worker_func, files_to_process)
-
+                results = pool.map(worker_func, files_to_process)
+
                 for file_info, result in zip(files_to_process, results):
                     if result:  # update the cache only when result is non-empty
                         self.update_cache(file_info, result)
@@ -203,16 +230,15 @@ class AutoCoderRAGAsyncUpdateQueue(BaseCacheManager):
             file_path, relative_path, modify_time, file_md5 = file_info
             current_files.add(file_path)
             # If the file failed to parse before, skip it in this incremental update.
-            if file_path in self.failed_files:
-                # logger.info(f"File {file_path} failed to parse before; skipping this update")
+            if file_path in self.failed_files:
                 continue
-            # change detection
+            # change detection
             if (
                 file_path not in self.cache
                 or self.cache[file_path].get("md5", "") != file_md5
             ):
                 files_to_process.append(
-                    (file_path, relative_path, modify_time, file_md5))
+                    (file_path, relative_path, modify_time, file_md5))
 
         deleted_files = set(self.cache.keys()) - current_files
         logger.info(f"files_to_process: {files_to_process}")
@@ -289,6 +315,8 @@ class AutoCoderRAGAsyncUpdateQueue(BaseCacheManager):
                 for line in f:
                     data = json.loads(line)
                     cache[data["file_path"]] = data
+        else:
+            self.load_first()
         return cache
 
     def write_cache(self):
@@ -366,6 +394,9 @@ class AutoCoderRAGAsyncUpdateQueue(BaseCacheManager):
             dirs[:] = [d for d in dirs if not d.startswith(
                 ".") and d not in default_ignore_dirs]
 
+            # Filter out files that start with a dot
+            files[:] = [f for f in files if not f.startswith(".")]
+
             if self.ignore_spec:
                 relative_root = os.path.relpath(root, self.path)
                 dirs[:] = [
@@ -390,6 +421,5 @@ class AutoCoderRAGAsyncUpdateQueue(BaseCacheManager):
                 modify_time = os.path.getmtime(file_path)
                 file_md5 = generate_file_md5(file_path)
                 all_files.append(
-                    (file_path, relative_path, modify_time, file_md5))
-
+                    (file_path, relative_path, modify_time, file_md5))
         return all_files
```
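The net effect of this file's changes: cache refreshing is now event-driven. The interval-based `update_interval` field is gone, and the queue registers a callback with the new `file_monitor` package so that adds, edits, and deletions trigger `trigger_update()` directly. Below is a minimal sketch of the same registration pattern; it uses only the monitor calls visible in the diff (`get_file_monitor`, `register`, `is_running`, `start`, `unregister`), while the `CacheWatcher` class itself is a hypothetical consumer, not part of auto-coder.

```python
# Sketch of the callback-registration pattern adopted above.
from autocoder.common.file_monitor.monitor import get_file_monitor, Change

class CacheWatcher:
    def __init__(self, root: str, required_exts: list):
        self.root = root
        self.required_exts = required_exts
        # One registration on the root is enough: the monitor reports changes
        # for every file and subdirectory beneath it.
        self.monitor = get_file_monitor(root)
        self.monitor.register(root, self.on_change)
        if not self.monitor.is_running():
            self.monitor.start()

    def on_change(self, change_type: Change, file_path: str) -> None:
        # Mirror _on_file_change: ignore extensions outside the allow-list.
        if self.required_exts and not any(file_path.endswith(e) for e in self.required_exts):
            return
        print(f"{change_type}: {file_path}")  # the real code calls trigger_update() here

    def close(self) -> None:
        # Mirror stop(): drop the callback so the shared monitor can outlive us.
        self.monitor.unregister(self.root, self.on_change)
```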
autocoder/rag/loaders/docx_loader.py CHANGED

```diff
@@ -1,5 +1,4 @@
 from io import BytesIO
-from autocoder.utils._markitdown import MarkItDown
 import traceback
 
 def extract_text_from_docx_old(docx_path):
@@ -13,6 +12,7 @@ def extract_text_from_docx_old(docx_path):
 
 def extract_text_from_docx(docx_path):
     try:
+        from autocoder.utils._markitdown import MarkItDown
         md_converter = MarkItDown()
         result = md_converter.convert(docx_path)
         return result.text_content
```
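The loader now defers the `MarkItDown` import into the function body. The likely motivation (an inference; the diff only shows the import moving) is to keep importing `docx_loader` cheap and to confine a failure of the heavy `_markitdown` dependency to the `try` block that already guards conversion. The pattern in isolation:

```python
# Sketch of the deferred-import pattern used in extract_text_from_docx.
# MarkItDown is only imported when the function runs, so a broken or slow
# optional dependency fails inside the existing try/except, not at import time.
def extract_text(path: str) -> str:
    try:
        from autocoder.utils._markitdown import MarkItDown  # deferred import
        return MarkItDown().convert(path).text_content
    except Exception:
        import traceback
        traceback.print_exc()
        return ""
```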
autocoder/rag/loaders/filter_utils.py CHANGED

```diff
@@ -1,106 +1,163 @@
-
 import os
 import json
-
+import threading
+from typing import Dict, Optional, List
 from loguru import logger
+from functools import lru_cache
 
 class FilterRuleManager:
     '''
+    Singleton filter-rule manager. Supports per-file-type filter rules.
+
+    Supported rule format:
     {
-    "
-    "
-    "
-
-    "
-    "
-    "
-
+        "image": {
+            "whitelist": ["*.png", "*.jpg"],
+            "blacklist": ["*/private/*"]
+        },
+        "document": {
+            "whitelist": ["*.pdf", "*.docx"],
+            "blacklist": ["*/tmp/*"]
+        },
+        "default": {
+            "whitelist": [],
+            "blacklist": ["*/node_modules/*", "*/.*"]
         }
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    }
+    '''
+    _instance = None
+    _lock = threading.RLock()  # re-entrant lock to avoid deadlocks
+
+    def __new__(cls, *args, **kwargs):
+        if cls._instance is None:
+            with cls._lock:
+                if cls._instance is None:  # double-checked locking
+                    cls._instance = super(FilterRuleManager, cls).__new__(cls)
+                    cls._instance._initialized = False
+        return cls._instance
+
+    @classmethod
+    def get_instance(cls):
+        return cls()  # goes through __new__ directly; no extra locking needed
+
+    def __init__(self):
+        with self._lock:
+            if hasattr(self, '_initialized') and self._initialized:
+                return
+
+            self.source_dir = os.getcwd()
+            self.filter_rules_path = os.path.join(self.source_dir, ".cache", "filterrules")
+            self._cache_rules: Optional[Dict] = None
+            self._cache_mtime: Optional[float] = None
+            self._rule_lock = threading.RLock()  # separate lock for rule access
+            self._initialized = True
 
     def load_filter_rules(self) -> Dict:
-
-
-        except Exception:
-            current_mtime = None
-
+        # First check whether a reload is needed, without holding the lock.
+        current_mtime = self._get_file_mtime()
         need_reload = False
 
-
-        if FilterRuleManager._cache_rules is None:
+        if self._cache_rules is None:
             need_reload = True
-        elif current_mtime is not None and
+        elif current_mtime is not None and self._cache_mtime != current_mtime:
             need_reload = True
 
+        # Acquire the lock only when a reload is needed.
         if need_reload:
-
-
-
-
-
-
-
-
-
-
+            with self._rule_lock:
+                # Double-check to avoid duplicate loads across threads.
+                current_mtime = self._get_file_mtime()
+                if self._cache_rules is None or (current_mtime is not None and self._cache_mtime != current_mtime):
+                    self._load_rules_from_file(current_mtime)
+
+        # Return a copy of the rules so external mutation cannot affect the cache.
+        with self._rule_lock:
+            return self._cache_rules.copy() if self._cache_rules else self._get_default_rules()
+
+    def _get_file_mtime(self) -> Optional[float]:
+        """Get the rules file's modification time; the IO-related work is factored out here."""
+        try:
+            return os.path.getmtime(self.filter_rules_path) if os.path.exists(self.filter_rules_path) else None
+        except Exception:
+            logger.warning(f"Failed to get mtime for {self.filter_rules_path}")
+            return None
+
+    def _get_default_rules(self) -> Dict:
+        """Return the default rule structure."""
+        return {
+            "default": {
+                "whitelist": [],
+                "blacklist": []
+            }
+        }
 
-    def
+    def _load_rules_from_file(self, current_mtime: Optional[float]) -> None:
+        """Load rules from the file; call only while holding the lock."""
+        self._cache_rules = self._get_default_rules()
+        try:
+            if os.path.exists(self.filter_rules_path):
+                with open(self.filter_rules_path, "r", encoding="utf-8") as f:
+                    file_rules = json.load(f)
+
+                # Convert old-format rules to the new format (if needed).
+                if "whitelist" in file_rules or "blacklist" in file_rules:
+                    # Old format is converted to the new format.
+                    self._cache_rules = {
+                        "default": {
+                            "whitelist": file_rules.get("whitelist", []),
+                            "blacklist": file_rules.get("blacklist", [])
+                        }
+                    }
+                    logger.info("Converted old format rules to new format")
+                else:
+                    # New format is used as-is.
+                    self._cache_rules = file_rules
+            self._cache_mtime = current_mtime
+        except Exception as e:
+            logger.warning(f"Failed to load filterrules: {e}")
+
+    @lru_cache(maxsize=1024)  # cache frequently used path decisions
+    def should_parse_file(self, file_path: str, file_type: str = "default") -> bool:
         """
-
-
-
-
-
-
+        Decide whether a given file should be parsed.
+
+        Args:
+            file_path: path of the file
+            file_type: file type (e.g. "image", "document"); defaults to "default"
+
         Returns:
             True means the file should be parsed
             False means it should not be parsed
         """
         import fnmatch
-
-
+
         rules = self.load_filter_rules()
-
-
-
-
-
-
-            return fnmatch.fnmatch(path, pat)
-        elif pattern.startswith("regex:"):
-            pat = pattern[len("regex:"):]
-            try:
-                return re.search(pat, path) is not None
-            except re.error:
-                logger.warning(f"Invalid regex pattern: {pat}")
-                return False
-        else:
-            # treated as glob by default
-            return fnmatch.fnmatch(path, pattern)
-
+
+        # Get the rules for the given type, falling back to the default rules.
+        type_rules = rules.get(file_type, rules.get("default", {"whitelist": [], "blacklist": []}))
+        whitelist = type_rules.get("whitelist", [])
+        blacklist = type_rules.get("blacklist", [])
+
         # Match the blacklist first.
         for pattern in blacklist:
-            if
+            if fnmatch.fnmatch(file_path, pattern):
                 return False
-
-        #
+
+        # An empty whitelist means every file passes (unless blacklisted).
+        if not whitelist:
+            return True
+
+        # Match the whitelist.
         for pattern in whitelist:
-            if
+            if fnmatch.fnmatch(file_path, pattern):
                 return True
-
-        #
+
+        # A whitelist exists but nothing matched: do not parse.
         return False
+
+    # Kept for backward compatibility.
+    def should_parse_image(self, file_path: str) -> bool:
+        """
+        Decide whether an image file should be parsed (legacy API compatibility).
+        """
+        return self.should_parse_file(file_path, "image")
```
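Because the manager reads plain JSON from `<cwd>/.cache/filterrules`, the new per-type format can be exercised end to end. A usage sketch, assuming the process runs from the project root (the manager resolves the path via `os.getcwd()`); the patterns and paths are examples:

```python
import json
import os

from autocoder.rag.loaders.filter_utils import FilterRuleManager

# Write a rules file in the new per-type format (patterns are examples).
os.makedirs(".cache", exist_ok=True)
rules = {
    "image": {"whitelist": ["*.png", "*.jpg"], "blacklist": ["*/private/*"]},
    "default": {"whitelist": [], "blacklist": ["*/node_modules/*"]},
}
with open(os.path.join(".cache", "filterrules"), "w", encoding="utf-8") as f:
    json.dump(rules, f)

manager = FilterRuleManager.get_instance()
print(manager.should_parse_file("assets/logo.png", "image"))   # True: whitelisted
print(manager.should_parse_file("a/private/x.png", "image"))   # False: blacklisted
print(manager.should_parse_image("assets/logo.png"))           # legacy wrapper, True
```

One caveat worth noting: `@lru_cache` on a bound method keys results by `(self, file_path, file_type)` and holds them for the life of the process, so a later edit to `filterrules` refreshes `load_filter_rules` via the mtime check but not decisions `should_parse_file` has already cached.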
autocoder/rag/loaders/image_loader.py CHANGED

```diff
@@ -538,7 +538,7 @@ class ImageLoader:
     def image_to_markdown(
         image_path: str,
         llm,
-        engine: str = "
+        engine: str = "paddle",
         product_mode: str = "lite",
         paddle_kwargs: dict = None
     ) -> str:
@@ -554,6 +554,13 @@ class ImageLoader:
         Returns:
             the markdown content string
         """
+        logger.info(f"image_path: {image_path} engine: {engine} product_mode: {product_mode} paddle_kwargs: {paddle_kwargs}")
+
+        # New: if the engine is paddle and PaddleOCR is None, return an empty string directly.
+        if engine == "paddle" and PaddleOCR is None:
+            logger.warning("PaddleOCR is not installed; image content cannot be extracted, returning an empty string.")
+            return ""
+
         md_content = ImageLoader.extract_text_from_image(
             image_path,
             llm,
@@ -561,8 +568,13 @@ class ImageLoader:
             product_mode=product_mode,
             paddle_kwargs=paddle_kwargs
         )
-
-
+
+        # Get directory and filename separately
+        dir_name = os.path.dirname(image_path)
+        file_name = os.path.basename(image_path)
+        base_name = os.path.splitext(file_name)[0]
+        # Create new path with dot before filename
+        md_path = os.path.join(dir_name, f".{base_name}.md")
         try:
             with open(md_path, "w", encoding="utf-8") as f:
                 f.write(md_content)
```
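The extracted markdown is now written as a dot-prefixed sidecar next to the image. That naming lines up with the new dot-file filtering in `simple_cache.py`'s `get_all_files` above, so the sidecar itself is not re-indexed (an inference from the two changes; the diff does not state the motivation). The derivation with an example path:

```python
import os

image_path = "/data/docs/diagram.png"  # example path
dir_name = os.path.dirname(image_path)
base_name = os.path.splitext(os.path.basename(image_path))[0]
md_path = os.path.join(dir_name, f".{base_name}.md")
print(md_path)  # /data/docs/.diagram.md
```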
autocoder/rag/loaders/pdf_loader.py CHANGED

```diff
@@ -1,6 +1,5 @@
 from io import BytesIO
 from pypdf import PdfReader
-from autocoder.utils._markitdown import MarkItDown
 import traceback
 
 
@@ -15,7 +14,8 @@ def extract_text_from_pdf_old(file_path):
     return text
 
 def extract_text_from_pdf(file_path, llm=None, product_mode="lite"):
-    try:
+    try:
+        from autocoder.utils._markitdown import MarkItDown
         md_converter = MarkItDown(llm=llm, product_mode=product_mode)
         result = md_converter.convert(file_path)
         return result.text_content
```
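Same deferral as in `docx_loader.py`, with one difference: the PDF path threads the LLM and product mode into `MarkItDown`. A call sketch using the signature shown above; passing `llm=None` assumes the converter tolerates running without a model, which this diff does not confirm:

```python
from autocoder.rag.loaders.pdf_loader import extract_text_from_pdf

# Converts via MarkItDown inside the function's try block; the except path
# is not shown in this diff, so the failure value is not guaranteed here.
text = extract_text_from_pdf("/path/to/report.pdf", llm=None, product_mode="lite")
print((text or "")[:200])
```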
autocoder/rag/long_context_rag.py CHANGED

```diff
@@ -41,6 +41,8 @@ from autocoder.rag.qa_conversation_strategy import get_qa_strategy
 from autocoder.rag.searchable import SearchableResults
 from autocoder.rag.conversation_to_queries import extract_search_queries
 from autocoder.common import openai_content as OpenAIContentProcessor
+from autocoder.common.save_formatted_log import save_formatted_log
+import json, os
 try:
     from autocoder_pro.rag.llm_compute import LLMComputeEngine
     pro_version = version("auto-coder-pro")
@@ -849,6 +851,15 @@ class LongContextRAG:
                 conversations=conversations, local_image_host=self.args.local_image_host
             )
 
+            # Save new_conversations.
+            try:
+                logger.info(f"Saving new_conversations log to {self.args.source_dir}/.cache/logs")
+                project_root = self.args.source_dir
+                json_text = json.dumps(new_conversations, ensure_ascii=False)
+                save_formatted_log(project_root, json_text, "rag_conversation")
+            except Exception as e:
+                logger.warning(f"Failed to save new_conversations log: {e}")
+
             chunks = target_llm.stream_chat_oai(
                 conversations=new_conversations,
                 model=model,
```
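The new block is deliberately best-effort: a failure to write the log is downgraded to a warning and never interrupts the streaming chat call that follows. A minimal sketch of the hook itself; the `(project_root, text, name)` argument order is taken from the call site, and the exact file layout under `<project_root>/.cache/logs` is inferred from the log message:

```python
import json

from autocoder.common.save_formatted_log import save_formatted_log

conversations = [{"role": "user", "content": "What changed in 0.1.355?"}]  # example payload
json_text = json.dumps(conversations, ensure_ascii=False)
save_formatted_log("/path/to/project", json_text, "rag_conversation")
```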
autocoder/rag/qa_conversation_strategy.py CHANGED

```diff
@@ -2,6 +2,7 @@ from abc import ABC, abstractmethod
 from typing import List, Dict, Any, Generator
 import byzerllm
 from autocoder.common import AutoCoderArgs
+from autocoder.common.rulefiles.autocoderrules_utils import get_rules
 
 class QAConversationStrategy(ABC):
     """
@@ -124,22 +125,8 @@ class MultiRoundStrategy(QAConversationStrategy):
         {% endfor %}
         {% endif %}
 
-        """
-
-        import os
-        extra_docs = {}
-        rules_dir = os.path.join(self.args.source_dir, ".autocoderrules")
-        if os.path.isdir(rules_dir):
-            for fname in os.listdir(rules_dir):
-                if fname.endswith(".md"):
-                    fpath = os.path.join(rules_dir, fname)
-                    try:
-                        with open(fpath, "r", encoding="utf-8") as f:
-                            content = f.read()
-                        key = os.path.splitext(fname)[0]
-                        extra_docs[key] = content
-                    except Exception:
-                        continue
+        """
+        extra_docs = get_rules()
         return {"extra_docs": extra_docs}
 
 class SingleRoundStrategy(QAConversationStrategy):
@@ -253,21 +240,8 @@ class SingleRoundStrategy(QAConversationStrategy):
         {% endfor %}
         {% endif %}
 
-        """
-
-        extra_docs = {}
-        rules_dir = os.path.join(getattr(self, 'args', None).source_dir if getattr(self, 'args', None) else ".", ".autocoderrules")
-        if os.path.isdir(rules_dir):
-            for fname in os.listdir(rules_dir):
-                if fname.endswith(".md"):
-                    fpath = os.path.join(rules_dir, fname)
-                    try:
-                        with open(fpath, "r", encoding="utf-8") as f:
-                            content = f.read()
-                        key = os.path.splitext(fname)[0]
-                        extra_docs[key] = content
-                    except Exception:
-                        continue
+        """
+        extra_docs = extra_docs = get_rules()
         return {"extra_docs": extra_docs}
 
 def get_qa_strategy(args: AutoCoderArgs) -> QAConversationStrategy:
```
autocoder/rag/utils.py CHANGED

```diff
@@ -2,8 +2,9 @@ from autocoder.common import SourceCode
 from autocoder.rag.token_counter import count_tokens_worker, count_tokens
 from autocoder.rag.loaders.pdf_loader import extract_text_from_pdf
 from autocoder.rag.loaders.docx_loader import extract_text_from_docx
-from autocoder.rag.loaders.excel_loader import extract_text_from_excel
+from autocoder.rag.loaders.excel_loader import extract_text_from_excel
 from autocoder.rag.loaders.ppt_loader import extract_text_from_ppt
+from autocoder.rag.loaders.image_loader import ImageLoader
 from typing import List, Tuple, Optional, Union
 import time
 from loguru import logger
@@ -21,7 +22,7 @@ def process_file_in_multi_process(
     llm = get_single_llm(llm,product_mode)
 
     start_time = time.time()
-    file_path, relative_path, _, _ = file_info
+    file_path, relative_path, _, _ = file_info
     try:
         if file_path.endswith(".pdf"):
             content = extract_text_from_pdf(file_path, llm, product_mode)
@@ -61,6 +62,15 @@ def process_file_in_multi_process(
                     tokens=count_tokens_worker(content),
                 )
             ]
+        elif file_path.lower().endswith((".png", ".jpg", ".jpeg", ".bmp", ".gif")):
+            content = ImageLoader.image_to_markdown(file_path, llm=llm, product_mode=product_mode)
+            v = [
+                SourceCode(
+                    module_name=f"##File: {file_path}",
+                    source_code=content,
+                    tokens=count_tokens_worker(content),
+                )
+            ]
         else:
             with open(file_path, "r", encoding="utf-8") as f:
                 content = f.read()
@@ -126,6 +136,15 @@ def process_file_local(
                     tokens=count_tokens(content),
                 )
             ]
+        elif file_path.lower().endswith((".png", ".jpg", ".jpeg", ".bmp", ".gif")):
+            content = ImageLoader.image_to_markdown(file_path, llm=llm, product_mode=product_mode)
+            v = [
+                SourceCode(
+                    module_name=f"##File: {file_path}",
+                    source_code=content,
+                    tokens=count_tokens(content),
+                )
+            ]
         else:
             with open(file_path, "r", encoding="utf-8") as f:
                 content = f.read()
```
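With this change, raster images flow through the same `SourceCode` pipeline as PDFs and Office documents in both the multi-process and local paths. The dispatch itself is a case-insensitive extension check, isolated here for clarity (the constant name is ours):

```python
IMAGE_EXTS = (".png", ".jpg", ".jpeg", ".bmp", ".gif")

def is_image(file_path: str) -> bool:
    # str.endswith accepts a tuple, and lower() makes the match case-insensitive.
    return file_path.lower().endswith(IMAGE_EXTS)

print(is_image("Scan.PNG"))   # True
print(is_image("notes.txt"))  # False
```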