auto-coder 0.1.347__py3-none-any.whl → 0.1.349__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of auto-coder might be problematic.
- {auto_coder-0.1.347.dist-info → auto_coder-0.1.349.dist-info}/METADATA +1 -1
- {auto_coder-0.1.347.dist-info → auto_coder-0.1.349.dist-info}/RECORD +37 -27
- autocoder/auto_coder_runner.py +19 -14
- autocoder/chat_auto_coder_lang.py +5 -3
- autocoder/common/auto_coder_lang.py +3 -3
- autocoder/common/model_speed_tester.py +392 -0
- autocoder/common/printer.py +7 -8
- autocoder/common/run_cmd.py +247 -0
- autocoder/common/test_run_cmd.py +110 -0
- autocoder/common/v2/agent/agentic_edit.py +82 -29
- autocoder/common/v2/agent/agentic_edit_conversation.py +9 -0
- autocoder/common/v2/agent/agentic_edit_tools/execute_command_tool_resolver.py +21 -36
- autocoder/common/v2/agent/agentic_edit_tools/list_files_tool_resolver.py +4 -7
- autocoder/common/v2/agent/agentic_edit_tools/search_files_tool_resolver.py +2 -5
- autocoder/helper/rag_doc_creator.py +141 -0
- autocoder/ignorefiles/__init__.py +4 -0
- autocoder/ignorefiles/ignore_file_utils.py +63 -0
- autocoder/ignorefiles/test_ignore_file_utils.py +91 -0
- autocoder/models.py +49 -9
- autocoder/plugins/__init__.py +20 -0
- autocoder/rag/cache/byzer_storage_cache.py +10 -4
- autocoder/rag/cache/file_monitor_cache.py +27 -24
- autocoder/rag/cache/local_byzer_storage_cache.py +11 -5
- autocoder/rag/cache/local_duckdb_storage_cache.py +203 -128
- autocoder/rag/cache/simple_cache.py +56 -37
- autocoder/rag/loaders/filter_utils.py +106 -0
- autocoder/rag/loaders/image_loader.py +573 -0
- autocoder/rag/loaders/pdf_loader.py +3 -3
- autocoder/rag/loaders/test_image_loader.py +209 -0
- autocoder/rag/qa_conversation_strategy.py +3 -5
- autocoder/rag/utils.py +20 -9
- autocoder/utils/_markitdown.py +35 -0
- autocoder/version.py +1 -1
- {auto_coder-0.1.347.dist-info → auto_coder-0.1.349.dist-info}/LICENSE +0 -0
- {auto_coder-0.1.347.dist-info → auto_coder-0.1.349.dist-info}/WHEEL +0 -0
- {auto_coder-0.1.347.dist-info → auto_coder-0.1.349.dist-info}/entry_points.txt +0 -0
- {auto_coder-0.1.347.dist-info → auto_coder-0.1.349.dist-info}/top_level.txt +0 -0
autocoder/ignorefiles/ignore_file_utils.py ADDED
@@ -0,0 +1,63 @@
+
+import os
+from pathlib import Path
+from threading import Lock
+import pathspec
+
+DEFAULT_EXCLUDES = [
+    '.git', '.auto-coder', 'node_modules', '.mvn', '.idea',
+    '__pycache__', '.venv', 'venv', 'dist', 'build', '.gradle', ".next"
+]
+
+
+class IgnoreFileManager:
+    _instance = None
+    _lock = Lock()
+
+    def __new__(cls):
+        if not cls._instance:
+            with cls._lock:
+                if not cls._instance:
+                    cls._instance = super(IgnoreFileManager, cls).__new__(cls)
+                    cls._instance._initialized = False
+        return cls._instance
+
+    def __init__(self):
+        if self._initialized:
+            return
+        self._initialized = True
+        self._spec = None
+        self._load_ignore_spec()
+
+    def _load_ignore_spec(self):
+        ignore_patterns = []
+        project_root = Path(os.getcwd())
+
+        ignore_file_paths = [
+            project_root / '.autocoderignore',
+            project_root / '.auto-coder' / '.autocoderignore'
+        ]
+
+        for ignore_file in ignore_file_paths:
+            if ignore_file.is_file():
+                with open(ignore_file, 'r', encoding='utf-8') as f:
+                    ignore_patterns = f.read().splitlines()
+                break
+
+        # Append the default excluded directories
+        ignore_patterns.extend(DEFAULT_EXCLUDES)
+
+        self._spec = pathspec.PathSpec.from_lines('gitwildmatch', ignore_patterns)
+
+    def should_ignore(self, path: str) -> bool:
+        rel_path = os.path.relpath(path, os.getcwd())
+        # Normalize path separators
+        rel_path = rel_path.replace(os.sep, '/')
+        return self._spec.match_file(rel_path)
+
+
+# Module-level singleton exposed to callers
+_ignore_manager = IgnoreFileManager()
+
+def should_ignore(path: str) -> bool:
+    return _ignore_manager.should_ignore(path)
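The new module exposes a module-level should_ignore helper; a minimal usage sketch, assuming the caller runs from the project root (the example paths are illustrative, not taken from the package):

# Illustrative usage of the new ignore helper; paths are made up.
from autocoder.ignorefiles.ignore_file_utils import should_ignore

# Directories from DEFAULT_EXCLUDES are ignored even without a .autocoderignore file.
print(should_ignore("node_modules/lodash/index.js"))  # expected: True

# Ordinary project files are kept unless a .autocoderignore rule matches them.
print(should_ignore("src/main.py"))                   # expected: False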
autocoder/ignorefiles/test_ignore_file_utils.py ADDED
@@ -0,0 +1,91 @@
+
+import os
+import shutil
+from pathlib import Path
+
+import pytest
+
+from src.autocoder.ignorefiles import ignore_file_utils
+
+@pytest.fixture(autouse=True)
+def cleanup_ignore_manager(monkeypatch):
+    """
+    Reset the IgnoreFileManager singleton before and after each test to keep tests isolated.
+    """
+    # Back up the original instance
+    original_instance = ignore_file_utils._ignore_manager
+    # Force the ignore rules to be reloaded
+    def reset_ignore_manager():
+        ignore_file_utils.IgnoreFileManager._instance = None
+        return ignore_file_utils.IgnoreFileManager()
+
+    monkeypatch.setattr(ignore_file_utils, "_ignore_manager", reset_ignore_manager())
+    yield
+    # Restore the original instance
+    ignore_file_utils._ignore_manager = original_instance
+
+
+def test_default_excludes(tmp_path, monkeypatch):
+    # Switch the current working directory
+    monkeypatch.chdir(tmp_path)
+
+    # No .autocoderignore file is created, so only the default exclude rules apply
+    # Create the default excluded directories
+    for dirname in ignore_file_utils.DEFAULT_EXCLUDES:
+        (tmp_path / dirname).mkdir(parents=True, exist_ok=True)
+        # They should be ignored
+        assert ignore_file_utils.should_ignore(str(tmp_path / dirname)) is True
+
+    # Create a file that should not be ignored
+    normal_file = tmp_path / "myfile.txt"
+    normal_file.write_text("hello")
+    assert ignore_file_utils.should_ignore(str(normal_file)) is False
+
+
+def test_custom_ignore_file(tmp_path, monkeypatch):
+    monkeypatch.chdir(tmp_path)
+
+    # Create a custom ignore file
+    ignore_file = tmp_path / ".autocoderignore"
+    ignore_file.write_text("data/**\nsecret.txt")
+
+    # Re-initialize the ignore manager so the new rules are loaded
+    ignore_file_utils.IgnoreFileManager._instance = None
+    ignore_file_utils._ignore_manager = ignore_file_utils.IgnoreFileManager()
+
+    # Paths that match the ignore rules
+    ignored_dir = tmp_path / "data" / "subdir"
+    ignored_dir.mkdir(parents=True)
+    ignored_file = tmp_path / "secret.txt"
+    ignored_file.write_text("secret")
+
+    assert ignore_file_utils.should_ignore(str(ignored_dir)) is True
+    assert ignore_file_utils.should_ignore(str(ignored_file)) is True
+
+    # A file that should not be ignored
+    normal_file = tmp_path / "keepme.txt"
+    normal_file.write_text("keep me")
+    assert ignore_file_utils.should_ignore(str(normal_file)) is False
+
+
+def test_nested_ignore_file(tmp_path, monkeypatch):
+    monkeypatch.chdir(tmp_path)
+
+    # No ignore file at the project root; create .auto-coder/.autocoderignore instead
+    nested_dir = tmp_path / ".auto-coder"
+    nested_dir.mkdir()
+
+    ignore_file = nested_dir / ".autocoderignore"
+    ignore_file.write_text("logs/**")
+
+    # Re-initialize the ignore manager so the new rules are loaded
+    ignore_file_utils.IgnoreFileManager._instance = None
+    ignore_file_utils._ignore_manager = ignore_file_utils.IgnoreFileManager()
+
+    ignored_dir = tmp_path / "logs" / "2024"
+    ignored_dir.mkdir(parents=True)
+    assert ignore_file_utils.should_ignore(str(ignored_dir)) is True
+
+    normal_file = tmp_path / "main.py"
+    normal_file.write_text("# main")
+    assert ignore_file_utils.should_ignore(str(normal_file)) is False
autocoder/models.py CHANGED
@@ -8,7 +8,7 @@ MODELS_JSON = os.path.expanduser("~/.auto-coder/keys/models.json")
 # Default built-in models
 default_models_list = [
     {
-        "name": "
+        "name": "deepseek/r1",
         "description": "DeepSeek Reasoner is for design/review",
         "model_name": "deepseek-reasoner",
         "model_type": "saas/openai",
@@ -21,7 +21,7 @@ default_models_list = [
         "max_output_tokens": 8096
     },
     {
-        "name": "
+        "name": "deepseek/v3",
         "description": "DeepSeek Chat is for coding",
         "model_name": "deepseek-chat",
         "model_type": "saas/openai",
@@ -34,16 +34,56 @@ default_models_list = [
         "max_output_tokens": 8096
     },
     {
-        "name": "
-        "description": "
-        "model_name": "
+        "name": "ark/deepseek-v3-250324",
+        "description": "DeepSeek Chat is for coding",
+        "model_name": "deepseek-v3-250324",
         "model_type": "saas/openai",
-        "base_url": "https://
+        "base_url": "https://ark.cn-beijing.volces.com/api/v3",
         "api_key_path": "",
-        "is_reasoning":
+        "is_reasoning": False,
+        "input_price": 2.0,
+        "output_price": 8.0,
+        "average_speed": 0.0,
+        "max_output_tokens": 8096
+    },
+    {
+        "name": "ark/deepseek-v3-250324",
+        "description": "DeepSeek Chat is for coding",
+        "model_name": "deepseek-v3-250324",
+        "model_type": "saas/openai",
+        "base_url": "https://ark.cn-beijing.volces.com/api/v3",
+        "api_key_path": "",
+        "is_reasoning": False,
+        "input_price": 2.0,
+        "output_price": 8.0,
+        "average_speed": 0.0,
+        "max_output_tokens": 8096
+    },
+    {
+        "name": "openrouter/quasar-alpha",
+        "description": "",
+        "model_name": "openrouter/quasar-alpha",
+        "model_type": "saas/openai",
+        "base_url": "https://openrouter.ai/api/v1",
+        "api_key_path": "",
+        "is_reasoning": False,
         "input_price": 0.0,
         "output_price": 0.0,
-        "average_speed": 0.0
+        "average_speed": 0.0,
+        "max_output_tokens": 8096*2
+    },
+    {
+        "name": "openrouter/google/gemini-2.5-pro-preview-03-25",
+        "description": "",
+        "model_name": "google/gemini-2.5-pro-preview-03-25",
+        "model_type": "saas/openai",
+        "base_url": "https://openrouter.ai/api/v1",
+        "api_key_path": "",
+        "is_reasoning": False,
+        "input_price": 0.0,
+        "output_price": 0.0,
+        "average_speed": 0.0,
+        "max_output_tokens": 8096*2
     }
 ]
 
@@ -263,7 +303,7 @@ def update_model_with_api_key(name: str, api_key: str) -> Dict:
     if not found_model:
         return None
 
-    api_key_path = name
+    api_key_path = name.replace("/", "_")  # replace "/" with "_" so the file name stays valid
     if api_key_path:
         found_model["api_key_path"] = api_key_path
 
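The update_model_with_api_key change sanitizes the model name before using it as a key-file name. A minimal sketch of the effect (the helper below is illustrative, not part of the package):

# Sketch of the name sanitization introduced in update_model_with_api_key.
# Model names such as "deepseek/r1" contain "/" and cannot be used directly
# as file names, so "/" is replaced with "_".
def api_key_file_name(model_name: str) -> str:
    # mirrors `name.replace("/", "_")` from the diff
    return model_name.replace("/", "_")

assert api_key_file_name("deepseek/r1") == "deepseek_r1"
assert api_key_file_name("openrouter/google/gemini-2.5-pro-preview-03-25") == \
    "openrouter_google_gemini-2.5-pro-preview-03-25"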
autocoder/plugins/__init__.py CHANGED
@@ -256,6 +256,7 @@ class PluginManager:
             "/plugins/dirs /remove",
             "/plugins/dirs /clear",
         ]
+        self._wrapped_functions: Dict[str, Callable] = {}
 
     @property
     def cached_discover_plugins(self) -> List[Type[Plugin]]:
@@ -561,6 +562,7 @@ class PluginManager:
                     return result
             return None
 
+        self._wrapped_functions[func_name] = wrapped
        return wrapped
 
     def register_function_interception(self, plugin_name: str, func_name: str) -> None:
@@ -1107,6 +1109,24 @@ class PluginManager:
 
         return processed_completions
 
+    def get_wrapped_functions(self) -> Dict[str, Callable]:
+        """Return all wrapped functions.
+
+        Returns:
+            Dict[str, Callable]: a dict mapping each function name to its wrapped function
+        """
+        return self._wrapped_functions
+
+    def get_wrapped_function(self, func_name: str) -> Callable:
+        """Return a wrapped function by name.
+
+        Args:
+            func_name: the function name
+
+        Returns:
+            The wrapped function, or None if it is not found
+        """
+        return self._wrapped_functions.get(func_name)
 
 def register_global_plugin_dir(plugin_dir: str) -> None:
     """Register a global plugin directory.
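The new accessors let callers look up the wrapped version of an intercepted function. A hedged usage sketch; the plugin_manager variable and the "run_query" name are illustrative, not taken from the diff:

# Illustrative only: `plugin_manager` stands for an existing PluginManager
# instance and "run_query" for a function some plugin has intercepted.
wrapped = plugin_manager.get_wrapped_function("run_query")
if wrapped is not None:
    # Call the plugin-wrapped variant instead of the original function.
    result = wrapped("select * from users")
else:
    # Fall back when no plugin has wrapped this function name.
    result = None

# All wrapped functions can also be inspected at once:
for name, fn in plugin_manager.get_wrapped_functions().items():
    print(name, fn)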
autocoder/rag/cache/byzer_storage_cache.py CHANGED
@@ -1,3 +1,4 @@
+from token import OP
 from autocoder.rag.cache.base_cache import (
     BaseCacheManager,
     DeleteEvent,
@@ -31,6 +32,8 @@ from pydantic import BaseModel
 from autocoder.rag.cache.cache_result_merge import CacheResultMerger, MergeStrategy
 from .failed_files_utils import save_failed_files, load_failed_files
 import time
+from byzerllm import ByzerLLM, SimpleByzerLLM
+from autocoder.utils.llms import get_llm_names
 
 if platform.system() != "Windows":
     import fcntl
@@ -66,8 +69,8 @@ class ByzerStorageCache(BaseCacheManager):
         ignore_spec,
         required_exts,
         extra_params: Optional[AutoCoderArgs] = None,
-        args=None,
-        llm=None,
+        args:Optional[AutoCoderArgs]=None,
+        llm:Optional[Union[ByzerLLM,SimpleByzerLLM,str]]=None,
     ):
         """
        Initialize the RAG cache manager backed by the cloud Byzer Storage service.
@@ -78,6 +81,7 @@ class ByzerStorageCache(BaseCacheManager):
         self.extra_params = extra_params
         self.args = args
         self.llm = llm
+        self.product_mode = self.args.product_mode
         self.rag_build_name = extra_params.rag_build_name
         self.storage = ByzerStorage("byzerai_store", "rag", self.rag_build_name)
         self.queue = []
@@ -218,6 +222,8 @@ class ByzerStorageCache(BaseCacheManager):
         from autocoder.rag.token_counter import initialize_tokenizer
 
         logger.info("[BUILD CACHE] Starting parallel file processing...")
+        llm_name = get_llm_names(self.llm)[0] if self.llm else None
+        product_mode = self.product_mode
         start_time = time.time()
         with Pool(
             processes=os.cpu_count(),
@@ -227,7 +233,7 @@ class ByzerStorageCache(BaseCacheManager):
             target_files_to_process = []
             for file_info in files_to_process:
                 target_files_to_process.append(self.fileinfo_to_tuple(file_info))
-            results = pool.map(process_file_in_multi_process, target_files_to_process)
+            results = pool.map(process_file_in_multi_process, target_files_to_process, llm=llm_name, product_mode=product_mode)
         processing_time = time.time() - start_time
         logger.info(f"[BUILD CACHE] File processing completed, time elapsed: {processing_time:.2f}s")
 
@@ -417,7 +423,7 @@ class ByzerStorageCache(BaseCacheManager):
         for file_info in file_list.file_infos:
             logger.info(f"[QUEUE PROCESSING] Processing file update: {file_info.file_path}")
             try:
-                content = process_file_local(self.fileinfo_to_tuple(file_info))
+                content = process_file_local(self.fileinfo_to_tuple(file_info), llm=self.llm, product_mode=self.product_mode)
                 if content:
                     self.cache[file_info.file_path] = CacheItem(
                         file_path=file_info.file_path,
autocoder/rag/cache/file_monitor_cache.py CHANGED
@@ -1,22 +1,24 @@
 from autocoder.rag.cache.base_cache import BaseCacheManager
-from typing import Dict, List,Any,Optional
+from typing import Dict, List, Any, Optional, Union
 import os
 import threading
 from loguru import logger
 from watchfiles import watch, Change
 from autocoder.rag.variable_holder import VariableHolder
 from autocoder.common import SourceCode
-from autocoder.rag.utils import process_file_in_multi_process,process_file_local
+from autocoder.rag.utils import process_file_in_multi_process, process_file_local
 from watchfiles import Change, DefaultFilter, awatch, watch
+from byzerllm import SimpleByzerLLM, ByzerLLM
+from autocoder.common import AutoCoderArgs
 
 
 class AutoCoderRAGDocListener(BaseCacheManager):
     """
     A code cache manager driven by real-time file-system monitoring.
-
+
     It watches the code base and updates the cache automatically when files are added, modified, or deleted.
     Unlike the other cache managers it uses the watchfiles library for change notifications, so no periodic scan of the file system is needed.
-
+
     Class attributes:
         cache: cache dict that stores the processed file contents
         ignore_dirs: list of directories to ignore
@@ -48,15 +50,15 @@ class AutoCoderRAGDocListener(BaseCacheManager):
         r"^test.*$",
     ]
 
-    def __init__(self, path: str, ignore_spec, required_exts: List, args=None, llm=None) -> None:
+    def __init__(self, path: str, ignore_spec, required_exts: List, args: Optional[AutoCoderArgs] = None, llm: Optional[Union[ByzerLLM, SimpleByzerLLM, str]] = None) -> None:
         """
         Initialize the file-monitoring cache manager.
-
+
         Parameters:
             path: root directory of the code base to monitor
            ignore_spec: rules describing which files/directories should be ignored
            required_exts: list of file extensions to process
-
+
         Cache structure (self.cache):
            self.cache is a dict whose structure is simpler than the other cache managers':
            {
@@ -67,19 +69,19 @@ class AutoCoderRAGDocListener(BaseCacheManager):
                "file_path2": { ... },
                ...
            }
-
+
        Main differences from the other cache managers:
            1. No MD5 hashes or modification times are stored, because changes come straight from the watcher
            2. No local persistence; the whole cache is kept in memory
            3. Cache updates are event driven rather than based on periodic scans
-
+
        File monitoring:
            - Uses the watchfiles library to watch for file-system changes
            - Supports three event types: added, modified, deleted
            - Runs the watcher in a separate thread so the main thread is not blocked
            - Honours the configured ignore rules and required-extension filter
            - Loads all matching files once during initialization
-
+
        Source code handling:
            A single file is processed with the process_file_local function:
            - parameter: file_path (the file path)
@@ -111,7 +113,7 @@ class AutoCoderRAGDocListener(BaseCacheManager):
     def stop(self):
         """
         Stop the file-monitoring thread.
-
+
         Sets the stop event and waits for the monitoring thread to finish, so monitoring shuts down gracefully before the object is destroyed.
         """
         self.stop_event.set()
@@ -126,7 +128,7 @@ class AutoCoderRAGDocListener(BaseCacheManager):
     def load_first(self):
         """
         Load all matching files at initialization.
-
+
         Collects every file that passes the filters and adds it to the cache.
         This guarantees the cache already contains all existing files before monitoring starts.
         """
@@ -139,16 +141,17 @@ class AutoCoderRAGDocListener(BaseCacheManager):
     def update_cache(self, file_path):
         """
         Process a single file and update the cache.
-
+
         Parameters:
             file_path: absolute path of the file
-
+
         Steps:
            1. Parse the file content with process_file_local
            2. Serialize the result and store it in the cache
            3. Log the updated file and the current cache state
         """
-        source_code = process_file_local(
+        source_code = process_file_local(
+            file_path, llm=self.llm, product_mode=self.product_mode)
         self.cache[file_path] = {
             "file_path": file_path,
             "content": [c.model_dump() for c in source_code],
@@ -159,7 +162,7 @@ class AutoCoderRAGDocListener(BaseCacheManager):
     def remove_cache(self, file_path):
         """
         Remove the given file from the cache.
-
+
         Parameters:
             file_path: absolute path of the file to remove
         """
@@ -170,7 +173,7 @@ class AutoCoderRAGDocListener(BaseCacheManager):
     def open_watch(self):
         """
         Start the file-system watch loop.
-
+
         Keeps monitoring file-system changes until stop_event is set.
         When a change is detected, the action matching its type is performed:
             - added/modified file: call update_cache to refresh the cache
@@ -190,10 +193,10 @@ class AutoCoderRAGDocListener(BaseCacheManager):
     def get_cache(self, options: Optional[Dict[str, Any]] = None):
         """
         Return the current cache.
-
+
         Parameters:
             options: optional settings controlling how the cache is fetched
-
+
         Returns:
             the cache dict currently held in memory
         """
@@ -202,9 +205,9 @@ class AutoCoderRAGDocListener(BaseCacheManager):
     def _load_ignore_file(self):
         """
         Load the ignore-file rules.
-
+
         Tries to load a .serveignore file first and falls back to .gitignore if it does not exist.
-
+
         Returns:
             a list of strings containing the ignore rules
         """
@@ -224,10 +227,10 @@ class AutoCoderRAGDocListener(BaseCacheManager):
     def get_all_files(self) -> List[str]:
         """
         Return the paths of all matching files.
-
+
         Walks the configured directory, applies the ignore rules and the extension filter,
         and returns the absolute paths of every matching file.
-
+
         Returns:
             list of matching file paths
         """
@@ -258,4 +261,4 @@ class AutoCoderRAGDocListener(BaseCacheManager):
                 absolute_path = os.path.abspath(file_path)
                 all_files.append(absolute_path)
 
-        return all_files
+        return all_files
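The listener's watch loop is built on the watchfiles API referenced in the imports above. A minimal standalone sketch of that pattern, assuming a directory and ignore check of your own (this is not the package's exact open_watch implementation):

# Minimal sketch of a watchfiles-based loop like the one the listener uses.
# `project_root` and `should_ignore` are placeholders, not taken from the diff.
import threading
from watchfiles import watch, Change

project_root = "."                 # assumption: directory to monitor
stop_event = threading.Event()     # set this from another thread to stop watching

def should_ignore(path: str) -> bool:
    return False                   # stand-in for the real ignore rules

for changes in watch(project_root, stop_event=stop_event):
    for change_type, changed_path in changes:
        if should_ignore(changed_path):
            continue
        if change_type in (Change.added, Change.modified):
            print("update cache for", changed_path)   # the listener calls update_cache here
        elif change_type == Change.deleted:
            print("remove from cache", changed_path)  # the listener calls remove_cache here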
autocoder/rag/cache/local_byzer_storage_cache.py CHANGED
@@ -8,6 +8,7 @@ from autocoder.rag.cache.base_cache import (
 from typing import Generator, List, Dict, Any, Optional, Tuple
 from autocoder.common import SourceCode
 from loguru import logger
+import functools
 import pathspec
 import os
 import uuid
@@ -30,7 +31,9 @@ from typing import Union
 from byzerllm import SimpleByzerLLM, ByzerLLM
 from autocoder.rag.cache.cache_result_merge import CacheResultMerger, MergeStrategy
 import time
+from typing import Optional,Union
 from .failed_files_utils import save_failed_files, load_failed_files
+from autocoder.utils.llms import get_llm_names
 
 if platform.system() != "Windows":
     import fcntl
@@ -71,8 +74,8 @@ class LocalByzerStorageCache(BaseCacheManager):
         emb_llm: Union[ByzerLLM, SimpleByzerLLM] = None,
         host: str = "127.0.0.1",
         port: int = 33333,
-        args=None,
-        llm=None,
+        args:Optional[AutoCoderArgs]=None,
+        llm:Optional[Union[ByzerLLM,SimpleByzerLLM,str]]=None,
     ):
         """
        Initialize the RAG cache manager backed by Byzer Storage.
@@ -225,6 +228,8 @@ class LocalByzerStorageCache(BaseCacheManager):
         from autocoder.rag.token_counter import initialize_tokenizer
 
         logger.info("[BUILD CACHE] Starting parallel file processing...")
+        llm_name = get_llm_names(self.llm)[0] if self.llm else None
+        product_mode = self.args.product_mode
         start_time = time.time()
         with Pool(
             processes=os.cpu_count(),
@@ -235,8 +240,9 @@ class LocalByzerStorageCache(BaseCacheManager):
             for file_info in files_to_process:
                 target_files_to_process.append(
                     self.fileinfo_to_tuple(file_info))
-
-
+            worker_func = functools.partial(process_file_in_multi_process, llm=llm_name, product_mode=product_mode)
+            results = pool.map(worker_func, target_files_to_process)
+
         processing_time = time.time() - start_time
         logger.info(f"[BUILD CACHE] File processing completed, time elapsed: {processing_time:.2f}s")
 
@@ -452,7 +458,7 @@ class LocalByzerStorageCache(BaseCacheManager):
                     f"[QUEUE PROCESSING] Processing file update: {file_info.file_path}")
                 try:
                     content = process_file_local(
-                        self.fileinfo_to_tuple(file_info))
+                        self.fileinfo_to_tuple(file_info), llm=self.llm, product_mode=self.product_mode)
                     if content:
                         self.cache[file_info.file_path] = CacheItem(
                             file_path=file_info.file_path,