auto-coder: auto_coder-0.1.348-py3-none-any.whl → auto_coder-0.1.349-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of auto-coder might be problematic. More details are linked from the original diff page.
- {auto_coder-0.1.348.dist-info → auto_coder-0.1.349.dist-info}/METADATA +1 -1
- {auto_coder-0.1.348.dist-info → auto_coder-0.1.349.dist-info}/RECORD +35 -26
- autocoder/auto_coder_runner.py +14 -10
- autocoder/chat_auto_coder_lang.py +5 -3
- autocoder/common/model_speed_tester.py +392 -0
- autocoder/common/printer.py +7 -8
- autocoder/common/run_cmd.py +247 -0
- autocoder/common/test_run_cmd.py +110 -0
- autocoder/common/v2/agent/agentic_edit.py +61 -11
- autocoder/common/v2/agent/agentic_edit_conversation.py +9 -0
- autocoder/common/v2/agent/agentic_edit_tools/execute_command_tool_resolver.py +21 -36
- autocoder/common/v2/agent/agentic_edit_tools/list_files_tool_resolver.py +4 -7
- autocoder/common/v2/agent/agentic_edit_tools/search_files_tool_resolver.py +2 -5
- autocoder/helper/rag_doc_creator.py +141 -0
- autocoder/ignorefiles/__init__.py +4 -0
- autocoder/ignorefiles/ignore_file_utils.py +63 -0
- autocoder/ignorefiles/test_ignore_file_utils.py +91 -0
- autocoder/models.py +49 -9
- autocoder/rag/cache/byzer_storage_cache.py +10 -4
- autocoder/rag/cache/file_monitor_cache.py +27 -24
- autocoder/rag/cache/local_byzer_storage_cache.py +11 -5
- autocoder/rag/cache/local_duckdb_storage_cache.py +203 -128
- autocoder/rag/cache/simple_cache.py +56 -37
- autocoder/rag/loaders/filter_utils.py +106 -0
- autocoder/rag/loaders/image_loader.py +45 -23
- autocoder/rag/loaders/pdf_loader.py +3 -3
- autocoder/rag/loaders/test_image_loader.py +209 -0
- autocoder/rag/qa_conversation_strategy.py +3 -5
- autocoder/rag/utils.py +20 -9
- autocoder/utils/_markitdown.py +35 -0
- autocoder/version.py +1 -1
- {auto_coder-0.1.348.dist-info → auto_coder-0.1.349.dist-info}/LICENSE +0 -0
- {auto_coder-0.1.348.dist-info → auto_coder-0.1.349.dist-info}/WHEEL +0 -0
- {auto_coder-0.1.348.dist-info → auto_coder-0.1.349.dist-info}/entry_points.txt +0 -0
- {auto_coder-0.1.348.dist-info → auto_coder-0.1.349.dist-info}/top_level.txt +0 -0
|
@@ -1,22 +1,24 @@
|
|
|
1
1
|
from autocoder.rag.cache.base_cache import BaseCacheManager
|
|
2
|
-
from typing import Dict, List,Any,Optional
|
|
2
|
+
from typing import Dict, List, Any, Optional, Union
|
|
3
3
|
import os
|
|
4
4
|
import threading
|
|
5
5
|
from loguru import logger
|
|
6
6
|
from watchfiles import watch, Change
|
|
7
7
|
from autocoder.rag.variable_holder import VariableHolder
|
|
8
8
|
from autocoder.common import SourceCode
|
|
9
|
-
from autocoder.rag.utils import process_file_in_multi_process,process_file_local
|
|
9
|
+
from autocoder.rag.utils import process_file_in_multi_process, process_file_local
|
|
10
10
|
from watchfiles import Change, DefaultFilter, awatch, watch
|
|
11
|
+
from byzerllm import SimpleByzerLLM, ByzerLLM
|
|
12
|
+
from autocoder.common import AutoCoderArgs
|
|
11
13
|
|
|
12
14
|
|
|
13
15
|
class AutoCoderRAGDocListener(BaseCacheManager):
|
|
14
16
|
"""
|
|
15
17
|
基于文件系统实时监控的代码缓存管理器。
|
|
16
|
-
|
|
18
|
+
|
|
17
19
|
此类实现了对代码库的实时监控,当文件发生变化时(新增、修改、删除)自动更新缓存。
|
|
18
20
|
与其他缓存管理器不同,它使用 watchfiles 库进行文件变更监控,无需定期扫描文件系统。
|
|
19
|
-
|
|
21
|
+
|
|
20
22
|
类属性:
|
|
21
23
|
cache: 缓存字典,存储处理后的文件内容
|
|
22
24
|
ignore_dirs: 需要忽略的目录列表
|
|
@@ -48,15 +50,15 @@ class AutoCoderRAGDocListener(BaseCacheManager):
|
|
|
48
50
|
r"^test.*$",
|
|
49
51
|
]
|
|
50
52
|
|
|
51
|
-
def __init__(self, path: str, ignore_spec, required_exts: List, args=None, llm=None) -> None:
|
|
53
|
+
def __init__(self, path: str, ignore_spec, required_exts: List, args: Optional[AutoCoderArgs] = None, llm: Optional[Union[ByzerLLM, SimpleByzerLLM, str]] = None) -> None:
|
|
52
54
|
"""
|
|
53
55
|
初始化文件监控缓存管理器。
|
|
54
|
-
|
|
56
|
+
|
|
55
57
|
参数:
|
|
56
58
|
path: 需要监控的代码库根目录
|
|
57
59
|
ignore_spec: 指定哪些文件/目录应被忽略的规则
|
|
58
60
|
required_exts: 需要处理的文件扩展名列表
|
|
59
|
-
|
|
61
|
+
|
|
60
62
|
缓存结构 (self.cache):
|
|
61
63
|
self.cache 是一个字典,其结构比其他缓存管理器更简单:
|
|
62
64
|
{
|
|
@@ -67,19 +69,19 @@ class AutoCoderRAGDocListener(BaseCacheManager):
|
|
|
67
69
|
"file_path2": { ... },
|
|
68
70
|
...
|
|
69
71
|
}
|
|
70
|
-
|
|
72
|
+
|
|
71
73
|
与其他缓存管理器的主要区别:
|
|
72
74
|
1. 不需要存储 MD5 哈希或修改时间,因为文件变更通过监控系统直接获取
|
|
73
75
|
2. 没有本地持久化机制,所有缓存在内存中维护
|
|
74
76
|
3. 缓存更新基于事件驱动,而非定期扫描
|
|
75
|
-
|
|
77
|
+
|
|
76
78
|
文件监控机制:
|
|
77
79
|
- 使用 watchfiles 库监控文件系统变更
|
|
78
80
|
- 支持三种事件类型: 添加(added)、修改(modified)、删除(deleted)
|
|
79
81
|
- 使用单独线程进行监控,不阻塞主线程
|
|
80
82
|
- 监控遵循配置的忽略规则和所需扩展名过滤
|
|
81
83
|
- 初始化时会先加载所有符合条件的文件
|
|
82
|
-
|
|
84
|
+
|
|
83
85
|
源代码处理:
|
|
84
86
|
使用 process_file_local 函数处理单个文件:
|
|
85
87
|
- 参数: file_path (文件路径)
|
|
@@ -111,7 +113,7 @@ class AutoCoderRAGDocListener(BaseCacheManager):
|
|
|
111
113
|
def stop(self):
|
|
112
114
|
"""
|
|
113
115
|
停止文件监控线程。
|
|
114
|
-
|
|
116
|
+
|
|
115
117
|
设置停止事件并等待监控线程结束,用于在对象销毁前优雅地关闭监控。
|
|
116
118
|
"""
|
|
117
119
|
self.stop_event.set()
|
|
@@ -126,7 +128,7 @@ class AutoCoderRAGDocListener(BaseCacheManager):
|
|
|
126
128
|
def load_first(self):
|
|
127
129
|
"""
|
|
128
130
|
初始化时加载所有符合条件的文件。
|
|
129
|
-
|
|
131
|
+
|
|
130
132
|
获取所有符合过滤条件的文件,并将它们添加到缓存中。
|
|
131
133
|
这确保了缓存在开始监控前已经包含所有现有文件。
|
|
132
134
|
"""
|
|
@@ -139,16 +141,17 @@ class AutoCoderRAGDocListener(BaseCacheManager):
|
|
|
139
141
|
def update_cache(self, file_path):
|
|
140
142
|
"""
|
|
141
143
|
处理单个文件并更新缓存。
|
|
142
|
-
|
|
144
|
+
|
|
143
145
|
参数:
|
|
144
146
|
file_path: 文件的绝对路径
|
|
145
|
-
|
|
147
|
+
|
|
146
148
|
处理流程:
|
|
147
149
|
1. 使用 process_file_local 函数解析文件内容
|
|
148
150
|
2. 将解析结果序列化并存储在缓存中
|
|
149
151
|
3. 日志记录更新的文件及当前缓存状态
|
|
150
152
|
"""
|
|
151
|
-
source_code = process_file_local(
|
|
153
|
+
source_code = process_file_local(
|
|
154
|
+
file_path, llm=self.llm, product_mode=self.product_mode)
|
|
152
155
|
self.cache[file_path] = {
|
|
153
156
|
"file_path": file_path,
|
|
154
157
|
"content": [c.model_dump() for c in source_code],
|
|
@@ -159,7 +162,7 @@ class AutoCoderRAGDocListener(BaseCacheManager):
|
|
|
159
162
|
def remove_cache(self, file_path):
|
|
160
163
|
"""
|
|
161
164
|
从缓存中移除指定文件。
|
|
162
|
-
|
|
165
|
+
|
|
163
166
|
参数:
|
|
164
167
|
file_path: 要移除的文件的绝对路径
|
|
165
168
|
"""
|
|
@@ -170,7 +173,7 @@ class AutoCoderRAGDocListener(BaseCacheManager):
|
|
|
170
173
|
def open_watch(self):
|
|
171
174
|
"""
|
|
172
175
|
启动文件系统监控线程。
|
|
173
|
-
|
|
176
|
+
|
|
174
177
|
此方法会持续监控文件系统变更,直到 stop_event 被设置。
|
|
175
178
|
当检测到文件变更时,会根据变更类型执行相应的操作:
|
|
176
179
|
- 添加/修改文件: 调用 update_cache 更新缓存
|
|
@@ -190,10 +193,10 @@ class AutoCoderRAGDocListener(BaseCacheManager):
|
|
|
190
193
|
def get_cache(self, options: Optional[Dict[str, Any]] = None):
|
|
191
194
|
"""
|
|
192
195
|
获取当前缓存。
|
|
193
|
-
|
|
196
|
+
|
|
194
197
|
参数:
|
|
195
198
|
options: 可选的参数,指定获取缓存时的选项
|
|
196
|
-
|
|
199
|
+
|
|
197
200
|
返回:
|
|
198
201
|
当前内存中的缓存字典
|
|
199
202
|
"""
|
|
@@ -202,9 +205,9 @@ class AutoCoderRAGDocListener(BaseCacheManager):
|
|
|
202
205
|
def _load_ignore_file(self):
|
|
203
206
|
"""
|
|
204
207
|
加载忽略文件规则。
|
|
205
|
-
|
|
208
|
+
|
|
206
209
|
首先尝试加载 .serveignore 文件,如果不存在,则尝试加载 .gitignore 文件。
|
|
207
|
-
|
|
210
|
+
|
|
208
211
|
返回:
|
|
209
212
|
包含忽略规则的字符串列表
|
|
210
213
|
"""
|
|
@@ -224,10 +227,10 @@ class AutoCoderRAGDocListener(BaseCacheManager):
|
|
|
224
227
|
def get_all_files(self) -> List[str]:
|
|
225
228
|
"""
|
|
226
229
|
获取所有符合条件的文件路径。
|
|
227
|
-
|
|
230
|
+
|
|
228
231
|
遍历指定目录,应用忽略规则和扩展名过滤,
|
|
229
232
|
返回所有符合条件的文件的绝对路径。
|
|
230
|
-
|
|
233
|
+
|
|
231
234
|
返回:
|
|
232
235
|
符合条件的文件路径列表
|
|
233
236
|
"""
|
|
@@ -258,4 +261,4 @@ class AutoCoderRAGDocListener(BaseCacheManager):
|
|
|
258
261
|
absolute_path = os.path.abspath(file_path)
|
|
259
262
|
all_files.append(absolute_path)
|
|
260
263
|
|
|
261
|
-
return all_files
|
|
264
|
+
return all_files
|
|
@@ -8,6 +8,7 @@ from autocoder.rag.cache.base_cache import (
|
|
|
8
8
|
from typing import Generator, List, Dict, Any, Optional, Tuple
|
|
9
9
|
from autocoder.common import SourceCode
|
|
10
10
|
from loguru import logger
|
|
11
|
+
import functools
|
|
11
12
|
import pathspec
|
|
12
13
|
import os
|
|
13
14
|
import uuid
|
|
@@ -30,7 +31,9 @@ from typing import Union
|
|
|
30
31
|
from byzerllm import SimpleByzerLLM, ByzerLLM
|
|
31
32
|
from autocoder.rag.cache.cache_result_merge import CacheResultMerger, MergeStrategy
|
|
32
33
|
import time
|
|
34
|
+
from typing import Optional,Union
|
|
33
35
|
from .failed_files_utils import save_failed_files, load_failed_files
|
|
36
|
+
from autocoder.utils.llms import get_llm_names
|
|
34
37
|
|
|
35
38
|
if platform.system() != "Windows":
|
|
36
39
|
import fcntl
|
|
@@ -71,8 +74,8 @@ class LocalByzerStorageCache(BaseCacheManager):
|
|
|
71
74
|
emb_llm: Union[ByzerLLM, SimpleByzerLLM] = None,
|
|
72
75
|
host: str = "127.0.0.1",
|
|
73
76
|
port: int = 33333,
|
|
74
|
-
args=None,
|
|
75
|
-
llm=None,
|
|
77
|
+
args:Optional[AutoCoderArgs]=None,
|
|
78
|
+
llm:Optional[Union[ByzerLLM,SimpleByzerLLM,str]]=None,
|
|
76
79
|
):
|
|
77
80
|
"""
|
|
78
81
|
初始化基于 Byzer Storage 的 RAG 缓存管理器。
|
|
@@ -225,6 +228,8 @@ class LocalByzerStorageCache(BaseCacheManager):
|
|
|
225
228
|
from autocoder.rag.token_counter import initialize_tokenizer
|
|
226
229
|
|
|
227
230
|
logger.info("[BUILD CACHE] Starting parallel file processing...")
|
|
231
|
+
llm_name = get_llm_names(self.llm)[0] if self.llm else None
|
|
232
|
+
product_mode = self.args.product_mode
|
|
228
233
|
start_time = time.time()
|
|
229
234
|
with Pool(
|
|
230
235
|
processes=os.cpu_count(),
|
|
@@ -235,8 +240,9 @@ class LocalByzerStorageCache(BaseCacheManager):
|
|
|
235
240
|
for file_info in files_to_process:
|
|
236
241
|
target_files_to_process.append(
|
|
237
242
|
self.fileinfo_to_tuple(file_info))
|
|
238
|
-
|
|
239
|
-
|
|
243
|
+
worker_func = functools.partial(process_file_in_multi_process, llm=llm_name, product_mode=product_mode)
|
|
244
|
+
results = pool.map(worker_func, target_files_to_process)
|
|
245
|
+
|
|
240
246
|
processing_time = time.time() - start_time
|
|
241
247
|
logger.info(f"[BUILD CACHE] File processing completed, time elapsed: {processing_time:.2f}s")
|
|
242
248
|
|
|
@@ -452,7 +458,7 @@ class LocalByzerStorageCache(BaseCacheManager):
|
|
|
452
458
|
f"[QUEUE PROCESSING] Processing file update: {file_info.file_path}")
|
|
453
459
|
try:
|
|
454
460
|
content = process_file_local(
|
|
455
|
-
self.fileinfo_to_tuple(file_info))
|
|
461
|
+
self.fileinfo_to_tuple(file_info), llm=self.llm, product_mode=self.product_mode)
|
|
456
462
|
if content:
|
|
457
463
|
self.cache[file_info.file_path] = CacheItem(
|
|
458
464
|
file_path=file_info.file_path,
|