auto-coder 0.1.352__py3-none-any.whl → 0.1.354__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of auto-coder might be problematic.
- {auto_coder-0.1.352.dist-info → auto_coder-0.1.354.dist-info}/METADATA +1 -1
- {auto_coder-0.1.352.dist-info → auto_coder-0.1.354.dist-info}/RECORD +43 -30
- autocoder/auto_coder_rag.py +37 -1
- autocoder/auto_coder_runner.py +8 -0
- autocoder/commands/auto_command.py +59 -131
- autocoder/commands/tools.py +1 -1
- autocoder/common/__init__.py +1 -1
- autocoder/common/conversations/__init__.py +52 -0
- autocoder/common/conversations/compatibility.py +303 -0
- autocoder/common/conversations/conversation_manager.py +502 -0
- autocoder/common/conversations/example.py +152 -0
- autocoder/common/file_monitor/__init__.py +5 -0
- autocoder/common/file_monitor/monitor.py +383 -0
- autocoder/common/git_utils.py +1 -1
- autocoder/common/ignorefiles/__init__.py +4 -0
- autocoder/common/ignorefiles/ignore_file_utils.py +103 -0
- autocoder/common/ignorefiles/test_ignore_file_utils.py +91 -0
- autocoder/common/rulefiles/__init__.py +15 -0
- autocoder/common/rulefiles/autocoderrules_utils.py +173 -0
- autocoder/common/save_formatted_log.py +54 -0
- autocoder/common/v2/agent/agentic_edit.py +40 -36
- autocoder/common/v2/agent/agentic_edit_tools/list_files_tool_resolver.py +1 -1
- autocoder/common/v2/agent/agentic_edit_tools/search_files_tool_resolver.py +73 -43
- autocoder/common/v2/agent/agentic_edit_tools/test_search_files_tool_resolver.py +163 -0
- autocoder/common/v2/code_editblock_manager.py +20 -8
- autocoder/index/index.py +1 -1
- autocoder/models.py +22 -9
- autocoder/rag/api_server.py +14 -2
- autocoder/rag/cache/simple_cache.py +63 -33
- autocoder/rag/loaders/docx_loader.py +1 -1
- autocoder/rag/loaders/filter_utils.py +133 -76
- autocoder/rag/loaders/image_loader.py +15 -3
- autocoder/rag/loaders/pdf_loader.py +2 -2
- autocoder/rag/long_context_rag.py +11 -0
- autocoder/rag/qa_conversation_strategy.py +5 -31
- autocoder/rag/utils.py +21 -2
- autocoder/utils/_markitdown.py +66 -25
- autocoder/utils/auto_coder_utils/chat_stream_out.py +1 -0
- autocoder/version.py +1 -1
- {auto_coder-0.1.352.dist-info → auto_coder-0.1.354.dist-info}/LICENSE +0 -0
- {auto_coder-0.1.352.dist-info → auto_coder-0.1.354.dist-info}/WHEEL +0 -0
- {auto_coder-0.1.352.dist-info → auto_coder-0.1.354.dist-info}/entry_points.txt +0 -0
- {auto_coder-0.1.352.dist-info → auto_coder-0.1.354.dist-info}/top_level.txt +0 -0
autocoder/common/v2/agent/agentic_edit_tools/test_search_files_tool_resolver.py
ADDED
@@ -0,0 +1,163 @@
+import pytest
+import os
+import tempfile
+import shutil
+from unittest.mock import patch, MagicMock
+
+from autocoder.common.v2.agent.agentic_edit_tools.search_files_tool_resolver import SearchFilesToolResolver
+from autocoder.common.v2.agent.agentic_edit_types import SearchFilesTool, ToolResult
+from autocoder.common import AutoCoderArgs
+
+# Helper function to create a directory structure with files for testing
+def create_test_files(base_dir, structure):
+    """
+    Creates a directory structure with files based on the provided dictionary.
+    Keys are filenames (relative to base_dir), values are file contents.
+    Directories are created automatically.
+    """
+    for path, content in structure.items():
+        full_path = os.path.join(base_dir, path)
+        os.makedirs(os.path.dirname(full_path), exist_ok=True)
+        with open(full_path, 'w') as f:
+            f.write(content)
+
+@pytest.fixture
+def search_tool_resolver(temp_search_dir):
+    """Fixture to provide an instance of SearchFilesToolResolver."""
+    # Create AutoCoderArgs with the temp directory as source_dir to allow the security check to pass
+    args = AutoCoderArgs()
+    args.source_dir = temp_search_dir  # Set the source_dir to our temp directory
+    return SearchFilesToolResolver(None, SearchFilesTool(path="", regex=""), args)
+
+@pytest.fixture(scope="function")
+def temp_search_dir():
+    """Fixture to create a temporary directory with test files for searching."""
+    temp_dir = tempfile.mkdtemp()
+    test_structure = {
+        "file1.txt": "Hello world\nThis is a test file.",
+        "subdir/file2.py": "import sys\n\ndef main():\n    print('Python script')\n",
+        "subdir/another.txt": "Another text file with world.",
+        ".hiddenfile": "This should be ignored by default",
+        "no_match.md": "Markdown file."
+    }
+    create_test_files(temp_dir, test_structure)
+    yield temp_dir  # Provide the path to the test function
+    shutil.rmtree(temp_dir)  # Cleanup after test
+
+# --- Test Cases ---
+
+def test_resolve_finds_matches(search_tool_resolver, temp_search_dir):
+    """Test that resolve finds matches correctly."""
+    # Set up the tool with the pattern we want to search for
+    tool = SearchFilesTool(
+        path="",  # Use empty path to search in the source_dir itself
+        regex="world",
+        file_pattern="*.txt"
+    )
+    search_tool_resolver.tool = tool
+
+    # Call the resolve method directly
+    response = search_tool_resolver.resolve()
+
+    # Check the response
+    assert isinstance(response, ToolResult)
+    assert response.success
+    assert "Search completed. Found 2 matches" in response.message
+
+    # Check that the correct files were found
+    assert len(response.content) == 2
+    paths = [result["path"] for result in response.content]
+    assert any("file1.txt" in path for path in paths)
+    assert any("another.txt" in path for path in paths)
+
+    # Check that the match lines contain our search pattern
+    for result in response.content:
+        assert "world" in result["match_line"]
+
+def test_resolve_no_matches(search_tool_resolver, temp_search_dir):
+    """Test that resolve handles no matches correctly."""
+    tool = SearchFilesTool(
+        path="",  # Use empty path to search in the source_dir itself
+        regex="nonexistent_pattern",
+        file_pattern="*"
+    )
+    search_tool_resolver.tool = tool
+
+    response = search_tool_resolver.resolve()
+
+    assert isinstance(response, ToolResult)
+    assert response.success  # Still success, just no results
+    assert "Search completed. Found 0 matches" in response.message
+    assert len(response.content) == 0
+
+def test_resolve_file_pattern(search_tool_resolver, temp_search_dir):
+    """Test that the file_pattern is correctly applied."""
+    # Test .txt pattern
+    tool_txt = SearchFilesTool(
+        path="",  # Use empty path to search in the source_dir itself
+        regex="world",
+        file_pattern="*.txt"  # Only search .txt files
+    )
+    search_tool_resolver.tool = tool_txt
+
+    response_txt = search_tool_resolver.resolve()
+
+    assert isinstance(response_txt, ToolResult)
+    assert response_txt.success
+    assert "Search completed. Found 2 matches" in response_txt.message
+    # Ensure only .txt files were matched
+    for result in response_txt.content:
+        assert result["path"].endswith(".txt")
+
+    # Test .py pattern
+    tool_py = SearchFilesTool(
+        path="",  # Use empty path to search in the source_dir itself
+        regex="print",
+        file_pattern="*.py"  # Only search .py files
+    )
+    search_tool_resolver.tool = tool_py
+
+    response_py = search_tool_resolver.resolve()
+
+    assert isinstance(response_py, ToolResult)
+    assert response_py.success
+    assert "Search completed. Found 1 matches" in response_py.message
+    # Ensure only .py files were matched
+    for result in response_py.content:
+        assert result["path"].endswith(".py")
+
+def test_invalid_regex(search_tool_resolver, temp_search_dir):
+    """Test that an invalid regex pattern is properly handled."""
+    tool = SearchFilesTool(
+        path="",  # Use empty path to search in the source_dir itself
+        regex="[invalid regex",  # Invalid regex pattern
+        file_pattern="*"
+    )
+    search_tool_resolver.tool = tool
+
+    response = search_tool_resolver.resolve()
+
+    assert isinstance(response, ToolResult)
+    assert not response.success
+    assert "Invalid regex pattern" in response.message
+
+def test_nonexistent_path(search_tool_resolver, temp_search_dir):
+    """Test that a nonexistent path is properly handled."""
+    # Create a path that we know doesn't exist under temp_search_dir
+    nonexistent_path = "nonexistent_subdirectory"
+
+    tool = SearchFilesTool(
+        path=nonexistent_path,  # This path doesn't exist in our temp directory
+        regex="pattern",
+        file_pattern="*"
+    )
+    search_tool_resolver.tool = tool
+
+    response = search_tool_resolver.resolve()
+
+    assert isinstance(response, ToolResult)
+    assert not response.success
+    assert "Error: Search path not found" in response.message
+
+# Add more tests as needed
+
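The new test module is self-contained pytest code, so it can be exercised on its own. A minimal invocation sketch (assuming pytest is installed in the same environment as the package):

    import pytest

    # Run only the new resolver tests, verbosely; the path matches the RECORD entry above.
    pytest.main([
        "autocoder/common/v2/agent/agentic_edit_tools/test_search_files_tool_resolver.py",
        "-v",
    ])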
autocoder/common/v2/code_editblock_manager.py
CHANGED
@@ -396,24 +396,36 @@ class CodeEditBlockManager:
     def _format_blocks(merge: MergeCodeWithoutEffect) -> Tuple[str, str]:
         unmerged_formatted_text = ""
         for file_path, head, update in merge.failed_blocks:
-            unmerged_formatted_text += "```lang
-            unmerged_formatted_text +=
-            unmerged_formatted_text += "
+            unmerged_formatted_text += "```lang"
+            unmerged_formatted_text += "\n"
+            unmerged_formatted_text += f"##File: {file_path}"
+            unmerged_formatted_text += "\n"
+            unmerged_formatted_text += "<<<<<<< SEARCH"
+            unmerged_formatted_text += "\n"
             unmerged_formatted_text += head
-            unmerged_formatted_text += "
+            unmerged_formatted_text += "\n"
+            unmerged_formatted_text += "======="
+            unmerged_formatted_text += "\n"
             unmerged_formatted_text += update
-            unmerged_formatted_text += "
+            unmerged_formatted_text += "\n"
+            unmerged_formatted_text += ">>>>>>> REPLACE"
+            unmerged_formatted_text += "\n"
             unmerged_formatted_text += "```"
             unmerged_formatted_text += "\n"
 
         merged_formatted_text = ""
         if merge.merged_blocks:
             for file_path, head, update in merge.merged_blocks:
-                merged_formatted_text += "```lang
-                merged_formatted_text +=
+                merged_formatted_text += "```lang"
+                merged_formatted_text += "\n"
+                merged_formatted_text += f"##File: {file_path}"
+                merged_formatted_text += "\n"
                 merged_formatted_text += head
-                merged_formatted_text += "
+                merged_formatted_text += "\n"
+                merged_formatted_text += "======="
+                merged_formatted_text += "\n"
                 merged_formatted_text += update
+                merged_formatted_text += "\n"
                 merged_formatted_text += "```"
                 merged_formatted_text += "\n"
 
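For context, the rewritten _format_blocks now emits each failed block in the standard SEARCH/REPLACE fence format. A sketch of what one rendered block looks like (the file name and contents are illustrative, not from the package):

    # Mirrors the string concatenation in the hunk above; values are made up.
    file_path, head, update = "example.py", "old_code()", "new_code()"
    block = (
        "```lang\n"
        f"##File: {file_path}\n"
        "<<<<<<< SEARCH\n"
        f"{head}\n"
        "=======\n"
        f"{update}\n"
        ">>>>>>> REPLACE\n"
        "```\n"
    )
    print(block)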
autocoder/index/index.py
CHANGED
@@ -462,7 +462,7 @@ class IndexManager:
     def filter_exclude_files(self, file_path, exclude_patterns):
         # Additionally apply the ignore_file_utils filter
         try:
-            from
+            from autocoder.common.ignorefiles import ignore_file_utils
             if ignore_file_utils.should_ignore(file_path):
                 return True
         except Exception:
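The import is deferred into the try block, so indexing still works if the new ignorefiles package is unavailable. Usage follows the pattern in the hunk; a sketch (the file path is illustrative):

    from autocoder.common.ignorefiles import ignore_file_utils

    # Ask the project's ignore rules whether a file should be skipped during indexing.
    if ignore_file_utils.should_ignore("/repo/build/artifact.bin"):
        print("excluded from indexing")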
autocoder/models.py
CHANGED
@@ -60,22 +60,35 @@ default_models_list = [
         "max_output_tokens": 8096
     },
     {
-        "name": "
+        "name": "openai/gpt-4.1-mini",
         "description": "",
-        "model_name": "
+        "model_name": "openai/gpt-4.1-mini",
         "model_type": "saas/openai",
         "base_url": "https://openrouter.ai/api/v1",
         "api_key_path": "",
         "is_reasoning": False,
-        "input_price":
-        "output_price":
+        "input_price": 2.8,
+        "output_price": 11.2,
         "average_speed": 0.0,
-        "max_output_tokens": 8096*
+        "max_output_tokens": 8096*3
     },
     {
-        "name": "
+        "name": "openai/gpt-4.1",
         "description": "",
-        "model_name": "
+        "model_name": "openai/gpt-4.1",
+        "model_type": "saas/openai",
+        "base_url": "https://openrouter.ai/api/v1",
+        "api_key_path": "",
+        "is_reasoning": False,
+        "input_price": 14.0,
+        "output_price": 42.0,
+        "average_speed": 0.0,
+        "max_output_tokens": 8096*3
+    },
+    {
+        "name": "openai/gpt-4.1-nano",
+        "description": "",
+        "model_name": "openai/gpt-4.1-nano",
         "model_type": "saas/openai",
         "base_url": "https://openrouter.ai/api/v1",
         "api_key_path": "",
@@ -83,8 +96,8 @@ default_models_list = [
         "input_price": 0.0,
         "output_price": 0.0,
         "average_speed": 0.0,
-        "max_output_tokens": 8096*
-    },
+        "max_output_tokens": 8096*3
+    },
     {
         "name": "openrouter/google/gemini-2.5-pro-preview-03-25",
         "description": "",
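The three new OpenRouter-backed entries share the shape of the existing ones; 8096*3 evaluates to a 24288-token output cap. A lookup sketch (assuming default_models_list is importable as the module-level name shown in the hunk header):

    from autocoder.models import default_models_list

    # Find the newly added gpt-4.1 entry and read its endpoint and output cap.
    entry = next(m for m in default_models_list if m["name"] == "openai/gpt-4.1")
    print(entry["base_url"], entry["max_output_tokens"])  # https://openrouter.ai/api/v1 24288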
autocoder/rag/api_server.py
CHANGED
@@ -187,9 +187,16 @@ async def serve_static_file(full_path: str, request: Request):
     # Use the normalized path directly
     file_path = os.path.join("/", os.path.normpath(unquote(full_path)))
 
+    # Fetch the allowed static-file root directory
+    allowed_static_abs = request.app.state.allowed_static_abs
+    logger.info(f"==allowed_static_abs==: {allowed_static_abs}")
+
+    if file_path.startswith(("/_images","_images")):
+        file_path = os.path.join(allowed_static_abs, file_path)
+
     # Check whether the file exists
     if not os.path.exists(file_path):
-        raise FileNotFoundError(f"File not found: {file_path}")
+        raise FileNotFoundError(f"File not found: {file_path}")
 
     # If Nginx X-Accel-Redirect is enabled, use the X-Accel feature
     if hasattr(request.app.state, "enable_nginx_x_accel") and request.app.state.enable_nginx_x_accel:
@@ -273,6 +280,9 @@ def serve(llm:ByzerLLM, args: ServerArgs):
     allowed_static_abs = os.path.abspath(allowed_static_dir)
     logger.info(f"Static files root directory: {allowed_static_abs}")
 
+    # Store the allowed static-file directory on the application state
+    router_app.state.allowed_static_abs = allowed_static_abs
+
     router_app.add_middleware(
         CORSMiddleware,
         allow_origins=args.allowed_origins,
@@ -309,9 +319,11 @@ def serve(llm:ByzerLLM, args: ServerArgs):
 
         # Check if path is in allowed directory
         abs_path = os.path.abspath(os.path.join("/", normalized_path))
+        if abs_path.startswith("/_images"):
+            return await call_next(request)
 
         # Use the precomputed allowed_static_abs
-        is_allowed = abs_path.startswith(allowed_static_abs)
+        is_allowed = abs_path.startswith(request.app.state.allowed_static_abs)
 
         if not is_allowed:
             logger.warning(f"Unauthorized path access: {abs_path}")
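Taken together, these hunks store the allowed static root on the app state and add a /_images bypass to both the file resolver and the path-check middleware. A condensed sketch of the resulting check (paths are illustrative, not from the package):

    import os

    allowed_static_abs = os.path.abspath("/srv/static")  # computed once at startup
    abs_path = os.path.abspath(os.path.join("/", "_images/fig1.png"))

    if abs_path.startswith("/_images"):
        print("bypasses the allow-list; re-rooted under allowed_static_abs")
    elif not abs_path.startswith(allowed_static_abs):
        print(f"Unauthorized path access: {abs_path}")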
autocoder/rag/cache/simple_cache.py
CHANGED
@@ -24,6 +24,7 @@ from .failed_files_utils import load_failed_files, save_failed_files
 from autocoder.common import AutoCoderArgs
 from byzerllm import SimpleByzerLLM, ByzerLLM
 from autocoder.utils.llms import get_llm_names
+from autocoder.common.file_monitor.monitor import get_file_monitor, Change
 
 
 default_ignore_dirs = [
@@ -50,7 +51,7 @@ def generate_content_md5(content: Union[str, bytes]) -> str:
 
 
 class AutoCoderRAGAsyncUpdateQueue(BaseCacheManager):
-    def __init__(self, path: str, ignore_spec, required_exts: list,
+    def __init__(self, path: str, ignore_spec, required_exts: list, args: Optional[AutoCoderArgs] = None, llm: Optional[Union[ByzerLLM, SimpleByzerLLM, str]] = None):
         """
         Initialize the async update queue that manages the code-file cache.
 
@@ -58,7 +59,8 @@ class AutoCoderRAGAsyncUpdateQueue(BaseCacheManager):
         path: root directory of the codebase to index
         ignore_spec: rules specifying which files/directories should be ignored
         required_exts: list of file extensions that need to be processed
-
+        args: AutoCoderArgs object carrying the configuration
+        llm: LLM instance used for code analysis
 
         Cache structure (self.cache):
         self.cache is a dictionary laid out as follows:
@@ -99,7 +101,6 @@ class AutoCoderRAGAsyncUpdateQueue(BaseCacheManager):
         self.args = args
         self.llm = llm
         self.product_mode = args.product_mode or "lite"
-        self.update_interval = update_interval
         self.queue = []
         self.cache = {}  # starts empty; populated later via read_cache()
         self.lock = threading.Lock()
@@ -115,10 +116,16 @@ class AutoCoderRAGAsyncUpdateQueue(BaseCacheManager):
         self.queue_thread.daemon = True
         self.queue_thread.start()
 
-        #
-        self.
-
-        self.
+        # Register the file-monitor callback
+        self.file_monitor = get_file_monitor(self.path)
+        # Watch the root directory so changes in all subdirectories and files are captured
+        self.file_monitor.register(self.path, self._on_file_change)
+        # Make sure the monitor has been started
+        if not self.file_monitor.is_running():
+            self.file_monitor.start()
+            logger.info(f"Started file monitor for {self.path}")
+        else:
+            logger.info(f"File monitor already running for {self.path}")
 
         self.cache = self.read_cache()
 
@@ -130,37 +137,57 @@ class AutoCoderRAGAsyncUpdateQueue(BaseCacheManager):
                 logger.error(f"Error in process_queue: {e}")
                 time.sleep(1)  # avoid checking too frequently
 
-    def
-        """
-
-
-
-
-
-
-
-
-
-
-
-
+    def _on_file_change(self, change_type: Change, file_path: str):
+        """
+        File-monitor callback; triggers an update when a file changes.
+
+        Args:
+            change_type: type of change (Change.added, Change.modified, Change.deleted)
+            file_path: path of the file that changed
+        """
+        try:
+            # If the cache has not been initialized yet, skip triggering
+            if not self.cache:
+                return
+
+            # Skip files whose extension is not in the list of extensions to process
+            if self.required_exts and not any(file_path.endswith(ext) for ext in self.required_exts):
+                return
+
+            # Skip files matched by the ignore rules
+            if self.ignore_spec and self.ignore_spec.match_file(os.path.relpath(file_path, self.path)):
+                return
+
+            logger.info(f"File change detected: {change_type} - {file_path}")
+            self.trigger_update()
+        except Exception as e:
+            logger.error(f"Error in file change handler: {e}")
+            logger.exception(e)
 
     def stop(self):
         self.stop_event.set()
-
-
+        # Unregister the file-monitor callback
+        try:
+            self.file_monitor.unregister(self.path, self._on_file_change)
+            logger.info(f"Unregistered file monitor callback for {self.path}")
+        except Exception as e:
+            logger.error(f"Error unregistering file monitor callback: {e}")
+        # Only wait for the queue-processing thread to finish
+        if hasattr(self, 'queue_thread') and self.queue_thread.is_alive():
+            self.queue_thread.join(timeout=2.0)
 
     def fileinfo_to_tuple(self, file_info: FileInfo) -> Tuple[str, str, float, str]:
         return (file_info.file_path, file_info.relative_path, file_info.modify_time, file_info.file_md5)
 
     def __del__(self):
+        # Make sure monitoring is stopped and resources are cleaned up when the object is destroyed
         self.stop()
 
     def load_first(self):
         with self.lock:
             if self.cache:
                 return
-            files_to_process = []
+            files_to_process = []
             for file_info in self.get_all_files():
                 file_path, _, modify_time, file_md5 = file_info
                 if (
@@ -175,7 +202,7 @@ class AutoCoderRAGAsyncUpdateQueue(BaseCacheManager):
             # [process_file.remote(file_info) for file_info in files_to_process]
             # )
             from autocoder.rag.token_counter import initialize_tokenizer
-            llm_name = get_llm_names(self.llm)[0] if self.llm else None
+            llm_name = get_llm_names(self.llm)[0] if self.llm else None
             with Pool(
                 processes=os.cpu_count(),
                 initializer=initialize_tokenizer,
@@ -184,8 +211,8 @@ class AutoCoderRAGAsyncUpdateQueue(BaseCacheManager):
 
                 worker_func = functools.partial(
                     process_file_in_multi_process, llm=llm_name, product_mode=self.product_mode)
-                results = pool.map(worker_func, files_to_process)
-
+                results = pool.map(worker_func, files_to_process)
+
                 for file_info, result in zip(files_to_process, results):
                     if result:  # only update the cache when result is non-empty
                         self.update_cache(file_info, result)
@@ -203,16 +230,15 @@ class AutoCoderRAGAsyncUpdateQueue(BaseCacheManager):
             file_path, relative_path, modify_time, file_md5 = file_info
             current_files.add(file_path)
            # If the file failed to parse before, skip it in this incremental update
-            if file_path in self.failed_files:
-                # logger.info(f"File {file_path} previously failed to parse; skipping this update")
+            if file_path in self.failed_files:
                 continue
-            # Change detection
+            # Change detection
             if (
                 file_path not in self.cache
                 or self.cache[file_path].get("md5", "") != file_md5
             ):
                 files_to_process.append(
-                    (file_path, relative_path, modify_time, file_md5))
+                    (file_path, relative_path, modify_time, file_md5))
 
         deleted_files = set(self.cache.keys()) - current_files
         logger.info(f"files_to_process: {files_to_process}")
@@ -289,6 +315,8 @@ class AutoCoderRAGAsyncUpdateQueue(BaseCacheManager):
                 for line in f:
                     data = json.loads(line)
                     cache[data["file_path"]] = data
+        else:
+            self.load_first()
         return cache
 
     def write_cache(self):
@@ -366,6 +394,9 @@ class AutoCoderRAGAsyncUpdateQueue(BaseCacheManager):
             dirs[:] = [d for d in dirs if not d.startswith(
                 ".") and d not in default_ignore_dirs]
 
+            # Filter out files that start with a dot
+            files[:] = [f for f in files if not f.startswith(".")]
+
             if self.ignore_spec:
                 relative_root = os.path.relpath(root, self.path)
                 dirs[:] = [
@@ -390,6 +421,5 @@ class AutoCoderRAGAsyncUpdateQueue(BaseCacheManager):
                 modify_time = os.path.getmtime(file_path)
                 file_md5 = generate_file_md5(file_path)
                 all_files.append(
-                    (file_path, relative_path, modify_time, file_md5))
-
+                    (file_path, relative_path, modify_time, file_md5))
         return all_files
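The cache now reacts to filesystem events instead of polling on a fixed update_interval. A minimal sketch of the registration pattern introduced here, using only the monitor calls that appear in the hunks (the watched path is illustrative):

    from autocoder.common.file_monitor.monitor import get_file_monitor, Change

    def on_change(change_type: Change, file_path: str):
        # Same shape as AutoCoderRAGAsyncUpdateQueue._on_file_change above.
        print(f"File change detected: {change_type} - {file_path}")

    monitor = get_file_monitor("/path/to/repo")   # one monitor per root directory
    monitor.register("/path/to/repo", on_change)  # watch the root recursively
    if not monitor.is_running():
        monitor.start()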
autocoder/rag/loaders/docx_loader.py
CHANGED
@@ -1,5 +1,4 @@
 from io import BytesIO
-from autocoder.utils._markitdown import MarkItDown
 import traceback
 
 def extract_text_from_docx_old(docx_path):
@@ -13,6 +12,7 @@ def extract_text_from_docx_old(docx_path):
 
 def extract_text_from_docx(docx_path):
     try:
+        from autocoder.utils._markitdown import MarkItDown
         md_converter = MarkItDown()
         result = md_converter.convert(docx_path)
         return result.text_content
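Moving the MarkItDown import inside the function makes it lazy: importing the loader module no longer pulls in _markitdown, which only loads on the first conversion. A usage sketch (the file name is illustrative):

    from autocoder.rag.loaders.docx_loader import extract_text_from_docx

    # MarkItDown is imported on this first call, not at module import time.
    text = extract_text_from_docx("report.docx")
    print(text[:200])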