auto-coder 0.1.352__py3-none-any.whl → 0.1.354__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of auto-coder might be problematic. Click here for more details.

Files changed (43)
  1. {auto_coder-0.1.352.dist-info → auto_coder-0.1.354.dist-info}/METADATA +1 -1
  2. {auto_coder-0.1.352.dist-info → auto_coder-0.1.354.dist-info}/RECORD +43 -30
  3. autocoder/auto_coder_rag.py +37 -1
  4. autocoder/auto_coder_runner.py +8 -0
  5. autocoder/commands/auto_command.py +59 -131
  6. autocoder/commands/tools.py +1 -1
  7. autocoder/common/__init__.py +1 -1
  8. autocoder/common/conversations/__init__.py +52 -0
  9. autocoder/common/conversations/compatibility.py +303 -0
  10. autocoder/common/conversations/conversation_manager.py +502 -0
  11. autocoder/common/conversations/example.py +152 -0
  12. autocoder/common/file_monitor/__init__.py +5 -0
  13. autocoder/common/file_monitor/monitor.py +383 -0
  14. autocoder/common/git_utils.py +1 -1
  15. autocoder/common/ignorefiles/__init__.py +4 -0
  16. autocoder/common/ignorefiles/ignore_file_utils.py +103 -0
  17. autocoder/common/ignorefiles/test_ignore_file_utils.py +91 -0
  18. autocoder/common/rulefiles/__init__.py +15 -0
  19. autocoder/common/rulefiles/autocoderrules_utils.py +173 -0
  20. autocoder/common/save_formatted_log.py +54 -0
  21. autocoder/common/v2/agent/agentic_edit.py +40 -36
  22. autocoder/common/v2/agent/agentic_edit_tools/list_files_tool_resolver.py +1 -1
  23. autocoder/common/v2/agent/agentic_edit_tools/search_files_tool_resolver.py +73 -43
  24. autocoder/common/v2/agent/agentic_edit_tools/test_search_files_tool_resolver.py +163 -0
  25. autocoder/common/v2/code_editblock_manager.py +20 -8
  26. autocoder/index/index.py +1 -1
  27. autocoder/models.py +22 -9
  28. autocoder/rag/api_server.py +14 -2
  29. autocoder/rag/cache/simple_cache.py +63 -33
  30. autocoder/rag/loaders/docx_loader.py +1 -1
  31. autocoder/rag/loaders/filter_utils.py +133 -76
  32. autocoder/rag/loaders/image_loader.py +15 -3
  33. autocoder/rag/loaders/pdf_loader.py +2 -2
  34. autocoder/rag/long_context_rag.py +11 -0
  35. autocoder/rag/qa_conversation_strategy.py +5 -31
  36. autocoder/rag/utils.py +21 -2
  37. autocoder/utils/_markitdown.py +66 -25
  38. autocoder/utils/auto_coder_utils/chat_stream_out.py +1 -0
  39. autocoder/version.py +1 -1
  40. {auto_coder-0.1.352.dist-info → auto_coder-0.1.354.dist-info}/LICENSE +0 -0
  41. {auto_coder-0.1.352.dist-info → auto_coder-0.1.354.dist-info}/WHEEL +0 -0
  42. {auto_coder-0.1.352.dist-info → auto_coder-0.1.354.dist-info}/entry_points.txt +0 -0
  43. {auto_coder-0.1.352.dist-info → auto_coder-0.1.354.dist-info}/top_level.txt +0 -0
autocoder/common/v2/agent/agentic_edit_tools/test_search_files_tool_resolver.py ADDED
@@ -0,0 +1,163 @@
1
+ import pytest
2
+ import os
3
+ import tempfile
4
+ import shutil
5
+ from unittest.mock import patch, MagicMock
6
+
7
+ from autocoder.common.v2.agent.agentic_edit_tools.search_files_tool_resolver import SearchFilesToolResolver
8
+ from autocoder.common.v2.agent.agentic_edit_types import SearchFilesTool, ToolResult
9
+ from autocoder.common import AutoCoderArgs
10
+
11
+ # Helper function to create a directory structure with files for testing
12
+ def create_test_files(base_dir, structure):
13
+ """
14
+ Creates a directory structure with files based on the provided dictionary.
15
+ Keys are filenames (relative to base_dir), values are file contents.
16
+ Directories are created automatically.
17
+ """
18
+ for path, content in structure.items():
19
+ full_path = os.path.join(base_dir, path)
20
+ os.makedirs(os.path.dirname(full_path), exist_ok=True)
21
+ with open(full_path, 'w') as f:
22
+ f.write(content)
23
+
24
+ @pytest.fixture
25
+ def search_tool_resolver(temp_search_dir):
26
+ """Fixture to provide an instance of SearchFilesToolResolver."""
27
+ # Create AutoCoderArgs with the temp directory as source_dir to allow the security check to pass
28
+ args = AutoCoderArgs()
29
+ args.source_dir = temp_search_dir # Set the source_dir to our temp directory
30
+ return SearchFilesToolResolver(None, SearchFilesTool(path="", regex=""), args)
31
+
32
+ @pytest.fixture(scope="function")
33
+ def temp_search_dir():
34
+ """Fixture to create a temporary directory with test files for searching."""
35
+ temp_dir = tempfile.mkdtemp()
36
+ test_structure = {
37
+ "file1.txt": "Hello world\nThis is a test file.",
38
+ "subdir/file2.py": "import sys\n\ndef main():\n print('Python script')\n",
39
+ "subdir/another.txt": "Another text file with world.",
40
+ ".hiddenfile": "This should be ignored by default",
41
+ "no_match.md": "Markdown file."
42
+ }
43
+ create_test_files(temp_dir, test_structure)
44
+ yield temp_dir # Provide the path to the test function
45
+ shutil.rmtree(temp_dir) # Cleanup after test
46
+
47
+ # --- Test Cases ---
48
+
49
+ def test_resolve_finds_matches(search_tool_resolver, temp_search_dir):
50
+ """Test that resolve finds matches correctly."""
51
+ # Set up the tool with the pattern we want to search for
52
+ tool = SearchFilesTool(
53
+ path="", # Use empty path to search in the source_dir itself
54
+ regex="world",
55
+ file_pattern="*.txt"
56
+ )
57
+ search_tool_resolver.tool = tool
58
+
59
+ # Call the resolve method directly
60
+ response = search_tool_resolver.resolve()
61
+
62
+ # Check the response
63
+ assert isinstance(response, ToolResult)
64
+ assert response.success
65
+ assert "Search completed. Found 2 matches" in response.message
66
+
67
+ # Check that the correct files were found
68
+ assert len(response.content) == 2
69
+ paths = [result["path"] for result in response.content]
70
+ assert any("file1.txt" in path for path in paths)
71
+ assert any("another.txt" in path for path in paths)
72
+
73
+ # Check that the match lines contain our search pattern
74
+ for result in response.content:
75
+ assert "world" in result["match_line"]
76
+
77
+ def test_resolve_no_matches(search_tool_resolver, temp_search_dir):
78
+ """Test that resolve handles no matches correctly."""
79
+ tool = SearchFilesTool(
80
+ path="", # Use empty path to search in the source_dir itself
81
+ regex="nonexistent_pattern",
82
+ file_pattern="*"
83
+ )
84
+ search_tool_resolver.tool = tool
85
+
86
+ response = search_tool_resolver.resolve()
87
+
88
+ assert isinstance(response, ToolResult)
89
+ assert response.success # Still success, just no results
90
+ assert "Search completed. Found 0 matches" in response.message
91
+ assert len(response.content) == 0
92
+
93
+ def test_resolve_file_pattern(search_tool_resolver, temp_search_dir):
94
+ """Test that the file_pattern is correctly applied."""
95
+ # Test .txt pattern
96
+ tool_txt = SearchFilesTool(
97
+ path="", # Use empty path to search in the source_dir itself
98
+ regex="world",
99
+ file_pattern="*.txt" # Only search .txt files
100
+ )
101
+ search_tool_resolver.tool = tool_txt
102
+
103
+ response_txt = search_tool_resolver.resolve()
104
+
105
+ assert isinstance(response_txt, ToolResult)
106
+ assert response_txt.success
107
+ assert "Search completed. Found 2 matches" in response_txt.message
108
+ # Ensure only .txt files were matched
109
+ for result in response_txt.content:
110
+ assert result["path"].endswith(".txt")
111
+
112
+ # Test .py pattern
113
+ tool_py = SearchFilesTool(
114
+ path="", # Use empty path to search in the source_dir itself
115
+ regex="print",
116
+ file_pattern="*.py" # Only search .py files
117
+ )
118
+ search_tool_resolver.tool = tool_py
119
+
120
+ response_py = search_tool_resolver.resolve()
121
+
122
+ assert isinstance(response_py, ToolResult)
123
+ assert response_py.success
124
+ assert "Search completed. Found 1 matches" in response_py.message
125
+ # Ensure only .py files were matched
126
+ for result in response_py.content:
127
+ assert result["path"].endswith(".py")
128
+
129
+ def test_invalid_regex(search_tool_resolver, temp_search_dir):
130
+ """Test that an invalid regex pattern is properly handled."""
131
+ tool = SearchFilesTool(
132
+ path="", # Use empty path to search in the source_dir itself
133
+ regex="[invalid regex", # Invalid regex pattern
134
+ file_pattern="*"
135
+ )
136
+ search_tool_resolver.tool = tool
137
+
138
+ response = search_tool_resolver.resolve()
139
+
140
+ assert isinstance(response, ToolResult)
141
+ assert not response.success
142
+ assert "Invalid regex pattern" in response.message
143
+
144
+ def test_nonexistent_path(search_tool_resolver, temp_search_dir):
145
+ """Test that a nonexistent path is properly handled."""
146
+ # Create a path that we know doesn't exist under temp_search_dir
147
+ nonexistent_path = "nonexistent_subdirectory"
148
+
149
+ tool = SearchFilesTool(
150
+ path=nonexistent_path, # This path doesn't exist in our temp directory
151
+ regex="pattern",
152
+ file_pattern="*"
153
+ )
154
+ search_tool_resolver.tool = tool
155
+
156
+ response = search_tool_resolver.resolve()
157
+
158
+ assert isinstance(response, ToolResult)
159
+ assert not response.success
160
+ assert "Error: Search path not found" in response.message
161
+
162
+ # Add more tests as needed
163
+
autocoder/common/v2/code_editblock_manager.py CHANGED
@@ -396,24 +396,36 @@ class CodeEditBlockManager:
396
396
  def _format_blocks(merge: MergeCodeWithoutEffect) -> Tuple[str, str]:
397
397
  unmerged_formatted_text = ""
398
398
  for file_path, head, update in merge.failed_blocks:
399
- unmerged_formatted_text += "```lang\n"
400
- unmerged_formatted_text += f"##File: {file_path}\n"
401
- unmerged_formatted_text += "<<<<<<< SEARCH\n"
399
+ unmerged_formatted_text += "```lang"
400
+ unmerged_formatted_text += "\n"
401
+ unmerged_formatted_text += f"##File: {file_path}"
402
+ unmerged_formatted_text += "\n"
403
+ unmerged_formatted_text += "<<<<<<< SEARCH"
404
+ unmerged_formatted_text += "\n"
402
405
  unmerged_formatted_text += head
403
- unmerged_formatted_text += "=======\n"
406
+ unmerged_formatted_text += "\n"
407
+ unmerged_formatted_text += "======="
408
+ unmerged_formatted_text += "\n"
404
409
  unmerged_formatted_text += update
405
- unmerged_formatted_text += ">>>>>>> REPLACE\n"
410
+ unmerged_formatted_text += "\n"
411
+ unmerged_formatted_text += ">>>>>>> REPLACE"
412
+ unmerged_formatted_text += "\n"
406
413
  unmerged_formatted_text += "```"
407
414
  unmerged_formatted_text += "\n"
408
415
 
409
416
  merged_formatted_text = ""
410
417
  if merge.merged_blocks:
411
418
  for file_path, head, update in merge.merged_blocks:
412
- merged_formatted_text += "```lang\n"
413
- merged_formatted_text += f"##File: {file_path}\n"
419
+ merged_formatted_text += "```lang"
420
+ merged_formatted_text += "\n"
421
+ merged_formatted_text += f"##File: {file_path}"
422
+ merged_formatted_text += "\n"
414
423
  merged_formatted_text += head
415
- merged_formatted_text += "=======\n"
424
+ merged_formatted_text += "\n"
425
+ merged_formatted_text += "======="
426
+ merged_formatted_text += "\n"
416
427
  merged_formatted_text += update
428
+ merged_formatted_text += "\n"
417
429
  merged_formatted_text += "```"
418
430
  merged_formatted_text += "\n"
419
431
 
autocoder/index/index.py CHANGED
@@ -462,7 +462,7 @@ class IndexManager:
462
462
  def filter_exclude_files(self, file_path, exclude_patterns):
463
463
  # 增加 ignore_file_utils 的过滤
464
464
  try:
465
- from src.autocoder.ignorefiles import ignore_file_utils
465
+ from autocoder.common.ignorefiles import ignore_file_utils
466
466
  if ignore_file_utils.should_ignore(file_path):
467
467
  return True
468
468
  except Exception:
autocoder/models.py CHANGED
@@ -60,22 +60,35 @@ default_models_list = [
60
60
  "max_output_tokens": 8096
61
61
  },
62
62
  {
63
- "name": "openrouter/quasar-alpha",
63
+ "name": "openai/gpt-4.1-mini",
64
64
  "description": "",
65
- "model_name": "openrouter/quasar-alpha",
65
+ "model_name": "openai/gpt-4.1-mini",
66
66
  "model_type": "saas/openai",
67
67
  "base_url": "https://openrouter.ai/api/v1",
68
68
  "api_key_path": "",
69
69
  "is_reasoning": False,
70
- "input_price": 0.0,
71
- "output_price": 0.0,
70
+ "input_price": 2.8,
71
+ "output_price": 11.2,
72
72
  "average_speed": 0.0,
73
- "max_output_tokens": 8096*2
73
+ "max_output_tokens": 8096*3
74
74
  },
75
75
  {
76
- "name": "openrouter/optimus-alpha",
76
+ "name": "openai/gpt-4.1",
77
77
  "description": "",
78
- "model_name": "openrouter/optimus-alpha",
78
+ "model_name": "openai/gpt-4.1",
79
+ "model_type": "saas/openai",
80
+ "base_url": "https://openrouter.ai/api/v1",
81
+ "api_key_path": "",
82
+ "is_reasoning": False,
83
+ "input_price": 14.0,
84
+ "output_price": 42.0,
85
+ "average_speed": 0.0,
86
+ "max_output_tokens": 8096*3
87
+ },
88
+ {
89
+ "name": "openai/gpt-4.1-nano",
90
+ "description": "",
91
+ "model_name": "openai/gpt-4.1-nano",
79
92
  "model_type": "saas/openai",
80
93
  "base_url": "https://openrouter.ai/api/v1",
81
94
  "api_key_path": "",
@@ -83,8 +96,8 @@ default_models_list = [
83
96
  "input_price": 0.0,
84
97
  "output_price": 0.0,
85
98
  "average_speed": 0.0,
86
- "max_output_tokens": 8096*2
87
- },
99
+ "max_output_tokens": 8096*3
100
+ },
88
101
  {
89
102
  "name": "openrouter/google/gemini-2.5-pro-preview-03-25",
90
103
  "description": "",
autocoder/rag/api_server.py CHANGED
@@ -187,9 +187,16 @@ async def serve_static_file(full_path: str, request: Request):
187
187
  # 直接使用规范化的路径
188
188
  file_path = os.path.join("/", os.path.normpath(unquote(full_path)))
189
189
 
190
+ # 获取允许的静态文件目录
191
+ allowed_static_abs = request.app.state.allowed_static_abs
192
+ logger.info(f"==allowed_static_abs==: {allowed_static_abs}")
193
+
194
+ if file_path.startswith(("/_images","_images")):
195
+ file_path = os.path.join(allowed_static_abs, file_path)
196
+
190
197
  # 检查文件是否存在
191
198
  if not os.path.exists(file_path):
192
- raise FileNotFoundError(f"File not found: {file_path}")
199
+ raise FileNotFoundError(f"File not found: {file_path}")
193
200
 
194
201
  # 如果启用了Nginx X-Accel-Redirect,使用X-Accel特性
195
202
  if hasattr(request.app.state, "enable_nginx_x_accel") and request.app.state.enable_nginx_x_accel:
@@ -273,6 +280,9 @@ def serve(llm:ByzerLLM, args: ServerArgs):
273
280
  allowed_static_abs = os.path.abspath(allowed_static_dir)
274
281
  logger.info(f"Static files root directory: {allowed_static_abs}")
275
282
 
283
+ # 将允许的静态文件目录存储到应用状态中
284
+ router_app.state.allowed_static_abs = allowed_static_abs
285
+
276
286
  router_app.add_middleware(
277
287
  CORSMiddleware,
278
288
  allow_origins=args.allowed_origins,
@@ -309,9 +319,11 @@ def serve(llm:ByzerLLM, args: ServerArgs):
309
319
 
310
320
  # Check if path is in allowed directory
311
321
  abs_path = os.path.abspath(os.path.join("/", normalized_path))
322
+ if abs_path.startswith("/_images"):
323
+ return await call_next(request)
312
324
 
313
325
  # 使用预先计算好的allowed_static_abs
314
- is_allowed = abs_path.startswith(allowed_static_abs)
326
+ is_allowed = abs_path.startswith(request.app.state.allowed_static_abs)
315
327
 
316
328
  if not is_allowed:
317
329
  logger.warning(f"Unauthorized path access: {abs_path}")
autocoder/rag/cache/simple_cache.py CHANGED
@@ -24,6 +24,7 @@ from .failed_files_utils import load_failed_files, save_failed_files
24
24
  from autocoder.common import AutoCoderArgs
25
25
  from byzerllm import SimpleByzerLLM, ByzerLLM
26
26
  from autocoder.utils.llms import get_llm_names
27
+ from autocoder.common.file_monitor.monitor import get_file_monitor, Change
27
28
 
28
29
 
29
30
  default_ignore_dirs = [
@@ -50,7 +51,7 @@ def generate_content_md5(content: Union[str, bytes]) -> str:
50
51
 
51
52
 
52
53
  class AutoCoderRAGAsyncUpdateQueue(BaseCacheManager):
53
- def __init__(self, path: str, ignore_spec, required_exts: list, update_interval: int = 5, args: Optional[AutoCoderArgs] = None, llm: Optional[Union[ByzerLLM, SimpleByzerLLM, str]] = None):
54
+ def __init__(self, path: str, ignore_spec, required_exts: list, args: Optional[AutoCoderArgs] = None, llm: Optional[Union[ByzerLLM, SimpleByzerLLM, str]] = None):
54
55
  """
55
56
  初始化异步更新队列,用于管理代码文件的缓存。
56
57
 
@@ -58,7 +59,8 @@ class AutoCoderRAGAsyncUpdateQueue(BaseCacheManager):
58
59
  path: 需要索引的代码库根目录
59
60
  ignore_spec: 指定哪些文件/目录应被忽略的规则
60
61
  required_exts: 需要处理的文件扩展名列表
61
- update_interval: 自动触发更新的时间间隔(秒),默认为5秒
62
+ args: AutoCoderArgs 对象,包含配置信息
63
+ llm: 用于代码分析的 LLM 实例
62
64
 
63
65
  缓存结构 (self.cache):
64
66
  self.cache 是一个字典,其结构如下:
@@ -99,7 +101,6 @@ class AutoCoderRAGAsyncUpdateQueue(BaseCacheManager):
99
101
  self.args = args
100
102
  self.llm = llm
101
103
  self.product_mode = args.product_mode or "lite"
102
- self.update_interval = update_interval
103
104
  self.queue = []
104
105
  self.cache = {} # 初始化为空字典,稍后通过 read_cache() 填充
105
106
  self.lock = threading.Lock()
@@ -115,10 +116,16 @@ class AutoCoderRAGAsyncUpdateQueue(BaseCacheManager):
115
116
  self.queue_thread.daemon = True
116
117
  self.queue_thread.start()
117
118
 
118
- # 启动定时触发更新的线程
119
- self.update_thread = threading.Thread(target=self._periodic_update)
120
- self.update_thread.daemon = True
121
- self.update_thread.start()
119
+ # 注册文件监控回调
120
+ self.file_monitor = get_file_monitor(self.path)
121
+ # 注册根目录的监控,这样可以捕获所有子目录和文件的变化
122
+ self.file_monitor.register(self.path, self._on_file_change)
123
+ # 确保监控器已启动
124
+ if not self.file_monitor.is_running():
125
+ self.file_monitor.start()
126
+ logger.info(f"Started file monitor for {self.path}")
127
+ else:
128
+ logger.info(f"File monitor already running for {self.path}")
122
129
 
123
130
  self.cache = self.read_cache()
124
131
 
@@ -130,37 +137,57 @@ class AutoCoderRAGAsyncUpdateQueue(BaseCacheManager):
130
137
  logger.error(f"Error in process_queue: {e}")
131
138
  time.sleep(1) # 避免过于频繁的检查
132
139
 
133
- def _periodic_update(self):
134
- """定时触发文件更新检查"""
135
- while not self.stop_event.is_set():
136
- try:
137
- logger.debug(
138
- f"Periodic update triggered (every {self.update_interval}s)")
139
- # 如果没有被初始化过,不会增量触发
140
- if not self.cache:
141
- time.sleep(self.update_interval)
142
- continue
143
- self.trigger_update()
144
- except Exception as e:
145
- logger.error(f"Error in periodic update: {e}")
146
- time.sleep(self.update_interval)
140
+ def _on_file_change(self, change_type: Change, file_path: str):
141
+ """
142
+ 文件监控回调函数,当文件发生变化时触发更新
143
+
144
+ 参数:
145
+ change_type: 变化类型 (Change.added, Change.modified, Change.deleted)
146
+ file_path: 发生变化的文件路径
147
+ """
148
+ try:
149
+ # 如果缓存还没有初始化,跳过触发
150
+ if not self.cache:
151
+ return
152
+
153
+ # 检查文件扩展名,如果不在需要处理的扩展名列表中,跳过
154
+ if self.required_exts and not any(file_path.endswith(ext) for ext in self.required_exts):
155
+ return
156
+
157
+ # 检查是否在忽略规则中
158
+ if self.ignore_spec and self.ignore_spec.match_file(os.path.relpath(file_path, self.path)):
159
+ return
160
+
161
+ logger.info(f"File change detected: {change_type} - {file_path}")
162
+ self.trigger_update()
163
+ except Exception as e:
164
+ logger.error(f"Error in file change handler: {e}")
165
+ logger.exception(e)
147
166
 
148
167
  def stop(self):
149
168
  self.stop_event.set()
150
- self.queue_thread.join()
151
- self.update_thread.join()
169
+ # 取消注册文件监控回调
170
+ try:
171
+ self.file_monitor.unregister(self.path, self._on_file_change)
172
+ logger.info(f"Unregistered file monitor callback for {self.path}")
173
+ except Exception as e:
174
+ logger.error(f"Error unregistering file monitor callback: {e}")
175
+ # 只等待队列处理线程结束
176
+ if hasattr(self, 'queue_thread') and self.queue_thread.is_alive():
177
+ self.queue_thread.join(timeout=2.0)
152
178
 
153
179
  def fileinfo_to_tuple(self, file_info: FileInfo) -> Tuple[str, str, float, str]:
154
180
  return (file_info.file_path, file_info.relative_path, file_info.modify_time, file_info.file_md5)
155
181
 
156
182
  def __del__(self):
183
+ # 确保在对象被销毁时停止监控并清理资源
157
184
  self.stop()
158
185
 
159
186
  def load_first(self):
160
187
  with self.lock:
161
188
  if self.cache:
162
189
  return
163
- files_to_process = []
190
+ files_to_process = []
164
191
  for file_info in self.get_all_files():
165
192
  file_path, _, modify_time, file_md5 = file_info
166
193
  if (
@@ -175,7 +202,7 @@ class AutoCoderRAGAsyncUpdateQueue(BaseCacheManager):
175
202
  # [process_file.remote(file_info) for file_info in files_to_process]
176
203
  # )
177
204
  from autocoder.rag.token_counter import initialize_tokenizer
178
- llm_name = get_llm_names(self.llm)[0] if self.llm else None
205
+ llm_name = get_llm_names(self.llm)[0] if self.llm else None
179
206
  with Pool(
180
207
  processes=os.cpu_count(),
181
208
  initializer=initialize_tokenizer,
@@ -184,8 +211,8 @@ class AutoCoderRAGAsyncUpdateQueue(BaseCacheManager):
184
211
 
185
212
  worker_func = functools.partial(
186
213
  process_file_in_multi_process, llm=llm_name, product_mode=self.product_mode)
187
- results = pool.map(worker_func, files_to_process)
188
-
214
+ results = pool.map(worker_func, files_to_process)
215
+
189
216
  for file_info, result in zip(files_to_process, results):
190
217
  if result: # 只有当result不为空时才更新缓存
191
218
  self.update_cache(file_info, result)
@@ -203,16 +230,15 @@ class AutoCoderRAGAsyncUpdateQueue(BaseCacheManager):
203
230
  file_path, relative_path, modify_time, file_md5 = file_info
204
231
  current_files.add(file_path)
205
232
  # 如果文件曾经解析失败,跳过本次增量更新
206
- if file_path in self.failed_files:
207
- # logger.info(f"文件 {file_path} 之前解析失败,跳过此次更新")
233
+ if file_path in self.failed_files:
208
234
  continue
209
- # 变更检测
235
+ # 变更检测
210
236
  if (
211
237
  file_path not in self.cache
212
238
  or self.cache[file_path].get("md5", "") != file_md5
213
239
  ):
214
240
  files_to_process.append(
215
- (file_path, relative_path, modify_time, file_md5))
241
+ (file_path, relative_path, modify_time, file_md5))
216
242
 
217
243
  deleted_files = set(self.cache.keys()) - current_files
218
244
  logger.info(f"files_to_process: {files_to_process}")
@@ -289,6 +315,8 @@ class AutoCoderRAGAsyncUpdateQueue(BaseCacheManager):
289
315
  for line in f:
290
316
  data = json.loads(line)
291
317
  cache[data["file_path"]] = data
318
+ else:
319
+ self.load_first()
292
320
  return cache
293
321
 
294
322
  def write_cache(self):
@@ -366,6 +394,9 @@ class AutoCoderRAGAsyncUpdateQueue(BaseCacheManager):
366
394
  dirs[:] = [d for d in dirs if not d.startswith(
367
395
  ".") and d not in default_ignore_dirs]
368
396
 
397
+ # Filter out files that start with a dot
398
+ files[:] = [f for f in files if not f.startswith(".")]
399
+
369
400
  if self.ignore_spec:
370
401
  relative_root = os.path.relpath(root, self.path)
371
402
  dirs[:] = [
@@ -390,6 +421,5 @@ class AutoCoderRAGAsyncUpdateQueue(BaseCacheManager):
390
421
  modify_time = os.path.getmtime(file_path)
391
422
  file_md5 = generate_file_md5(file_path)
392
423
  all_files.append(
393
- (file_path, relative_path, modify_time, file_md5))
394
-
424
+ (file_path, relative_path, modify_time, file_md5))
395
425
  return all_files
autocoder/rag/loaders/docx_loader.py CHANGED
@@ -1,5 +1,4 @@
1
1
  from io import BytesIO
2
- from autocoder.utils._markitdown import MarkItDown
3
2
  import traceback
4
3
 
5
4
  def extract_text_from_docx_old(docx_path):
@@ -13,6 +12,7 @@ def extract_text_from_docx_old(docx_path):
13
12
 
14
13
  def extract_text_from_docx(docx_path):
15
14
  try:
15
+ from autocoder.utils._markitdown import MarkItDown
16
16
  md_converter = MarkItDown()
17
17
  result = md_converter.convert(docx_path)
18
18
  return result.text_content