auto_coder-0.1.361-py3-none-any.whl → auto_coder-0.1.363-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of auto-coder might be problematic.

Files changed (57)
  1. {auto_coder-0.1.361.dist-info → auto_coder-0.1.363.dist-info}/METADATA +2 -1
  2. {auto_coder-0.1.361.dist-info → auto_coder-0.1.363.dist-info}/RECORD +57 -29
  3. autocoder/agent/auto_learn.py +249 -262
  4. autocoder/agent/base_agentic/__init__.py +0 -0
  5. autocoder/agent/base_agentic/agent_hub.py +169 -0
  6. autocoder/agent/base_agentic/agentic_lang.py +112 -0
  7. autocoder/agent/base_agentic/agentic_tool_display.py +180 -0
  8. autocoder/agent/base_agentic/base_agent.py +1582 -0
  9. autocoder/agent/base_agentic/default_tools.py +683 -0
  10. autocoder/agent/base_agentic/test_base_agent.py +82 -0
  11. autocoder/agent/base_agentic/tool_registry.py +425 -0
  12. autocoder/agent/base_agentic/tools/__init__.py +12 -0
  13. autocoder/agent/base_agentic/tools/ask_followup_question_tool_resolver.py +72 -0
  14. autocoder/agent/base_agentic/tools/attempt_completion_tool_resolver.py +37 -0
  15. autocoder/agent/base_agentic/tools/base_tool_resolver.py +35 -0
  16. autocoder/agent/base_agentic/tools/example_tool_resolver.py +46 -0
  17. autocoder/agent/base_agentic/tools/execute_command_tool_resolver.py +72 -0
  18. autocoder/agent/base_agentic/tools/list_files_tool_resolver.py +110 -0
  19. autocoder/agent/base_agentic/tools/plan_mode_respond_tool_resolver.py +35 -0
  20. autocoder/agent/base_agentic/tools/read_file_tool_resolver.py +54 -0
  21. autocoder/agent/base_agentic/tools/replace_in_file_tool_resolver.py +156 -0
  22. autocoder/agent/base_agentic/tools/search_files_tool_resolver.py +134 -0
  23. autocoder/agent/base_agentic/tools/talk_to_group_tool_resolver.py +96 -0
  24. autocoder/agent/base_agentic/tools/talk_to_tool_resolver.py +79 -0
  25. autocoder/agent/base_agentic/tools/use_mcp_tool_resolver.py +44 -0
  26. autocoder/agent/base_agentic/tools/write_to_file_tool_resolver.py +58 -0
  27. autocoder/agent/base_agentic/types.py +189 -0
  28. autocoder/agent/base_agentic/utils.py +100 -0
  29. autocoder/auto_coder.py +1 -1
  30. autocoder/auto_coder_runner.py +36 -14
  31. autocoder/chat/conf_command.py +11 -10
  32. autocoder/commands/auto_command.py +227 -159
  33. autocoder/common/__init__.py +2 -2
  34. autocoder/common/ignorefiles/ignore_file_utils.py +12 -8
  35. autocoder/common/result_manager.py +10 -2
  36. autocoder/common/rulefiles/autocoderrules_utils.py +169 -0
  37. autocoder/common/save_formatted_log.py +1 -1
  38. autocoder/common/v2/agent/agentic_edit.py +53 -41
  39. autocoder/common/v2/agent/agentic_edit_tools/read_file_tool_resolver.py +15 -12
  40. autocoder/common/v2/agent/agentic_edit_tools/replace_in_file_tool_resolver.py +73 -1
  41. autocoder/common/v2/agent/agentic_edit_tools/write_to_file_tool_resolver.py +132 -4
  42. autocoder/common/v2/agent/agentic_edit_types.py +1 -2
  43. autocoder/common/v2/agent/agentic_tool_display.py +2 -3
  44. autocoder/common/v2/code_auto_generate_editblock.py +3 -1
  45. autocoder/index/index.py +14 -8
  46. autocoder/privacy/model_filter.py +297 -35
  47. autocoder/rag/long_context_rag.py +424 -397
  48. autocoder/rag/test_doc_filter.py +393 -0
  49. autocoder/rag/test_long_context_rag.py +473 -0
  50. autocoder/rag/test_token_limiter.py +342 -0
  51. autocoder/shadows/shadow_manager.py +1 -3
  52. autocoder/utils/_markitdown.py +22 -3
  53. autocoder/version.py +1 -1
  54. {auto_coder-0.1.361.dist-info → auto_coder-0.1.363.dist-info}/LICENSE +0 -0
  55. {auto_coder-0.1.361.dist-info → auto_coder-0.1.363.dist-info}/WHEEL +0 -0
  56. {auto_coder-0.1.361.dist-info → auto_coder-0.1.363.dist-info}/entry_points.txt +0 -0
  57. {auto_coder-0.1.361.dist-info → auto_coder-0.1.363.dist-info}/top_level.txt +0 -0
autocoder/rag/test_doc_filter.py
@@ -0,0 +1,393 @@
+ import pytest
+ import os
+ import shutil
+ import tempfile
+ from loguru import logger
+ from pathlib import Path
+ import time
+ from typing import Dict, Any, List, Optional, Generator, Tuple
+
+ # Import the modules under test
+ from autocoder.rag.doc_filter import DocFilter
+ from autocoder.common import AutoCoderArgs, SourceCode
+ from autocoder.rag.long_context_rag import RAGStat, RecallStat, ChunkStat, AnswerStat
+ from autocoder.rag.relevant_utils import DocFilterResult, DocRelevance, ProgressUpdate
+ from byzerllm.utils.types import SingleOutputMeta
+
+ # 1. Initialize the FileMonitor (must run before anything else)
+ @pytest.fixture(scope="function")
+ def setup_file_monitor(temp_test_dir):
+     """Initialize the FileMonitor; must be executed before anything else."""
+     try:
+         from autocoder.common.file_monitor.monitor import FileMonitor
+         monitor = FileMonitor(temp_test_dir)
+         monitor.reset_instance()
+         if not monitor.is_running():
+             monitor.start()
+             logger.info(f"File monitor started: {temp_test_dir}")
+         else:
+             logger.info(f"File monitor already running: {monitor.root_dir}")
+     except Exception as e:
+         logger.error(f"Error initializing file monitor: {e}")
+
+     # 2. Load rule files
+     try:
+         from autocoder.common.rulefiles.autocoderrules_utils import get_rules, reset_rules_manager
+         reset_rules_manager()
+         rules = get_rules(temp_test_dir)
+         logger.info(f"Loaded {len(rules)} rules")
+     except Exception as e:
+         logger.error(f"Error loading rules: {e}")
+
+     return temp_test_dir
+
+ # Pytest fixture: temporary test directory
+ @pytest.fixture(scope="function")
+ def temp_test_dir():
+     """Provide a temporary directory that is automatically cleaned up after the test."""
+     temp_dir = tempfile.mkdtemp()
+     logger.info(f"Created temporary test directory: {temp_dir}")
+     yield temp_dir
+     logger.info(f"Cleaning up temporary test directory: {temp_dir}")
+     shutil.rmtree(temp_dir)
+
+ # Pytest fixture: test file structure
+ @pytest.fixture(scope="function")
+ def test_files(temp_test_dir):
+     """Create the file/directory structure required by the tests."""
+     # Create sample files
+     file_structure = {
+         "docs/guide.md": "# DocFilter User Guide\nUse DocFilter to filter relevant documents.",
+         "docs/api.md": "# API Reference\n## Initialization\n```python\ndoc_filter = DocFilter(llm, args, on_ray=False, path='.')\n```",
+         "src/example.py": "def add(a, b):\n    return a + b\n\ndef subtract(a, b):\n    return a - b",
+         "src/utils/helpers.py": "def format_text(text):\n    return text.strip()\n\ndef count_words(text):\n    return len(text.split())",
+         ".gitignore": "*.log\n__pycache__/\n.cache/",
+         ".autocoderignore": "*.log\n__pycache__/\n.cache/"
+     }
+
+     for file_path, content in file_structure.items():
+         full_path = os.path.join(temp_test_dir, file_path)
+         os.makedirs(os.path.dirname(full_path), exist_ok=True)
+         with open(full_path, 'w', encoding='utf-8') as f:
+             f.write(content)
+
+     return temp_test_dir
+
+ # Pytest fixture: configuration arguments
+ @pytest.fixture
+ def test_args():
+     """Create configuration arguments for the tests."""
+     return AutoCoderArgs(
+         source_dir=".",
+         rag_context_window_limit=8000,
+         rag_doc_filter_relevance=3,
+         full_text_ratio=0.7,
+         segment_ratio=0.2,
+         index_filter_workers=1,
+         required_exts=".py,.md",
+         monitor_mode=False,
+         enable_hybrid_index=False,
+         rag_recall_max_queries=3
+     )
+
+ # 3. Load the tokenizer (must come after FileMonitor and rules initialization)
+ @pytest.fixture
+ def load_tokenizer_fixture(setup_file_monitor):
+     """Load the tokenizer; must come after FileMonitor and rules initialization."""
+     from autocoder.auto_coder_runner import load_tokenizer
+     load_tokenizer()
+     logger.info("Tokenizer loaded")
+     return True
+
+ # 4. Initialize the LLM
+ @pytest.fixture
+ def real_llm(load_tokenizer_fixture):
+     """Create a real LLM object; must come after the tokenizer is loaded."""
+     from autocoder.utils.llms import get_single_llm
+     llm = get_single_llm("v3_chat", product_mode="lite")
+     logger.info(f"LLM initialized: {llm.default_model_name}")
+     return llm
+
+ # 5. DocFilter instance
+ @pytest.fixture
+ def doc_filter(real_llm, test_args, test_files):
+     """Create a DocFilter instance."""
+     doc_filter = DocFilter(
+         llm=real_llm,
+         args=test_args,
+         on_ray=False,
+         path=test_files
+     )
+     logger.info("DocFilter initialized")
+     return doc_filter
+
+ # 6. Test document generator
+ @pytest.fixture
+ def test_documents(test_files):
+     """Generate documents for the tests."""
+     documents = [
+         SourceCode(
+             module_name=os.path.join(test_files, "docs/guide.md"),
+             source_code="# DocFilter User Guide\nUse DocFilter to filter relevant documents.",
+             metadata={}
+         ),
+         SourceCode(
+             module_name=os.path.join(test_files, "docs/api.md"),
+             source_code="# API Reference\n## Initialization\n```python\ndoc_filter = DocFilter(llm, args, on_ray=False, path='.')\n```",
+             metadata={}
+         ),
+         SourceCode(
+             module_name=os.path.join(test_files, "src/example.py"),
+             source_code="def add(a, b):\n    return a + b\n\ndef subtract(a, b):\n    return a - b",
+             metadata={}
+         ),
+         SourceCode(
+             module_name=os.path.join(test_files, "src/utils/helpers.py"),
+             source_code="def format_text(text):\n    return text.strip()\n\ndef count_words(text):\n    return len(text.split())",
+             metadata={}
+         )
+     ]
+
+     def document_generator():
+         for doc in documents:
+             yield doc
+
+     return document_generator
+
+ # --- Test cases ---
+
+ def test_filter_docs_basic(doc_filter, test_documents):
+     """Test DocFilter's basic document filtering."""
+     # Build the conversation
+     conversations = [{"role": "user", "content": "How do I use DocFilter to filter documents?"}]
+
+     # Run the document filter
+     result = doc_filter.filter_docs(
+         conversations=conversations,
+         documents=test_documents()
+     )
+
+     # Verify the result
+     assert result is not None, "should return a result"
+     assert len(result.docs) > 0, "should return at least one document"
+     assert len(result.input_tokens_counts) > 0, "should have input token counts"
+     assert len(result.generated_tokens_counts) > 0, "should have generated token counts"
+
+     # Check that every returned document carries relevance information
+     for doc in result.docs:
+         assert hasattr(doc, "relevance"), "should have relevance information"
+         assert isinstance(doc.relevance, DocRelevance), "relevance should be a DocRelevance instance"
+
+     # Log the test result details
+     logger.info("="*80)
+     logger.info("DocFilter basic functionality test results:")
+     logger.info("-"*80)
+     logger.info(f"Documents after filtering: {len(result.docs)}")
+     logger.info(f"Input token counts: {result.input_tokens_counts}")
+     logger.info(f"Generated token counts: {result.generated_tokens_counts}")
+     logger.info(f"Durations: {result.durations}")
+     logger.info(f"Model name: {result.model_name}")
+
+     # Log per-document relevance information
+     for i, doc in enumerate(result.docs):
+         rel_score = doc.relevance.score if hasattr(doc.relevance, 'score') else "unknown"
+         is_relevant = doc.relevance.is_relevant if hasattr(doc.relevance, 'is_relevant') else "unknown"
+         reason = doc.relevance.reason if hasattr(doc.relevance, 'reason') else "none"
+
+         logger.info(f"Document[{i}]: {os.path.basename(doc.source_code.module_name)}")
+         logger.info(f"  - relevance score: {rel_score}")
+         logger.info(f"  - is relevant: {is_relevant}")
+         logger.info(f"  - reason: {reason}")
+
+     logger.info("="*80)
+
+ def test_filter_docs_with_progress(doc_filter, test_documents):
+     """Test document filtering with progress reporting."""
+     # Build the conversation
+     conversations = [{"role": "user", "content": "How do I perform document filtering with DocFilter?"}]
+
+     # Use the progress-reporting filter method
+     progress_updates = []
+     final_result = None
+
+     # Collect progress updates and the final result
+     for progress_update, result in doc_filter.filter_docs_with_progress(
+         conversations=conversations,
+         documents=test_documents()
+     ):
+         if progress_update:
+             progress_updates.append(progress_update)
+         if result is not None:
+             final_result = result
+
+     # Verify the progress updates
+     assert len(progress_updates) > 0, "should have at least one progress update"
+     for update in progress_updates:
+         assert isinstance(update, ProgressUpdate), "progress updates should be ProgressUpdate instances"
+         assert hasattr(update, "completed"), "should have a completed attribute"
+         assert hasattr(update, "total"), "should have a total attribute"
+
+     # Verify the final result
+     assert final_result is not None, "should have a final result"
+     assert len(final_result.docs) > 0, "should return at least one document"
+
+     # Log the test result details
+     logger.info("="*80)
+     logger.info("DocFilter progress-reporting test results:")
+     logger.info("-"*80)
+     logger.info(f"Number of progress updates: {len(progress_updates)}")
+     logger.info(f"Final document count: {len(final_result.docs)}")
+
+     # Log the progress update information
+     for i, update in enumerate(progress_updates):
+         logger.info(f"Progress update[{i}]: {update.message} ({update.completed}/{update.total})")
+
+     logger.info("="*80)
+
+ def test_process_document_retrieval_integration(doc_filter, test_documents, real_llm):
+     """Integration test: simulate the _process_document_retrieval call in LongContextRAG."""
+     # Build the conversation and query
+     query = "How do I use DocFilter to filter documents?"
+     conversations = [{"role": "user", "content": query}]
+
+     # Prepare the RAG statistics
+     rag_stat = RAGStat(
+         recall_stat=RecallStat(
+             total_input_tokens=0,
+             total_generated_tokens=0,
+             model_name=real_llm.default_model_name,
+         ),
+         chunk_stat=ChunkStat(
+             total_input_tokens=0,
+             total_generated_tokens=0,
+             model_name=real_llm.default_model_name,
+         ),
+         answer_stat=AnswerStat(
+             total_input_tokens=0,
+             total_generated_tokens=0,
+             model_name=real_llm.default_model_name,
+         ),
+     )
+
+     # Simulate the _process_document_retrieval flow
+     # 1. Initial model information
+     mock_progress_items = []
+
+     mock_progress_items.append(("", SingleOutputMeta(
+         input_tokens_count=0,
+         generated_tokens_count=0,
+         reasoning_content=f"Searching documents with model {rag_stat.recall_stat.model_name}..."
+     )))
+
+     # Create the initial DocFilterResult
+     doc_filter_result = DocFilterResult(
+         docs=[],
+         raw_docs=[],
+         input_tokens_counts=[],
+         generated_tokens_counts=[],
+         durations=[],
+         model_name=rag_stat.recall_stat.model_name
+     )
+
+     # 2. Use the progress-reporting filter method
+     for progress_update, result in doc_filter.filter_docs_with_progress(
+         conversations=conversations,
+         documents=test_documents()
+     ):
+         if result is not None:
+             doc_filter_result = result
+         else:
+             # Emit a progress update
+             mock_progress_items.append(("", SingleOutputMeta(
+                 input_tokens_count=rag_stat.recall_stat.total_input_tokens,
+                 generated_tokens_count=rag_stat.recall_stat.total_generated_tokens,
+                 reasoning_content=f"{progress_update.message} ({progress_update.completed}/{progress_update.total})"
+             )))
+
+     # 3. Update the statistics
+     rag_stat.recall_stat.total_input_tokens += sum(doc_filter_result.input_tokens_counts)
+     rag_stat.recall_stat.total_generated_tokens += sum(doc_filter_result.generated_tokens_counts)
+     rag_stat.recall_stat.model_name = doc_filter_result.model_name
+
+     relevant_docs = doc_filter_result.docs
+
+     # 4. Append the filtering result information
+     mock_progress_items.append(("", SingleOutputMeta(
+         input_tokens_count=rag_stat.recall_stat.total_input_tokens,
+         generated_tokens_count=rag_stat.recall_stat.total_generated_tokens,
+         reasoning_content=f"Document filtering complete; found {len(relevant_docs)} relevant documents."
+     )))
+
+     # 5. Keep only highly relevant documents
+     highly_relevant_docs = [doc for doc in relevant_docs if doc.relevance.is_relevant]
+     if highly_relevant_docs:
+         relevant_docs = highly_relevant_docs
+         logger.info(f"Found {len(relevant_docs)} highly relevant documents")
+
+     # 6. Return the final result
+     mock_progress_items.append({"result": relevant_docs})
+
+     # Verify the results
+     assert len(mock_progress_items) > 0, "should have processing results"
+     assert isinstance(mock_progress_items[-1], dict), "the last item should be a result dict"
+     assert "result" in mock_progress_items[-1], "the result dict should contain a 'result' key"
+     assert len(mock_progress_items[-1]["result"]) > 0, "should return at least one document"
+     assert rag_stat.recall_stat.total_input_tokens > 0, "input token count should increase"
+     assert rag_stat.recall_stat.total_generated_tokens > 0, "generated token count should increase"
+
+     # Log the test result details
+     logger.info("="*80)
+     logger.info("DocFilter integration test results:")
+     logger.info("-"*80)
+     logger.info(f"Number of processed items: {len(mock_progress_items)}")
+     logger.info(f"Number of relevant documents: {len(mock_progress_items[-1]['result'])}")
+     logger.info(f"Total input tokens: {rag_stat.recall_stat.total_input_tokens}")
+     logger.info(f"Total generated tokens: {rag_stat.recall_stat.total_generated_tokens}")
+
+     # Log highly relevant documents
+     if highly_relevant_docs:
+         logger.info(f"Number of highly relevant documents: {len(highly_relevant_docs)}")
+         for i, doc in enumerate(highly_relevant_docs):
+             logger.info(f"Highly relevant document[{i}]: {os.path.basename(doc.source_code.module_name)}")
+             logger.info(f"  - relevance score: {doc.relevance.score}")
+             logger.info(f"  - reason: {doc.relevance.reason}")
+
+     logger.info("="*80)
+
+ def test_extract_search_queries_integration(doc_filter, real_llm, test_args):
+     """Integration test for search query extraction."""
+     # Build the conversation
+     conversations = [
+         {"role": "system", "content": "You are an assistant who helps users solve problems."},
+         {"role": "user", "content": "I want to know how DocFilter works and how to use it to filter documents."}
+     ]
+
+     # Use the extract_search_queries function
+     from autocoder.rag.conversation_to_queries import extract_search_queries
+
+     queries = extract_search_queries(
+         conversations=conversations,
+         args=test_args,
+         llm=real_llm,
+         max_queries=test_args.rag_recall_max_queries
+     )
+
+     # Verify the results
+     assert queries is not None, "should return query results"
+     assert len(queries) > 0, "should have at least one query"
+
+     # Log the test result details
+     logger.info("="*80)
+     logger.info("Query extraction test results:")
+     logger.info("-"*80)
+     logger.info(f"Number of extracted queries: {len(queries)}")
+
+     for i, query in enumerate(queries):
+         logger.info(f"Query[{i}]: {query.query}")
+         if hasattr(query, 'explanation') and query.explanation:
+             logger.info(f"  - explanation: {query.explanation}")
+
+     logger.info("="*80)
+
+ if __name__ == "__main__":
+     pytest.main(["-xvs", "test_doc_filter.py"])
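
Fixtures 1-4 in the diff above encode a strict initialization order: FileMonitor first, then rule loading, then the tokenizer, and only then the LLM and the DocFilter instance. Below is a minimal sketch of that same sequence outside pytest, reusing only imports and calls that appear in the diff; the project path, the AutoCoderArgs field subset, and the "v3_chat" model name are illustrative assumptions carried over from the fixtures, not a documented API contract:

    # Sketch only: mirrors the fixture ordering in test_doc_filter.py above.
    from autocoder.common.file_monitor.monitor import FileMonitor
    from autocoder.common.rulefiles.autocoderrules_utils import get_rules, reset_rules_manager
    from autocoder.auto_coder_runner import load_tokenizer
    from autocoder.utils.llms import get_single_llm
    from autocoder.rag.doc_filter import DocFilter
    from autocoder.common import AutoCoderArgs

    project_dir = "."  # illustrative; the tests use a temporary directory

    # 1. FileMonitor must be initialized before everything else
    monitor = FileMonitor(project_dir)
    monitor.reset_instance()
    if not monitor.is_running():
        monitor.start()

    # 2. Rules are loaded once the monitor is up
    reset_rules_manager()
    rules = get_rules(project_dir)

    # 3. The tokenizer comes after FileMonitor and rules
    load_tokenizer()

    # 4. Only then are the LLM and DocFilter created
    llm = get_single_llm("v3_chat", product_mode="lite")
    args = AutoCoderArgs(source_dir=project_dir, rag_doc_filter_relevance=3)  # subset of the test_args fixture
    doc_filter = DocFilter(llm=llm, args=args, on_ray=False, path=project_dir)

The file's own __main__ block runs the suite with pytest.main(["-xvs", "test_doc_filter.py"]), which is equivalent to invoking pytest -xvs on autocoder/rag/test_doc_filter.py from the command line.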