auto-coder 0.1.375__py3-none-any.whl → 0.1.376__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of auto-coder might be problematic. Click here for more details.

Files changed (51) hide show
  1. {auto_coder-0.1.375.dist-info → auto_coder-0.1.376.dist-info}/METADATA +1 -1
  2. {auto_coder-0.1.375.dist-info → auto_coder-0.1.376.dist-info}/RECORD +17 -51
  3. autocoder/agent/base_agentic/base_agent.py +9 -8
  4. autocoder/auto_coder_rag.py +12 -0
  5. autocoder/models.py +2 -2
  6. autocoder/rag/cache/local_duckdb_storage_cache.py +63 -33
  7. autocoder/rag/conversation_to_queries.py +37 -5
  8. autocoder/rag/long_context_rag.py +161 -41
  9. autocoder/rag/tools/recall_tool.py +2 -1
  10. autocoder/rag/tools/search_tool.py +2 -1
  11. autocoder/rag/types.py +36 -0
  12. autocoder/utils/_markitdown.py +59 -13
  13. autocoder/version.py +1 -1
  14. autocoder/agent/agentic_edit.py +0 -833
  15. autocoder/agent/agentic_edit_tools/__init__.py +0 -28
  16. autocoder/agent/agentic_edit_tools/ask_followup_question_tool_resolver.py +0 -32
  17. autocoder/agent/agentic_edit_tools/attempt_completion_tool_resolver.py +0 -29
  18. autocoder/agent/agentic_edit_tools/base_tool_resolver.py +0 -29
  19. autocoder/agent/agentic_edit_tools/execute_command_tool_resolver.py +0 -84
  20. autocoder/agent/agentic_edit_tools/list_code_definition_names_tool_resolver.py +0 -75
  21. autocoder/agent/agentic_edit_tools/list_files_tool_resolver.py +0 -62
  22. autocoder/agent/agentic_edit_tools/plan_mode_respond_tool_resolver.py +0 -30
  23. autocoder/agent/agentic_edit_tools/read_file_tool_resolver.py +0 -36
  24. autocoder/agent/agentic_edit_tools/replace_in_file_tool_resolver.py +0 -95
  25. autocoder/agent/agentic_edit_tools/search_files_tool_resolver.py +0 -70
  26. autocoder/agent/agentic_edit_tools/use_mcp_tool_resolver.py +0 -55
  27. autocoder/agent/agentic_edit_tools/write_to_file_tool_resolver.py +0 -98
  28. autocoder/agent/agentic_edit_types.py +0 -124
  29. autocoder/auto_coder_lang.py +0 -60
  30. autocoder/auto_coder_rag_client_mcp.py +0 -170
  31. autocoder/auto_coder_rag_mcp.py +0 -193
  32. autocoder/common/llm_rerank.py +0 -84
  33. autocoder/common/model_speed_test.py +0 -392
  34. autocoder/common/v2/agent/agentic_edit_conversation.py +0 -188
  35. autocoder/common/v2/agent/ignore_utils.py +0 -50
  36. autocoder/dispacher/actions/plugins/action_translate.py +0 -214
  37. autocoder/ignorefiles/__init__.py +0 -4
  38. autocoder/ignorefiles/ignore_file_utils.py +0 -63
  39. autocoder/ignorefiles/test_ignore_file_utils.py +0 -91
  40. autocoder/linters/code_linter.py +0 -588
  41. autocoder/rag/loaders/test_image_loader.py +0 -209
  42. autocoder/rag/raw_rag.py +0 -96
  43. autocoder/rag/simple_directory_reader.py +0 -646
  44. autocoder/rag/simple_rag.py +0 -404
  45. autocoder/regex_project/__init__.py +0 -162
  46. autocoder/utils/coder.py +0 -125
  47. autocoder/utils/tests.py +0 -37
  48. {auto_coder-0.1.375.dist-info → auto_coder-0.1.376.dist-info}/LICENSE +0 -0
  49. {auto_coder-0.1.375.dist-info → auto_coder-0.1.376.dist-info}/WHEEL +0 -0
  50. {auto_coder-0.1.375.dist-info → auto_coder-0.1.376.dist-info}/entry_points.txt +0 -0
  51. {auto_coder-0.1.375.dist-info → auto_coder-0.1.376.dist-info}/top_level.txt +0 -0
@@ -1,209 +0,0 @@
1
- import os
2
- import re
3
- import tempfile
4
-
5
- import pytest
6
-
7
- from autocoder.rag.loaders.image_loader import ImageLoader, ReplaceInFileTool
8
- from autocoder.utils.llms import get_single_llm
9
-
10
- # 模拟一个简单的llm对象(避免测试中真实调用LLM)
11
- class DummyLLM:
12
- def get_sub_client(self, name):
13
- return None
14
-
15
- def run(self, *args, **kwargs):
16
- return "dummy response"
17
-
18
- @pytest.fixture(scope="module")
19
- def dummy_llm():
20
- # 这里可以替换为真实llm,或Mock
21
- return DummyLLM()
22
-
23
- def test_parse_diff_basic():
24
- diff = """
25
- <<<<<<< SEARCH
26
- foo
27
- bar
28
- =======
29
- hello
30
- world
31
- >>>>>>> REPLACE
32
- """
33
- blocks = ImageLoader.parse_diff(diff)
34
- assert len(blocks) == 1
35
- search, replace = blocks[0]
36
- assert "foo" in search
37
- assert "hello" in replace
38
-
39
- def test_extract_replace_in_file_tools():
40
- text = """
41
- <replace_in_file>
42
- <path>file1.py</path>
43
- <diff>
44
- <<<<<<< SEARCH
45
- old content
46
- =======
47
- new content
48
- >>>>>>> REPLACE
49
- </diff>
50
- </replace_in_file>
51
-
52
- <replace_in_file>
53
- <path>file2.py</path>
54
- <diff>
55
- <<<<<<< SEARCH
56
- x=1
57
- =======
58
- x=2
59
- >>>>>>> REPLACE
60
- </diff>
61
- </replace_in_file>
62
- """
63
- tools = ImageLoader.extract_replace_in_file_tools(text)
64
- assert len(tools) == 2
65
- assert tools[0].path == "file1.py"
66
- assert "old content" in tools[0].diff
67
- assert tools[1].path == "file2.py"
68
- assert "x=1" in tools[1].diff
69
-
70
- def test_format_table_in_content_apply_diff(dummy_llm):
71
- # 模拟一个OCR文本和对应diff
72
- original = """这里是介绍
73
- 产品 价格 数量
74
- 苹果 5 10
75
- 香蕉 3 20
76
- 结束"""
77
-
78
- # 构造符合replace_in_file格式的llm返回
79
- llm_response = """
80
- <replace_in_file>
81
- <path>content</path>
82
- <diff>
83
- <<<<<<< SEARCH
84
- 产品 价格 数量
85
- 苹果 5 10
86
- 香蕉 3 20
87
- =======
88
- | 产品 | 价格 | 数量 |
89
- | --- | --- | --- |
90
- | 苹果 | 5 | 10 |
91
- | 香蕉 | 3 | 20 |
92
- >>>>>>> REPLACE
93
- </diff>
94
- </replace_in_file>
95
- """
96
-
97
- # 模拟调用llm时返回llm_response
98
- class FakeLLM:
99
- def get_sub_client(self, name):
100
- return None
101
-
102
- def run(self, *args, **kwargs):
103
- return llm_response
104
-
105
- fake_llm = FakeLLM()
106
-
107
- # patch _format_table 方法,让它直接返回llm_response
108
- import byzerllm
109
-
110
- class DummyPrompt:
111
- def __call__(self, *args, **kwargs):
112
- # 使其可装饰函数
113
- def decorator(func):
114
- class FakePromptWrapper:
115
- def with_llm(self_inner, llm_obj):
116
- class Runner:
117
- def run(self_inner_inner, content):
118
- return llm_response
119
- return Runner()
120
- return FakePromptWrapper()
121
- return decorator
122
-
123
- orig_prompt = byzerllm.prompt
124
- byzerllm.prompt = DummyPrompt()
125
-
126
- try:
127
- formatted = ImageLoader.format_table_in_content(original, llm=fake_llm)
128
- assert "| 产品 | 价格 | 数量 |" in formatted
129
- assert "这里是介绍" in formatted
130
- assert "结束" in formatted
131
- finally:
132
- byzerllm.prompt = orig_prompt
133
-
134
- def test_paddleocr_extract_text_type_error_fix(monkeypatch):
135
- """
136
- 测试paddleocr_extract_text对异常结构的兼容性,模拟paddleocr.ocr返回非字符串结构
137
- """
138
- # 模拟PaddleOCR类
139
- class FakeOCR:
140
- def __init__(self, **kwargs):
141
- pass
142
-
143
- def ocr(self, file_path, **kwargs):
144
- # 模拟返回嵌套list,第二个元素是list而非str,之前会报错
145
- return [
146
- [
147
- # page 1
148
- [[ [0,0],[1,1] ], (["text_in_list"], 0.9)],
149
- [[ [0,0],[1,1] ], ("normal text", 0.95)],
150
- ]
151
- ]
152
- # patch PaddleOCR
153
- import autocoder.rag.loaders.image_loader as ilmod
154
- monkeypatch.setattr(ilmod, "PaddleOCR", FakeOCR)
155
-
156
- # 创建临时文件模拟图片
157
- with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmpf:
158
- tmp_path = tmpf.name
159
-
160
- try:
161
- text = ImageLoader.paddleocr_extract_text(tmp_path)
162
- # 应该不会抛异常,且返回内容包含normal text和text_in_list
163
- assert "normal text" in text
164
- assert "text_in_list" in text
165
- finally:
166
- os.remove(tmp_path)
167
-
168
- def test_paddlex_table_extract_markdown_no_paddlex(monkeypatch):
169
- # paddlex_module为None时应返回""
170
- import autocoder.rag.loaders.image_loader as ilmod
171
- monkeypatch.setattr(ilmod, "paddlex_module", None)
172
- md = ImageLoader.paddlex_table_extract_markdown("dummy_path.png")
173
- assert md == ""
174
-
175
- def test_html_table_to_markdown_simple():
176
- html = """
177
- <table>
178
- <tr><th>头1</th><th>头2</th></tr>
179
- <tr><td>数据1</td><td>数据2</td></tr>
180
- <tr><td>数据3</td><td>数据4</td></tr>
181
- </table>
182
- """
183
- md = ImageLoader.html_table_to_markdown(html)
184
- assert "| 头1 | 头2 |" in md
185
- assert "| 数据1 | 数据2 |" in md
186
- assert "| 数据3 | 数据4 |" in md
187
-
188
- def test_extract_text_from_image_unknown_engine(dummy_llm):
189
- res = ImageLoader.extract_text_from_image("non_exist.png", dummy_llm, engine="xxx")
190
- assert res == ""
191
-
192
- def test_image_to_markdown_creates_file(tmp_path, dummy_llm, monkeypatch):
193
- # 准备一个假图片文件
194
- imgfile = tmp_path / "testimg.png"
195
- imgfile.write_bytes(b"fake image content")
196
-
197
- # monkeypatch extract_text_from_image返回固定内容
198
- monkeypatch.setattr(ImageLoader, "extract_text_from_image", staticmethod(lambda *args, **kwargs: "# hello world"))
199
-
200
- md_content = ImageLoader.image_to_markdown(str(imgfile), dummy_llm, engine="vl")
201
- assert "# hello world" in md_content
202
-
203
- md_file = imgfile.with_suffix(".md")
204
- assert md_file.exists()
205
- assert "# hello world" in md_file.read_text()
206
-
207
- if __name__ == "__main__":
208
- # 手动运行全部测试
209
- pytest.main([__file__])
autocoder/rag/raw_rag.py DELETED
@@ -1,96 +0,0 @@
1
- import os
2
- import byzerllm
3
- from byzerllm.apps.byzer_storage.simple_api import (
4
- ByzerStorage,
5
- DataType,
6
- FieldOption,
7
- SortOption,
8
- )
9
-
10
-
11
- def chunk_text(text, max_length=1000):
12
- chunks = []
13
- current_chunk = []
14
- current_length = 0
15
-
16
- for line in text.split("\n"):
17
- if current_length + len(line) > max_length and current_chunk:
18
- chunks.append("\n".join(current_chunk))
19
- current_chunk = []
20
- current_length = 0
21
- current_chunk.append(line)
22
- current_length += len(line)
23
-
24
- if current_chunk:
25
- chunks.append("\n".join(current_chunk))
26
-
27
- return chunks
28
-
29
-
30
- @byzerllm.prompt()
31
- def process_query(context: str, query: str) -> str:
32
- """
33
- Based on the following context, please answer the query:
34
-
35
- Context:
36
- {{ context }}
37
-
38
- Query: {{ query }}
39
-
40
- Please provide a concise and accurate answer based on the given context.
41
- """
42
-
43
-
44
- class RawRAG:
45
- def __init__(
46
- self, llm_model="v3_chat", emb_model="emb", storage_name="byzerai_store"
47
- ):
48
- self.storage = ByzerStorage(
49
- storage_name, "rag_database", "rag_table", emb_model=emb_model
50
- )
51
- self.llm = byzerllm.ByzerLLM()
52
- self.llm.setup_default_model_name(llm_model)
53
-
54
- # Create schema if not exists
55
- _ = (
56
- self.storage.schema_builder()
57
- .add_field("_id", DataType.STRING)
58
- .add_field("content", DataType.STRING, [FieldOption.ANALYZE])
59
- .add_field("raw_content", DataType.STRING, [FieldOption.NO_INDEX])
60
- .add_array_field("vector", DataType.FLOAT)
61
- .execute()
62
- )
63
-
64
- def build(self, directory):
65
- for filename in os.listdir(directory):
66
- if filename.endswith(".md"):
67
- with open(os.path.join(directory, filename), "r") as file:
68
- content = file.read()
69
- chunks = chunk_text(content)
70
-
71
- for i, chunk in enumerate(chunks):
72
- item = {
73
- "_id": f"{filename}_{i}",
74
- "content": chunk,
75
- "raw_content": chunk,
76
- "vector": chunk,
77
- }
78
- self.storage.write_builder().add_items(
79
- [item], vector_fields=["vector"], search_fields=["content"]
80
- ).execute()
81
-
82
- self.storage.commit()
83
-
84
- def query(self, query_text):
85
- query = self.storage.query_builder()
86
- query.set_vector_query(query_text, fields=["vector"])
87
- results = query.execute()
88
-
89
- if results:
90
- context = results[0]["raw_content"]
91
- response = process_query.with_llm(self.llm).run(
92
- context=context, query=query_text
93
- )
94
- return response
95
- else:
96
- return "Sorry, I couldn't find relevant information to answer your query."