auto-coder 0.1.375__py3-none-any.whl → 0.1.376__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of auto-coder might be problematic. Click here for more details.
- {auto_coder-0.1.375.dist-info → auto_coder-0.1.376.dist-info}/METADATA +1 -1
- {auto_coder-0.1.375.dist-info → auto_coder-0.1.376.dist-info}/RECORD +17 -51
- autocoder/agent/base_agentic/base_agent.py +9 -8
- autocoder/auto_coder_rag.py +12 -0
- autocoder/models.py +2 -2
- autocoder/rag/cache/local_duckdb_storage_cache.py +63 -33
- autocoder/rag/conversation_to_queries.py +37 -5
- autocoder/rag/long_context_rag.py +161 -41
- autocoder/rag/tools/recall_tool.py +2 -1
- autocoder/rag/tools/search_tool.py +2 -1
- autocoder/rag/types.py +36 -0
- autocoder/utils/_markitdown.py +59 -13
- autocoder/version.py +1 -1
- autocoder/agent/agentic_edit.py +0 -833
- autocoder/agent/agentic_edit_tools/__init__.py +0 -28
- autocoder/agent/agentic_edit_tools/ask_followup_question_tool_resolver.py +0 -32
- autocoder/agent/agentic_edit_tools/attempt_completion_tool_resolver.py +0 -29
- autocoder/agent/agentic_edit_tools/base_tool_resolver.py +0 -29
- autocoder/agent/agentic_edit_tools/execute_command_tool_resolver.py +0 -84
- autocoder/agent/agentic_edit_tools/list_code_definition_names_tool_resolver.py +0 -75
- autocoder/agent/agentic_edit_tools/list_files_tool_resolver.py +0 -62
- autocoder/agent/agentic_edit_tools/plan_mode_respond_tool_resolver.py +0 -30
- autocoder/agent/agentic_edit_tools/read_file_tool_resolver.py +0 -36
- autocoder/agent/agentic_edit_tools/replace_in_file_tool_resolver.py +0 -95
- autocoder/agent/agentic_edit_tools/search_files_tool_resolver.py +0 -70
- autocoder/agent/agentic_edit_tools/use_mcp_tool_resolver.py +0 -55
- autocoder/agent/agentic_edit_tools/write_to_file_tool_resolver.py +0 -98
- autocoder/agent/agentic_edit_types.py +0 -124
- autocoder/auto_coder_lang.py +0 -60
- autocoder/auto_coder_rag_client_mcp.py +0 -170
- autocoder/auto_coder_rag_mcp.py +0 -193
- autocoder/common/llm_rerank.py +0 -84
- autocoder/common/model_speed_test.py +0 -392
- autocoder/common/v2/agent/agentic_edit_conversation.py +0 -188
- autocoder/common/v2/agent/ignore_utils.py +0 -50
- autocoder/dispacher/actions/plugins/action_translate.py +0 -214
- autocoder/ignorefiles/__init__.py +0 -4
- autocoder/ignorefiles/ignore_file_utils.py +0 -63
- autocoder/ignorefiles/test_ignore_file_utils.py +0 -91
- autocoder/linters/code_linter.py +0 -588
- autocoder/rag/loaders/test_image_loader.py +0 -209
- autocoder/rag/raw_rag.py +0 -96
- autocoder/rag/simple_directory_reader.py +0 -646
- autocoder/rag/simple_rag.py +0 -404
- autocoder/regex_project/__init__.py +0 -162
- autocoder/utils/coder.py +0 -125
- autocoder/utils/tests.py +0 -37
- {auto_coder-0.1.375.dist-info → auto_coder-0.1.376.dist-info}/LICENSE +0 -0
- {auto_coder-0.1.375.dist-info → auto_coder-0.1.376.dist-info}/WHEEL +0 -0
- {auto_coder-0.1.375.dist-info → auto_coder-0.1.376.dist-info}/entry_points.txt +0 -0
- {auto_coder-0.1.375.dist-info → auto_coder-0.1.376.dist-info}/top_level.txt +0 -0
|
@@ -1,209 +0,0 @@
|
|
|
1
|
-
import os
|
|
2
|
-
import re
|
|
3
|
-
import tempfile
|
|
4
|
-
|
|
5
|
-
import pytest
|
|
6
|
-
|
|
7
|
-
from autocoder.rag.loaders.image_loader import ImageLoader, ReplaceInFileTool
|
|
8
|
-
from autocoder.utils.llms import get_single_llm
|
|
9
|
-
|
|
10
|
-
# Simple stand-in for an LLM object so the tests never call a real model.
class DummyLLM:
    """Stub LLM with canned ``get_sub_client`` and ``run`` results."""

    def get_sub_client(self, name):
        """Return ``None`` regardless of the requested ``name``."""
        return None

    def run(self, *args, **kwargs):
        """Ignore every argument and return a fixed placeholder reply."""
        return "dummy response"
|
|
17
|
-
|
|
18
|
-
@pytest.fixture(scope="module")
def dummy_llm():
    """Provide a ``DummyLLM`` instance, created once per test module."""
    # A real LLM or a mock could be substituted here.
    stub = DummyLLM()
    return stub
|
|
22
|
-
|
|
23
|
-
def test_parse_diff_basic():
    """A single SEARCH/REPLACE block parses into one (search, replace) pair."""
    diff = """
<<<<<<< SEARCH
foo
bar
=======
hello
world
>>>>>>> REPLACE
"""
    parsed = ImageLoader.parse_diff(diff)
    assert len(parsed) == 1

    search_text, replace_text = parsed[0]
    assert "foo" in search_text
    assert "hello" in replace_text
|
|
38
|
-
|
|
39
|
-
def test_extract_replace_in_file_tools():
    """Two ``<replace_in_file>`` sections are extracted in order, with path and diff."""
    text = """
<replace_in_file>
<path>file1.py</path>
<diff>
<<<<<<< SEARCH
old content
=======
new content
>>>>>>> REPLACE
</diff>
</replace_in_file>

<replace_in_file>
<path>file2.py</path>
<diff>
<<<<<<< SEARCH
x=1
=======
x=2
>>>>>>> REPLACE
</diff>
</replace_in_file>
"""
    tools = ImageLoader.extract_replace_in_file_tools(text)
    assert len(tools) == 2

    # (expected path, snippet that must appear in that tool's diff)
    expectations = [("file1.py", "old content"), ("file2.py", "x=1")]
    for tool, (expected_path, snippet) in zip(tools, expectations):
        assert tool.path == expected_path
        assert snippet in tool.diff
|
|
69
|
-
|
|
70
|
-
def test_format_table_in_content_apply_diff(dummy_llm):
    """The diff carried by a canned LLM reply is applied to the OCR text."""
    # Simulated OCR text containing a whitespace-separated table.
    original = """这里是介绍
产品 价格 数量
苹果 5 10
香蕉 3 20
结束"""

    # Canned LLM reply in the replace_in_file format.
    llm_response = """
<replace_in_file>
<path>content</path>
<diff>
<<<<<<< SEARCH
产品 价格 数量
苹果 5 10
香蕉 3 20
=======
| 产品 | 价格 | 数量 |
| --- | --- | --- |
| 苹果 | 5 | 10 |
| 香蕉 | 3 | 20 |
>>>>>>> REPLACE
</diff>
</replace_in_file>
"""

    # LLM double whose run() always yields llm_response.
    class FakeLLM:
        def get_sub_client(self, name):
            return None

        def run(self, *args, **kwargs):
            return llm_response

    fake_llm = FakeLLM()

    # Replace byzerllm.prompt so a prompt function decorated while the patch
    # is active returns llm_response directly.
    # NOTE(review): this only affects decorators applied after the patch —
    # confirm format_table_in_content builds its prompt at call time.
    import byzerllm

    class _Runner:
        def run(self, content):
            return llm_response

    class _PromptWrapper:
        def with_llm(self, llm_obj):
            return _Runner()

    class DummyPrompt:
        def __call__(self, *args, **kwargs):
            # Callable so it can be used as a decorator factory.
            def decorator(func):
                return _PromptWrapper()

            return decorator

    saved_prompt = byzerllm.prompt
    byzerllm.prompt = DummyPrompt()

    try:
        formatted = ImageLoader.format_table_in_content(original, llm=fake_llm)
        for fragment in ("| 产品 | 价格 | 数量 |", "这里是介绍", "结束"):
            assert fragment in formatted
    finally:
        # Always restore the real decorator, even if an assertion fails.
        byzerllm.prompt = saved_prompt
|
|
133
|
-
|
|
134
|
-
def test_paddleocr_extract_text_type_error_fix(monkeypatch):
    """
    Check that paddleocr_extract_text tolerates unusual result structures by
    simulating a paddleocr.ocr return value whose text entry is not a string.
    """
    # Fake replacement for the PaddleOCR class.
    class FakeOCR:
        def __init__(self, **kwargs):
            pass

        def ocr(self, file_path, **kwargs):
            # Nested lists; the first entry's text slot is a list rather than
            # a str, which previously raised an error.
            box = [[0, 0], [1, 1]]
            page_one = [
                [[[0, 0], [1, 1]], (["text_in_list"], 0.9)],
                [[[0, 0], [1, 1]], ("normal text", 0.95)],
            ]
            assert page_one[0][0] == box  # both entries share the same box shape
            return [page_one]

    # Patch PaddleOCR inside the image_loader module.
    import autocoder.rag.loaders.image_loader as ilmod

    monkeypatch.setattr(ilmod, "PaddleOCR", FakeOCR)

    # Create a temporary file that stands in for an image.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as handle:
        tmp_path = handle.name

    try:
        extracted = ImageLoader.paddleocr_extract_text(tmp_path)
        # No exception expected; both text variants must be present.
        for expected in ("normal text", "text_in_list"):
            assert expected in extracted
    finally:
        os.remove(tmp_path)
|
|
167
|
-
|
|
168
|
-
def test_paddlex_table_extract_markdown_no_paddlex(monkeypatch):
    """With ``paddlex_module`` set to None the extractor returns an empty string."""
    import autocoder.rag.loaders.image_loader as ilmod

    monkeypatch.setattr(ilmod, "paddlex_module", None)
    result = ImageLoader.paddlex_table_extract_markdown("dummy_path.png")
    assert result == ""
|
|
174
|
-
|
|
175
|
-
def test_html_table_to_markdown_simple():
    """A small HTML table converts to Markdown rows, header row included."""
    html = """
<table>
<tr><th>头1</th><th>头2</th></tr>
<tr><td>数据1</td><td>数据2</td></tr>
<tr><td>数据3</td><td>数据4</td></tr>
</table>
"""
    markdown = ImageLoader.html_table_to_markdown(html)
    expected_rows = ("| 头1 | 头2 |", "| 数据1 | 数据2 |", "| 数据3 | 数据4 |")
    for row in expected_rows:
        assert row in markdown
|
|
187
|
-
|
|
188
|
-
def test_extract_text_from_image_unknown_engine(dummy_llm):
    """An unrecognised engine name yields an empty string."""
    result = ImageLoader.extract_text_from_image(
        "non_exist.png", dummy_llm, engine="xxx"
    )
    assert result == ""
|
|
191
|
-
|
|
192
|
-
def test_image_to_markdown_creates_file(tmp_path, dummy_llm, monkeypatch):
    """image_to_markdown returns the text and writes it to a sibling .md file."""
    # Prepare a fake image file.
    image_path = tmp_path / "testimg.png"
    image_path.write_bytes(b"fake image content")

    # Stub extract_text_from_image so it returns fixed content.
    def _fixed_text(*args, **kwargs):
        return "# hello world"

    monkeypatch.setattr(
        ImageLoader, "extract_text_from_image", staticmethod(_fixed_text)
    )

    markdown = ImageLoader.image_to_markdown(str(image_path), dummy_llm, engine="vl")
    assert "# hello world" in markdown

    markdown_path = image_path.with_suffix(".md")
    assert markdown_path.exists()
    assert "# hello world" in markdown_path.read_text()
|
|
206
|
-
|
|
207
|
-
if __name__ == "__main__":
    # Run all tests in this file manually.
    pytest.main([__file__])
|
autocoder/rag/raw_rag.py
DELETED
|
@@ -1,96 +0,0 @@
|
|
|
1
|
-
import os
|
|
2
|
-
import byzerllm
|
|
3
|
-
from byzerllm.apps.byzer_storage.simple_api import (
|
|
4
|
-
ByzerStorage,
|
|
5
|
-
DataType,
|
|
6
|
-
FieldOption,
|
|
7
|
-
SortOption,
|
|
8
|
-
)
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
def chunk_text(text, max_length=1000):
    """Split ``text`` into newline-joined chunks of at most ``max_length`` chars.

    Lines are never split: consecutive lines are packed into a chunk until
    adding the next one (plus its joining newline) would exceed
    ``max_length``. A single line longer than ``max_length`` is emitted as
    its own oversized chunk.

    Args:
        text: The text to split on ``"\\n"``.
        max_length: Upper bound on the length of each chunk, measured on the
            joined chunk string (separators included).

    Returns:
        A list of chunk strings; an empty list when ``text`` is empty.
    """
    if not text:
        # Avoid returning [""] and indexing an empty chunk downstream.
        return []

    chunks = []
    current_chunk = []
    current_length = 0

    for line in text.split("\n"):
        # Every line but the first in a chunk costs one extra character for
        # the "\n" that join() inserts in front of it.
        added = len(line) + (1 if current_chunk else 0)
        if current_chunk and current_length + added > max_length:
            chunks.append("\n".join(current_chunk))
            current_chunk = []
            current_length = 0
            added = len(line)
        current_chunk.append(line)
        current_length += added

    if current_chunk:
        chunks.append("\n".join(current_chunk))

    return chunks
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
# Prompt definition with no body: the docstring below carries the
# ``{{ context }}`` / ``{{ query }}`` placeholders and is presumably rendered
# by ``byzerllm.prompt`` as the prompt template (confirm against byzerllm).
# It is runtime text, so it must not be edited as ordinary documentation.
@byzerllm.prompt()
def process_query(context: str, query: str) -> str:
    """
    Based on the following context, please answer the query:

    Context:
    {{ context }}

    Query: {{ query }}

    Please provide a concise and accurate answer based on the given context.
    """
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
class RawRAG:
    """Minimal retrieval-then-answer helper built on ByzerStorage and ByzerLLM.

    ``build`` indexes the Markdown files of a directory chunk by chunk, and
    ``query`` answers a question from the top vector-search hit.
    """

    def __init__(
        self, llm_model="v3_chat", emb_model="emb", storage_name="byzerai_store"
    ):
        """Open the storage, configure the LLM and declare the table schema.

        Args:
            llm_model: Name set as the default model on the ByzerLLM client.
            emb_model: Embedding model name handed to ByzerStorage.
            storage_name: Storage name handed to ByzerStorage.
        """
        self.storage = ByzerStorage(
            storage_name, "rag_database", "rag_table", emb_model=emb_model
        )
        self.llm = byzerllm.ByzerLLM()
        self.llm.setup_default_model_name(llm_model)

        # Create schema if not exists
        (
            self.storage.schema_builder()
            .add_field("_id", DataType.STRING)
            .add_field("content", DataType.STRING, [FieldOption.ANALYZE])
            .add_field("raw_content", DataType.STRING, [FieldOption.NO_INDEX])
            .add_array_field("vector", DataType.FLOAT)
            .execute()
        )

    def build(self, directory):
        """Index every ``.md`` file directly inside ``directory``.

        Each file is split with ``chunk_text`` and every chunk is written as
        one item whose ``_id`` is ``"<filename>_<chunk index>"``. The storage
        is committed once after all files have been processed.

        Args:
            directory: Path of the directory to scan (not recursive).
        """
        for filename in os.listdir(directory):
            if not filename.endswith(".md"):
                continue

            # Decode explicitly as UTF-8 instead of relying on the platform's
            # locale default encoding.
            path = os.path.join(directory, filename)
            with open(path, "r", encoding="utf-8") as file:
                content = file.read()

            for i, chunk in enumerate(chunk_text(content)):
                item = {
                    "_id": f"{filename}_{i}",
                    "content": chunk,
                    "raw_content": chunk,
                    # Raw text goes in the vector field; presumably the
                    # storage embeds it because it is listed in
                    # vector_fields (confirm against ByzerStorage).
                    "vector": chunk,
                }
                self.storage.write_builder().add_items(
                    [item], vector_fields=["vector"], search_fields=["content"]
                ).execute()

        self.storage.commit()

    def query(self, query_text):
        """Answer ``query_text`` using the first vector-search result.

        Args:
            query_text: The user question; also used as the vector query.

        Returns:
            The LLM response built from the top hit's ``raw_content``, or a
            fixed apology string when the search returns nothing.
        """
        query = self.storage.query_builder()
        query.set_vector_query(query_text, fields=["vector"])
        results = query.execute()

        if not results:
            return "Sorry, I couldn't find relevant information to answer your query."

        context = results[0]["raw_content"]
        return process_query.with_llm(self.llm).run(
            context=context, query=query_text
        )
|