auto-coder 0.1.208__py3-none-any.whl → 0.1.211__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of auto-coder might be problematic. Click here for more details.
- {auto_coder-0.1.208.dist-info → auto_coder-0.1.211.dist-info}/METADATA +3 -2
- {auto_coder-0.1.208.dist-info → auto_coder-0.1.211.dist-info}/RECORD +23 -19
- autocoder/chat_auto_coder.py +153 -150
- autocoder/common/buildin_tokenizer.py +37 -0
- autocoder/common/code_auto_generate.py +1 -3
- autocoder/common/code_auto_generate_diff.py +1 -3
- autocoder/common/code_auto_generate_editblock.py +1 -3
- autocoder/common/code_auto_generate_strict_diff.py +1 -3
- autocoder/common/code_modification_ranker.py +35 -17
- autocoder/common/mcp_hub.py +326 -0
- autocoder/common/mcp_server.py +83 -0
- autocoder/common/mcp_tools.py +682 -0
- autocoder/dispacher/actions/action.py +40 -21
- autocoder/rag/cache/simple_cache.py +8 -2
- autocoder/rag/loaders/docx_loader.py +3 -2
- autocoder/rag/loaders/pdf_loader.py +3 -1
- autocoder/rag/long_context_rag.py +10 -2
- autocoder/rag/utils.py +14 -9
- autocoder/version.py +1 -1
- {auto_coder-0.1.208.dist-info → auto_coder-0.1.211.dist-info}/LICENSE +0 -0
- {auto_coder-0.1.208.dist-info → auto_coder-0.1.211.dist-info}/WHEEL +0 -0
- {auto_coder-0.1.208.dist-info → auto_coder-0.1.211.dist-info}/entry_points.txt +0 -0
- {auto_coder-0.1.208.dist-info → auto_coder-0.1.211.dist-info}/top_level.txt +0 -0
|
autocoder/dispacher/actions/action.py
CHANGED
|
@@ -5,6 +5,7 @@ from autocoder.common import (
|
|
|
5
5
|
split_code_into_segments,
|
|
6
6
|
SourceCode,
|
|
7
7
|
)
|
|
8
|
+
from autocoder.common.buildin_tokenizer import BuildinTokenizer
|
|
8
9
|
from autocoder.pyproject import PyProject, Level1PyProject
|
|
9
10
|
from autocoder.tsproject import TSProject
|
|
10
11
|
from autocoder.suffixproject import SuffixProject
|
|
@@ -23,9 +24,19 @@ import os
|
|
|
23
24
|
from autocoder.common.image_to_page import ImageToPage, ImageToPageDirectly
|
|
24
25
|
from autocoder.utils.conversation_store import store_code_model_conversation
|
|
25
26
|
from loguru import logger
|
|
27
|
+
import time
|
|
26
28
|
|
|
27
29
|
|
|
28
|
-
class ActionTSProject:
|
|
30
|
+
class BaseAction:
|
|
31
|
+
def _get_content_length(self, content: str) -> int:
|
|
32
|
+
try:
|
|
33
|
+
tokenizer = BuildinTokenizer()
|
|
34
|
+
return tokenizer.count_tokens(content)
|
|
35
|
+
except Exception as e:
|
|
36
|
+
logger.warning(f"Failed to use tokenizer to count tokens, fallback to len(): {e}")
|
|
37
|
+
return len(content)
|
|
38
|
+
|
|
39
|
+
class ActionTSProject(BaseAction):
|
|
29
40
|
def __init__(
|
|
30
41
|
self, args: AutoCoderArgs, llm: Optional[byzerllm.ByzerLLM] = None
|
|
31
42
|
) -> None:
|
|
@@ -74,14 +85,15 @@ class ActionTSProject:
|
|
|
74
85
|
args = self.args
|
|
75
86
|
|
|
76
87
|
if args.execute and self.llm and not args.human_as_model:
|
|
77
|
-
|
|
88
|
+
content_length = self._get_content_length(content)
|
|
89
|
+
if content_length > self.args.model_max_input_length:
|
|
78
90
|
logger.warning(
|
|
79
|
-
f"Content
|
|
80
|
-
)
|
|
81
|
-
content = content[: self.args.model_max_input_length]
|
|
91
|
+
f"Content(send to model) is {content_length} tokens, which is larger than the maximum input length {self.args.model_max_input_length}"
|
|
92
|
+
)
|
|
82
93
|
|
|
83
|
-
if args.execute:
|
|
94
|
+
if args.execute:
|
|
84
95
|
logger.info("Auto generate the code...")
|
|
96
|
+
start_time = time.time()
|
|
85
97
|
if args.auto_merge == "diff":
|
|
86
98
|
generate = CodeAutoGenerateDiff(
|
|
87
99
|
llm=self.llm, args=self.args, action=self
|
|
@@ -104,6 +116,7 @@ class ActionTSProject:
|
|
|
104
116
|
generate_result = generate.single_round_run(
|
|
105
117
|
query=args.query, source_content=content
|
|
106
118
|
)
|
|
119
|
+
logger.info(f"Code generation completed in {time.time() - start_time:.2f} seconds")
|
|
107
120
|
merge_result = None
|
|
108
121
|
if args.execute and args.auto_merge:
|
|
109
122
|
logger.info("Auto merge the code...")
|
|
@@ -141,7 +154,7 @@ class ActionTSProject:
|
|
|
141
154
|
file.write(content)
|
|
142
155
|
|
|
143
156
|
|
|
144
|
-
class ActionPyScriptProject:
|
|
157
|
+
class ActionPyScriptProject(BaseAction):
|
|
145
158
|
def __init__(
|
|
146
159
|
self, args: AutoCoderArgs, llm: Optional[byzerllm.ByzerLLM] = None
|
|
147
160
|
) -> None:
|
|
@@ -162,6 +175,8 @@ class ActionPyScriptProject:
|
|
|
162
175
|
def process_content(self, content: str):
|
|
163
176
|
args = self.args
|
|
164
177
|
if args.execute:
|
|
178
|
+
logger.info("Auto generate the code...")
|
|
179
|
+
start_time = time.time()
|
|
165
180
|
if args.auto_merge == "diff":
|
|
166
181
|
generate = CodeAutoGenerateDiff(
|
|
167
182
|
llm=self.llm, args=self.args, action=self
|
|
@@ -184,6 +199,8 @@ class ActionPyScriptProject:
|
|
|
184
199
|
generate_result = generate.single_round_run(
|
|
185
200
|
query=args.query, source_content=content
|
|
186
201
|
)
|
|
202
|
+
|
|
203
|
+
logger.info(f"Code generation completed in {time.time() - start_time:.2f} seconds")
|
|
187
204
|
merge_result = None
|
|
188
205
|
if args.execute and args.auto_merge:
|
|
189
206
|
logger.info("Auto merge the code...")
|
|
@@ -218,11 +235,13 @@ class ActionPyScriptProject:
|
|
|
218
235
|
model=self.llm.default_model_name,
|
|
219
236
|
)
|
|
220
237
|
|
|
238
|
+
end_time = time.time()
|
|
239
|
+
logger.info(f"Code generation completed in {end_time - start_time:.2f} seconds")
|
|
221
240
|
with open(self.args.target_file, "w") as file:
|
|
222
241
|
file.write(content)
|
|
223
242
|
|
|
224
243
|
|
|
225
|
-
class ActionPyProject:
|
|
244
|
+
class ActionPyProject(BaseAction):
|
|
226
245
|
def __init__(
|
|
227
246
|
self, args: AutoCoderArgs, llm: Optional[byzerllm.ByzerLLM] = None
|
|
228
247
|
) -> None:
|
|
@@ -250,15 +269,15 @@ class ActionPyProject:
|
|
|
250
269
|
args = self.args
|
|
251
270
|
|
|
252
271
|
if args.execute and self.llm and not args.human_as_model:
|
|
253
|
-
|
|
272
|
+
content_length = self._get_content_length(content)
|
|
273
|
+
if content_length > self.args.model_max_input_length:
|
|
254
274
|
logger.warning(
|
|
255
|
-
f'''Content
|
|
275
|
+
f'''Content(send to model) is {content_length} tokens (you may collect too much files), which is larger than the maximum input length {self.args.model_max_input_length}'''
|
|
256
276
|
)
|
|
257
|
-
content = content[: self.args.model_max_input_length]
|
|
258
277
|
|
|
259
278
|
if args.execute:
|
|
260
279
|
logger.info("Auto generate the code...")
|
|
261
|
-
|
|
280
|
+
start_time = time.time()
|
|
262
281
|
if args.auto_merge == "diff":
|
|
263
282
|
generate = CodeAutoGenerateDiff(
|
|
264
283
|
llm=self.llm, args=self.args, action=self
|
|
@@ -283,7 +302,7 @@ class ActionPyProject:
|
|
|
283
302
|
generate_result = generate.single_round_run(
|
|
284
303
|
query=args.query, source_content=content
|
|
285
304
|
)
|
|
286
|
-
|
|
305
|
+
logger.info(f"Code generation completed in {time.time() - start_time:.2f} seconds")
|
|
287
306
|
merge_result = None
|
|
288
307
|
if args.execute and args.auto_merge:
|
|
289
308
|
logger.info("Auto merge the code...")
|
|
@@ -316,13 +335,12 @@ class ActionPyProject:
|
|
|
316
335
|
instruction=self.args.query,
|
|
317
336
|
conversations=generate_result.conversations[0],
|
|
318
337
|
model=self.llm.default_model_name,
|
|
319
|
-
)
|
|
320
|
-
|
|
338
|
+
)
|
|
321
339
|
with open(args.target_file, "w") as file:
|
|
322
340
|
file.write(content)
|
|
323
341
|
|
|
324
342
|
|
|
325
|
-
class ActionSuffixProject:
|
|
343
|
+
class ActionSuffixProject(BaseAction):
|
|
326
344
|
def __init__(
|
|
327
345
|
self, args: AutoCoderArgs, llm: Optional[byzerllm.ByzerLLM] = None
|
|
328
346
|
) -> None:
|
|
@@ -346,14 +364,15 @@ class ActionSuffixProject:
|
|
|
346
364
|
args = self.args
|
|
347
365
|
|
|
348
366
|
if args.execute and self.llm and not args.human_as_model:
|
|
349
|
-
|
|
367
|
+
content_length = self._get_content_length(content)
|
|
368
|
+
if content_length > self.args.model_max_input_length:
|
|
350
369
|
logger.warning(
|
|
351
|
-
f"Content
|
|
352
|
-
)
|
|
353
|
-
content = content[: self.args.model_max_input_length]
|
|
370
|
+
f"Content(send to model) is {content_length} tokens, which is larger than the maximum input length {self.args.model_max_input_length}"
|
|
371
|
+
)
|
|
354
372
|
|
|
355
373
|
if args.execute:
|
|
356
374
|
logger.info("Auto generate the code...")
|
|
375
|
+
start_time = time.time()
|
|
357
376
|
if args.auto_merge == "diff":
|
|
358
377
|
generate = CodeAutoGenerateDiff(
|
|
359
378
|
llm=self.llm, args=self.args, action=self
|
|
@@ -377,7 +396,7 @@ class ActionSuffixProject:
|
|
|
377
396
|
query=args.query, source_content=content
|
|
378
397
|
)
|
|
379
398
|
|
|
380
|
-
|
|
399
|
+
logger.info(f"Code generation completed in {time.time() - start_time:.2f} seconds")
|
|
381
400
|
merge_result = None
|
|
382
401
|
if args.execute and args.auto_merge:
|
|
383
402
|
logger.info("Auto merge the code...")
|
|
autocoder/rag/cache/simple_cache.py
CHANGED
|
@@ -98,7 +98,10 @@ class AutoCoderRAGAsyncUpdateQueue(BaseCacheManager):
|
|
|
98
98
|
process_file_in_multi_process, files_to_process)
|
|
99
99
|
|
|
100
100
|
for file_info, result in zip(files_to_process, results):
|
|
101
|
-
|
|
101
|
+
if result: # 只有当result不为空时才更新缓存
|
|
102
|
+
self.update_cache(file_info, result)
|
|
103
|
+
else:
|
|
104
|
+
logger.warning(f"Empty result for file: {file_info[0]}, skipping cache update")
|
|
102
105
|
|
|
103
106
|
self.write_cache()
|
|
104
107
|
|
|
@@ -138,7 +141,10 @@ class AutoCoderRAGAsyncUpdateQueue(BaseCacheManager):
|
|
|
138
141
|
logger.info(f"{file_info[0]} is detected to be updated")
|
|
139
142
|
try:
|
|
140
143
|
result = process_file_local(file_info[0])
|
|
141
|
-
|
|
144
|
+
if result: # 只有当result不为空时才更新缓存
|
|
145
|
+
self.update_cache(file_info, result)
|
|
146
|
+
else:
|
|
147
|
+
logger.warning(f"Empty result for file: {file_info[0]}, skipping cache update")
|
|
142
148
|
except Exception as e:
|
|
143
149
|
logger.error(
|
|
144
150
|
f"SimpleCache Error in process_queue: {e}")
|
|
autocoder/rag/loaders/docx_loader.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from io import BytesIO
|
|
2
2
|
import docx2txt
|
|
3
3
|
from autocoder.utils._markitdown import MarkItDown
|
|
4
|
-
|
|
4
|
+
import traceback
|
|
5
5
|
|
|
6
6
|
def extract_text_from_docx_old(docx_path):
|
|
7
7
|
with open(docx_path, "rb") as f:
|
|
@@ -16,5 +16,6 @@ def extract_text_from_docx(docx_path):
|
|
|
16
16
|
md_converter = MarkItDown()
|
|
17
17
|
result = md_converter.convert(docx_path)
|
|
18
18
|
return result.text_content
|
|
19
|
-
except Exception as e:
|
|
19
|
+
except (BaseException, Exception) as e:
|
|
20
|
+
traceback.print_exc()
|
|
20
21
|
return extract_text_from_docx_old(docx_path)
|
|
autocoder/rag/loaders/pdf_loader.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from io import BytesIO
|
|
2
2
|
from pypdf import PdfReader
|
|
3
3
|
from autocoder.utils._markitdown import MarkItDown
|
|
4
|
+
import traceback
|
|
4
5
|
|
|
5
6
|
|
|
6
7
|
def extract_text_from_pdf_old(file_path):
|
|
@@ -18,5 +19,6 @@ def extract_text_from_pdf(file_path):
|
|
|
18
19
|
md_converter = MarkItDown()
|
|
19
20
|
result = md_converter.convert(file_path)
|
|
20
21
|
return result.text_content
|
|
21
|
-
except Exception as e:
|
|
22
|
+
except (BaseException, Exception) as e:
|
|
23
|
+
traceback.print_exc()
|
|
22
24
|
return extract_text_from_pdf_old(file_path)
|
|
autocoder/rag/long_context_rag.py
CHANGED
|
@@ -182,19 +182,25 @@ class LongContextRAG:
|
|
|
182
182
|
使用以下文档和对话历史来提取相关信息。
|
|
183
183
|
|
|
184
184
|
文档:
|
|
185
|
+
<documents>
|
|
185
186
|
{% for doc in documents %}
|
|
186
187
|
{{ doc }}
|
|
187
188
|
{% endfor %}
|
|
189
|
+
</documents>
|
|
188
190
|
|
|
189
191
|
对话历史:
|
|
192
|
+
<conversations>
|
|
190
193
|
{% for msg in conversations %}
|
|
191
|
-
|
|
194
|
+
[{{ msg.role }}]:
|
|
195
|
+
{{ msg.content }}
|
|
196
|
+
|
|
192
197
|
{% endfor %}
|
|
198
|
+
</conversations>
|
|
193
199
|
|
|
194
200
|
请根据提供的文档内容、用户对话历史以及最后一个问题,提取并总结文档中与问题相关的重要信息。
|
|
195
201
|
如果文档中没有相关信息,请回复"该文档中没有与问题相关的信息"。
|
|
196
202
|
提取的信息尽量保持和原文中的一样,并且只输出这些信息。
|
|
197
|
-
"""
|
|
203
|
+
"""
|
|
198
204
|
|
|
199
205
|
@byzerllm.prompt()
|
|
200
206
|
def _answer_question(
|
|
@@ -202,9 +208,11 @@ class LongContextRAG:
|
|
|
202
208
|
) -> Generator[str, None, None]:
|
|
203
209
|
"""
|
|
204
210
|
文档:
|
|
211
|
+
<documents>
|
|
205
212
|
{% for doc in relevant_docs %}
|
|
206
213
|
{{ doc }}
|
|
207
214
|
{% endfor %}
|
|
215
|
+
</documents>
|
|
208
216
|
|
|
209
217
|
使用以上文档来回答用户的问题。回答要求:
|
|
210
218
|
|
autocoder/rag/utils.py
CHANGED
|
@@ -9,13 +9,14 @@ import time
|
|
|
9
9
|
from loguru import logger
|
|
10
10
|
import traceback
|
|
11
11
|
|
|
12
|
+
|
|
12
13
|
def process_file_in_multi_process(
|
|
13
14
|
file_info: Tuple[str, str, float]
|
|
14
15
|
) -> List[SourceCode]:
|
|
15
16
|
start_time = time.time()
|
|
16
17
|
file_path, relative_path, _, _ = file_info
|
|
17
18
|
try:
|
|
18
|
-
if file_path.endswith(".pdf"):
|
|
19
|
+
if file_path.endswith(".pdf"):
|
|
19
20
|
content = extract_text_from_pdf(file_path)
|
|
20
21
|
v = [
|
|
21
22
|
SourceCode(
|
|
@@ -24,7 +25,7 @@ def process_file_in_multi_process(
|
|
|
24
25
|
tokens=count_tokens_worker(content),
|
|
25
26
|
)
|
|
26
27
|
]
|
|
27
|
-
elif file_path.endswith(".docx"):
|
|
28
|
+
elif file_path.endswith(".docx"):
|
|
28
29
|
content = extract_text_from_docx(file_path)
|
|
29
30
|
v = [
|
|
30
31
|
SourceCode(
|
|
@@ -45,7 +46,8 @@ def process_file_in_multi_process(
|
|
|
45
46
|
]
|
|
46
47
|
elif file_path.endswith(".pptx"):
|
|
47
48
|
slides = extract_text_from_ppt(file_path)
|
|
48
|
-
content = "".join(
|
|
49
|
+
content = "".join(
|
|
50
|
+
f"#{slide[0]}\n{slide[1]}\n\n" for slide in slides)
|
|
49
51
|
v = [
|
|
50
52
|
SourceCode(
|
|
51
53
|
module_name=f"##File: {file_path}",
|
|
@@ -65,15 +67,16 @@ def process_file_in_multi_process(
|
|
|
65
67
|
]
|
|
66
68
|
logger.info(f"Load file {file_path} in {time.time() - start_time}")
|
|
67
69
|
return v
|
|
68
|
-
except Exception as e:
|
|
70
|
+
except (BaseException, Exception) as e:
|
|
69
71
|
logger.error(f"Error processing file {file_path}: {str(e)}")
|
|
72
|
+
logger.error(f"Error type: {type(e).__name__}")
|
|
70
73
|
return []
|
|
71
74
|
|
|
72
75
|
|
|
73
76
|
def process_file_local(file_path: str) -> List[SourceCode]:
|
|
74
77
|
start_time = time.time()
|
|
75
78
|
try:
|
|
76
|
-
if file_path.endswith(".pdf"):
|
|
79
|
+
if file_path.endswith(".pdf"):
|
|
77
80
|
content = extract_text_from_pdf(file_path)
|
|
78
81
|
v = [
|
|
79
82
|
SourceCode(
|
|
@@ -82,7 +85,7 @@ def process_file_local(file_path: str) -> List[SourceCode]:
|
|
|
82
85
|
tokens=count_tokens(content),
|
|
83
86
|
)
|
|
84
87
|
]
|
|
85
|
-
elif file_path.endswith(".docx"):
|
|
88
|
+
elif file_path.endswith(".docx"):
|
|
86
89
|
content = extract_text_from_docx(file_path)
|
|
87
90
|
v = [
|
|
88
91
|
SourceCode(
|
|
@@ -103,7 +106,8 @@ def process_file_local(file_path: str) -> List[SourceCode]:
|
|
|
103
106
|
]
|
|
104
107
|
elif file_path.endswith(".pptx"):
|
|
105
108
|
slides = extract_text_from_ppt(file_path)
|
|
106
|
-
content = "".join(
|
|
109
|
+
content = "".join(
|
|
110
|
+
f"#{slide[0]}\n{slide[1]}\n\n" for slide in slides)
|
|
107
111
|
v = [
|
|
108
112
|
SourceCode(
|
|
109
113
|
module_name=f"##File: {file_path}",
|
|
@@ -123,7 +127,8 @@ def process_file_local(file_path: str) -> List[SourceCode]:
|
|
|
123
127
|
]
|
|
124
128
|
logger.info(f"Load file {file_path} in {time.time() - start_time}")
|
|
125
129
|
return v
|
|
126
|
-
except Exception as e:
|
|
130
|
+
except (BaseException, Exception) as e:
|
|
127
131
|
logger.error(f"Error processing file {file_path}: {str(e)}")
|
|
132
|
+
logger.error(f"Error type: {type(e).__name__}")
|
|
128
133
|
traceback.print_exc()
|
|
129
|
-
return []
|
|
134
|
+
return []
|
autocoder/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.1.208"
|
|
1
|
+
__version__ = "0.1.211"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|