auto-coder 0.1.207__py3-none-any.whl → 0.1.209__py3-none-any.whl
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of auto-coder might be problematic. Click here for more details.
- {auto_coder-0.1.207.dist-info → auto_coder-0.1.209.dist-info}/METADATA +4 -3
- {auto_coder-0.1.207.dist-info → auto_coder-0.1.209.dist-info}/RECORD +37 -34
- autocoder/agent/auto_demand_organizer.py +212 -0
- autocoder/agent/auto_guess_query.py +284 -0
- autocoder/auto_coder.py +64 -19
- autocoder/auto_coder_rag.py +6 -0
- autocoder/chat_auto_coder.py +119 -16
- autocoder/command_args.py +21 -5
- autocoder/common/__init__.py +7 -1
- autocoder/common/code_auto_generate.py +32 -10
- autocoder/common/code_auto_generate_diff.py +85 -47
- autocoder/common/code_auto_generate_editblock.py +50 -28
- autocoder/common/code_auto_generate_strict_diff.py +79 -45
- autocoder/common/code_auto_merge.py +51 -15
- autocoder/common/code_auto_merge_diff.py +55 -2
- autocoder/common/code_auto_merge_editblock.py +84 -14
- autocoder/common/code_auto_merge_strict_diff.py +69 -32
- autocoder/common/code_modification_ranker.py +100 -0
- autocoder/common/command_completer.py +6 -4
- autocoder/common/types.py +10 -2
- autocoder/dispacher/actions/action.py +141 -94
- autocoder/dispacher/actions/plugins/action_regex_project.py +35 -25
- autocoder/lang.py +9 -1
- autocoder/pyproject/__init__.py +4 -0
- autocoder/rag/cache/simple_cache.py +8 -2
- autocoder/rag/loaders/docx_loader.py +3 -2
- autocoder/rag/loaders/pdf_loader.py +3 -1
- autocoder/rag/long_context_rag.py +12 -2
- autocoder/rag/rag_entry.py +2 -2
- autocoder/rag/utils.py +14 -9
- autocoder/suffixproject/__init__.py +2 -0
- autocoder/tsproject/__init__.py +4 -0
- autocoder/version.py +1 -1
- {auto_coder-0.1.207.dist-info → auto_coder-0.1.209.dist-info}/LICENSE +0 -0
- {auto_coder-0.1.207.dist-info → auto_coder-0.1.209.dist-info}/WHEEL +0 -0
- {auto_coder-0.1.207.dist-info → auto_coder-0.1.209.dist-info}/entry_points.txt +0 -0
- {auto_coder-0.1.207.dist-info → auto_coder-0.1.209.dist-info}/top_level.txt +0 -0
|
@@ -2,10 +2,12 @@
|
|
|
2
2
|
import os
|
|
3
3
|
from byzerllm.utils.client import code_utils
|
|
4
4
|
from autocoder.common import AutoCoderArgs,git_utils
|
|
5
|
-
from typing import List
|
|
5
|
+
from typing import List,Union,Tuple
|
|
6
6
|
import pydantic
|
|
7
7
|
import byzerllm
|
|
8
8
|
from loguru import logger
|
|
9
|
+
from autocoder.common.types import CodeGenerateResult, MergeCodeWithoutEffect
|
|
10
|
+
from autocoder.common.code_modification_ranker import CodeModificationRanker
|
|
9
11
|
import hashlib
|
|
10
12
|
|
|
11
13
|
class PathAndCode(pydantic.BaseModel):
|
|
@@ -58,7 +60,26 @@ class CodeAutoMerge:
|
|
|
58
60
|
elif start_marker_count > 0:
|
|
59
61
|
block.append(line)
|
|
60
62
|
|
|
61
|
-
return path_and_code_list
|
|
63
|
+
return path_and_code_list
|
|
64
|
+
|
|
65
|
+
def merge_code(self, generate_result: CodeGenerateResult, force_skip_git: bool = False):
|
|
66
|
+
result = self.choose_best_choice(generate_result)
|
|
67
|
+
self._merge_code(result.contents[0], force_skip_git)
|
|
68
|
+
return result
|
|
69
|
+
|
|
70
|
+
def choose_best_choice(self, generate_result: CodeGenerateResult) -> CodeGenerateResult:
|
|
71
|
+
if len(generate_result.contents) == 1:
|
|
72
|
+
return generate_result
|
|
73
|
+
|
|
74
|
+
ranker = CodeModificationRanker(self.llm, self.args)
|
|
75
|
+
ranked_result = ranker.rank_modifications(generate_result)
|
|
76
|
+
# Filter out contents with failed blocks
|
|
77
|
+
for content,conversations in zip(ranked_result.contents,ranked_result.conversations):
|
|
78
|
+
merge_result = self._merge_code_without_effect(content)
|
|
79
|
+
if not merge_result.failed_blocks:
|
|
80
|
+
return CodeGenerateResult(contents=[content], conversations=[conversations])
|
|
81
|
+
# If all have failed blocks, return the first one
|
|
82
|
+
return CodeGenerateResult(contents=[ranked_result.contents[0]], conversations=[ranked_result.conversations[0]])
|
|
62
83
|
|
|
63
84
|
|
|
64
85
|
def parse_text(self, text: str) -> List[PathAndCode]:
|
|
@@ -99,7 +120,34 @@ class CodeAutoMerge:
|
|
|
99
120
|
Error: {{ error }}
|
|
100
121
|
'''
|
|
101
122
|
|
|
102
|
-
def merge_code(self, content: str,force_skip_git:bool=False):
|
|
123
|
+
def _merge_code_without_effect(self, content: str) -> MergeCodeWithoutEffect:
|
|
124
|
+
"""Merge code without any side effects like git operations or file writing.
|
|
125
|
+
Returns a tuple of:
|
|
126
|
+
- list of (file_path, new_content) tuples for successfully merged blocks
|
|
127
|
+
- list of (file_path, content) tuples for failed to merge blocks"""
|
|
128
|
+
codes = self.parse_whole_text_v2(content)
|
|
129
|
+
file_content_mapping = {}
|
|
130
|
+
failed_blocks = []
|
|
131
|
+
|
|
132
|
+
for block in codes:
|
|
133
|
+
file_path = block.path
|
|
134
|
+
if not os.path.exists(file_path):
|
|
135
|
+
file_content_mapping[file_path] = block.content
|
|
136
|
+
else:
|
|
137
|
+
if file_path not in file_content_mapping:
|
|
138
|
+
with open(file_path, "r") as f:
|
|
139
|
+
file_content_mapping[file_path] = f.read()
|
|
140
|
+
if file_content_mapping[file_path] != block.content:
|
|
141
|
+
file_content_mapping[file_path] = block.content
|
|
142
|
+
else:
|
|
143
|
+
failed_blocks.append((file_path, block.content))
|
|
144
|
+
|
|
145
|
+
return MergeCodeWithoutEffect(
|
|
146
|
+
success_blocks=[(path, content) for path, content in file_content_mapping.items()],
|
|
147
|
+
failed_blocks=failed_blocks
|
|
148
|
+
)
|
|
149
|
+
|
|
150
|
+
def _merge_code(self, content: str,force_skip_git:bool=False):
|
|
103
151
|
total = 0
|
|
104
152
|
|
|
105
153
|
file_content = open(self.args.file).read()
|
|
@@ -114,18 +162,6 @@ class CodeAutoMerge:
|
|
|
114
162
|
logger.error(self.git_require_msg(source_dir=self.args.source_dir,error=str(e)))
|
|
115
163
|
return
|
|
116
164
|
|
|
117
|
-
# codes = code_utils.extract_code(content)
|
|
118
|
-
# for (lang,code) in codes:
|
|
119
|
-
# parsed_blocks = self.parse_text(code)
|
|
120
|
-
|
|
121
|
-
# for block in parsed_blocks:
|
|
122
|
-
# file_path = block.path
|
|
123
|
-
# os.makedirs(os.path.dirname(file_path), exist_ok=True)
|
|
124
|
-
|
|
125
|
-
# with open(file_path, "w") as f:
|
|
126
|
-
# logger.info(f"Upsert path: {file_path}")
|
|
127
|
-
# total += 1
|
|
128
|
-
# f.write(block.content)
|
|
129
165
|
codes = self.parse_whole_text_v2(content)
|
|
130
166
|
for block in codes:
|
|
131
167
|
file_path = block.path
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import difflib
|
|
3
3
|
from autocoder.common import AutoCoderArgs,git_utils
|
|
4
|
-
from typing import List
|
|
4
|
+
from typing import List,Union,Tuple
|
|
5
5
|
import pydantic
|
|
6
6
|
import byzerllm
|
|
7
7
|
from loguru import logger
|
|
@@ -15,6 +15,8 @@ from autocoder.common.search_replace import (
|
|
|
15
15
|
flexible_search_and_replace,
|
|
16
16
|
search_and_replace,
|
|
17
17
|
)
|
|
18
|
+
from autocoder.common.types import CodeGenerateResult, MergeCodeWithoutEffect
|
|
19
|
+
from autocoder.common.code_modification_ranker import CodeModificationRanker
|
|
18
20
|
|
|
19
21
|
class PathAndCode(pydantic.BaseModel):
|
|
20
22
|
path: str
|
|
@@ -374,6 +376,25 @@ class CodeAutoMergeDiff:
|
|
|
374
376
|
edits.append((path, hunk))
|
|
375
377
|
|
|
376
378
|
return edits
|
|
379
|
+
|
|
380
|
+
def merge_code(self, generate_result: CodeGenerateResult, force_skip_git: bool = False):
|
|
381
|
+
result = self.choose_best_choice(generate_result)
|
|
382
|
+
self._merge_code(result.contents[0], force_skip_git)
|
|
383
|
+
return result
|
|
384
|
+
|
|
385
|
+
def choose_best_choice(self, generate_result: CodeGenerateResult) -> CodeGenerateResult:
|
|
386
|
+
if len(generate_result.contents) == 1:
|
|
387
|
+
return generate_result
|
|
388
|
+
|
|
389
|
+
ranker = CodeModificationRanker(self.llm, self.args)
|
|
390
|
+
ranked_result = ranker.rank_modifications(generate_result)
|
|
391
|
+
# Filter out contents with failed blocks
|
|
392
|
+
for content,conversations in zip(ranked_result.contents,ranked_result.conversations):
|
|
393
|
+
merge_result = self._merge_code_without_effect(content)
|
|
394
|
+
if not merge_result.failed_blocks:
|
|
395
|
+
return CodeGenerateResult(contents=[content], conversations=[conversations])
|
|
396
|
+
# If all have failed blocks, return the first one
|
|
397
|
+
return CodeGenerateResult(contents=[ranked_result.contents[0]], conversations=[ranked_result.conversations[0]])
|
|
377
398
|
|
|
378
399
|
@byzerllm.prompt(render="jinja2")
|
|
379
400
|
def git_require_msg(self,source_dir:str,error:str)->str:
|
|
@@ -450,7 +471,39 @@ class CodeAutoMergeDiff:
|
|
|
450
471
|
errors += other_hunks_applied
|
|
451
472
|
raise ValueError(errors)
|
|
452
473
|
|
|
453
|
-
def merge_code(self, content: str,force_skip_git:bool=False):
|
|
474
|
+
def _merge_code_without_effect(self, content: str) -> MergeCodeWithoutEffect:
|
|
475
|
+
"""Merge code without any side effects like git operations or file writing.
|
|
476
|
+
Returns a tuple of:
|
|
477
|
+
- list of (file_path, new_content) tuples for successfully merged blocks
|
|
478
|
+
- list of (file_path, hunk) tuples for failed to merge blocks"""
|
|
479
|
+
edits = self.get_edits(content)
|
|
480
|
+
file_content_mapping = {}
|
|
481
|
+
failed_blocks = []
|
|
482
|
+
|
|
483
|
+
for path, hunk in edits:
|
|
484
|
+
full_path = self.abs_root_path(path)
|
|
485
|
+
if not os.path.exists(full_path):
|
|
486
|
+
_, after = hunk_to_before_after(hunk)
|
|
487
|
+
file_content_mapping[full_path] = after
|
|
488
|
+
continue
|
|
489
|
+
|
|
490
|
+
if full_path not in file_content_mapping:
|
|
491
|
+
with open(full_path, "r") as f:
|
|
492
|
+
file_content_mapping[full_path] = f.read()
|
|
493
|
+
|
|
494
|
+
content = file_content_mapping[full_path]
|
|
495
|
+
new_content = do_replace(full_path, content, hunk)
|
|
496
|
+
if new_content:
|
|
497
|
+
file_content_mapping[full_path] = new_content
|
|
498
|
+
else:
|
|
499
|
+
failed_blocks.append((full_path, "\n".join(hunk)))
|
|
500
|
+
|
|
501
|
+
return MergeCodeWithoutEffect(
|
|
502
|
+
success_blocks=[(path, content) for path, content in file_content_mapping.items()],
|
|
503
|
+
failed_blocks=failed_blocks
|
|
504
|
+
)
|
|
505
|
+
|
|
506
|
+
def _merge_code(self, content: str,force_skip_git:bool=False):
|
|
454
507
|
total = 0
|
|
455
508
|
|
|
456
509
|
file_content = open(self.args.file).read()
|
|
@@ -7,7 +7,6 @@ from autocoder.utils.queue_communicate import (
|
|
|
7
7
|
CommunicateEvent,
|
|
8
8
|
CommunicateEventType,
|
|
9
9
|
)
|
|
10
|
-
from typing import List
|
|
11
10
|
import pydantic
|
|
12
11
|
import byzerllm
|
|
13
12
|
from loguru import logger
|
|
@@ -18,6 +17,9 @@ from rich.console import Console
|
|
|
18
17
|
from rich.panel import Panel
|
|
19
18
|
from rich.syntax import Syntax
|
|
20
19
|
import json
|
|
20
|
+
from typing import Union, List, Tuple
|
|
21
|
+
from autocoder.common.types import CodeGenerateResult, MergeCodeWithoutEffect
|
|
22
|
+
from autocoder.common.code_modification_ranker import CodeModificationRanker
|
|
21
23
|
|
|
22
24
|
|
|
23
25
|
class PathAndCode(pydantic.BaseModel):
|
|
@@ -138,7 +140,7 @@ class CodeAutoMergeEditBlock:
|
|
|
138
140
|
elif end_marker(line, index) and start_marker_count == 1:
|
|
139
141
|
start_marker_count -= 1
|
|
140
142
|
if block:
|
|
141
|
-
if current_editblock_mode == "two_line_mode":
|
|
143
|
+
if current_editblock_mode == "two_line_mode":
|
|
142
144
|
path = block[0].split(":", 1)[1].strip()
|
|
143
145
|
content = "\n".join(block[1:])
|
|
144
146
|
else:
|
|
@@ -152,6 +154,25 @@ class CodeAutoMergeEditBlock:
|
|
|
152
154
|
|
|
153
155
|
return path_and_code_list
|
|
154
156
|
|
|
157
|
+
def merge_code(self, generate_result: CodeGenerateResult, force_skip_git: bool = False):
|
|
158
|
+
result = self.choose_best_choice(generate_result)
|
|
159
|
+
self._merge_code(result.contents[0], force_skip_git)
|
|
160
|
+
return result
|
|
161
|
+
|
|
162
|
+
def choose_best_choice(self, generate_result: CodeGenerateResult) -> CodeGenerateResult:
|
|
163
|
+
if len(generate_result.contents) == 1:
|
|
164
|
+
return generate_result
|
|
165
|
+
|
|
166
|
+
ranker = CodeModificationRanker(self.llm, self.args)
|
|
167
|
+
ranked_result = ranker.rank_modifications(generate_result)
|
|
168
|
+
# Filter out contents with failed blocks
|
|
169
|
+
for content,conversations in zip(ranked_result.contents,ranked_result.conversations):
|
|
170
|
+
merge_result = self._merge_code_without_effect(content)
|
|
171
|
+
if not merge_result.failed_blocks:
|
|
172
|
+
return CodeGenerateResult(contents=[content], conversations=[conversations])
|
|
173
|
+
# If all have failed blocks, return the first one
|
|
174
|
+
return CodeGenerateResult(contents=[ranked_result.contents[0]], conversations=[ranked_result.conversations[0]])
|
|
175
|
+
|
|
155
176
|
@byzerllm.prompt()
|
|
156
177
|
def git_require_msg(self, source_dir: str, error: str) -> str:
|
|
157
178
|
"""
|
|
@@ -197,9 +218,57 @@ class CodeAutoMergeEditBlock:
|
|
|
197
218
|
if in_updated:
|
|
198
219
|
updates.append(line)
|
|
199
220
|
result.append((edit.path, "\n".join(heads), "\n".join(updates)))
|
|
200
|
-
return result
|
|
221
|
+
return result
|
|
222
|
+
|
|
223
|
+
def _merge_code_without_effect(self, content: str) -> MergeCodeWithoutEffect:
|
|
224
|
+
"""Merge code without any side effects like git operations, linting or file writing.
|
|
225
|
+
Returns a tuple of:
|
|
226
|
+
- list of (file_path, new_content) tuples for successfully merged blocks
|
|
227
|
+
- list of (file_path, head, update) tuples for failed to merge blocks"""
|
|
228
|
+
codes = self.get_edits(content)
|
|
229
|
+
file_content_mapping = {}
|
|
230
|
+
failed_blocks = []
|
|
231
|
+
|
|
232
|
+
for block in codes:
|
|
233
|
+
file_path, head, update = block
|
|
234
|
+
if not os.path.exists(file_path):
|
|
235
|
+
file_content_mapping[file_path] = update
|
|
236
|
+
else:
|
|
237
|
+
if file_path not in file_content_mapping:
|
|
238
|
+
with open(file_path, "r") as f:
|
|
239
|
+
temp = f.read()
|
|
240
|
+
file_content_mapping[file_path] = temp
|
|
241
|
+
existing_content = file_content_mapping[file_path]
|
|
242
|
+
|
|
243
|
+
# First try exact match
|
|
244
|
+
new_content = (
|
|
245
|
+
existing_content.replace(head, update, 1)
|
|
246
|
+
if head
|
|
247
|
+
else existing_content + "\n" + update
|
|
248
|
+
)
|
|
249
|
+
|
|
250
|
+
# If exact match fails, try similarity match
|
|
251
|
+
if new_content == existing_content and head:
|
|
252
|
+
similarity, best_window = TextSimilarity(
|
|
253
|
+
head, existing_content
|
|
254
|
+
).get_best_matching_window()
|
|
255
|
+
if similarity > self.args.editblock_similarity:
|
|
256
|
+
new_content = existing_content.replace(
|
|
257
|
+
best_window, update, 1
|
|
258
|
+
)
|
|
201
259
|
|
|
202
|
-
|
|
260
|
+
if new_content != existing_content:
|
|
261
|
+
file_content_mapping[file_path] = new_content
|
|
262
|
+
else:
|
|
263
|
+
failed_blocks.append((file_path, head, update))
|
|
264
|
+
|
|
265
|
+
return MergeCodeWithoutEffect(
|
|
266
|
+
success_blocks=[(path, content)
|
|
267
|
+
for path, content in file_content_mapping.items()],
|
|
268
|
+
failed_blocks=failed_blocks
|
|
269
|
+
)
|
|
270
|
+
|
|
271
|
+
def _merge_code(self, content: str, force_skip_git: bool = False):
|
|
203
272
|
file_content = open(self.args.file).read()
|
|
204
273
|
md5 = hashlib.md5(file_content.encode("utf-8")).hexdigest()
|
|
205
274
|
file_name = os.path.basename(self.args.file)
|
|
@@ -207,15 +276,15 @@ class CodeAutoMergeEditBlock:
|
|
|
207
276
|
codes = self.get_edits(content)
|
|
208
277
|
changes_to_make = []
|
|
209
278
|
changes_made = False
|
|
210
|
-
unmerged_blocks = []
|
|
211
|
-
merged_blocks = []
|
|
279
|
+
unmerged_blocks = []
|
|
280
|
+
merged_blocks = []
|
|
212
281
|
|
|
213
282
|
# First, check if there are any changes to be made
|
|
214
283
|
file_content_mapping = {}
|
|
215
284
|
for block in codes:
|
|
216
285
|
file_path, head, update = block
|
|
217
286
|
if not os.path.exists(file_path):
|
|
218
|
-
changes_to_make.append((file_path, None, update))
|
|
287
|
+
changes_to_make.append((file_path, None, update))
|
|
219
288
|
file_content_mapping[file_path] = update
|
|
220
289
|
merged_blocks.append((file_path, "", update, 1))
|
|
221
290
|
changes_made = True
|
|
@@ -235,7 +304,7 @@ class CodeAutoMergeEditBlock:
|
|
|
235
304
|
(file_path, existing_content, new_content))
|
|
236
305
|
file_content_mapping[file_path] = new_content
|
|
237
306
|
merged_blocks.append((file_path, head, update, 1))
|
|
238
|
-
changes_made = True
|
|
307
|
+
changes_made = True
|
|
239
308
|
else:
|
|
240
309
|
# If the SEARCH BLOCK is not found exactly, then try to use
|
|
241
310
|
# the similarity ratio to find the best matching block
|
|
@@ -250,8 +319,9 @@ class CodeAutoMergeEditBlock:
|
|
|
250
319
|
(file_path, existing_content, new_content)
|
|
251
320
|
)
|
|
252
321
|
file_content_mapping[file_path] = new_content
|
|
253
|
-
merged_blocks.append(
|
|
254
|
-
|
|
322
|
+
merged_blocks.append(
|
|
323
|
+
(file_path, head, update, similarity))
|
|
324
|
+
changes_made = True
|
|
255
325
|
else:
|
|
256
326
|
unmerged_blocks.append(
|
|
257
327
|
(file_path, head, update, similarity))
|
|
@@ -317,10 +387,10 @@ class CodeAutoMergeEditBlock:
|
|
|
317
387
|
file_path, head, update, similarity = code
|
|
318
388
|
event_data.append(
|
|
319
389
|
{
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
390
|
+
"file_path": file_path,
|
|
391
|
+
"head": head,
|
|
392
|
+
"update": update,
|
|
393
|
+
"similarity": similarity,
|
|
324
394
|
}
|
|
325
395
|
)
|
|
326
396
|
|
|
@@ -2,12 +2,14 @@ import os
|
|
|
2
2
|
import difflib
|
|
3
3
|
import diff_match_patch as dmp_module
|
|
4
4
|
from autocoder.common import AutoCoderArgs, git_utils
|
|
5
|
-
from typing import List
|
|
5
|
+
from typing import List,Tuple
|
|
6
6
|
import pydantic
|
|
7
7
|
import byzerllm
|
|
8
8
|
from loguru import logger
|
|
9
9
|
import hashlib
|
|
10
10
|
from pathlib import Path
|
|
11
|
+
from autocoder.common.types import CodeGenerateResult, MergeCodeWithoutEffect
|
|
12
|
+
from autocoder.common.code_modification_ranker import CodeModificationRanker
|
|
11
13
|
|
|
12
14
|
class PathAndCode(pydantic.BaseModel):
|
|
13
15
|
path: str
|
|
@@ -125,7 +127,26 @@ class CodeAutoMergeStrictDiff:
|
|
|
125
127
|
elif start_marker_count > 0:
|
|
126
128
|
block.append(line)
|
|
127
129
|
|
|
128
|
-
return path_and_code_list
|
|
130
|
+
return path_and_code_list
|
|
131
|
+
|
|
132
|
+
def merge_code(self, generate_result: CodeGenerateResult, force_skip_git: bool = False):
|
|
133
|
+
result = self.choose_best_choice(generate_result)
|
|
134
|
+
self._merge_code(result.contents[0], force_skip_git)
|
|
135
|
+
return result
|
|
136
|
+
|
|
137
|
+
def choose_best_choice(self, generate_result: CodeGenerateResult) -> CodeGenerateResult:
|
|
138
|
+
if len(generate_result.contents) == 1:
|
|
139
|
+
return generate_result
|
|
140
|
+
|
|
141
|
+
ranker = CodeModificationRanker(self.llm, self.args)
|
|
142
|
+
ranked_result = ranker.rank_modifications(generate_result)
|
|
143
|
+
# Filter out contents with failed blocks
|
|
144
|
+
for content,conversations in zip(ranked_result.contents,ranked_result.conversations):
|
|
145
|
+
merge_result = self._merge_code_without_effect(content)
|
|
146
|
+
if not merge_result.failed_blocks:
|
|
147
|
+
return CodeGenerateResult(contents=[content], conversations=[conversations])
|
|
148
|
+
# If all have failed blocks, return the first one
|
|
149
|
+
return CodeGenerateResult(contents=[ranked_result.contents[0]], conversations=[ranked_result.conversations[0]])
|
|
129
150
|
|
|
130
151
|
|
|
131
152
|
def abs_root_path(self, path):
|
|
@@ -134,7 +155,52 @@ class CodeAutoMergeStrictDiff:
|
|
|
134
155
|
res = Path(self.args.source_dir) / path
|
|
135
156
|
return safe_abs_path(res)
|
|
136
157
|
|
|
137
|
-
def merge_code(self, content: str, force_skip_git: bool = False):
|
|
158
|
+
def _merge_code_without_effect(self, content: str) -> MergeCodeWithoutEffect:
|
|
159
|
+
"""Merge code without any side effects like git operations or file writing.
|
|
160
|
+
Returns a tuple of:
|
|
161
|
+
- list of (file_path, new_content) tuples for successfully merged blocks
|
|
162
|
+
- list of (file_path, content) tuples for failed to merge blocks"""
|
|
163
|
+
diff_blocks = self.parse_diff_block(content)
|
|
164
|
+
file_content_mapping = {}
|
|
165
|
+
failed_blocks = []
|
|
166
|
+
|
|
167
|
+
for block in diff_blocks:
|
|
168
|
+
path = block.path
|
|
169
|
+
content = block.content
|
|
170
|
+
full_path = self.abs_root_path(path)
|
|
171
|
+
|
|
172
|
+
if not os.path.exists(full_path):
|
|
173
|
+
file_content_mapping[full_path] = content
|
|
174
|
+
continue
|
|
175
|
+
|
|
176
|
+
if full_path not in file_content_mapping:
|
|
177
|
+
with open(full_path, "r") as f:
|
|
178
|
+
file_content_mapping[full_path] = f.read()
|
|
179
|
+
|
|
180
|
+
try:
|
|
181
|
+
import patch
|
|
182
|
+
patch_obj = patch.fromstring(content.encode('utf-8'))
|
|
183
|
+
root_path = None
|
|
184
|
+
if not path.startswith(self.args.source_dir):
|
|
185
|
+
root_path = self.args.source_dir
|
|
186
|
+
|
|
187
|
+
# Create a copy of the content to apply patch
|
|
188
|
+
temp_content = file_content_mapping[full_path]
|
|
189
|
+
success = patch_obj.apply(root=root_path, content=temp_content)
|
|
190
|
+
if success:
|
|
191
|
+
file_content_mapping[full_path] = temp_content
|
|
192
|
+
else:
|
|
193
|
+
failed_blocks.append((full_path, content))
|
|
194
|
+
except Exception as e:
|
|
195
|
+
logger.warning(f"Failed to apply patch to {full_path}: {str(e)}")
|
|
196
|
+
failed_blocks.append((full_path, content))
|
|
197
|
+
|
|
198
|
+
return MergeCodeWithoutEffect(
|
|
199
|
+
success_blocks=[(path, content) for path, content in file_content_mapping.items()],
|
|
200
|
+
failed_blocks=failed_blocks
|
|
201
|
+
)
|
|
202
|
+
|
|
203
|
+
def _merge_code(self, content: str, force_skip_git: bool = False):
|
|
138
204
|
total = 0
|
|
139
205
|
|
|
140
206
|
file_content = open(self.args.file).read()
|
|
@@ -154,35 +220,6 @@ class CodeAutoMergeStrictDiff:
|
|
|
154
220
|
path = diff_blocks.path
|
|
155
221
|
content = diff_blocks.content
|
|
156
222
|
|
|
157
|
-
# unidiff_patch = unidiff.PatchSet(content)
|
|
158
|
-
# dmp_patches = []
|
|
159
|
-
# for patched_file in unidiff_patch:
|
|
160
|
-
# diffs = []
|
|
161
|
-
# start_line = 0
|
|
162
|
-
# for hunk in patched_file:
|
|
163
|
-
# start_line = hunk.target_start - 1 # 获取hunk的起始位置
|
|
164
|
-
# for line in hunk:
|
|
165
|
-
# if line.is_added:
|
|
166
|
-
# diffs.append(dmp_module.diff('', line.value.strip(), start_line))
|
|
167
|
-
# start_line += 1
|
|
168
|
-
# elif line.is_removed:
|
|
169
|
-
# diffs.append(dmp_module.diff(line.value.strip(), '', start_line))
|
|
170
|
-
# else:
|
|
171
|
-
# start_line += 1
|
|
172
|
-
# patch_text = dmp.patch_make(diffs)
|
|
173
|
-
# dmp_patches.extend(patch_text)
|
|
174
|
-
|
|
175
|
-
# with open(path, 'r') as f:
|
|
176
|
-
# original_content = f.read()
|
|
177
|
-
|
|
178
|
-
# dmp = dmp_module.diff_match_patch()
|
|
179
|
-
# new_text, results = dmp.patch_apply(dmp_patches, original_content)
|
|
180
|
-
# if any(results) is False:
|
|
181
|
-
# raise Exception("Error applying diff to file: " + path)
|
|
182
|
-
# with open(self.abs_root_path(path), 'w') as f:
|
|
183
|
-
# f.write(new_text)
|
|
184
|
-
# total += 1
|
|
185
|
-
|
|
186
223
|
import patch
|
|
187
224
|
patch_obj = patch.fromstring(content.encode('utf-8'))
|
|
188
225
|
root_path = None
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
import byzerllm
|
|
2
|
+
from typing import List,Union
|
|
3
|
+
from autocoder.common import AutoCoderArgs
|
|
4
|
+
from autocoder.common.types import CodeGenerateResult
|
|
5
|
+
from pydantic import BaseModel
|
|
6
|
+
from loguru import logger
|
|
7
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
8
|
+
import traceback
|
|
9
|
+
|
|
10
|
+
class RankResult(BaseModel):
|
|
11
|
+
rank_result:List[int]
|
|
12
|
+
|
|
13
|
+
class CodeModificationRanker:
|
|
14
|
+
def __init__(self, llm: byzerllm.ByzerLLM, args: AutoCoderArgs):
|
|
15
|
+
self.llm = llm
|
|
16
|
+
self.args = args
|
|
17
|
+
if self.llm.get_sub_client("generate_rerank_model"):
|
|
18
|
+
self.rerank_llm = self.llm.get_sub_client("generate_rerank_model")
|
|
19
|
+
else:
|
|
20
|
+
self.rerank_llm = self.llm
|
|
21
|
+
|
|
22
|
+
@byzerllm.prompt()
|
|
23
|
+
def _rank_modifications(self, s:CodeGenerateResult) -> str:
|
|
24
|
+
'''
|
|
25
|
+
对一组代码修改进行质量评估并排序。
|
|
26
|
+
|
|
27
|
+
下面是修改需求:
|
|
28
|
+
|
|
29
|
+
<edit_requirement>
|
|
30
|
+
{{ s.conversations[0][-2]["content"] }}
|
|
31
|
+
</edit_requirement>
|
|
32
|
+
|
|
33
|
+
下面是相应的代码修改:
|
|
34
|
+
{% for content in s.contents %}
|
|
35
|
+
<edit_block id="{{ loop.index0 }}">
|
|
36
|
+
{{content}}
|
|
37
|
+
</edit_block>
|
|
38
|
+
{% endfor %}
|
|
39
|
+
|
|
40
|
+
请输出如下格式的评估结果,只包含 JSON 数据:
|
|
41
|
+
|
|
42
|
+
```json
|
|
43
|
+
{
|
|
44
|
+
"rank_result": [id1, id2, id3] // id 为 edit_block 的 id,按质量从高到低排序
|
|
45
|
+
}
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
注意,只输出前面要求的 Json 格式就好,不要输出其他内容,Json 需要使用 ```json ```包裹。
|
|
49
|
+
'''
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def rank_modifications(self, generate_result: CodeGenerateResult) -> CodeGenerateResult:
|
|
53
|
+
import time
|
|
54
|
+
start_time = time.time()
|
|
55
|
+
|
|
56
|
+
# 如果只有一个候选,直接返回
|
|
57
|
+
if len(generate_result.contents) == 1:
|
|
58
|
+
logger.info("Only 1 candidate, skip ranking")
|
|
59
|
+
return generate_result
|
|
60
|
+
|
|
61
|
+
logger.info(f"Start ranking {len(generate_result.contents)} candidates")
|
|
62
|
+
generate_times = self.args.generate_times_same_model
|
|
63
|
+
|
|
64
|
+
try:
|
|
65
|
+
# Create a thread pool with generate_times workers
|
|
66
|
+
with ThreadPoolExecutor(max_workers=generate_times) as executor:
|
|
67
|
+
# Submit tasks
|
|
68
|
+
futures = [
|
|
69
|
+
executor.submit(
|
|
70
|
+
self._rank_modifications.with_llm(self.rerank_llm).with_return_type(RankResult).run,
|
|
71
|
+
generate_result
|
|
72
|
+
) for _ in range(generate_times)
|
|
73
|
+
]
|
|
74
|
+
|
|
75
|
+
# Process results as they complete
|
|
76
|
+
for future in as_completed(futures):
|
|
77
|
+
try:
|
|
78
|
+
v = future.result()
|
|
79
|
+
# If we get a valid result, use it and cancel other tasks
|
|
80
|
+
for f in futures:
|
|
81
|
+
f.cancel()
|
|
82
|
+
|
|
83
|
+
elapsed = time.time() - start_time
|
|
84
|
+
logger.info(f"Ranking completed in {elapsed:.2f}s, best candidate index: {v.rank_result[0]}")
|
|
85
|
+
|
|
86
|
+
rerank_contents = [generate_result.contents[i] for i in v.rank_result]
|
|
87
|
+
rerank_conversations = [generate_result.conversations[i] for i in v.rank_result]
|
|
88
|
+
return CodeGenerateResult(contents=rerank_contents,conversations=rerank_conversations)
|
|
89
|
+
except Exception as e:
|
|
90
|
+
logger.warning(f"Ranking request failed: {str(e)}")
|
|
91
|
+
logger.debug(traceback.format_exc())
|
|
92
|
+
continue
|
|
93
|
+
except Exception as e:
|
|
94
|
+
logger.error(f"Ranking process failed: {str(e)}")
|
|
95
|
+
logger.debug(traceback.format_exc())
|
|
96
|
+
|
|
97
|
+
# If all requests failed, use the original codes
|
|
98
|
+
elapsed = time.time() - start_time
|
|
99
|
+
logger.warning(f"All ranking requests failed in {elapsed:.2f}s, using original order")
|
|
100
|
+
return generate_result
|
|
@@ -12,7 +12,7 @@ COMMANDS = {
|
|
|
12
12
|
"/svg": {},
|
|
13
13
|
"/sd": {},
|
|
14
14
|
},
|
|
15
|
-
"/coding": {},
|
|
15
|
+
"/coding": {"/apply": {}, "/next": {}},
|
|
16
16
|
"/chat": {"/new": {}, "/review": {}, "/no_context": {}},
|
|
17
17
|
"/lib": {
|
|
18
18
|
"/add": "",
|
|
@@ -147,7 +147,8 @@ class CommandTextParser:
|
|
|
147
147
|
current_word += v
|
|
148
148
|
self.is_extracted = True
|
|
149
149
|
self.current_word_end_pos = self.pos + 1
|
|
150
|
-
self.current_word_start_pos = self.current_word_end_pos - len(current_word)
|
|
150
|
+
self.current_word_start_pos = self.current_word_end_pos - \
|
|
151
|
+
len(current_word)
|
|
151
152
|
|
|
152
153
|
def previous(self) -> str:
|
|
153
154
|
if self.pos > 1:
|
|
@@ -227,10 +228,11 @@ class CommandTextParser:
|
|
|
227
228
|
self.is_extracted = True
|
|
228
229
|
|
|
229
230
|
self.current_word_end_pos = self.pos + 1
|
|
230
|
-
self.current_word_start_pos = self.current_word_end_pos - len(current_word)
|
|
231
|
+
self.current_word_start_pos = self.current_word_end_pos - \
|
|
232
|
+
len(current_word)
|
|
231
233
|
|
|
232
234
|
def current_word(self) -> str:
|
|
233
|
-
return self.text[self.current_word_start_pos : self.current_word_end_pos]
|
|
235
|
+
return self.text[self.current_word_start_pos: self.current_word_end_pos]
|
|
234
236
|
|
|
235
237
|
def get_current_word(self) -> str:
|
|
236
238
|
return self.current_word()
|
autocoder/common/types.py
CHANGED
|
@@ -1,10 +1,18 @@
|
|
|
1
1
|
from enum import Enum
|
|
2
2
|
import pydantic
|
|
3
|
-
|
|
3
|
+
from typing import List, Dict, Tuple,Any
|
|
4
4
|
class Mode(Enum):
|
|
5
5
|
MULTI_ROUND = "multi_round"
|
|
6
6
|
SINGLE_ROUND = "single_round"
|
|
7
7
|
|
|
8
8
|
class StepNum(pydantic.BaseModel):
|
|
9
9
|
step_num:int= pydantic.Field(1,description="总共步骤数")
|
|
10
|
-
content:int= pydantic.Field(1,description="详细的执行步骤,每个步骤需要包含一个shell/python 代码块")
|
|
10
|
+
content:int= pydantic.Field(1,description="详细的执行步骤,每个步骤需要包含一个shell/python 代码块")
|
|
11
|
+
|
|
12
|
+
class CodeGenerateResult(pydantic.BaseModel):
|
|
13
|
+
contents:List[str]
|
|
14
|
+
conversations:List[List[Dict[str, Any]]]
|
|
15
|
+
|
|
16
|
+
class MergeCodeWithoutEffect(pydantic.BaseModel):
|
|
17
|
+
success_blocks: List[Tuple[str, str]]
|
|
18
|
+
failed_blocks: List[Any]
|