auto-coder 0.1.206__py3-none-any.whl → 0.1.208__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of auto-coder might be problematic. Click here for more details.
- {auto_coder-0.1.206.dist-info → auto_coder-0.1.208.dist-info}/METADATA +2 -2
- {auto_coder-0.1.206.dist-info → auto_coder-0.1.208.dist-info}/RECORD +34 -31
- autocoder/agent/auto_demand_organizer.py +212 -0
- autocoder/agent/auto_guess_query.py +284 -0
- autocoder/auto_coder.py +64 -19
- autocoder/auto_coder_rag.py +11 -2
- autocoder/benchmark.py +50 -47
- autocoder/chat_auto_coder.py +125 -17
- autocoder/command_args.py +21 -5
- autocoder/common/__init__.py +7 -1
- autocoder/common/code_auto_generate.py +32 -10
- autocoder/common/code_auto_generate_diff.py +85 -47
- autocoder/common/code_auto_generate_editblock.py +50 -28
- autocoder/common/code_auto_generate_strict_diff.py +79 -45
- autocoder/common/code_auto_merge.py +51 -15
- autocoder/common/code_auto_merge_diff.py +55 -2
- autocoder/common/code_auto_merge_editblock.py +84 -14
- autocoder/common/code_auto_merge_strict_diff.py +69 -32
- autocoder/common/code_modification_ranker.py +100 -0
- autocoder/common/command_completer.py +6 -4
- autocoder/common/types.py +10 -2
- autocoder/dispacher/actions/action.py +141 -94
- autocoder/dispacher/actions/plugins/action_regex_project.py +35 -25
- autocoder/lang.py +9 -1
- autocoder/pyproject/__init__.py +4 -0
- autocoder/rag/long_context_rag.py +2 -0
- autocoder/rag/rag_entry.py +2 -2
- autocoder/suffixproject/__init__.py +2 -0
- autocoder/tsproject/__init__.py +4 -0
- autocoder/version.py +1 -1
- {auto_coder-0.1.206.dist-info → auto_coder-0.1.208.dist-info}/LICENSE +0 -0
- {auto_coder-0.1.206.dist-info → auto_coder-0.1.208.dist-info}/WHEEL +0 -0
- {auto_coder-0.1.206.dist-info → auto_coder-0.1.208.dist-info}/entry_points.txt +0 -0
- {auto_coder-0.1.206.dist-info → auto_coder-0.1.208.dist-info}/top_level.txt +0 -0
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
from typing import List, Dict, Tuple
|
|
2
|
-
from autocoder.common.types import Mode
|
|
2
|
+
from autocoder.common.types import Mode, CodeGenerateResult
|
|
3
3
|
from autocoder.common import AutoCoderArgs
|
|
4
4
|
import byzerllm
|
|
5
5
|
from autocoder.utils.queue_communicate import queue_communicate, CommunicateEvent, CommunicateEventType
|
|
6
6
|
from autocoder.common import sys_prompt
|
|
7
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
8
|
+
import json
|
|
7
9
|
|
|
8
10
|
class CodeAutoGenerateStrictDiff:
|
|
9
11
|
def __init__(
|
|
@@ -12,12 +14,15 @@ class CodeAutoGenerateStrictDiff:
|
|
|
12
14
|
self.llm = llm
|
|
13
15
|
self.args = args
|
|
14
16
|
self.action = action
|
|
17
|
+
self.llms = []
|
|
18
|
+
self.generate_times_same_model = args.generate_times_same_model
|
|
15
19
|
if not self.llm:
|
|
16
20
|
raise ValueError(
|
|
17
21
|
"Please provide a valid model instance to use for code generation."
|
|
18
|
-
)
|
|
19
|
-
|
|
20
|
-
|
|
22
|
+
)
|
|
23
|
+
self.llms = self.llm.get_sub_client("code_model") or [self.llm]
|
|
24
|
+
if not isinstance(self.llms, list):
|
|
25
|
+
self.llms = [self.llms]
|
|
21
26
|
|
|
22
27
|
@byzerllm.prompt(llm=lambda self: self.llm)
|
|
23
28
|
def multi_round_instruction(
|
|
@@ -258,74 +263,106 @@ class CodeAutoGenerateStrictDiff:
|
|
|
258
263
|
|
|
259
264
|
def single_round_run(
    self, query: str, source_content: str
) -> CodeGenerateResult:
    """Generate candidate code for ``query`` in a single round.

    The initial prompt is built from the template named by
    ``self.args.template`` and written to ``self.args.target_file``. When
    ``self.args.human_as_model`` is false, every client in ``self.llms`` is
    queried ``self.generate_times_same_model`` times through a thread pool;
    otherwise the first client is queried ``self.args.human_model_num``
    times, one call after another.

    Args:
        query: Instruction text passed to the prompt template.
        source_content: Source text passed to the prompt template.

    Returns:
        A ``CodeGenerateResult`` whose ``contents`` holds one output per
        completion and whose ``conversations`` holds, for each output, the
        prompt messages followed by that assistant reply.

    Raises:
        ValueError: If ``self.args.template`` is neither ``"common"`` nor
            ``"auto_implement"``.
    """
    llm_config = {"human_as_model": self.args.human_as_model}

    if self.args.template == "common":
        init_prompt = self.single_round_instruction.prompt(
            instruction=query, content=source_content, context=self.args.context
        )
    elif self.args.template == "auto_implement":
        init_prompt = self.auto_implement_function.prompt(
            instruction=query, content=source_content
        )
    else:
        # Without this branch init_prompt stays unbound and the file write
        # below dies with an UnboundLocalError that hides the real cause.
        raise ValueError(f"Unsupported template: {self.args.template!r}")

    with open(self.args.target_file, "w") as file:
        file.write(init_prompt)

    conversations = []
    if self.args.system_prompt and self.args.system_prompt.strip() == "claude":
        conversations.append(
            {"role": "system", "content": sys_prompt.claude_sys_prompt.prompt()})
    elif self.args.system_prompt:
        conversations.append(
            {"role": "system", "content": self.args.system_prompt})
    conversations.append({"role": "user", "content": init_prompt})

    if self.args.request_id and not self.args.skip_events:
        _ = queue_communicate.send_event(
            request_id=self.args.request_id,
            event=CommunicateEvent(
                event_type=CommunicateEventType.CODE_GENERATE_START.value,
                data=json.dumps({}, ensure_ascii=False),
            ),
        )

    results = []
    if not self.args.human_as_model:
        # One worker per (client, repetition) pair so all requests overlap.
        worker_count = len(self.llms) * self.generate_times_same_model
        with ThreadPoolExecutor(max_workers=worker_count) as executor:
            futures = [
                executor.submit(
                    llm.chat_oai, conversations=conversations, llm_config=llm_config)
                for llm in self.llms
                for _ in range(self.generate_times_same_model)
            ]
            # Collect in submission order so outputs stay aligned with clients.
            results = [future.result()[0].output for future in futures]
    else:
        for _ in range(self.args.human_model_num):
            v = self.llms[0].chat_oai(
                conversations=conversations, llm_config=llm_config)
            results.append(v[0].output)

    # Each candidate gets its own copy of the prompt messages plus its reply.
    conversations_list = [
        conversations + [{"role": "assistant", "content": result}]
        for result in results
    ]

    if self.args.request_id and not self.args.skip_events:
        _ = queue_communicate.send_event(
            request_id=self.args.request_id,
            event=CommunicateEvent(
                event_type=CommunicateEventType.CODE_GENERATE_END.value,
                data=json.dumps({}, ensure_ascii=False),
            ),
        )

    return CodeGenerateResult(contents=results, conversations=conversations_list)
|
|
301
333
|
|
|
302
334
|
def multi_round_run(
|
|
303
335
|
self, query: str, source_content: str, max_steps: int = 10
|
|
304
|
-
) ->
|
|
336
|
+
) -> CodeGenerateResult:
|
|
305
337
|
llm_config = {"human_as_model": self.args.human_as_model}
|
|
306
338
|
result = []
|
|
307
339
|
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
340
|
+
if self.args.template == "common":
|
|
341
|
+
init_prompt = self.multi_round_instruction.prompt(
|
|
342
|
+
instruction=query, content=source_content, context=self.args.context
|
|
343
|
+
)
|
|
344
|
+
elif self.args.template == "auto_implement":
|
|
345
|
+
init_prompt = self.auto_implement_function.prompt(
|
|
346
|
+
instruction=query, content=source_content
|
|
347
|
+
)
|
|
311
348
|
|
|
312
|
-
conversations = [
|
|
349
|
+
conversations = []
|
|
350
|
+
# conversations.append({"role": "system", "content": sys_prompt.prompt()})
|
|
351
|
+
conversations.append({"role": "user", "content": init_prompt})
|
|
313
352
|
|
|
314
353
|
with open(self.args.target_file, "w") as file:
|
|
315
354
|
file.write(init_prompt)
|
|
316
|
-
|
|
317
|
-
|
|
355
|
+
|
|
356
|
+
code_llm = self.llms[0]
|
|
357
|
+
t = code_llm.chat_oai(conversations=conversations,
|
|
358
|
+
llm_config=llm_config)
|
|
318
359
|
|
|
319
360
|
result.append(t[0].output)
|
|
320
361
|
|
|
321
362
|
conversations.append({"role": "assistant", "content": t[0].output})
|
|
322
363
|
|
|
323
|
-
if
|
|
324
|
-
"
|
|
325
|
-
or "/done" in t[0].output
|
|
326
|
-
or "__EOF__" in t[0].output
|
|
327
|
-
):
|
|
328
|
-
return result, conversations
|
|
364
|
+
if "__完成__" in t[0].output or "/done" in t[0].output or "__EOF__" in t[0].output:
|
|
365
|
+
return CodeGenerateResult(contents=["\n\n".join(result)], conversations=[conversations])
|
|
329
366
|
|
|
330
367
|
current_step = 0
|
|
331
368
|
|
|
@@ -336,17 +373,14 @@ class CodeAutoGenerateStrictDiff:
|
|
|
336
373
|
with open(self.args.target_file, "w") as file:
|
|
337
374
|
file.write("继续")
|
|
338
375
|
|
|
339
|
-
t =
|
|
376
|
+
t = code_llm.chat_oai(
|
|
377
|
+
conversations=conversations, llm_config=llm_config)
|
|
340
378
|
|
|
341
379
|
result.append(t[0].output)
|
|
342
380
|
conversations.append({"role": "assistant", "content": t[0].output})
|
|
343
381
|
current_step += 1
|
|
344
382
|
|
|
345
|
-
if
|
|
346
|
-
"
|
|
347
|
-
or "/done" in t[0].output
|
|
348
|
-
or "__EOF__" in t[0].output
|
|
349
|
-
):
|
|
350
|
-
return result, conversations
|
|
383
|
+
if "__完成__" in t[0].output or "/done" in t[0].output or "__EOF__" in t[0].output:
|
|
384
|
+
return CodeGenerateResult(contents=["\n\n".join(result)], conversations=[conversations])
|
|
351
385
|
|
|
352
|
-
return result, conversations
|
|
386
|
+
return CodeGenerateResult(contents=["\n\n".join(result)], conversations=[conversations])
|
|
@@ -2,10 +2,12 @@
|
|
|
2
2
|
import os
|
|
3
3
|
from byzerllm.utils.client import code_utils
|
|
4
4
|
from autocoder.common import AutoCoderArgs,git_utils
|
|
5
|
-
from typing import List
|
|
5
|
+
from typing import List,Union,Tuple
|
|
6
6
|
import pydantic
|
|
7
7
|
import byzerllm
|
|
8
8
|
from loguru import logger
|
|
9
|
+
from autocoder.common.types import CodeGenerateResult, MergeCodeWithoutEffect
|
|
10
|
+
from autocoder.common.code_modification_ranker import CodeModificationRanker
|
|
9
11
|
import hashlib
|
|
10
12
|
|
|
11
13
|
class PathAndCode(pydantic.BaseModel):
|
|
@@ -58,7 +60,26 @@ class CodeAutoMerge:
|
|
|
58
60
|
elif start_marker_count > 0:
|
|
59
61
|
block.append(line)
|
|
60
62
|
|
|
61
|
-
return path_and_code_list
|
|
63
|
+
return path_and_code_list
|
|
64
|
+
|
|
65
|
+
def merge_code(self, generate_result: CodeGenerateResult, force_skip_git: bool = False):
    """Select one candidate from ``generate_result`` and merge it.

    The candidate comes from ``choose_best_choice``; its first content entry
    is passed to ``_merge_code`` together with ``force_skip_git``.

    Returns:
        The result object returned by ``choose_best_choice``.
    """
    chosen = self.choose_best_choice(generate_result)
    best_content = chosen.contents[0]
    self._merge_code(best_content, force_skip_git)
    return chosen
|
|
69
|
+
|
|
70
|
+
def choose_best_choice(self, generate_result: CodeGenerateResult) -> CodeGenerateResult:
    """Reduce ``generate_result`` to a single candidate.

    A result that already holds exactly one content entry is returned as is.
    Otherwise the candidates are ranked by ``CodeModificationRanker`` and the
    first ranked candidate whose dry-run merge reports no failed blocks is
    returned. If every candidate has failed blocks, the first ranked
    candidate is returned.
    """
    if len(generate_result.contents) == 1:
        return generate_result

    ranked = CodeModificationRanker(self.llm, self.args).rank_modifications(generate_result)

    for candidate, history in zip(ranked.contents, ranked.conversations):
        if self._merge_code_without_effect(candidate).failed_blocks:
            continue
        return CodeGenerateResult(contents=[candidate], conversations=[history])

    # Nothing merges cleanly: fall back to the top-ranked candidate.
    return CodeGenerateResult(contents=[ranked.contents[0]], conversations=[ranked.conversations[0]])
|
|
62
83
|
|
|
63
84
|
|
|
64
85
|
def parse_text(self, text: str) -> List[PathAndCode]:
|
|
@@ -99,7 +120,34 @@ class CodeAutoMerge:
|
|
|
99
120
|
Error: {{ error }}
|
|
100
121
|
'''
|
|
101
122
|
|
|
102
|
-
def
|
|
123
|
+
def _merge_code_without_effect(self, content: str) -> MergeCodeWithoutEffect:
    """Dry-run the merge of ``content`` without writing files or using git.

    Every block parsed by ``parse_whole_text_v2`` carries the full text for
    one path. A block counts as failed when its path already exists on disk
    and the text tracked for that path equals the block's text.

    Returns:
        A ``MergeCodeWithoutEffect`` whose ``success_blocks`` lists
        ``(file_path, content)`` for every path tracked during the run and
        whose ``failed_blocks`` lists ``(file_path, content)`` for the blocks
        that failed.
    """
    tracked = {}
    rejected = []

    for parsed in self.parse_whole_text_v2(content):
        target = parsed.path

        if not os.path.exists(target):
            # New file: the block text becomes the file content.
            tracked[target] = parsed.content
            continue

        if target not in tracked:
            with open(target, "r") as handle:
                tracked[target] = handle.read()

        if tracked[target] == parsed.content:
            rejected.append((target, parsed.content))
        else:
            tracked[target] = parsed.content

    return MergeCodeWithoutEffect(
        success_blocks=list(tracked.items()),
        failed_blocks=rejected,
    )
|
|
149
|
+
|
|
150
|
+
def _merge_code(self, content: str,force_skip_git:bool=False):
|
|
103
151
|
total = 0
|
|
104
152
|
|
|
105
153
|
file_content = open(self.args.file).read()
|
|
@@ -114,18 +162,6 @@ class CodeAutoMerge:
|
|
|
114
162
|
logger.error(self.git_require_msg(source_dir=self.args.source_dir,error=str(e)))
|
|
115
163
|
return
|
|
116
164
|
|
|
117
|
-
# codes = code_utils.extract_code(content)
|
|
118
|
-
# for (lang,code) in codes:
|
|
119
|
-
# parsed_blocks = self.parse_text(code)
|
|
120
|
-
|
|
121
|
-
# for block in parsed_blocks:
|
|
122
|
-
# file_path = block.path
|
|
123
|
-
# os.makedirs(os.path.dirname(file_path), exist_ok=True)
|
|
124
|
-
|
|
125
|
-
# with open(file_path, "w") as f:
|
|
126
|
-
# logger.info(f"Upsert path: {file_path}")
|
|
127
|
-
# total += 1
|
|
128
|
-
# f.write(block.content)
|
|
129
165
|
codes = self.parse_whole_text_v2(content)
|
|
130
166
|
for block in codes:
|
|
131
167
|
file_path = block.path
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import difflib
|
|
3
3
|
from autocoder.common import AutoCoderArgs,git_utils
|
|
4
|
-
from typing import List
|
|
4
|
+
from typing import List,Union,Tuple
|
|
5
5
|
import pydantic
|
|
6
6
|
import byzerllm
|
|
7
7
|
from loguru import logger
|
|
@@ -15,6 +15,8 @@ from autocoder.common.search_replace import (
|
|
|
15
15
|
flexible_search_and_replace,
|
|
16
16
|
search_and_replace,
|
|
17
17
|
)
|
|
18
|
+
from autocoder.common.types import CodeGenerateResult, MergeCodeWithoutEffect
|
|
19
|
+
from autocoder.common.code_modification_ranker import CodeModificationRanker
|
|
18
20
|
|
|
19
21
|
class PathAndCode(pydantic.BaseModel):
|
|
20
22
|
path: str
|
|
@@ -374,6 +376,25 @@ class CodeAutoMergeDiff:
|
|
|
374
376
|
edits.append((path, hunk))
|
|
375
377
|
|
|
376
378
|
return edits
|
|
379
|
+
|
|
380
|
+
def merge_code(self, generate_result: CodeGenerateResult, force_skip_git: bool = False):
    """Select one candidate from ``generate_result`` and merge it.

    The candidate comes from ``choose_best_choice``; its first content entry
    is passed to ``_merge_code`` together with ``force_skip_git``.

    Returns:
        The result object returned by ``choose_best_choice``.
    """
    chosen = self.choose_best_choice(generate_result)
    best_content = chosen.contents[0]
    self._merge_code(best_content, force_skip_git)
    return chosen
|
|
384
|
+
|
|
385
|
+
def choose_best_choice(self, generate_result: CodeGenerateResult) -> CodeGenerateResult:
    """Reduce ``generate_result`` to a single candidate.

    A result that already holds exactly one content entry is returned as is.
    Otherwise the candidates are ranked by ``CodeModificationRanker`` and the
    first ranked candidate whose dry-run merge reports no failed blocks is
    returned. If every candidate has failed blocks, the first ranked
    candidate is returned.
    """
    if len(generate_result.contents) == 1:
        return generate_result

    ranked = CodeModificationRanker(self.llm, self.args).rank_modifications(generate_result)

    for candidate, history in zip(ranked.contents, ranked.conversations):
        if self._merge_code_without_effect(candidate).failed_blocks:
            continue
        return CodeGenerateResult(contents=[candidate], conversations=[history])

    # Nothing merges cleanly: fall back to the top-ranked candidate.
    return CodeGenerateResult(contents=[ranked.contents[0]], conversations=[ranked.conversations[0]])
|
|
377
398
|
|
|
378
399
|
@byzerllm.prompt(render="jinja2")
|
|
379
400
|
def git_require_msg(self,source_dir:str,error:str)->str:
|
|
@@ -450,7 +471,39 @@ class CodeAutoMergeDiff:
|
|
|
450
471
|
errors += other_hunks_applied
|
|
451
472
|
raise ValueError(errors)
|
|
452
473
|
|
|
453
|
-
def
|
|
474
|
+
def _merge_code_without_effect(self, content: str) -> MergeCodeWithoutEffect:
    """Dry-run the diff edits in ``content`` without writing files or using git.

    Edits come from ``get_edits`` as ``(path, hunk)`` pairs. For a path that
    does not exist yet, the "after" side of the hunk becomes the tracked
    text. For an existing path the hunk is applied with ``do_replace`` to the
    text tracked so far; a falsy result marks the hunk as failed.

    Returns:
        A ``MergeCodeWithoutEffect`` whose ``success_blocks`` lists
        ``(file_path, content)`` for every path tracked during the run and
        whose ``failed_blocks`` lists ``(file_path, hunk_text)`` for hunks
        that could not be applied.
    """
    tracked = {}
    rejected = []

    for rel_path, hunk in self.get_edits(content):
        target = self.abs_root_path(rel_path)

        if not os.path.exists(target):
            _, after = hunk_to_before_after(hunk)
            tracked[target] = after
            continue

        if target not in tracked:
            with open(target, "r") as handle:
                tracked[target] = handle.read()

        # Apply on top of earlier hunks for the same file.
        patched = do_replace(target, tracked[target], hunk)
        if not patched:
            rejected.append((target, "\n".join(hunk)))
        else:
            tracked[target] = patched

    return MergeCodeWithoutEffect(
        success_blocks=list(tracked.items()),
        failed_blocks=rejected,
    )
|
|
505
|
+
|
|
506
|
+
def _merge_code(self, content: str,force_skip_git:bool=False):
|
|
454
507
|
total = 0
|
|
455
508
|
|
|
456
509
|
file_content = open(self.args.file).read()
|
|
@@ -7,7 +7,6 @@ from autocoder.utils.queue_communicate import (
|
|
|
7
7
|
CommunicateEvent,
|
|
8
8
|
CommunicateEventType,
|
|
9
9
|
)
|
|
10
|
-
from typing import List
|
|
11
10
|
import pydantic
|
|
12
11
|
import byzerllm
|
|
13
12
|
from loguru import logger
|
|
@@ -18,6 +17,9 @@ from rich.console import Console
|
|
|
18
17
|
from rich.panel import Panel
|
|
19
18
|
from rich.syntax import Syntax
|
|
20
19
|
import json
|
|
20
|
+
from typing import Union, List, Tuple
|
|
21
|
+
from autocoder.common.types import CodeGenerateResult, MergeCodeWithoutEffect
|
|
22
|
+
from autocoder.common.code_modification_ranker import CodeModificationRanker
|
|
21
23
|
|
|
22
24
|
|
|
23
25
|
class PathAndCode(pydantic.BaseModel):
|
|
@@ -138,7 +140,7 @@ class CodeAutoMergeEditBlock:
|
|
|
138
140
|
elif end_marker(line, index) and start_marker_count == 1:
|
|
139
141
|
start_marker_count -= 1
|
|
140
142
|
if block:
|
|
141
|
-
if current_editblock_mode == "two_line_mode":
|
|
143
|
+
if current_editblock_mode == "two_line_mode":
|
|
142
144
|
path = block[0].split(":", 1)[1].strip()
|
|
143
145
|
content = "\n".join(block[1:])
|
|
144
146
|
else:
|
|
@@ -152,6 +154,25 @@ class CodeAutoMergeEditBlock:
|
|
|
152
154
|
|
|
153
155
|
return path_and_code_list
|
|
154
156
|
|
|
157
|
+
def merge_code(self, generate_result: CodeGenerateResult, force_skip_git: bool = False):
    """Select one candidate from ``generate_result`` and merge it.

    The candidate comes from ``choose_best_choice``; its first content entry
    is passed to ``_merge_code`` together with ``force_skip_git``.

    Returns:
        The result object returned by ``choose_best_choice``.
    """
    chosen = self.choose_best_choice(generate_result)
    best_content = chosen.contents[0]
    self._merge_code(best_content, force_skip_git)
    return chosen
|
|
161
|
+
|
|
162
|
+
def choose_best_choice(self, generate_result: CodeGenerateResult) -> CodeGenerateResult:
    """Reduce ``generate_result`` to a single candidate.

    A result that already holds exactly one content entry is returned as is.
    Otherwise the candidates are ranked by ``CodeModificationRanker`` and the
    first ranked candidate whose dry-run merge reports no failed blocks is
    returned. If every candidate has failed blocks, the first ranked
    candidate is returned.
    """
    if len(generate_result.contents) == 1:
        return generate_result

    ranked = CodeModificationRanker(self.llm, self.args).rank_modifications(generate_result)

    for candidate, history in zip(ranked.contents, ranked.conversations):
        if self._merge_code_without_effect(candidate).failed_blocks:
            continue
        return CodeGenerateResult(contents=[candidate], conversations=[history])

    # Nothing merges cleanly: fall back to the top-ranked candidate.
    return CodeGenerateResult(contents=[ranked.contents[0]], conversations=[ranked.conversations[0]])
|
|
175
|
+
|
|
155
176
|
@byzerllm.prompt()
|
|
156
177
|
def git_require_msg(self, source_dir: str, error: str) -> str:
|
|
157
178
|
"""
|
|
@@ -197,9 +218,57 @@ class CodeAutoMergeEditBlock:
|
|
|
197
218
|
if in_updated:
|
|
198
219
|
updates.append(line)
|
|
199
220
|
result.append((edit.path, "\n".join(heads), "\n".join(updates)))
|
|
200
|
-
return result
|
|
221
|
+
return result
|
|
222
|
+
|
|
223
|
+
def _merge_code_without_effect(self, content: str) -> MergeCodeWithoutEffect:
    """Dry-run the edit blocks in ``content`` without linting, git or file writes.

    Edits come from ``get_edits`` as ``(file_path, head, update)`` triples.
    For a path that does not exist yet, ``update`` becomes the tracked text.
    For an existing path, ``head`` is replaced once by ``update`` (or
    ``update`` is appended after a newline when ``head`` is empty). If that
    leaves the text unchanged, the best matching window found by
    ``TextSimilarity`` is replaced instead, provided its similarity exceeds
    ``self.args.editblock_similarity``. An edit that changes nothing is
    recorded as failed.

    Returns:
        A ``MergeCodeWithoutEffect`` whose ``success_blocks`` lists
        ``(file_path, content)`` for every path tracked during the run and
        whose ``failed_blocks`` lists ``(file_path, head, update)`` for edits
        that could not be applied.
    """
    tracked = {}
    rejected = []

    for file_path, head, update in self.get_edits(content):
        if not os.path.exists(file_path):
            tracked[file_path] = update
            continue

        if file_path not in tracked:
            with open(file_path, "r") as handle:
                tracked[file_path] = handle.read()
        current = tracked[file_path]

        # Exact match first; an empty head means "append to the file".
        if head:
            candidate = current.replace(head, update, 1)
        else:
            candidate = current + "\n" + update

        # Fall back to the closest window when the exact search text is absent.
        if head and candidate == current:
            similarity, best_window = TextSimilarity(
                head, current
            ).get_best_matching_window()
            if similarity > self.args.editblock_similarity:
                candidate = current.replace(best_window, update, 1)

        if candidate == current:
            rejected.append((file_path, head, update))
        else:
            tracked[file_path] = candidate

    return MergeCodeWithoutEffect(
        success_blocks=list(tracked.items()),
        failed_blocks=rejected,
    )
|
|
270
|
+
|
|
271
|
+
def _merge_code(self, content: str, force_skip_git: bool = False):
|
|
203
272
|
file_content = open(self.args.file).read()
|
|
204
273
|
md5 = hashlib.md5(file_content.encode("utf-8")).hexdigest()
|
|
205
274
|
file_name = os.path.basename(self.args.file)
|
|
@@ -207,15 +276,15 @@ class CodeAutoMergeEditBlock:
|
|
|
207
276
|
codes = self.get_edits(content)
|
|
208
277
|
changes_to_make = []
|
|
209
278
|
changes_made = False
|
|
210
|
-
unmerged_blocks = []
|
|
211
|
-
merged_blocks = []
|
|
279
|
+
unmerged_blocks = []
|
|
280
|
+
merged_blocks = []
|
|
212
281
|
|
|
213
282
|
# First, check if there are any changes to be made
|
|
214
283
|
file_content_mapping = {}
|
|
215
284
|
for block in codes:
|
|
216
285
|
file_path, head, update = block
|
|
217
286
|
if not os.path.exists(file_path):
|
|
218
|
-
changes_to_make.append((file_path, None, update))
|
|
287
|
+
changes_to_make.append((file_path, None, update))
|
|
219
288
|
file_content_mapping[file_path] = update
|
|
220
289
|
merged_blocks.append((file_path, "", update, 1))
|
|
221
290
|
changes_made = True
|
|
@@ -235,7 +304,7 @@ class CodeAutoMergeEditBlock:
|
|
|
235
304
|
(file_path, existing_content, new_content))
|
|
236
305
|
file_content_mapping[file_path] = new_content
|
|
237
306
|
merged_blocks.append((file_path, head, update, 1))
|
|
238
|
-
changes_made = True
|
|
307
|
+
changes_made = True
|
|
239
308
|
else:
|
|
240
309
|
# If the SEARCH BLOCK is not found exactly, then try to use
|
|
241
310
|
# the similarity ratio to find the best matching block
|
|
@@ -250,8 +319,9 @@ class CodeAutoMergeEditBlock:
|
|
|
250
319
|
(file_path, existing_content, new_content)
|
|
251
320
|
)
|
|
252
321
|
file_content_mapping[file_path] = new_content
|
|
253
|
-
merged_blocks.append(
|
|
254
|
-
|
|
322
|
+
merged_blocks.append(
|
|
323
|
+
(file_path, head, update, similarity))
|
|
324
|
+
changes_made = True
|
|
255
325
|
else:
|
|
256
326
|
unmerged_blocks.append(
|
|
257
327
|
(file_path, head, update, similarity))
|
|
@@ -317,10 +387,10 @@ class CodeAutoMergeEditBlock:
|
|
|
317
387
|
file_path, head, update, similarity = code
|
|
318
388
|
event_data.append(
|
|
319
389
|
{
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
390
|
+
"file_path": file_path,
|
|
391
|
+
"head": head,
|
|
392
|
+
"update": update,
|
|
393
|
+
"similarity": similarity,
|
|
324
394
|
}
|
|
325
395
|
)
|
|
326
396
|
|
|
@@ -2,12 +2,14 @@ import os
|
|
|
2
2
|
import difflib
|
|
3
3
|
import diff_match_patch as dmp_module
|
|
4
4
|
from autocoder.common import AutoCoderArgs, git_utils
|
|
5
|
-
from typing import List
|
|
5
|
+
from typing import List,Tuple
|
|
6
6
|
import pydantic
|
|
7
7
|
import byzerllm
|
|
8
8
|
from loguru import logger
|
|
9
9
|
import hashlib
|
|
10
10
|
from pathlib import Path
|
|
11
|
+
from autocoder.common.types import CodeGenerateResult, MergeCodeWithoutEffect
|
|
12
|
+
from autocoder.common.code_modification_ranker import CodeModificationRanker
|
|
11
13
|
|
|
12
14
|
class PathAndCode(pydantic.BaseModel):
|
|
13
15
|
path: str
|
|
@@ -125,7 +127,26 @@ class CodeAutoMergeStrictDiff:
|
|
|
125
127
|
elif start_marker_count > 0:
|
|
126
128
|
block.append(line)
|
|
127
129
|
|
|
128
|
-
return path_and_code_list
|
|
130
|
+
return path_and_code_list
|
|
131
|
+
|
|
132
|
+
def merge_code(self, generate_result: CodeGenerateResult, force_skip_git: bool = False):
    """Select one candidate from ``generate_result`` and merge it.

    The candidate comes from ``choose_best_choice``; its first content entry
    is passed to ``_merge_code`` together with ``force_skip_git``.

    Returns:
        The result object returned by ``choose_best_choice``.
    """
    chosen = self.choose_best_choice(generate_result)
    best_content = chosen.contents[0]
    self._merge_code(best_content, force_skip_git)
    return chosen
|
|
136
|
+
|
|
137
|
+
def choose_best_choice(self, generate_result: CodeGenerateResult) -> CodeGenerateResult:
    """Reduce ``generate_result`` to a single candidate.

    A result that already holds exactly one content entry is returned as is.
    Otherwise the candidates are ranked by ``CodeModificationRanker`` and the
    first ranked candidate whose dry-run merge reports no failed blocks is
    returned. If every candidate has failed blocks, the first ranked
    candidate is returned.
    """
    if len(generate_result.contents) == 1:
        return generate_result

    ranked = CodeModificationRanker(self.llm, self.args).rank_modifications(generate_result)

    for candidate, history in zip(ranked.contents, ranked.conversations):
        if self._merge_code_without_effect(candidate).failed_blocks:
            continue
        return CodeGenerateResult(contents=[candidate], conversations=[history])

    # Nothing merges cleanly: fall back to the top-ranked candidate.
    return CodeGenerateResult(contents=[ranked.contents[0]], conversations=[ranked.conversations[0]])
|
|
129
150
|
|
|
130
151
|
|
|
131
152
|
def abs_root_path(self, path):
|
|
@@ -134,7 +155,52 @@ class CodeAutoMergeStrictDiff:
|
|
|
134
155
|
res = Path(self.args.source_dir) / path
|
|
135
156
|
return safe_abs_path(res)
|
|
136
157
|
|
|
137
|
-
def
|
|
158
|
+
def _merge_code_without_effect(self, content: str) -> MergeCodeWithoutEffect:
    """Dry-run the diff blocks in ``content`` and report which ones fail.

    Blocks come from ``parse_diff_block``. For a path that does not exist
    yet, the block text becomes the tracked text. For an existing path the
    block text is parsed with ``patch.fromstring`` and applied with
    ``apply``; a falsy result or any exception marks the block as failed.

    Returns:
        A ``MergeCodeWithoutEffect`` whose ``success_blocks`` lists
        ``(file_path, content)`` for every path tracked during the run and
        whose ``failed_blocks`` lists ``(file_path, diff_text)`` for blocks
        that could not be applied.
    """
    tracked = {}
    rejected = []

    for diff_block in self.parse_diff_block(content):
        path = diff_block.path
        content = diff_block.content
        full_path = self.abs_root_path(path)

        if not os.path.exists(full_path):
            tracked[full_path] = content
            continue

        if full_path not in tracked:
            with open(full_path, "r") as handle:
                tracked[full_path] = handle.read()

        try:
            import patch
            patch_obj = patch.fromstring(content.encode('utf-8'))
            # Relative diff paths are resolved against the project source dir.
            root_path = None if path.startswith(self.args.source_dir) else self.args.source_dir

            temp_content = tracked[full_path]
            # NOTE(review): presumably `patch` is the third-party python-patch
            # module, whose documented apply() takes only `strip` and `root`.
            # If so, `content=` raises TypeError and every existing-file block
            # lands in the except branch below — confirm against the
            # installed version.
            success = patch_obj.apply(root=root_path, content=temp_content)
            if not success:
                rejected.append((full_path, content))
            else:
                tracked[full_path] = temp_content
        except Exception as e:
            logger.warning(f"Failed to apply patch to {full_path}: {str(e)}")
            rejected.append((full_path, content))

    return MergeCodeWithoutEffect(
        success_blocks=list(tracked.items()),
        failed_blocks=rejected,
    )
|
|
202
|
+
|
|
203
|
+
def _merge_code(self, content: str, force_skip_git: bool = False):
|
|
138
204
|
total = 0
|
|
139
205
|
|
|
140
206
|
file_content = open(self.args.file).read()
|
|
@@ -154,35 +220,6 @@ class CodeAutoMergeStrictDiff:
|
|
|
154
220
|
path = diff_blocks.path
|
|
155
221
|
content = diff_blocks.content
|
|
156
222
|
|
|
157
|
-
# unidiff_patch = unidiff.PatchSet(content)
|
|
158
|
-
# dmp_patches = []
|
|
159
|
-
# for patched_file in unidiff_patch:
|
|
160
|
-
# diffs = []
|
|
161
|
-
# start_line = 0
|
|
162
|
-
# for hunk in patched_file:
|
|
163
|
-
# start_line = hunk.target_start - 1 # 获取hunk的起始位置
|
|
164
|
-
# for line in hunk:
|
|
165
|
-
# if line.is_added:
|
|
166
|
-
# diffs.append(dmp_module.diff('', line.value.strip(), start_line))
|
|
167
|
-
# start_line += 1
|
|
168
|
-
# elif line.is_removed:
|
|
169
|
-
# diffs.append(dmp_module.diff(line.value.strip(), '', start_line))
|
|
170
|
-
# else:
|
|
171
|
-
# start_line += 1
|
|
172
|
-
# patch_text = dmp.patch_make(diffs)
|
|
173
|
-
# dmp_patches.extend(patch_text)
|
|
174
|
-
|
|
175
|
-
# with open(path, 'r') as f:
|
|
176
|
-
# original_content = f.read()
|
|
177
|
-
|
|
178
|
-
# dmp = dmp_module.diff_match_patch()
|
|
179
|
-
# new_text, results = dmp.patch_apply(dmp_patches, original_content)
|
|
180
|
-
# if any(results) is False:
|
|
181
|
-
# raise Exception("Error applying diff to file: " + path)
|
|
182
|
-
# with open(self.abs_root_path(path), 'w') as f:
|
|
183
|
-
# f.write(new_text)
|
|
184
|
-
# total += 1
|
|
185
|
-
|
|
186
223
|
import patch
|
|
187
224
|
patch_obj = patch.fromstring(content.encode('utf-8'))
|
|
188
225
|
root_path = None
|