auto-coder 0.1.231__py3-none-any.whl → 0.1.233__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of auto-coder might be problematic. Click here for more details.
- {auto_coder-0.1.231.dist-info → auto_coder-0.1.233.dist-info}/METADATA +1 -1
- {auto_coder-0.1.231.dist-info → auto_coder-0.1.233.dist-info}/RECORD +21 -17
- autocoder/auto_coder.py +32 -6
- autocoder/chat_auto_coder.py +12 -0
- autocoder/common/__init__.py +1 -0
- autocoder/common/utils_code_auto_generate.py +55 -3
- autocoder/dispacher/actions/action.py +1 -1
- autocoder/dispacher/actions/copilot.py +1 -1
- autocoder/dispacher/actions/plugins/action_regex_project.py +1 -1
- autocoder/index/entry.py +286 -0
- autocoder/index/filter/__init__.py +0 -0
- autocoder/index/filter/normal_filter.py +168 -0
- autocoder/index/filter/quick_filter.py +98 -0
- autocoder/index/index.py +6 -393
- autocoder/index/types.py +4 -1
- autocoder/utils/auto_coder_utils/chat_stream_out.py +1 -1
- autocoder/version.py +1 -1
- {auto_coder-0.1.231.dist-info → auto_coder-0.1.233.dist-info}/LICENSE +0 -0
- {auto_coder-0.1.231.dist-info → auto_coder-0.1.233.dist-info}/WHEEL +0 -0
- {auto_coder-0.1.231.dist-info → auto_coder-0.1.233.dist-info}/entry_points.txt +0 -0
- {auto_coder-0.1.231.dist-info → auto_coder-0.1.233.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
from typing import List, Union,Dict,Any
|
|
2
|
+
from autocoder.index.types import IndexItem
|
|
3
|
+
from autocoder.common import SourceCode, AutoCoderArgs
|
|
4
|
+
import byzerllm
|
|
5
|
+
import time
|
|
6
|
+
from autocoder.index.index import IndexManager
|
|
7
|
+
from autocoder.index.types import (
|
|
8
|
+
IndexItem,
|
|
9
|
+
TargetFile,
|
|
10
|
+
VerifyFileRelevance,
|
|
11
|
+
FileList,
|
|
12
|
+
FileNumberList
|
|
13
|
+
)
|
|
14
|
+
from loguru import logger
|
|
15
|
+
from autocoder.utils.queue_communicate import (
|
|
16
|
+
queue_communicate,
|
|
17
|
+
CommunicateEvent,
|
|
18
|
+
CommunicateEventType,
|
|
19
|
+
)
|
|
20
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
21
|
+
import json
|
|
22
|
+
|
|
23
|
+
def get_file_path(file_path):
    """Return *file_path* with a leading ``##`` index marker removed.

    Index listings prefix paths with ``##``; callers need the bare path.
    Paths without the marker are returned unchanged.
    """
    if not file_path.startswith("##"):
        return file_path
    return file_path.strip()[2:]
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class NormalFilter():
    """Multi-phase, LLM-backed file filter.

    Narrows the project's files down to those relevant to the user's query
    in up to three phases:

    * Phase 3 (level 1): query-based target-file selection.
    * Phase 4 (level 2): expansion to related files, only when
      ``args.index_filter_level >= 2``.
    * Phase 5: optional per-file relevance verification through the LLM,
      gated by ``args.index_filter_enable_relevance_verification``.
    """

    def __init__(self, index_manager: IndexManager, stats: Dict[str, Any], sources: List[SourceCode]):
        self.index_manager = index_manager
        self.args = index_manager.args
        # Shared mutable stats dict; counters plus timings are written under
        # stats["timings"]["normal_filter"] (the nested dicts are assumed to
        # be pre-created by the caller -- TODO confirm).
        self.stats = stats
        self.sources = sources

    def filter(self, index_items: List[IndexItem], query: str) -> Dict[str, TargetFile]:
        """Return a mapping of normalized file path -> :class:`TargetFile`.

        NOTE(review): the ``index_items`` and ``query`` parameters are
        currently unused -- the query is taken from ``self.args.query``;
        they are kept for interface compatibility with other filters.

        Returns an empty dict when ``args.skip_filter_index`` is set.
        """
        final_files: Dict[str, TargetFile] = {}
        if not self.args.skip_filter_index:
            if self.args.request_id and not self.args.skip_events:
                queue_communicate.send_event(
                    request_id=self.args.request_id,
                    event=CommunicateEvent(
                        event_type=CommunicateEventType.CODE_INDEX_FILTER_START.value,
                        data=json.dumps({})
                    )
                )
            # Phase 3: Level 1 filtering - Query-based
            logger.info(
                "Phase 3: Performing Level 1 filtering (query-based)...")

            phase_start = time.monotonic()
            target_files = self.index_manager.get_target_files_by_query(self.args.query)

            if target_files:
                for file in target_files.file_list:
                    file_path = file.file_path.strip()
                    final_files[get_file_path(file_path)] = file
                self.stats["level1_filtered"] = len(target_files.file_list)
            phase_end = time.monotonic()
            self.stats["timings"]["normal_filter"]["level1_filter"] = phase_end - phase_start

            # Phase 4: Level 2 filtering - Related files
            if target_files is not None and self.args.index_filter_level >= 2:
                logger.info(
                    "Phase 4: Performing Level 2 filtering (related files)...")
                if self.args.request_id and not self.args.skip_events:
                    queue_communicate.send_event(
                        request_id=self.args.request_id,
                        event=CommunicateEvent(
                            event_type=CommunicateEventType.CODE_INDEX_FILTER_START.value,
                            data=json.dumps({})
                        )
                    )
                phase_start = time.monotonic()
                related_files = self.index_manager.get_related_files(
                    [file.file_path for file in target_files.file_list]
                )
                if related_files is not None:
                    for file in related_files.file_list:
                        file_path = file.file_path.strip()
                        final_files[get_file_path(file_path)] = file
                    self.stats["level2_filtered"] = len(related_files.file_list)
                phase_end = time.monotonic()
                self.stats["timings"]["normal_filter"]["level2_filter"] = phase_end - phase_start

            if not final_files:
                # Fall back to every known source file so downstream steps
                # always have something to work with.
                logger.warning("No related files found, using all files")
                for source in self.sources:
                    final_files[get_file_path(source.module_name)] = TargetFile(
                        file_path=source.module_name,
                        reason="No related files found, use all files",
                    )

            # Phase 5: Relevance verification
            # NOTE(review): this log line fires even when verification is
            # disabled below -- kept as-is to preserve existing log output.
            logger.info("Phase 5: Performing relevance verification...")
            if self.args.index_filter_enable_relevance_verification:
                phase_start = time.monotonic()
                verified_files = {}
                temp_files = list(final_files.values())
                verification_results = []

                def print_verification_results(results):
                    # Render one row per verified file with rich.
                    from rich.table import Table
                    from rich.console import Console

                    console = Console()
                    table = Table(title="File Relevance Verification Results", show_header=True, header_style="bold magenta")
                    table.add_column("File Path", style="cyan", no_wrap=True)
                    table.add_column("Score", justify="right", style="green")
                    table.add_column("Status", style="yellow")
                    table.add_column("Reason/Error")

                    for file_path, score, status, reason in results:
                        table.add_row(
                            file_path,
                            str(score) if score is not None else "N/A",
                            status,
                            reason
                        )

                    console.print(table)

                def verify_single_file(file: TargetFile):
                    # Score one file's relevance against the query.
                    # Returns (path, score, status, reason), or None when no
                    # matching source is found for the file path.
                    for source in self.sources:
                        if source.module_name == file.file_path:
                            file_content = source.source_code
                            try:
                                # BUGFIX: the original called with_llm(llm) with an
                                # undefined name `llm`, raising NameError on every
                                # verification. Use the index manager's LLM.
                                result = self.index_manager.verify_file_relevance.with_llm(self.index_manager.llm).with_return_type(VerifyFileRelevance).run(
                                    file_content=file_content,
                                    query=self.args.query
                                )
                                if result.relevant_score >= self.args.verify_file_relevance_score:
                                    # dict item assignment is atomic under the GIL,
                                    # so concurrent workers can share verified_files.
                                    verified_files[file.file_path] = TargetFile(
                                        file_path=file.file_path,
                                        reason=f"Score:{result.relevant_score}, {result.reason}"
                                    )
                                    return file.file_path, result.relevant_score, "PASS", result.reason
                                else:
                                    return file.file_path, result.relevant_score, "FAIL", result.reason
                            except Exception as e:
                                # Best effort: on verification error, keep the file
                                # rather than silently dropping it.
                                error_msg = str(e)
                                verified_files[file.file_path] = TargetFile(
                                    file_path=file.file_path,
                                    reason=f"Verification failed: {error_msg}"
                                )
                                return file.file_path, None, "ERROR", error_msg
                    return None

                with ThreadPoolExecutor(max_workers=self.args.index_filter_workers) as executor:
                    futures = [executor.submit(verify_single_file, file)
                               for file in temp_files]
                    for future in as_completed(futures):
                        result = future.result()
                        if result:
                            verification_results.append(result)
                        # Throttle between completions to respect model quota limits.
                        time.sleep(self.args.anti_quota_limit)

                # Print verification results in a table
                print_verification_results(verification_results)

                self.stats["verified_files"] = len(verified_files)
                phase_end = time.monotonic()
                self.stats["timings"]["normal_filter"]["relevance_verification"] = phase_end - phase_start

                # Only files that passed (or errored during) verification survive;
                # files whose score fell below the threshold are dropped.
                final_files = verified_files

        return final_files
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
from typing import List, Union,Dict,Any
|
|
2
|
+
from autocoder.index.types import IndexItem
|
|
3
|
+
from autocoder.common import AutoCoderArgs,SourceCode
|
|
4
|
+
import byzerllm
|
|
5
|
+
import time
|
|
6
|
+
from autocoder.index.index import IndexManager
|
|
7
|
+
from autocoder.index.types import (
|
|
8
|
+
IndexItem,
|
|
9
|
+
TargetFile,
|
|
10
|
+
FileNumberList
|
|
11
|
+
)
|
|
12
|
+
from autocoder.rag.token_counter import count_tokens
|
|
13
|
+
from loguru import logger
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def get_file_path(file_path):
    """Drop the ``##`` prefix that index listings attach to file paths."""
    has_marker = file_path.startswith("##")
    return file_path.strip()[2:] if has_marker else file_path
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class QuickFilter():
    """Single-shot LLM file filter.

    Instead of the multi-phase :class:`NormalFilter` flow, sends the whole
    index (file number, path, symbols) to the filter model in one prompt and
    asks for the relevant file numbers. Falls back to the normal filter
    (by returning an empty dict) when the prompt is too long or the model
    call fails.
    """

    def __init__(self, index_manager: IndexManager, stats: Dict[str, Any], sources: List[SourceCode]):
        self.index_manager = index_manager
        self.args = index_manager.args
        # Shared stats dict; timing is written under stats["timings"]["quick_filter"].
        self.stats = stats
        self.sources = sources

    @byzerllm.prompt()
    def quick_filter_files(self, file_meta_list: List[IndexItem], query: str) -> str:
        # NOTE: the docstring below is the runtime prompt template rendered by
        # byzerllm -- it must not be edited or translated.
        '''
        当用户提一个需求的时候,我们需要找到相关的文件,然后阅读这些文件,并且修改其中部分文件。
        现在,给定下面的索引文件:

        <index>
        {{ content }}
        </index>

        索引文件包含文件序号(##[]括起来的部分),文件路径,文件符号信息等。
        下面是用户的查询需求:

        <query>
        {{ query }}
        </query>

        请根据用户的需求,找到相关的文件,并给出文件序号列表。请返回如下json格式:

        ```json
        {
            "file_list": [
                file_index1,
                file_index2,
                ...
            ]
        }
        ```

        特别注意,如果用户的query里 @文件 或者 @@符号,那么被@的文件或者@@的符号必须要返回,并且查看他们依赖的文件是否相关。
        '''
        # Template variables: every index item becomes a "##[n]path\nsymbols" entry.
        file_meta_str = "\n".join([f"##[{index}]{item.module_name}\n{item.symbols}" for index, item in enumerate(file_meta_list)])
        context = {
            "content": file_meta_str,
            "query": query
        }
        return context

    def filter(self, index_items: List[IndexItem], query: str) -> Dict[str, TargetFile]:
        """Return a mapping of normalized file path -> :class:`TargetFile`.

        An empty dict means "quick filtering did not apply" (disabled, prompt
        too long, or model error) and signals the caller to fall back to the
        normal filter.
        """
        final_files: Dict[str, TargetFile] = {}
        if not self.args.skip_filter_index and self.args.index_filter_model:
            start_time = time.monotonic()
            # Re-read the freshest index; the passed-in index_items may be stale.
            index_items = self.index_manager.read_index()

            prompt_str = self.quick_filter_files.prompt(index_items, query)

            # BUGFIX: removed stray debug print(prompt_str) that dumped the
            # entire rendered prompt to stdout on every call.

            tokens_len = count_tokens(prompt_str)

            # 55K-token budget: above that the prompt risks exceeding the
            # model's context window, so defer to the normal filter.
            if tokens_len > 55*1024:
                logger.warning(f"Quick filter prompt is too long, tokens_len: {tokens_len}/{55*1024} fallback to normal filter")
                return final_files

            try:
                # BUGFIX: use the same `query` that sized the prompt above;
                # the original ran with self.args.query, so the token-length
                # guard could check a different prompt than the one sent.
                file_number_list = self.quick_filter_files.with_llm(
                    self.index_manager.index_filter_llm).with_return_type(FileNumberList).run(index_items, query)
            except Exception as e:
                logger.error(f"Quick filter failed, error: {str(e)} fallback to normal filter")
                return final_files

            if file_number_list:
                total = len(index_items)
                for file_number in file_number_list.file_list:
                    # Guard against hallucinated indices from the model; the
                    # original indexed unchecked and could raise IndexError.
                    if not 0 <= file_number < total:
                        logger.warning(f"Quick filter returned invalid file index: {file_number}")
                        continue
                    final_files[get_file_path(index_items[file_number].module_name)] = TargetFile(
                        file_path=index_items[file_number].module_name,
                        reason="Quick Filter"
                    )
            end_time = time.monotonic()
            self.stats["timings"]["quick_filter"] = end_time - start_time
        return final_files
|