auto-coder 0.1.231__py3-none-any.whl → 0.1.233__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of auto-coder might be problematic.

@@ -0,0 +1,168 @@
+ from typing import List, Union,Dict,Any
+ from autocoder.index.types import IndexItem
+ from autocoder.common import SourceCode, AutoCoderArgs
+ import byzerllm
+ import time
+ from autocoder.index.index import IndexManager
+ from autocoder.index.types import (
+     IndexItem,
+     TargetFile,
+     VerifyFileRelevance,
+     FileList,
+     FileNumberList
+ )
+ from loguru import logger
+ from autocoder.utils.queue_communicate import (
+     queue_communicate,
+     CommunicateEvent,
+     CommunicateEventType,
+ )
+ from concurrent.futures import ThreadPoolExecutor, as_completed
+ import json
+
+ def get_file_path(file_path):
+     if file_path.startswith("##"):
+         return file_path.strip()[2:]
+     return file_path
+
+
+ class NormalFilter():
+     def __init__(self, index_manager: IndexManager,stats:Dict[str,Any],sources:List[SourceCode]):
+         self.index_manager = index_manager
+         self.args = index_manager.args
+         self.stats = stats
+         self.sources = sources
+
+     def filter(self, index_items: List[IndexItem], query: str) -> Dict[str, TargetFile]:
+         final_files: Dict[str, TargetFile] = {}
+         if not self.args.skip_filter_index:
+             if self.args.request_id and not self.args.skip_events:
+                 queue_communicate.send_event(
+                     request_id=self.args.request_id,
+                     event=CommunicateEvent(
+                         event_type=CommunicateEventType.CODE_INDEX_FILTER_START.value,
+                         data=json.dumps({})
+                     )
+                 )
+             # Phase 3: Level 1 filtering - Query-based
+             logger.info(
+                 "Phase 3: Performing Level 1 filtering (query-based)...")
+
+             phase_start = time.monotonic()
+             target_files = self.index_manager.get_target_files_by_query(self.args.query)
+
+             if target_files:
+                 for file in target_files.file_list:
+                     file_path = file.file_path.strip()
+                     final_files[get_file_path(file_path)] = file
+                 self.stats["level1_filtered"] = len(target_files.file_list)
+             phase_end = time.monotonic()
+             self.stats["timings"]["normal_filter"]["level1_filter"] = phase_end - phase_start
+
+             # Phase 4: Level 2 filtering - Related files
+             if target_files is not None and self.args.index_filter_level >= 2:
+                 logger.info(
+                     "Phase 4: Performing Level 2 filtering (related files)...")
+                 if self.args.request_id and not self.args.skip_events:
+                     queue_communicate.send_event(
+                         request_id=self.args.request_id,
+                         event=CommunicateEvent(
+                             event_type=CommunicateEventType.CODE_INDEX_FILTER_START.value,
+                             data=json.dumps({})
+                         )
+                     )
+                 phase_start = time.monotonic()
+                 related_files = self.index_manager.get_related_files(
+                     [file.file_path for file in target_files.file_list]
+                 )
+                 if related_files is not None:
+                     for file in related_files.file_list:
+                         file_path = file.file_path.strip()
+                         final_files[get_file_path(file_path)] = file
+                     self.stats["level2_filtered"] = len(related_files.file_list)
+                 phase_end = time.monotonic()
+                 self.stats["timings"]["normal_filter"]["level2_filter"] = phase_end - phase_start
+
+             if not final_files:
+                 logger.warning("No related files found, using all files")
+                 for source in self.sources:
+                     final_files[get_file_path(source.module_name)] = TargetFile(
+                         file_path=source.module_name,
+                         reason="No related files found, use all files",
+                     )
+
+             # Phase 5: Relevance verification
+             logger.info("Phase 5: Performing relevance verification...")
+             if self.args.index_filter_enable_relevance_verification:
+                 phase_start = time.monotonic()
+                 verified_files = {}
+                 temp_files = list(final_files.values())
+                 verification_results = []
+
+                 def print_verification_results(results):
+                     from rich.table import Table
+                     from rich.console import Console
+
+                     console = Console()
+                     table = Table(title="File Relevance Verification Results", show_header=True, header_style="bold magenta")
+                     table.add_column("File Path", style="cyan", no_wrap=True)
+                     table.add_column("Score", justify="right", style="green")
+                     table.add_column("Status", style="yellow")
+                     table.add_column("Reason/Error")
+
+                     for file_path, score, status, reason in results:
+                         table.add_row(
+                             file_path,
+                             str(score) if score is not None else "N/A",
+                             status,
+                             reason
+                         )
+
+                     console.print(table)
+
+                 def verify_single_file(file: TargetFile):
+                     for source in self.sources:
+                         if source.module_name == file.file_path:
+                             file_content = source.source_code
+                             try:
+                                 result = self.index_manager.verify_file_relevance.with_llm(llm).with_return_type(VerifyFileRelevance).run(
+                                     file_content=file_content,
+                                     query=self.args.query
+                                 )
+                                 if result.relevant_score >= self.args.verify_file_relevance_score:
+                                     verified_files[file.file_path] = TargetFile(
+                                         file_path=file.file_path,
+                                         reason=f"Score:{result.relevant_score}, {result.reason}"
+                                     )
+                                     return file.file_path, result.relevant_score, "PASS", result.reason
+                                 else:
+                                     return file.file_path, result.relevant_score, "FAIL", result.reason
+                             except Exception as e:
+                                 error_msg = str(e)
+                                 verified_files[file.file_path] = TargetFile(
+                                     file_path=file.file_path,
+                                     reason=f"Verification failed: {error_msg}"
+                                 )
+                                 return file.file_path, None, "ERROR", error_msg
+                     return None
+
+                 with ThreadPoolExecutor(max_workers=self.args.index_filter_workers) as executor:
+                     futures = [executor.submit(verify_single_file, file)
+                                for file in temp_files]
+                     for future in as_completed(futures):
+                         result = future.result()
+                         if result:
+                             verification_results.append(result)
+                         time.sleep(self.args.anti_quota_limit)
+
+                 # Print verification results in a table
+                 print_verification_results(verification_results)
+
+                 self.stats["verified_files"] = len(verified_files)
+                 phase_end = time.monotonic()
+                 self.stats["timings"]["normal_filter"]["relevance_verification"] = phase_end - phase_start
+
+                 # Keep all files, not just verified ones
+                 final_files = verified_files
+
+         return final_files
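
The first file added in this release defines NormalFilter, a multi-phase filter over the code index: query-based level 1 filtering, related-files level 2 filtering, and optional LLM relevance verification. For orientation, a minimal usage sketch follows; the helper name and wiring are assumptions based only on the constructor and attributes visible above (the IndexManager and SourceCode list come from elsewhere in auto-coder), not the package's actual call site:

```python
from typing import Dict, List

from autocoder.common import SourceCode
from autocoder.index.index import IndexManager
from autocoder.index.types import TargetFile


def run_normal_filter(index_manager: IndexManager, sources: List[SourceCode]) -> Dict[str, TargetFile]:
    # Hypothetical helper, assuming NormalFilter is importable from this module.
    # filter() writes into stats["timings"]["normal_filter"][...] without creating
    # the nested dicts, so the caller must pre-populate them.
    stats = {"timings": {"normal_filter": {}}}
    normal_filter = NormalFilter(index_manager, stats, sources)
    # read_index() and args.query are the same entry points QuickFilter uses below.
    index_items = index_manager.read_index()
    return normal_filter.filter(index_items, index_manager.args.query)
```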
@@ -0,0 +1,98 @@
+ from typing import List, Union,Dict,Any
+ from autocoder.index.types import IndexItem
+ from autocoder.common import AutoCoderArgs,SourceCode
+ import byzerllm
+ import time
+ from autocoder.index.index import IndexManager
+ from autocoder.index.types import (
+     IndexItem,
+     TargetFile,
+     FileNumberList
+ )
+ from autocoder.rag.token_counter import count_tokens
+ from loguru import logger
+
+
+ def get_file_path(file_path):
+     if file_path.startswith("##"):
+         return file_path.strip()[2:]
+     return file_path
+
+
+ class QuickFilter():
+     def __init__(self, index_manager: IndexManager,stats:Dict[str,Any],sources:List[SourceCode]):
+         self.index_manager = index_manager
+         self.args = index_manager.args
+         self.stats = stats
+         self.sources = sources
+
+     @byzerllm.prompt()
+     def quick_filter_files(self,file_meta_list:List[IndexItem],query:str) -> str:
+         '''
+         When the user raises a requirement, we need to find the relevant files, read them, and modify some of them.
+         Now, given the following index file:
+
+         <index>
+         {{ content }}
+         </index>
+
+         The index file contains the file number (the part wrapped in ##[]), the file path, the file's symbol information, and so on.
+         Below is the user's query:
+
+         <query>
+         {{ query }}
+         </query>
+
+         Based on the user's requirement, find the relevant files and return the list of file numbers in the following JSON format:
+
+         ```json
+         {
+             "file_list": [
+                 file_index1,
+                 file_index2,
+                 ...
+             ]
+         }
+         ```
+
+         Note in particular: if the user's query contains @file or @@symbol references, the @-mentioned files or @@-mentioned symbols must be returned, and the files they depend on should also be checked for relevance.
+         '''
+         file_meta_str = "\n".join([f"##[{index}]{item.module_name}\n{item.symbols}" for index,item in enumerate(file_meta_list)])
+         context = {
+             "content": file_meta_str,
+             "query": query
+         }
+         return context
+
+     def filter(self, index_items: List[IndexItem], query: str) -> Dict[str, TargetFile]:
+         final_files: Dict[str, TargetFile] = {}
+         if not self.args.skip_filter_index and self.args.index_filter_model:
+             start_time = time.monotonic()
+             index_items = self.index_manager.read_index()
+
+             prompt_str = self.quick_filter_files.prompt(index_items,query)
+
+             print(prompt_str)
+
+             tokens_len = count_tokens(prompt_str)
+
+             if tokens_len > 55*1024:
+                 logger.warning(f"Quick filter prompt is too long, tokens_len: {tokens_len}/{55*1024} fallback to normal filter")
+                 return final_files
+
+             try:
+                 file_number_list = self.quick_filter_files.with_llm(
+                     self.index_manager.index_filter_llm).with_return_type(FileNumberList).run(index_items, self.args.query)
+             except Exception as e:
+                 logger.error(f"Quick filter failed, error: {str(e)} fallback to normal filter")
+                 return final_files
+
+             if file_number_list:
+                 for file_number in file_number_list.file_list:
+                     final_files[get_file_path(index_items[file_number].module_name)] = TargetFile(
+                         file_path=index_items[file_number].module_name,
+                         reason="Quick Filter"
+                     )
+             end_time = time.monotonic()
+             self.stats["timings"]["quick_filter"] = end_time - start_time
+         return final_files
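
The second file defines QuickFilter, which renders the entire index into a single prompt and asks the configured filter LLM for the relevant file numbers in one shot. Its filter() returns an empty dict whenever it bails out (no index_filter_model configured, prompt longer than 55*1024 tokens, or the LLM call raising), and the log messages say "fallback to normal filter", so a caller is apparently expected to chain the two filters. The sketch below is an illustrative orchestration under that assumption, not the package's actual dispatch code; the function name and the premise that both classes are importable together are mine:

```python
from typing import Any, Dict, List

from autocoder.common import SourceCode
from autocoder.index.index import IndexManager
from autocoder.index.types import TargetFile


def filter_index(index_manager: IndexManager,
                 stats: Dict[str, Any],
                 sources: List[SourceCode],
                 query: str) -> Dict[str, TargetFile]:
    # Both filters record timings into stats["timings"], so make sure it exists.
    stats.setdefault("timings", {}).setdefault("normal_filter", {})
    index_items = index_manager.read_index()

    # Try the single-shot quick filter first.
    quick_files = QuickFilter(index_manager, stats, sources).filter(index_items, query)
    if quick_files:
        return quick_files

    # An empty result means the quick filter was skipped or failed:
    # fall back to the multi-phase NormalFilter.
    return NormalFilter(index_manager, stats, sources).filter(index_items, query)
```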