auto-coder 0.1.192__py3-none-any.whl → 0.1.194__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of auto-coder might be problematic.
- {auto_coder-0.1.192.dist-info → auto_coder-0.1.194.dist-info}/METADATA +1 -1
- {auto_coder-0.1.192.dist-info → auto_coder-0.1.194.dist-info}/RECORD +21 -21
- autocoder/agent/designer.py +224 -32
- autocoder/auto_coder.py +91 -57
- autocoder/auto_coder_server.py +2 -2
- autocoder/chat_auto_coder.py +5 -1
- autocoder/command_args.py +13 -0
- autocoder/common/__init__.py +3 -1
- autocoder/common/code_auto_merge_editblock.py +1 -1
- autocoder/common/command_templates.py +2 -2
- autocoder/index/index.py +237 -66
- autocoder/lang.py +7 -3
- autocoder/pyproject/__init__.py +1 -2
- autocoder/suffixproject/__init__.py +1 -2
- autocoder/tsproject/__init__.py +1 -2
- autocoder/utils/queue_communicate.py +30 -7
- autocoder/version.py +1 -1
- {auto_coder-0.1.192.dist-info → auto_coder-0.1.194.dist-info}/LICENSE +0 -0
- {auto_coder-0.1.192.dist-info → auto_coder-0.1.194.dist-info}/WHEEL +0 -0
- {auto_coder-0.1.192.dist-info → auto_coder-0.1.194.dist-info}/entry_points.txt +0 -0
- {auto_coder-0.1.192.dist-info → auto_coder-0.1.194.dist-info}/top_level.txt +0 -0
autocoder/index/index.py
CHANGED

@@ -39,6 +39,9 @@ class TargetFile(pydantic.BaseModel):
         ..., description="The reason why the file is the target file"
     )
 
+class VerifyFileRelevance(pydantic.BaseModel):
+    relevant_score: int
+    reason: str
 
 class FileList(pydantic.BaseModel):
     file_list: List[TargetFile]

@@ -70,23 +73,67 @@ class IndexManager:
         if not os.path.exists(self.index_dir):
             os.makedirs(self.index_dir)
 
-    @byzerllm.prompt(
-    def
+    @byzerllm.prompt()
+    def verify_file_relevance(self, file_content: str, query: str) -> str:
         """
-
+        请验证下面的文件内容是否与用户问题相关:
 
-
+        文件内容:
+        {{ file_content }}
 
-
-
-
+        用户问题:
+        {{ query }}
+
+        相关是指,需要依赖这个文件提供上下文,或者需要修改这个文件才能解决用户的问题。
+        请给出相应的可能性分数:0-10,并结合用户问题,理由控制在50字以内。格式如下:
+
+        ```json
+        {
+            "relevant_score": 0-10, // 相关分数
+            "reason": "这是相关的原因..."
+        }
+        ```
+        """
+
+    @byzerllm.prompt()
+    def _get_related_files(self, indices: str, file_paths: str) -> str:
+        """
+        下面是所有文件以及对应的符号信息:
+
+        {{ indices }}
 
         请参考上面的信息,找到被下列文件使用或者引用到的文件列表:
 
         {{ file_paths }}
+
+        请按如下格式进行输出:
+
+        ```json
+        {
+            "file_list": [
+                {
+                    "file_path": "path/to/file.py",
+                    "reason": "The reason why the file is the target file"
+                },
+                {
+                    "file_path": "path/to/file.py",
+                    "reason": "The reason why the file is the target file"
+                }
+            ]
+        }
+        ```
+
+        注意,
+        1. 找到的文件名必须出现在上面的文件列表中
+        2. 原因控制在20字以内
+        3. 如果没有相关的文件,输出如下 json 即可:
+
+        ```json
+        {"file_list": []}
+        ```
         """
 
-    @byzerllm.prompt(
+    @byzerllm.prompt()
     def get_all_file_symbols(self, path: str, code: str) -> str:
         """
         你的目标是从给定的代码中获取代码里的符号,需要获取的符号类型包括:

@@ -184,12 +231,12 @@ class IndexManager:
             )
             symbols = []
             for chunk in chunks:
-                chunk_symbols = self.get_all_file_symbols(source.module_name, chunk)
+                chunk_symbols = self.get_all_file_symbols.with_llm(self.index_llm).run(source.module_name, chunk)
                 time.sleep(self.anti_quota_limit)
                 symbols.append(chunk_symbols)
             symbols = "\n".join(symbols)
         else:
-            symbols = self.get_all_file_symbols(source.module_name, source_code)
+            symbols = self.get_all_file_symbols.with_llm(self.index_llm).run(source.module_name, source_code)
             time.sleep(self.anti_quota_limit)
 
         logger.info(

@@ -309,35 +356,56 @@ class IndexManager:
         current_chunk = []
         current_size = 0
 
-
-
-
-
-
+        if max_chunk_size == -1:
+            for item in index_items:
+                symbols_str = item.symbols
+                if includes:
+                    symbol_info = extract_symbols(symbols_str)
+                    symbols_str = symbols_info_to_str(symbol_info, includes)
 
-
+                item_str = f"##{item.module_name}\n{symbols_str}\n\n"
 
-
-
-            item_size = len(item_str)
+                if skip_symbols:
+                    item_str = f"{item.module_name}\n"
 
-
-
-
-
-
-            current_chunk.append(item_str)
-            current_size += item_size
+                if len(current_chunk) > self.args.filter_batch_size:
+                    yield "".join(current_chunk)
+                    current_chunk = [item_str]
+                else:
+                    current_chunk.append(item_str)
 
-
-
+            if current_chunk:
+                yield "".join(current_chunk)
+        else:
+            for item in index_items:
+                symbols_str = item.symbols
+                if includes:
+                    symbol_info = extract_symbols(symbols_str)
+                    symbols_str = symbols_info_to_str(symbol_info, includes)
+
+                item_str = f"##{item.module_name}\n{symbols_str}\n\n"
+
+                if skip_symbols:
+                    item_str = f"{item.module_name}\n"
+                item_size = len(item_str)
+
+                if current_size + item_size > max_chunk_size:
+                    yield "".join(current_chunk)
+                    current_chunk = [item_str]
+                    current_size = item_size
+                else:
+                    current_chunk.append(item_str)
+                    current_size += item_size
+
+            if current_chunk:
+                yield "".join(current_chunk)
 
     def get_related_files(self, file_paths: List[str]):
         all_results = []
         lock = threading.Lock()
 
         def process_chunk(chunk, chunk_count):
-            result = self._get_related_files(chunk, "\n".join(file_paths))
+            result = self._get_related_files.with_llm(self.llm).with_return_type(FileList).run(chunk, "\n".join(file_paths))
             if result is not None:
                 with lock:
                     all_results.extend(result.file_list)

@@ -351,7 +419,7 @@ class IndexManager:
         futures = []
         chunk_count = 0
         for chunk in self._get_meta_str(
-            max_chunk_size=
+            max_chunk_size=-1
         ):
             future = executor.submit(process_chunk, chunk, chunk_count)
             futures.append(future)

@@ -371,7 +439,7 @@ class IndexManager:
 
         def process_chunk(chunk):
             nonlocal completed_threads
-            result = self._get_target_files_by_query(chunk, query)
+            result = self._get_target_files_by_query.with_llm(self.llm).with_return_type(FileList).run(chunk, query)
             if result is not None:
                 with lock:
                     all_results.extend(result.file_list)

@@ -401,25 +469,21 @@ class IndexManager:
         def w():
             return self._get_meta_str(
                 skip_symbols=False,
-                max_chunk_size=
+                max_chunk_size=-1,
                 includes=[SymbolType.USAGE],
             )
-
-
-        temp_result, total_threads, completed_threads = self._query_index_with_thread(query, w)
-        logger.info(f"Used {self.args.index_filter_workers} workers in parallel. Completed {completed_threads}/{total_threads} threads.")
+
+        temp_result, total_threads, completed_threads = self._query_index_with_thread(query, w)
         all_results.extend(temp_result)
 
         if self.args.index_filter_level >= 1:
-            logger.info("Find the related files by query according to the symbols...")
 
             def w():
                 return self._get_meta_str(
-                    skip_symbols=False, max_chunk_size=
+                    skip_symbols=False, max_chunk_size=-1
                 )
 
-            temp_result, total_threads, completed_threads = self._query_index_with_thread(query, w)
-            logger.info(f"Used {self.args.index_filter_workers} workers in parallel. Completed {completed_threads}/{total_threads} threads.")
+            temp_result, total_threads, completed_threads = self._query_index_with_thread(query, w)
         all_results.extend(temp_result)
 
         all_results = list({file.file_path: file for file in all_results}.values())

@@ -427,8 +491,8 @@ class IndexManager:
         limited_results = all_results[: self.args.index_filter_file_num]
         return FileList(file_list=limited_results)
 
-    @byzerllm.prompt(
-    def _get_target_files_by_query(self, indices: str, query: str) ->
+    @byzerllm.prompt()
+    def _get_target_files_by_query(self, indices: str, query: str) -> str:
         """
         下面是已知文件以及对应的符号信息:
 

@@ -438,7 +502,28 @@ class IndexManager:
 
         {{ query }}
 
-
+        现在,请根据用户的问题以及前面的文件和符号信息,寻找相关文件路径。返回结果按如下格式:
+
+        ```json
+        {
+            "file_list": [
+                {
+                    "file_path": "path/to/file.py",
+                    "reason": "The reason why the file is the target file"
+                },
+                {
+                    "file_path": "path/to/file.py",
+                    "reason": "The reason why the file is the target file"
+                }
+            ]
+        }
+        ```
+
+        如果没有找到,返回如下 json 即可:
+
+        ```json
+        {"file_list": []}
+        ```
 
         请严格遵循以下步骤:
 

@@ -459,15 +544,9 @@ class IndexManager:
         - 如果找到了相关文件,也包括与之直接相关的依赖文件。
 
         5. 考虑文件用途:
-        - 使用每个文件的 "用途" 信息来判断其与查询的相关性。
+        - 使用每个文件的 "用途" 信息来判断其与查询的相关性。
 
-        6.
-        - 对于每个相关文件,创建一个TargetFile对象。
-        - 在reason字段中,详细说明为什么这个文件与查询相关。
-
-        7. 返回结果:
-        - 将所有找到的TargetFile对象放入FileList中返回。
-        - 如果没有找到相关文件,返回一个空的FileList。
+        6. 按格式要求返回结果
 
         请确保结果的准确性和完整性,包括所有可能相关的文件。
         """

@@ -476,6 +555,16 @@ class IndexManager:
 def build_index_and_filter_files(
     llm, args: AutoCoderArgs, sources: List[SourceCode]
 ) -> str:
+    # Initialize timing and statistics
+    total_start_time = time.monotonic()
+    stats = {
+        "total_files": len(sources),
+        "indexed_files": 0,
+        "level1_filtered": 0,
+        "level2_filtered": 0,
+        "final_files": 0,
+        "timings": {}
+    }
 
     def get_file_path(file_path):
         if file_path.startswith("##"):

@@ -484,46 +573,97 @@ def build_index_and_filter_files(
 
     final_files: Dict[str, TargetFile] = {}
 
-
+    # Phase 1: Process REST/RAG/Search sources
+    logger.info("Phase 1: Processing REST/RAG/Search sources...")
+    phase_start = time.monotonic()
     for source in sources:
         if source.tag in ["REST", "RAG", "SEARCH"]:
             final_files[get_file_path(source.module_name)] = TargetFile(
                 file_path=source.module_name, reason="Rest/Rag/Search"
             )
+    stats["timings"]["process_tagged_sources"] = time.monotonic() - phase_start
 
     if not args.skip_build_index and llm:
-
+        # Phase 2: Build index
+        logger.info("Phase 2: Building index for all files...")
+        phase_start = time.monotonic()
         index_manager = IndexManager(llm=llm, sources=sources, args=args)
-        index_manager.build_index()
+        index_data = index_manager.build_index()
+        stats["indexed_files"] = len(index_data) if index_data else 0
+        stats["timings"]["build_index"] = time.monotonic() - phase_start
 
         if not args.skip_filter_index:
-
-
+            # Phase 3: Level 1 filtering - Query-based
+            logger.info("Phase 3: Performing Level 1 filtering (query-based)...")
+            phase_start = time.monotonic()
             target_files = index_manager.get_target_files_by_query(args.query)
 
             if target_files:
                 for file in target_files.file_list:
                     file_path = file.file_path.strip()
                     final_files[get_file_path(file_path)] = file
+                stats["level1_filtered"] = len(target_files.file_list)
+            stats["timings"]["level1_filter"] = time.monotonic() - phase_start
 
+            # Phase 4: Level 2 filtering - Related files
             if target_files is not None and args.index_filter_level >= 2:
-
+                logger.info("Phase 4: Performing Level 2 filtering (related files)...")
+                phase_start = time.monotonic()
+                related_files = index_manager.get_related_files(
                     [file.file_path for file in target_files.file_list]
                 )
-                if
-                for file in
+                if related_files is not None:
+                    for file in related_files.file_list:
                         file_path = file.file_path.strip()
                         final_files[get_file_path(file_path)] = file
+                    stats["level2_filtered"] = len(related_files.file_list)
+                stats["timings"]["level2_filter"] = time.monotonic() - phase_start
 
     if not final_files:
-        logger.warning("
+        logger.warning("No related files found, using all files")
         for source in sources:
             final_files[get_file_path(source.module_name)] = TargetFile(
                 file_path=source.module_name,
                 reason="No related files found, use all files",
             )
 
-
+    # Phase 5: Relevance verification
+    logger.info("Phase 5: Performing relevance verification...")
+    phase_start = time.monotonic()
+    verified_files = {}
+    temp_files = list(final_files.values())
+
+    def verify_single_file(file: TargetFile):
+        for source in sources:
+            if source.module_name == file.file_path:
+                file_content = source.source_code
+                try:
+                    result = index_manager.verify_file_relevance.with_llm(llm).with_return_type(VerifyFileRelevance).run(
+                        file_content=file_content,
+                        query=args.query
+                    )
+                    if result.relevant_score >= args.verify_file_relevance_score:
+                        return file.file_path, TargetFile(
+                            file_path=file.file_path,
+                            reason=f"Score:{result.relevant_score}, {result.reason}"
+                        )
+                except Exception as e:
+                    logger.warning(f"Failed to verify file {file.file_path}: {str(e)}")
+        return None
+
+    with ThreadPoolExecutor(max_workers=args.index_filter_workers) as executor:
+        futures = [executor.submit(verify_single_file, file) for file in temp_files]
+        for future in as_completed(futures):
+            result = future.result()
+            if result:
+                file_path, target_file = result
+                verified_files[file_path] = target_file
+            time.sleep(args.anti_quota_limit)
+
+    stats["verified_files"] = len(verified_files)
+    stats["timings"]["relevance_verification"] = time.monotonic() - phase_start
+
+    final_files = verified_files if verified_files else final_files
 
     def display_table_and_get_selections(data):
         from prompt_toolkit.shortcuts import checkboxlist_dialog

@@ -574,12 +714,15 @@ def build_index_and_filter_files(
 
     console.print(panel)
 
+    # Phase 6: File selection and limitation
+    logger.info("Phase 6: Processing file selection and limits...")
+    phase_start = time.monotonic()
+
     if args.index_filter_file_num > 0:
-        logger.info(f"
+        logger.info(f"Limiting files from {len(final_files)} to {args.index_filter_file_num}")
 
     if args.skip_confirm:
         final_filenames = [file.file_path for file in final_files.values()]
-        # Limit the number of files based on index_filter_file_num
         if args.index_filter_file_num > 0:
             final_filenames = final_filenames[: args.index_filter_file_num]
     else:

@@ -593,10 +736,16 @@ def build_index_and_filter_files(
             final_filenames = []
         else:
             final_filenames = display_table_and_get_selections(target_files_data)
-
+
         if args.index_filter_file_num > 0:
             final_filenames = final_filenames[: args.index_filter_file_num]
+
+    stats["timings"]["file_selection"] = time.monotonic() - phase_start
 
+    # Phase 7: Display results and prepare output
+    logger.info("Phase 7: Preparing final output...")
+    phase_start = time.monotonic()
+
     try:
         print_selected(
             [

@@ -607,9 +756,9 @@ def build_index_and_filter_files(
         )
     except Exception as e:
         logger.warning(
-
+            "Failed to display selected files in terminal mode. Falling back to simple print."
         )
-        print(
+        print("Target Files Selected:")
         for file in final_filenames:
             print(f"{file} - {final_files[file].reason}")

@@ -623,4 +772,26 @@ def build_index_and_filter_files(
         depulicated_sources.add(file.module_name)
         source_code += f"##File: {file.module_name}\n"
         source_code += f"{file.source_code}\n\n"
+
+    stats["final_files"] = len(depulicated_sources)
+    stats["timings"]["prepare_output"] = time.monotonic() - phase_start
+
+    # Calculate total time and print summary
+    total_time = time.monotonic() - total_start_time
+    stats["timings"]["total"] = total_time
+
+    # Print final statistics
+    logger.info("\n=== Build Index and Filter Files Summary ===")
+    logger.info(f"Total files in project: {stats['total_files']}")
+    logger.info(f"Files indexed: {stats['indexed_files']}")
+    logger.info(f"Files after Level 1 filter: {stats['level1_filtered']}")
+    logger.info(f"Files after Level 2 filter: {stats['level2_filtered']}")
+    logger.info(f"Files after relevance verification: {stats.get('verified_files', 0)}")
+    logger.info(f"Final files selected: {stats['final_files']}")
+    logger.info("\nTime breakdown:")
+    for phase, duration in stats["timings"].items():
+        logger.info(f"  - {phase}: {duration:.2f}s")
+    logger.info(f"Total execution time: {total_time:.2f}s")
+    logger.info("==========================================\n")
+
     return source_code
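The recurring change across this file swaps direct calls on prompt-decorated methods for byzerllm's deferred call style: `@byzerllm.prompt()` now decorates a plain template method, and the model plus expected return type are attached at the call site via `with_llm(...).with_return_type(...).run(...)`. A minimal sketch of the pattern, assuming byzerllm is installed and `llm` is an initialized client; the `Relevance` model and `check` method here are illustrative, not auto-coder's code:

```python
import pydantic
import byzerllm


class Relevance(pydantic.BaseModel):
    relevant_score: int
    reason: str


class Checker:
    @byzerllm.prompt()
    def check(self, file_content: str, query: str) -> str:
        """
        请验证下面的文件内容是否与用户问题相关:
        {{ file_content }}

        用户问题:
        {{ query }}
        """


# The decorated method only renders the Jinja2 template; the model call and
# JSON-to-pydantic parsing happen when run() is invoked (commented out here
# because it needs a live llm object):
# result = Checker().check.with_llm(llm).with_return_type(Relevance).run(
#     file_content="...", query="..."
# )
# print(result.relevant_score, result.reason)
```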
autocoder/lang.py
CHANGED

@@ -53,6 +53,7 @@ lang_desc = {
     "index_model_max_input_length":"The maximum length of the input to the index model. Default is 0, which means using the value of model_max_input_length",
     "index_model_anti_quota_limit":"Time to wait in seconds after each API request for the index model. Default is 0, which means using the value of anti_quota_limit",
     "doc_build_parse_required_exts":"The required file extensions for doc build. Default is empty string",
+    "verify_file_relevance_score": "The relevance score threshold for file verification. Default is 6",
     "init_desc": "Initialize a new auto-coder project directory",
     "init_dir": "The directory to initialize the auto-coder project in",
     "index_build_workers": "Number of workers to use for building index",

@@ -69,7 +70,8 @@ lang_desc = {
     "next_desc": "create a new action file based one the last action file",
     "base_dir": "Alternative path for /~/.auto-coder to store or retrieve text embeddings. Using /~/.auto-coder/ if not specified",
     "editblock_similarity": "The similarity threshold of TextSimilarity when merging edit blocks. Default is 0.9",
-    "include_project_structure": "Whether to include the project directory structure in the code generation prompt. Default is False"
+    "include_project_structure": "Whether to include the project directory structure in the code generation prompt. Default is False",
+    "filter_batch_size": "The batch size used when filtering files. Default is 5"
 },
 "zh": {
     "request_id": "Request ID",

@@ -123,8 +125,9 @@ lang_desc = {
     "enable_multi_round_generate":"是否开启多轮对话生成。默认为False",
     "index_model_max_length":"索引模型生成代码的最大长度。默认为0,表示使用model_max_length的值",
     "index_model_max_input_length":"索引模型的最大输入长度。默认为0,表示使用model_max_input_length的值",
-    "index_model_anti_quota_limit":"每次索引模型API请求后等待的秒数。默认为0,表示使用anti_quota_limit的值",
+    "index_model_anti_quota_limit": "每次索引模型API请求后等待的秒数。默认为0,表示使用anti_quota_limit的值",
     "doc_build_parse_required_exts":"doc构建所需的文件扩展名。默认为空字符串",
+    "verify_file_relevance_score": "验证文件相关性的分数阈值。默认为6",
     "init_desc": "初始化一个新的auto-coder项目目录",
     "init_dir": "初始化auto-coder项目的目录",
     "index_build_workers": "用于构建索引的工作线程数",

@@ -141,6 +144,7 @@ lang_desc = {
     "next_from_yaml": "创建新的action文件时要从中复制内容的YAML文件。支持前缀匹配,例如,指定'001'将匹配'001_abc.yml'。",
     "base_dir": "用于替代byzerllm中/~/.auto-coder的路径存放或读取向量化后的文本。不指定则使用默认路径",
     "editblock_similarity": "合并编辑块时TextSimilarity的相似度阈值。默认为0.9",
-    "include_project_structure": "在生成代码的提示中是否包含项目目录结构。默认为False"
+    "include_project_structure": "在生成代码的提示中是否包含项目目录结构。默认为False",
+    "filter_batch_size": "文件过滤时使用的批处理大小。默认为5"
 }
 }
autocoder/pyproject/__init__.py
CHANGED

@@ -163,8 +163,7 @@ class PyProject:
 
     def should_exclude(self, file_path):
         for pattern in self.exclude_patterns:
-            if pattern.search(file_path):
-                logger.info(f"Excluding file: {file_path}")
+            if pattern.search(file_path):
                 return True
         return False
 
autocoder/suffixproject/__init__.py
CHANGED

@@ -104,8 +104,7 @@ class SuffixProject:
 
     def should_exclude(self, file_path):
         for pattern in self.exclude_patterns:
-            if pattern.search(file_path):
-                logger.info(f"Excluding file: {file_path}")
+            if pattern.search(file_path):
                 return True
         return False
 
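The pyproject and suffixproject hunks (and, per the summary list above, tsproject) make the same one-line change: the per-file "Excluding file" log call is dropped from `should_exclude`, which runs against every path during project traversal. The surviving logic is a regex scan over pre-compiled patterns; a minimal standalone sketch, with illustrative patterns rather than auto-coder's configured ones:

```python
import re

# Pre-compiled exclude patterns, as the project classes hold them.
exclude_patterns = [re.compile(p) for p in (r"\.git/", r"node_modules/")]


def should_exclude(file_path: str) -> bool:
    # A path is excluded if any pattern matches anywhere in it.
    return any(pattern.search(file_path) for pattern in exclude_patterns)


print(should_exclude("web/node_modules/react/index.js"))  # True
print(should_exclude("src/app.py"))                       # False
```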
autocoder/utils/queue_communicate.py
CHANGED

@@ -15,12 +15,15 @@ class CommunicateEventType(Enum):
     CODE_END = "code_end"
     CODE_HUMAN_AS_MODEL = "code_human_as_model"
     ASK_HUMAN = "ask_human"
+    CODE_ERROR = "code_error"
 
+TIMEOUT = 600*3
 @dataclass(eq=True, frozen=True)
 class CommunicateEvent:
     event_type: str
     data: str
 
+
 class Singleton(type):
     _instances = {}
     _lock = threading.Lock()

@@ -34,11 +37,31 @@ class Singleton:
 
 class QueueCommunicate(metaclass=Singleton):
     def __init__(self):
+        # Structure:
+        # {
+        #    "request_id_1": Queue(),
+        #    "request_id_2": Queue(),
+        #    ...
+        # }
         self.request_queues = {}
+
+        # Structure:
+        # {
+        #    "request_id_1": {
+        #        event1: Queue(),
+        #        event2: Queue(),
+        #        ...
+        #    },
+        #    "request_id_2": {
+        #        event1: Queue(),
+        #        event2: Queue(),
+        #        ...
+        #    }
+        # }
         self.response_queues = {}
         self.lock = threading.Lock()
-        self.send_event_executor = ThreadPoolExecutor(max_workers=
-        self.consume_event_executor = ThreadPoolExecutor(max_workers=
+        self.send_event_executor = ThreadPoolExecutor(max_workers=100)
+        self.consume_event_executor = ThreadPoolExecutor(max_workers=100)
 
     def shutdown(self):
         self.send_event_executor.shutdown()

@@ -56,7 +79,7 @@ class QueueCommunicate(metaclass=Singleton):
         if request_id in self.response_queues:
             self.response_queues.pop(request_id)
 
-    def send_event(self, request_id: str, event: Any, timeout: int =
+    def send_event(self, request_id: str, event: Any, timeout: int = TIMEOUT) -> Any:
         if not request_id:
             return None
 

@@ -64,8 +87,8 @@ class QueueCommunicate(metaclass=Singleton):
             self._send_event_task, request_id, event
         )
         return future.result(timeout=timeout)
-
-    def send_event_no_wait(self, request_id: str, event: Any, timeout: int =
+
+    def send_event_no_wait(self, request_id: str, event: Any, timeout: int = TIMEOUT) -> Any:
         if not request_id:
             return None
 

@@ -74,7 +97,7 @@ class QueueCommunicate(metaclass=Singleton):
         )
         return future
 
-    def _send_event_task(self, request_id: str, event: Any, timeout: int =
+    def _send_event_task(self, request_id: str, event: Any, timeout: int = TIMEOUT) -> Any:
         with self.lock:
             if request_id not in self.request_queues:
                 self.request_queues[request_id] = Queue()

@@ -87,7 +110,7 @@ class QueueCommunicate(metaclass=Singleton):
         response_queues[event] = response_queue
 
         request_queue.put(event)
-        response = response_queue.get(timeout=timeout)
+        response = response_queue.get(timeout=timeout)
         return response
 
     def consume_events(self, request_id: str, event_handler: Callable[[Any], Any]):
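The queue_communicate changes add a CODE_ERROR event type, document the two queue maps, and replace the per-method timeout defaults with a shared `TIMEOUT = 600*3` (30 minutes). The underlying handshake is unchanged: `send_event` enqueues an event and blocks on a per-event response queue until a consumer replies. A stripped-down sketch of that request/response pattern, independent of auto-coder's Singleton plumbing (names are illustrative):

```python
import threading
from queue import Queue

TIMEOUT = 600 * 3  # shared default introduced by the diff: 30 minutes

request_queue: Queue = Queue()
response_queues: dict = {}


def send_event(event: str, timeout: int = TIMEOUT) -> str:
    # Create a response queue keyed by the event, publish the event,
    # then block until a consumer posts the reply (or the timeout expires).
    response_queues[event] = Queue()
    request_queue.put(event)
    return response_queues[event].get(timeout=timeout)


def consume_one() -> None:
    event = request_queue.get()
    # Handle the event and unblock the waiting sender.
    response_queues[event].put(f"handled: {event}")


worker = threading.Thread(target=consume_one)
worker.start()
print(send_event("code_start"))  # -> handled: code_start
worker.join()
```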
autocoder/version.py
CHANGED

@@ -1 +1 @@
-__version__ = "0.1.192"
+__version__ = "0.1.194"