auto-coder 0.1.232__py3-none-any.whl → 0.1.233__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of auto-coder might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: auto-coder
3
- Version: 0.1.232
3
+ Version: 0.1.233
4
4
  Summary: AutoCoder: AutoCoder
5
5
  Author: allwefantasy
6
6
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
@@ -1,17 +1,17 @@
1
1
  autocoder/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- autocoder/auto_coder.py,sha256=REnZJUy4Dx9b7Z8gM9D4SWMkbHXl0JjF-DFiod4aB-A,58357
2
+ autocoder/auto_coder.py,sha256=r2uHo-xPDbel6OhZwGzl8Iz1GP4d9KUWcDReEol9LOQ,59541
3
3
  autocoder/auto_coder_lang.py,sha256=Rtupq6N3_HT7JRhDKdgCBcwRaiAnyCOR_Gsp4jUomrI,3229
4
4
  autocoder/auto_coder_rag.py,sha256=illKgzP2bv-Tq50ujsofJnOHdI4pzr0ALtfR8NHHWdQ,22351
5
5
  autocoder/auto_coder_rag_client_mcp.py,sha256=WV7j5JUiQge0x4-B7Hp5-pSAFXLbvLpzQMcCovbauIM,6276
6
6
  autocoder/auto_coder_rag_mcp.py,sha256=-RrjNwFaS2e5v8XDIrKR-zlUNUE8UBaeOtojffBrvJo,8521
7
7
  autocoder/auto_coder_server.py,sha256=XU9b4SBH7zjPPXaTWWHV4_zJm-XYa6njuLQaplYJH_c,20290
8
8
  autocoder/benchmark.py,sha256=Ypomkdzd1T3GE6dRICY3Hj547dZ6_inqJbBJIp5QMco,4423
9
- autocoder/chat_auto_coder.py,sha256=KtDAwIiBB1b2jBSY8BCoSj88iRSwtRACkzME9h91ido,101601
9
+ autocoder/chat_auto_coder.py,sha256=PzrbhpwTgJEMuPOT7vBd4uPi58mwqLc59l2fVV6rJc8,102049
10
10
  autocoder/chat_auto_coder_lang.py,sha256=YJsFi8an0Kjbo9X7xKZfpdbHS3rbhrvChZNjWqEQ5Sw,11032
11
11
  autocoder/command_args.py,sha256=9aYJ-AmPxP1sQh6ciw04FWHjSn31f2W9afXFwo8wgx4,30441
12
12
  autocoder/lang.py,sha256=U6AjVV8Rs1uLyjFCZ8sT6WWuNUxMBqkXXIOs4S120uk,14511
13
13
  autocoder/models.py,sha256=FlBrF6HhGao_RiCSgYhCmP7vs0KlG4hI_BI6dyZiL9s,5292
14
- autocoder/version.py,sha256=pvV6K1v2HfuQywwyaRYD8uMohj-o0EfkRauHjMIrYtk,24
14
+ autocoder/version.py,sha256=VphyhuzLuUXUhi7WwvwaGVGg2OSz77iY97Prah1F5g8,24
15
15
  autocoder/agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
16
  autocoder/agent/auto_demand_organizer.py,sha256=NWSAEsEk94vT3lGjfo25kKLMwYdPcpy9e-i21txPasQ,6942
17
17
  autocoder/agent/auto_filegroup.py,sha256=CW7bqp0FW1GIEMnl-blyAc2UGT7O9Mom0q66ITz1ckM,6635
@@ -24,7 +24,7 @@ autocoder/agent/project_reader.py,sha256=tWLaPoLw1gI6kO_NzivQj28KbobU2ceOLuppHMb
24
24
  autocoder/chat/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
25
25
  autocoder/common/JupyterClient.py,sha256=O-wi6pXeAEYhAY24kDa0BINrLYvKS6rKyWe98pDClS0,2816
26
26
  autocoder/common/ShellClient.py,sha256=fM1q8t_XMSbLBl2zkCNC2J9xuyKN3eXzGm6hHhqL2WY,2286
27
- autocoder/common/__init__.py,sha256=-ATCQp7EhFepyUsupHEZpUN3ouQ-OcM3PAoCnAXezhg,11652
27
+ autocoder/common/__init__.py,sha256=JDIZ_1JP7Nm74OL_aCFwvhiwiynukaE5oPCfh5GPQWU,11695
28
28
  autocoder/common/anything2images.py,sha256=0ILBbWzY02M-CiWB-vzuomb_J1hVdxRcenAfIrAXq9M,25283
29
29
  autocoder/common/anything2img.py,sha256=4TREa-sOA-iargieUy7MpyCYVUE-9Mmq0wJtwomPqnE,7662
30
30
  autocoder/common/audio.py,sha256=Kn9nWKQddWnUrAz0a_ZUgjcu4VUU_IcZBigT7n3N3qc,7439
@@ -69,16 +69,20 @@ autocoder/db/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
69
69
  autocoder/db/store.py,sha256=tFT66bP2ZKIqZip-uhLkHRSLaaOAUUDZfozJwcqix3c,1908
70
70
  autocoder/dispacher/__init__.py,sha256=YoA64dIxnx4jcE1pwSfg81sjkQtjDkhddkfac1-cMWo,1230
71
71
  autocoder/dispacher/actions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
72
- autocoder/dispacher/actions/action.py,sha256=JqghsBpgrM-5J79UYjGIVivNgz6YnMtibwBPP0Ww9xY,19299
73
- autocoder/dispacher/actions/copilot.py,sha256=BdD-L0LFb5YUH2C87IVCm446SJLyOltaaG9shmsQ25Q,13072
72
+ autocoder/dispacher/actions/action.py,sha256=NjJGLek8H0FlIOreBnl2KEC-jJ5Jq-V8D1RuI6ifUjc,19299
73
+ autocoder/dispacher/actions/copilot.py,sha256=iMh4ckj9hO5Q-iemF3CStXd7DatWai7Eci5zOlKxK9c,13072
74
74
  autocoder/dispacher/actions/plugins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
75
- autocoder/dispacher/actions/plugins/action_regex_project.py,sha256=oldAZeGaDcGQBiTdCigO6yiV_GJQTGRLy_B_hVHAQZw,5325
75
+ autocoder/dispacher/actions/plugins/action_regex_project.py,sha256=-ly-NRgQ8LfDQDoH0QmD_LP-G932Kt08WXy9oIvLy10,5325
76
76
  autocoder/dispacher/actions/plugins/action_translate.py,sha256=nVAtRSQpdGNmZxg1R_9zXG3AuTv3CHf2v7ODgj8u65c,7727
77
77
  autocoder/index/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
78
+ autocoder/index/entry.py,sha256=1T41clV3GXwiRbowubQ1iZM5k3_2ECS-DtcnbA9QxAk,10081
78
79
  autocoder/index/for_command.py,sha256=LGnz-OWogT8rd24m4Zcan7doLaijxqorAuiMk7WuRq0,3125
79
- autocoder/index/index.py,sha256=1HLwK-ylpibYjHgPommdaL1bRwGP1QGWONaNYExzqRc,34828
80
+ autocoder/index/index.py,sha256=lwaobSHvOnzhTMf8SQXzw3nIJQUS4lyo6nLdtv0Ebc0,19223
80
81
  autocoder/index/symbols_utils.py,sha256=CjcjUVajmJZB75Ty3a7kMv1BZphrm-tIBAdOJv6uo-0,2037
81
- autocoder/index/types.py,sha256=tYoFicbS6k1Dx4EoMpuNq71-4pF6hhEbtej0VYCVlSo,524
82
+ autocoder/index/types.py,sha256=a2s_KV5FJlq7jqA2ELSo9E1sjuLwDB-JJYMhSpzBAhU,596
83
+ autocoder/index/filter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
84
+ autocoder/index/filter/normal_filter.py,sha256=pE5QwcBq6NYHFtYhwhfMJmYQYJwErNs-Q7iZmVBAh-k,7964
85
+ autocoder/index/filter/quick_filter.py,sha256=BxOiZOlK2v6EnX0yV28R3ielXboTmrMvVwielCrqKpE,3678
82
86
  autocoder/pyproject/__init__.py,sha256=dQ2_7YZ7guybT9BhfxSGn43eLQJGQN2zgeKa6--JlaQ,14403
83
87
  autocoder/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
84
88
  autocoder/rag/api_server.py,sha256=dRbhAZVRAOlZ64Cnxf4_rKb4iJwHnrWS9Zr67IVORw0,7288
@@ -133,9 +137,9 @@ autocoder/utils/tests.py,sha256=BqphrwyycGAvs-5mhH8pKtMZdObwhFtJ5MC_ZAOiLq8,1340
133
137
  autocoder/utils/auto_coder_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
134
138
  autocoder/utils/auto_coder_utils/chat_stream_out.py,sha256=6D_SIa5hHSwIHC1poO_ztK7IVugAqNHu-jQySd7EnfQ,4181
135
139
  autocoder/utils/chat_auto_coder_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
136
- auto_coder-0.1.232.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
137
- auto_coder-0.1.232.dist-info/METADATA,sha256=5XhWylx2N9N4DNDoJpF3GUWHJzpNVg2wwgBZ5m_rBDU,2641
138
- auto_coder-0.1.232.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
139
- auto_coder-0.1.232.dist-info/entry_points.txt,sha256=0nzHtHH4pNcM7xq4EBA2toS28Qelrvcbrr59GqD_0Ak,350
140
- auto_coder-0.1.232.dist-info/top_level.txt,sha256=Jqc0_uJSw2GwoFQAa9iJxYns-2mWla-9ok_Y3Gcznjk,10
141
- auto_coder-0.1.232.dist-info/RECORD,,
140
+ auto_coder-0.1.233.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
141
+ auto_coder-0.1.233.dist-info/METADATA,sha256=w___EO9VOCh1T58HsPM3Qi5OYm2ktUVVD6I5KucBO3w,2641
142
+ auto_coder-0.1.233.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
143
+ auto_coder-0.1.233.dist-info/entry_points.txt,sha256=0nzHtHH4pNcM7xq4EBA2toS28Qelrvcbrr59GqD_0Ak,350
144
+ auto_coder-0.1.233.dist-info/top_level.txt,sha256=Jqc0_uJSw2GwoFQAa9iJxYns-2mWla-9ok_Y3Gcznjk,10
145
+ auto_coder-0.1.233.dist-info/RECORD,,
autocoder/auto_coder.py CHANGED
@@ -42,6 +42,7 @@ from rich.live import Live
42
42
  from autocoder.auto_coder_lang import get_message
43
43
  from autocoder.common.memory_manager import save_to_memory_file
44
44
  from autocoder import models as models_module
45
+ from autocoder.common.utils_code_auto_generate import stream_chat_with_continue
45
46
  from autocoder.utils.auto_coder_utils.chat_stream_out import stream_out
46
47
 
47
48
  console = Console()
@@ -443,7 +444,24 @@ def main(input_args: Optional[List[str]] = None):
443
444
  "saas.is_reasoning": model_info["is_reasoning"]
444
445
  }
445
446
  )
446
- llm.setup_sub_client("inference_model", inference_model)
447
+ llm.setup_sub_client("inference_model", inference_model)
448
+
449
+ if args.index_filter_model:
450
+ model_info = models_module.get_model_by_name(args.index_filter_model)
451
+ model_name = args.index_filter_model
452
+ index_filter_model = byzerllm.SimpleByzerLLM(default_model_name=model_name)
453
+ index_filter_model.deploy(
454
+ model_path="",
455
+ pretrained_model_type=model_info["model_type"],
456
+ udf_name=model_name,
457
+ infer_params={
458
+ "saas.base_url": model_info["base_url"],
459
+ "saas.api_key": model_info["api_key"],
460
+ "saas.model": model_info["model_name"],
461
+ "saas.is_reasoning": model_info["is_reasoning"]
462
+ }
463
+ )
464
+ llm.setup_sub_client("index_filter_model", index_filter_model)
447
465
 
448
466
 
449
467
  if args.product_mode == "pro":
@@ -482,7 +500,12 @@ def main(input_args: Optional[List[str]] = None):
482
500
  if args.inference_model:
483
501
  inference_model = byzerllm.ByzerLLM()
484
502
  inference_model.setup_default_model_name(args.inference_model)
485
- llm.setup_sub_client("inference_model", inference_model)
503
+ llm.setup_sub_client("inference_model", inference_model)
504
+
505
+ if args.index_filter_model:
506
+ index_filter_model = byzerllm.ByzerLLM()
507
+ index_filter_model.setup_default_model_name(args.index_filter_model)
508
+ llm.setup_sub_client("index_filter_model", index_filter_model)
486
509
 
487
510
 
488
511
  if args.human_as_model:
@@ -1106,7 +1129,8 @@ def main(input_args: Optional[List[str]] = None):
1106
1129
  {"role": "assistant", "content": "read"})
1107
1130
  source_count += 1
1108
1131
 
1109
- from autocoder.index.index import IndexManager, build_index_and_filter_files
1132
+ from autocoder.index.index import IndexManager
1133
+ from autocoder.index.entry import build_index_and_filter_files
1110
1134
  from autocoder.pyproject import PyProject
1111
1135
  from autocoder.tsproject import TSProject
1112
1136
  from autocoder.suffixproject import SuffixProject
@@ -1272,8 +1296,7 @@ def main(input_args: Optional[List[str]] = None):
1272
1296
  )
1273
1297
  )
1274
1298
  v = [[response.result,None]]
1275
- else:
1276
- from autocoder.common.utils_code_auto_generate import stream_chat_with_continue
1299
+ else:
1277
1300
  v = stream_chat_with_continue(
1278
1301
  llm=chat_llm,
1279
1302
  conversations=loaded_conversations,
@@ -52,6 +52,7 @@ from autocoder.common.memory_manager import get_global_memory_file_paths
52
52
  from autocoder import models
53
53
  import shlex
54
54
  from autocoder.utils.llms import get_single_llm
55
+ import pkg_resources
55
56
 
56
57
  class SymbolItem(BaseModel):
57
58
  symbol_name: str
@@ -2478,6 +2479,17 @@ def lib_command(args: List[str]):
2478
2479
 
2479
2480
 
2480
2481
  def main():
2482
+ from autocoder.rag.variable_holder import VariableHolder
2483
+ from tokenizers import Tokenizer
2484
+ try:
2485
+ tokenizer_path = pkg_resources.resource_filename(
2486
+ "autocoder", "data/tokenizer.json"
2487
+ )
2488
+ VariableHolder.TOKENIZER_PATH = tokenizer_path
2489
+ VariableHolder.TOKENIZER_MODEL = Tokenizer.from_file(tokenizer_path)
2490
+ except FileNotFoundError:
2491
+ tokenizer_path = None
2492
+
2481
2493
  ARGS = parse_arguments()
2482
2494
 
2483
2495
  if ARGS.lite:
@@ -259,6 +259,7 @@ class AutoCoderArgs(pydantic.BaseModel):
259
259
  skip_filter_index: Optional[bool] = False
260
260
 
261
261
  index_model: Optional[str] = ""
262
+ index_filter_model: Optional[str] = ""
262
263
  index_model_max_length: Optional[int] = 0
263
264
  index_model_max_input_length: Optional[int] = 0
264
265
  index_model_anti_quota_limit: Optional[int] = 0
@@ -9,7 +9,7 @@ from autocoder.common.buildin_tokenizer import BuildinTokenizer
9
9
  from autocoder.pyproject import PyProject, Level1PyProject
10
10
  from autocoder.tsproject import TSProject
11
11
  from autocoder.suffixproject import SuffixProject
12
- from autocoder.index.index import build_index_and_filter_files
12
+ from autocoder.index.entry import build_index_and_filter_files
13
13
  from autocoder.common.code_auto_merge import CodeAutoMerge
14
14
  from autocoder.common.code_auto_merge_diff import CodeAutoMergeDiff
15
15
  from autocoder.common.code_auto_merge_strict_diff import CodeAutoMergeStrictDiff
@@ -9,7 +9,7 @@ from autocoder.common.JupyterClient import JupyterNotebook
9
9
  from autocoder.common.ShellClient import ShellClient
10
10
  from autocoder.suffixproject import SuffixProject
11
11
  from autocoder.common.search import Search, SearchEngine
12
- from autocoder.index.index import build_index_and_filter_files
12
+ from autocoder.index.entry import build_index_and_filter_files
13
13
  from autocoder.common.image_to_page import ImageToPage, ImageToPageDirectly
14
14
  from typing import Optional, Dict, Any, List
15
15
  import byzerllm
@@ -9,7 +9,7 @@ from autocoder.common.code_auto_generate import CodeAutoGenerate
9
9
  from autocoder.common.code_auto_generate_diff import CodeAutoGenerateDiff
10
10
  from autocoder.common.code_auto_generate_strict_diff import CodeAutoGenerateStrictDiff
11
11
  from autocoder.common.code_auto_generate_editblock import CodeAutoGenerateEditBlock
12
- from autocoder.index.index import build_index_and_filter_files
12
+ from autocoder.index.entry import build_index_and_filter_files
13
13
  from autocoder.regexproject import RegexProject
14
14
  from autocoder.utils.conversation_store import store_code_model_conversation
15
15
  from loguru import logger
@@ -0,0 +1,286 @@
1
+ import os
2
+ import json
3
+ import time
4
+ from typing import List, Dict, Any, Optional
5
+ from datetime import datetime
6
+ from autocoder.common import SourceCode, AutoCoderArgs
7
+
8
+ from rich.console import Console
9
+ from rich.table import Table
10
+ from rich.panel import Panel
11
+
12
+ from loguru import logger
13
+ from autocoder.utils.queue_communicate import (
14
+ queue_communicate,
15
+ CommunicateEvent,
16
+ CommunicateEventType,
17
+ )
18
+ from autocoder.index.types import (
19
+ TargetFile
20
+ )
21
+
22
+ from autocoder.index.filter.quick_filter import QuickFilter
23
+ from autocoder.index.filter.normal_filter import NormalFilter
24
+ from autocoder.index.index import IndexManager
25
+
26
+ def build_index_and_filter_files(
27
+ llm, args: AutoCoderArgs, sources: List[SourceCode]
28
+ ) -> str:
29
+ # Initialize timing and statistics
30
+ total_start_time = time.monotonic()
31
+ stats = {
32
+ "total_files": len(sources),
33
+ "indexed_files": 0,
34
+ "level1_filtered": 0,
35
+ "level2_filtered": 0,
36
+ "verified_files": 0,
37
+ "final_files": 0,
38
+ "timings": {
39
+ "process_tagged_sources": 0.0,
40
+ "build_index": 0.0,
41
+ "quick_filter": 0.0,
42
+ "normal_filter": {
43
+ "level1_filter": 0.0,
44
+ "level2_filter": 0.0,
45
+ "relevance_verification": 0.0,
46
+ },
47
+ "file_selection": 0.0,
48
+ "prepare_output": 0.0,
49
+ "total": 0.0
50
+ }
51
+ }
52
+
53
+ def get_file_path(file_path):
54
+ if file_path.startswith("##"):
55
+ return file_path.strip()[2:]
56
+ return file_path
57
+
58
+ final_files: Dict[str, TargetFile] = {}
59
+
60
+ # Phase 1: Process REST/RAG/Search sources
61
+ logger.info("Phase 1: Processing REST/RAG/Search sources...")
62
+ phase_start = time.monotonic()
63
+ for source in sources:
64
+ if source.tag in ["REST", "RAG", "SEARCH"]:
65
+ final_files[get_file_path(source.module_name)] = TargetFile(
66
+ file_path=source.module_name, reason="Rest/Rag/Search"
67
+ )
68
+ phase_end = time.monotonic()
69
+ stats["timings"]["process_tagged_sources"] = phase_end - phase_start
70
+
71
+ if not args.skip_build_index and llm:
72
+ # Phase 2: Build index
73
+ if args.request_id and not args.skip_events:
74
+ queue_communicate.send_event(
75
+ request_id=args.request_id,
76
+ event=CommunicateEvent(
77
+ event_type=CommunicateEventType.CODE_INDEX_BUILD_START.value,
78
+ data=json.dumps({"total_files": len(sources)})
79
+ )
80
+ )
81
+
82
+ logger.info("Phase 2: Building index for all files...")
83
+ phase_start = time.monotonic()
84
+ index_manager = IndexManager(llm=llm, sources=sources, args=args)
85
+ index_data = index_manager.build_index()
86
+ stats["indexed_files"] = len(index_data) if index_data else 0
87
+ phase_end = time.monotonic()
88
+ stats["timings"]["build_index"] = phase_end - phase_start
89
+
90
+ if args.request_id and not args.skip_events:
91
+ queue_communicate.send_event(
92
+ request_id=args.request_id,
93
+ event=CommunicateEvent(
94
+ event_type=CommunicateEventType.CODE_INDEX_BUILD_END.value,
95
+ data=json.dumps({
96
+ "indexed_files": stats["indexed_files"],
97
+ "build_index_time": stats["timings"]["build_index"],
98
+ })
99
+ )
100
+ )
101
+
102
+ quick_filter = QuickFilter(index_manager,stats,sources)
103
+ final_files = quick_filter.filter(index_manager.read_index(),args.query)
104
+
105
+ if not final_files:
106
+ normal_filter = NormalFilter(index_manager,stats,sources)
107
+ final_files = normal_filter.filter(index_manager.read_index(),args.query)
108
+
109
+
110
+ def display_table_and_get_selections(data):
111
+ from prompt_toolkit.shortcuts import checkboxlist_dialog
112
+ from prompt_toolkit.styles import Style
113
+
114
+ choices = [(file, f"{file} - {reason}") for file, reason in data]
115
+ selected_files = [file for file, _ in choices]
116
+
117
+ style = Style.from_dict(
118
+ {
119
+ "dialog": "bg:#88ff88",
120
+ "dialog frame.label": "bg:#ffffff #000000",
121
+ "dialog.body": "bg:#88ff88 #000000",
122
+ "dialog shadow": "bg:#00aa00",
123
+ }
124
+ )
125
+
126
+ result = checkboxlist_dialog(
127
+ title="Target Files",
128
+ text="Tab to switch between buttons, and Space/Enter to select/deselect.",
129
+ values=choices,
130
+ style=style,
131
+ default_values=selected_files,
132
+ ).run()
133
+
134
+ return [file for file in result] if result else []
135
+
136
+ def print_selected(data):
137
+ console = Console()
138
+
139
+ table = Table(
140
+ title="Files Used as Context",
141
+ show_header=True,
142
+ header_style="bold magenta",
143
+ )
144
+ table.add_column("File Path", style="cyan", no_wrap=True)
145
+ table.add_column("Reason", style="green")
146
+
147
+ for file, reason in data:
148
+ table.add_row(file, reason)
149
+
150
+ panel = Panel(
151
+ table,
152
+ expand=False,
153
+ border_style="bold blue",
154
+ padding=(1, 1),
155
+ )
156
+
157
+ console.print(panel)
158
+
159
+ # Phase 6: File selection and limitation
160
+ logger.info("Phase 6: Processing file selection and limits...")
161
+ phase_start = time.monotonic()
162
+
163
+ if args.index_filter_file_num > 0:
164
+ logger.info(
165
+ f"Limiting files from {len(final_files)} to {args.index_filter_file_num}")
166
+
167
+ if args.skip_confirm:
168
+ final_filenames = [file.file_path for file in final_files.values()]
169
+ if args.index_filter_file_num > 0:
170
+ final_filenames = final_filenames[: args.index_filter_file_num]
171
+ else:
172
+ target_files_data = [
173
+ (file.file_path, file.reason) for file in final_files.values()
174
+ ]
175
+ if not target_files_data:
176
+ logger.warning(
177
+ "No target files found, you may need to rewrite the query and try again."
178
+ )
179
+ final_filenames = []
180
+ else:
181
+ final_filenames = display_table_and_get_selections(
182
+ target_files_data)
183
+
184
+ if args.index_filter_file_num > 0:
185
+ final_filenames = final_filenames[: args.index_filter_file_num]
186
+
187
+ phase_end = time.monotonic()
188
+ stats["timings"]["file_selection"] = phase_end - phase_start
189
+
190
+ # Phase 7: Display results and prepare output
191
+ logger.info("Phase 7: Preparing final output...")
192
+ phase_start = time.monotonic()
193
+ try:
194
+ print_selected(
195
+ [
196
+ (file.file_path, file.reason)
197
+ for file in final_files.values()
198
+ if file.file_path in final_filenames
199
+ ]
200
+ )
201
+ except Exception as e:
202
+ logger.warning(
203
+ "Failed to display selected files in terminal mode. Falling back to simple print."
204
+ )
205
+ print("Target Files Selected:")
206
+ for file in final_filenames:
207
+ print(f"{file} - {final_files[file].reason}")
208
+
209
+ source_code = ""
210
+ depulicated_sources = set()
211
+
212
+ for file in sources:
213
+ if file.module_name in final_filenames:
214
+ if file.module_name in depulicated_sources:
215
+ continue
216
+ depulicated_sources.add(file.module_name)
217
+ source_code += f"##File: {file.module_name}\n"
218
+ source_code += f"{file.source_code}\n\n"
219
+
220
+ if args.request_id and not args.skip_events:
221
+ queue_communicate.send_event(
222
+ request_id=args.request_id,
223
+ event=CommunicateEvent(
224
+ event_type=CommunicateEventType.CODE_INDEX_FILTER_FILE_SELECTED.value,
225
+ data=json.dumps([
226
+ (file.file_path, file.reason)
227
+ for file in final_files.values()
228
+ if file.file_path in depulicated_sources
229
+ ])
230
+ )
231
+ )
232
+
233
+ stats["final_files"] = len(depulicated_sources)
234
+ phase_end = time.monotonic()
235
+ stats["timings"]["prepare_output"] = phase_end - phase_start
236
+
237
+ # Calculate total time and print summary
238
+ total_end_time = time.monotonic()
239
+ total_time = total_end_time - total_start_time
240
+ stats["timings"]["total"] = total_time
241
+
242
+ # Calculate total filter time
243
+ total_filter_time = (
244
+ stats["timings"]["quick_filter"] +
245
+ stats["timings"]["normal_filter"]["level1_filter"] +
246
+ stats["timings"]["normal_filter"]["level2_filter"] +
247
+ stats["timings"]["normal_filter"]["relevance_verification"]
248
+ )
249
+
250
+ # Print final statistics in a more structured way
251
+ summary = f"""
252
+ === Indexing and Filtering Summary ===
253
+ • Total files scanned: {stats['total_files']}
254
+ • Files indexed: {stats['indexed_files']}
255
+ • Files filtered:
256
+ - Level 1 (query-based): {stats['level1_filtered']}
257
+ - Level 2 (related files): {stats['level2_filtered']}
258
+ - Relevance verified: {stats.get('verified_files', 0)}
259
+ • Final files selected: {stats['final_files']}
260
+
261
+ === Time Breakdown ===
262
+ • Index build: {stats['timings'].get('build_index', 0):.2f}s
263
+ • Quick filter: {stats['timings'].get('quick_filter', 0):.2f}s
264
+ • Normal filter:
265
+ - Level 1 filter: {stats['timings']["normal_filter"].get('level1_filter', 0):.2f}s
266
+ - Level 2 filter: {stats['timings']["normal_filter"].get('level2_filter', 0):.2f}s
267
+ - Relevance check: {stats['timings']["normal_filter"].get('relevance_verification', 0):.2f}s
268
+ • File selection: {stats['timings'].get('file_selection', 0):.2f}s
269
+ • Total time: {total_time:.2f}s
270
+ ====================================
271
+ """
272
+ logger.info(summary)
273
+
274
+ if args.request_id and not args.skip_events:
275
+ queue_communicate.send_event(
276
+ request_id=args.request_id,
277
+ event=CommunicateEvent(
278
+ event_type=CommunicateEventType.CODE_INDEX_FILTER_END.value,
279
+ data=json.dumps({
280
+ "filtered_files": stats["final_files"],
281
+ "filter_time": total_filter_time
282
+ })
283
+ )
284
+ )
285
+
286
+ return source_code
File without changes
@@ -0,0 +1,168 @@
1
+ from typing import List, Union,Dict,Any
2
+ from autocoder.index.types import IndexItem
3
+ from autocoder.common import SourceCode, AutoCoderArgs
4
+ import byzerllm
5
+ import time
6
+ from autocoder.index.index import IndexManager
7
+ from autocoder.index.types import (
8
+ IndexItem,
9
+ TargetFile,
10
+ VerifyFileRelevance,
11
+ FileList,
12
+ FileNumberList
13
+ )
14
+ from loguru import logger
15
+ from autocoder.utils.queue_communicate import (
16
+ queue_communicate,
17
+ CommunicateEvent,
18
+ CommunicateEventType,
19
+ )
20
+ from concurrent.futures import ThreadPoolExecutor, as_completed
21
+ import json
22
+
23
+ def get_file_path(file_path):
24
+ if file_path.startswith("##"):
25
+ return file_path.strip()[2:]
26
+ return file_path
27
+
28
+
29
+ class NormalFilter():
30
+ def __init__(self, index_manager: IndexManager,stats:Dict[str,Any],sources:List[SourceCode]):
31
+ self.index_manager = index_manager
32
+ self.args = index_manager.args
33
+ self.stats = stats
34
+ self.sources = sources
35
+
36
+ def filter(self, index_items: List[IndexItem], query: str) -> Dict[str, TargetFile]:
37
+ final_files: Dict[str, TargetFile] = {}
38
+ if not self.args.skip_filter_index:
39
+ if self.args.request_id and not self.args.skip_events:
40
+ queue_communicate.send_event(
41
+ request_id=self.args.request_id,
42
+ event=CommunicateEvent(
43
+ event_type=CommunicateEventType.CODE_INDEX_FILTER_START.value,
44
+ data=json.dumps({})
45
+ )
46
+ )
47
+ # Phase 3: Level 1 filtering - Query-based
48
+ logger.info(
49
+ "Phase 3: Performing Level 1 filtering (query-based)...")
50
+
51
+ phase_start = time.monotonic()
52
+ target_files = self.index_manager.get_target_files_by_query(self.args.query)
53
+
54
+ if target_files:
55
+ for file in target_files.file_list:
56
+ file_path = file.file_path.strip()
57
+ final_files[get_file_path(file_path)] = file
58
+ self.stats["level1_filtered"] = len(target_files.file_list)
59
+ phase_end = time.monotonic()
60
+ self.stats["timings"]["normal_filter"]["level1_filter"] = phase_end - phase_start
61
+
62
+ # Phase 4: Level 2 filtering - Related files
63
+ if target_files is not None and self.args.index_filter_level >= 2:
64
+ logger.info(
65
+ "Phase 4: Performing Level 2 filtering (related files)...")
66
+ if self.args.request_id and not self.args.skip_events:
67
+ queue_communicate.send_event(
68
+ request_id=self.args.request_id,
69
+ event=CommunicateEvent(
70
+ event_type=CommunicateEventType.CODE_INDEX_FILTER_START.value,
71
+ data=json.dumps({})
72
+ )
73
+ )
74
+ phase_start = time.monotonic()
75
+ related_files = self.index_manager.get_related_files(
76
+ [file.file_path for file in target_files.file_list]
77
+ )
78
+ if related_files is not None:
79
+ for file in related_files.file_list:
80
+ file_path = file.file_path.strip()
81
+ final_files[get_file_path(file_path)] = file
82
+ self.stats["level2_filtered"] = len(related_files.file_list)
83
+ phase_end = time.monotonic()
84
+ self.stats["timings"]["normal_filter"]["level2_filter"] = phase_end - phase_start
85
+
86
+ if not final_files:
87
+ logger.warning("No related files found, using all files")
88
+ for source in self.sources:
89
+ final_files[get_file_path(source.module_name)] = TargetFile(
90
+ file_path=source.module_name,
91
+ reason="No related files found, use all files",
92
+ )
93
+
94
+ # Phase 5: Relevance verification
95
+ logger.info("Phase 5: Performing relevance verification...")
96
+ if self.args.index_filter_enable_relevance_verification:
97
+ phase_start = time.monotonic()
98
+ verified_files = {}
99
+ temp_files = list(final_files.values())
100
+ verification_results = []
101
+
102
+ def print_verification_results(results):
103
+ from rich.table import Table
104
+ from rich.console import Console
105
+
106
+ console = Console()
107
+ table = Table(title="File Relevance Verification Results", show_header=True, header_style="bold magenta")
108
+ table.add_column("File Path", style="cyan", no_wrap=True)
109
+ table.add_column("Score", justify="right", style="green")
110
+ table.add_column("Status", style="yellow")
111
+ table.add_column("Reason/Error")
112
+
113
+ for file_path, score, status, reason in results:
114
+ table.add_row(
115
+ file_path,
116
+ str(score) if score is not None else "N/A",
117
+ status,
118
+ reason
119
+ )
120
+
121
+ console.print(table)
122
+
123
+ def verify_single_file(file: TargetFile):
124
+ for source in self.sources:
125
+ if source.module_name == file.file_path:
126
+ file_content = source.source_code
127
+ try:
128
+ result = self.index_manager.verify_file_relevance.with_llm(llm).with_return_type(VerifyFileRelevance).run(
129
+ file_content=file_content,
130
+ query=self.args.query
131
+ )
132
+ if result.relevant_score >= self.args.verify_file_relevance_score:
133
+ verified_files[file.file_path] = TargetFile(
134
+ file_path=file.file_path,
135
+ reason=f"Score:{result.relevant_score}, {result.reason}"
136
+ )
137
+ return file.file_path, result.relevant_score, "PASS", result.reason
138
+ else:
139
+ return file.file_path, result.relevant_score, "FAIL", result.reason
140
+ except Exception as e:
141
+ error_msg = str(e)
142
+ verified_files[file.file_path] = TargetFile(
143
+ file_path=file.file_path,
144
+ reason=f"Verification failed: {error_msg}"
145
+ )
146
+ return file.file_path, None, "ERROR", error_msg
147
+ return None
148
+
149
+ with ThreadPoolExecutor(max_workers=self.args.index_filter_workers) as executor:
150
+ futures = [executor.submit(verify_single_file, file)
151
+ for file in temp_files]
152
+ for future in as_completed(futures):
153
+ result = future.result()
154
+ if result:
155
+ verification_results.append(result)
156
+ time.sleep(self.args.anti_quota_limit)
157
+
158
+ # Print verification results in a table
159
+ print_verification_results(verification_results)
160
+
161
+ self.stats["verified_files"] = len(verified_files)
162
+ phase_end = time.monotonic()
163
+ self.stats["timings"]["normal_filter"]["relevance_verification"] = phase_end - phase_start
164
+
165
+ # Keep all files, not just verified ones
166
+ final_files = verified_files
167
+
168
+ return final_files
@@ -0,0 +1,98 @@
1
+ from typing import List, Union,Dict,Any
2
+ from autocoder.index.types import IndexItem
3
+ from autocoder.common import AutoCoderArgs,SourceCode
4
+ import byzerllm
5
+ import time
6
+ from autocoder.index.index import IndexManager
7
+ from autocoder.index.types import (
8
+ IndexItem,
9
+ TargetFile,
10
+ FileNumberList
11
+ )
12
+ from autocoder.rag.token_counter import count_tokens
13
+ from loguru import logger
14
+
15
+
16
+ def get_file_path(file_path):
17
+ if file_path.startswith("##"):
18
+ return file_path.strip()[2:]
19
+ return file_path
20
+
21
+
22
+ class QuickFilter():
23
+ def __init__(self, index_manager: IndexManager,stats:Dict[str,Any],sources:List[SourceCode]):
24
+ self.index_manager = index_manager
25
+ self.args = index_manager.args
26
+ self.stats = stats
27
+ self.sources = sources
28
+
29
+ @byzerllm.prompt()
30
+ def quick_filter_files(self,file_meta_list:List[IndexItem],query:str) -> str:
31
+ '''
32
+ 当用户提一个需求的时候,我们需要找到相关的文件,然后阅读这些文件,并且修改其中部分文件。
33
+ 现在,给定下面的索引文件:
34
+
35
+ <index>
36
+ {{ content }}
37
+ </index>
38
+
39
+ 索引文件包含文件序号(##[]括起来的部分),文件路径,文件符号信息等。
40
+ 下面是用户的查询需求:
41
+
42
+ <query>
43
+ {{ query }}
44
+ </query>
45
+
46
+ 请根据用户的需求,找到相关的文件,并给出文件序号列表。请返回如下json格式:
47
+
48
+ ```json
49
+ {
50
+ "file_list": [
51
+ file_index1,
52
+ file_index2,
53
+ ...
54
+ ]
55
+ }
56
+ ```
57
+
58
+ 特别注意,如果用户的query里 @文件 或者 @@符号,那么被@的文件或者@@的符号必须要返回,并且查看他们依赖的文件是否相关。
59
+ '''
60
+ file_meta_str = "\n".join([f"##[{index}]{item.module_name}\n{item.symbols}" for index,item in enumerate(file_meta_list)])
61
+ context = {
62
+ "content": file_meta_str,
63
+ "query": query
64
+ }
65
+ return context
66
+
67
+ def filter(self, index_items: List[IndexItem], query: str) -> Dict[str, TargetFile]:
68
+ final_files: Dict[str, TargetFile] = {}
69
+ if not self.args.skip_filter_index and self.args.index_filter_model:
70
+ start_time = time.monotonic()
71
+ index_items = self.index_manager.read_index()
72
+
73
+ prompt_str = self.quick_filter_files.prompt(index_items,query)
74
+
75
+ print(prompt_str)
76
+
77
+ tokens_len = count_tokens(prompt_str)
78
+
79
+ if tokens_len > 55*1024:
80
+ logger.warning(f"Quick filter prompt is too long, tokens_len: {tokens_len}/{55*1024} fallback to normal filter")
81
+ return final_files
82
+
83
+ try:
84
+ file_number_list = self.quick_filter_files.with_llm(
85
+ self.index_manager.index_filter_llm).with_return_type(FileNumberList).run(index_items, self.args.query)
86
+ except Exception as e:
87
+ logger.error(f"Quick filter failed, error: {str(e)} fallback to normal filter")
88
+ return final_files
89
+
90
+ if file_number_list:
91
+ for file_number in file_number_list.file_list:
92
+ final_files[get_file_path(index_items[file_number].module_name)] = TargetFile(
93
+ file_path=index_items[file_number].module_name,
94
+ reason="Quick Filter"
95
+ )
96
+ end_time = time.monotonic()
97
+ self.stats["timings"]["quick_filter"] = end_time - start_time
98
+ return final_files
autocoder/index/index.py CHANGED
@@ -6,37 +6,22 @@ from datetime import datetime
6
6
  from autocoder.common import SourceCode, AutoCoderArgs
7
7
  from autocoder.index.symbols_utils import (
8
8
  extract_symbols,
9
- SymbolsInfo,
10
9
  SymbolType,
11
10
  symbols_info_to_str,
12
11
  )
13
12
  from concurrent.futures import ThreadPoolExecutor, as_completed
14
13
  import threading
15
14
 
16
- import pydantic
17
15
  import byzerllm
18
16
  import hashlib
19
- import textwrap
20
- import tabulate
21
- from rich.console import Console
22
- from rich.table import Table
23
- from rich.panel import Panel
24
- from rich.text import Text
25
17
 
26
18
  from loguru import logger
27
- from autocoder.utils.queue_communicate import (
28
- queue_communicate,
29
- CommunicateEvent,
30
- CommunicateEventType,
31
- )
32
19
  from autocoder.index.types import (
33
20
  IndexItem,
34
21
  TargetFile,
35
- VerifyFileRelevance,
36
22
  FileList,
37
23
  )
38
24
 
39
-
40
25
  class IndexManager:
41
26
  def __init__(
42
27
  self, llm: byzerllm.ByzerLLM, sources: List[SourceCode], args: AutoCoderArgs
@@ -53,6 +38,11 @@ class IndexManager:
53
38
  else:
54
39
  self.index_llm = llm
55
40
 
41
+ if llm and (s := llm.get_sub_client("index_filter_model")):
42
+ self.index_filter_llm = s
43
+ else:
44
+ self.index_filter_llm = llm
45
+
56
46
  self.llm = llm
57
47
  self.args = args
58
48
  self.max_input_length = (
@@ -63,6 +53,7 @@ class IndexManager:
63
53
  if not os.path.exists(self.index_dir):
64
54
  os.makedirs(self.index_dir)
65
55
 
56
+
66
57
  @byzerllm.prompt()
67
58
  def verify_file_relevance(self, file_content: str, query: str) -> str:
68
59
  """
@@ -565,381 +556,3 @@ class IndexManager:
565
556
  请确保结果的准确性和完整性,包括所有可能相关的文件。
566
557
  """
567
558
 
568
-
569
- def build_index_and_filter_files(
570
- llm, args: AutoCoderArgs, sources: List[SourceCode]
571
- ) -> str:
572
- # Initialize timing and statistics
573
- total_start_time = time.monotonic()
574
- stats = {
575
- "total_files": len(sources),
576
- "indexed_files": 0,
577
- "level1_filtered": 0,
578
- "level2_filtered": 0,
579
- "verified_files": 0,
580
- "final_files": 0,
581
- "timings": {
582
- "process_tagged_sources": 0.0,
583
- "build_index": 0.0,
584
- "level1_filter": 0.0,
585
- "level2_filter": 0.0,
586
- "relevance_verification": 0.0,
587
- "file_selection": 0.0,
588
- "prepare_output": 0.0,
589
- "total": 0.0
590
- }
591
- }
592
-
593
- def get_file_path(file_path):
594
- if file_path.startswith("##"):
595
- return file_path.strip()[2:]
596
- return file_path
597
-
598
- final_files: Dict[str, TargetFile] = {}
599
-
600
- # Phase 1: Process REST/RAG/Search sources
601
- logger.info("Phase 1: Processing REST/RAG/Search sources...")
602
- phase_start = time.monotonic()
603
- for source in sources:
604
- if source.tag in ["REST", "RAG", "SEARCH"]:
605
- final_files[get_file_path(source.module_name)] = TargetFile(
606
- file_path=source.module_name, reason="Rest/Rag/Search"
607
- )
608
- phase_end = time.monotonic()
609
- stats["timings"]["process_tagged_sources"] = phase_end - phase_start
610
-
611
- if not args.skip_build_index and llm:
612
- # Phase 2: Build index
613
- if args.request_id and not args.skip_events:
614
- queue_communicate.send_event(
615
- request_id=args.request_id,
616
- event=CommunicateEvent(
617
- event_type=CommunicateEventType.CODE_INDEX_BUILD_START.value,
618
- data=json.dumps({"total_files": len(sources)})
619
- )
620
- )
621
-
622
- logger.info("Phase 2: Building index for all files...")
623
- phase_start = time.monotonic()
624
- index_manager = IndexManager(llm=llm, sources=sources, args=args)
625
- index_data = index_manager.build_index()
626
- stats["indexed_files"] = len(index_data) if index_data else 0
627
- phase_end = time.monotonic()
628
- stats["timings"]["build_index"] = phase_end - phase_start
629
-
630
- if args.request_id and not args.skip_events:
631
- queue_communicate.send_event(
632
- request_id=args.request_id,
633
- event=CommunicateEvent(
634
- event_type=CommunicateEventType.CODE_INDEX_BUILD_END.value,
635
- data=json.dumps({
636
- "indexed_files": stats["indexed_files"],
637
- "build_index_time": stats["timings"]["build_index"],
638
- })
639
- )
640
- )
641
-
642
- if not args.skip_filter_index:
643
- if args.request_id and not args.skip_events:
644
- queue_communicate.send_event(
645
- request_id=args.request_id,
646
- event=CommunicateEvent(
647
- event_type=CommunicateEventType.CODE_INDEX_FILTER_START.value,
648
- data=json.dumps({})
649
- )
650
- )
651
- # Phase 3: Level 1 filtering - Query-based
652
- logger.info(
653
- "Phase 3: Performing Level 1 filtering (query-based)...")
654
-
655
- phase_start = time.monotonic()
656
- target_files = index_manager.get_target_files_by_query(args.query)
657
-
658
- if target_files:
659
- for file in target_files.file_list:
660
- file_path = file.file_path.strip()
661
- final_files[get_file_path(file_path)] = file
662
- stats["level1_filtered"] = len(target_files.file_list)
663
- phase_end = time.monotonic()
664
- stats["timings"]["level1_filter"] = phase_end - phase_start
665
-
666
- # Phase 4: Level 2 filtering - Related files
667
- if target_files is not None and args.index_filter_level >= 2:
668
- logger.info(
669
- "Phase 4: Performing Level 2 filtering (related files)...")
670
- if args.request_id and not args.skip_events:
671
- queue_communicate.send_event(
672
- request_id=args.request_id,
673
- event=CommunicateEvent(
674
- event_type=CommunicateEventType.CODE_INDEX_FILTER_START.value,
675
- data=json.dumps({})
676
- )
677
- )
678
- phase_start = time.monotonic()
679
- related_files = index_manager.get_related_files(
680
- [file.file_path for file in target_files.file_list]
681
- )
682
- if related_files is not None:
683
- for file in related_files.file_list:
684
- file_path = file.file_path.strip()
685
- final_files[get_file_path(file_path)] = file
686
- stats["level2_filtered"] = len(related_files.file_list)
687
- phase_end = time.monotonic()
688
- stats["timings"]["level2_filter"] = phase_end - phase_start
689
-
690
- if not final_files:
691
- logger.warning("No related files found, using all files")
692
- for source in sources:
693
- final_files[get_file_path(source.module_name)] = TargetFile(
694
- file_path=source.module_name,
695
- reason="No related files found, use all files",
696
- )
697
-
698
- # Phase 5: Relevance verification
699
- logger.info("Phase 5: Performing relevance verification...")
700
- if args.index_filter_enable_relevance_verification:
701
- phase_start = time.monotonic()
702
- verified_files = {}
703
- temp_files = list(final_files.values())
704
- verification_results = []
705
-
706
- def print_verification_results(results):
707
- from rich.table import Table
708
- from rich.console import Console
709
-
710
- console = Console()
711
- table = Table(title="File Relevance Verification Results", show_header=True, header_style="bold magenta")
712
- table.add_column("File Path", style="cyan", no_wrap=True)
713
- table.add_column("Score", justify="right", style="green")
714
- table.add_column("Status", style="yellow")
715
- table.add_column("Reason/Error")
716
-
717
- for file_path, score, status, reason in results:
718
- table.add_row(
719
- file_path,
720
- str(score) if score is not None else "N/A",
721
- status,
722
- reason
723
- )
724
-
725
- console.print(table)
726
-
727
- def verify_single_file(file: TargetFile):
728
- for source in sources:
729
- if source.module_name == file.file_path:
730
- file_content = source.source_code
731
- try:
732
- result = index_manager.verify_file_relevance.with_llm(llm).with_return_type(VerifyFileRelevance).run(
733
- file_content=file_content,
734
- query=args.query
735
- )
736
- if result.relevant_score >= args.verify_file_relevance_score:
737
- verified_files[file.file_path] = TargetFile(
738
- file_path=file.file_path,
739
- reason=f"Score:{result.relevant_score}, {result.reason}"
740
- )
741
- return file.file_path, result.relevant_score, "PASS", result.reason
742
- else:
743
- return file.file_path, result.relevant_score, "FAIL", result.reason
744
- except Exception as e:
745
- error_msg = str(e)
746
- verified_files[file.file_path] = TargetFile(
747
- file_path=file.file_path,
748
- reason=f"Verification failed: {error_msg}"
749
- )
750
- return file.file_path, None, "ERROR", error_msg
751
- return None
752
-
753
- with ThreadPoolExecutor(max_workers=args.index_filter_workers) as executor:
754
- futures = [executor.submit(verify_single_file, file)
755
- for file in temp_files]
756
- for future in as_completed(futures):
757
- result = future.result()
758
- if result:
759
- verification_results.append(result)
760
- time.sleep(args.anti_quota_limit)
761
-
762
- # Print verification results in a table
763
- print_verification_results(verification_results)
764
-
765
- stats["verified_files"] = len(verified_files)
766
- phase_end = time.monotonic()
767
- stats["timings"]["relevance_verification"] = phase_end - phase_start
768
-
769
- # Keep all files, not just verified ones
770
- final_files = verified_files
771
-
772
- def display_table_and_get_selections(data):
773
- from prompt_toolkit.shortcuts import checkboxlist_dialog
774
- from prompt_toolkit.styles import Style
775
-
776
- choices = [(file, f"{file} - {reason}") for file, reason in data]
777
- selected_files = [file for file, _ in choices]
778
-
779
- style = Style.from_dict(
780
- {
781
- "dialog": "bg:#88ff88",
782
- "dialog frame.label": "bg:#ffffff #000000",
783
- "dialog.body": "bg:#88ff88 #000000",
784
- "dialog shadow": "bg:#00aa00",
785
- }
786
- )
787
-
788
- result = checkboxlist_dialog(
789
- title="Target Files",
790
- text="Tab to switch between buttons, and Space/Enter to select/deselect.",
791
- values=choices,
792
- style=style,
793
- default_values=selected_files,
794
- ).run()
795
-
796
- return [file for file in result] if result else []
797
-
798
- def print_selected(data):
799
- console = Console()
800
-
801
- table = Table(
802
- title="Files Used as Context",
803
- show_header=True,
804
- header_style="bold magenta",
805
- )
806
- table.add_column("File Path", style="cyan", no_wrap=True)
807
- table.add_column("Reason", style="green")
808
-
809
- for file, reason in data:
810
- table.add_row(file, reason)
811
-
812
- panel = Panel(
813
- table,
814
- expand=False,
815
- border_style="bold blue",
816
- padding=(1, 1),
817
- )
818
-
819
- console.print(panel)
820
-
821
- # Phase 6: File selection and limitation
822
- logger.info("Phase 6: Processing file selection and limits...")
823
- phase_start = time.monotonic()
824
-
825
- if args.index_filter_file_num > 0:
826
- logger.info(
827
- f"Limiting files from {len(final_files)} to {args.index_filter_file_num}")
828
-
829
- if args.skip_confirm:
830
- final_filenames = [file.file_path for file in final_files.values()]
831
- if args.index_filter_file_num > 0:
832
- final_filenames = final_filenames[: args.index_filter_file_num]
833
- else:
834
- target_files_data = [
835
- (file.file_path, file.reason) for file in final_files.values()
836
- ]
837
- if not target_files_data:
838
- logger.warning(
839
- "No target files found, you may need to rewrite the query and try again."
840
- )
841
- final_filenames = []
842
- else:
843
- final_filenames = display_table_and_get_selections(
844
- target_files_data)
845
-
846
- if args.index_filter_file_num > 0:
847
- final_filenames = final_filenames[: args.index_filter_file_num]
848
-
849
- phase_end = time.monotonic()
850
- stats["timings"]["file_selection"] = phase_end - phase_start
851
-
852
- # Phase 7: Display results and prepare output
853
- logger.info("Phase 7: Preparing final output...")
854
- phase_start = time.monotonic()
855
- try:
856
- print_selected(
857
- [
858
- (file.file_path, file.reason)
859
- for file in final_files.values()
860
- if file.file_path in final_filenames
861
- ]
862
- )
863
- except Exception as e:
864
- logger.warning(
865
- "Failed to display selected files in terminal mode. Falling back to simple print."
866
- )
867
- print("Target Files Selected:")
868
- for file in final_filenames:
869
- print(f"{file} - {final_files[file].reason}")
870
-
871
- source_code = ""
872
- depulicated_sources = set()
873
-
874
- for file in sources:
875
- if file.module_name in final_filenames:
876
- if file.module_name in depulicated_sources:
877
- continue
878
- depulicated_sources.add(file.module_name)
879
- source_code += f"##File: {file.module_name}\n"
880
- source_code += f"{file.source_code}\n\n"
881
-
882
- if args.request_id and not args.skip_events:
883
- queue_communicate.send_event(
884
- request_id=args.request_id,
885
- event=CommunicateEvent(
886
- event_type=CommunicateEventType.CODE_INDEX_FILTER_FILE_SELECTED.value,
887
- data=json.dumps([
888
- (file.file_path, file.reason)
889
- for file in final_files.values()
890
- if file.file_path in depulicated_sources
891
- ])
892
- )
893
- )
894
-
895
- stats["final_files"] = len(depulicated_sources)
896
- phase_end = time.monotonic()
897
- stats["timings"]["prepare_output"] = phase_end - phase_start
898
-
899
- # Calculate total time and print summary
900
- total_end_time = time.monotonic()
901
- total_time = total_end_time - total_start_time
902
- stats["timings"]["total"] = total_time
903
-
904
- # Calculate total filter time
905
- total_filter_time = (
906
- stats["timings"]["level1_filter"] +
907
- stats["timings"]["level2_filter"] +
908
- stats["timings"]["relevance_verification"]
909
- )
910
-
911
- # Print final statistics in a more structured way
912
- summary = f"""
913
- === Indexing and Filtering Summary ===
914
- • Total files scanned: {stats['total_files']}
915
- • Files indexed: {stats['indexed_files']}
916
- • Files filtered:
917
- - Level 1 (query-based): {stats['level1_filtered']}
918
- - Level 2 (related files): {stats['level2_filtered']}
919
- - Relevance verified: {stats.get('verified_files', 0)}
920
- • Final files selected: {stats['final_files']}
921
-
922
- === Time Breakdown ===
923
- • Index build: {stats['timings'].get('build_index', 0):.2f}s
924
- • Level 1 filter: {stats['timings'].get('level1_filter', 0):.2f}s
925
- • Level 2 filter: {stats['timings'].get('level2_filter', 0):.2f}s
926
- • Relevance check: {stats['timings'].get('relevance_verification', 0):.2f}s
927
- • File selection: {stats['timings'].get('file_selection', 0):.2f}s
928
- • Total time: {total_time:.2f}s
929
- ====================================
930
- """
931
- logger.info(summary)
932
-
933
- if args.request_id and not args.skip_events:
934
- queue_communicate.send_event(
935
- request_id=args.request_id,
936
- event=CommunicateEvent(
937
- event_type=CommunicateEventType.CODE_INDEX_FILTER_END.value,
938
- data=json.dumps({
939
- "filtered_files": stats["final_files"],
940
- "filter_time": total_filter_time
941
- })
942
- )
943
- )
944
-
945
- return source_code
autocoder/index/types.py CHANGED
@@ -21,4 +21,7 @@ class VerifyFileRelevance(pydantic.BaseModel):
21
21
 
22
22
 
23
23
  class FileList(pydantic.BaseModel):
24
- file_list: List[TargetFile]
24
+ file_list: List[TargetFile]
25
+
26
+ class FileNumberList(pydantic.BaseModel):
27
+ file_list: List[int]
autocoder/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.1.232"
1
+ __version__ = "0.1.233"