auto-coder 0.1.232__py3-none-any.whl → 0.1.233__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of auto-coder might be problematic.
- {auto_coder-0.1.232.dist-info → auto_coder-0.1.233.dist-info}/METADATA +1 -1
- {auto_coder-0.1.232.dist-info → auto_coder-0.1.233.dist-info}/RECORD +19 -15
- autocoder/auto_coder.py +28 -5
- autocoder/chat_auto_coder.py +12 -0
- autocoder/common/__init__.py +1 -0
- autocoder/dispacher/actions/action.py +1 -1
- autocoder/dispacher/actions/copilot.py +1 -1
- autocoder/dispacher/actions/plugins/action_regex_project.py +1 -1
- autocoder/index/entry.py +286 -0
- autocoder/index/filter/__init__.py +0 -0
- autocoder/index/filter/normal_filter.py +168 -0
- autocoder/index/filter/quick_filter.py +98 -0
- autocoder/index/index.py +6 -393
- autocoder/index/types.py +4 -1
- autocoder/version.py +1 -1
- {auto_coder-0.1.232.dist-info → auto_coder-0.1.233.dist-info}/LICENSE +0 -0
- {auto_coder-0.1.232.dist-info → auto_coder-0.1.233.dist-info}/WHEEL +0 -0
- {auto_coder-0.1.232.dist-info → auto_coder-0.1.233.dist-info}/entry_points.txt +0 -0
- {auto_coder-0.1.232.dist-info → auto_coder-0.1.233.dist-info}/top_level.txt +0 -0
{auto_coder-0.1.232.dist-info → auto_coder-0.1.233.dist-info}/RECORD CHANGED

@@ -1,17 +1,17 @@
 autocoder/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-autocoder/auto_coder.py,sha256=
+autocoder/auto_coder.py,sha256=r2uHo-xPDbel6OhZwGzl8Iz1GP4d9KUWcDReEol9LOQ,59541
 autocoder/auto_coder_lang.py,sha256=Rtupq6N3_HT7JRhDKdgCBcwRaiAnyCOR_Gsp4jUomrI,3229
 autocoder/auto_coder_rag.py,sha256=illKgzP2bv-Tq50ujsofJnOHdI4pzr0ALtfR8NHHWdQ,22351
 autocoder/auto_coder_rag_client_mcp.py,sha256=WV7j5JUiQge0x4-B7Hp5-pSAFXLbvLpzQMcCovbauIM,6276
 autocoder/auto_coder_rag_mcp.py,sha256=-RrjNwFaS2e5v8XDIrKR-zlUNUE8UBaeOtojffBrvJo,8521
 autocoder/auto_coder_server.py,sha256=XU9b4SBH7zjPPXaTWWHV4_zJm-XYa6njuLQaplYJH_c,20290
 autocoder/benchmark.py,sha256=Ypomkdzd1T3GE6dRICY3Hj547dZ6_inqJbBJIp5QMco,4423
-autocoder/chat_auto_coder.py,sha256=
+autocoder/chat_auto_coder.py,sha256=PzrbhpwTgJEMuPOT7vBd4uPi58mwqLc59l2fVV6rJc8,102049
 autocoder/chat_auto_coder_lang.py,sha256=YJsFi8an0Kjbo9X7xKZfpdbHS3rbhrvChZNjWqEQ5Sw,11032
 autocoder/command_args.py,sha256=9aYJ-AmPxP1sQh6ciw04FWHjSn31f2W9afXFwo8wgx4,30441
 autocoder/lang.py,sha256=U6AjVV8Rs1uLyjFCZ8sT6WWuNUxMBqkXXIOs4S120uk,14511
 autocoder/models.py,sha256=FlBrF6HhGao_RiCSgYhCmP7vs0KlG4hI_BI6dyZiL9s,5292
-autocoder/version.py,sha256=
+autocoder/version.py,sha256=VphyhuzLuUXUhi7WwvwaGVGg2OSz77iY97Prah1F5g8,24
 autocoder/agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autocoder/agent/auto_demand_organizer.py,sha256=NWSAEsEk94vT3lGjfo25kKLMwYdPcpy9e-i21txPasQ,6942
 autocoder/agent/auto_filegroup.py,sha256=CW7bqp0FW1GIEMnl-blyAc2UGT7O9Mom0q66ITz1ckM,6635
@@ -24,7 +24,7 @@ autocoder/agent/project_reader.py,sha256=tWLaPoLw1gI6kO_NzivQj28KbobU2ceOLuppHMb
 autocoder/chat/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autocoder/common/JupyterClient.py,sha256=O-wi6pXeAEYhAY24kDa0BINrLYvKS6rKyWe98pDClS0,2816
 autocoder/common/ShellClient.py,sha256=fM1q8t_XMSbLBl2zkCNC2J9xuyKN3eXzGm6hHhqL2WY,2286
-autocoder/common/__init__.py,sha256
+autocoder/common/__init__.py,sha256=JDIZ_1JP7Nm74OL_aCFwvhiwiynukaE5oPCfh5GPQWU,11695
 autocoder/common/anything2images.py,sha256=0ILBbWzY02M-CiWB-vzuomb_J1hVdxRcenAfIrAXq9M,25283
 autocoder/common/anything2img.py,sha256=4TREa-sOA-iargieUy7MpyCYVUE-9Mmq0wJtwomPqnE,7662
 autocoder/common/audio.py,sha256=Kn9nWKQddWnUrAz0a_ZUgjcu4VUU_IcZBigT7n3N3qc,7439
@@ -69,16 +69,20 @@ autocoder/db/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autocoder/db/store.py,sha256=tFT66bP2ZKIqZip-uhLkHRSLaaOAUUDZfozJwcqix3c,1908
 autocoder/dispacher/__init__.py,sha256=YoA64dIxnx4jcE1pwSfg81sjkQtjDkhddkfac1-cMWo,1230
 autocoder/dispacher/actions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-autocoder/dispacher/actions/action.py,sha256=
-autocoder/dispacher/actions/copilot.py,sha256=
+autocoder/dispacher/actions/action.py,sha256=NjJGLek8H0FlIOreBnl2KEC-jJ5Jq-V8D1RuI6ifUjc,19299
+autocoder/dispacher/actions/copilot.py,sha256=iMh4ckj9hO5Q-iemF3CStXd7DatWai7Eci5zOlKxK9c,13072
 autocoder/dispacher/actions/plugins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-autocoder/dispacher/actions/plugins/action_regex_project.py,sha256
+autocoder/dispacher/actions/plugins/action_regex_project.py,sha256=-ly-NRgQ8LfDQDoH0QmD_LP-G932Kt08WXy9oIvLy10,5325
 autocoder/dispacher/actions/plugins/action_translate.py,sha256=nVAtRSQpdGNmZxg1R_9zXG3AuTv3CHf2v7ODgj8u65c,7727
 autocoder/index/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+autocoder/index/entry.py,sha256=1T41clV3GXwiRbowubQ1iZM5k3_2ECS-DtcnbA9QxAk,10081
 autocoder/index/for_command.py,sha256=LGnz-OWogT8rd24m4Zcan7doLaijxqorAuiMk7WuRq0,3125
-autocoder/index/index.py,sha256=
+autocoder/index/index.py,sha256=lwaobSHvOnzhTMf8SQXzw3nIJQUS4lyo6nLdtv0Ebc0,19223
 autocoder/index/symbols_utils.py,sha256=CjcjUVajmJZB75Ty3a7kMv1BZphrm-tIBAdOJv6uo-0,2037
-autocoder/index/types.py,sha256=
+autocoder/index/types.py,sha256=a2s_KV5FJlq7jqA2ELSo9E1sjuLwDB-JJYMhSpzBAhU,596
+autocoder/index/filter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+autocoder/index/filter/normal_filter.py,sha256=pE5QwcBq6NYHFtYhwhfMJmYQYJwErNs-Q7iZmVBAh-k,7964
+autocoder/index/filter/quick_filter.py,sha256=BxOiZOlK2v6EnX0yV28R3ielXboTmrMvVwielCrqKpE,3678
 autocoder/pyproject/__init__.py,sha256=dQ2_7YZ7guybT9BhfxSGn43eLQJGQN2zgeKa6--JlaQ,14403
 autocoder/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autocoder/rag/api_server.py,sha256=dRbhAZVRAOlZ64Cnxf4_rKb4iJwHnrWS9Zr67IVORw0,7288
@@ -133,9 +137,9 @@ autocoder/utils/tests.py,sha256=BqphrwyycGAvs-5mhH8pKtMZdObwhFtJ5MC_ZAOiLq8,1340
 autocoder/utils/auto_coder_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autocoder/utils/auto_coder_utils/chat_stream_out.py,sha256=6D_SIa5hHSwIHC1poO_ztK7IVugAqNHu-jQySd7EnfQ,4181
 autocoder/utils/chat_auto_coder_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-auto_coder-0.1.
-auto_coder-0.1.
-auto_coder-0.1.
-auto_coder-0.1.
-auto_coder-0.1.
-auto_coder-0.1.
+auto_coder-0.1.233.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
+auto_coder-0.1.233.dist-info/METADATA,sha256=w___EO9VOCh1T58HsPM3Qi5OYm2ktUVVD6I5KucBO3w,2641
+auto_coder-0.1.233.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+auto_coder-0.1.233.dist-info/entry_points.txt,sha256=0nzHtHH4pNcM7xq4EBA2toS28Qelrvcbrr59GqD_0Ak,350
+auto_coder-0.1.233.dist-info/top_level.txt,sha256=Jqc0_uJSw2GwoFQAa9iJxYns-2mWla-9ok_Y3Gcznjk,10
+auto_coder-0.1.233.dist-info/RECORD,,
autocoder/auto_coder.py CHANGED

@@ -42,6 +42,7 @@ from rich.live import Live
 from autocoder.auto_coder_lang import get_message
 from autocoder.common.memory_manager import save_to_memory_file
 from autocoder import models as models_module
+from autocoder.common.utils_code_auto_generate import stream_chat_with_continue
 from autocoder.utils.auto_coder_utils.chat_stream_out import stream_out
 
 console = Console()
@@ -443,7 +444,24 @@ def main(input_args: Optional[List[str]] = None):
                 "saas.is_reasoning": model_info["is_reasoning"]
             }
         )
-        llm.setup_sub_client("inference_model", inference_model)
+        llm.setup_sub_client("inference_model", inference_model)
+
+    if args.index_filter_model:
+        model_info = models_module.get_model_by_name(args.index_filter_model)
+        model_name = args.index_filter_model
+        index_filter_model = byzerllm.SimpleByzerLLM(default_model_name=model_name)
+        index_filter_model.deploy(
+            model_path="",
+            pretrained_model_type=model_info["model_type"],
+            udf_name=model_name,
+            infer_params={
+                "saas.base_url": model_info["base_url"],
+                "saas.api_key": model_info["api_key"],
+                "saas.model": model_info["model_name"],
+                "saas.is_reasoning": model_info["is_reasoning"]
+            }
+        )
+        llm.setup_sub_client("index_filter_model", index_filter_model)
 
 
     if args.product_mode == "pro":
@@ -482,7 +500,12 @@ def main(input_args: Optional[List[str]] = None):
     if args.inference_model:
         inference_model = byzerllm.ByzerLLM()
         inference_model.setup_default_model_name(args.inference_model)
-        llm.setup_sub_client("inference_model", inference_model)
+        llm.setup_sub_client("inference_model", inference_model)
+
+    if args.index_filter_model:
+        index_filter_model = byzerllm.ByzerLLM()
+        index_filter_model.setup_default_model_name(args.index_filter_model)
+        llm.setup_sub_client("index_filter_model", index_filter_model)
 
 
     if args.human_as_model:
@@ -1106,7 +1129,8 @@ def main(input_args: Optional[List[str]] = None):
                 {"role": "assistant", "content": "read"})
             source_count += 1
 
-        from autocoder.index.index import IndexManager
+        from autocoder.index.index import IndexManager
+        from autocoder.index.entry import build_index_and_filter_files
         from autocoder.pyproject import PyProject
         from autocoder.tsproject import TSProject
         from autocoder.suffixproject import SuffixProject
@@ -1272,8 +1296,7 @@ def main(input_args: Optional[List[str]] = None):
             )
         )
         v = [[response.result,None]]
-    else:
-        from autocoder.common.utils_code_auto_generate import stream_chat_with_continue
+    else:
         v = stream_chat_with_continue(
             llm=chat_llm,
             conversations=loaded_conversations,
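The `index_filter_model` wiring above mirrors the existing `inference_model` handling: a secondary LLM client is registered on the main client under a string key, and consumers later look it up, falling back to the main client when nothing was registered. A minimal sketch of that pattern; `MockLLM` is a hypothetical stand-in for `byzerllm.ByzerLLM`/`SimpleByzerLLM`, not part of this release:

# Minimal sketch of the sub-client registry pattern (MockLLM is hypothetical).
class MockLLM:
    def __init__(self, name: str):
        self.name = name
        self._sub_clients = {}

    def setup_sub_client(self, key: str, client: "MockLLM") -> None:
        # Register a dedicated model under a well-known key.
        self._sub_clients[key] = client

    def get_sub_client(self, key: str):
        # None when no dedicated model was configured for this role.
        return self._sub_clients.get(key)

llm = MockLLM("main-model")
llm.setup_sub_client("index_filter_model", MockLLM("filter-model"))

# Consumer side, mirroring the new IndexManager.__init__ logic further below:
if llm and (s := llm.get_sub_client("index_filter_model")):
    index_filter_llm = s
else:
    index_filter_llm = llm  # fall back to the main model
print(index_filter_llm.name)  # -> filter-model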
autocoder/chat_auto_coder.py CHANGED

@@ -52,6 +52,7 @@ from autocoder.common.memory_manager import get_global_memory_file_paths
 from autocoder import models
 import shlex
 from autocoder.utils.llms import get_single_llm
+import pkg_resources
 
 class SymbolItem(BaseModel):
     symbol_name: str
@@ -2478,6 +2479,17 @@ def lib_command(args: List[str]):
 
 
 def main():
+    from autocoder.rag.variable_holder import VariableHolder
+    from tokenizers import Tokenizer
+    try:
+        tokenizer_path = pkg_resources.resource_filename(
+            "autocoder", "data/tokenizer.json"
+        )
+        VariableHolder.TOKENIZER_PATH = tokenizer_path
+        VariableHolder.TOKENIZER_MODEL = Tokenizer.from_file(tokenizer_path)
+    except FileNotFoundError:
+        tokenizer_path = None
+
     ARGS = parse_arguments()
 
     if ARGS.lite:
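The new `main()` preamble resolves the tokenizer bundled with the wheel via `pkg_resources` and caches it on `VariableHolder`, so later code can count tokens without re-reading the file; the `count_tokens` helper imported by the new quick filter presumably builds on the same cached model. A hedged sketch of how such a cached tokenizer is used with the Hugging Face `tokenizers` API; the local path is an assumption for illustration:

from tokenizers import Tokenizer

# Assumes a tokenizer.json is available locally; the release resolves it with
# pkg_resources.resource_filename("autocoder", "data/tokenizer.json").
tokenizer = Tokenizer.from_file("data/tokenizer.json")

def count_tokens(text: str) -> int:
    # Encoding.ids holds the token ids produced for the input text.
    return len(tokenizer.encode(text).ids)

print(count_tokens("hello world"))  # small integer, model-dependent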
autocoder/common/__init__.py CHANGED

@@ -259,6 +259,7 @@ class AutoCoderArgs(pydantic.BaseModel):
     skip_filter_index: Optional[bool] = False
 
     index_model: Optional[str] = ""
+    index_filter_model: Optional[str] = ""
     index_model_max_length: Optional[int] = 0
     index_model_max_input_length: Optional[int] = 0
     index_model_anti_quota_limit: Optional[int] = 0
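Because `index_filter_model` defaults to an empty string, which is falsy, both the sub-client deployment in `auto_coder.py` and the quick-filter path stay disabled unless a model name is configured. A reduced sketch of the field's behavior; the model name is hypothetical:

import pydantic
from typing import Optional

# Reduced sketch of the relevant AutoCoderArgs fields; the real model in
# autocoder/common/__init__.py has many more.
class ArgsSketch(pydantic.BaseModel):
    index_model: Optional[str] = ""
    index_filter_model: Optional[str] = ""

args = ArgsSketch()
assert not args.index_filter_model  # empty string: quick filter stays off

args = ArgsSketch(index_filter_model="some-fast-model")  # hypothetical name
if args.index_filter_model:
    print(f"index filtering will use {args.index_filter_model}")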
autocoder/dispacher/actions/action.py CHANGED

@@ -9,7 +9,7 @@ from autocoder.common.buildin_tokenizer import BuildinTokenizer
 from autocoder.pyproject import PyProject, Level1PyProject
 from autocoder.tsproject import TSProject
 from autocoder.suffixproject import SuffixProject
-from autocoder.index.
+from autocoder.index.entry import build_index_and_filter_files
 from autocoder.common.code_auto_merge import CodeAutoMerge
 from autocoder.common.code_auto_merge_diff import CodeAutoMergeDiff
 from autocoder.common.code_auto_merge_strict_diff import CodeAutoMergeStrictDiff

autocoder/dispacher/actions/copilot.py CHANGED

@@ -9,7 +9,7 @@ from autocoder.common.JupyterClient import JupyterNotebook
 from autocoder.common.ShellClient import ShellClient
 from autocoder.suffixproject import SuffixProject
 from autocoder.common.search import Search, SearchEngine
-from autocoder.index.
+from autocoder.index.entry import build_index_and_filter_files
 from autocoder.common.image_to_page import ImageToPage, ImageToPageDirectly
 from typing import Optional, Dict, Any, List
 import byzerllm

autocoder/dispacher/actions/plugins/action_regex_project.py CHANGED

@@ -9,7 +9,7 @@ from autocoder.common.code_auto_generate import CodeAutoGenerate
 from autocoder.common.code_auto_generate_diff import CodeAutoGenerateDiff
 from autocoder.common.code_auto_generate_strict_diff import CodeAutoGenerateStrictDiff
 from autocoder.common.code_auto_generate_editblock import CodeAutoGenerateEditBlock
-from autocoder.index.
+from autocoder.index.entry import build_index_and_filter_files
 from autocoder.regexproject import RegexProject
 from autocoder.utils.conversation_store import store_code_model_conversation
 from loguru import logger
autocoder/index/entry.py ADDED

@@ -0,0 +1,286 @@
+import os
+import json
+import time
+from typing import List, Dict, Any, Optional
+from datetime import datetime
+from autocoder.common import SourceCode, AutoCoderArgs
+
+from rich.console import Console
+from rich.table import Table
+from rich.panel import Panel
+
+from loguru import logger
+from autocoder.utils.queue_communicate import (
+    queue_communicate,
+    CommunicateEvent,
+    CommunicateEventType,
+)
+from autocoder.index.types import (
+    TargetFile
+)
+
+from autocoder.index.filter.quick_filter import QuickFilter
+from autocoder.index.filter.normal_filter import NormalFilter
+from autocoder.index.index import IndexManager
+
+def build_index_and_filter_files(
+    llm, args: AutoCoderArgs, sources: List[SourceCode]
+) -> str:
+    # Initialize timing and statistics
+    total_start_time = time.monotonic()
+    stats = {
+        "total_files": len(sources),
+        "indexed_files": 0,
+        "level1_filtered": 0,
+        "level2_filtered": 0,
+        "verified_files": 0,
+        "final_files": 0,
+        "timings": {
+            "process_tagged_sources": 0.0,
+            "build_index": 0.0,
+            "quick_filter": 0.0,
+            "normal_filter": {
+                "level1_filter": 0.0,
+                "level2_filter": 0.0,
+                "relevance_verification": 0.0,
+            },
+            "file_selection": 0.0,
+            "prepare_output": 0.0,
+            "total": 0.0
+        }
+    }
+
+    def get_file_path(file_path):
+        if file_path.startswith("##"):
+            return file_path.strip()[2:]
+        return file_path
+
+    final_files: Dict[str, TargetFile] = {}
+
+    # Phase 1: Process REST/RAG/Search sources
+    logger.info("Phase 1: Processing REST/RAG/Search sources...")
+    phase_start = time.monotonic()
+    for source in sources:
+        if source.tag in ["REST", "RAG", "SEARCH"]:
+            final_files[get_file_path(source.module_name)] = TargetFile(
+                file_path=source.module_name, reason="Rest/Rag/Search"
+            )
+    phase_end = time.monotonic()
+    stats["timings"]["process_tagged_sources"] = phase_end - phase_start
+
+    if not args.skip_build_index and llm:
+        # Phase 2: Build index
+        if args.request_id and not args.skip_events:
+            queue_communicate.send_event(
+                request_id=args.request_id,
+                event=CommunicateEvent(
+                    event_type=CommunicateEventType.CODE_INDEX_BUILD_START.value,
+                    data=json.dumps({"total_files": len(sources)})
+                )
+            )
+
+        logger.info("Phase 2: Building index for all files...")
+        phase_start = time.monotonic()
+        index_manager = IndexManager(llm=llm, sources=sources, args=args)
+        index_data = index_manager.build_index()
+        stats["indexed_files"] = len(index_data) if index_data else 0
+        phase_end = time.monotonic()
+        stats["timings"]["build_index"] = phase_end - phase_start
+
+        if args.request_id and not args.skip_events:
+            queue_communicate.send_event(
+                request_id=args.request_id,
+                event=CommunicateEvent(
+                    event_type=CommunicateEventType.CODE_INDEX_BUILD_END.value,
+                    data=json.dumps({
+                        "indexed_files": stats["indexed_files"],
+                        "build_index_time": stats["timings"]["build_index"],
+                    })
+                )
+            )
+
+        quick_filter = QuickFilter(index_manager,stats,sources)
+        final_files = quick_filter.filter(index_manager.read_index(),args.query)
+
+        if not final_files:
+            normal_filter = NormalFilter(index_manager,stats,sources)
+            final_files = normal_filter.filter(index_manager.read_index(),args.query)
+
+
+    def display_table_and_get_selections(data):
+        from prompt_toolkit.shortcuts import checkboxlist_dialog
+        from prompt_toolkit.styles import Style
+
+        choices = [(file, f"{file} - {reason}") for file, reason in data]
+        selected_files = [file for file, _ in choices]
+
+        style = Style.from_dict(
+            {
+                "dialog": "bg:#88ff88",
+                "dialog frame.label": "bg:#ffffff #000000",
+                "dialog.body": "bg:#88ff88 #000000",
+                "dialog shadow": "bg:#00aa00",
+            }
+        )
+
+        result = checkboxlist_dialog(
+            title="Target Files",
+            text="Tab to switch between buttons, and Space/Enter to select/deselect.",
+            values=choices,
+            style=style,
+            default_values=selected_files,
+        ).run()
+
+        return [file for file in result] if result else []
+
+    def print_selected(data):
+        console = Console()
+
+        table = Table(
+            title="Files Used as Context",
+            show_header=True,
+            header_style="bold magenta",
+        )
+        table.add_column("File Path", style="cyan", no_wrap=True)
+        table.add_column("Reason", style="green")
+
+        for file, reason in data:
+            table.add_row(file, reason)
+
+        panel = Panel(
+            table,
+            expand=False,
+            border_style="bold blue",
+            padding=(1, 1),
+        )
+
+        console.print(panel)
+
+    # Phase 6: File selection and limitation
+    logger.info("Phase 6: Processing file selection and limits...")
+    phase_start = time.monotonic()
+
+    if args.index_filter_file_num > 0:
+        logger.info(
+            f"Limiting files from {len(final_files)} to {args.index_filter_file_num}")
+
+    if args.skip_confirm:
+        final_filenames = [file.file_path for file in final_files.values()]
+        if args.index_filter_file_num > 0:
+            final_filenames = final_filenames[: args.index_filter_file_num]
+    else:
+        target_files_data = [
+            (file.file_path, file.reason) for file in final_files.values()
+        ]
+        if not target_files_data:
+            logger.warning(
+                "No target files found, you may need to rewrite the query and try again."
+            )
+            final_filenames = []
+        else:
+            final_filenames = display_table_and_get_selections(
+                target_files_data)
+
+        if args.index_filter_file_num > 0:
+            final_filenames = final_filenames[: args.index_filter_file_num]
+
+    phase_end = time.monotonic()
+    stats["timings"]["file_selection"] = phase_end - phase_start
+
+    # Phase 7: Display results and prepare output
+    logger.info("Phase 7: Preparing final output...")
+    phase_start = time.monotonic()
+    try:
+        print_selected(
+            [
+                (file.file_path, file.reason)
+                for file in final_files.values()
+                if file.file_path in final_filenames
+            ]
+        )
+    except Exception as e:
+        logger.warning(
+            "Failed to display selected files in terminal mode. Falling back to simple print."
+        )
+        print("Target Files Selected:")
+        for file in final_filenames:
+            print(f"{file} - {final_files[file].reason}")
+
+    source_code = ""
+    depulicated_sources = set()
+
+    for file in sources:
+        if file.module_name in final_filenames:
+            if file.module_name in depulicated_sources:
+                continue
+            depulicated_sources.add(file.module_name)
+            source_code += f"##File: {file.module_name}\n"
+            source_code += f"{file.source_code}\n\n"
+
+    if args.request_id and not args.skip_events:
+        queue_communicate.send_event(
+            request_id=args.request_id,
+            event=CommunicateEvent(
+                event_type=CommunicateEventType.CODE_INDEX_FILTER_FILE_SELECTED.value,
+                data=json.dumps([
+                    (file.file_path, file.reason)
+                    for file in final_files.values()
+                    if file.file_path in depulicated_sources
+                ])
+            )
+        )
+
+    stats["final_files"] = len(depulicated_sources)
+    phase_end = time.monotonic()
+    stats["timings"]["prepare_output"] = phase_end - phase_start
+
+    # Calculate total time and print summary
+    total_end_time = time.monotonic()
+    total_time = total_end_time - total_start_time
+    stats["timings"]["total"] = total_time
+
+    # Calculate total filter time
+    total_filter_time = (
+        stats["timings"]["quick_filter"] +
+        stats["timings"]["normal_filter"]["level1_filter"] +
+        stats["timings"]["normal_filter"]["level2_filter"] +
+        stats["timings"]["normal_filter"]["relevance_verification"]
+    )
+
+    # Print final statistics in a more structured way
+    summary = f"""
+=== Indexing and Filtering Summary ===
+• Total files scanned: {stats['total_files']}
+• Files indexed: {stats['indexed_files']}
+• Files filtered:
+  - Level 1 (query-based): {stats['level1_filtered']}
+  - Level 2 (related files): {stats['level2_filtered']}
+  - Relevance verified: {stats.get('verified_files', 0)}
+• Final files selected: {stats['final_files']}
+
+=== Time Breakdown ===
+• Index build: {stats['timings'].get('build_index', 0):.2f}s
+• Quick filter: {stats['timings'].get('quick_filter', 0):.2f}s
+• Normal filter:
+  - Level 1 filter: {stats['timings']["normal_filter"].get('level1_filter', 0):.2f}s
+  - Level 2 filter: {stats['timings']["normal_filter"].get('level2_filter', 0):.2f}s
+  - Relevance check: {stats['timings']["normal_filter"].get('relevance_verification', 0):.2f}s
+• File selection: {stats['timings'].get('file_selection', 0):.2f}s
+• Total time: {total_time:.2f}s
+====================================
+"""
+    logger.info(summary)
+
+    if args.request_id and not args.skip_events:
+        queue_communicate.send_event(
+            request_id=args.request_id,
+            event=CommunicateEvent(
+                event_type=CommunicateEventType.CODE_INDEX_FILTER_END.value,
+                data=json.dumps({
+                    "filtered_files": stats["final_files"],
+                    "filter_time": total_filter_time
+                })
+            )
+        )
+
+    return source_code
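`build_index_and_filter_files` is now the single entry point the dispatcher actions import: it indexes the sources, tries `QuickFilter` first, and falls back to the multi-phase `NormalFilter` when the quick pass returns nothing (no `index_filter_model` configured, prompt over the token budget, or an LLM error). A hedged sketch of a call site; the sources and settings are illustrative, and `llm` would normally be a configured byzerllm client rather than None:

from autocoder.common import AutoCoderArgs, SourceCode
from autocoder.index.entry import build_index_and_filter_files

# Illustrative inputs; field usage follows entry.py above.
sources = [
    SourceCode(module_name="src/app.py", source_code="def run(): ..."),
]
args = AutoCoderArgs(query="add retry logic to run()", skip_confirm=True)

# With llm=None the index/filter phases are skipped entirely; pass a real
# byzerllm.ByzerLLM (optionally with an index_filter_model sub-client) in practice.
context = build_index_and_filter_files(llm=None, args=args, sources=sources)

# Returns the selected files concatenated as "##File: <path>" blocks,
# ready to splice into the code-generation prompt.
print(context)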
autocoder/index/filter/__init__.py ADDED (empty file, no content to display)
autocoder/index/filter/normal_filter.py ADDED

@@ -0,0 +1,168 @@
+from typing import List, Union,Dict,Any
+from autocoder.index.types import IndexItem
+from autocoder.common import SourceCode, AutoCoderArgs
+import byzerllm
+import time
+from autocoder.index.index import IndexManager
+from autocoder.index.types import (
+    IndexItem,
+    TargetFile,
+    VerifyFileRelevance,
+    FileList,
+    FileNumberList
+)
+from loguru import logger
+from autocoder.utils.queue_communicate import (
+    queue_communicate,
+    CommunicateEvent,
+    CommunicateEventType,
+)
+from concurrent.futures import ThreadPoolExecutor, as_completed
+import json
+
+def get_file_path(file_path):
+    if file_path.startswith("##"):
+        return file_path.strip()[2:]
+    return file_path
+
+
+class NormalFilter():
+    def __init__(self, index_manager: IndexManager,stats:Dict[str,Any],sources:List[SourceCode]):
+        self.index_manager = index_manager
+        self.args = index_manager.args
+        self.stats = stats
+        self.sources = sources
+
+    def filter(self, index_items: List[IndexItem], query: str) -> Dict[str, TargetFile]:
+        final_files: Dict[str, TargetFile] = {}
+        if not self.args.skip_filter_index:
+            if self.args.request_id and not self.args.skip_events:
+                queue_communicate.send_event(
+                    request_id=self.args.request_id,
+                    event=CommunicateEvent(
+                        event_type=CommunicateEventType.CODE_INDEX_FILTER_START.value,
+                        data=json.dumps({})
+                    )
+                )
+            # Phase 3: Level 1 filtering - Query-based
+            logger.info(
+                "Phase 3: Performing Level 1 filtering (query-based)...")
+
+            phase_start = time.monotonic()
+            target_files = self.index_manager.get_target_files_by_query(self.args.query)
+
+            if target_files:
+                for file in target_files.file_list:
+                    file_path = file.file_path.strip()
+                    final_files[get_file_path(file_path)] = file
+                self.stats["level1_filtered"] = len(target_files.file_list)
+            phase_end = time.monotonic()
+            self.stats["timings"]["normal_filter"]["level1_filter"] = phase_end - phase_start
+
+            # Phase 4: Level 2 filtering - Related files
+            if target_files is not None and self.args.index_filter_level >= 2:
+                logger.info(
+                    "Phase 4: Performing Level 2 filtering (related files)...")
+                if self.args.request_id and not self.args.skip_events:
+                    queue_communicate.send_event(
+                        request_id=self.args.request_id,
+                        event=CommunicateEvent(
+                            event_type=CommunicateEventType.CODE_INDEX_FILTER_START.value,
+                            data=json.dumps({})
+                        )
+                    )
+                phase_start = time.monotonic()
+                related_files = self.index_manager.get_related_files(
+                    [file.file_path for file in target_files.file_list]
+                )
+                if related_files is not None:
+                    for file in related_files.file_list:
+                        file_path = file.file_path.strip()
+                        final_files[get_file_path(file_path)] = file
+                    self.stats["level2_filtered"] = len(related_files.file_list)
+                phase_end = time.monotonic()
+                self.stats["timings"]["normal_filter"]["level2_filter"] = phase_end - phase_start
+
+            if not final_files:
+                logger.warning("No related files found, using all files")
+                for source in self.sources:
+                    final_files[get_file_path(source.module_name)] = TargetFile(
+                        file_path=source.module_name,
+                        reason="No related files found, use all files",
+                    )
+
+            # Phase 5: Relevance verification
+            logger.info("Phase 5: Performing relevance verification...")
+            if self.args.index_filter_enable_relevance_verification:
+                phase_start = time.monotonic()
+                verified_files = {}
+                temp_files = list(final_files.values())
+                verification_results = []
+
+                def print_verification_results(results):
+                    from rich.table import Table
+                    from rich.console import Console
+
+                    console = Console()
+                    table = Table(title="File Relevance Verification Results", show_header=True, header_style="bold magenta")
+                    table.add_column("File Path", style="cyan", no_wrap=True)
+                    table.add_column("Score", justify="right", style="green")
+                    table.add_column("Status", style="yellow")
+                    table.add_column("Reason/Error")
+
+                    for file_path, score, status, reason in results:
+                        table.add_row(
+                            file_path,
+                            str(score) if score is not None else "N/A",
+                            status,
+                            reason
+                        )
+
+                    console.print(table)
+
+                def verify_single_file(file: TargetFile):
+                    for source in self.sources:
+                        if source.module_name == file.file_path:
+                            file_content = source.source_code
+                            try:
+                                result = self.index_manager.verify_file_relevance.with_llm(llm).with_return_type(VerifyFileRelevance).run(
+                                    file_content=file_content,
+                                    query=self.args.query
+                                )
+                                if result.relevant_score >= self.args.verify_file_relevance_score:
+                                    verified_files[file.file_path] = TargetFile(
+                                        file_path=file.file_path,
+                                        reason=f"Score:{result.relevant_score}, {result.reason}"
+                                    )
+                                    return file.file_path, result.relevant_score, "PASS", result.reason
+                                else:
+                                    return file.file_path, result.relevant_score, "FAIL", result.reason
+                            except Exception as e:
+                                error_msg = str(e)
+                                verified_files[file.file_path] = TargetFile(
+                                    file_path=file.file_path,
+                                    reason=f"Verification failed: {error_msg}"
+                                )
+                                return file.file_path, None, "ERROR", error_msg
+                    return None
+
+                with ThreadPoolExecutor(max_workers=self.args.index_filter_workers) as executor:
+                    futures = [executor.submit(verify_single_file, file)
+                               for file in temp_files]
+                    for future in as_completed(futures):
+                        result = future.result()
+                        if result:
+                            verification_results.append(result)
+                        time.sleep(self.args.anti_quota_limit)
+
+                # Print verification results in a table
+                print_verification_results(verification_results)
+
+                self.stats["verified_files"] = len(verified_files)
+                phase_end = time.monotonic()
+                self.stats["timings"]["normal_filter"]["relevance_verification"] = phase_end - phase_start
+
+                # Keep all files, not just verified ones
+                final_files = verified_files
+
+        return final_files
autocoder/index/filter/quick_filter.py ADDED

@@ -0,0 +1,98 @@
+from typing import List, Union,Dict,Any
+from autocoder.index.types import IndexItem
+from autocoder.common import AutoCoderArgs,SourceCode
+import byzerllm
+import time
+from autocoder.index.index import IndexManager
+from autocoder.index.types import (
+    IndexItem,
+    TargetFile,
+    FileNumberList
+)
+from autocoder.rag.token_counter import count_tokens
+from loguru import logger
+
+
+def get_file_path(file_path):
+    if file_path.startswith("##"):
+        return file_path.strip()[2:]
+    return file_path
+
+
+class QuickFilter():
+    def __init__(self, index_manager: IndexManager,stats:Dict[str,Any],sources:List[SourceCode]):
+        self.index_manager = index_manager
+        self.args = index_manager.args
+        self.stats = stats
+        self.sources = sources
+
+    @byzerllm.prompt()
+    def quick_filter_files(self,file_meta_list:List[IndexItem],query:str) -> str:
+        '''
+        When a user raises a requirement, we need to find the relevant files, read them, and modify some of them.
+        Now, given the index below:
+
+        <index>
+        {{ content }}
+        </index>
+
+        The index contains file sequence numbers (the parts wrapped in ##[]), file paths, file symbol information, and so on.
+        Below is the user's query:
+
+        <query>
+        {{ query }}
+        </query>
+
+        Based on the user's requirement, find the relevant files and return the list of file sequence numbers in the following JSON format:
+
+        ```json
+        {
+            "file_list": [
+                file_index1,
+                file_index2,
+                ...
+            ]
+        }
+        ```
+
+        Note in particular: if the user's query contains @file or @@symbol references, the mentioned files or symbols must be returned, and you should also check whether the files they depend on are relevant.
+        '''
+        file_meta_str = "\n".join([f"##[{index}]{item.module_name}\n{item.symbols}" for index,item in enumerate(file_meta_list)])
+        context = {
+            "content": file_meta_str,
+            "query": query
+        }
+        return context
+
+    def filter(self, index_items: List[IndexItem], query: str) -> Dict[str, TargetFile]:
+        final_files: Dict[str, TargetFile] = {}
+        if not self.args.skip_filter_index and self.args.index_filter_model:
+            start_time = time.monotonic()
+            index_items = self.index_manager.read_index()
+
+            prompt_str = self.quick_filter_files.prompt(index_items,query)
+
+            print(prompt_str)
+
+            tokens_len = count_tokens(prompt_str)
+
+            if tokens_len > 55*1024:
+                logger.warning(f"Quick filter prompt is too long, tokens_len: {tokens_len}/{55*1024} fallback to normal filter")
+                return final_files
+
+            try:
+                file_number_list = self.quick_filter_files.with_llm(
+                    self.index_manager.index_filter_llm).with_return_type(FileNumberList).run(index_items, self.args.query)
+            except Exception as e:
+                logger.error(f"Quick filter failed, error: {str(e)} fallback to normal filter")
+                return final_files
+
+            if file_number_list:
+                for file_number in file_number_list.file_list:
+                    final_files[get_file_path(index_items[file_number].module_name)] = TargetFile(
+                        file_path=index_items[file_number].module_name,
+                        reason="Quick Filter"
+                    )
+            end_time = time.monotonic()
+            self.stats["timings"]["quick_filter"] = end_time - start_time
+        return final_files
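`quick_filter_files` uses byzerllm's prompt-function pattern, which this file exercises in both of its forms: `.prompt(...)` renders the docstring as a Jinja2 template against the dict returned by the function body (used above only to measure prompt length), while `.with_llm(...).with_return_type(FileNumberList).run(...)` sends the rendered prompt to the model and parses the JSON reply into the pydantic type. A reduced sketch of the same shape, shown render-only so it runs without a model:

import byzerllm
import pydantic
from typing import List

class FileNumberList(pydantic.BaseModel):
    file_list: List[int]

class Demo:
    @byzerllm.prompt()
    def pick_files(self, names: List[str], query: str) -> str:
        '''
        Given these files:
        {{ content }}
        Pick the ones relevant to: {{ query }}
        Answer as JSON: {"file_list": [...]}
        '''
        # The returned dict is the Jinja2 render context for the docstring.
        return {
            "content": "\n".join(f"##[{i}] {n}" for i, n in enumerate(names)),
            "query": query,
        }

demo = Demo()
# Render-only call, the same one QuickFilter uses for token counting:
print(demo.pick_files.prompt(["a.py", "b.py"], "fix a.py"))
# With a configured client the executed form is, as in the diff:
#   demo.pick_files.with_llm(llm).with_return_type(FileNumberList).run(names, query)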
autocoder/index/index.py CHANGED

@@ -6,37 +6,22 @@ from datetime import datetime
 from autocoder.common import SourceCode, AutoCoderArgs
 from autocoder.index.symbols_utils import (
     extract_symbols,
-    SymbolsInfo,
     SymbolType,
     symbols_info_to_str,
 )
 from concurrent.futures import ThreadPoolExecutor, as_completed
 import threading
 
-import pydantic
 import byzerllm
 import hashlib
-import textwrap
-import tabulate
-from rich.console import Console
-from rich.table import Table
-from rich.panel import Panel
-from rich.text import Text
 
 from loguru import logger
-from autocoder.utils.queue_communicate import (
-    queue_communicate,
-    CommunicateEvent,
-    CommunicateEventType,
-)
 from autocoder.index.types import (
     IndexItem,
     TargetFile,
-    VerifyFileRelevance,
     FileList,
 )
 
-
 class IndexManager:
     def __init__(
         self, llm: byzerllm.ByzerLLM, sources: List[SourceCode], args: AutoCoderArgs
@@ -53,6 +38,11 @@ class IndexManager:
         else:
             self.index_llm = llm
 
+        if llm and (s := llm.get_sub_client("index_filter_model")):
+            self.index_filter_llm = s
+        else:
+            self.index_filter_llm = llm
+
         self.llm = llm
         self.args = args
         self.max_input_length = (
@@ -63,6 +53,7 @@ class IndexManager:
         if not os.path.exists(self.index_dir):
             os.makedirs(self.index_dir)
 
+
     @byzerllm.prompt()
     def verify_file_relevance(self, file_content: str, query: str) -> str:
         """
@@ -565,381 +556,3 @@ class IndexManager:
     Please ensure the accuracy and completeness of the results, including all potentially relevant files.
     """
 
-
-def build_index_and_filter_files(
-    llm, args: AutoCoderArgs, sources: List[SourceCode]
-) -> str:
-    # Initialize timing and statistics
-    total_start_time = time.monotonic()
-    stats = {
-        "total_files": len(sources),
-        "indexed_files": 0,
-        "level1_filtered": 0,
-        "level2_filtered": 0,
-        "verified_files": 0,
-        "final_files": 0,
-        "timings": {
-            "process_tagged_sources": 0.0,
-            "build_index": 0.0,
-            "level1_filter": 0.0,
-            "level2_filter": 0.0,
-            "relevance_verification": 0.0,
-            "file_selection": 0.0,
-            "prepare_output": 0.0,
-            "total": 0.0
-        }
-    }
-
-    def get_file_path(file_path):
-        if file_path.startswith("##"):
-            return file_path.strip()[2:]
-        return file_path
-
-    final_files: Dict[str, TargetFile] = {}
-
-    # Phase 1: Process REST/RAG/Search sources
-    logger.info("Phase 1: Processing REST/RAG/Search sources...")
-    phase_start = time.monotonic()
-    for source in sources:
-        if source.tag in ["REST", "RAG", "SEARCH"]:
-            final_files[get_file_path(source.module_name)] = TargetFile(
-                file_path=source.module_name, reason="Rest/Rag/Search"
-            )
-    phase_end = time.monotonic()
-    stats["timings"]["process_tagged_sources"] = phase_end - phase_start
-
-    if not args.skip_build_index and llm:
-        # Phase 2: Build index
-        if args.request_id and not args.skip_events:
-            queue_communicate.send_event(
-                request_id=args.request_id,
-                event=CommunicateEvent(
-                    event_type=CommunicateEventType.CODE_INDEX_BUILD_START.value,
-                    data=json.dumps({"total_files": len(sources)})
-                )
-            )
-
-        logger.info("Phase 2: Building index for all files...")
-        phase_start = time.monotonic()
-        index_manager = IndexManager(llm=llm, sources=sources, args=args)
-        index_data = index_manager.build_index()
-        stats["indexed_files"] = len(index_data) if index_data else 0
-        phase_end = time.monotonic()
-        stats["timings"]["build_index"] = phase_end - phase_start
-
-        if args.request_id and not args.skip_events:
-            queue_communicate.send_event(
-                request_id=args.request_id,
-                event=CommunicateEvent(
-                    event_type=CommunicateEventType.CODE_INDEX_BUILD_END.value,
-                    data=json.dumps({
-                        "indexed_files": stats["indexed_files"],
-                        "build_index_time": stats["timings"]["build_index"],
-                    })
-                )
-            )
-
-        if not args.skip_filter_index:
-            if args.request_id and not args.skip_events:
-                queue_communicate.send_event(
-                    request_id=args.request_id,
-                    event=CommunicateEvent(
-                        event_type=CommunicateEventType.CODE_INDEX_FILTER_START.value,
-                        data=json.dumps({})
-                    )
-                )
-            # Phase 3: Level 1 filtering - Query-based
-            logger.info(
-                "Phase 3: Performing Level 1 filtering (query-based)...")
-
-            phase_start = time.monotonic()
-            target_files = index_manager.get_target_files_by_query(args.query)
-
-            if target_files:
-                for file in target_files.file_list:
-                    file_path = file.file_path.strip()
-                    final_files[get_file_path(file_path)] = file
-                stats["level1_filtered"] = len(target_files.file_list)
-            phase_end = time.monotonic()
-            stats["timings"]["level1_filter"] = phase_end - phase_start
-
-            # Phase 4: Level 2 filtering - Related files
-            if target_files is not None and args.index_filter_level >= 2:
-                logger.info(
-                    "Phase 4: Performing Level 2 filtering (related files)...")
-                if args.request_id and not args.skip_events:
-                    queue_communicate.send_event(
-                        request_id=args.request_id,
-                        event=CommunicateEvent(
-                            event_type=CommunicateEventType.CODE_INDEX_FILTER_START.value,
-                            data=json.dumps({})
-                        )
-                    )
-                phase_start = time.monotonic()
-                related_files = index_manager.get_related_files(
-                    [file.file_path for file in target_files.file_list]
-                )
-                if related_files is not None:
-                    for file in related_files.file_list:
-                        file_path = file.file_path.strip()
-                        final_files[get_file_path(file_path)] = file
-                    stats["level2_filtered"] = len(related_files.file_list)
-                phase_end = time.monotonic()
-                stats["timings"]["level2_filter"] = phase_end - phase_start
-
-            if not final_files:
-                logger.warning("No related files found, using all files")
-                for source in sources:
-                    final_files[get_file_path(source.module_name)] = TargetFile(
-                        file_path=source.module_name,
-                        reason="No related files found, use all files",
-                    )
-
-            # Phase 5: Relevance verification
-            logger.info("Phase 5: Performing relevance verification...")
-            if args.index_filter_enable_relevance_verification:
-                phase_start = time.monotonic()
-                verified_files = {}
-                temp_files = list(final_files.values())
-                verification_results = []
-
-                def print_verification_results(results):
-                    from rich.table import Table
-                    from rich.console import Console
-
-                    console = Console()
-                    table = Table(title="File Relevance Verification Results", show_header=True, header_style="bold magenta")
-                    table.add_column("File Path", style="cyan", no_wrap=True)
-                    table.add_column("Score", justify="right", style="green")
-                    table.add_column("Status", style="yellow")
-                    table.add_column("Reason/Error")
-
-                    for file_path, score, status, reason in results:
-                        table.add_row(
-                            file_path,
-                            str(score) if score is not None else "N/A",
-                            status,
-                            reason
-                        )
-
-                    console.print(table)
-
-                def verify_single_file(file: TargetFile):
-                    for source in sources:
-                        if source.module_name == file.file_path:
-                            file_content = source.source_code
-                            try:
-                                result = index_manager.verify_file_relevance.with_llm(llm).with_return_type(VerifyFileRelevance).run(
-                                    file_content=file_content,
-                                    query=args.query
-                                )
-                                if result.relevant_score >= args.verify_file_relevance_score:
-                                    verified_files[file.file_path] = TargetFile(
-                                        file_path=file.file_path,
-                                        reason=f"Score:{result.relevant_score}, {result.reason}"
-                                    )
-                                    return file.file_path, result.relevant_score, "PASS", result.reason
-                                else:
-                                    return file.file_path, result.relevant_score, "FAIL", result.reason
-                            except Exception as e:
-                                error_msg = str(e)
-                                verified_files[file.file_path] = TargetFile(
-                                    file_path=file.file_path,
-                                    reason=f"Verification failed: {error_msg}"
-                                )
-                                return file.file_path, None, "ERROR", error_msg
-                    return None
-
-                with ThreadPoolExecutor(max_workers=args.index_filter_workers) as executor:
-                    futures = [executor.submit(verify_single_file, file)
-                               for file in temp_files]
-                    for future in as_completed(futures):
-                        result = future.result()
-                        if result:
-                            verification_results.append(result)
-                        time.sleep(args.anti_quota_limit)
-
-                # Print verification results in a table
-                print_verification_results(verification_results)
-
-                stats["verified_files"] = len(verified_files)
-                phase_end = time.monotonic()
-                stats["timings"]["relevance_verification"] = phase_end - phase_start
-
-                # Keep all files, not just verified ones
-                final_files = verified_files
-
-    def display_table_and_get_selections(data):
-        from prompt_toolkit.shortcuts import checkboxlist_dialog
-        from prompt_toolkit.styles import Style
-
-        choices = [(file, f"{file} - {reason}") for file, reason in data]
-        selected_files = [file for file, _ in choices]
-
-        style = Style.from_dict(
-            {
-                "dialog": "bg:#88ff88",
-                "dialog frame.label": "bg:#ffffff #000000",
-                "dialog.body": "bg:#88ff88 #000000",
-                "dialog shadow": "bg:#00aa00",
-            }
-        )
-
-        result = checkboxlist_dialog(
-            title="Target Files",
-            text="Tab to switch between buttons, and Space/Enter to select/deselect.",
-            values=choices,
-            style=style,
-            default_values=selected_files,
-        ).run()
-
-        return [file for file in result] if result else []
-
-    def print_selected(data):
-        console = Console()
-
-        table = Table(
-            title="Files Used as Context",
-            show_header=True,
-            header_style="bold magenta",
-        )
-        table.add_column("File Path", style="cyan", no_wrap=True)
-        table.add_column("Reason", style="green")
-
-        for file, reason in data:
-            table.add_row(file, reason)
-
-        panel = Panel(
-            table,
-            expand=False,
-            border_style="bold blue",
-            padding=(1, 1),
-        )
-
-        console.print(panel)
-
-    # Phase 6: File selection and limitation
-    logger.info("Phase 6: Processing file selection and limits...")
-    phase_start = time.monotonic()
-
-    if args.index_filter_file_num > 0:
-        logger.info(
-            f"Limiting files from {len(final_files)} to {args.index_filter_file_num}")
-
-    if args.skip_confirm:
-        final_filenames = [file.file_path for file in final_files.values()]
-        if args.index_filter_file_num > 0:
-            final_filenames = final_filenames[: args.index_filter_file_num]
-    else:
-        target_files_data = [
-            (file.file_path, file.reason) for file in final_files.values()
-        ]
-        if not target_files_data:
-            logger.warning(
-                "No target files found, you may need to rewrite the query and try again."
-            )
-            final_filenames = []
-        else:
-            final_filenames = display_table_and_get_selections(
-                target_files_data)
-
-        if args.index_filter_file_num > 0:
-            final_filenames = final_filenames[: args.index_filter_file_num]
-
-    phase_end = time.monotonic()
-    stats["timings"]["file_selection"] = phase_end - phase_start
-
-    # Phase 7: Display results and prepare output
-    logger.info("Phase 7: Preparing final output...")
-    phase_start = time.monotonic()
-    try:
-        print_selected(
-            [
-                (file.file_path, file.reason)
-                for file in final_files.values()
-                if file.file_path in final_filenames
-            ]
-        )
-    except Exception as e:
-        logger.warning(
-            "Failed to display selected files in terminal mode. Falling back to simple print."
-        )
-        print("Target Files Selected:")
-        for file in final_filenames:
-            print(f"{file} - {final_files[file].reason}")
-
-    source_code = ""
-    depulicated_sources = set()
-
-    for file in sources:
-        if file.module_name in final_filenames:
-            if file.module_name in depulicated_sources:
-                continue
-            depulicated_sources.add(file.module_name)
-            source_code += f"##File: {file.module_name}\n"
-            source_code += f"{file.source_code}\n\n"
-
-    if args.request_id and not args.skip_events:
-        queue_communicate.send_event(
-            request_id=args.request_id,
-            event=CommunicateEvent(
-                event_type=CommunicateEventType.CODE_INDEX_FILTER_FILE_SELECTED.value,
-                data=json.dumps([
-                    (file.file_path, file.reason)
-                    for file in final_files.values()
-                    if file.file_path in depulicated_sources
-                ])
-            )
-        )
-
-    stats["final_files"] = len(depulicated_sources)
-    phase_end = time.monotonic()
-    stats["timings"]["prepare_output"] = phase_end - phase_start
-
-    # Calculate total time and print summary
-    total_end_time = time.monotonic()
-    total_time = total_end_time - total_start_time
-    stats["timings"]["total"] = total_time
-
-    # Calculate total filter time
-    total_filter_time = (
-        stats["timings"]["level1_filter"] +
-        stats["timings"]["level2_filter"] +
-        stats["timings"]["relevance_verification"]
-    )
-
-    # Print final statistics in a more structured way
-    summary = f"""
-=== Indexing and Filtering Summary ===
-• Total files scanned: {stats['total_files']}
-• Files indexed: {stats['indexed_files']}
-• Files filtered:
-  - Level 1 (query-based): {stats['level1_filtered']}
-  - Level 2 (related files): {stats['level2_filtered']}
-  - Relevance verified: {stats.get('verified_files', 0)}
-• Final files selected: {stats['final_files']}
-
-=== Time Breakdown ===
-• Index build: {stats['timings'].get('build_index', 0):.2f}s
-• Level 1 filter: {stats['timings'].get('level1_filter', 0):.2f}s
-• Level 2 filter: {stats['timings'].get('level2_filter', 0):.2f}s
-• Relevance check: {stats['timings'].get('relevance_verification', 0):.2f}s
-• File selection: {stats['timings'].get('file_selection', 0):.2f}s
-• Total time: {total_time:.2f}s
-====================================
-"""
-    logger.info(summary)
-
-    if args.request_id and not args.skip_events:
-        queue_communicate.send_event(
-            request_id=args.request_id,
-            event=CommunicateEvent(
-                event_type=CommunicateEventType.CODE_INDEX_FILTER_END.value,
-                data=json.dumps({
-                    "filtered_files": stats["final_files"],
-                    "filter_time": total_filter_time
-                })
-            )
-        )
-
-    return source_code
autocoder/index/types.py CHANGED
autocoder/version.py CHANGED

@@ -1 +1 @@
-__version__ = "0.1.
+__version__ = "0.1.233"
{auto_coder-0.1.232.dist-info → auto_coder-0.1.233.dist-info}/LICENSE: file without changes
{auto_coder-0.1.232.dist-info → auto_coder-0.1.233.dist-info}/WHEEL: file without changes
{auto_coder-0.1.232.dist-info → auto_coder-0.1.233.dist-info}/entry_points.txt: file without changes
{auto_coder-0.1.232.dist-info → auto_coder-0.1.233.dist-info}/top_level.txt: file without changes