auto-coder 0.1.267__py3-none-any.whl → 0.1.268__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of auto-coder might be problematic.
- {auto_coder-0.1.267.dist-info → auto_coder-0.1.268.dist-info}/METADATA +1 -1
- {auto_coder-0.1.267.dist-info → auto_coder-0.1.268.dist-info}/RECORD +12 -12
- autocoder/chat_auto_coder.py +2 -2
- autocoder/commands/auto_command.py +11 -21
- autocoder/common/auto_coder_lang.py +5 -1
- autocoder/common/context_pruner.py +13 -3
- autocoder/index/index.py +97 -38
- autocoder/version.py +1 -1
- {auto_coder-0.1.267.dist-info → auto_coder-0.1.268.dist-info}/LICENSE +0 -0
- {auto_coder-0.1.267.dist-info → auto_coder-0.1.268.dist-info}/WHEEL +0 -0
- {auto_coder-0.1.267.dist-info → auto_coder-0.1.268.dist-info}/entry_points.txt +0 -0
- {auto_coder-0.1.267.dist-info → auto_coder-0.1.268.dist-info}/top_level.txt +0 -0

{auto_coder-0.1.267.dist-info → auto_coder-0.1.268.dist-info}/RECORD
CHANGED

@@ -6,12 +6,12 @@ autocoder/auto_coder_rag_client_mcp.py,sha256=QRxUbjc6A8UmDMQ8lXgZkjgqtq3lgKYeat
 autocoder/auto_coder_rag_mcp.py,sha256=-RrjNwFaS2e5v8XDIrKR-zlUNUE8UBaeOtojffBrvJo,8521
 autocoder/auto_coder_server.py,sha256=6YQweNEKUrGAZ3yPvw8_qlNZJYLVSVUXGrn1K6udLts,20413
 autocoder/benchmark.py,sha256=Ypomkdzd1T3GE6dRICY3Hj547dZ6_inqJbBJIp5QMco,4423
-autocoder/chat_auto_coder.py,sha256=
+autocoder/chat_auto_coder.py,sha256=X6jlyvABAPNLAx2cnuV8O6XjKPqX6Kl7nv6iEuALm-4,115960
 autocoder/chat_auto_coder_lang.py,sha256=ShOQVOnMA-WlT-fB9OrOer-xQkbcWxJGl-WMPuZcUkM,19572
 autocoder/command_args.py,sha256=9aYJ-AmPxP1sQh6ciw04FWHjSn31f2W9afXFwo8wgx4,30441
 autocoder/lang.py,sha256=U6AjVV8Rs1uLyjFCZ8sT6WWuNUxMBqkXXIOs4S120uk,14511
 autocoder/models.py,sha256=PlG1tKHSHwB57cKLOl5gTl5yTzFUDzCgeHPJU3N9F6Q,9106
-autocoder/version.py,sha256=
+autocoder/version.py,sha256=5v4jQYuNlNWVifBwTlRwUJGC6N6-LuHjzLm1nfAkQQM,23
 autocoder/agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autocoder/agent/auto_demand_organizer.py,sha256=NWSAEsEk94vT3lGjfo25kKLMwYdPcpy9e-i21txPasQ,6942
 autocoder/agent/auto_filegroup.py,sha256=CW7bqp0FW1GIEMnl-blyAc2UGT7O9Mom0q66ITz1ckM,6635
@@ -24,7 +24,7 @@ autocoder/agent/planner.py,sha256=SZTSZHxHzDmuWZo3K5fs79RwvJLWurg-nbJRRNbX65o,91
 autocoder/agent/project_reader.py,sha256=tWLaPoLw1gI6kO_NzivQj28KbobU2ceOLuppHMbfGl8,18234
 autocoder/chat/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autocoder/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-autocoder/commands/auto_command.py,sha256=
+autocoder/commands/auto_command.py,sha256=G4SAmwKhoOxNS2JztUlXCyS3PWhXFhyLK_s22PtltwM,52894
 autocoder/commands/tools.py,sha256=xDhGD1jRN67fGfeck33pM74TxXlAMeo49S3Q9K-VKco,20107
 autocoder/common/JupyterClient.py,sha256=O-wi6pXeAEYhAY24kDa0BINrLYvKS6rKyWe98pDClS0,2816
 autocoder/common/ShellClient.py,sha256=fM1q8t_XMSbLBl2zkCNC2J9xuyKN3eXzGm6hHhqL2WY,2286
@@ -32,7 +32,7 @@ autocoder/common/__init__.py,sha256=eH5NyEsiCzJaNzsxI-Y8TJbB5kpFzuwBgbTrWz9Uthk,
 autocoder/common/anything2images.py,sha256=0ILBbWzY02M-CiWB-vzuomb_J1hVdxRcenAfIrAXq9M,25283
 autocoder/common/anything2img.py,sha256=4TREa-sOA-iargieUy7MpyCYVUE-9Mmq0wJtwomPqnE,7662
 autocoder/common/audio.py,sha256=Kn9nWKQddWnUrAz0a_ZUgjcu4VUU_IcZBigT7n3N3qc,7439
-autocoder/common/auto_coder_lang.py,sha256=
+autocoder/common/auto_coder_lang.py,sha256=74VM_RXNrUXEEyuIr6pXf5xbWQId3ZWhx9NDzQbGquw,29450
 autocoder/common/auto_configure.py,sha256=y0fTplwUdtzVF91iOdZh8zl10MdYFjAquO_Kmj1BWqs,12300
 autocoder/common/buildin_tokenizer.py,sha256=L7d5t39ZFvUd6EoMPXUhYK1toD0FHlRH1jtjKRGokWU,1236
 autocoder/common/chunk_validation.py,sha256=BrR_ZWavW8IANuueEE7hS8NFAwEvm8TX34WnPx_1hs8,3030
@@ -53,7 +53,7 @@ autocoder/common/command_templates.py,sha256=BrmRwOJnyuMETY8v5AQw9D00UQb7ql00BiG
 autocoder/common/conf_import_export.py,sha256=w__WsIobe6nmsGns2pV-laU7R5ZvtQNuIbXebxhbY7A,3967
 autocoder/common/conf_validator.py,sha256=EzSmadpZ22D9e8iWmfeWodUeYJt0IgMoaAOmCleXliI,8795
 autocoder/common/const.py,sha256=eTjhjh4Aj4CUzviJ81jaf3Y5cwqsLATySn2wJxaS6RQ,2911
-autocoder/common/context_pruner.py,sha256=
+autocoder/common/context_pruner.py,sha256=zLY7VllI3XG8icv8WtvDyV7GLOV1qnO3eBCY32ueH4U,12513
 autocoder/common/conversation_pruner.py,sha256=mdMpTpTdPJl8f0UjC1TGKRiYtDc1o6QQD0nYPR9yp1c,5628
 autocoder/common/files.py,sha256=2-9CJwOZtyWkk2TQM4gPSkpJ3_cwb-l_3sdsCd1H5GQ,3380
 autocoder/common/git_utils.py,sha256=qeuF_IB3G3M72asHxWokROU3hINCuFA1nar-UtF9wIU,26022
@@ -95,7 +95,7 @@ autocoder/dispacher/actions/plugins/action_translate.py,sha256=GEn7dZA22jy5WyzIN
 autocoder/index/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autocoder/index/entry.py,sha256=WsJrOf586GAiHacFCdcXQx5MmGKQXIIeEqo5h46AHpE,13474
 autocoder/index/for_command.py,sha256=BFvljE4t6VaMBGboZAuhUCzVK0EitCy_n5D_7FEnihw,3204
-autocoder/index/index.py,sha256=
+autocoder/index/index.py,sha256=1ZNw4s_Fq_ARqjnfU0Vmo1mvdzM6sjaLspjcfinTEh4,27350
 autocoder/index/symbols_utils.py,sha256=_EP7E_qWXxluAxq3FGZLlLfdrfwx3FmxCdulI8VGuac,2244
 autocoder/index/types.py,sha256=a2s_KV5FJlq7jqA2ELSo9E1sjuLwDB-JJYMhSpzBAhU,596
 autocoder/index/filter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -161,9 +161,9 @@ autocoder/utils/types.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autocoder/utils/auto_coder_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autocoder/utils/auto_coder_utils/chat_stream_out.py,sha256=lkJ_A-sYU36JMzjFWkk3pR6uos8oZHYt9GPsPe_CPAo,11766
 autocoder/utils/chat_auto_coder_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-auto_coder-0.1.
-auto_coder-0.1.
-auto_coder-0.1.
-auto_coder-0.1.
-auto_coder-0.1.
-auto_coder-0.1.
+auto_coder-0.1.268.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
+auto_coder-0.1.268.dist-info/METADATA,sha256=0SYv8MrsdLqjdHnrRix68X6NoyysothptSo8AfVHghE,2616
+auto_coder-0.1.268.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+auto_coder-0.1.268.dist-info/entry_points.txt,sha256=0nzHtHH4pNcM7xq4EBA2toS28Qelrvcbrr59GqD_0Ak,350
+auto_coder-0.1.268.dist-info/top_level.txt,sha256=Jqc0_uJSw2GwoFQAa9iJxYns-2mWla-9ok_Y3Gcznjk,10
+auto_coder-0.1.268.dist-info/RECORD,,

autocoder/chat_auto_coder.py
CHANGED

@@ -1507,7 +1507,7 @@ def coding(query: str):
         "silence": conf.get("silence", "true") == "true",
         "include_project_structure": conf.get("include_project_structure", "true")
         == "true",
-        "exclude_files":
+        "exclude_files": memory.get("exclude_files", []),
     }

     yaml_config["context"] = ""
@@ -1636,7 +1636,7 @@ def chat(query: str):
         "skip_build_index": conf.get("skip_build_index", "true") == "true",
         "skip_confirm": conf.get("skip_confirm", "true") == "true",
         "silence": conf.get("silence", "true") == "true",
-        "exclude_files":
+        "exclude_files": memory.get("exclude_files", []),
     }

     current_files = memory["current_files"]["files"] + get_llm_friendly_package_docs(

autocoder/commands/auto_command.py
CHANGED

@@ -213,7 +213,11 @@ class CommandAutoTuner:
         通过 get_project_structure 来获取项目结构,然后通过 get_project_map 来获取你想看的某个文件的用途,符号列表,最后再通过 read_files/read_file_with_keyword_ranges 函数来读取文件内容,确认对应的功能是否在相关的文件里。
         5. 调用 coding 函数的时候,尽可能多的 @文件和@@符号,让需求更加清晰明了,建议多描述具体怎么完成对应的需求。
         6. 对于代码需求设计,尽可能使用 chat 函数。
-        7. 如果成功执行了 coding 函数,最好再调用一次 chat("/review /commit")
+        7. 如果成功执行了 coding 函数,最好再调用一次 chat("/review /commit")
+        8. 我们所有的对话不能超过 {{ conversation_safe_zone_tokens }} 个tokens,当你读取索引文件 (get_project_map) 的时候,你可以看到
+        每个文件的tokens数,你可以根据这个信息来决定如何读取这个文件。比如对于很小的文件,那么可以直接全部读取,
+        而对于分析一个超大文件推荐组合 read_files 带上 line_ranges 参数来读取,或者组合 read_file_withread_file_with_keyword_ranges 等来读取,
+        每个函数你还可以使用多次来获取更多信息。
         </function_combination_readme>


@@ -262,7 +266,8 @@ class CommandAutoTuner:
             "current_conf": json.dumps(self.memory_config.memory["conf"], indent=2),
             "env_info": env_info,
             "shell_type": shells.get_terminal_name(),
-            "shell_encoding": shells.get_terminal_encoding()
+            "shell_encoding": shells.get_terminal_encoding(),
+            "conversation_safe_zone_tokens": self.args.conversation_prune_safe_zone_tokens
         }

     @byzerllm.prompt()
@@ -274,7 +279,7 @@ class CommandAutoTuner:

         <function_result>
         {{ result }}
-        </function_result>
+        </function_result>

         请根据命令执行结果以及前面的对话,返回下一个函数。

@@ -283,7 +288,8 @@ class CommandAutoTuner:
         2. 你最多尝试 {{ auto_command_max_iterations }} 次,如果 {{ auto_command_max_iterations }} 次都没有满足要求,则不要返回任何函数,确保 suggestions 为空。
         '''
         return {
-            "auto_command_max_iterations": self.args.auto_command_max_iterations
+            "auto_command_max_iterations": self.args.auto_command_max_iterations,
+            "conversation_safe_zone_tokens": self.args.conversation_prune_safe_zone_tokens
         }

     def analyze(self, request: AutoCommandRequest) -> AutoCommandResponse:
@@ -985,23 +991,7 @@ class CommandAutoTuner:
         感兴趣,可以配合 read_files 函数来读取文件内容,从而帮你做更好的决策

         </usage>
-        </command>
-
-        <command>
-        <name>get_related_files</name>
-        <description>根据类名、函数名或文件用途描述,返回项目中相关的文件。</description>
-        <usage>
-        该命令接受一个参数 query,为要查询的符号或描述字符串。
-
-        使用例子:
-
-        get_related_files(query="用户登录功能")
-
-        注意:
-        - 返回值为逗号分隔的文件路径列表
-        - 只能返回已被索引的文件
-        </usage>
-        </command>
+        </command>

         <command>
         <name>get_project_map</name>
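
The two template hunks above wire conversation_safe_zone_tokens into the same parameter dict that already carried auto_command_max_iterations, so the rendered instructions can state the token budget to the model. Below is a minimal sketch of that render step, using jinja2 directly rather than the project's @byzerllm.prompt() decorator; the template text and example values are illustrative only, not the project's actual prompt.

# Minimal sketch: the returned dict becomes the variables of a Jinja2-style
# prompt template. The real wiring goes through @byzerllm.prompt(); here we
# render with jinja2 directly and the numbers are made up for illustration.
from jinja2 import Template

PROMPT_TEMPLATE = (
    "You may call at most {{ auto_command_max_iterations }} functions, and the whole "
    "conversation must stay under {{ conversation_safe_zone_tokens }} tokens."
)

def render_prompt(auto_command_max_iterations: int,
                  conversation_safe_zone_tokens: int) -> str:
    params = {
        "auto_command_max_iterations": auto_command_max_iterations,
        "conversation_safe_zone_tokens": conversation_safe_zone_tokens,
    }
    return Template(PROMPT_TEMPLATE).render(**params)

print(render_prompt(auto_command_max_iterations=10,
                    conversation_safe_zone_tokens=50000))
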

autocoder/common/auto_coder_lang.py
CHANGED

@@ -3,6 +3,7 @@ from byzerllm.utils import format_str_jinja2

 MESSAGES = {
     "en": {
+        "file_scored_message": "File scored: {{file_path}} - Score: {{score}}",
         "invalid_file_pattern": "Invalid file pattern: {{file_pattern}}. e.g. regex://.*/package-lock\\.json",
         "config_validation_error": "Config validation error: {{error}}",
         "invalid_boolean_value": "Value '{{value}}' is not a valid boolean(true/false)",
@@ -159,9 +160,11 @@ MESSAGES = {
         "index_export_success": "Index exported successfully: {{path}}",
         "index_import_success": "Index imported successfully: {{path}}",
         "edits_title": "edits",
-        "diff_blocks_title":"diff blocks"
+        "diff_blocks_title":"diff blocks",
+        "index_exclude_files_error": "index filter exclude files fail: {{ error }}"
     },
     "zh": {
+        "file_scored_message": "文件评分: {{file_path}} - 分数: {{score}}",
         "invalid_file_pattern": "无效的文件模式: {{file_pattern}}. 例如: regex://.*/package-lock\\.json",
         "conf_not_found": "未找到配置文件: {{path}}",
         "conf_import_success": "成功导入配置: {{path}}",
@@ -317,6 +320,7 @@ MESSAGES = {
         "index_import_success": "索引导入成功: {{path}}",
         "edits_title": "编辑块",
         "diff_blocks_title": "差异块",
+        "index_exclude_files_error": "索引排除文件时出错: {{error}}"
 }}


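
The new file_scored_message and index_exclude_files_error entries follow the file's existing convention: one Jinja2-style template per message id, duplicated under the en and zh keys. A minimal standalone sketch of looking such a message up and formatting it follows; the real helper is get_message_with_format in this module, so the format_message function and its language selection below are assumptions made for illustration.

# Minimal sketch of the MESSAGES lookup-and-format pattern. The real helper
# is get_message_with_format in autocoder.common.auto_coder_lang; this
# standalone format_message (and its lang argument) is an assumption.
from jinja2 import Template

MESSAGES = {
    "en": {"file_scored_message": "File scored: {{file_path}} - Score: {{score}}"},
    "zh": {"file_scored_message": "文件评分: {{file_path}} - 分数: {{score}}"},
}

def format_message(key: str, lang: str = "en", **kwargs) -> str:
    template = MESSAGES.get(lang, MESSAGES["en"])[key]
    return Template(template).render(**kwargs)

print(format_message("file_scored_message",
                     file_path="autocoder/index/index.py", score=8))
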
autocoder/common/context_pruner.py
CHANGED

@@ -9,11 +9,15 @@ from autocoder.index.types import VerifyFileRelevance
 import byzerllm
 from concurrent.futures import ThreadPoolExecutor, as_completed

+from autocoder.common.printer import Printer
+from autocoder.common.auto_coder_lang import get_message_with_format
+
 class PruneContext:
     def __init__(self, max_tokens: int, args: AutoCoderArgs, llm: Union[byzerllm.ByzerLLM, byzerllm.SimpleByzerLLM]):
         self.max_tokens = max_tokens
         self.args = args
         self.llm = llm
+        self.printer = Printer()

     def _delete_overflow_files(self, file_paths: List[str]) -> List[SourceCode]:
         """直接删除超出 token 限制的文件"""
@@ -201,7 +205,7 @@ class PruneContext:
         total_tokens,sources = self._count_tokens(file_paths)
         if total_tokens <= self.max_tokens:
             return sources
-
+        # print(f"total_tokens: {total_tokens} {self.max_tokens}, 进行策略: {strategy}")
         if strategy == "score":
             return self._score_and_filter_files(file_paths, conversations)
         if strategy == "delete":
@@ -281,9 +285,15 @@ class PruneContext:
         with ThreadPoolExecutor() as executor:
             futures = [executor.submit(_score_file, file_path) for file_path in file_paths]
             for future in as_completed(futures):
-                result = future.result()
-                print(f"score file {result['file_path']} {result['score']}")
+                result = future.result()
                 if result:
+                    self.printer.print_str_in_terminal(
+                        get_message_with_format(
+                            "file_scored_message",
+                            file_path=result["file_path"],
+                            score=result["score"]
+                        )
+                    )
                     scored_files.append(result)

         # 第二步:按分数从高到低排序
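
The last hunk replaces the bare print with a localized, per-file score report, but the underlying strategy is unchanged: score candidate files concurrently, then keep the best-scoring ones that still fit the token budget. Here is a standalone sketch of that pattern, where score_file and count_tokens are stand-ins for the LLM-backed scoring and token counting used by PruneContext.

# Standalone sketch of the "score" pruning strategy: score files concurrently,
# then keep the highest-scoring files that still fit the token budget.
# score_file and count_tokens are stand-ins for the LLM-backed versions.
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import Callable, Dict, List

def score_and_filter(file_paths: List[str],
                     score_file: Callable[[str], Dict],
                     count_tokens: Callable[[str], int],
                     max_tokens: int) -> List[str]:
    scored = []
    with ThreadPoolExecutor() as executor:
        futures = [executor.submit(score_file, p) for p in file_paths]
        for future in as_completed(futures):
            result = future.result()  # e.g. {"file_path": ..., "score": ...}
            if result:
                print(f"File scored: {result['file_path']} - Score: {result['score']}")
                scored.append(result)

    # Sort by score (highest first) and greedily keep files within the budget.
    scored.sort(key=lambda r: r["score"], reverse=True)
    kept, used = [], 0
    for r in scored:
        tokens = count_tokens(r["file_path"])
        if used + tokens <= max_tokens:
            kept.append(r["file_path"])
            used += tokens
    return kept
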
autocoder/index/index.py
CHANGED

@@ -12,6 +12,7 @@ from autocoder.index.symbols_utils import (
 from autocoder.privacy.model_filter import ModelPathFilter
 from concurrent.futures import ThreadPoolExecutor, as_completed
 import threading
+import re

 import byzerllm
 import hashlib
@@ -27,6 +28,8 @@ from autocoder.index.types import (
 from autocoder.common.global_cancel import global_cancel
 from autocoder.utils.llms import get_llm_names
 from autocoder.rag.token_counter import count_tokens
+
+
 class IndexManager:
     def __init__(
         self, llm: byzerllm.ByzerLLM, sources: List[SourceCode], args: AutoCoderArgs
@@ -52,12 +55,14 @@ class IndexManager:
         self.index_filter_llm = llm

         self.llm = llm
-
+
         # Initialize model filters
         if self.index_llm:
-            self.index_model_filter = ModelPathFilter.from_model_object(
+            self.index_model_filter = ModelPathFilter.from_model_object(
+                self.index_llm, args)
         if self.index_filter_llm:
-            self.index_filter_model_filter = ModelPathFilter.from_model_object(
+            self.index_filter_model_filter = ModelPathFilter.from_model_object(
+                self.index_filter_llm, args)
         self.args = args
         self.max_input_length = (
             args.index_model_max_input_length or args.model_max_input_length
@@ -68,7 +73,6 @@ class IndexManager:
         if not os.path.exists(self.index_dir):
             os.makedirs(self.index_dir)

-
     @byzerllm.prompt()
     def verify_file_relevance(self, file_content: str, query: str) -> str:
         """
@@ -201,12 +205,12 @@ class IndexManager:
         if current_chunk:
             chunks.append("\n".join(current_chunk))
         return chunks
-
+
     def should_skip(self, file_path: str):
         ext = os.path.splitext(file_path)[1].lower()
         if ext in [".md", ".html", ".txt", ".doc", ".pdf"]:
             return True
-
+
         # Check model filter restrictions
         if self.index_model_filter and not self.index_model_filter.is_accessible(file_path):
             self.printer.print_in_terminal(
@@ -216,10 +220,10 @@ class IndexManager:
                 model_name=",".join(get_llm_names(self.index_llm))
             )
             return True
-
+
         return False

-    def build_index_for_single_source(self, source: SourceCode):
+    def build_index_for_single_source(self, source: SourceCode):
         if global_cancel.requested:
             return None

@@ -251,13 +255,13 @@ class IndexManager:

         start_time = time.monotonic()
         source_code = source.source_code
-
+
         # 统计token和成本
         total_input_tokens = 0
         total_output_tokens = 0
         total_input_cost = 0.0
         total_output_cost = 0.0
-
+
         if count_tokens(source.source_code) > self.args.conversation_prune_safe_zone_tokens:
             self.printer.print_in_terminal(
                 "index_file_too_large",
@@ -276,34 +280,40 @@ class IndexManager:
                     self.index_llm).with_meta(meta_holder).run(source.module_name, chunk)
                 time.sleep(self.anti_quota_limit)
                 symbols.append(chunk_symbols)
-
+
                 if meta_holder.get_meta():
                     meta_dict = meta_holder.get_meta()
-                    total_input_tokens += meta_dict.get(
-
-
+                    total_input_tokens += meta_dict.get(
+                        "input_tokens_count", 0)
+                    total_output_tokens += meta_dict.get(
+                        "generated_tokens_count", 0)
+
             symbols = "\n".join(symbols)
         else:
             meta_holder = byzerllm.MetaHolder()
             symbols = self.get_all_file_symbols.with_llm(
                 self.index_llm).with_meta(meta_holder).run(source.module_name, source_code)
             time.sleep(self.anti_quota_limit)
-
+
             if meta_holder.get_meta():
                 meta_dict = meta_holder.get_meta()
-                total_input_tokens += meta_dict.get(
-
-
+                total_input_tokens += meta_dict.get(
+                    "input_tokens_count", 0)
+                total_output_tokens += meta_dict.get(
+                    "generated_tokens_count", 0)
+
         # 计算总成本
         for name in model_names:
             info = model_info_map.get(name, {})
-            total_input_cost += (total_input_tokens *
-
-
+            total_input_cost += (total_input_tokens *
+                                 info.get("input_price", 0.0)) / 1000000
+            total_output_cost += (total_output_tokens *
+                                  info.get("output_price", 0.0)) / 1000000
+
         # 四舍五入到4位小数
         total_input_cost = round(total_input_cost, 4)
         total_output_cost = round(total_output_cost, 4)
-
+
         self.printer.print_in_terminal(
             "index_update_success",
             style="green",
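
The reformatted block above keeps the same cost formula: token counts multiplied by a per-million-token price, then rounded to four decimals. A worked example of that arithmetic, with made-up token counts and prices:

# Worked example of the cost arithmetic above; token counts and prices are made up.
total_input_tokens = 12_345
total_output_tokens = 2_000
input_price = 2.0    # assumed price per 1,000,000 input tokens
output_price = 6.0   # assumed price per 1,000,000 output tokens

total_input_cost = round((total_input_tokens * input_price) / 1000000, 4)     # 0.0247
total_output_cost = round((total_output_tokens * output_price) / 1000000, 4)  # 0.012
print(total_input_cost, total_output_cost)
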
@@ -340,9 +350,44 @@ class IndexManager:
             "generated_tokens_cost": total_output_cost
         }

+    def parse_exclude_files(self, exclude_files):
+        if not exclude_files:
+            return []
+
+        if isinstance(exclude_files, str):
+            exclude_files = [exclude_files]
+
+        exclude_patterns = []
+        for pattern in exclude_files:
+            if pattern.startswith("regex://"):
+                pattern = pattern[8:]
+                exclude_patterns.append(re.compile(pattern))
+            elif pattern.startswith("human://"):
+                pattern = pattern[8:]
+                v = (
+                    self.generate_regex_pattern.with_llm(self.llm)
+                    .with_extractor(self.extract_regex_pattern)
+                    .run(desc=pattern)
+                )
+                if not v:
+                    raise ValueError(
+                        "Fail to generate regex pattern, try again.")
+                exclude_patterns.append(re.compile(v))
+            else:
+                raise ValueError(
+                    "Invalid exclude_files format. Expected 'regex://<pattern>' or 'human://<description>' "
+                )
+        return exclude_patterns
+
+    def filter_exclude_files(self, file_path, exclude_patterns):
+        for pattern in exclude_patterns:
+            if pattern.search(file_path):
+                return True
+        return False
+
     def build_index(self):
         if os.path.exists(self.index_file):
-            with open(self.index_file, "r",encoding="utf-8") as file:
+            with open(self.index_file, "r", encoding="utf-8") as file:
                 index_data = json.load(file)
         else:
             index_data = {}
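
The new parse_exclude_files/filter_exclude_files pair accepts two pattern forms: regex://<pattern> is compiled directly, while human://<description> is handed to the LLM to generate a regular expression first. Below is a standalone sketch of the same handling, with the LLM step replaced by a stub; generate_regex_from_description and is_excluded are hypothetical names used only for this illustration.

# Standalone sketch of the exclude-pattern handling added above.
# generate_regex_from_description stands in for the LLM-backed
# generate_regex_pattern prompt; everything else mirrors the diff.
import re
from typing import List

def generate_regex_from_description(desc: str) -> str:
    # Stub: the real implementation asks the LLM to turn a human-readable
    # description into a regular expression.
    raise NotImplementedError("LLM-backed regex generation goes here")

def parse_exclude_files(exclude_files) -> List[re.Pattern]:
    if not exclude_files:
        return []
    if isinstance(exclude_files, str):
        exclude_files = [exclude_files]
    patterns = []
    for item in exclude_files:
        if item.startswith("regex://"):
            patterns.append(re.compile(item[len("regex://"):]))
        elif item.startswith("human://"):
            patterns.append(re.compile(
                generate_regex_from_description(item[len("human://"):])))
        else:
            raise ValueError("Expected 'regex://<pattern>' or 'human://<description>'")
    return patterns

def is_excluded(file_path: str, patterns: List[re.Pattern]) -> bool:
    return any(p.search(file_path) for p in patterns)

patterns = parse_exclude_files(["regex://.*/package-lock\\.json"])
print(is_excluded("web/package-lock.json", patterns))  # True
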
@@ -351,14 +396,27 @@ class IndexManager:
         keys_to_remove = []
         for file_path in index_data:
             if not os.path.exists(file_path):
-                keys_to_remove.append(file_path)
-
+                keys_to_remove.append(file_path)
+
+        # 删除被排除的文件
+        try:
+            exclude_patterns = self.parse_exclude_files(self.args.exclude_files)
+            for file_path in index_data:
+                if self.filter_exclude_files(file_path, exclude_patterns):
+                    keys_to_remove.append(file_path)
+        except Exception as e:
+            self.printer.print_in_terminal(
+                "index_exclude_files_error",
+                style="red",
+                error=str(e)
+            )
+
         # 删除无效条目并记录日志
         for key in set(keys_to_remove):
             if key in index_data:
                 del index_data[key]
                 self.printer.print_in_terminal(
-                    "index_file_removed",
+                    "index_file_removed",
                     style="yellow",
                     file_path=key
                 )
@@ -388,7 +446,7 @@ class IndexManager:
             for line in v:
                 new_v.append(line[line.find(":"):])
             source_code = "\n".join(new_v)
-
+
             md5 = hashlib.md5(source_code.encode("utf-8")).hexdigest()
             if (
                 source.module_name not in index_data
@@ -397,7 +455,8 @@ class IndexManager:
                 wait_to_build_files.append(source)

         # Remove duplicates based on module_name
-        wait_to_build_files = list(
+        wait_to_build_files = list(
+            {source.module_name: source for source in wait_to_build_files}.values())

         counter = 0
         num_files = len(wait_to_build_files)
@@ -433,16 +492,17 @@ class IndexManager:
                     index_data[module_name] = result
                     updated_sources.append(module_name)
                     if len(updated_sources) > 5:
-                        with open(self.index_file, "w",encoding="utf-8") as file:
-                            json.dump(index_data, file,
+                        with open(self.index_file, "w", encoding="utf-8") as file:
+                            json.dump(index_data, file,
+                                      ensure_ascii=False, indent=2)
                         updated_sources = []
-
+
         # 如果 updated_sources 或 keys_to_remove 有值,则保存索引文件
         if updated_sources or keys_to_remove:
-            with open(self.index_file, "w",encoding="utf-8") as file:
+            with open(self.index_file, "w", encoding="utf-8") as file:
                 json.dump(index_data, file, ensure_ascii=False, indent=2)

-            print("")
+            print("")
             self.printer.print_in_terminal(
                 "index_file_saved",
                 style="green",
@@ -461,14 +521,14 @@ class IndexManager:
         if not os.path.exists(self.index_file):
             return []

-        with open(self.index_file, "r",encoding="utf-8") as file:
+        with open(self.index_file, "r", encoding="utf-8") as file:
            return file.read()

     def read_index(self) -> List[IndexItem]:
         if not os.path.exists(self.index_file):
             return []

-        with open(self.index_file, "r",encoding="utf-8") as file:
+        with open(self.index_file, "r", encoding="utf-8") as file:
             index_data = json.load(file)

         index_items = []
@@ -572,7 +632,7 @@ class IndexManager:
             {file.file_path: file for file in all_results}.values())
         return FileList(file_list=all_results)

-    def _query_index_with_thread(self, query, func):
+    def _query_index_with_thread(self, query, func):
         all_results = []
         lock = threading.Lock()
         completed_threads = 0
@@ -582,7 +642,7 @@ class IndexManager:
             nonlocal completed_threads
             result = self._get_target_files_by_query.with_llm(
                 self.llm).with_return_type(FileList).run(chunk, query)
-
+
             if result is not None:
                 with lock:
                     all_results.extend(result.file_list)
@@ -708,4 +768,3 @@ class IndexManager:

         请确保结果的准确性和完整性,包括所有可能相关的文件。
         """
-
autocoder/version.py
CHANGED

@@ -1 +1 @@
-__version__ = "0.1.267"
+__version__ = "0.1.268"

{auto_coder-0.1.267.dist-info → auto_coder-0.1.268.dist-info}/LICENSE
File without changes

{auto_coder-0.1.267.dist-info → auto_coder-0.1.268.dist-info}/WHEEL
File without changes

{auto_coder-0.1.267.dist-info → auto_coder-0.1.268.dist-info}/entry_points.txt
File without changes

{auto_coder-0.1.267.dist-info → auto_coder-0.1.268.dist-info}/top_level.txt
File without changes