auto-coder 0.1.271__py3-none-any.whl → 0.1.272__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {auto_coder-0.1.271.dist-info → auto_coder-0.1.272.dist-info}/METADATA +2 -2
- {auto_coder-0.1.271.dist-info → auto_coder-0.1.272.dist-info}/RECORD +17 -16
- autocoder/auto_coder_runner.py +4 -4
- autocoder/common/auto_coder_lang.py +2 -2
- autocoder/common/auto_configure.py +1 -1
- autocoder/index/filter/quick_filter.py +4 -3
- autocoder/rag/doc_filter.py +165 -59
- autocoder/rag/llm_wrapper.py +3 -1
- autocoder/rag/long_context_rag.py +196 -51
- autocoder/rag/relevant_utils.py +12 -1
- autocoder/rag/token_limiter.py +159 -18
- autocoder/rag/token_limiter_utils.py +13 -0
- autocoder/version.py +1 -1
- {auto_coder-0.1.271.dist-info → auto_coder-0.1.272.dist-info}/LICENSE +0 -0
- {auto_coder-0.1.271.dist-info → auto_coder-0.1.272.dist-info}/WHEEL +0 -0
- {auto_coder-0.1.271.dist-info → auto_coder-0.1.272.dist-info}/entry_points.txt +0 -0
- {auto_coder-0.1.271.dist-info → auto_coder-0.1.272.dist-info}/top_level.txt +0 -0
{auto_coder-0.1.271.dist-info → auto_coder-0.1.272.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: auto-coder
-Version: 0.1.271
+Version: 0.1.272
 Summary: AutoCoder: AutoCoder
 Author: allwefantasy
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
@@ -26,7 +26,7 @@ Requires-Dist: tabulate
 Requires-Dist: jupyter-client
 Requires-Dist: prompt-toolkit
 Requires-Dist: tokenizers
-Requires-Dist: byzerllm[saas] >=0.1.
+Requires-Dist: byzerllm[saas] >=0.1.169
 Requires-Dist: patch
 Requires-Dist: diff-match-patch
 Requires-Dist: GitPython
{auto_coder-0.1.271.dist-info → auto_coder-0.1.272.dist-info}/RECORD
CHANGED

@@ -4,7 +4,7 @@ autocoder/auto_coder_lang.py,sha256=Rtupq6N3_HT7JRhDKdgCBcwRaiAnyCOR_Gsp4jUomrI,
 autocoder/auto_coder_rag.py,sha256=nwgsXO2-scssWStjX3S910tDp-OZXZRddSYrpyC4Nq0,29021
 autocoder/auto_coder_rag_client_mcp.py,sha256=QRxUbjc6A8UmDMQ8lXgZkjgqtq3lgKYeatJbDY6rSo0,6270
 autocoder/auto_coder_rag_mcp.py,sha256=-RrjNwFaS2e5v8XDIrKR-zlUNUE8UBaeOtojffBrvJo,8521
-autocoder/auto_coder_runner.py,sha256=
+autocoder/auto_coder_runner.py,sha256=1SGmVP_X10vNJujdaDc3QuiTeHa4wDIpLp41poUJW7w,100773
 autocoder/auto_coder_server.py,sha256=6YQweNEKUrGAZ3yPvw8_qlNZJYLVSVUXGrn1K6udLts,20413
 autocoder/benchmark.py,sha256=Ypomkdzd1T3GE6dRICY3Hj547dZ6_inqJbBJIp5QMco,4423
 autocoder/chat_auto_coder.py,sha256=skujpqYqf4EvBLQARJELxj7Xwq9KQj2FGefUAiutF7c,16711
@@ -12,7 +12,7 @@ autocoder/chat_auto_coder_lang.py,sha256=ShOQVOnMA-WlT-fB9OrOer-xQkbcWxJGl-WMPuZ
 autocoder/command_args.py,sha256=9aYJ-AmPxP1sQh6ciw04FWHjSn31f2W9afXFwo8wgx4,30441
 autocoder/lang.py,sha256=U6AjVV8Rs1uLyjFCZ8sT6WWuNUxMBqkXXIOs4S120uk,14511
 autocoder/models.py,sha256=PlG1tKHSHwB57cKLOl5gTl5yTzFUDzCgeHPJU3N9F6Q,9106
-autocoder/version.py,sha256=
+autocoder/version.py,sha256=81VehHvNKUJ_hEbhkGJ2_z23uverZt6jaYYz6duFiJI,23
 autocoder/agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autocoder/agent/auto_demand_organizer.py,sha256=NWSAEsEk94vT3lGjfo25kKLMwYdPcpy9e-i21txPasQ,6942
 autocoder/agent/auto_filegroup.py,sha256=CW7bqp0FW1GIEMnl-blyAc2UGT7O9Mom0q66ITz1ckM,6635
@@ -34,8 +34,8 @@ autocoder/common/__init__.py,sha256=d1AmAAYhm4b17dVhAJFwV3Vv2r1lUhMejpjr32Poyg8,
 autocoder/common/anything2images.py,sha256=0ILBbWzY02M-CiWB-vzuomb_J1hVdxRcenAfIrAXq9M,25283
 autocoder/common/anything2img.py,sha256=4TREa-sOA-iargieUy7MpyCYVUE-9Mmq0wJtwomPqnE,7662
 autocoder/common/audio.py,sha256=Kn9nWKQddWnUrAz0a_ZUgjcu4VUU_IcZBigT7n3N3qc,7439
-autocoder/common/auto_coder_lang.py,sha256=
-autocoder/common/auto_configure.py,sha256=
+autocoder/common/auto_coder_lang.py,sha256=ONjY2R5BPc3wuSQOKkxvfMP1nID2dtKJ5s6XCZ0Jzjo,31733
+autocoder/common/auto_configure.py,sha256=c1k6ttDyvc9_5amiCTt4PUq9mRu1IeG560Q3kIRy-Uk,12474
 autocoder/common/buildin_tokenizer.py,sha256=L7d5t39ZFvUd6EoMPXUhYK1toD0FHlRH1jtjKRGokWU,1236
 autocoder/common/chunk_validation.py,sha256=BrR_ZWavW8IANuueEE7hS8NFAwEvm8TX34WnPx_1hs8,3030
 autocoder/common/cleaner.py,sha256=NU72i8C6o9m0vXExab7nao5bstBUsfJFcj11cXa9l4U,1089
@@ -102,25 +102,26 @@ autocoder/index/symbols_utils.py,sha256=_EP7E_qWXxluAxq3FGZLlLfdrfwx3FmxCdulI8VG
 autocoder/index/types.py,sha256=a2s_KV5FJlq7jqA2ELSo9E1sjuLwDB-JJYMhSpzBAhU,596
 autocoder/index/filter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autocoder/index/filter/normal_filter.py,sha256=MI-8xdXCrniaxYCHVTLkq5tafvcUiauD3LN0b3ymRXI,8361
-autocoder/index/filter/quick_filter.py,sha256=
+autocoder/index/filter/quick_filter.py,sha256=5_P-HKK9Wim3ysHUECsegCS-z2PAKYOtfq8FrUY8K24,22777
 autocoder/privacy/__init__.py,sha256=LnIVvGu_K66zCE-yhN_-dPO8R80pQyedCsXJ7wRqQaI,72
 autocoder/privacy/model_filter.py,sha256=-N9ZvxxDKpxU7hkn-tKv-QHyXjvkCopUaKgvJwTOGQs,3369
 autocoder/pyproject/__init__.py,sha256=ms-A_pocgGv0oZPEW8JAdXi7G-VSVhkQ6CnWFe535Ec,14477
 autocoder/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autocoder/rag/api_server.py,sha256=dRbhAZVRAOlZ64Cnxf4_rKb4iJwHnrWS9Zr67IVORw0,7288
-autocoder/rag/doc_filter.py,sha256=
+autocoder/rag/doc_filter.py,sha256=yEXaBw1XJH57Gtvk4-RFQtd5eawA6SBjzxeRZrIsQew,11623
 autocoder/rag/document_retriever.py,sha256=5oThtxukGuRFF96o3pHKsk306a8diXbhgSrbqyU2BvM,8894
-autocoder/rag/llm_wrapper.py,sha256=
-autocoder/rag/long_context_rag.py,sha256=
+autocoder/rag/llm_wrapper.py,sha256=wf56ofQNOaBkLhnoxK9VoVnHWD0gsj0pP8mUBfS92RI,2737
+autocoder/rag/long_context_rag.py,sha256=CzPC-ct6PVIKBkHsKon4s92YXmi8jZOlGgcquOwWQlI,31802
 autocoder/rag/rag_config.py,sha256=8LwFcTd8OJWWwi1_WY4IzjqgtT6RyE2j4PjxS5cCTDE,802
 autocoder/rag/rag_entry.py,sha256=6TKtErZ0Us9XSV6HgRKXA6yR3SiZGPHpynOKSaR1wgE,2463
 autocoder/rag/raw_rag.py,sha256=BOr0YGf3umjqXOIDVO1LXQ0bIHx8hzBdiubND2ezyxc,2946
-autocoder/rag/relevant_utils.py,sha256=
+autocoder/rag/relevant_utils.py,sha256=tgTKGbojCrxuZ7dKbyPh2rCw9TIhwE6ltRxJosaA97U,1267
 autocoder/rag/simple_directory_reader.py,sha256=LkKreCkNdEOoL4fNhc3_hDoyyWTQUte4uqextISRz4U,24485
 autocoder/rag/simple_rag.py,sha256=I902EUqOK1WM0Y2WFd7RzDJYofElvTZNLVCBtX5A9rc,14885
 autocoder/rag/token_checker.py,sha256=jc76x6KWmvVxds6W8juZfQGaoErudc2HenG3sNQfSLs,2819
 autocoder/rag/token_counter.py,sha256=C-Lwc4oIjJpZDEqp9WLHGOe6hb4yhrdJpMtkrtp_1qc,2125
-autocoder/rag/token_limiter.py,sha256=
+autocoder/rag/token_limiter.py,sha256=3VgJF4may3ESyATmBIiOe05oc3VsidJcJTJ5EhoSvH8,18854
+autocoder/rag/token_limiter_utils.py,sha256=FATNEXBnFJy8IK3PWNt1pspIv8wuTgy3F_ACNvqoc4I,404
 autocoder/rag/types.py,sha256=WPgLpUTwbk0BAikyDOc0NOEwV5k73myF38zWdOuYdC4,2499
 autocoder/rag/utils.py,sha256=x5L8gskxUUGkVEP5K-0C-iYntE84asuDXOCqdzdNUoA,4956
 autocoder/rag/variable_holder.py,sha256=PFvBjFcR7-fNDD4Vcsc8CpH2Te057vcpwJMxtrfUgKI,75
@@ -164,9 +165,9 @@ autocoder/utils/types.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autocoder/utils/auto_coder_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autocoder/utils/auto_coder_utils/chat_stream_out.py,sha256=lkJ_A-sYU36JMzjFWkk3pR6uos8oZHYt9GPsPe_CPAo,11766
 autocoder/utils/chat_auto_coder_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-auto_coder-0.1.
-auto_coder-0.1.
-auto_coder-0.1.
-auto_coder-0.1.
-auto_coder-0.1.
-auto_coder-0.1.
+auto_coder-0.1.272.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
+auto_coder-0.1.272.dist-info/METADATA,sha256=_71Qvh-jvuaAUr81tcWZyyjmsY8-_bt50WO-jSimtTQ,2643
+auto_coder-0.1.272.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+auto_coder-0.1.272.dist-info/entry_points.txt,sha256=0nzHtHH4pNcM7xq4EBA2toS28Qelrvcbrr59GqD_0Ak,350
+auto_coder-0.1.272.dist-info/top_level.txt,sha256=Jqc0_uJSw2GwoFQAa9iJxYns-2mWla-9ok_Y3Gcznjk,10
+auto_coder-0.1.272.dist-info/RECORD,,
autocoder/auto_coder_runner.py
CHANGED
@@ -2133,7 +2133,7 @@ def manage_models(query: str):
 # Check duplication
 if any(m["name"] == data_dict["name"] for m in models_data):
 printer.print_in_terminal("models_add_model_exists", style="yellow", name=data_dict["name"])
-result_manager.add_result(content=printer.
+result_manager.add_result(content=printer.get_message_from_key_with_format("models_add_model_exists",name=data_dict["name"]),meta={
 "action": "models",
 "input": {
 "query": query
@@ -2177,7 +2177,7 @@ def manage_models(query: str):
 filtered_models = [m for m in models_data if m["name"] != name]
 if len(filtered_models) == len(models_data):
 printer.print_in_terminal("models_add_model_remove", style="yellow", name=name)
-result_manager.add_result(content=printer.
+result_manager.add_result(content=printer.get_message_from_key_with_format("models_add_model_remove",name=name),meta={
 "action": "models",
 "input": {
 "query": query
@@ -2186,7 +2186,7 @@ def manage_models(query: str):
 return
 models_module.save_models(filtered_models)
 printer.print_in_terminal("models_add_model_removed", style="green", name=name)
-result_manager.add_result(content=printer.
+result_manager.add_result(content=printer.get_message_from_key_with_format("models_add_model_removed",name=name),meta={
 "action": "models",
 "input": {
 "query": query
@@ -2194,7 +2194,7 @@ def manage_models(query: str):
 })
 else:
 printer.print_in_terminal("models_unknown_subcmd", style="yellow", subcmd=subcmd)
-result_manager.add_result(content=printer.
+result_manager.add_result(content=printer.get_message_from_key_with_format("models_unknown_subcmd",subcmd=subcmd),meta={
 "action": "models",
 "input": {
 "query": query
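Note: the four hunks above all make the same change. The result recorded by `result_manager` now stores the fully formatted, localized message built via `printer.get_message_from_key_with_format`, using the same message key that is printed to the terminal. A minimal sketch of the idea, using a simplified stand-in for the real Printer (the actual class and its Jinja-style message templates live in auto-coder and are not reproduced here):

```python
# Illustrative sketch only: a simplified stand-in for auto-coder's Printer,
# showing how a formatted message and the action metadata travel together.
MESSAGES = {"models_add_model_exists": "model {name} already exists"}  # hypothetical template

class Printer:
    def get_message_from_key_with_format(self, key: str, **kwargs) -> str:
        # The real implementation renders a localized Jinja-style template.
        return MESSAGES[key].format(**kwargs)

printer = Printer()
content = printer.get_message_from_key_with_format("models_add_model_exists", name="gpt-4o")
result = {
    "content": content,  # human-readable, already formatted
    "meta": {"action": "models", "input": {"query": "/models /add gpt-4o"}},
}
print(result["content"])  # -> model gpt-4o already exists
```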
autocoder/common/auto_coder_lang.py
CHANGED

@@ -130,7 +130,7 @@ MESSAGES = {
 "quick_filter_too_long": "⚠️ index file is too large ({{ tokens_len }}/{{ max_tokens }}). The query will be split into {{ split_size }} chunks.",
 "quick_filter_tokens_len": "📊 Current index size: {{ tokens_len }} tokens",
 "estimated_chat_input_tokens": "Estimated chat input tokens: {{ estimated_input_tokens }}",
-"estimated_input_tokens_in_generate": "Estimated input tokens in generate ({{ generate_mode }}): {{
+"estimated_input_tokens_in_generate": "Estimated input tokens in generate ({{ generate_mode }}): {{ estimated_input_tokens_in_generate }}",
 "model_has_access_restrictions": "{{model_name}} has access restrictions, cannot use the current function",
 "auto_command_not_found": "Auto command not found: {{command}}. Please check your input and try again.",
 "auto_command_failed": "Auto command failed: {{error}}. Please check your input and try again.",
@@ -319,7 +319,7 @@ MESSAGES = {
 "quick_filter_title": "{{ model_name }} 正在分析如何筛选上下文...",
 "quick_filter_failed": "❌ 快速过滤器失败: {{ error }}. ",
 "estimated_chat_input_tokens": "对话输入token预估为: {{ estimated_input_tokens }}",
-"estimated_input_tokens_in_generate": "生成代码({{ generate_mode }})预计输入token数: {{
+"estimated_input_tokens_in_generate": "生成代码({{ generate_mode }})预计输入token数: {{ estimated_input_tokens_in_generate }}",
 "model_has_access_restrictions": "{{model_name}} 有访问限制,无法使用当前功能",
 "auto_command_not_found": "未找到自动命令: {{command}}。请检查您的输入并重试。",
 "auto_command_failed": "自动命令执行失败: {{error}}。请检查您的输入并重试。",
autocoder/index/filter/quick_filter.py
CHANGED

@@ -287,6 +287,7 @@ class QuickFilter():
 当用户提一个需求的时候,我们要找到两种类型的源码文件:
 1. 根据需求需要被修改的文件,我们叫 edited_files
 2. 为了能够完成修改这些文件,还需要的一些额外参考文件, 我们叫 reference_files
+3. 因为修改了 edited_files 文件,可能有一些依赖 edited_files 的文件也需要被修改,我们叫 dependent_files
 
 现在,给定下面的索引文件:
 
@@ -315,9 +316,9 @@
 ```
 
 特别注意:
-1. 如果用户的query里有 @文件 或者
-2. 根据需求以及根据 @文件 或者 @@符号 找到的文件,猜测需要被修改的edited_files文件,然后尝试通过索引文件诸如导入语句等信息找到这些文件依赖的其他文件得到 reference_files。
-3. file_list 里的文件序号,按被 @ 或者 @@ 文件,edited_files文件,reference_files文件的顺序排列。注意,reference_files 你要根据需求来猜测是否需要,过滤掉不相关的,避免返回文件数过多。
+1. 如果用户的query里有 @文件 或者 @@符号,并且他们在索引文件中,那么被@的文件或者@@的符号必须要返回。
+2. 根据需求以及根据 @文件 或者 @@符号 找到的文件,猜测需要被修改的edited_files文件,然后尝试通过索引文件诸如导入语句等信息找到这些文件依赖的其他文件得到 reference_files,dependent_files。
+3. file_list 里的文件序号,按被 @ 或者 @@ 文件,edited_files文件,reference_files,dependent_files文件的顺序排列。注意,reference_files 你要根据需求来猜测是否需要,过滤掉不相关的,避免返回文件数过多。
 4. 如果 query 里是一段历史对话,那么对话里的内容提及的文件路径必须要返回。
 5. 如果用户需求为空,则直接返回空列表即可。
 6. 返回的 json格式数据不允许有注释
autocoder/rag/doc_filter.py
CHANGED
@@ -5,8 +5,9 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
 
 from autocoder.rag.relevant_utils import (
 parse_relevance,
-FilterDoc,
+FilterDoc,
 TaskTiming,
+DocFilterResult
 )
 
 from autocoder.common import SourceCode, AutoCoderArgs
@@ -48,7 +49,6 @@ def _check_relevance_with_conversation(
 其中, <relevant> 是你认为文档中和问题的相关度,0-10之间的数字,数字越大表示相关度越高。
 """
 
-
 class DocFilter:
 def __init__(
 self,
@@ -62,40 +62,57 @@ class DocFilter:
 self.recall_llm = self.llm.get_sub_client("recall_model")
 else:
 self.recall_llm = self.llm
-
+
 self.args = args
 self.relevant_score = self.args.rag_doc_filter_relevance
 self.on_ray = on_ray
-self.path = path
+self.path = path
 
 def filter_docs(
 self, conversations: List[Dict[str, str]], documents: List[SourceCode]
-) ->
-return self.filter_docs_with_threads(conversations, documents)
+) -> DocFilterResult:
+return self.filter_docs_with_threads(conversations, documents)
 
 def filter_docs_with_threads(
 self, conversations: List[Dict[str, str]], documents: List[SourceCode]
-) ->
-
+) -> DocFilterResult:
+
+start_time = time.time()
+logger.info(f"=== DocFilter Starting ===")
+logger.info(
+f"Configuration: relevance_threshold={self.relevant_score}, thread_workers={self.args.index_filter_workers or 5}")
+
 rag_manager = RagConfigManager(path=self.path)
 rag_config = rag_manager.load_config()
-
-
+
+documents = list(documents)
+logger.info(f"Filtering {len(documents)} documents...")
+
+submitted_tasks = 0
+completed_tasks = 0
+relevant_count = 0
+model_name = self.recall_llm.default_model_name or "unknown"
+
 with ThreadPoolExecutor(
 max_workers=self.args.index_filter_workers or 5
 ) as executor:
 future_to_doc = {}
+
+# 提交所有任务
 for doc in documents:
 submit_time = time.time()
+submitted_tasks += 1
 
 def _run(conversations, docs):
 submit_time_1 = time.time()
+meta = None
 try:
 llm = self.recall_llm
+meta_holder = byzerllm.MetaHolder()
 
 v = (
 _check_relevance_with_conversation.with_llm(
-llm)
+llm).with_meta(meta_holder)
 .options({"llm_config": {"max_length": 10}})
 .run(
 conversations=conversations,
@@ -103,14 +120,16 @@
 filter_config=rag_config.filter_config,
 )
 )
+
+meta = meta_holder.get_meta_model()
 except Exception as e:
 logger.error(
 f"Error in _check_relevance_with_conversation: {str(e)}"
 )
-return (None, submit_time_1, time.time())
+return (None, submit_time_1, time.time(), meta)
 
 end_time_2 = time.time()
-return (v, submit_time_1, end_time_2)
+return (v, submit_time_1, end_time_2, meta)
 
 m = executor.submit(
 _run,
@@ -119,57 +138,144 @@
 )
 future_to_doc[m] = (doc, submit_time)
 
-[removed lines not captured in the source diff view]
-and relevance.relevant_score >= self.relevant_score
-[removed lines not captured in the source diff view]
+logger.info(
+f"Submitted {submitted_tasks} document filtering tasks to thread pool")
+
+# 处理完成的任务
+doc_filter_result = DocFilterResult(
+docs=[],
+raw_docs=[],
+input_tokens_counts=[],
+generated_tokens_counts=[],
+durations=[],
+model_name=model_name
+)
+relevant_docs = doc_filter_result.docs
+for future in as_completed(list(future_to_doc.keys())):
+try:
+doc, submit_time = future_to_doc[future]
+end_time = time.time()
+completed_tasks += 1
+progress_percent = (completed_tasks / len(documents)) * 100
+
+v, submit_time_1, end_time_2, meta = future.result()
+task_timing = TaskTiming(
+submit_time=submit_time,
+end_time=end_time,
+duration=end_time - submit_time,
+real_start_time=submit_time_1,
+real_end_time=end_time_2,
+real_duration=end_time_2 - submit_time_1,
+)
+
+relevance = parse_relevance(v)
+is_relevant = relevance and relevance.relevant_score >= self.relevant_score
+
+if is_relevant:
+relevant_count += 1
+status_text = f"RELEVANT (Score: {relevance.relevant_score:.1f})"
+else:
+score_text = f"{relevance.relevant_score:.1f}" if relevance else "N/A"
+status_text = f"NOT RELEVANT (Score: {score_text})"
+
+queue_time = task_timing.real_start_time - task_timing.submit_time
+
+input_tokens_count = meta.input_tokens_count if meta else 0
+generated_tokens_count = meta.generated_tokens_count if meta else 0
+
+logger.info(
+f"Document filtering [{progress_percent:.1f}%] - {completed_tasks}/{len(documents)}:"
+f"\n - File: {doc.module_name}"
+f"\n - Status: {status_text}"
+f"\n - Model: {model_name}"
+f"\n - Threshold: {self.relevant_score}"
+f"\n - Input tokens: {input_tokens_count}"
+f"\n - Generated tokens: {generated_tokens_count}"
+f"\n - Timing: Duration={task_timing.duration:.2f}s, Processing={task_timing.real_duration:.2f}s, Queue={queue_time:.2f}s"
+f"\n - Response: {v}"
+)
+
+if "rag" not in doc.metadata:
+doc.metadata["rag"] = {}
+doc.metadata["rag"]["recall"] = {
+"input_tokens_count": input_tokens_count,
+"generated_tokens_count": generated_tokens_count,
+"recall_model": model_name,
+"duration": task_timing.real_duration
+}
+
+doc_filter_result.input_tokens_counts.append(input_tokens_count)
+doc_filter_result.generated_tokens_counts.append(generated_tokens_count)
+doc_filter_result.durations.append(task_timing.real_duration)
+
+new_filter_doc = FilterDoc(
 source_code=doc,
 relevance=relevance,
 task_timing=task_timing,
 )
-[removed lines not captured in the source diff view]
+
+doc_filter_result.raw_docs.append(new_filter_doc)
+
+if is_relevant:
+relevant_docs.append(
+new_filter_doc
+)
+except Exception as exc:
+try:
+doc, submit_time = future_to_doc[future]
+completed_tasks += 1
+progress_percent = (
+completed_tasks / len(documents)) * 100
+logger.error(
+f"Document filtering [{progress_percent:.1f}%] - {completed_tasks}/{len(documents)}:"
+f"\n - File: {doc.module_name}"
+f"\n - Error: {exc}"
+f"\n - Duration: {time.time() - submit_time:.2f}s"
+)
+doc_filter_result.raw_docs.append(
+FilterDoc(
+source_code=doc,
+relevance=None,
+task_timing=TaskTiming(),
+)
+)
+except Exception as e:
+logger.error(
+f"Document filtering error in task tracking: {exc}"
+)
 
 # Sort relevant_docs by relevance score in descending order
 relevant_docs.sort(
 key=lambda x: x.relevance.relevant_score, reverse=True)
-
-
+
+total_time = time.time() - start_time
+
+avg_processing_time = sum(
+doc.task_timing.real_duration for doc in relevant_docs) / len(relevant_docs) if relevant_docs else 0
+avg_queue_time = sum(doc.task_timing.real_start_time -
+doc.task_timing.submit_time for doc in relevant_docs) / len(relevant_docs) if relevant_docs else 0
+
+total_input_tokens = sum(doc_filter_result.input_tokens_counts)
+total_generated_tokens = sum(doc_filter_result.generated_tokens_counts)
+
+logger.info(
+f"=== DocFilter Complete ==="
+f"\n * Total time: {total_time:.2f}s"
+f"\n * Documents processed: {completed_tasks}/{len(documents)}"
+f"\n * Relevant documents: {relevant_count} (threshold: {self.relevant_score})"
+f"\n * Average processing time: {avg_processing_time:.2f}s"
+f"\n * Average queue time: {avg_queue_time:.2f}s"
+f"\n * Total input tokens: {total_input_tokens}"
+f"\n * Total generated tokens: {total_generated_tokens}"
+)
+
+if relevant_docs:
+logger.info(
+f"Top 5 relevant documents:"
++ "".join([f"\n * {doc.source_code.module_name} (Score: {doc.relevance.relevant_score:.1f})"
+for doc in relevant_docs[:5]])
+)
+else:
+logger.warning("No relevant documents found!")
+
+return doc_filter_result
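Note: the core of this rewrite is byzerllm's `MetaHolder`, which lets `DocFilter` capture per-call token counts while still running the relevance check through the prompt-function pipeline. A minimal sketch of that pattern, assuming an already configured byzerllm client; the prompt function below is a placeholder, not the real `_check_relevance_with_conversation`:

```python
import byzerllm

@byzerllm.prompt()
def check_relevance(conversations: str, document: str) -> str:
    """请判断下面的文档和问题的相关度: {{ conversations }} / {{ document }}"""

def score_one_doc(llm, conversations: str, document: str):
    meta_holder = byzerllm.MetaHolder()              # collects metadata for this call
    response = (
        check_relevance.with_llm(llm)
        .with_meta(meta_holder)                      # ask byzerllm to fill the holder
        .run(conversations=conversations, document=document)
    )
    meta = meta_holder.get_meta_model()              # may be None if nothing was reported
    input_tokens = meta.input_tokens_count if meta else 0
    generated_tokens = meta.generated_tokens_count if meta else 0
    return response, input_tokens, generated_tokens
```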
autocoder/rag/llm_wrapper.py
CHANGED
@@ -44,13 +44,15 @@ class LLWrapper:
 res,contexts = self.rag.stream_chat_oai(conversations,llm_config=llm_config)
 for t in res:
 yield (t,SingleOutputMeta(0,0))
+
 
 async def async_stream_chat_oai(self,conversations,
 model:Optional[str]=None,
 role_mapping=None,
 delta_mode=False,
 llm_config:Dict[str,Any]={}):
-res,contexts = await asyncfy_with_semaphore(lambda: self.rag.stream_chat_oai(conversations,llm_config=llm_config))()
+res,contexts = await asyncfy_with_semaphore(lambda: self.rag.stream_chat_oai(conversations,llm_config=llm_config))()
+# res,contexts = await self.llm.async_stream_chat_oai(conversations,llm_config=llm_config)
 for t in res:
 yield (t,SingleOutputMeta(0,0))
 
autocoder/rag/long_context_rag.py
CHANGED

@@ -31,6 +31,8 @@ from tokenizers import Tokenizer
 from autocoder.rag.variable_holder import VariableHolder
 from importlib.metadata import version
 from autocoder.rag.stream_event import event_writer
+from autocoder.rag.relevant_utils import DocFilterResult
+from pydantic import BaseModel
 
 try:
 from autocoder_pro.rag.llm_compute import LLMComputeEngine
@@ -42,6 +44,24 @@ except ImportError:
 LLMComputeEngine = None
 
 
+class RecallStat(BaseModel):
+total_input_tokens: int
+total_generated_tokens: int
+model_name: str = "unknown"
+class ChunkStat(BaseModel):
+total_input_tokens: int
+total_generated_tokens: int
+model_name: str = "unknown"
+class AnswerStat(BaseModel):
+total_input_tokens: int
+total_generated_tokens: int
+model_name: str = "unknown"
+
+class RAGStat(BaseModel):
+recall_stat: RecallStat
+chunk_stat: ChunkStat
+answer_stat: AnswerStat
+
 class LongContextRAG:
 def __init__(
 self,
@@ -305,7 +325,7 @@ class LongContextRAG:
 url = ",".join(contexts)
 return [SourceCode(module_name=f"RAG:{url}", source_code="".join(v))]
 
-def _filter_docs(self, conversations: List[Dict[str, str]]) ->
+def _filter_docs(self, conversations: List[Dict[str, str]]) -> DocFilterResult:
 query = conversations[-1]["content"]
 documents = self._retrieve_documents(options={"query":query})
 return self.doc_filter.filter_docs(
@@ -439,7 +459,32 @@
 
 logger.info(f"Query: {query} only_contexts: {only_contexts}")
 start_time = time.time()
-
+
+rag_stat = RAGStat(
+recall_stat=RecallStat(
+total_input_tokens=0,
+total_generated_tokens=0,
+model_name=self.llm.default_model_name,
+),
+chunk_stat=ChunkStat(
+total_input_tokens=0,
+total_generated_tokens=0,
+model_name=self.llm.default_model_name,
+),
+answer_stat=AnswerStat(
+total_input_tokens=0,
+total_generated_tokens=0,
+model_name=self.llm.default_model_name,
+),
+)
+
+doc_filter_result = self._filter_docs(conversations)
+
+rag_stat.recall_stat.total_input_tokens += sum(doc_filter_result.input_tokens_counts)
+rag_stat.recall_stat.total_generated_tokens += sum(doc_filter_result.generated_tokens_counts)
+rag_stat.recall_stat.model_name = doc_filter_result.model_name
+
+relevant_docs: List[FilterDoc] = doc_filter_result.docs
 filter_time = time.time() - start_time
 
 # Filter relevant_docs to only include those with is_relevant=True
@@ -469,17 +514,15 @@
 # 将 FilterDoc 转化为 SourceCode 方便后续的逻辑继续做处理
 relevant_docs = [doc.source_code for doc in relevant_docs]
 
-
+logger.info(f"=== RAG Search Results ===")
+logger.info(f"Query: {query}")
+logger.info(f"Found relevant docs: {len(relevant_docs)}")
 
-#
-query_table = Table(title="Query Information", show_header=False)
-query_table.add_row("Query", query)
-query_table.add_row("Relevant docs", str(len(relevant_docs)))
-
-# Add relevant docs information
+# 记录相关文档信息
 relevant_docs_info = []
-for doc in relevant_docs:
-
+for i, doc in enumerate(relevant_docs):
+doc_path = doc.module_name.replace(self.path, '', 1)
+info = f"{i+1}. {doc_path}"
 if "original_docs" in doc.metadata:
 original_docs = ", ".join(
 [
@@ -490,8 +533,11 @@
 info += f" (Original docs: {original_docs})"
 relevant_docs_info.append(info)
 
-
-
+if relevant_docs_info:
+logger.info(
+f"Relevant documents list:"
++ "".join([f"\n * {info}" for info in relevant_docs_info])
+)
 
 first_round_full_docs = []
 second_round_extracted_docs = []
@@ -507,11 +553,18 @@
 llm=self.llm,
 disable_segment_reorder=self.args.disable_segment_reorder,
 )
-
+
+token_limiter_result = token_limiter.limit_tokens(
 relevant_docs=relevant_docs,
 conversations=conversations,
 index_filter_workers=self.args.index_filter_workers or 5,
 )
+
+rag_stat.chunk_stat.total_input_tokens += sum(token_limiter_result.input_tokens_counts)
+rag_stat.chunk_stat.total_generated_tokens += sum(token_limiter_result.generated_tokens_counts)
+rag_stat.chunk_stat.model_name = token_limiter_result.model_name
+
+final_relevant_docs = token_limiter_result.docs
 first_round_full_docs = token_limiter.first_round_full_docs
 second_round_extracted_docs = token_limiter.second_round_extracted_docs
 sencond_round_time = token_limiter.sencond_round_time
@@ -522,57 +575,64 @@
 
 logger.info(f"Finally send to model: {len(relevant_docs)}")
 
-
-
-
-
-"
-
-
-"
-
-query_table.add_row(
-"Second round time", f"{sencond_round_time:.2f} seconds"
+# 记录分段处理的统计信息
+logger.info(
+f"=== Token Management ===\n"
+f" * Only contexts: {only_contexts}\n"
+f" * Filter time: {filter_time:.2f} seconds\n"
+f" * Final relevant docs: {len(relevant_docs)}\n"
+f" * First round full docs: {len(first_round_full_docs)}\n"
+f" * Second round extracted docs: {len(second_round_extracted_docs)}\n"
+f" * Second round time: {sencond_round_time:.2f} seconds"
 )
 
-#
+# 记录最终选择的文档详情
 final_relevant_docs_info = []
-for doc in relevant_docs:
-
+for i, doc in enumerate(relevant_docs):
+doc_path = doc.module_name.replace(self.path, '', 1)
+info = f"{i+1}. {doc_path}"
+
+metadata_info = []
 if "original_docs" in doc.metadata:
 original_docs = ", ".join(
 [
-
-for
+od.replace(self.path, "", 1)
+for od in doc.metadata["original_docs"]
 ]
 )
-
+metadata_info.append(f"Original docs: {original_docs}")
+
 if "chunk_ranges" in doc.metadata:
 chunk_ranges = json.dumps(
 doc.metadata["chunk_ranges"], ensure_ascii=False
 )
-
+metadata_info.append(f"Chunk ranges: {chunk_ranges}")
+
+if "processing_time" in doc.metadata:
+metadata_info.append(f"Processing time: {doc.metadata['processing_time']:.2f}s")
+
+if metadata_info:
+info += f" ({'; '.join(metadata_info)})"
+
 final_relevant_docs_info.append(info)
 
-
-
-
-
-panel = Panel(
-query_table,
-title="RAG Search Results",
-expand=False,
+if final_relevant_docs_info:
+logger.info(
+f"Final documents to be sent to model:"
++ "".join([f"\n * {info}" for info in final_relevant_docs_info])
 )
 
-#
-console.print(panel)
-
+# 记录令牌统计
 request_tokens = sum([doc.tokens for doc in relevant_docs])
 target_model = model or self.llm.default_model_name
 logger.info(
-f"
+f"=== LLM Request ===\n"
+f" * Target model: {target_model}\n"
+f" * Total tokens: {request_tokens}"
 )
 
+logger.info(f"Start to send to model {target_model} with {request_tokens} tokens")
+
 if LLMComputeEngine is not None and not self.args.disable_inference_enhance:
 llm_compute_engine = LLMComputeEngine(
 llm=target_llm,
@@ -585,17 +645,22 @@
 new_conversations = llm_compute_engine.process_conversation(
 conversations, query, [doc.source_code for doc in relevant_docs]
 )
-
-return (
-llm_compute_engine.stream_chat_oai(
+chunks = llm_compute_engine.stream_chat_oai(
 conversations=new_conversations,
 model=model,
 role_mapping=role_mapping,
 llm_config=llm_config,
 delta_mode=True,
-)
-
-)
+)
+
+def generate_chunks():
+for chunk in chunks:
+yield chunk[0]
+if chunk[1] is not None:
+rag_stat.answer_stat.total_input_tokens += chunk[1].input_tokens_count
+rag_stat.answer_stat.total_generated_tokens += chunk[1].generated_tokens_count
+self._print_rag_stats(rag_stat)
+return generate_chunks(), context
 
 new_conversations = conversations[:-1] + [
 {
@@ -614,5 +679,85 @@
 llm_config=llm_config,
 delta_mode=True,
 )
+
+def generate_chunks():
+for chunk in chunks:
+yield chunk[0]
+if chunk[1] is not None:
+rag_stat.answer_stat.total_input_tokens += chunk[1].input_tokens_count
+rag_stat.answer_stat.total_generated_tokens += chunk[1].generated_tokens_count
+self._print_rag_stats(rag_stat)
+return generate_chunks(), context
+
+
 
-
+def _print_rag_stats(self, rag_stat: RAGStat) -> None:
+"""打印RAG执行的详细统计信息"""
+total_input_tokens = (
+rag_stat.recall_stat.total_input_tokens +
+rag_stat.chunk_stat.total_input_tokens +
+rag_stat.answer_stat.total_input_tokens
+)
+total_generated_tokens = (
+rag_stat.recall_stat.total_generated_tokens +
+rag_stat.chunk_stat.total_generated_tokens +
+rag_stat.answer_stat.total_generated_tokens
+)
+total_tokens = total_input_tokens + total_generated_tokens
+
+# 避免除以零错误
+if total_tokens == 0:
+recall_percent = chunk_percent = answer_percent = 0
+else:
+recall_percent = (rag_stat.recall_stat.total_input_tokens + rag_stat.recall_stat.total_generated_tokens) / total_tokens * 100
+chunk_percent = (rag_stat.chunk_stat.total_input_tokens + rag_stat.chunk_stat.total_generated_tokens) / total_tokens * 100
+answer_percent = (rag_stat.answer_stat.total_input_tokens + rag_stat.answer_stat.total_generated_tokens) / total_tokens * 100
+
+logger.info(
+f"=== RAG 执行统计信息 ===\n"
+f"总令牌使用: {total_tokens} 令牌\n"
+f" * 输入令牌总数: {total_input_tokens}\n"
+f" * 生成令牌总数: {total_generated_tokens}\n"
+f"\n"
+f"阶段统计:\n"
+f" 1. 文档检索阶段:\n"
+f" - 模型: {rag_stat.recall_stat.model_name}\n"
+f" - 输入令牌: {rag_stat.recall_stat.total_input_tokens}\n"
+f" - 生成令牌: {rag_stat.recall_stat.total_generated_tokens}\n"
+f" - 阶段总计: {rag_stat.recall_stat.total_input_tokens + rag_stat.recall_stat.total_generated_tokens}\n"
+f"\n"
+f" 2. 文档分块阶段:\n"
+f" - 模型: {rag_stat.chunk_stat.model_name}\n"
+f" - 输入令牌: {rag_stat.chunk_stat.total_input_tokens}\n"
+f" - 生成令牌: {rag_stat.chunk_stat.total_generated_tokens}\n"
+f" - 阶段总计: {rag_stat.chunk_stat.total_input_tokens + rag_stat.chunk_stat.total_generated_tokens}\n"
+f"\n"
+f" 3. 答案生成阶段:\n"
+f" - 模型: {rag_stat.answer_stat.model_name}\n"
+f" - 输入令牌: {rag_stat.answer_stat.total_input_tokens}\n"
+f" - 生成令牌: {rag_stat.answer_stat.total_generated_tokens}\n"
+f" - 阶段总计: {rag_stat.answer_stat.total_input_tokens + rag_stat.answer_stat.total_generated_tokens}\n"
+f"\n"
+f"令牌分布百分比:\n"
+f" - 文档检索: {recall_percent:.1f}%\n"
+f" - 文档分块: {chunk_percent:.1f}%\n"
+f" - 答案生成: {answer_percent:.1f}%\n"
+)
+
+# 记录原始统计数据,以便调试
+logger.debug(f"RAG Stat 原始数据: {rag_stat}")
+
+# 返回成本估算
+estimated_cost = self._estimate_token_cost(total_input_tokens, total_generated_tokens)
+if estimated_cost > 0:
+logger.info(f"估计成本: 约 ${estimated_cost:.4f} 人民币")
+
+def _estimate_token_cost(self, input_tokens: int, output_tokens: int) -> float:
+"""估算当前请求的令牌成本(人民币)"""
+# 实际应用中,可以根据不同模型设置不同价格
+input_cost_per_1m = 2.0/1000000 # 每百万输入令牌的成本
+output_cost_per_1m = 8.0/100000 # 每百万输出令牌的成本
+
+cost = (input_tokens * input_cost_per_1m / 1000000) + (output_tokens* output_cost_per_1m/1000000)
+return cost
+
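Note: the new `RecallStat`/`ChunkStat`/`AnswerStat`/`RAGStat` models simply accumulate input and generated token counts per stage, and `_print_rag_stats` turns them into per-stage percentages. A runnable sketch of that arithmetic, with the three stage classes collapsed into one and made-up token counts purely for illustration:

```python
from pydantic import BaseModel

class StageStat(BaseModel):
    # Collapsed stand-in for RecallStat / ChunkStat / AnswerStat.
    total_input_tokens: int = 0
    total_generated_tokens: int = 0
    model_name: str = "unknown"

# Illustrative numbers only.
stages = {
    "recall": StageStat(total_input_tokens=12000, total_generated_tokens=300, model_name="recall_model"),
    "chunk": StageStat(total_input_tokens=8000, total_generated_tokens=900, model_name="chunk_model"),
    "answer": StageStat(total_input_tokens=20000, total_generated_tokens=1500, model_name="qa_model"),
}

total = sum(s.total_input_tokens + s.total_generated_tokens for s in stages.values())
for name, s in stages.items():
    stage_total = s.total_input_tokens + s.total_generated_tokens
    print(f"{name}: {stage_total} tokens ({stage_total / total * 100:.1f}% of {total})")
```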
autocoder/rag/relevant_utils.py
CHANGED
@@ -1,7 +1,7 @@
 from autocoder.common import AutoCoderArgs, SourceCode
 from pydantic import BaseModel
 import re
-from typing import Optional
+from typing import Optional, List
 
 
 class DocRelevance(BaseModel):
@@ -23,6 +23,17 @@ class FilterDoc(BaseModel):
 task_timing: TaskTiming
 
 
+class DocFilterResult(BaseModel):
+# 注意, docs 只保留最后成功过滤的文档
+docs: List[FilterDoc]
+# 注意, raw_docs 保留所有文档
+raw_docs: List[FilterDoc]
+input_tokens_counts: List[int]
+generated_tokens_counts: List[int]
+durations: List[float]
+model_name: str = "unknown"
+
+
 def parse_relevance(text: Optional[str]) -> Optional[DocRelevance]:
 if text is None:
 return None
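Note: for downstream code, the important distinction in `DocFilterResult` is between `docs` (only the documents that passed the relevance threshold) and `raw_docs` (every document that was scored, including failures), with the parallel token-count lists covering the whole recall pass. A small illustrative helper, assuming auto-coder 0.1.272 is installed; the function itself is not part of the package:

```python
from autocoder.rag.relevant_utils import DocFilterResult

def summarize_recall(result: DocFilterResult) -> str:
    # docs: only the documents that cleared rag_doc_filter_relevance
    # raw_docs: everything that was scored, including errored documents
    return (
        f"kept {len(result.docs)}/{len(result.raw_docs)} docs, "
        f"recall model={result.model_name}, "
        f"input tokens={sum(result.input_tokens_counts)}, "
        f"generated tokens={sum(result.generated_tokens_counts)}"
    )
```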
autocoder/rag/token_limiter.py
CHANGED
@@ -7,7 +7,9 @@ from autocoder.common import SourceCode
 from byzerllm.utils.client.code_utils import extract_code
 import byzerllm
 from byzerllm import ByzerLLM
-
+from autocoder.rag.relevant_utils import TaskTiming
+from byzerllm import MetaHolder
+from autocoder.rag.token_limiter_utils import TokenLimiterResult
 
 class TokenLimiter:
 def __init__(
@@ -94,10 +96,24 @@
 relevant_docs: List[SourceCode],
 conversations: List[Dict[str, str]],
 index_filter_workers: int,
-) ->
+) -> TokenLimiterResult:
+logger.info(f"=== TokenLimiter Starting ===")
+logger.info(f"Configuration: full_text_limit={self.full_text_limit}, segment_limit={self.segment_limit}, buff_limit={self.buff_limit}")
+logger.info(f"Processing {len(relevant_docs)} source code documents")
+
+start_time = time.time()
 final_relevant_docs = []
 token_count = 0
 doc_num_count = 0
+model_name = self.chunk_llm.default_model_name or "unknown"
+token_limiter_result = TokenLimiterResult(
+docs=[],
+raw_docs=[],
+input_tokens_counts=[],
+generated_tokens_counts=[],
+durations=[],
+model_name=model_name
+)
 
 reorder_relevant_docs = []
 
@@ -112,6 +128,7 @@
 ## TODO:
 ## 1. 未来根据参数决定是否开启重排以及重排的策略
 if not self.disable_segment_reorder:
+logger.info("Document reordering enabled - organizing segments by original document order")
 num_count = 0
 for doc in relevant_docs:
 num_count += 1
@@ -135,8 +152,11 @@
 temp_docs.sort(key=lambda x: x.metadata["chunk_index"])
 reorder_relevant_docs.extend(temp_docs)
 else:
+logger.info("Document reordering disabled - using original retrieval order")
 reorder_relevant_docs = relevant_docs
 
+logger.info(f"After reordering: {len(reorder_relevant_docs)} documents to process")
+
 ## 非窗口分区实现
 for doc in reorder_relevant_docs:
 doc_tokens = self.count_tokens(doc.source_code)
@@ -149,10 +169,15 @@
 
 ## 如果窗口无法放下所有的相关文档,则需要分区
 if len(final_relevant_docs) < len(reorder_relevant_docs):
+logger.info(f"Token limit exceeded: {len(final_relevant_docs)}/{len(reorder_relevant_docs)} docs fit in window")
+logger.info(f"=== Starting First Round: Full Text Loading ===")
+
 ## 先填充full_text分区
 token_count = 0
 new_token_limit = self.full_text_limit
 doc_num_count = 0
+first_round_start_time = time.time()
+
 for doc in reorder_relevant_docs:
 doc_tokens = self.count_tokens(doc.source_code)
 doc_num_count += 1
@@ -161,11 +186,18 @@
 token_count += doc_tokens
 else:
 break
+
+first_round_duration = time.time() - first_round_start_time
+logger.info(
+f"First round complete: loaded {len(self.first_round_full_docs)} documents"
+f" ({token_count} tokens) in {first_round_duration:.2f}s"
+)
 
 if len(self.first_round_full_docs) > 0:
 remaining_tokens = (
 self.full_text_limit + self.segment_limit - token_count
 )
+logger.info(f"Remaining token budget: {remaining_tokens}")
 else:
 logger.warning(
 "Full text area is empty, this is may caused by the single doc is too long"
@@ -175,49 +207,117 @@
 ## 继续填充segment分区
 sencond_round_start_time = time.time()
 remaining_docs = reorder_relevant_docs[len(self.first_round_full_docs) :]
+
 logger.info(
-f"
+f"=== Starting Second Round: Chunk Extraction ==="
+f"\n * Documents to process: {len(remaining_docs)}"
+f"\n * Remaining token budget: {remaining_tokens}"
+f"\n * Thread pool size: {index_filter_workers or 5}"
 )
 
+total_processed = 0
+successful_extractions = 0
+
+
 with ThreadPoolExecutor(max_workers=index_filter_workers or 5) as executor:
-future_to_doc = {
-[removed lines not captured in the source diff view]
+future_to_doc = {}
+for doc in remaining_docs:
+submit_time = time.time()
+future = executor.submit(self.process_range_doc, doc, conversations)
+future_to_doc[future] = (doc, submit_time)
 
 for future in as_completed(future_to_doc):
-doc = future_to_doc[future]
+doc, submit_time = future_to_doc[future]
+end_time = time.time()
+total_processed += 1
+progress_percent = (total_processed / len(remaining_docs)) * 100
+
 try:
 result = future.result()
+task_duration = end_time - submit_time
+
 if result and remaining_tokens > 0:
 self.second_round_extracted_docs.append(result)
+token_limiter_result.raw_docs.append(result)
+
+if "rag" in result.metadata and "chunk" in result.metadata["rag"]:
+chunk_meta = result.metadata["rag"]["chunk"]
+token_limiter_result.input_tokens_counts.append(chunk_meta.get("input_tokens_count", 0))
+token_limiter_result.generated_tokens_counts.append(chunk_meta.get("generated_tokens_count", 0))
+token_limiter_result.durations.append(chunk_meta.get("duration", 0))
+
 tokens = result.tokens
+successful_extractions += 1
+
+logger.info(
+f"Document extraction [{progress_percent:.1f}%] - {total_processed}/{len(remaining_docs)}:"
+f"\n - File: {doc.module_name}"
+f"\n - Chunks: {len(result.metadata.get('chunk_ranges', []))}"
+f"\n - Extracted tokens: {tokens}"
+f"\n - Remaining tokens: {remaining_tokens - tokens if tokens > 0 else remaining_tokens}"
+f"\n - Processing time: {task_duration:.2f}s"
+)
+
 if tokens > 0:
 remaining_tokens -= tokens
 else:
 logger.warning(
 f"Token count for doc {doc.module_name} is 0 or negative"
 )
+elif result:
+logger.info(
+f"Document extraction [{progress_percent:.1f}%] - {total_processed}/{len(remaining_docs)}:"
+f"\n - File: {doc.module_name}"
+f"\n - Skipped: Token budget exhausted ({remaining_tokens} remaining)"
+f"\n - Processing time: {task_duration:.2f}s"
+)
+else:
+logger.warning(
+f"Document extraction [{progress_percent:.1f}%] - {total_processed}/{len(remaining_docs)}:"
+f"\n - File: {doc.module_name}"
+f"\n - Result: No content extracted"
+f"\n - Processing time: {task_duration:.2f}s"
+)
 except Exception as exc:
 logger.error(
-f"
+f"Document extraction [{progress_percent:.1f}%] - {total_processed}/{len(remaining_docs)}:"
+f"\n - File: {doc.module_name}"
+f"\n - Error: {exc}"
+f"\n - Processing time: {end_time - submit_time:.2f}s"
 )
 
 final_relevant_docs = (
 self.first_round_full_docs + self.second_round_extracted_docs
 )
 self.sencond_round_time = time.time() - sencond_round_start_time
+total_time = time.time() - start_time
+
 logger.info(
-f"Second round
+f"=== Second round complete ==="
+f"\n * Time: {self.sencond_round_time:.2f}s"
+f"\n * Documents processed: {total_processed}/{len(remaining_docs)}"
+f"\n * Successful extractions: {successful_extractions}"
+f"\n * Extracted tokens: {sum(doc.tokens for doc in self.second_round_extracted_docs)}"
 )
-
-
+else:
+logger.info(f"All {len(reorder_relevant_docs)} documents fit within token limits")
+total_time = time.time() - start_time
+
+logger.info(
+f"=== TokenLimiter Complete ==="
+f"\n * Total time: {total_time:.2f}s"
+f"\n * Documents selected: {len(final_relevant_docs)}/{len(relevant_docs)}"
+f"\n * Total tokens: {sum(doc.tokens for doc in final_relevant_docs)}"
+)
+token_limiter_result.docs = final_relevant_docs
+return token_limiter_result
 
 def process_range_doc(
 self, doc: SourceCode, conversations: List[Dict[str, str]], max_retries=3
 ) -> SourceCode:
 for attempt in range(max_retries):
 content = ""
+start_time = time.time()
 try:
 source_code_with_line_number = ""
 source_code_lines = doc.source_code.split("\n")
@@ -225,14 +325,19 @@
 source_code_with_line_number += f"{idx+1} {line}\n"
 
 llm = self.chunk_llm
+model_name = llm.default_model_name or "unknown"
+meta_holder = MetaHolder()
 
+extraction_start_time = time.time()
 extracted_info = (
 self.extract_relevance_range_from_docs_with_conversation.options(
 {"llm_config": {"max_length": 100}}
 )
-.with_llm(llm)
+.with_llm(llm).with_meta(meta_holder)
 .run(conversations, [source_code_with_line_number])
 )
+extraction_duration = time.time() - extraction_start_time
+
 json_str = extract_code(extracted_info)[0][1]
 json_objs = json.loads(json_str)
 
@@ -242,23 +347,59 @@
 chunk = "\n".join(source_code_lines[start_line:end_line])
 content += chunk + "\n"
 
+total_duration = time.time() - start_time
+
+
+meta = meta_holder.get_meta_model()
+
+input_tokens_count = 0
+generated_tokens_count = 0
+
+if meta:
+input_tokens_count = meta.input_tokens_count
+generated_tokens_count = meta.generated_tokens_count
+
+logger.debug(
+f"Document {doc.module_name} chunk extraction details:"
+f"\n - Chunks found: {len(json_objs)}"
+f"\n - Input tokens: {input_tokens_count}"
+f"\n - Generated tokens: {generated_tokens_count}"
+f"\n - LLM time: {extraction_duration:.2f}s"
+f"\n - Total processing time: {total_duration:.2f}s"
+)
+
+if "rag" not in doc.metadata:
+doc.metadata["rag"] = {}
+
+doc.metadata["rag"]["chunk"] = {
+"original_doc": doc.module_name,
+"chunk_ranges": json_objs,
+"processing_time": total_duration,
+"llm_time": extraction_duration,
+
+"input_tokens_count": input_tokens_count,
+"generated_tokens_count": generated_tokens_count,
+"duration": extraction_duration,
+"chunk_model":model_name
+}
+
 return SourceCode(
 module_name=doc.module_name,
 source_code=content.strip(),
-tokens=
+tokens=input_tokens_count + generated_tokens_count,
 metadata={
-
-"chunk_ranges": json_objs,
+**doc.metadata
 },
 )
 except Exception as e:
+err_duration = time.time() - start_time
 if attempt < max_retries - 1:
 logger.warning(
-f"Error processing doc {doc.module_name}, retrying... (Attempt {attempt + 1}) Error: {str(e)}"
+f"Error processing doc {doc.module_name}, retrying... (Attempt {attempt + 1}) Error: {str(e)}, duration: {err_duration:.2f}s"
 )
 else:
 logger.error(
-f"Failed to process doc {doc.module_name} after {max_retries} attempts: {str(e)}"
+f"Failed to process doc {doc.module_name} after {max_retries} attempts: {str(e)}, total duration: {err_duration:.2f}s"
 )
 return SourceCode(
 module_name=doc.module_name, source_code="", tokens=0
autocoder/rag/token_limiter_utils.py

@@ -0,0 +1,13 @@
+import pydantic
+from typing import List
+from autocoder.common import SourceCode
+
+class TokenLimiterResult(pydantic.BaseModel):
+# 注意, docs 只保留结果文档
+docs: List[SourceCode]
+# 注意, raw_docs 保留所有文档
+raw_docs: List[SourceCode]
+input_tokens_counts: List[int]
+generated_tokens_counts: List[int]
+durations: List[float]
+model_name: str = "unknown"
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.1.
|
|
1
|
+
__version__ = "0.1.272"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|