auto-coder 0.1.271__py3-none-any.whl → 0.1.272__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: auto-coder
- Version: 0.1.271
+ Version: 0.1.272
  Summary: AutoCoder: AutoCoder
  Author: allwefantasy
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
@@ -26,7 +26,7 @@ Requires-Dist: tabulate
  Requires-Dist: jupyter-client
  Requires-Dist: prompt-toolkit
  Requires-Dist: tokenizers
- Requires-Dist: byzerllm[saas] >=0.1.168
+ Requires-Dist: byzerllm[saas] >=0.1.169
  Requires-Dist: patch
  Requires-Dist: diff-match-patch
  Requires-Dist: GitPython
@@ -4,7 +4,7 @@ autocoder/auto_coder_lang.py,sha256=Rtupq6N3_HT7JRhDKdgCBcwRaiAnyCOR_Gsp4jUomrI,
  autocoder/auto_coder_rag.py,sha256=nwgsXO2-scssWStjX3S910tDp-OZXZRddSYrpyC4Nq0,29021
  autocoder/auto_coder_rag_client_mcp.py,sha256=QRxUbjc6A8UmDMQ8lXgZkjgqtq3lgKYeatJbDY6rSo0,6270
  autocoder/auto_coder_rag_mcp.py,sha256=-RrjNwFaS2e5v8XDIrKR-zlUNUE8UBaeOtojffBrvJo,8521
- autocoder/auto_coder_runner.py,sha256=JGyLT231zu2cZgnf98Y-W5wDQ0xORZJef1GfL7SZQto,100725
+ autocoder/auto_coder_runner.py,sha256=1SGmVP_X10vNJujdaDc3QuiTeHa4wDIpLp41poUJW7w,100773
  autocoder/auto_coder_server.py,sha256=6YQweNEKUrGAZ3yPvw8_qlNZJYLVSVUXGrn1K6udLts,20413
  autocoder/benchmark.py,sha256=Ypomkdzd1T3GE6dRICY3Hj547dZ6_inqJbBJIp5QMco,4423
  autocoder/chat_auto_coder.py,sha256=skujpqYqf4EvBLQARJELxj7Xwq9KQj2FGefUAiutF7c,16711
@@ -12,7 +12,7 @@ autocoder/chat_auto_coder_lang.py,sha256=ShOQVOnMA-WlT-fB9OrOer-xQkbcWxJGl-WMPuZ
  autocoder/command_args.py,sha256=9aYJ-AmPxP1sQh6ciw04FWHjSn31f2W9afXFwo8wgx4,30441
  autocoder/lang.py,sha256=U6AjVV8Rs1uLyjFCZ8sT6WWuNUxMBqkXXIOs4S120uk,14511
  autocoder/models.py,sha256=PlG1tKHSHwB57cKLOl5gTl5yTzFUDzCgeHPJU3N9F6Q,9106
- autocoder/version.py,sha256=ZgM5wgwos-e45Kw9MewMvDsa_W_7THMB-EhDfus_ybw,23
+ autocoder/version.py,sha256=81VehHvNKUJ_hEbhkGJ2_z23uverZt6jaYYz6duFiJI,23
  autocoder/agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  autocoder/agent/auto_demand_organizer.py,sha256=NWSAEsEk94vT3lGjfo25kKLMwYdPcpy9e-i21txPasQ,6942
  autocoder/agent/auto_filegroup.py,sha256=CW7bqp0FW1GIEMnl-blyAc2UGT7O9Mom0q66ITz1ckM,6635
@@ -34,8 +34,8 @@ autocoder/common/__init__.py,sha256=d1AmAAYhm4b17dVhAJFwV3Vv2r1lUhMejpjr32Poyg8,
  autocoder/common/anything2images.py,sha256=0ILBbWzY02M-CiWB-vzuomb_J1hVdxRcenAfIrAXq9M,25283
  autocoder/common/anything2img.py,sha256=4TREa-sOA-iargieUy7MpyCYVUE-9Mmq0wJtwomPqnE,7662
  autocoder/common/audio.py,sha256=Kn9nWKQddWnUrAz0a_ZUgjcu4VUU_IcZBigT7n3N3qc,7439
- autocoder/common/auto_coder_lang.py,sha256=WZm5XQaYeECiTiFiHjXWHRZKeV0ux2v64hwZqY8ZDDU,31709
- autocoder/common/auto_configure.py,sha256=L0wjvR-6wFNpP3c9ZxwDOunTtGrzwzjUwsS6BUnJ3W8,12470
+ autocoder/common/auto_coder_lang.py,sha256=ONjY2R5BPc3wuSQOKkxvfMP1nID2dtKJ5s6XCZ0Jzjo,31733
+ autocoder/common/auto_configure.py,sha256=c1k6ttDyvc9_5amiCTt4PUq9mRu1IeG560Q3kIRy-Uk,12474
  autocoder/common/buildin_tokenizer.py,sha256=L7d5t39ZFvUd6EoMPXUhYK1toD0FHlRH1jtjKRGokWU,1236
  autocoder/common/chunk_validation.py,sha256=BrR_ZWavW8IANuueEE7hS8NFAwEvm8TX34WnPx_1hs8,3030
  autocoder/common/cleaner.py,sha256=NU72i8C6o9m0vXExab7nao5bstBUsfJFcj11cXa9l4U,1089
@@ -102,25 +102,26 @@ autocoder/index/symbols_utils.py,sha256=_EP7E_qWXxluAxq3FGZLlLfdrfwx3FmxCdulI8VG
  autocoder/index/types.py,sha256=a2s_KV5FJlq7jqA2ELSo9E1sjuLwDB-JJYMhSpzBAhU,596
  autocoder/index/filter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  autocoder/index/filter/normal_filter.py,sha256=MI-8xdXCrniaxYCHVTLkq5tafvcUiauD3LN0b3ymRXI,8361
- autocoder/index/filter/quick_filter.py,sha256=D6B8zz9TGAFPOp-OBm7UwuAEtg1DYQ8jEq4tHgZocbo,22572
+ autocoder/index/filter/quick_filter.py,sha256=5_P-HKK9Wim3ysHUECsegCS-z2PAKYOtfq8FrUY8K24,22777
  autocoder/privacy/__init__.py,sha256=LnIVvGu_K66zCE-yhN_-dPO8R80pQyedCsXJ7wRqQaI,72
  autocoder/privacy/model_filter.py,sha256=-N9ZvxxDKpxU7hkn-tKv-QHyXjvkCopUaKgvJwTOGQs,3369
  autocoder/pyproject/__init__.py,sha256=ms-A_pocgGv0oZPEW8JAdXi7G-VSVhkQ6CnWFe535Ec,14477
  autocoder/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  autocoder/rag/api_server.py,sha256=dRbhAZVRAOlZ64Cnxf4_rKb4iJwHnrWS9Zr67IVORw0,7288
- autocoder/rag/doc_filter.py,sha256=ZCixxUXNBbz6UiGbgXvbDWdn5moLac3HnZEphpasTDc,6579
+ autocoder/rag/doc_filter.py,sha256=yEXaBw1XJH57Gtvk4-RFQtd5eawA6SBjzxeRZrIsQew,11623
  autocoder/rag/document_retriever.py,sha256=5oThtxukGuRFF96o3pHKsk306a8diXbhgSrbqyU2BvM,8894
- autocoder/rag/llm_wrapper.py,sha256=sbDxCANiZyWb_ocqNgqu2oy3c2t8orPNRGleEs-Uwl8,2649
- autocoder/rag/long_context_rag.py,sha256=SwU6UhTVjdKQu1VOaj07VGHFMG-mRTwRuOH1GwwFygo,24680
+ autocoder/rag/llm_wrapper.py,sha256=wf56ofQNOaBkLhnoxK9VoVnHWD0gsj0pP8mUBfS92RI,2737
+ autocoder/rag/long_context_rag.py,sha256=CzPC-ct6PVIKBkHsKon4s92YXmi8jZOlGgcquOwWQlI,31802
  autocoder/rag/rag_config.py,sha256=8LwFcTd8OJWWwi1_WY4IzjqgtT6RyE2j4PjxS5cCTDE,802
  autocoder/rag/rag_entry.py,sha256=6TKtErZ0Us9XSV6HgRKXA6yR3SiZGPHpynOKSaR1wgE,2463
  autocoder/rag/raw_rag.py,sha256=BOr0YGf3umjqXOIDVO1LXQ0bIHx8hzBdiubND2ezyxc,2946
- autocoder/rag/relevant_utils.py,sha256=OGfp98OXG4jr3jNmtHIeXGPF8mOlIbTnolPIVTZzYZU,929
+ autocoder/rag/relevant_utils.py,sha256=tgTKGbojCrxuZ7dKbyPh2rCw9TIhwE6ltRxJosaA97U,1267
  autocoder/rag/simple_directory_reader.py,sha256=LkKreCkNdEOoL4fNhc3_hDoyyWTQUte4uqextISRz4U,24485
  autocoder/rag/simple_rag.py,sha256=I902EUqOK1WM0Y2WFd7RzDJYofElvTZNLVCBtX5A9rc,14885
  autocoder/rag/token_checker.py,sha256=jc76x6KWmvVxds6W8juZfQGaoErudc2HenG3sNQfSLs,2819
  autocoder/rag/token_counter.py,sha256=C-Lwc4oIjJpZDEqp9WLHGOe6hb4yhrdJpMtkrtp_1qc,2125
- autocoder/rag/token_limiter.py,sha256=5JznJ-h1bG5x2Plx5H0MGkaAHo2GiD7CDIJmeZWWk_8,11015
+ autocoder/rag/token_limiter.py,sha256=3VgJF4may3ESyATmBIiOe05oc3VsidJcJTJ5EhoSvH8,18854
+ autocoder/rag/token_limiter_utils.py,sha256=FATNEXBnFJy8IK3PWNt1pspIv8wuTgy3F_ACNvqoc4I,404
  autocoder/rag/types.py,sha256=WPgLpUTwbk0BAikyDOc0NOEwV5k73myF38zWdOuYdC4,2499
  autocoder/rag/utils.py,sha256=x5L8gskxUUGkVEP5K-0C-iYntE84asuDXOCqdzdNUoA,4956
  autocoder/rag/variable_holder.py,sha256=PFvBjFcR7-fNDD4Vcsc8CpH2Te057vcpwJMxtrfUgKI,75
@@ -164,9 +165,9 @@ autocoder/utils/types.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  autocoder/utils/auto_coder_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  autocoder/utils/auto_coder_utils/chat_stream_out.py,sha256=lkJ_A-sYU36JMzjFWkk3pR6uos8oZHYt9GPsPe_CPAo,11766
  autocoder/utils/chat_auto_coder_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- auto_coder-0.1.271.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
- auto_coder-0.1.271.dist-info/METADATA,sha256=eqiFY5uN-v89P1pH5YtnxnghXFABqjgmm4pPOqRUOpE,2643
- auto_coder-0.1.271.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
- auto_coder-0.1.271.dist-info/entry_points.txt,sha256=0nzHtHH4pNcM7xq4EBA2toS28Qelrvcbrr59GqD_0Ak,350
- auto_coder-0.1.271.dist-info/top_level.txt,sha256=Jqc0_uJSw2GwoFQAa9iJxYns-2mWla-9ok_Y3Gcznjk,10
- auto_coder-0.1.271.dist-info/RECORD,,
+ auto_coder-0.1.272.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
+ auto_coder-0.1.272.dist-info/METADATA,sha256=_71Qvh-jvuaAUr81tcWZyyjmsY8-_bt50WO-jSimtTQ,2643
+ auto_coder-0.1.272.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+ auto_coder-0.1.272.dist-info/entry_points.txt,sha256=0nzHtHH4pNcM7xq4EBA2toS28Qelrvcbrr59GqD_0Ak,350
+ auto_coder-0.1.272.dist-info/top_level.txt,sha256=Jqc0_uJSw2GwoFQAa9iJxYns-2mWla-9ok_Y3Gcznjk,10
+ auto_coder-0.1.272.dist-info/RECORD,,
@@ -2133,7 +2133,7 @@ def manage_models(query: str):
  # Check duplication
  if any(m["name"] == data_dict["name"] for m in models_data):
  printer.print_in_terminal("models_add_model_exists", style="yellow", name=data_dict["name"])
- result_manager.add_result(content=printer.get_message_from_key("models_add_model_exists",name=data_dict["name"]),meta={
+ result_manager.add_result(content=printer.get_message_from_key_with_format("models_add_model_exists",name=data_dict["name"]),meta={
  "action": "models",
  "input": {
  "query": query
@@ -2177,7 +2177,7 @@ def manage_models(query: str):
  filtered_models = [m for m in models_data if m["name"] != name]
  if len(filtered_models) == len(models_data):
  printer.print_in_terminal("models_add_model_remove", style="yellow", name=name)
- result_manager.add_result(content=printer.get_message_from_key("models_add_model_remove",name=name),meta={
+ result_manager.add_result(content=printer.get_message_from_key_with_format("models_add_model_remove",name=name),meta={
  "action": "models",
  "input": {
  "query": query
@@ -2186,7 +2186,7 @@ def manage_models(query: str):
  return
  models_module.save_models(filtered_models)
  printer.print_in_terminal("models_add_model_removed", style="green", name=name)
- result_manager.add_result(content=printer.get_message_from_key("models_add_model_removed",name=name),meta={
+ result_manager.add_result(content=printer.get_message_from_key_with_format("models_add_model_removed",name=name),meta={
  "action": "models",
  "input": {
  "query": query
@@ -2194,7 +2194,7 @@ def manage_models(query: str):
  })
  else:
  printer.print_in_terminal("models_unknown_subcmd", style="yellow", subcmd=subcmd)
- result_manager.add_result(content=printer.get_message_from_key("models_unknown_subcmd",subcmd=subcmd),meta={
+ result_manager.add_result(content=printer.get_message_from_key_with_format("models_unknown_subcmd",subcmd=subcmd),meta={
  "action": "models",
  "input": {
  "query": query
@@ -130,7 +130,7 @@ MESSAGES = {
  "quick_filter_too_long": "⚠️ index file is too large ({{ tokens_len }}/{{ max_tokens }}). The query will be split into {{ split_size }} chunks.",
  "quick_filter_tokens_len": "📊 Current index size: {{ tokens_len }} tokens",
  "estimated_chat_input_tokens": "Estimated chat input tokens: {{ estimated_input_tokens }}",
- "estimated_input_tokens_in_generate": "Estimated input tokens in generate ({{ generate_mode }}): {{ estimated_input_tokens }}",
+ "estimated_input_tokens_in_generate": "Estimated input tokens in generate ({{ generate_mode }}): {{ estimated_input_tokens_in_generate }}",
  "model_has_access_restrictions": "{{model_name}} has access restrictions, cannot use the current function",
  "auto_command_not_found": "Auto command not found: {{command}}. Please check your input and try again.",
  "auto_command_failed": "Auto command failed: {{error}}. Please check your input and try again.",
@@ -319,7 +319,7 @@ MESSAGES = {
  "quick_filter_title": "{{ model_name }} 正在分析如何筛选上下文...",
  "quick_filter_failed": "❌ 快速过滤器失败: {{ error }}. ",
  "estimated_chat_input_tokens": "对话输入token预估为: {{ estimated_input_tokens }}",
- "estimated_input_tokens_in_generate": "生成代码({{ generate_mode }})预计输入token数: {{ estimated_input_tokens }}",
+ "estimated_input_tokens_in_generate": "生成代码({{ generate_mode }})预计输入token数: {{ estimated_input_tokens_in_generate }}",
  "model_has_access_restrictions": "{{model_name}} 有访问限制,无法使用当前功能",
  "auto_command_not_found": "未找到自动命令: {{command}}。请检查您的输入并重试。",
  "auto_command_failed": "自动命令执行失败: {{error}}。请检查您的输入并重试。",
@@ -154,7 +154,7 @@ def config_readme() -> str:
  项目类型通常为如下三种选择:
  1. ts
  2. py
- 3. 代码文件后缀名列表(比如.java,.py,.go,.js,.ts),多个按逗号分割
+ 3. 代码文件后缀名列表(比如.java,.py,.go,.js,.ts),多个按逗号分割

  推荐使用 3 选项,因为项目类型通常为多种后缀名混合。
  """
@@ -287,6 +287,7 @@ class QuickFilter():
  当用户提一个需求的时候,我们要找到两种类型的源码文件:
  1. 根据需求需要被修改的文件,我们叫 edited_files
  2. 为了能够完成修改这些文件,还需要的一些额外参考文件, 我们叫 reference_files
+ 3. 因为修改了 edited_files 文件,可能有一些依赖 edited_files 的文件也需要被修改,我们叫 dependent_files

  现在,给定下面的索引文件:

@@ -315,9 +316,9 @@ class QuickFilter():
  ```

  特别注意:
- 1. 如果用户的query里有 @文件 或者 @@符号,那么被@的文件或者@@的符号必须要返回。
- 2. 根据需求以及根据 @文件 或者 @@符号 找到的文件,猜测需要被修改的edited_files文件,然后尝试通过索引文件诸如导入语句等信息找到这些文件依赖的其他文件得到 reference_files。
- 3. file_list 里的文件序号,按被 @ 或者 @@ 文件,edited_files文件,reference_files文件的顺序排列。注意,reference_files 你要根据需求来猜测是否需要,过滤掉不相关的,避免返回文件数过多。
+ 1. 如果用户的query里有 @文件 或者 @@符号,并且他们在索引文件中,那么被@的文件或者@@的符号必须要返回。
+ 2. 根据需求以及根据 @文件 或者 @@符号 找到的文件,猜测需要被修改的edited_files文件,然后尝试通过索引文件诸如导入语句等信息找到这些文件依赖的其他文件得到 reference_files,dependent_files
+ 3. file_list 里的文件序号,按被 @ 或者 @@ 文件,edited_files文件,reference_files,dependent_files文件的顺序排列。注意,reference_files 你要根据需求来猜测是否需要,过滤掉不相关的,避免返回文件数过多。
  4. 如果 query 里是一段历史对话,那么对话里的内容提及的文件路径必须要返回。
  5. 如果用户需求为空,则直接返回空列表即可。
  6. 返回的 json格式数据不允许有注释
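The updated quick_filter prompt adds dependent_files as a third category and keeps the requirement that the model reply with plain JSON whose file_list holds file index numbers, ordered as @/@@ files, then edited_files, then reference_files/dependent_files. An illustrative example of that reply shape (only the file_list name comes from the prompt; the exact numbering base and any other fields are assumptions):

```python
# Illustrative only: the kind of comment-free JSON the prompt asks for.
import json

model_reply = '{"file_list": [3, 7, 12, 15]}'
print(json.loads(model_reply)["file_list"])  # [3, 7, 12, 15]
```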
@@ -5,8 +5,9 @@ from concurrent.futures import ThreadPoolExecutor, as_completed

  from autocoder.rag.relevant_utils import (
  parse_relevance,
- FilterDoc,
+ FilterDoc,
  TaskTiming,
+ DocFilterResult
  )

  from autocoder.common import SourceCode, AutoCoderArgs
@@ -48,7 +49,6 @@ def _check_relevance_with_conversation(
  其中, <relevant> 是你认为文档中和问题的相关度,0-10之间的数字,数字越大表示相关度越高。
  """

-
  class DocFilter:
  def __init__(
  self,
@@ -62,40 +62,57 @@ class DocFilter:
  self.recall_llm = self.llm.get_sub_client("recall_model")
  else:
  self.recall_llm = self.llm
-
+
  self.args = args
  self.relevant_score = self.args.rag_doc_filter_relevance
  self.on_ray = on_ray
- self.path = path
+ self.path = path

  def filter_docs(
  self, conversations: List[Dict[str, str]], documents: List[SourceCode]
- ) -> List[FilterDoc]:
- return self.filter_docs_with_threads(conversations, documents)
+ ) -> DocFilterResult:
+ return self.filter_docs_with_threads(conversations, documents)

  def filter_docs_with_threads(
  self, conversations: List[Dict[str, str]], documents: List[SourceCode]
- ) -> List[FilterDoc]:
-
+ ) -> DocFilterResult:
+
+ start_time = time.time()
+ logger.info(f"=== DocFilter Starting ===")
+ logger.info(
+ f"Configuration: relevance_threshold={self.relevant_score}, thread_workers={self.args.index_filter_workers or 5}")
+
  rag_manager = RagConfigManager(path=self.path)
  rag_config = rag_manager.load_config()
- documents = list(documents)
- logger.info(f"Filtering {len(documents)} documents....")
+
+ documents = list(documents)
+ logger.info(f"Filtering {len(documents)} documents...")
+
+ submitted_tasks = 0
+ completed_tasks = 0
+ relevant_count = 0
+ model_name = self.recall_llm.default_model_name or "unknown"
+
  with ThreadPoolExecutor(
  max_workers=self.args.index_filter_workers or 5
  ) as executor:
  future_to_doc = {}
+
+ # 提交所有任务
  for doc in documents:
  submit_time = time.time()
+ submitted_tasks += 1

  def _run(conversations, docs):
  submit_time_1 = time.time()
+ meta = None
  try:
  llm = self.recall_llm
+ meta_holder = byzerllm.MetaHolder()

  v = (
  _check_relevance_with_conversation.with_llm(
- llm)
+ llm).with_meta(meta_holder)
  .options({"llm_config": {"max_length": 10}})
  .run(
  conversations=conversations,
@@ -103,14 +120,16 @@ class DocFilter:
  filter_config=rag_config.filter_config,
  )
  )
+
+ meta = meta_holder.get_meta_model()
  except Exception as e:
  logger.error(
  f"Error in _check_relevance_with_conversation: {str(e)}"
  )
- return (None, submit_time_1, time.time())
+ return (None, submit_time_1, time.time(), meta)

  end_time_2 = time.time()
- return (v, submit_time_1, end_time_2)
+ return (v, submit_time_1, end_time_2, meta)

  m = executor.submit(
  _run,
@@ -119,57 +138,144 @@ class DocFilter:
  )
  future_to_doc[m] = (doc, submit_time)

- relevant_docs = []
- for future in as_completed(list(future_to_doc.keys())):
- try:
- doc, submit_time = future_to_doc[future]
- end_time = time.time()
- v, submit_time_1, end_time_2 = future.result()
- task_timing = TaskTiming(
- submit_time=submit_time,
- end_time=end_time,
- duration=end_time - submit_time,
- real_start_time=submit_time_1,
- real_end_time=end_time_2,
- real_duration=end_time_2 - submit_time_1,
- )
-
- relevance = parse_relevance(v)
- logger.info(
- f"Document filtering progress:\n"
- f" - File: {doc.module_name}\n"
- f" - Relevance: {'Relevant' if relevance and relevance.is_relevant else 'Not Relevant'}\n"
- f" - Score: {relevance.relevant_score if relevance else 'N/A'}\n"
- f" - Score Threshold: {self.relevant_score}\n"
- f" - Raw Response: {v}\n"
- f" - Timing:\n"
- f" * Total Duration: {task_timing.duration:.2f}s\n"
- f" * Real Duration: {task_timing.real_duration:.2f}s\n"
- f" * Queue Time: {(task_timing.real_start_time - task_timing.submit_time):.2f}s"
- )
- if (
- relevance
- # and relevance.is_relevant
- and relevance.relevant_score >= self.relevant_score
- ):
- relevant_docs.append(
- FilterDoc(
+ logger.info(
+ f"Submitted {submitted_tasks} document filtering tasks to thread pool")
+
+ # 处理完成的任务
+ doc_filter_result = DocFilterResult(
+ docs=[],
+ raw_docs=[],
+ input_tokens_counts=[],
+ generated_tokens_counts=[],
+ durations=[],
+ model_name=model_name
+ )
+ relevant_docs = doc_filter_result.docs
+ for future in as_completed(list(future_to_doc.keys())):
+ try:
+ doc, submit_time = future_to_doc[future]
+ end_time = time.time()
+ completed_tasks += 1
+ progress_percent = (completed_tasks / len(documents)) * 100
+
+ v, submit_time_1, end_time_2, meta = future.result()
+ task_timing = TaskTiming(
+ submit_time=submit_time,
+ end_time=end_time,
+ duration=end_time - submit_time,
+ real_start_time=submit_time_1,
+ real_end_time=end_time_2,
+ real_duration=end_time_2 - submit_time_1,
+ )
+
+ relevance = parse_relevance(v)
+ is_relevant = relevance and relevance.relevant_score >= self.relevant_score
+
+ if is_relevant:
+ relevant_count += 1
+ status_text = f"RELEVANT (Score: {relevance.relevant_score:.1f})"
+ else:
+ score_text = f"{relevance.relevant_score:.1f}" if relevance else "N/A"
+ status_text = f"NOT RELEVANT (Score: {score_text})"
+
+ queue_time = task_timing.real_start_time - task_timing.submit_time
+
+ input_tokens_count = meta.input_tokens_count if meta else 0
+ generated_tokens_count = meta.generated_tokens_count if meta else 0
+
+ logger.info(
+ f"Document filtering [{progress_percent:.1f}%] - {completed_tasks}/{len(documents)}:"
+ f"\n - File: {doc.module_name}"
+ f"\n - Status: {status_text}"
+ f"\n - Model: {model_name}"
+ f"\n - Threshold: {self.relevant_score}"
+ f"\n - Input tokens: {input_tokens_count}"
+ f"\n - Generated tokens: {generated_tokens_count}"
+ f"\n - Timing: Duration={task_timing.duration:.2f}s, Processing={task_timing.real_duration:.2f}s, Queue={queue_time:.2f}s"
+ f"\n - Response: {v}"
+ )
+
+ if "rag" not in doc.metadata:
+ doc.metadata["rag"] = {}
+ doc.metadata["rag"]["recall"] = {
+ "input_tokens_count": input_tokens_count,
+ "generated_tokens_count": generated_tokens_count,
+ "recall_model": model_name,
+ "duration": task_timing.real_duration
+ }
+
+ doc_filter_result.input_tokens_counts.append(input_tokens_count)
+ doc_filter_result.generated_tokens_counts.append(generated_tokens_count)
+ doc_filter_result.durations.append(task_timing.real_duration)
+
+ new_filter_doc = FilterDoc(
  source_code=doc,
  relevance=relevance,
  task_timing=task_timing,
  )
- )
- except Exception as exc:
- try:
- doc, submit_time = future_to_doc[future]
- logger.error(
- f"Filtering document generated an exception (doc: {doc.module_name}): {exc}")
- except Exception as e:
- logger.error(
- f"Filtering document generated an exception: {exc}")
+
+ doc_filter_result.raw_docs.append(new_filter_doc)
+
+ if is_relevant:
+ relevant_docs.append(
+ new_filter_doc
+ )
+ except Exception as exc:
+ try:
+ doc, submit_time = future_to_doc[future]
+ completed_tasks += 1
+ progress_percent = (
+ completed_tasks / len(documents)) * 100
+ logger.error(
+ f"Document filtering [{progress_percent:.1f}%] - {completed_tasks}/{len(documents)}:"
+ f"\n - File: {doc.module_name}"
+ f"\n - Error: {exc}"
+ f"\n - Duration: {time.time() - submit_time:.2f}s"
+ )
+ doc_filter_result.raw_docs.append(
+ FilterDoc(
+ source_code=doc,
+ relevance=None,
+ task_timing=TaskTiming(),
+ )
+ )
+ except Exception as e:
+ logger.error(
+ f"Document filtering error in task tracking: {exc}"
+ )

  # Sort relevant_docs by relevance score in descending order
  relevant_docs.sort(
  key=lambda x: x.relevance.relevant_score, reverse=True)
- return relevant_docs
-
+
+ total_time = time.time() - start_time
+
+ avg_processing_time = sum(
+ doc.task_timing.real_duration for doc in relevant_docs) / len(relevant_docs) if relevant_docs else 0
+ avg_queue_time = sum(doc.task_timing.real_start_time -
+ doc.task_timing.submit_time for doc in relevant_docs) / len(relevant_docs) if relevant_docs else 0
+
+ total_input_tokens = sum(doc_filter_result.input_tokens_counts)
+ total_generated_tokens = sum(doc_filter_result.generated_tokens_counts)
+
+ logger.info(
+ f"=== DocFilter Complete ==="
+ f"\n * Total time: {total_time:.2f}s"
+ f"\n * Documents processed: {completed_tasks}/{len(documents)}"
+ f"\n * Relevant documents: {relevant_count} (threshold: {self.relevant_score})"
+ f"\n * Average processing time: {avg_processing_time:.2f}s"
+ f"\n * Average queue time: {avg_queue_time:.2f}s"
+ f"\n * Total input tokens: {total_input_tokens}"
+ f"\n * Total generated tokens: {total_generated_tokens}"
+ )
+
+ if relevant_docs:
+ logger.info(
+ f"Top 5 relevant documents:"
+ + "".join([f"\n * {doc.source_code.module_name} (Score: {doc.relevance.relevant_score:.1f})"
+ for doc in relevant_docs[:5]])
+ )
+ else:
+ logger.warning("No relevant documents found!")
+
+ return doc_filter_result
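The heart of this doc_filter rewrite is per-call token accounting: a byzerllm.MetaHolder is attached to each relevance check and its counts are folded into the new DocFilterResult. A condensed restatement of that pattern as a generic helper (a sketch: it assumes a configured byzerllm client and a @byzerllm.prompt function such as the _check_relevance_with_conversation shown above, so it is not runnable on its own):

```python
import byzerllm

def run_with_token_accounting(prompt_func, llm, **kwargs):
    """Run a byzerllm prompt function and also return its token usage.

    Condensed from the pattern in the hunk above; `prompt_func` is assumed to
    be a @byzerllm.prompt-decorated function and `llm` a configured client.
    """
    meta_holder = byzerllm.MetaHolder()                      # collects per-call metadata
    result = prompt_func.with_llm(llm).with_meta(meta_holder).run(**kwargs)
    meta = meta_holder.get_meta_model()
    input_tokens = meta.input_tokens_count if meta else 0
    generated_tokens = meta.generated_tokens_count if meta else 0
    # In 0.1.272 these counts feed DocFilterResult.input_tokens_counts /
    # generated_tokens_counts instead of being discarded.
    return result, input_tokens, generated_tokens
```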
@@ -44,13 +44,15 @@ class LLWrapper:
  res,contexts = self.rag.stream_chat_oai(conversations,llm_config=llm_config)
  for t in res:
  yield (t,SingleOutputMeta(0,0))
+

  async def async_stream_chat_oai(self,conversations,
  model:Optional[str]=None,
  role_mapping=None,
  delta_mode=False,
  llm_config:Dict[str,Any]={}):
- res,contexts = await asyncfy_with_semaphore(lambda: self.rag.stream_chat_oai(conversations,llm_config=llm_config))()
+ res,contexts = await asyncfy_with_semaphore(lambda: self.rag.stream_chat_oai(conversations,llm_config=llm_config))()
+ # res,contexts = await self.llm.async_stream_chat_oai(conversations,llm_config=llm_config)
  for t in res:
  yield (t,SingleOutputMeta(0,0))

@@ -31,6 +31,8 @@ from tokenizers import Tokenizer
  from autocoder.rag.variable_holder import VariableHolder
  from importlib.metadata import version
  from autocoder.rag.stream_event import event_writer
+ from autocoder.rag.relevant_utils import DocFilterResult
+ from pydantic import BaseModel

  try:
  from autocoder_pro.rag.llm_compute import LLMComputeEngine
@@ -42,6 +44,24 @@ except ImportError:
  LLMComputeEngine = None


+ class RecallStat(BaseModel):
+ total_input_tokens: int
+ total_generated_tokens: int
+ model_name: str = "unknown"
+ class ChunkStat(BaseModel):
+ total_input_tokens: int
+ total_generated_tokens: int
+ model_name: str = "unknown"
+ class AnswerStat(BaseModel):
+ total_input_tokens: int
+ total_generated_tokens: int
+ model_name: str = "unknown"
+
+ class RAGStat(BaseModel):
+ recall_stat: RecallStat
+ chunk_stat: ChunkStat
+ answer_stat: AnswerStat
+
  class LongContextRAG:
  def __init__(
  self,
@@ -305,7 +325,7 @@ class LongContextRAG:
  url = ",".join(contexts)
  return [SourceCode(module_name=f"RAG:{url}", source_code="".join(v))]

- def _filter_docs(self, conversations: List[Dict[str, str]]) -> List[FilterDoc]:
+ def _filter_docs(self, conversations: List[Dict[str, str]]) -> DocFilterResult:
  query = conversations[-1]["content"]
  documents = self._retrieve_documents(options={"query":query})
  return self.doc_filter.filter_docs(
@@ -439,7 +459,32 @@ class LongContextRAG:

  logger.info(f"Query: {query} only_contexts: {only_contexts}")
  start_time = time.time()
- relevant_docs: List[FilterDoc] = self._filter_docs(conversations)
+
+ rag_stat = RAGStat(
+ recall_stat=RecallStat(
+ total_input_tokens=0,
+ total_generated_tokens=0,
+ model_name=self.llm.default_model_name,
+ ),
+ chunk_stat=ChunkStat(
+ total_input_tokens=0,
+ total_generated_tokens=0,
+ model_name=self.llm.default_model_name,
+ ),
+ answer_stat=AnswerStat(
+ total_input_tokens=0,
+ total_generated_tokens=0,
+ model_name=self.llm.default_model_name,
+ ),
+ )
+
+ doc_filter_result = self._filter_docs(conversations)
+
+ rag_stat.recall_stat.total_input_tokens += sum(doc_filter_result.input_tokens_counts)
+ rag_stat.recall_stat.total_generated_tokens += sum(doc_filter_result.generated_tokens_counts)
+ rag_stat.recall_stat.model_name = doc_filter_result.model_name
+
+ relevant_docs: List[FilterDoc] = doc_filter_result.docs
  filter_time = time.time() - start_time

  # Filter relevant_docs to only include those with is_relevant=True
@@ -469,17 +514,15 @@ class LongContextRAG:
  # 将 FilterDoc 转化为 SourceCode 方便后续的逻辑继续做处理
  relevant_docs = [doc.source_code for doc in relevant_docs]

- console = Console()
+ logger.info(f"=== RAG Search Results ===")
+ logger.info(f"Query: {query}")
+ logger.info(f"Found relevant docs: {len(relevant_docs)}")

- # Create a table for the query information
- query_table = Table(title="Query Information", show_header=False)
- query_table.add_row("Query", query)
- query_table.add_row("Relevant docs", str(len(relevant_docs)))
-
- # Add relevant docs information
+ # 记录相关文档信息
  relevant_docs_info = []
- for doc in relevant_docs:
- info = f"- {doc.module_name.replace(self.path,'',1)}"
+ for i, doc in enumerate(relevant_docs):
+ doc_path = doc.module_name.replace(self.path, '', 1)
+ info = f"{i+1}. {doc_path}"
  if "original_docs" in doc.metadata:
  original_docs = ", ".join(
  [
@@ -490,8 +533,11 @@ class LongContextRAG:
  info += f" (Original docs: {original_docs})"
  relevant_docs_info.append(info)

- relevant_docs_info = "\n".join(relevant_docs_info)
- query_table.add_row("Relevant docs list", relevant_docs_info)
+ if relevant_docs_info:
+ logger.info(
+ f"Relevant documents list:"
+ + "".join([f"\n * {info}" for info in relevant_docs_info])
+ )

  first_round_full_docs = []
  second_round_extracted_docs = []
@@ -507,11 +553,18 @@ class LongContextRAG:
  llm=self.llm,
  disable_segment_reorder=self.args.disable_segment_reorder,
  )
- final_relevant_docs = token_limiter.limit_tokens(
+
+ token_limiter_result = token_limiter.limit_tokens(
  relevant_docs=relevant_docs,
  conversations=conversations,
  index_filter_workers=self.args.index_filter_workers or 5,
  )
+
+ rag_stat.chunk_stat.total_input_tokens += sum(token_limiter_result.input_tokens_counts)
+ rag_stat.chunk_stat.total_generated_tokens += sum(token_limiter_result.generated_tokens_counts)
+ rag_stat.chunk_stat.model_name = token_limiter_result.model_name
+
+ final_relevant_docs = token_limiter_result.docs
  first_round_full_docs = token_limiter.first_round_full_docs
  second_round_extracted_docs = token_limiter.second_round_extracted_docs
  sencond_round_time = token_limiter.sencond_round_time
@@ -522,57 +575,64 @@ class LongContextRAG:

  logger.info(f"Finally send to model: {len(relevant_docs)}")

- query_table.add_row("Only contexts", str(only_contexts))
- query_table.add_row("Filter time", f"{filter_time:.2f} seconds")
- query_table.add_row("Final relevant docs", str(len(relevant_docs)))
- query_table.add_row(
- "first_round_full_docs", str(len(first_round_full_docs))
- )
- query_table.add_row(
- "second_round_extracted_docs", str(len(second_round_extracted_docs))
- )
- query_table.add_row(
- "Second round time", f"{sencond_round_time:.2f} seconds"
+ # 记录分段处理的统计信息
+ logger.info(
+ f"=== Token Management ===\n"
+ f" * Only contexts: {only_contexts}\n"
+ f" * Filter time: {filter_time:.2f} seconds\n"
+ f" * Final relevant docs: {len(relevant_docs)}\n"
+ f" * First round full docs: {len(first_round_full_docs)}\n"
+ f" * Second round extracted docs: {len(second_round_extracted_docs)}\n"
+ f" * Second round time: {sencond_round_time:.2f} seconds"
  )

- # Add relevant docs information
+ # 记录最终选择的文档详情
  final_relevant_docs_info = []
- for doc in relevant_docs:
- info = f"- {doc.module_name.replace(self.path,'',1)}"
+ for i, doc in enumerate(relevant_docs):
+ doc_path = doc.module_name.replace(self.path, '', 1)
+ info = f"{i+1}. {doc_path}"
+
+ metadata_info = []
  if "original_docs" in doc.metadata:
  original_docs = ", ".join(
  [
- doc.replace(self.path, "", 1)
- for doc in doc.metadata["original_docs"]
+ od.replace(self.path, "", 1)
+ for od in doc.metadata["original_docs"]
  ]
  )
- info += f" (Original docs: {original_docs})"
+ metadata_info.append(f"Original docs: {original_docs}")
+
  if "chunk_ranges" in doc.metadata:
  chunk_ranges = json.dumps(
  doc.metadata["chunk_ranges"], ensure_ascii=False
  )
- info += f" (Chunk ranges: {chunk_ranges})"
+ metadata_info.append(f"Chunk ranges: {chunk_ranges}")
+
+ if "processing_time" in doc.metadata:
+ metadata_info.append(f"Processing time: {doc.metadata['processing_time']:.2f}s")
+
+ if metadata_info:
+ info += f" ({'; '.join(metadata_info)})"
+
  final_relevant_docs_info.append(info)

- final_relevant_docs_info = "\n".join(final_relevant_docs_info)
- query_table.add_row("Final Relevant docs list", final_relevant_docs_info)
-
- # Create a panel to contain the table
- panel = Panel(
- query_table,
- title="RAG Search Results",
- expand=False,
+ if final_relevant_docs_info:
+ logger.info(
+ f"Final documents to be sent to model:"
+ + "".join([f"\n * {info}" for info in final_relevant_docs_info])
  )

- # Log the panel using rich
- console.print(panel)
-
+ # 记录令牌统计
  request_tokens = sum([doc.tokens for doc in relevant_docs])
  target_model = model or self.llm.default_model_name
  logger.info(
- f"Start to send to model {target_model} with {request_tokens} tokens"
+ f"=== LLM Request ===\n"
+ f" * Target model: {target_model}\n"
+ f" * Total tokens: {request_tokens}"
  )

+ logger.info(f"Start to send to model {target_model} with {request_tokens} tokens")
+
  if LLMComputeEngine is not None and not self.args.disable_inference_enhance:
  llm_compute_engine = LLMComputeEngine(
  llm=target_llm,
@@ -585,17 +645,22 @@ class LongContextRAG:
  new_conversations = llm_compute_engine.process_conversation(
  conversations, query, [doc.source_code for doc in relevant_docs]
  )
-
- return (
- llm_compute_engine.stream_chat_oai(
+ chunks = llm_compute_engine.stream_chat_oai(
  conversations=new_conversations,
  model=model,
  role_mapping=role_mapping,
  llm_config=llm_config,
  delta_mode=True,
- ),
- context,
- )
+ )
+
+ def generate_chunks():
+ for chunk in chunks:
+ yield chunk[0]
+ if chunk[1] is not None:
+ rag_stat.answer_stat.total_input_tokens += chunk[1].input_tokens_count
+ rag_stat.answer_stat.total_generated_tokens += chunk[1].generated_tokens_count
+ self._print_rag_stats(rag_stat)
+ return generate_chunks(), context

  new_conversations = conversations[:-1] + [
  {
@@ -614,5 +679,85 @@ class LongContextRAG:
  llm_config=llm_config,
  delta_mode=True,
  )
+
+ def generate_chunks():
+ for chunk in chunks:
+ yield chunk[0]
+ if chunk[1] is not None:
+ rag_stat.answer_stat.total_input_tokens += chunk[1].input_tokens_count
+ rag_stat.answer_stat.total_generated_tokens += chunk[1].generated_tokens_count
+ self._print_rag_stats(rag_stat)
+ return generate_chunks(), context
+
+

- return (chunk[0] for chunk in chunks), context
+ def _print_rag_stats(self, rag_stat: RAGStat) -> None:
+ """打印RAG执行的详细统计信息"""
+ total_input_tokens = (
+ rag_stat.recall_stat.total_input_tokens +
+ rag_stat.chunk_stat.total_input_tokens +
+ rag_stat.answer_stat.total_input_tokens
+ )
+ total_generated_tokens = (
+ rag_stat.recall_stat.total_generated_tokens +
+ rag_stat.chunk_stat.total_generated_tokens +
+ rag_stat.answer_stat.total_generated_tokens
+ )
+ total_tokens = total_input_tokens + total_generated_tokens
+
+ # 避免除以零错误
+ if total_tokens == 0:
+ recall_percent = chunk_percent = answer_percent = 0
+ else:
+ recall_percent = (rag_stat.recall_stat.total_input_tokens + rag_stat.recall_stat.total_generated_tokens) / total_tokens * 100
+ chunk_percent = (rag_stat.chunk_stat.total_input_tokens + rag_stat.chunk_stat.total_generated_tokens) / total_tokens * 100
+ answer_percent = (rag_stat.answer_stat.total_input_tokens + rag_stat.answer_stat.total_generated_tokens) / total_tokens * 100
+
+ logger.info(
+ f"=== RAG 执行统计信息 ===\n"
+ f"总令牌使用: {total_tokens} 令牌\n"
+ f" * 输入令牌总数: {total_input_tokens}\n"
+ f" * 生成令牌总数: {total_generated_tokens}\n"
+ f"\n"
+ f"阶段统计:\n"
+ f" 1. 文档检索阶段:\n"
+ f" - 模型: {rag_stat.recall_stat.model_name}\n"
+ f" - 输入令牌: {rag_stat.recall_stat.total_input_tokens}\n"
+ f" - 生成令牌: {rag_stat.recall_stat.total_generated_tokens}\n"
+ f" - 阶段总计: {rag_stat.recall_stat.total_input_tokens + rag_stat.recall_stat.total_generated_tokens}\n"
+ f"\n"
+ f" 2. 文档分块阶段:\n"
+ f" - 模型: {rag_stat.chunk_stat.model_name}\n"
+ f" - 输入令牌: {rag_stat.chunk_stat.total_input_tokens}\n"
+ f" - 生成令牌: {rag_stat.chunk_stat.total_generated_tokens}\n"
+ f" - 阶段总计: {rag_stat.chunk_stat.total_input_tokens + rag_stat.chunk_stat.total_generated_tokens}\n"
+ f"\n"
+ f" 3. 答案生成阶段:\n"
+ f" - 模型: {rag_stat.answer_stat.model_name}\n"
+ f" - 输入令牌: {rag_stat.answer_stat.total_input_tokens}\n"
+ f" - 生成令牌: {rag_stat.answer_stat.total_generated_tokens}\n"
+ f" - 阶段总计: {rag_stat.answer_stat.total_input_tokens + rag_stat.answer_stat.total_generated_tokens}\n"
+ f"\n"
+ f"令牌分布百分比:\n"
+ f" - 文档检索: {recall_percent:.1f}%\n"
+ f" - 文档分块: {chunk_percent:.1f}%\n"
+ f" - 答案生成: {answer_percent:.1f}%\n"
+ )
+
+ # 记录原始统计数据,以便调试
+ logger.debug(f"RAG Stat 原始数据: {rag_stat}")
+
+ # 返回成本估算
+ estimated_cost = self._estimate_token_cost(total_input_tokens, total_generated_tokens)
+ if estimated_cost > 0:
+ logger.info(f"估计成本: 约 ${estimated_cost:.4f} 人民币")
+
+ def _estimate_token_cost(self, input_tokens: int, output_tokens: int) -> float:
+ """估算当前请求的令牌成本(人民币)"""
+ # 实际应用中,可以根据不同模型设置不同价格
+ input_cost_per_1m = 2.0/1000000 # 每百万输入令牌的成本
+ output_cost_per_1m = 8.0/100000 # 每百万输出令牌的成本
+
+ cost = (input_tokens * input_cost_per_1m / 1000000) + (output_tokens* output_cost_per_1m/1000000)
+ return cost
+
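Both branches of stream_chat_oai now return a wrapping generator instead of the raw stream: each chunk is a (text, meta) pair, the text is yielded through, the meta's token counts are added to rag_stat.answer_stat, and _print_rag_stats runs once the stream is exhausted. A minimal self-contained sketch of that pattern, assuming meta exposes input_tokens_count and generated_tokens_count as in the hunk:

```python
from types import SimpleNamespace

def wrap_stream(chunks, answer_stat, on_done):
    """Yield the text of each (text, meta) chunk, fold token counts into
    answer_stat, and call on_done after the stream ends. Mirrors the
    generate_chunks() helpers added in this hunk."""
    for text, meta in chunks:
        yield text
        if meta is not None:
            answer_stat.total_input_tokens += meta.input_tokens_count
            answer_stat.total_generated_tokens += meta.generated_tokens_count
    on_done(answer_stat)

# Tiny demo with fake chunks (real chunks come from stream_chat_oai):
answer_stat = SimpleNamespace(total_input_tokens=0, total_generated_tokens=0)
fake_chunks = [
    ("Hello ", SimpleNamespace(input_tokens_count=100, generated_tokens_count=3)),
    ("world", SimpleNamespace(input_tokens_count=0, generated_tokens_count=2)),
]
print("".join(wrap_stream(fake_chunks, answer_stat, lambda s: None)))  # Hello world
print(answer_stat.total_input_tokens, answer_stat.total_generated_tokens)  # 100 5
```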
@@ -1,7 +1,7 @@
  from autocoder.common import AutoCoderArgs, SourceCode
  from pydantic import BaseModel
  import re
- from typing import Optional
+ from typing import Optional, List


  class DocRelevance(BaseModel):
@@ -23,6 +23,17 @@ class FilterDoc(BaseModel):
  task_timing: TaskTiming


+ class DocFilterResult(BaseModel):
+ # 注意, docs 只保留最后成功过滤的文档
+ docs: List[FilterDoc]
+ # 注意, raw_docs 保留所有文档
+ raw_docs: List[FilterDoc]
+ input_tokens_counts: List[int]
+ generated_tokens_counts: List[int]
+ durations: List[float]
+ model_name: str = "unknown"
+
+
  def parse_relevance(text: Optional[str]) -> Optional[DocRelevance]:
  if text is None:
  return None
@@ -7,7 +7,9 @@ from autocoder.common import SourceCode
  from byzerllm.utils.client.code_utils import extract_code
  import byzerllm
  from byzerllm import ByzerLLM
-
+ from autocoder.rag.relevant_utils import TaskTiming
+ from byzerllm import MetaHolder
+ from autocoder.rag.token_limiter_utils import TokenLimiterResult

  class TokenLimiter:
  def __init__(
@@ -94,10 +96,24 @@ class TokenLimiter:
  relevant_docs: List[SourceCode],
  conversations: List[Dict[str, str]],
  index_filter_workers: int,
- ) -> List[SourceCode]:
+ ) -> TokenLimiterResult:
+ logger.info(f"=== TokenLimiter Starting ===")
+ logger.info(f"Configuration: full_text_limit={self.full_text_limit}, segment_limit={self.segment_limit}, buff_limit={self.buff_limit}")
+ logger.info(f"Processing {len(relevant_docs)} source code documents")
+
+ start_time = time.time()
  final_relevant_docs = []
  token_count = 0
  doc_num_count = 0
+ model_name = self.chunk_llm.default_model_name or "unknown"
+ token_limiter_result = TokenLimiterResult(
+ docs=[],
+ raw_docs=[],
+ input_tokens_counts=[],
+ generated_tokens_counts=[],
+ durations=[],
+ model_name=model_name
+ )

  reorder_relevant_docs = []

@@ -112,6 +128,7 @@ class TokenLimiter:
  ## TODO:
  ## 1. 未来根据参数决定是否开启重排以及重排的策略
  if not self.disable_segment_reorder:
+ logger.info("Document reordering enabled - organizing segments by original document order")
  num_count = 0
  for doc in relevant_docs:
  num_count += 1
@@ -135,8 +152,11 @@ class TokenLimiter:
  temp_docs.sort(key=lambda x: x.metadata["chunk_index"])
  reorder_relevant_docs.extend(temp_docs)
  else:
+ logger.info("Document reordering disabled - using original retrieval order")
  reorder_relevant_docs = relevant_docs

+ logger.info(f"After reordering: {len(reorder_relevant_docs)} documents to process")
+
  ## 非窗口分区实现
  for doc in reorder_relevant_docs:
  doc_tokens = self.count_tokens(doc.source_code)
@@ -149,10 +169,15 @@ class TokenLimiter:

  ## 如果窗口无法放下所有的相关文档,则需要分区
  if len(final_relevant_docs) < len(reorder_relevant_docs):
+ logger.info(f"Token limit exceeded: {len(final_relevant_docs)}/{len(reorder_relevant_docs)} docs fit in window")
+ logger.info(f"=== Starting First Round: Full Text Loading ===")
+
  ## 先填充full_text分区
  token_count = 0
  new_token_limit = self.full_text_limit
  doc_num_count = 0
+ first_round_start_time = time.time()
+
  for doc in reorder_relevant_docs:
  doc_tokens = self.count_tokens(doc.source_code)
  doc_num_count += 1
@@ -161,11 +186,18 @@ class TokenLimiter:
  token_count += doc_tokens
  else:
  break
+
+ first_round_duration = time.time() - first_round_start_time
+ logger.info(
+ f"First round complete: loaded {len(self.first_round_full_docs)} documents"
+ f" ({token_count} tokens) in {first_round_duration:.2f}s"
+ )

  if len(self.first_round_full_docs) > 0:
  remaining_tokens = (
  self.full_text_limit + self.segment_limit - token_count
  )
+ logger.info(f"Remaining token budget: {remaining_tokens}")
  else:
  logger.warning(
  "Full text area is empty, this is may caused by the single doc is too long"
@@ -175,49 +207,117 @@ class TokenLimiter:
  ## 继续填充segment分区
  sencond_round_start_time = time.time()
  remaining_docs = reorder_relevant_docs[len(self.first_round_full_docs) :]
+
  logger.info(
- f"first round docs: {len(self.first_round_full_docs)} remaining docs: {len(remaining_docs)} index_filter_workers: {index_filter_workers}"
+ f"=== Starting Second Round: Chunk Extraction ==="
+ f"\n * Documents to process: {len(remaining_docs)}"
+ f"\n * Remaining token budget: {remaining_tokens}"
+ f"\n * Thread pool size: {index_filter_workers or 5}"
  )

+ total_processed = 0
+ successful_extractions = 0
+
+
  with ThreadPoolExecutor(max_workers=index_filter_workers or 5) as executor:
- future_to_doc = {
- executor.submit(self.process_range_doc, doc, conversations): doc
- for doc in remaining_docs
- }
+ future_to_doc = {}
+ for doc in remaining_docs:
+ submit_time = time.time()
+ future = executor.submit(self.process_range_doc, doc, conversations)
+ future_to_doc[future] = (doc, submit_time)

  for future in as_completed(future_to_doc):
- doc = future_to_doc[future]
+ doc, submit_time = future_to_doc[future]
+ end_time = time.time()
+ total_processed += 1
+ progress_percent = (total_processed / len(remaining_docs)) * 100
+
  try:
  result = future.result()
+ task_duration = end_time - submit_time
+
  if result and remaining_tokens > 0:
  self.second_round_extracted_docs.append(result)
+ token_limiter_result.raw_docs.append(result)
+
+ if "rag" in result.metadata and "chunk" in result.metadata["rag"]:
+ chunk_meta = result.metadata["rag"]["chunk"]
+ token_limiter_result.input_tokens_counts.append(chunk_meta.get("input_tokens_count", 0))
+ token_limiter_result.generated_tokens_counts.append(chunk_meta.get("generated_tokens_count", 0))
+ token_limiter_result.durations.append(chunk_meta.get("duration", 0))
+
  tokens = result.tokens
+ successful_extractions += 1
+
+ logger.info(
+ f"Document extraction [{progress_percent:.1f}%] - {total_processed}/{len(remaining_docs)}:"
+ f"\n - File: {doc.module_name}"
+ f"\n - Chunks: {len(result.metadata.get('chunk_ranges', []))}"
+ f"\n - Extracted tokens: {tokens}"
+ f"\n - Remaining tokens: {remaining_tokens - tokens if tokens > 0 else remaining_tokens}"
+ f"\n - Processing time: {task_duration:.2f}s"
+ )
+
  if tokens > 0:
  remaining_tokens -= tokens
  else:
  logger.warning(
  f"Token count for doc {doc.module_name} is 0 or negative"
  )
+ elif result:
+ logger.info(
+ f"Document extraction [{progress_percent:.1f}%] - {total_processed}/{len(remaining_docs)}:"
+ f"\n - File: {doc.module_name}"
+ f"\n - Skipped: Token budget exhausted ({remaining_tokens} remaining)"
+ f"\n - Processing time: {task_duration:.2f}s"
+ )
+ else:
+ logger.warning(
+ f"Document extraction [{progress_percent:.1f}%] - {total_processed}/{len(remaining_docs)}:"
+ f"\n - File: {doc.module_name}"
+ f"\n - Result: No content extracted"
+ f"\n - Processing time: {task_duration:.2f}s"
+ )
  except Exception as exc:
  logger.error(
- f"Processing doc {doc.module_name} generated an exception: {exc}"
+ f"Document extraction [{progress_percent:.1f}%] - {total_processed}/{len(remaining_docs)}:"
+ f"\n - File: {doc.module_name}"
+ f"\n - Error: {exc}"
+ f"\n - Processing time: {end_time - submit_time:.2f}s"
  )

  final_relevant_docs = (
  self.first_round_full_docs + self.second_round_extracted_docs
  )
  self.sencond_round_time = time.time() - sencond_round_start_time
+ total_time = time.time() - start_time
+
  logger.info(
- f"Second round processing time: {self.sencond_round_time:.2f} seconds"
+ f"=== Second round complete ==="
+ f"\n * Time: {self.sencond_round_time:.2f}s"
+ f"\n * Documents processed: {total_processed}/{len(remaining_docs)}"
+ f"\n * Successful extractions: {successful_extractions}"
+ f"\n * Extracted tokens: {sum(doc.tokens for doc in self.second_round_extracted_docs)}"
  )
-
- return final_relevant_docs
+ else:
+ logger.info(f"All {len(reorder_relevant_docs)} documents fit within token limits")
+ total_time = time.time() - start_time
+
+ logger.info(
+ f"=== TokenLimiter Complete ==="
+ f"\n * Total time: {total_time:.2f}s"
+ f"\n * Documents selected: {len(final_relevant_docs)}/{len(relevant_docs)}"
+ f"\n * Total tokens: {sum(doc.tokens for doc in final_relevant_docs)}"
+ )
+ token_limiter_result.docs = final_relevant_docs
+ return token_limiter_result

  def process_range_doc(
  self, doc: SourceCode, conversations: List[Dict[str, str]], max_retries=3
  ) -> SourceCode:
  for attempt in range(max_retries):
  content = ""
+ start_time = time.time()
  try:
  source_code_with_line_number = ""
  source_code_lines = doc.source_code.split("\n")
@@ -225,14 +325,19 @@ class TokenLimiter:
  source_code_with_line_number += f"{idx+1} {line}\n"

  llm = self.chunk_llm
+ model_name = llm.default_model_name or "unknown"
+ meta_holder = MetaHolder()

+ extraction_start_time = time.time()
  extracted_info = (
  self.extract_relevance_range_from_docs_with_conversation.options(
  {"llm_config": {"max_length": 100}}
  )
- .with_llm(llm)
+ .with_llm(llm).with_meta(meta_holder)
  .run(conversations, [source_code_with_line_number])
  )
+ extraction_duration = time.time() - extraction_start_time
+
  json_str = extract_code(extracted_info)[0][1]
  json_objs = json.loads(json_str)

@@ -242,23 +347,59 @@ class TokenLimiter:
  chunk = "\n".join(source_code_lines[start_line:end_line])
  content += chunk + "\n"

+ total_duration = time.time() - start_time
+
+
+ meta = meta_holder.get_meta_model()
+
+ input_tokens_count = 0
+ generated_tokens_count = 0
+
+ if meta:
+ input_tokens_count = meta.input_tokens_count
+ generated_tokens_count = meta.generated_tokens_count
+
+ logger.debug(
+ f"Document {doc.module_name} chunk extraction details:"
+ f"\n - Chunks found: {len(json_objs)}"
+ f"\n - Input tokens: {input_tokens_count}"
+ f"\n - Generated tokens: {generated_tokens_count}"
+ f"\n - LLM time: {extraction_duration:.2f}s"
+ f"\n - Total processing time: {total_duration:.2f}s"
+ )
+
+ if "rag" not in doc.metadata:
+ doc.metadata["rag"] = {}
+
+ doc.metadata["rag"]["chunk"] = {
+ "original_doc": doc.module_name,
+ "chunk_ranges": json_objs,
+ "processing_time": total_duration,
+ "llm_time": extraction_duration,
+
+ "input_tokens_count": input_tokens_count,
+ "generated_tokens_count": generated_tokens_count,
+ "duration": extraction_duration,
+ "chunk_model":model_name
+ }
+
  return SourceCode(
  module_name=doc.module_name,
  source_code=content.strip(),
- tokens=self.count_tokens(content),
+ tokens=input_tokens_count + generated_tokens_count,
  metadata={
- "original_doc": doc.module_name,
- "chunk_ranges": json_objs,
+ **doc.metadata
  },
  )
  except Exception as e:
+ err_duration = time.time() - start_time
  if attempt < max_retries - 1:
  logger.warning(
- f"Error processing doc {doc.module_name}, retrying... (Attempt {attempt + 1}) Error: {str(e)}"
+ f"Error processing doc {doc.module_name}, retrying... (Attempt {attempt + 1}) Error: {str(e)}, duration: {err_duration:.2f}s"
  )
  else:
  logger.error(
- f"Failed to process doc {doc.module_name} after {max_retries} attempts: {str(e)}"
+ f"Failed to process doc {doc.module_name} after {max_retries} attempts: {str(e)}, total duration: {err_duration:.2f}s"
  )
  return SourceCode(
  module_name=doc.module_name, source_code="", tokens=0
@@ -0,0 +1,13 @@
+ import pydantic
+ from typing import List
+ from autocoder.common import SourceCode
+
+ class TokenLimiterResult(pydantic.BaseModel):
+ # 注意, docs 只保留结果文档
+ docs: List[SourceCode]
+ # 注意, raw_docs 保留所有文档
+ raw_docs: List[SourceCode]
+ input_tokens_counts: List[int]
+ generated_tokens_counts: List[int]
+ durations: List[float]
+ model_name: str = "unknown"
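The new token_limiter_utils.py module defines TokenLimiterResult, which limit_tokens now returns and which long_context_rag.py aggregates by summing the token-count lists. An illustrative consumer following that same pattern (the document contents, token numbers, and model name below are made up):

```python
# Illustrative consumer of the new TokenLimiterResult; mirrors the summing
# done in long_context_rag.py above. Field values here are invented examples.
from autocoder.common import SourceCode
from autocoder.rag.token_limiter_utils import TokenLimiterResult

result = TokenLimiterResult(
    docs=[SourceCode(module_name="a.py", source_code="print('hi')", tokens=5)],
    raw_docs=[],
    input_tokens_counts=[120, 95],
    generated_tokens_counts=[30, 22],
    durations=[1.2, 0.8],
    model_name="example-chunk-model",
)
print(sum(result.input_tokens_counts), sum(result.generated_tokens_counts))  # 215 52
```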
autocoder/version.py CHANGED
@@ -1 +1 @@
- __version__ = "0.1.271"
+ __version__ = "0.1.272"