auto-coder 0.1.279__py3-none-any.whl → 0.1.280__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of auto-coder might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: auto-coder
3
- Version: 0.1.279
3
+ Version: 0.1.280
4
4
  Summary: AutoCoder: AutoCoder
5
5
  Author: allwefantasy
6
6
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
@@ -12,7 +12,7 @@ autocoder/chat_auto_coder_lang.py,sha256=ShOQVOnMA-WlT-fB9OrOer-xQkbcWxJGl-WMPuZ
12
12
  autocoder/command_args.py,sha256=9aYJ-AmPxP1sQh6ciw04FWHjSn31f2W9afXFwo8wgx4,30441
13
13
  autocoder/lang.py,sha256=U6AjVV8Rs1uLyjFCZ8sT6WWuNUxMBqkXXIOs4S120uk,14511
14
14
  autocoder/models.py,sha256=PlG1tKHSHwB57cKLOl5gTl5yTzFUDzCgeHPJU3N9F6Q,9106
15
- autocoder/version.py,sha256=bIKEpQ3tDi5heVrpw16rPnkr9ExeYcOXIfk6sw424Tc,23
15
+ autocoder/version.py,sha256=mNnPow60dgdANkDcEoYTXr9_lpoMQZSEy1-LRu7QFHs,23
16
16
  autocoder/agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
17
  autocoder/agent/auto_demand_organizer.py,sha256=NWSAEsEk94vT3lGjfo25kKLMwYdPcpy9e-i21txPasQ,6942
18
18
  autocoder/agent/auto_filegroup.py,sha256=CW7bqp0FW1GIEMnl-blyAc2UGT7O9Mom0q66ITz1ckM,6635
@@ -108,14 +108,15 @@ autocoder/privacy/model_filter.py,sha256=-N9ZvxxDKpxU7hkn-tKv-QHyXjvkCopUaKgvJwT
108
108
  autocoder/pyproject/__init__.py,sha256=ms-A_pocgGv0oZPEW8JAdXi7G-VSVhkQ6CnWFe535Ec,14477
109
109
  autocoder/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
110
110
  autocoder/rag/api_server.py,sha256=xiypCkdbclY0Z3Cmq5FTvtKrfQUV7yKcDaFFUttA2n0,7242
111
- autocoder/rag/doc_filter.py,sha256=yEXaBw1XJH57Gtvk4-RFQtd5eawA6SBjzxeRZrIsQew,11623
111
+ autocoder/rag/doc_filter.py,sha256=UduVO2mlrngwJICrefjDJTYfdmQ4GcRXrfWDQ7xXksk,14206
112
112
  autocoder/rag/document_retriever.py,sha256=5oThtxukGuRFF96o3pHKsk306a8diXbhgSrbqyU2BvM,8894
113
+ autocoder/rag/lang.py,sha256=TVNx5m7OtBcdfahzI29tMj9m1yrEm32G1c1zc4ZNIPs,3130
113
114
  autocoder/rag/llm_wrapper.py,sha256=Ht5GF5yJtrztoliujsZzx_ooWZmHkd5xLZKcGEiicZw,4303
114
- autocoder/rag/long_context_rag.py,sha256=nZXADsbaiOQYIGiZvEgokMOSjmjuOCA6xkd3LqGnC7o,33658
115
+ autocoder/rag/long_context_rag.py,sha256=3CAlf7GM-LgewS5j9XGKvsKSO4MM6M8TTkKxAGzqVY0,39308
115
116
  autocoder/rag/rag_config.py,sha256=8LwFcTd8OJWWwi1_WY4IzjqgtT6RyE2j4PjxS5cCTDE,802
116
117
  autocoder/rag/rag_entry.py,sha256=6TKtErZ0Us9XSV6HgRKXA6yR3SiZGPHpynOKSaR1wgE,2463
117
118
  autocoder/rag/raw_rag.py,sha256=BOr0YGf3umjqXOIDVO1LXQ0bIHx8hzBdiubND2ezyxc,2946
118
- autocoder/rag/relevant_utils.py,sha256=tgTKGbojCrxuZ7dKbyPh2rCw9TIhwE6ltRxJosaA97U,1267
119
+ autocoder/rag/relevant_utils.py,sha256=tnv_g25DDWYPGT-mpfubIyZv86_g2gPXjM4FPvdeIEE,1739
119
120
  autocoder/rag/simple_directory_reader.py,sha256=LkKreCkNdEOoL4fNhc3_hDoyyWTQUte4uqextISRz4U,24485
120
121
  autocoder/rag/simple_rag.py,sha256=I902EUqOK1WM0Y2WFd7RzDJYofElvTZNLVCBtX5A9rc,14885
121
122
  autocoder/rag/token_checker.py,sha256=jc76x6KWmvVxds6W8juZfQGaoErudc2HenG3sNQfSLs,2819
@@ -159,15 +160,16 @@ autocoder/utils/queue_communicate.py,sha256=buyEzdvab1QA4i2QKbq35rG5v_9x9PWVLWWM
159
160
  autocoder/utils/request_event_queue.py,sha256=r3lo5qGsB1dIjzVQ05dnr0z_9Z3zOkBdP1vmRciKdi4,2095
160
161
  autocoder/utils/request_queue.py,sha256=nwp6PMtgTCiuwJI24p8OLNZjUiprC-TsefQrhMI-yPE,3889
161
162
  autocoder/utils/rest.py,sha256=hLBhr78y-WVnV0oQf9Rxc22EwqF78KINkScvYa1MuYA,6435
163
+ autocoder/utils/stream_thinking.py,sha256=vbDObflBFW53eWEjMTEHf3nyL167_cqpDLh9zRx7Yk8,7015
162
164
  autocoder/utils/tests.py,sha256=BqphrwyycGAvs-5mhH8pKtMZdObwhFtJ5MC_ZAOiLq8,1340
163
165
  autocoder/utils/thread_utils.py,sha256=tv9fhFZOjI18AxVUJbpe_xjBGMpkqgDcOlz9pnDtNik,8583
164
166
  autocoder/utils/types.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
165
167
  autocoder/utils/auto_coder_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
166
168
  autocoder/utils/auto_coder_utils/chat_stream_out.py,sha256=lkJ_A-sYU36JMzjFWkk3pR6uos8oZHYt9GPsPe_CPAo,11766
167
169
  autocoder/utils/chat_auto_coder_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
168
- auto_coder-0.1.279.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
169
- auto_coder-0.1.279.dist-info/METADATA,sha256=ibeocSoPjMW2RjhN5DQq4eARnkV5AQDD5c0quH69t4M,2643
170
- auto_coder-0.1.279.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
171
- auto_coder-0.1.279.dist-info/entry_points.txt,sha256=0nzHtHH4pNcM7xq4EBA2toS28Qelrvcbrr59GqD_0Ak,350
172
- auto_coder-0.1.279.dist-info/top_level.txt,sha256=Jqc0_uJSw2GwoFQAa9iJxYns-2mWla-9ok_Y3Gcznjk,10
173
- auto_coder-0.1.279.dist-info/RECORD,,
170
+ auto_coder-0.1.280.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
171
+ auto_coder-0.1.280.dist-info/METADATA,sha256=SDBMvUk6v6YP7RSwlAWHFGfa3LTOUj3fky1Yz0hlFB0,2643
172
+ auto_coder-0.1.280.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
173
+ auto_coder-0.1.280.dist-info/entry_points.txt,sha256=0nzHtHH4pNcM7xq4EBA2toS28Qelrvcbrr59GqD_0Ak,350
174
+ auto_coder-0.1.280.dist-info/top_level.txt,sha256=Jqc0_uJSw2GwoFQAa9iJxYns-2mWla-9ok_Y3Gcznjk,10
175
+ auto_coder-0.1.280.dist-info/RECORD,,
@@ -1,13 +1,15 @@
1
1
  import time
2
- from typing import List, Dict, Optional
2
+ from typing import List, Dict, Optional, Generator, Tuple
3
3
  from loguru import logger
4
4
  from concurrent.futures import ThreadPoolExecutor, as_completed
5
+ from autocoder.rag.lang import get_message_with_format_and_newline
5
6
 
6
7
  from autocoder.rag.relevant_utils import (
7
8
  parse_relevance,
8
9
  FilterDoc,
9
10
  TaskTiming,
10
- DocFilterResult
11
+ DocFilterResult,
12
+ ProgressUpdate
11
13
  )
12
14
 
13
15
  from autocoder.common import SourceCode, AutoCoderArgs
@@ -49,6 +51,7 @@ def _check_relevance_with_conversation(
49
51
  其中, <relevant> 是你认为文档中和问题的相关度,0-10之间的数字,数字越大表示相关度越高。
50
52
  """
51
53
 
54
+
52
55
  class DocFilter:
53
56
  def __init__(
54
57
  self,
@@ -73,10 +76,10 @@ class DocFilter:
73
76
  ) -> DocFilterResult:
74
77
  return self.filter_docs_with_threads(conversations, documents)
75
78
 
76
- def filter_docs_with_threads(
79
+ def filter_docs_with_progress(
77
80
  self, conversations: List[Dict[str, str]], documents: List[SourceCode]
78
- ) -> DocFilterResult:
79
-
81
+ ) -> Generator[Tuple[ProgressUpdate, Optional[DocFilterResult]], None, DocFilterResult]:
82
+ """使用线程过滤文档,同时产生进度更新"""
80
83
  start_time = time.time()
81
84
  logger.info(f"=== DocFilter Starting ===")
82
85
  logger.info(
@@ -93,6 +96,16 @@ class DocFilter:
93
96
  relevant_count = 0
94
97
  model_name = self.recall_llm.default_model_name or "unknown"
95
98
 
99
+ doc_filter_result = DocFilterResult(
100
+ docs=[],
101
+ raw_docs=[],
102
+ input_tokens_counts=[],
103
+ generated_tokens_counts=[],
104
+ durations=[],
105
+ model_name=model_name
106
+ )
107
+ relevant_docs = doc_filter_result.docs
108
+
96
109
  with ThreadPoolExecutor(
97
110
  max_workers=self.args.index_filter_workers or 5
98
111
  ) as executor:
@@ -141,16 +154,19 @@ class DocFilter:
141
154
  logger.info(
142
155
  f"Submitted {submitted_tasks} document filtering tasks to thread pool")
143
156
 
157
+ # 发送初始进度更新
158
+ yield (ProgressUpdate(
159
+ phase="doc_filter",
160
+ completed=0,
161
+ total=len(documents),
162
+ relevant_count=0,
163
+ message=get_message_with_format_and_newline(
164
+ "doc_filter_start",
165
+ total=len(documents)
166
+ )
167
+ ), None)
168
+
144
169
  # 处理完成的任务
145
- doc_filter_result = DocFilterResult(
146
- docs=[],
147
- raw_docs=[],
148
- input_tokens_counts=[],
149
- generated_tokens_counts=[],
150
- durations=[],
151
- model_name=model_name
152
- )
153
- relevant_docs = doc_filter_result.docs
154
170
  for future in as_completed(list(future_to_doc.keys())):
155
171
  try:
156
172
  doc, submit_time = future_to_doc[future]
@@ -194,32 +210,50 @@ class DocFilter:
194
210
  f"\n - Timing: Duration={task_timing.duration:.2f}s, Processing={task_timing.real_duration:.2f}s, Queue={queue_time:.2f}s"
195
211
  f"\n - Response: {v}"
196
212
  )
197
-
213
+
198
214
  if "rag" not in doc.metadata:
199
215
  doc.metadata["rag"] = {}
200
216
  doc.metadata["rag"]["recall"] = {
201
217
  "input_tokens_count": input_tokens_count,
202
218
  "generated_tokens_count": generated_tokens_count,
203
219
  "recall_model": model_name,
204
- "duration": task_timing.real_duration
220
+ "duration": task_timing.real_duration
205
221
  }
206
-
207
- doc_filter_result.input_tokens_counts.append(input_tokens_count)
208
- doc_filter_result.generated_tokens_counts.append(generated_tokens_count)
209
- doc_filter_result.durations.append(task_timing.real_duration)
210
-
222
+
223
+ doc_filter_result.input_tokens_counts.append(
224
+ input_tokens_count)
225
+ doc_filter_result.generated_tokens_counts.append(
226
+ generated_tokens_count)
227
+ doc_filter_result.durations.append(
228
+ task_timing.real_duration)
229
+
211
230
  new_filter_doc = FilterDoc(
212
- source_code=doc,
213
- relevance=relevance,
214
- task_timing=task_timing,
215
- )
216
-
231
+ source_code=doc,
232
+ relevance=relevance,
233
+ task_timing=task_timing,
234
+ )
235
+
217
236
  doc_filter_result.raw_docs.append(new_filter_doc)
218
237
 
219
238
  if is_relevant:
220
239
  relevant_docs.append(
221
240
  new_filter_doc
222
241
  )
242
+
243
+ # 产生进度更新
244
+ yield (ProgressUpdate(
245
+ phase="doc_filter",
246
+ completed=completed_tasks,
247
+ total=len(documents),
248
+ relevant_count=relevant_count,
249
+ message=get_message_with_format_and_newline(
250
+ "doc_filter_progress",
251
+ progress_percent=progress_percent,
252
+ relevant_count=relevant_count,
253
+ total=len(documents)
254
+ )
255
+ ), None)
256
+
223
257
  except Exception as exc:
224
258
  try:
225
259
  doc, submit_time = future_to_doc[future]
@@ -236,7 +270,7 @@ class DocFilter:
236
270
  FilterDoc(
237
271
  source_code=doc,
238
272
  relevance=None,
239
- task_timing=TaskTiming(),
273
+ task_timing=TaskTiming(),
240
274
  )
241
275
  )
242
276
  except Exception as e:
@@ -244,6 +278,18 @@ class DocFilter:
244
278
  f"Document filtering error in task tracking: {exc}"
245
279
  )
246
280
 
281
+ # 报告错误进度
282
+ yield (ProgressUpdate(
283
+ phase="doc_filter",
284
+ completed=completed_tasks,
285
+ total=len(documents),
286
+ relevant_count=relevant_count,
287
+ message=get_message_with_format_and_newline(
288
+ "doc_filter_error",
289
+ error=str(exc)
290
+ )
291
+ ), None)
292
+
247
293
  # Sort relevant_docs by relevance score in descending order
248
294
  relevant_docs.sort(
249
295
  key=lambda x: x.relevance.relevant_score, reverse=True)
@@ -254,7 +300,7 @@ class DocFilter:
254
300
  doc.task_timing.real_duration for doc in relevant_docs) / len(relevant_docs) if relevant_docs else 0
255
301
  avg_queue_time = sum(doc.task_timing.real_start_time -
256
302
  doc.task_timing.submit_time for doc in relevant_docs) / len(relevant_docs) if relevant_docs else 0
257
-
303
+
258
304
  total_input_tokens = sum(doc_filter_result.input_tokens_counts)
259
305
  total_generated_tokens = sum(doc_filter_result.generated_tokens_counts)
260
306
 
@@ -278,4 +324,33 @@ class DocFilter:
278
324
  else:
279
325
  logger.warning("No relevant documents found!")
280
326
 
281
- return doc_filter_result
327
+ # 返回最终结果
328
+ yield (ProgressUpdate(
329
+ phase="doc_filter",
330
+ completed=len(documents),
331
+ total=len(documents),
332
+ relevant_count=relevant_count,
333
+ message=get_message_with_format_and_newline(
334
+ "doc_filter_complete",
335
+ total_time=total_time,
336
+ relevant_count=relevant_count
337
+ )
338
+ ), doc_filter_result)
339
+
340
+ def filter_docs_with_threads(
341
+ self, conversations: List[Dict[str, str]], documents: List[SourceCode]
342
+ ) -> DocFilterResult:
343
+ # 保持兼容性的接口
344
+ for _, result in self.filter_docs_with_progress(conversations, documents):
345
+ if result is not None:
346
+ return result
347
+
348
+ # 这是一个应急情况,不应该到达这里
349
+ return DocFilterResult(
350
+ docs=[],
351
+ raw_docs=[],
352
+ input_tokens_counts=[],
353
+ generated_tokens_counts=[],
354
+ durations=[],
355
+ model_name=self.recall_llm.default_model_name or "unknown"
356
+ )
autocoder/rag/lang.py ADDED
@@ -0,0 +1,50 @@
1
+ import locale
2
+ from byzerllm.utils import format_str_jinja2
3
+
4
+ MESSAGES = {
5
+ "en": {
6
+ "rag_error_title": "RAG Error",
7
+ "rag_error_message": "Failed to generate response: {{error}}",
8
+ "rag_searching_docs": "Searching documents with {{model}}...",
9
+ "rag_docs_filter_result": "{{model}} processed {{docs_num}} documents, cost {{filter_time}} seconds, input tokens: {{input_tokens}}, output tokens: {{output_tokens}}",
10
+ "dynamic_chunking_start": "Dynamic chunking start with {{model}}",
11
+ "dynamic_chunking_result": "Dynamic chunking result with {{model}}, first round cost {{first_round_time}} seconds, second round cost {{sencond_round_time}} seconds, input tokens: {{input_tokens}}, output tokens: {{output_tokens}}, first round full docs: {{first_round_full_docs}}, second round extracted docs: {{second_round_extracted_docs}}",
12
+ "send_to_model": "Send to model {{model}} with {{tokens}} tokens",
13
+ "doc_filter_start": "Document filtering start, total {{total}} documents",
14
+ "doc_filter_progress": "Document filtering progress: {{progress_percent}}% processed {{relevant_count}}/{{total}} documents",
15
+ "doc_filter_error": "Document filtering error: {{error}}",
16
+ "doc_filter_complete": "Document filtering complete, cost {{total_time}} seconds, found {{relevant_count}} relevant documents"
17
+ },
18
+ "zh": {
19
+ "rag_error_title": "RAG 错误",
20
+ "rag_error_message": "生成响应失败: {{error}}",
21
+ "rag_searching_docs": "正在使用 {{model}} 搜索文档...",
22
+ "rag_docs_filter_result": "{{model}} 处理了 {{docs_num}} 个文档, 耗时 {{filter_time}} 秒, 输入 tokens: {{input_tokens}}, 输出 tokens: {{output_tokens}}",
23
+ "dynamic_chunking_start": "使用 {{model}} 进行动态分块",
24
+ "dynamic_chunking_result": "使用 {{model}} 进行动态分块, 第一轮耗时 {{first_round_time}} 秒, 第二轮耗时 {{sencond_round_time}} 秒, 输入 tokens: {{input_tokens}}, 输出 tokens: {{output_tokens}}, 第一轮全量文档: {{first_round_full_docs}}, 第二轮提取文档: {{second_round_extracted_docs}}",
25
+ "send_to_model": "发送给模型 {{model}} 的 tokens 数量预估为 {{tokens}}",
26
+ "doc_filter_start": "开始过滤文档,共 {{total}} 个文档",
27
+ "doc_filter_progress": "文档过滤进度:{{progress_percent}}%,处理了 {{relevant_count}}/{{total}} 个文档",
28
+ "doc_filter_error": "文档过滤错误:{{error}}",
29
+ "doc_filter_complete": "文档过滤完成,耗时 {{total_time}} 秒,找到 {{relevant_count}} 个相关文档"
30
+ }
31
+ }
32
+
33
+
34
+ def get_system_language():
35
+ try:
36
+ return locale.getdefaultlocale()[0][:2]
37
+ except:
38
+ return 'en'
39
+
40
+
41
+ def get_message(key):
42
+ lang = get_system_language()
43
+ return MESSAGES.get(lang, MESSAGES['en']).get(key, MESSAGES['en'][key])
44
+
45
+
46
+ def get_message_with_format(msg_key: str, **kwargs):
47
+ return format_str_jinja2(get_message(msg_key), **kwargs)
48
+
49
+ def get_message_with_format_and_newline(msg_key: str, **kwargs):
50
+ return format_str_jinja2(get_message(msg_key), **kwargs) + "\n"
@@ -23,6 +23,8 @@ from autocoder.rag.relevant_utils import (
23
23
  FilterDoc,
24
24
  TaskTiming,
25
25
  parse_relevance,
26
+ ProgressUpdate,
27
+ DocFilterResult
26
28
  )
27
29
  from autocoder.rag.token_checker import check_token_limit
28
30
  from autocoder.rag.token_counter import RemoteTokenCounter, TokenCounter
@@ -34,14 +36,17 @@ from autocoder.rag.stream_event import event_writer
34
36
  from autocoder.rag.relevant_utils import DocFilterResult
35
37
  from pydantic import BaseModel
36
38
  from byzerllm.utils.types import SingleOutputMeta
39
+ from autocoder.rag.lang import get_message_with_format_and_newline
37
40
 
38
- try:
41
+ try:
39
42
  from autocoder_pro.rag.llm_compute import LLMComputeEngine
40
43
  pro_version = version("auto-coder-pro")
41
44
  autocoder_version = version("auto-coder")
42
- logger.warning(f"auto-coder-pro({pro_version}) plugin is enabled in auto-coder.rag({autocoder_version})")
45
+ logger.warning(
46
+ f"auto-coder-pro({pro_version}) plugin is enabled in auto-coder.rag({autocoder_version})")
43
47
  except ImportError:
44
- logger.warning("Please install auto-coder-pro to enhance llm compute ability")
48
+ logger.warning(
49
+ "Please install auto-coder-pro to enhance llm compute ability")
45
50
  LLMComputeEngine = None
46
51
 
47
52
 
@@ -49,20 +54,26 @@ class RecallStat(BaseModel):
49
54
  total_input_tokens: int
50
55
  total_generated_tokens: int
51
56
  model_name: str = "unknown"
57
+
58
+
52
59
  class ChunkStat(BaseModel):
53
60
  total_input_tokens: int
54
- total_generated_tokens: int
61
+ total_generated_tokens: int
55
62
  model_name: str = "unknown"
63
+
64
+
56
65
  class AnswerStat(BaseModel):
57
66
  total_input_tokens: int
58
67
  total_generated_tokens: int
59
68
  model_name: str = "unknown"
60
69
 
70
+
61
71
  class RAGStat(BaseModel):
62
72
  recall_stat: RecallStat
63
73
  chunk_stat: ChunkStat
64
74
  answer_stat: AnswerStat
65
75
 
76
+
66
77
  class LongContextRAG:
67
78
  def __init__(
68
79
  self,
@@ -86,7 +97,7 @@ class LongContextRAG:
86
97
  self.chunk_llm = self.llm.get_sub_client("chunk_model")
87
98
 
88
99
  self.args = args
89
-
100
+
90
101
  self.path = path
91
102
  self.relevant_score = self.args.rag_doc_filter_relevance or 5
92
103
 
@@ -99,8 +110,10 @@ class LongContextRAG:
99
110
  "The sum of full_text_ratio and segment_ratio must be less than or equal to 1.0"
100
111
  )
101
112
 
102
- self.full_text_limit = int(args.rag_context_window_limit * self.full_text_ratio)
103
- self.segment_limit = int(args.rag_context_window_limit * self.segment_ratio)
113
+ self.full_text_limit = int(
114
+ args.rag_context_window_limit * self.full_text_ratio)
115
+ self.segment_limit = int(
116
+ args.rag_context_window_limit * self.segment_ratio)
104
117
  self.buff_limit = int(args.rag_context_window_limit * self.buff_ratio)
105
118
 
106
119
  self.tokenizer = None
@@ -109,7 +122,8 @@ class LongContextRAG:
109
122
 
110
123
  if self.tokenizer_path:
111
124
  VariableHolder.TOKENIZER_PATH = self.tokenizer_path
112
- VariableHolder.TOKENIZER_MODEL = Tokenizer.from_file(self.tokenizer_path)
125
+ VariableHolder.TOKENIZER_MODEL = Tokenizer.from_file(
126
+ self.tokenizer_path)
113
127
  self.tokenizer = TokenCounter(self.tokenizer_path)
114
128
  else:
115
129
  if llm.is_model_exist("deepseek_tokenizer"):
@@ -161,9 +175,9 @@ class LongContextRAG:
161
175
  self.required_exts,
162
176
  self.on_ray,
163
177
  self.monitor_mode,
164
- ## 确保全文区至少能放下一个文件
178
+ # 确保全文区至少能放下一个文件
165
179
  single_file_token_limit=self.full_text_limit - 100,
166
- disable_auto_window=self.args.disable_auto_window,
180
+ disable_auto_window=self.args.disable_auto_window,
167
181
  enable_hybrid_index=self.args.enable_hybrid_index,
168
182
  extra_params=self.args
169
183
  )
@@ -224,14 +238,14 @@ class LongContextRAG:
224
238
  {% for msg in conversations %}
225
239
  [{{ msg.role }}]:
226
240
  {{ msg.content }}
227
-
241
+
228
242
  {% endfor %}
229
243
  </conversations>
230
244
 
231
245
  请根据提供的文档内容、用户对话历史以及最后一个问题,提取并总结文档中与问题相关的重要信息。
232
246
  如果文档中没有相关信息,请回复"该文档中没有与问题相关的信息"。
233
247
  提取的信息尽量保持和原文中的一样,并且只输出这些信息。
234
- """
248
+ """
235
249
 
236
250
  @byzerllm.prompt()
237
251
  def _answer_question(
@@ -266,26 +280,25 @@ class LongContextRAG:
266
280
  """Get the document retriever class based on configuration."""
267
281
  # Default to LocalDocumentRetriever if not specified
268
282
  return LocalDocumentRetriever
269
-
283
+
270
284
  def _load_ignore_file(self):
271
285
  serveignore_path = os.path.join(self.path, ".serveignore")
272
286
  gitignore_path = os.path.join(self.path, ".gitignore")
273
287
 
274
288
  if os.path.exists(serveignore_path):
275
- with open(serveignore_path, "r",encoding="utf-8") as ignore_file:
289
+ with open(serveignore_path, "r", encoding="utf-8") as ignore_file:
276
290
  return pathspec.PathSpec.from_lines("gitwildmatch", ignore_file)
277
291
  elif os.path.exists(gitignore_path):
278
- with open(gitignore_path, "r",encoding="utf-8") as ignore_file:
292
+ with open(gitignore_path, "r", encoding="utf-8") as ignore_file:
279
293
  return pathspec.PathSpec.from_lines("gitwildmatch", ignore_file)
280
294
  return None
281
295
 
282
- def _retrieve_documents(self,options:Optional[Dict[str,Any]]=None) -> Generator[SourceCode, None, None]:
296
+ def _retrieve_documents(self, options: Optional[Dict[str, Any]] = None) -> Generator[SourceCode, None, None]:
283
297
  return self.document_retriever.retrieve_documents(options=options)
284
298
 
285
299
  def build(self):
286
300
  pass
287
301
 
288
-
289
302
  def search(self, query: str) -> List[SourceCode]:
290
303
  target_query = query
291
304
  only_contexts = False
@@ -300,7 +313,8 @@ class LongContextRAG:
300
313
  only_contexts = True
301
314
 
302
315
  logger.info("Search from RAG.....")
303
- logger.info(f"Query: {target_query[0:100]}... only_contexts: {only_contexts}")
316
+ logger.info(
317
+ f"Query: {target_query[0:100]}... only_contexts: {only_contexts}")
304
318
 
305
319
  if self.client:
306
320
  new_query = json.dumps(
@@ -316,7 +330,8 @@ class LongContextRAG:
316
330
  if not only_contexts:
317
331
  return [SourceCode(module_name=f"RAG:{target_query}", source_code=v)]
318
332
 
319
- json_lines = [json.loads(line) for line in v.split("\n") if line.strip()]
333
+ json_lines = [json.loads(line)
334
+ for line in v.split("\n") if line.strip()]
320
335
  return [SourceCode.model_validate(json_line) for json_line in json_lines]
321
336
  else:
322
337
  if only_contexts:
@@ -335,7 +350,7 @@ class LongContextRAG:
335
350
 
336
351
  def _filter_docs(self, conversations: List[Dict[str, str]]) -> DocFilterResult:
337
352
  query = conversations[-1]["content"]
338
- documents = self._retrieve_documents(options={"query":query})
353
+ documents = self._retrieve_documents(options={"query": query})
339
354
  return self.doc_filter.filter_docs(
340
355
  conversations=conversations, documents=documents
341
356
  )
@@ -360,9 +375,8 @@ class LongContextRAG:
360
375
  logger.error(f"Error in stream_chat_oai: {str(e)}")
361
376
  traceback.print_exc()
362
377
  return ["出现错误,请稍后再试。"], []
363
-
364
378
 
365
- def _stream_chatfrom_openai_sdk(self,response):
379
+ def _stream_chatfrom_openai_sdk(self, response):
366
380
  for chunk in response:
367
381
  if hasattr(chunk, "usage") and chunk.usage:
368
382
  input_tokens_count = chunk.usage.prompt_tokens
@@ -386,9 +400,9 @@ class LongContextRAG:
386
400
  reasoning_text = chunk.choices[0].delta.reasoning_content or ""
387
401
 
388
402
  last_meta = SingleOutputMeta(input_tokens_count=input_tokens_count,
389
- generated_tokens_count=generated_tokens_count,
390
- reasoning_content=reasoning_text,
391
- finish_reason=chunk.choices[0].finish_reason)
403
+ generated_tokens_count=generated_tokens_count,
404
+ reasoning_content=reasoning_text,
405
+ finish_reason=chunk.choices[0].finish_reason)
392
406
  yield (content, last_meta)
393
407
 
394
408
  def _stream_chat_oai(
@@ -398,7 +412,7 @@ class LongContextRAG:
398
412
  role_mapping=None,
399
413
  llm_config: Dict[str, Any] = {},
400
414
  extra_request_params: Dict[str, Any] = {}
401
- ):
415
+ ):
402
416
  if self.client:
403
417
  model = model or self.args.model
404
418
  response = self.client.chat.completions.create(
@@ -407,8 +421,8 @@ class LongContextRAG:
407
421
  stream=True,
408
422
  max_tokens=self.args.rag_params_max_tokens,
409
423
  extra_body=extra_request_params
410
- )
411
- return self._stream_chatfrom_openai_sdk(response), []
424
+ )
425
+ return self._stream_chatfrom_openai_sdk(response), []
412
426
 
413
427
  target_llm = self.llm
414
428
  if self.llm.get_sub_client("qa_model"):
@@ -422,7 +436,7 @@ class LongContextRAG:
422
436
  in query
423
437
  or "简要总结一下对话内容,用作后续的上下文提示 prompt,控制在 200 字以内"
424
438
  in query
425
- ):
439
+ ):
426
440
 
427
441
  chunks = target_llm.stream_chat_oai(
428
442
  conversations=conversations,
@@ -432,22 +446,24 @@ class LongContextRAG:
432
446
  delta_mode=True,
433
447
  extra_request_params=extra_request_params
434
448
  )
449
+
435
450
  def generate_chunks():
436
451
  for chunk in chunks:
437
452
  yield chunk
438
453
  return generate_chunks(), context
439
-
440
- try:
454
+
455
+ try:
441
456
  request_params = json.loads(query)
442
- if "request_id" in request_params:
457
+ if "request_id" in request_params:
443
458
  request_id = request_params["request_id"]
444
459
  index = request_params["index"]
445
-
446
- file_path = event_writer.get_event_file_path(request_id)
447
- logger.info(f"Get events for request_id: {request_id} index: {index} file_path: {file_path}")
460
+
461
+ file_path = event_writer.get_event_file_path(request_id)
462
+ logger.info(
463
+ f"Get events for request_id: {request_id} index: {index} file_path: {file_path}")
448
464
  events = []
449
465
  if not os.path.exists(file_path):
450
- return [],context
466
+ return [], context
451
467
 
452
468
  with open(file_path, "r") as f:
453
469
  for line in f:
@@ -455,8 +471,8 @@ class LongContextRAG:
455
471
  if event["index"] >= index:
456
472
  events.append(event)
457
473
  return [json.dumps({
458
- "events": [event for event in events],
459
- },ensure_ascii=False)], context
474
+ "events": [event for event in events],
475
+ }, ensure_ascii=False)], context
460
476
  except json.JSONDecodeError:
461
477
  pass
462
478
 
@@ -465,7 +481,7 @@ class LongContextRAG:
465
481
  llm=target_llm,
466
482
  inference_enhance=not self.args.disable_inference_enhance,
467
483
  inference_deep_thought=self.args.inference_deep_thought,
468
- inference_slow_without_deep_thought=self.args.inference_slow_without_deep_thought,
484
+ inference_slow_without_deep_thought=self.args.inference_slow_without_deep_thought,
469
485
  precision=self.args.inference_compute_precision,
470
486
  data_cells_max_num=self.args.data_cells_max_num,
471
487
  )
@@ -474,14 +490,14 @@ class LongContextRAG:
474
490
  conversations, query, []
475
491
  )
476
492
  chunks = llm_compute_engine.stream_chat_oai(
477
- conversations=new_conversations,
478
- model=model,
479
- role_mapping=role_mapping,
480
- llm_config=llm_config,
481
- delta_mode=True,
482
- extra_request_params=extra_request_params
483
- )
484
-
493
+ conversations=new_conversations,
494
+ model=model,
495
+ role_mapping=role_mapping,
496
+ llm_config=llm_config,
497
+ delta_mode=True,
498
+ extra_request_params=extra_request_params
499
+ )
500
+
485
501
  def generate_chunks():
486
502
  for chunk in chunks:
487
503
  yield chunk
@@ -491,7 +507,6 @@ class LongContextRAG:
491
507
  context,
492
508
  )
493
509
 
494
-
495
510
  only_contexts = False
496
511
  try:
497
512
  v = json.loads(query)
@@ -504,7 +519,6 @@ class LongContextRAG:
504
519
 
505
520
  logger.info(f"Query: {query} only_contexts: {only_contexts}")
506
521
  start_time = time.time()
507
-
508
522
 
509
523
  rag_stat = RAGStat(
510
524
  recall_stat=RecallStat(
@@ -525,17 +539,62 @@ class LongContextRAG:
525
539
  )
526
540
 
527
541
  context = []
542
+
528
543
  def generate_sream():
529
544
  nonlocal context
530
- doc_filter_result = self._filter_docs(conversations)
531
545
 
532
- rag_stat.recall_stat.total_input_tokens += sum(doc_filter_result.input_tokens_counts)
533
- rag_stat.recall_stat.total_generated_tokens += sum(doc_filter_result.generated_tokens_counts)
546
+ yield ("", SingleOutputMeta(input_tokens_count=0,
547
+ generated_tokens_count=0,
548
+ reasoning_content=get_message_with_format_and_newline(
549
+ "rag_searching_docs",
550
+ model=rag_stat.recall_stat.model_name
551
+ )
552
+ ))
553
+
554
+ doc_filter_result = DocFilterResult(
555
+ docs=[],
556
+ raw_docs=[],
557
+ input_tokens_counts=[],
558
+ generated_tokens_counts=[],
559
+ durations=[],
560
+ model_name=rag_stat.recall_stat.model_name
561
+ )
562
+ query = conversations[-1]["content"]
563
+ documents = self._retrieve_documents(options={"query": query})
564
+
565
+ # 使用带进度报告的过滤方法
566
+ for progress_update, result in self.doc_filter.filter_docs_with_progress(conversations, documents):
567
+ if result is not None:
568
+ doc_filter_result = result
569
+ else:
570
+ # 生成进度更新
571
+ yield ("", SingleOutputMeta(
572
+ input_tokens_count=rag_stat.recall_stat.total_input_tokens,
573
+ generated_tokens_count=rag_stat.recall_stat.total_generated_tokens,
574
+ reasoning_content=f"{progress_update.message} ({progress_update.completed}/{progress_update.total})"
575
+ ))
576
+
577
+ rag_stat.recall_stat.total_input_tokens += sum(
578
+ doc_filter_result.input_tokens_counts)
579
+ rag_stat.recall_stat.total_generated_tokens += sum(
580
+ doc_filter_result.generated_tokens_counts)
534
581
  rag_stat.recall_stat.model_name = doc_filter_result.model_name
535
582
 
536
583
  relevant_docs: List[FilterDoc] = doc_filter_result.docs
537
584
  filter_time = time.time() - start_time
538
585
 
586
+ yield ("", SingleOutputMeta(input_tokens_count=rag_stat.recall_stat.total_input_tokens,
587
+ generated_tokens_count=rag_stat.recall_stat.total_generated_tokens,
588
+ reasoning_content=get_message_with_format_and_newline(
589
+ "rag_docs_filter_result",
590
+ filter_time=filter_time,
591
+ docs_num=len(relevant_docs),
592
+ input_tokens=rag_stat.recall_stat.total_input_tokens,
593
+ output_tokens=rag_stat.recall_stat.total_generated_tokens,
594
+ model=rag_stat.recall_stat.model_name
595
+ )
596
+ ))
597
+
539
598
  # Filter relevant_docs to only include those with is_relevant=True
540
599
  highly_relevant_docs = [
541
600
  doc for doc in relevant_docs if doc.relevance.is_relevant
@@ -543,7 +602,8 @@ class LongContextRAG:
543
602
 
544
603
  if highly_relevant_docs:
545
604
  relevant_docs = highly_relevant_docs
546
- logger.info(f"Found {len(relevant_docs)} highly relevant documents")
605
+ logger.info(
606
+ f"Found {len(relevant_docs)} highly relevant documents")
547
607
 
548
608
  logger.info(
549
609
  f"Filter time: {filter_time:.2f} seconds with {len(relevant_docs)} docs"
@@ -553,7 +613,7 @@ class LongContextRAG:
553
613
  final_docs = []
554
614
  for doc in relevant_docs:
555
615
  final_docs.append(doc.model_dump())
556
- return [json.dumps(final_docs,ensure_ascii=False)], []
616
+ return [json.dumps(final_docs, ensure_ascii=False)], []
557
617
 
558
618
  if not relevant_docs:
559
619
  return ["没有找到相关的文档来回答这个问题。"], []
@@ -588,6 +648,12 @@ class LongContextRAG:
588
648
  + "".join([f"\n * {info}" for info in relevant_docs_info])
589
649
  )
590
650
 
651
+ yield ("", SingleOutputMeta(generated_tokens_count=0,
652
+ reasoning_content=get_message_with_format_and_newline(
653
+ "dynamic_chunking_start",
654
+ model=rag_stat.chunk_stat.model_name
655
+ )
656
+ ))
591
657
  first_round_full_docs = []
592
658
  second_round_extracted_docs = []
593
659
  sencond_round_time = 0
@@ -602,17 +668,19 @@ class LongContextRAG:
602
668
  llm=self.llm,
603
669
  disable_segment_reorder=self.args.disable_segment_reorder,
604
670
  )
605
-
671
+
606
672
  token_limiter_result = token_limiter.limit_tokens(
607
673
  relevant_docs=relevant_docs,
608
674
  conversations=conversations,
609
675
  index_filter_workers=self.args.index_filter_workers or 5,
610
676
  )
611
677
 
612
- rag_stat.chunk_stat.total_input_tokens += sum(token_limiter_result.input_tokens_counts)
613
- rag_stat.chunk_stat.total_generated_tokens += sum(token_limiter_result.generated_tokens_counts)
678
+ rag_stat.chunk_stat.total_input_tokens += sum(
679
+ token_limiter_result.input_tokens_counts)
680
+ rag_stat.chunk_stat.total_generated_tokens += sum(
681
+ token_limiter_result.generated_tokens_counts)
614
682
  rag_stat.chunk_stat.model_name = token_limiter_result.model_name
615
-
683
+
616
684
  final_relevant_docs = token_limiter_result.docs
617
685
  first_round_full_docs = token_limiter.first_round_full_docs
618
686
  second_round_extracted_docs = token_limiter.second_round_extracted_docs
@@ -623,24 +691,41 @@ class LongContextRAG:
623
691
  relevant_docs = relevant_docs[: self.args.index_filter_file_num]
624
692
 
625
693
  logger.info(f"Finally send to model: {len(relevant_docs)}")
626
-
627
694
  # 记录分段处理的统计信息
628
695
  logger.info(
629
696
  f"=== Token Management ===\n"
630
697
  f" * Only contexts: {only_contexts}\n"
631
- f" * Filter time: {filter_time:.2f} seconds\n"
698
+ f" * Filter time: {filter_time:.2f} seconds\n"
632
699
  f" * Final relevant docs: {len(relevant_docs)}\n"
633
700
  f" * First round full docs: {len(first_round_full_docs)}\n"
634
701
  f" * Second round extracted docs: {len(second_round_extracted_docs)}\n"
635
702
  f" * Second round time: {sencond_round_time:.2f} seconds"
636
703
  )
637
704
 
705
+ yield ("", SingleOutputMeta(generated_tokens_count=rag_stat.chunk_stat.total_generated_tokens + rag_stat.recall_stat.total_generated_tokens,
706
+ input_tokens_count=rag_stat.chunk_stat.total_input_tokens +
707
+ rag_stat.recall_stat.total_input_tokens,
708
+ reasoning_content=get_message_with_format_and_newline(
709
+ "dynamic_chunking_result",
710
+ model=rag_stat.chunk_stat.model_name,
711
+ docs_num=len(relevant_docs),
712
+ filter_time=filter_time,
713
+ sencond_round_time=sencond_round_time,
714
+ first_round_full_docs=len(
715
+ first_round_full_docs),
716
+ second_round_extracted_docs=len(
717
+ second_round_extracted_docs),
718
+ input_tokens=rag_stat.chunk_stat.total_input_tokens,
719
+ output_tokens=rag_stat.chunk_stat.total_generated_tokens
720
+ )
721
+ ))
722
+
638
723
  # 记录最终选择的文档详情
639
724
  final_relevant_docs_info = []
640
725
  for i, doc in enumerate(relevant_docs):
641
726
  doc_path = doc.module_name.replace(self.path, '', 1)
642
727
  info = f"{i+1}. {doc_path}"
643
-
728
+
644
729
  metadata_info = []
645
730
  if "original_docs" in doc.metadata:
646
731
  original_docs = ", ".join(
@@ -650,26 +735,27 @@ class LongContextRAG:
650
735
  ]
651
736
  )
652
737
  metadata_info.append(f"Original docs: {original_docs}")
653
-
738
+
654
739
  if "chunk_ranges" in doc.metadata:
655
740
  chunk_ranges = json.dumps(
656
741
  doc.metadata["chunk_ranges"], ensure_ascii=False
657
742
  )
658
743
  metadata_info.append(f"Chunk ranges: {chunk_ranges}")
659
-
744
+
660
745
  if "processing_time" in doc.metadata:
661
- metadata_info.append(f"Processing time: {doc.metadata['processing_time']:.2f}s")
662
-
746
+ metadata_info.append(
747
+ f"Processing time: {doc.metadata['processing_time']:.2f}s")
748
+
663
749
  if metadata_info:
664
750
  info += f" ({'; '.join(metadata_info)})"
665
-
751
+
666
752
  final_relevant_docs_info.append(info)
667
753
 
668
754
  if final_relevant_docs_info:
669
755
  logger.info(
670
756
  f"Final documents to be sent to model:"
671
757
  + "".join([f"\n * {info}" for info in final_relevant_docs_info])
672
- )
758
+ )
673
759
 
674
760
  # 记录令牌统计
675
761
  request_tokens = sum([doc.tokens for doc in relevant_docs])
@@ -680,7 +766,18 @@ class LongContextRAG:
680
766
  f" * Total tokens: {request_tokens}"
681
767
  )
682
768
 
683
- logger.info(f"Start to send to model {target_model} with {request_tokens} tokens")
769
+ logger.info(
770
+ f"Start to send to model {target_model} with {request_tokens} tokens")
771
+
772
+ yield ("", SingleOutputMeta(input_tokens_count=rag_stat.recall_stat.total_input_tokens + rag_stat.chunk_stat.total_input_tokens,
773
+ generated_tokens_count=rag_stat.recall_stat.total_generated_tokens +
774
+ rag_stat.chunk_stat.total_generated_tokens,
775
+ reasoning_content=get_message_with_format_and_newline(
776
+ "send_to_model",
777
+ model=target_model,
778
+ tokens=request_tokens
779
+ )
780
+ ))
684
781
 
685
782
  if LLMComputeEngine is not None and not self.args.disable_inference_enhance:
686
783
  llm_compute_engine = LLMComputeEngine(
@@ -692,33 +789,42 @@ class LongContextRAG:
692
789
  debug=False,
693
790
  )
694
791
  new_conversations = llm_compute_engine.process_conversation(
695
- conversations, query, [doc.source_code for doc in relevant_docs]
792
+ conversations, query, [
793
+ doc.source_code for doc in relevant_docs]
696
794
  )
697
795
  chunks = llm_compute_engine.stream_chat_oai(
698
- conversations=new_conversations,
699
- model=model,
700
- role_mapping=role_mapping,
701
- llm_config=llm_config,
702
- delta_mode=True,
703
- )
704
-
796
+ conversations=new_conversations,
797
+ model=model,
798
+ role_mapping=role_mapping,
799
+ llm_config=llm_config,
800
+ delta_mode=True,
801
+ )
802
+
705
803
  for chunk in chunks:
706
- yield chunk
707
804
  if chunk[1] is not None:
708
805
  rag_stat.answer_stat.total_input_tokens += chunk[1].input_tokens_count
709
- rag_stat.answer_stat.total_generated_tokens += chunk[1].generated_tokens_count
710
- self._print_rag_stats(rag_stat)
711
- else:
806
+ rag_stat.answer_stat.total_generated_tokens += chunk[1].generated_tokens_count
807
+ chunk[1].input_tokens_count = rag_stat.recall_stat.total_input_tokens + \
808
+ rag_stat.chunk_stat.total_input_tokens + \
809
+ rag_stat.answer_stat.total_input_tokens
810
+ chunk[1].generated_tokens_count = rag_stat.recall_stat.total_generated_tokens + \
811
+ rag_stat.chunk_stat.total_generated_tokens + \
812
+ rag_stat.answer_stat.total_generated_tokens
813
+ yield chunk
814
+
815
+ self._print_rag_stats(rag_stat)
816
+ else:
712
817
  new_conversations = conversations[:-1] + [
713
818
  {
714
819
  "role": "user",
715
820
  "content": self._answer_question.prompt(
716
821
  query=query,
717
- relevant_docs=[doc.source_code for doc in relevant_docs],
822
+ relevant_docs=[
823
+ doc.source_code for doc in relevant_docs],
718
824
  ),
719
825
  }
720
826
  ]
721
-
827
+
722
828
  chunks = target_llm.stream_chat_oai(
723
829
  conversations=new_conversations,
724
830
  model=model,
@@ -727,17 +833,22 @@ class LongContextRAG:
727
833
  delta_mode=True,
728
834
  extra_request_params=extra_request_params
729
835
  )
730
-
836
+
731
837
  for chunk in chunks:
732
- yield chunk
733
838
  if chunk[1] is not None:
734
839
  rag_stat.answer_stat.total_input_tokens += chunk[1].input_tokens_count
735
- rag_stat.answer_stat.total_generated_tokens += chunk[1].generated_tokens_count
736
- self._print_rag_stats(rag_stat)
840
+ rag_stat.answer_stat.total_generated_tokens += chunk[1].generated_tokens_count
841
+ chunk[1].input_tokens_count = rag_stat.recall_stat.total_input_tokens + \
842
+ rag_stat.chunk_stat.total_input_tokens + \
843
+ rag_stat.answer_stat.total_input_tokens
844
+ chunk[1].generated_tokens_count = rag_stat.recall_stat.total_generated_tokens + \
845
+ rag_stat.chunk_stat.total_generated_tokens + \
846
+ rag_stat.answer_stat.total_generated_tokens
847
+ yield chunk
737
848
 
738
- return generate_sream(),context
739
-
740
-
849
+ self._print_rag_stats(rag_stat)
850
+
851
+ return generate_sream(), context
741
852
 
742
853
  def _print_rag_stats(self, rag_stat: RAGStat) -> None:
743
854
  """打印RAG执行的详细统计信息"""
@@ -748,19 +859,22 @@ class LongContextRAG:
748
859
  )
749
860
  total_generated_tokens = (
750
861
  rag_stat.recall_stat.total_generated_tokens +
751
- rag_stat.chunk_stat.total_generated_tokens +
862
+ rag_stat.chunk_stat.total_generated_tokens +
752
863
  rag_stat.answer_stat.total_generated_tokens
753
864
  )
754
865
  total_tokens = total_input_tokens + total_generated_tokens
755
-
866
+
756
867
  # 避免除以零错误
757
868
  if total_tokens == 0:
758
869
  recall_percent = chunk_percent = answer_percent = 0
759
870
  else:
760
- recall_percent = (rag_stat.recall_stat.total_input_tokens + rag_stat.recall_stat.total_generated_tokens) / total_tokens * 100
761
- chunk_percent = (rag_stat.chunk_stat.total_input_tokens + rag_stat.chunk_stat.total_generated_tokens) / total_tokens * 100
762
- answer_percent = (rag_stat.answer_stat.total_input_tokens + rag_stat.answer_stat.total_generated_tokens) / total_tokens * 100
763
-
871
+ recall_percent = (rag_stat.recall_stat.total_input_tokens +
872
+ rag_stat.recall_stat.total_generated_tokens) / total_tokens * 100
873
+ chunk_percent = (rag_stat.chunk_stat.total_input_tokens +
874
+ rag_stat.chunk_stat.total_generated_tokens) / total_tokens * 100
875
+ answer_percent = (rag_stat.answer_stat.total_input_tokens +
876
+ rag_stat.answer_stat.total_generated_tokens) / total_tokens * 100
877
+
764
878
  logger.info(
765
879
  f"=== RAG 执行统计信息 ===\n"
766
880
  f"总令牌使用: {total_tokens} 令牌\n"
@@ -791,21 +905,22 @@ class LongContextRAG:
791
905
  f" - 文档分块: {chunk_percent:.1f}%\n"
792
906
  f" - 答案生成: {answer_percent:.1f}%\n"
793
907
  )
794
-
908
+
795
909
  # 记录原始统计数据,以便调试
796
910
  logger.debug(f"RAG Stat 原始数据: {rag_stat}")
797
-
911
+
798
912
  # 返回成本估算
799
- estimated_cost = self._estimate_token_cost(total_input_tokens, total_generated_tokens)
913
+ estimated_cost = self._estimate_token_cost(
914
+ total_input_tokens, total_generated_tokens)
800
915
  if estimated_cost > 0:
801
916
  logger.info(f"估计成本: 约 ${estimated_cost:.4f} 人民币")
802
917
 
803
918
  def _estimate_token_cost(self, input_tokens: int, output_tokens: int) -> float:
804
- """估算当前请求的令牌成本(人民币)"""
919
+ """估算当前请求的令牌成本(人民币)"""
805
920
  # 实际应用中,可以根据不同模型设置不同价格
806
921
  input_cost_per_1m = 2.0/1000000 # 每百万输入令牌的成本
807
922
  output_cost_per_1m = 8.0/100000 # 每百万输出令牌的成本
808
-
809
- cost = (input_tokens * input_cost_per_1m / 1000000) + (output_tokens* output_cost_per_1m/1000000)
923
+
924
+ cost = (input_tokens * input_cost_per_1m / 1000000) + \
925
+ (output_tokens * output_cost_per_1m/1000000)
810
926
  return cost
811
-
@@ -34,6 +34,16 @@ class DocFilterResult(BaseModel):
34
34
  model_name: str = "unknown"
35
35
 
36
36
 
37
class ProgressUpdate:
    """A progress update emitted while documents are being processed."""

    def __init__(self, phase: str, completed: int, total: int, relevant_count: int, message: str):
        self.phase = phase  # current processing phase, e.g. doc_filter, token_check
        self.completed = completed  # number of tasks finished so far
        self.total = total  # total number of tasks
        self.relevant_count = relevant_count  # number of relevant documents found
        self.message = message  # human-readable progress message

    def __repr__(self) -> str:
        # Make progress objects readable when they end up in logs or debuggers.
        return (
            f"{type(self).__name__}(phase={self.phase!r}, "
            f"completed={self.completed!r}, total={self.total!r}, "
            f"relevant_count={self.relevant_count!r}, message={self.message!r})"
        )
45
+
46
+
37
47
  def parse_relevance(text: Optional[str]) -> Optional[DocRelevance]:
38
48
  if text is None:
39
49
  return None
@@ -0,0 +1,193 @@
1
+ import inspect
2
+
3
def stream_with_thinking(response):
    """
    Process an OpenAI-style streaming response that may contain regular
    content and reasoning_content, wrapping reasoning in <thinking> markers.

    Args:
        response: An iterable of streaming chunks; each chunk exposes
            ``choices[0].delta`` with optional ``content`` and
            ``reasoning_content`` attributes.

    Yields:
        str: The start marker, reasoning text, end marker and regular
        content, in stream order.
    """
    start_mark = "<thinking>\n"
    end_mark = "\n</thinking>\n"
    is_thinking = False  # whether we are currently inside a thinking section

    for chunk in response:
        # A chunk may carry an empty `choices` list (for example a
        # usage-only chunk); indexing [0] on it would raise IndexError.
        choices = getattr(chunk, "choices", None)
        if not choices:
            continue

        delta = choices[0].delta
        content = getattr(delta, "content", None)
        reasoning = getattr(delta, "reasoning_content", None)

        if content:
            # Regular content closes any thinking section that is still open.
            if is_thinking:
                yield end_mark
                is_thinking = False
            yield content
        elif reasoning:
            # Open the thinking section on the first reasoning fragment.
            if not is_thinking:
                yield start_mark
                is_thinking = True
            yield reasoning

    # The stream may end while still thinking; always close the section.
    if is_thinking:
        yield end_mark
40
+
41
async def stream_with_thinking_async(response):
    """
    Process an OpenAI-style async streaming response that may contain regular
    content and reasoning_content, wrapping reasoning in <thinking> markers.

    Args:
        response: An async iterable of streaming chunks; each chunk exposes
            ``choices[0].delta`` with optional ``content`` and
            ``reasoning_content`` attributes.

    Yields:
        str: The start marker, reasoning text, end marker and regular
        content, in stream order.
    """
    start_mark = "<thinking>\n"
    end_mark = "\n</thinking>\n"
    is_thinking = False  # whether we are currently inside a thinking section

    async for chunk in response:
        # A chunk may carry an empty `choices` list (for example a
        # usage-only chunk); indexing [0] on it would raise IndexError.
        choices = getattr(chunk, "choices", None)
        if not choices:
            continue

        delta = choices[0].delta
        content = getattr(delta, "content", None)
        reasoning = getattr(delta, "reasoning_content", None)

        if content:
            # Regular content closes any thinking section that is still open.
            if is_thinking:
                yield end_mark
                is_thinking = False
            yield content
        elif reasoning:
            # Open the thinking section on the first reasoning fragment.
            if not is_thinking:
                yield start_mark
                is_thinking = True
            yield reasoning

    # The stream may end while still thinking; always close the section.
    if is_thinking:
        yield end_mark
78
+
79
def process_streaming_response(response):
    """
    Wrap a streaming response with the matching thinking-aware formatter.

    Args:
        response: A streaming response, either a synchronous iterable or an
            asynchronous iterable of chunks.

    Returns:
        An async generator (consume it with ``async for``) when ``response``
        supports asynchronous iteration, otherwise a regular generator.
    """
    # inspect.isasyncgen() only recognises native async-generator objects.
    # Stream wrapper classes that merely implement __aiter__ would otherwise
    # be routed to the synchronous path and fail on iteration.
    if inspect.isasyncgen(response) or hasattr(response, "__aiter__"):
        return stream_with_thinking_async(response)
    return stream_with_thinking(response)
94
+
95
def print_streaming_response(response):
    """
    Write a streaming response to stdout as it arrives, with thinking
    sections delimited by the markers produced by stream_with_thinking.

    Args:
        response: A synchronous streaming response (iterable of chunks).
    """
    formatted = stream_with_thinking(response)
    for piece in formatted:
        # No trailing newline; flush so each fragment appears immediately.
        print(piece, end="", flush=True)
104
+
105
async def print_streaming_response_async(response):
    """
    Write an async streaming response to stdout as it arrives, with thinking
    sections delimited by the markers produced by stream_with_thinking_async.

    Args:
        response: An asynchronous streaming response (async iterable of chunks).
    """
    formatted = stream_with_thinking_async(response)
    async for piece in formatted:
        # No trailing newline; flush so each fragment appears immediately.
        print(piece, end="", flush=True)
114
+
115
def separate_stream_thinking(response):
    """
    Split an OpenAI-style streaming response into two generators: one for
    thinking (reasoning) content and one for normal content.

    Both generators draw from the same underlying stream. Consume the
    thinking generator first: it stops at the first regular-content chunk and
    hands that chunk over to the content generator. Reasoning fragments that
    arrive after regular content has started are dropped.

    Args:
        response: An iterable of streaming chunks.

    Returns:
        tuple: (thinking_generator, content_generator)
    """
    # Share a single iterator so that re-iterable inputs (e.g. a list) are
    # not restarted from the beginning by the second generator.
    chunks = iter(response)
    pending_content = None

    def _delta_of(chunk):
        # Chunks with an empty `choices` list carry no delta; indexing [0]
        # on them would raise IndexError.
        choices = getattr(chunk, "choices", None)
        return choices[0].delta if choices else None

    def thinking_generator():
        nonlocal pending_content

        for chunk in chunks:
            delta = _delta_of(chunk)
            reasoning = getattr(delta, "reasoning_content", None)
            if reasoning:
                yield reasoning
                continue
            content = getattr(delta, "content", None)
            if content:
                # First regular content: stash it for content_generator and
                # stop without consuming any further chunk.
                pending_content = content
                break

    def content_generator():
        nonlocal pending_content

        # First emit the content fragment the thinking generator set aside.
        if pending_content is not None:
            yield pending_content
            pending_content = None

        # Then continue with the remainder of the stream.
        for chunk in chunks:
            content = getattr(_delta_of(chunk), "content", None)
            if content:
                yield content

    return thinking_generator(), content_generator()
154
+
155
async def separate_stream_thinking_async(response):
    """
    Split an OpenAI-style async streaming response into two async generators:
    one for thinking (reasoning) content and one for normal content.

    Both generators draw from the same underlying stream. Consume the
    thinking generator first: it stops at the first regular-content chunk and
    hands that chunk over to the content generator. Reasoning fragments that
    arrive after regular content has started are dropped.

    Args:
        response: An async iterable of streaming chunks.

    Returns:
        tuple: (thinking_generator, content_generator)
    """
    # Share a single async iterator so that re-iterable inputs are not
    # restarted from the beginning by the second generator.
    chunks = response.__aiter__()
    pending_content = None

    def _delta_of(chunk):
        # Chunks with an empty `choices` list carry no delta; indexing [0]
        # on them would raise IndexError.
        choices = getattr(chunk, "choices", None)
        return choices[0].delta if choices else None

    async def thinking_generator():
        nonlocal pending_content

        async for chunk in chunks:
            delta = _delta_of(chunk)
            reasoning = getattr(delta, "reasoning_content", None)
            if reasoning:
                yield reasoning
                continue
            content = getattr(delta, "content", None)
            if content:
                # First regular content: stash it for content_generator and
                # stop without consuming any further chunk.
                pending_content = content
                break

    async def content_generator():
        nonlocal pending_content

        # First emit the content fragment the thinking generator set aside.
        if pending_content is not None:
            yield pending_content
            pending_content = None

        # Then continue with the remainder of the stream.
        async for chunk in chunks:
            content = getattr(_delta_of(chunk), "content", None)
            if content:
                yield content

    return thinking_generator(), content_generator()
autocoder/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.1.279"
1
+ __version__ = "0.1.280"