auto-coder 0.1.279__py3-none-any.whl → 0.1.280__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of auto-coder might be problematic. Click here for more details.
- {auto_coder-0.1.279.dist-info → auto_coder-0.1.280.dist-info}/METADATA +1 -1
- {auto_coder-0.1.279.dist-info → auto_coder-0.1.280.dist-info}/RECORD +12 -10
- autocoder/rag/doc_filter.py +104 -29
- autocoder/rag/lang.py +50 -0
- autocoder/rag/long_context_rag.py +217 -102
- autocoder/rag/relevant_utils.py +10 -0
- autocoder/utils/stream_thinking.py +193 -0
- autocoder/version.py +1 -1
- {auto_coder-0.1.279.dist-info → auto_coder-0.1.280.dist-info}/LICENSE +0 -0
- {auto_coder-0.1.279.dist-info → auto_coder-0.1.280.dist-info}/WHEEL +0 -0
- {auto_coder-0.1.279.dist-info → auto_coder-0.1.280.dist-info}/entry_points.txt +0 -0
- {auto_coder-0.1.279.dist-info → auto_coder-0.1.280.dist-info}/top_level.txt +0 -0
|
@@ -12,7 +12,7 @@ autocoder/chat_auto_coder_lang.py,sha256=ShOQVOnMA-WlT-fB9OrOer-xQkbcWxJGl-WMPuZ
|
|
|
12
12
|
autocoder/command_args.py,sha256=9aYJ-AmPxP1sQh6ciw04FWHjSn31f2W9afXFwo8wgx4,30441
|
|
13
13
|
autocoder/lang.py,sha256=U6AjVV8Rs1uLyjFCZ8sT6WWuNUxMBqkXXIOs4S120uk,14511
|
|
14
14
|
autocoder/models.py,sha256=PlG1tKHSHwB57cKLOl5gTl5yTzFUDzCgeHPJU3N9F6Q,9106
|
|
15
|
-
autocoder/version.py,sha256=
|
|
15
|
+
autocoder/version.py,sha256=mNnPow60dgdANkDcEoYTXr9_lpoMQZSEy1-LRu7QFHs,23
|
|
16
16
|
autocoder/agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
17
17
|
autocoder/agent/auto_demand_organizer.py,sha256=NWSAEsEk94vT3lGjfo25kKLMwYdPcpy9e-i21txPasQ,6942
|
|
18
18
|
autocoder/agent/auto_filegroup.py,sha256=CW7bqp0FW1GIEMnl-blyAc2UGT7O9Mom0q66ITz1ckM,6635
|
|
@@ -108,14 +108,15 @@ autocoder/privacy/model_filter.py,sha256=-N9ZvxxDKpxU7hkn-tKv-QHyXjvkCopUaKgvJwT
|
|
|
108
108
|
autocoder/pyproject/__init__.py,sha256=ms-A_pocgGv0oZPEW8JAdXi7G-VSVhkQ6CnWFe535Ec,14477
|
|
109
109
|
autocoder/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
110
110
|
autocoder/rag/api_server.py,sha256=xiypCkdbclY0Z3Cmq5FTvtKrfQUV7yKcDaFFUttA2n0,7242
|
|
111
|
-
autocoder/rag/doc_filter.py,sha256=
|
|
111
|
+
autocoder/rag/doc_filter.py,sha256=UduVO2mlrngwJICrefjDJTYfdmQ4GcRXrfWDQ7xXksk,14206
|
|
112
112
|
autocoder/rag/document_retriever.py,sha256=5oThtxukGuRFF96o3pHKsk306a8diXbhgSrbqyU2BvM,8894
|
|
113
|
+
autocoder/rag/lang.py,sha256=TVNx5m7OtBcdfahzI29tMj9m1yrEm32G1c1zc4ZNIPs,3130
|
|
113
114
|
autocoder/rag/llm_wrapper.py,sha256=Ht5GF5yJtrztoliujsZzx_ooWZmHkd5xLZKcGEiicZw,4303
|
|
114
|
-
autocoder/rag/long_context_rag.py,sha256=
|
|
115
|
+
autocoder/rag/long_context_rag.py,sha256=3CAlf7GM-LgewS5j9XGKvsKSO4MM6M8TTkKxAGzqVY0,39308
|
|
115
116
|
autocoder/rag/rag_config.py,sha256=8LwFcTd8OJWWwi1_WY4IzjqgtT6RyE2j4PjxS5cCTDE,802
|
|
116
117
|
autocoder/rag/rag_entry.py,sha256=6TKtErZ0Us9XSV6HgRKXA6yR3SiZGPHpynOKSaR1wgE,2463
|
|
117
118
|
autocoder/rag/raw_rag.py,sha256=BOr0YGf3umjqXOIDVO1LXQ0bIHx8hzBdiubND2ezyxc,2946
|
|
118
|
-
autocoder/rag/relevant_utils.py,sha256=
|
|
119
|
+
autocoder/rag/relevant_utils.py,sha256=tnv_g25DDWYPGT-mpfubIyZv86_g2gPXjM4FPvdeIEE,1739
|
|
119
120
|
autocoder/rag/simple_directory_reader.py,sha256=LkKreCkNdEOoL4fNhc3_hDoyyWTQUte4uqextISRz4U,24485
|
|
120
121
|
autocoder/rag/simple_rag.py,sha256=I902EUqOK1WM0Y2WFd7RzDJYofElvTZNLVCBtX5A9rc,14885
|
|
121
122
|
autocoder/rag/token_checker.py,sha256=jc76x6KWmvVxds6W8juZfQGaoErudc2HenG3sNQfSLs,2819
|
|
@@ -159,15 +160,16 @@ autocoder/utils/queue_communicate.py,sha256=buyEzdvab1QA4i2QKbq35rG5v_9x9PWVLWWM
|
|
|
159
160
|
autocoder/utils/request_event_queue.py,sha256=r3lo5qGsB1dIjzVQ05dnr0z_9Z3zOkBdP1vmRciKdi4,2095
|
|
160
161
|
autocoder/utils/request_queue.py,sha256=nwp6PMtgTCiuwJI24p8OLNZjUiprC-TsefQrhMI-yPE,3889
|
|
161
162
|
autocoder/utils/rest.py,sha256=hLBhr78y-WVnV0oQf9Rxc22EwqF78KINkScvYa1MuYA,6435
|
|
163
|
+
autocoder/utils/stream_thinking.py,sha256=vbDObflBFW53eWEjMTEHf3nyL167_cqpDLh9zRx7Yk8,7015
|
|
162
164
|
autocoder/utils/tests.py,sha256=BqphrwyycGAvs-5mhH8pKtMZdObwhFtJ5MC_ZAOiLq8,1340
|
|
163
165
|
autocoder/utils/thread_utils.py,sha256=tv9fhFZOjI18AxVUJbpe_xjBGMpkqgDcOlz9pnDtNik,8583
|
|
164
166
|
autocoder/utils/types.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
165
167
|
autocoder/utils/auto_coder_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
166
168
|
autocoder/utils/auto_coder_utils/chat_stream_out.py,sha256=lkJ_A-sYU36JMzjFWkk3pR6uos8oZHYt9GPsPe_CPAo,11766
|
|
167
169
|
autocoder/utils/chat_auto_coder_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
168
|
-
auto_coder-0.1.
|
|
169
|
-
auto_coder-0.1.
|
|
170
|
-
auto_coder-0.1.
|
|
171
|
-
auto_coder-0.1.
|
|
172
|
-
auto_coder-0.1.
|
|
173
|
-
auto_coder-0.1.
|
|
170
|
+
auto_coder-0.1.280.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
|
|
171
|
+
auto_coder-0.1.280.dist-info/METADATA,sha256=SDBMvUk6v6YP7RSwlAWHFGfa3LTOUj3fky1Yz0hlFB0,2643
|
|
172
|
+
auto_coder-0.1.280.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
|
|
173
|
+
auto_coder-0.1.280.dist-info/entry_points.txt,sha256=0nzHtHH4pNcM7xq4EBA2toS28Qelrvcbrr59GqD_0Ak,350
|
|
174
|
+
auto_coder-0.1.280.dist-info/top_level.txt,sha256=Jqc0_uJSw2GwoFQAa9iJxYns-2mWla-9ok_Y3Gcznjk,10
|
|
175
|
+
auto_coder-0.1.280.dist-info/RECORD,,
|
autocoder/rag/doc_filter.py
CHANGED
|
@@ -1,13 +1,15 @@
|
|
|
1
1
|
import time
|
|
2
|
-
from typing import List, Dict, Optional
|
|
2
|
+
from typing import List, Dict, Optional, Generator, Tuple
|
|
3
3
|
from loguru import logger
|
|
4
4
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
5
|
+
from autocoder.rag.lang import get_message_with_format_and_newline
|
|
5
6
|
|
|
6
7
|
from autocoder.rag.relevant_utils import (
|
|
7
8
|
parse_relevance,
|
|
8
9
|
FilterDoc,
|
|
9
10
|
TaskTiming,
|
|
10
|
-
DocFilterResult
|
|
11
|
+
DocFilterResult,
|
|
12
|
+
ProgressUpdate
|
|
11
13
|
)
|
|
12
14
|
|
|
13
15
|
from autocoder.common import SourceCode, AutoCoderArgs
|
|
@@ -49,6 +51,7 @@ def _check_relevance_with_conversation(
|
|
|
49
51
|
其中, <relevant> 是你认为文档中和问题的相关度,0-10之间的数字,数字越大表示相关度越高。
|
|
50
52
|
"""
|
|
51
53
|
|
|
54
|
+
|
|
52
55
|
class DocFilter:
|
|
53
56
|
def __init__(
|
|
54
57
|
self,
|
|
@@ -73,10 +76,10 @@ class DocFilter:
|
|
|
73
76
|
) -> DocFilterResult:
|
|
74
77
|
return self.filter_docs_with_threads(conversations, documents)
|
|
75
78
|
|
|
76
|
-
def
|
|
79
|
+
def filter_docs_with_progress(
|
|
77
80
|
self, conversations: List[Dict[str, str]], documents: List[SourceCode]
|
|
78
|
-
) -> DocFilterResult:
|
|
79
|
-
|
|
81
|
+
) -> Generator[Tuple[ProgressUpdate, Optional[DocFilterResult]], None, DocFilterResult]:
|
|
82
|
+
"""使用线程过滤文档,同时产生进度更新"""
|
|
80
83
|
start_time = time.time()
|
|
81
84
|
logger.info(f"=== DocFilter Starting ===")
|
|
82
85
|
logger.info(
|
|
@@ -93,6 +96,16 @@ class DocFilter:
|
|
|
93
96
|
relevant_count = 0
|
|
94
97
|
model_name = self.recall_llm.default_model_name or "unknown"
|
|
95
98
|
|
|
99
|
+
doc_filter_result = DocFilterResult(
|
|
100
|
+
docs=[],
|
|
101
|
+
raw_docs=[],
|
|
102
|
+
input_tokens_counts=[],
|
|
103
|
+
generated_tokens_counts=[],
|
|
104
|
+
durations=[],
|
|
105
|
+
model_name=model_name
|
|
106
|
+
)
|
|
107
|
+
relevant_docs = doc_filter_result.docs
|
|
108
|
+
|
|
96
109
|
with ThreadPoolExecutor(
|
|
97
110
|
max_workers=self.args.index_filter_workers or 5
|
|
98
111
|
) as executor:
|
|
@@ -141,16 +154,19 @@ class DocFilter:
|
|
|
141
154
|
logger.info(
|
|
142
155
|
f"Submitted {submitted_tasks} document filtering tasks to thread pool")
|
|
143
156
|
|
|
157
|
+
# 发送初始进度更新
|
|
158
|
+
yield (ProgressUpdate(
|
|
159
|
+
phase="doc_filter",
|
|
160
|
+
completed=0,
|
|
161
|
+
total=len(documents),
|
|
162
|
+
relevant_count=0,
|
|
163
|
+
message=get_message_with_format_and_newline(
|
|
164
|
+
"doc_filter_start",
|
|
165
|
+
total=len(documents)
|
|
166
|
+
)
|
|
167
|
+
), None)
|
|
168
|
+
|
|
144
169
|
# 处理完成的任务
|
|
145
|
-
doc_filter_result = DocFilterResult(
|
|
146
|
-
docs=[],
|
|
147
|
-
raw_docs=[],
|
|
148
|
-
input_tokens_counts=[],
|
|
149
|
-
generated_tokens_counts=[],
|
|
150
|
-
durations=[],
|
|
151
|
-
model_name=model_name
|
|
152
|
-
)
|
|
153
|
-
relevant_docs = doc_filter_result.docs
|
|
154
170
|
for future in as_completed(list(future_to_doc.keys())):
|
|
155
171
|
try:
|
|
156
172
|
doc, submit_time = future_to_doc[future]
|
|
@@ -194,32 +210,50 @@ class DocFilter:
|
|
|
194
210
|
f"\n - Timing: Duration={task_timing.duration:.2f}s, Processing={task_timing.real_duration:.2f}s, Queue={queue_time:.2f}s"
|
|
195
211
|
f"\n - Response: {v}"
|
|
196
212
|
)
|
|
197
|
-
|
|
213
|
+
|
|
198
214
|
if "rag" not in doc.metadata:
|
|
199
215
|
doc.metadata["rag"] = {}
|
|
200
216
|
doc.metadata["rag"]["recall"] = {
|
|
201
217
|
"input_tokens_count": input_tokens_count,
|
|
202
218
|
"generated_tokens_count": generated_tokens_count,
|
|
203
219
|
"recall_model": model_name,
|
|
204
|
-
"duration": task_timing.real_duration
|
|
220
|
+
"duration": task_timing.real_duration
|
|
205
221
|
}
|
|
206
|
-
|
|
207
|
-
doc_filter_result.input_tokens_counts.append(
|
|
208
|
-
|
|
209
|
-
doc_filter_result.
|
|
210
|
-
|
|
222
|
+
|
|
223
|
+
doc_filter_result.input_tokens_counts.append(
|
|
224
|
+
input_tokens_count)
|
|
225
|
+
doc_filter_result.generated_tokens_counts.append(
|
|
226
|
+
generated_tokens_count)
|
|
227
|
+
doc_filter_result.durations.append(
|
|
228
|
+
task_timing.real_duration)
|
|
229
|
+
|
|
211
230
|
new_filter_doc = FilterDoc(
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
231
|
+
source_code=doc,
|
|
232
|
+
relevance=relevance,
|
|
233
|
+
task_timing=task_timing,
|
|
234
|
+
)
|
|
235
|
+
|
|
217
236
|
doc_filter_result.raw_docs.append(new_filter_doc)
|
|
218
237
|
|
|
219
238
|
if is_relevant:
|
|
220
239
|
relevant_docs.append(
|
|
221
240
|
new_filter_doc
|
|
222
241
|
)
|
|
242
|
+
|
|
243
|
+
# 产生进度更新
|
|
244
|
+
yield (ProgressUpdate(
|
|
245
|
+
phase="doc_filter",
|
|
246
|
+
completed=completed_tasks,
|
|
247
|
+
total=len(documents),
|
|
248
|
+
relevant_count=relevant_count,
|
|
249
|
+
message=get_message_with_format_and_newline(
|
|
250
|
+
"doc_filter_progress",
|
|
251
|
+
progress_percent=progress_percent,
|
|
252
|
+
relevant_count=relevant_count,
|
|
253
|
+
total=len(documents)
|
|
254
|
+
)
|
|
255
|
+
), None)
|
|
256
|
+
|
|
223
257
|
except Exception as exc:
|
|
224
258
|
try:
|
|
225
259
|
doc, submit_time = future_to_doc[future]
|
|
@@ -236,7 +270,7 @@ class DocFilter:
|
|
|
236
270
|
FilterDoc(
|
|
237
271
|
source_code=doc,
|
|
238
272
|
relevance=None,
|
|
239
|
-
task_timing=TaskTiming(),
|
|
273
|
+
task_timing=TaskTiming(),
|
|
240
274
|
)
|
|
241
275
|
)
|
|
242
276
|
except Exception as e:
|
|
@@ -244,6 +278,18 @@ class DocFilter:
|
|
|
244
278
|
f"Document filtering error in task tracking: {exc}"
|
|
245
279
|
)
|
|
246
280
|
|
|
281
|
+
# 报告错误进度
|
|
282
|
+
yield (ProgressUpdate(
|
|
283
|
+
phase="doc_filter",
|
|
284
|
+
completed=completed_tasks,
|
|
285
|
+
total=len(documents),
|
|
286
|
+
relevant_count=relevant_count,
|
|
287
|
+
message=get_message_with_format_and_newline(
|
|
288
|
+
"doc_filter_error",
|
|
289
|
+
error=str(exc)
|
|
290
|
+
)
|
|
291
|
+
), None)
|
|
292
|
+
|
|
247
293
|
# Sort relevant_docs by relevance score in descending order
|
|
248
294
|
relevant_docs.sort(
|
|
249
295
|
key=lambda x: x.relevance.relevant_score, reverse=True)
|
|
@@ -254,7 +300,7 @@ class DocFilter:
|
|
|
254
300
|
doc.task_timing.real_duration for doc in relevant_docs) / len(relevant_docs) if relevant_docs else 0
|
|
255
301
|
avg_queue_time = sum(doc.task_timing.real_start_time -
|
|
256
302
|
doc.task_timing.submit_time for doc in relevant_docs) / len(relevant_docs) if relevant_docs else 0
|
|
257
|
-
|
|
303
|
+
|
|
258
304
|
total_input_tokens = sum(doc_filter_result.input_tokens_counts)
|
|
259
305
|
total_generated_tokens = sum(doc_filter_result.generated_tokens_counts)
|
|
260
306
|
|
|
@@ -278,4 +324,33 @@ class DocFilter:
|
|
|
278
324
|
else:
|
|
279
325
|
logger.warning("No relevant documents found!")
|
|
280
326
|
|
|
281
|
-
|
|
327
|
+
# 返回最终结果
|
|
328
|
+
yield (ProgressUpdate(
|
|
329
|
+
phase="doc_filter",
|
|
330
|
+
completed=len(documents),
|
|
331
|
+
total=len(documents),
|
|
332
|
+
relevant_count=relevant_count,
|
|
333
|
+
message=get_message_with_format_and_newline(
|
|
334
|
+
"doc_filter_complete",
|
|
335
|
+
total_time=total_time,
|
|
336
|
+
relevant_count=relevant_count
|
|
337
|
+
)
|
|
338
|
+
), doc_filter_result)
|
|
339
|
+
|
|
340
|
+
def filter_docs_with_threads(
    self, conversations: List[Dict[str, str]], documents: List[SourceCode]
) -> DocFilterResult:
    """Blocking, backward-compatible wrapper around ``filter_docs_with_progress``.

    Iterates the ``(progress, result)`` pairs yielded by
    ``filter_docs_with_progress`` and returns the first result that is not
    ``None``; the progress part of each pair is ignored.

    Returns:
        The first non-``None`` result yielded, or an empty ``DocFilterResult``
        if the generator finishes without yielding one.
    """
    progress_stream = self.filter_docs_with_progress(conversations, documents)
    # Stop consuming as soon as a pair carries an actual result.
    final_result = next(
        (outcome for _, outcome in progress_stream if outcome is not None),
        None,
    )
    if final_result is not None:
        return final_result

    # Emergency fallback; this point is not expected to be reached.
    return DocFilterResult(
        docs=[],
        raw_docs=[],
        input_tokens_counts=[],
        generated_tokens_counts=[],
        durations=[],
        model_name=self.recall_llm.default_model_name or "unknown",
    )
|
autocoder/rag/lang.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import locale
|
|
2
|
+
from byzerllm.utils import format_str_jinja2
|
|
3
|
+
|
|
4
|
+
MESSAGES = {
|
|
5
|
+
"en": {
|
|
6
|
+
"rag_error_title": "RAG Error",
|
|
7
|
+
"rag_error_message": "Failed to generate response: {{error}}",
|
|
8
|
+
"rag_searching_docs": "Searching documents with {{model}}...",
|
|
9
|
+
"rag_docs_filter_result": "{{model}} processed {{docs_num}} documents, cost {{filter_time}} seconds, input tokens: {{input_tokens}}, output tokens: {{output_tokens}}",
|
|
10
|
+
"dynamic_chunking_start": "Dynamic chunking start with {{model}}",
|
|
11
|
+
"dynamic_chunking_result": "Dynamic chunking result with {{model}}, first round cost {{first_round_time}} seconds, second round cost {{sencond_round_time}} seconds, input tokens: {{input_tokens}}, output tokens: {{output_tokens}}, first round full docs: {{first_round_full_docs}}, second round extracted docs: {{second_round_extracted_docs}}",
|
|
12
|
+
"send_to_model": "Send to model {{model}} with {{tokens}} tokens",
|
|
13
|
+
"doc_filter_start": "Document filtering start, total {{total}} documents",
|
|
14
|
+
"doc_filter_progress": "Document filtering progress: {{progress_percent}}% processed {{relevant_count}}/{{total}} documents",
|
|
15
|
+
"doc_filter_error": "Document filtering error: {{error}}",
|
|
16
|
+
"doc_filter_complete": "Document filtering complete, cost {{total_time}} seconds, found {{relevant_count}} relevant documents"
|
|
17
|
+
},
|
|
18
|
+
"zh": {
|
|
19
|
+
"rag_error_title": "RAG 错误",
|
|
20
|
+
"rag_error_message": "生成响应失败: {{error}}",
|
|
21
|
+
"rag_searching_docs": "正在使用 {{model}} 搜索文档...",
|
|
22
|
+
"rag_docs_filter_result": "{{model}} 处理了 {{docs_num}} 个文档, 耗时 {{filter_time}} 秒, 输入 tokens: {{input_tokens}}, 输出 tokens: {{output_tokens}}",
|
|
23
|
+
"dynamic_chunking_start": "使用 {{model}} 进行动态分块",
|
|
24
|
+
"dynamic_chunking_result": "使用 {{model}} 进行动态分块, 第一轮耗时 {{first_round_time}} 秒, 第二轮耗时 {{sencond_round_time}} 秒, 输入 tokens: {{input_tokens}}, 输出 tokens: {{output_tokens}}, 第一轮全量文档: {{first_round_full_docs}}, 第二轮提取文档: {{second_round_extracted_docs}}",
|
|
25
|
+
"send_to_model": "发送给模型 {{model}} 的 tokens 数量预估为 {{tokens}}",
|
|
26
|
+
"doc_filter_start": "开始过滤文档,共 {{total}} 个文档",
|
|
27
|
+
"doc_filter_progress": "文档过滤进度:{{progress_percent}}%,处理了 {{relevant_count}}/{{total}} 个文档",
|
|
28
|
+
"doc_filter_error": "文档过滤错误:{{error}}",
|
|
29
|
+
"doc_filter_complete": "文档过滤完成,耗时 {{total_time}} 秒,找到 {{relevant_count}} 个相关文档"
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def get_system_language():
    """Return the two-letter language code of the system's default locale.

    Returns:
        str: The first two characters of the locale name reported by
        ``locale.getdefaultlocale()`` (e.g. ``"zh"`` for ``zh_CN``), or
        ``"en"`` when no locale name can be determined.
    """
    try:
        locale_name = locale.getdefaultlocale()[0]
    except Exception:
        # Best-effort lookup: any failure falls back to English. Catching
        # Exception instead of using a bare ``except`` lets
        # KeyboardInterrupt / SystemExit propagate.
        return 'en'
    if not locale_name:
        # getdefaultlocale() reports None as the language when it cannot
        # determine one.
        return 'en'
    return locale_name[:2]
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def get_message(key):
    """Return the message template registered under *key*.

    The table for the current system language (``get_system_language()``)
    is consulted first; if that language has no table, or its table lacks
    *key*, the English table is used instead.

    Raises:
        KeyError: If *key* is found neither in the selected language table
            nor in the English table.
    """
    lang = get_system_language()
    localized = MESSAGES.get(lang, MESSAGES['en'])
    if key in localized:
        return localized[key]
    # Only touch the English table when it is actually needed, so a key that
    # exists solely in the localized table does not raise KeyError.
    return MESSAGES['en'][key]
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def get_message_with_format(msg_key: str, **kwargs):
    """Look up the template for *msg_key* and format it with *kwargs*.

    The template comes from ``get_message`` and is passed, together with the
    keyword arguments, to ``format_str_jinja2``; that call's result is
    returned unchanged.
    """
    template = get_message(msg_key)
    rendered = format_str_jinja2(template, **kwargs)
    return rendered
|
|
48
|
+
|
|
49
|
+
def get_message_with_format_and_newline(msg_key: str, **kwargs):
    """Format the message for *msg_key* with *kwargs* and append a newline.

    Equivalent to ``get_message_with_format(msg_key, **kwargs) + "\\n"``.
    """
    return get_message_with_format(msg_key, **kwargs) + "\n"
|
|
@@ -23,6 +23,8 @@ from autocoder.rag.relevant_utils import (
|
|
|
23
23
|
FilterDoc,
|
|
24
24
|
TaskTiming,
|
|
25
25
|
parse_relevance,
|
|
26
|
+
ProgressUpdate,
|
|
27
|
+
DocFilterResult
|
|
26
28
|
)
|
|
27
29
|
from autocoder.rag.token_checker import check_token_limit
|
|
28
30
|
from autocoder.rag.token_counter import RemoteTokenCounter, TokenCounter
|
|
@@ -34,14 +36,17 @@ from autocoder.rag.stream_event import event_writer
|
|
|
34
36
|
from autocoder.rag.relevant_utils import DocFilterResult
|
|
35
37
|
from pydantic import BaseModel
|
|
36
38
|
from byzerllm.utils.types import SingleOutputMeta
|
|
39
|
+
from autocoder.rag.lang import get_message_with_format_and_newline
|
|
37
40
|
|
|
38
|
-
try:
|
|
41
|
+
try:
|
|
39
42
|
from autocoder_pro.rag.llm_compute import LLMComputeEngine
|
|
40
43
|
pro_version = version("auto-coder-pro")
|
|
41
44
|
autocoder_version = version("auto-coder")
|
|
42
|
-
logger.warning(
|
|
45
|
+
logger.warning(
|
|
46
|
+
f"auto-coder-pro({pro_version}) plugin is enabled in auto-coder.rag({autocoder_version})")
|
|
43
47
|
except ImportError:
|
|
44
|
-
logger.warning(
|
|
48
|
+
logger.warning(
|
|
49
|
+
"Please install auto-coder-pro to enhance llm compute ability")
|
|
45
50
|
LLMComputeEngine = None
|
|
46
51
|
|
|
47
52
|
|
|
@@ -49,20 +54,26 @@ class RecallStat(BaseModel):
|
|
|
49
54
|
total_input_tokens: int
|
|
50
55
|
total_generated_tokens: int
|
|
51
56
|
model_name: str = "unknown"
|
|
57
|
+
|
|
58
|
+
|
|
52
59
|
class ChunkStat(BaseModel):
|
|
53
60
|
total_input_tokens: int
|
|
54
|
-
total_generated_tokens: int
|
|
61
|
+
total_generated_tokens: int
|
|
55
62
|
model_name: str = "unknown"
|
|
63
|
+
|
|
64
|
+
|
|
56
65
|
class AnswerStat(BaseModel):
|
|
57
66
|
total_input_tokens: int
|
|
58
67
|
total_generated_tokens: int
|
|
59
68
|
model_name: str = "unknown"
|
|
60
69
|
|
|
70
|
+
|
|
61
71
|
class RAGStat(BaseModel):
|
|
62
72
|
recall_stat: RecallStat
|
|
63
73
|
chunk_stat: ChunkStat
|
|
64
74
|
answer_stat: AnswerStat
|
|
65
75
|
|
|
76
|
+
|
|
66
77
|
class LongContextRAG:
|
|
67
78
|
def __init__(
|
|
68
79
|
self,
|
|
@@ -86,7 +97,7 @@ class LongContextRAG:
|
|
|
86
97
|
self.chunk_llm = self.llm.get_sub_client("chunk_model")
|
|
87
98
|
|
|
88
99
|
self.args = args
|
|
89
|
-
|
|
100
|
+
|
|
90
101
|
self.path = path
|
|
91
102
|
self.relevant_score = self.args.rag_doc_filter_relevance or 5
|
|
92
103
|
|
|
@@ -99,8 +110,10 @@ class LongContextRAG:
|
|
|
99
110
|
"The sum of full_text_ratio and segment_ratio must be less than or equal to 1.0"
|
|
100
111
|
)
|
|
101
112
|
|
|
102
|
-
self.full_text_limit = int(
|
|
103
|
-
|
|
113
|
+
self.full_text_limit = int(
|
|
114
|
+
args.rag_context_window_limit * self.full_text_ratio)
|
|
115
|
+
self.segment_limit = int(
|
|
116
|
+
args.rag_context_window_limit * self.segment_ratio)
|
|
104
117
|
self.buff_limit = int(args.rag_context_window_limit * self.buff_ratio)
|
|
105
118
|
|
|
106
119
|
self.tokenizer = None
|
|
@@ -109,7 +122,8 @@ class LongContextRAG:
|
|
|
109
122
|
|
|
110
123
|
if self.tokenizer_path:
|
|
111
124
|
VariableHolder.TOKENIZER_PATH = self.tokenizer_path
|
|
112
|
-
VariableHolder.TOKENIZER_MODEL = Tokenizer.from_file(
|
|
125
|
+
VariableHolder.TOKENIZER_MODEL = Tokenizer.from_file(
|
|
126
|
+
self.tokenizer_path)
|
|
113
127
|
self.tokenizer = TokenCounter(self.tokenizer_path)
|
|
114
128
|
else:
|
|
115
129
|
if llm.is_model_exist("deepseek_tokenizer"):
|
|
@@ -161,9 +175,9 @@ class LongContextRAG:
|
|
|
161
175
|
self.required_exts,
|
|
162
176
|
self.on_ray,
|
|
163
177
|
self.monitor_mode,
|
|
164
|
-
|
|
178
|
+
# 确保全文区至少能放下一个文件
|
|
165
179
|
single_file_token_limit=self.full_text_limit - 100,
|
|
166
|
-
disable_auto_window=self.args.disable_auto_window,
|
|
180
|
+
disable_auto_window=self.args.disable_auto_window,
|
|
167
181
|
enable_hybrid_index=self.args.enable_hybrid_index,
|
|
168
182
|
extra_params=self.args
|
|
169
183
|
)
|
|
@@ -224,14 +238,14 @@ class LongContextRAG:
|
|
|
224
238
|
{% for msg in conversations %}
|
|
225
239
|
[{{ msg.role }}]:
|
|
226
240
|
{{ msg.content }}
|
|
227
|
-
|
|
241
|
+
|
|
228
242
|
{% endfor %}
|
|
229
243
|
</conversations>
|
|
230
244
|
|
|
231
245
|
请根据提供的文档内容、用户对话历史以及最后一个问题,提取并总结文档中与问题相关的重要信息。
|
|
232
246
|
如果文档中没有相关信息,请回复"该文档中没有与问题相关的信息"。
|
|
233
247
|
提取的信息尽量保持和原文中的一样,并且只输出这些信息。
|
|
234
|
-
"""
|
|
248
|
+
"""
|
|
235
249
|
|
|
236
250
|
@byzerllm.prompt()
|
|
237
251
|
def _answer_question(
|
|
@@ -266,26 +280,25 @@ class LongContextRAG:
|
|
|
266
280
|
"""Get the document retriever class based on configuration."""
|
|
267
281
|
# Default to LocalDocumentRetriever if not specified
|
|
268
282
|
return LocalDocumentRetriever
|
|
269
|
-
|
|
283
|
+
|
|
270
284
|
def _load_ignore_file(self):
|
|
271
285
|
serveignore_path = os.path.join(self.path, ".serveignore")
|
|
272
286
|
gitignore_path = os.path.join(self.path, ".gitignore")
|
|
273
287
|
|
|
274
288
|
if os.path.exists(serveignore_path):
|
|
275
|
-
with open(serveignore_path, "r",encoding="utf-8") as ignore_file:
|
|
289
|
+
with open(serveignore_path, "r", encoding="utf-8") as ignore_file:
|
|
276
290
|
return pathspec.PathSpec.from_lines("gitwildmatch", ignore_file)
|
|
277
291
|
elif os.path.exists(gitignore_path):
|
|
278
|
-
with open(gitignore_path, "r",encoding="utf-8") as ignore_file:
|
|
292
|
+
with open(gitignore_path, "r", encoding="utf-8") as ignore_file:
|
|
279
293
|
return pathspec.PathSpec.from_lines("gitwildmatch", ignore_file)
|
|
280
294
|
return None
|
|
281
295
|
|
|
282
|
-
def _retrieve_documents(self,options:Optional[Dict[str,Any]]=None) -> Generator[SourceCode, None, None]:
|
|
296
|
+
def _retrieve_documents(self, options: Optional[Dict[str, Any]] = None) -> Generator[SourceCode, None, None]:
|
|
283
297
|
return self.document_retriever.retrieve_documents(options=options)
|
|
284
298
|
|
|
285
299
|
def build(self):
|
|
286
300
|
pass
|
|
287
301
|
|
|
288
|
-
|
|
289
302
|
def search(self, query: str) -> List[SourceCode]:
|
|
290
303
|
target_query = query
|
|
291
304
|
only_contexts = False
|
|
@@ -300,7 +313,8 @@ class LongContextRAG:
|
|
|
300
313
|
only_contexts = True
|
|
301
314
|
|
|
302
315
|
logger.info("Search from RAG.....")
|
|
303
|
-
logger.info(
|
|
316
|
+
logger.info(
|
|
317
|
+
f"Query: {target_query[0:100]}... only_contexts: {only_contexts}")
|
|
304
318
|
|
|
305
319
|
if self.client:
|
|
306
320
|
new_query = json.dumps(
|
|
@@ -316,7 +330,8 @@ class LongContextRAG:
|
|
|
316
330
|
if not only_contexts:
|
|
317
331
|
return [SourceCode(module_name=f"RAG:{target_query}", source_code=v)]
|
|
318
332
|
|
|
319
|
-
json_lines = [json.loads(line)
|
|
333
|
+
json_lines = [json.loads(line)
|
|
334
|
+
for line in v.split("\n") if line.strip()]
|
|
320
335
|
return [SourceCode.model_validate(json_line) for json_line in json_lines]
|
|
321
336
|
else:
|
|
322
337
|
if only_contexts:
|
|
@@ -335,7 +350,7 @@ class LongContextRAG:
|
|
|
335
350
|
|
|
336
351
|
def _filter_docs(self, conversations: List[Dict[str, str]]) -> DocFilterResult:
|
|
337
352
|
query = conversations[-1]["content"]
|
|
338
|
-
documents = self._retrieve_documents(options={"query":query})
|
|
353
|
+
documents = self._retrieve_documents(options={"query": query})
|
|
339
354
|
return self.doc_filter.filter_docs(
|
|
340
355
|
conversations=conversations, documents=documents
|
|
341
356
|
)
|
|
@@ -360,9 +375,8 @@ class LongContextRAG:
|
|
|
360
375
|
logger.error(f"Error in stream_chat_oai: {str(e)}")
|
|
361
376
|
traceback.print_exc()
|
|
362
377
|
return ["出现错误,请稍后再试。"], []
|
|
363
|
-
|
|
364
378
|
|
|
365
|
-
def _stream_chatfrom_openai_sdk(self,response):
|
|
379
|
+
def _stream_chatfrom_openai_sdk(self, response):
|
|
366
380
|
for chunk in response:
|
|
367
381
|
if hasattr(chunk, "usage") and chunk.usage:
|
|
368
382
|
input_tokens_count = chunk.usage.prompt_tokens
|
|
@@ -386,9 +400,9 @@ class LongContextRAG:
|
|
|
386
400
|
reasoning_text = chunk.choices[0].delta.reasoning_content or ""
|
|
387
401
|
|
|
388
402
|
last_meta = SingleOutputMeta(input_tokens_count=input_tokens_count,
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
403
|
+
generated_tokens_count=generated_tokens_count,
|
|
404
|
+
reasoning_content=reasoning_text,
|
|
405
|
+
finish_reason=chunk.choices[0].finish_reason)
|
|
392
406
|
yield (content, last_meta)
|
|
393
407
|
|
|
394
408
|
def _stream_chat_oai(
|
|
@@ -398,7 +412,7 @@ class LongContextRAG:
|
|
|
398
412
|
role_mapping=None,
|
|
399
413
|
llm_config: Dict[str, Any] = {},
|
|
400
414
|
extra_request_params: Dict[str, Any] = {}
|
|
401
|
-
):
|
|
415
|
+
):
|
|
402
416
|
if self.client:
|
|
403
417
|
model = model or self.args.model
|
|
404
418
|
response = self.client.chat.completions.create(
|
|
@@ -407,8 +421,8 @@ class LongContextRAG:
|
|
|
407
421
|
stream=True,
|
|
408
422
|
max_tokens=self.args.rag_params_max_tokens,
|
|
409
423
|
extra_body=extra_request_params
|
|
410
|
-
)
|
|
411
|
-
return self._stream_chatfrom_openai_sdk(response), []
|
|
424
|
+
)
|
|
425
|
+
return self._stream_chatfrom_openai_sdk(response), []
|
|
412
426
|
|
|
413
427
|
target_llm = self.llm
|
|
414
428
|
if self.llm.get_sub_client("qa_model"):
|
|
@@ -422,7 +436,7 @@ class LongContextRAG:
|
|
|
422
436
|
in query
|
|
423
437
|
or "简要总结一下对话内容,用作后续的上下文提示 prompt,控制在 200 字以内"
|
|
424
438
|
in query
|
|
425
|
-
):
|
|
439
|
+
):
|
|
426
440
|
|
|
427
441
|
chunks = target_llm.stream_chat_oai(
|
|
428
442
|
conversations=conversations,
|
|
@@ -432,22 +446,24 @@ class LongContextRAG:
|
|
|
432
446
|
delta_mode=True,
|
|
433
447
|
extra_request_params=extra_request_params
|
|
434
448
|
)
|
|
449
|
+
|
|
435
450
|
def generate_chunks():
|
|
436
451
|
for chunk in chunks:
|
|
437
452
|
yield chunk
|
|
438
453
|
return generate_chunks(), context
|
|
439
|
-
|
|
440
|
-
try:
|
|
454
|
+
|
|
455
|
+
try:
|
|
441
456
|
request_params = json.loads(query)
|
|
442
|
-
if "request_id" in request_params:
|
|
457
|
+
if "request_id" in request_params:
|
|
443
458
|
request_id = request_params["request_id"]
|
|
444
459
|
index = request_params["index"]
|
|
445
|
-
|
|
446
|
-
file_path = event_writer.get_event_file_path(request_id)
|
|
447
|
-
logger.info(
|
|
460
|
+
|
|
461
|
+
file_path = event_writer.get_event_file_path(request_id)
|
|
462
|
+
logger.info(
|
|
463
|
+
f"Get events for request_id: {request_id} index: {index} file_path: {file_path}")
|
|
448
464
|
events = []
|
|
449
465
|
if not os.path.exists(file_path):
|
|
450
|
-
return [],context
|
|
466
|
+
return [], context
|
|
451
467
|
|
|
452
468
|
with open(file_path, "r") as f:
|
|
453
469
|
for line in f:
|
|
@@ -455,8 +471,8 @@ class LongContextRAG:
|
|
|
455
471
|
if event["index"] >= index:
|
|
456
472
|
events.append(event)
|
|
457
473
|
return [json.dumps({
|
|
458
|
-
"events": [event for event in events],
|
|
459
|
-
},ensure_ascii=False)], context
|
|
474
|
+
"events": [event for event in events],
|
|
475
|
+
}, ensure_ascii=False)], context
|
|
460
476
|
except json.JSONDecodeError:
|
|
461
477
|
pass
|
|
462
478
|
|
|
@@ -465,7 +481,7 @@ class LongContextRAG:
|
|
|
465
481
|
llm=target_llm,
|
|
466
482
|
inference_enhance=not self.args.disable_inference_enhance,
|
|
467
483
|
inference_deep_thought=self.args.inference_deep_thought,
|
|
468
|
-
inference_slow_without_deep_thought=self.args.inference_slow_without_deep_thought,
|
|
484
|
+
inference_slow_without_deep_thought=self.args.inference_slow_without_deep_thought,
|
|
469
485
|
precision=self.args.inference_compute_precision,
|
|
470
486
|
data_cells_max_num=self.args.data_cells_max_num,
|
|
471
487
|
)
|
|
@@ -474,14 +490,14 @@ class LongContextRAG:
|
|
|
474
490
|
conversations, query, []
|
|
475
491
|
)
|
|
476
492
|
chunks = llm_compute_engine.stream_chat_oai(
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
493
|
+
conversations=new_conversations,
|
|
494
|
+
model=model,
|
|
495
|
+
role_mapping=role_mapping,
|
|
496
|
+
llm_config=llm_config,
|
|
497
|
+
delta_mode=True,
|
|
498
|
+
extra_request_params=extra_request_params
|
|
499
|
+
)
|
|
500
|
+
|
|
485
501
|
def generate_chunks():
|
|
486
502
|
for chunk in chunks:
|
|
487
503
|
yield chunk
|
|
@@ -491,7 +507,6 @@ class LongContextRAG:
|
|
|
491
507
|
context,
|
|
492
508
|
)
|
|
493
509
|
|
|
494
|
-
|
|
495
510
|
only_contexts = False
|
|
496
511
|
try:
|
|
497
512
|
v = json.loads(query)
|
|
@@ -504,7 +519,6 @@ class LongContextRAG:
|
|
|
504
519
|
|
|
505
520
|
logger.info(f"Query: {query} only_contexts: {only_contexts}")
|
|
506
521
|
start_time = time.time()
|
|
507
|
-
|
|
508
522
|
|
|
509
523
|
rag_stat = RAGStat(
|
|
510
524
|
recall_stat=RecallStat(
|
|
@@ -525,17 +539,62 @@ class LongContextRAG:
|
|
|
525
539
|
)
|
|
526
540
|
|
|
527
541
|
context = []
|
|
542
|
+
|
|
528
543
|
def generate_sream():
|
|
529
544
|
nonlocal context
|
|
530
|
-
doc_filter_result = self._filter_docs(conversations)
|
|
531
545
|
|
|
532
|
-
|
|
533
|
-
|
|
546
|
+
yield ("", SingleOutputMeta(input_tokens_count=0,
|
|
547
|
+
generated_tokens_count=0,
|
|
548
|
+
reasoning_content=get_message_with_format_and_newline(
|
|
549
|
+
"rag_searching_docs",
|
|
550
|
+
model=rag_stat.recall_stat.model_name
|
|
551
|
+
)
|
|
552
|
+
))
|
|
553
|
+
|
|
554
|
+
doc_filter_result = DocFilterResult(
|
|
555
|
+
docs=[],
|
|
556
|
+
raw_docs=[],
|
|
557
|
+
input_tokens_counts=[],
|
|
558
|
+
generated_tokens_counts=[],
|
|
559
|
+
durations=[],
|
|
560
|
+
model_name=rag_stat.recall_stat.model_name
|
|
561
|
+
)
|
|
562
|
+
query = conversations[-1]["content"]
|
|
563
|
+
documents = self._retrieve_documents(options={"query": query})
|
|
564
|
+
|
|
565
|
+
# 使用带进度报告的过滤方法
|
|
566
|
+
for progress_update, result in self.doc_filter.filter_docs_with_progress(conversations, documents):
|
|
567
|
+
if result is not None:
|
|
568
|
+
doc_filter_result = result
|
|
569
|
+
else:
|
|
570
|
+
# 生成进度更新
|
|
571
|
+
yield ("", SingleOutputMeta(
|
|
572
|
+
input_tokens_count=rag_stat.recall_stat.total_input_tokens,
|
|
573
|
+
generated_tokens_count=rag_stat.recall_stat.total_generated_tokens,
|
|
574
|
+
reasoning_content=f"{progress_update.message} ({progress_update.completed}/{progress_update.total})"
|
|
575
|
+
))
|
|
576
|
+
|
|
577
|
+
rag_stat.recall_stat.total_input_tokens += sum(
|
|
578
|
+
doc_filter_result.input_tokens_counts)
|
|
579
|
+
rag_stat.recall_stat.total_generated_tokens += sum(
|
|
580
|
+
doc_filter_result.generated_tokens_counts)
|
|
534
581
|
rag_stat.recall_stat.model_name = doc_filter_result.model_name
|
|
535
582
|
|
|
536
583
|
relevant_docs: List[FilterDoc] = doc_filter_result.docs
|
|
537
584
|
filter_time = time.time() - start_time
|
|
538
585
|
|
|
586
|
+
yield ("", SingleOutputMeta(input_tokens_count=rag_stat.recall_stat.total_input_tokens,
|
|
587
|
+
generated_tokens_count=rag_stat.recall_stat.total_generated_tokens,
|
|
588
|
+
reasoning_content=get_message_with_format_and_newline(
|
|
589
|
+
"rag_docs_filter_result",
|
|
590
|
+
filter_time=filter_time,
|
|
591
|
+
docs_num=len(relevant_docs),
|
|
592
|
+
input_tokens=rag_stat.recall_stat.total_input_tokens,
|
|
593
|
+
output_tokens=rag_stat.recall_stat.total_generated_tokens,
|
|
594
|
+
model=rag_stat.recall_stat.model_name
|
|
595
|
+
)
|
|
596
|
+
))
|
|
597
|
+
|
|
539
598
|
# Filter relevant_docs to only include those with is_relevant=True
|
|
540
599
|
highly_relevant_docs = [
|
|
541
600
|
doc for doc in relevant_docs if doc.relevance.is_relevant
|
|
@@ -543,7 +602,8 @@ class LongContextRAG:
|
|
|
543
602
|
|
|
544
603
|
if highly_relevant_docs:
|
|
545
604
|
relevant_docs = highly_relevant_docs
|
|
546
|
-
logger.info(
|
|
605
|
+
logger.info(
|
|
606
|
+
f"Found {len(relevant_docs)} highly relevant documents")
|
|
547
607
|
|
|
548
608
|
logger.info(
|
|
549
609
|
f"Filter time: {filter_time:.2f} seconds with {len(relevant_docs)} docs"
|
|
@@ -553,7 +613,7 @@ class LongContextRAG:
|
|
|
553
613
|
final_docs = []
|
|
554
614
|
for doc in relevant_docs:
|
|
555
615
|
final_docs.append(doc.model_dump())
|
|
556
|
-
return [json.dumps(final_docs,ensure_ascii=False)], []
|
|
616
|
+
return [json.dumps(final_docs, ensure_ascii=False)], []
|
|
557
617
|
|
|
558
618
|
if not relevant_docs:
|
|
559
619
|
return ["没有找到相关的文档来回答这个问题。"], []
|
|
@@ -588,6 +648,12 @@ class LongContextRAG:
|
|
|
588
648
|
+ "".join([f"\n * {info}" for info in relevant_docs_info])
|
|
589
649
|
)
|
|
590
650
|
|
|
651
|
+
yield ("", SingleOutputMeta(generated_tokens_count=0,
|
|
652
|
+
reasoning_content=get_message_with_format_and_newline(
|
|
653
|
+
"dynamic_chunking_start",
|
|
654
|
+
model=rag_stat.chunk_stat.model_name
|
|
655
|
+
)
|
|
656
|
+
))
|
|
591
657
|
first_round_full_docs = []
|
|
592
658
|
second_round_extracted_docs = []
|
|
593
659
|
sencond_round_time = 0
|
|
@@ -602,17 +668,19 @@ class LongContextRAG:
|
|
|
602
668
|
llm=self.llm,
|
|
603
669
|
disable_segment_reorder=self.args.disable_segment_reorder,
|
|
604
670
|
)
|
|
605
|
-
|
|
671
|
+
|
|
606
672
|
token_limiter_result = token_limiter.limit_tokens(
|
|
607
673
|
relevant_docs=relevant_docs,
|
|
608
674
|
conversations=conversations,
|
|
609
675
|
index_filter_workers=self.args.index_filter_workers or 5,
|
|
610
676
|
)
|
|
611
677
|
|
|
612
|
-
rag_stat.chunk_stat.total_input_tokens += sum(
|
|
613
|
-
|
|
678
|
+
rag_stat.chunk_stat.total_input_tokens += sum(
|
|
679
|
+
token_limiter_result.input_tokens_counts)
|
|
680
|
+
rag_stat.chunk_stat.total_generated_tokens += sum(
|
|
681
|
+
token_limiter_result.generated_tokens_counts)
|
|
614
682
|
rag_stat.chunk_stat.model_name = token_limiter_result.model_name
|
|
615
|
-
|
|
683
|
+
|
|
616
684
|
final_relevant_docs = token_limiter_result.docs
|
|
617
685
|
first_round_full_docs = token_limiter.first_round_full_docs
|
|
618
686
|
second_round_extracted_docs = token_limiter.second_round_extracted_docs
|
|
@@ -623,24 +691,41 @@ class LongContextRAG:
|
|
|
623
691
|
relevant_docs = relevant_docs[: self.args.index_filter_file_num]
|
|
624
692
|
|
|
625
693
|
logger.info(f"Finally send to model: {len(relevant_docs)}")
|
|
626
|
-
|
|
627
694
|
# 记录分段处理的统计信息
|
|
628
695
|
logger.info(
|
|
629
696
|
f"=== Token Management ===\n"
|
|
630
697
|
f" * Only contexts: {only_contexts}\n"
|
|
631
|
-
f" * Filter time: {filter_time:.2f} seconds\n"
|
|
698
|
+
f" * Filter time: {filter_time:.2f} seconds\n"
|
|
632
699
|
f" * Final relevant docs: {len(relevant_docs)}\n"
|
|
633
700
|
f" * First round full docs: {len(first_round_full_docs)}\n"
|
|
634
701
|
f" * Second round extracted docs: {len(second_round_extracted_docs)}\n"
|
|
635
702
|
f" * Second round time: {sencond_round_time:.2f} seconds"
|
|
636
703
|
)
|
|
637
704
|
|
|
705
|
+
yield ("", SingleOutputMeta(generated_tokens_count=rag_stat.chunk_stat.total_generated_tokens + rag_stat.recall_stat.total_generated_tokens,
|
|
706
|
+
input_tokens_count=rag_stat.chunk_stat.total_input_tokens +
|
|
707
|
+
rag_stat.recall_stat.total_input_tokens,
|
|
708
|
+
reasoning_content=get_message_with_format_and_newline(
|
|
709
|
+
"dynamic_chunking_result",
|
|
710
|
+
model=rag_stat.chunk_stat.model_name,
|
|
711
|
+
docs_num=len(relevant_docs),
|
|
712
|
+
filter_time=filter_time,
|
|
713
|
+
sencond_round_time=sencond_round_time,
|
|
714
|
+
first_round_full_docs=len(
|
|
715
|
+
first_round_full_docs),
|
|
716
|
+
second_round_extracted_docs=len(
|
|
717
|
+
second_round_extracted_docs),
|
|
718
|
+
input_tokens=rag_stat.chunk_stat.total_input_tokens,
|
|
719
|
+
output_tokens=rag_stat.chunk_stat.total_generated_tokens
|
|
720
|
+
)
|
|
721
|
+
))
|
|
722
|
+
|
|
638
723
|
# 记录最终选择的文档详情
|
|
639
724
|
final_relevant_docs_info = []
|
|
640
725
|
for i, doc in enumerate(relevant_docs):
|
|
641
726
|
doc_path = doc.module_name.replace(self.path, '', 1)
|
|
642
727
|
info = f"{i+1}. {doc_path}"
|
|
643
|
-
|
|
728
|
+
|
|
644
729
|
metadata_info = []
|
|
645
730
|
if "original_docs" in doc.metadata:
|
|
646
731
|
original_docs = ", ".join(
|
|
@@ -650,26 +735,27 @@ class LongContextRAG:
|
|
|
650
735
|
]
|
|
651
736
|
)
|
|
652
737
|
metadata_info.append(f"Original docs: {original_docs}")
|
|
653
|
-
|
|
738
|
+
|
|
654
739
|
if "chunk_ranges" in doc.metadata:
|
|
655
740
|
chunk_ranges = json.dumps(
|
|
656
741
|
doc.metadata["chunk_ranges"], ensure_ascii=False
|
|
657
742
|
)
|
|
658
743
|
metadata_info.append(f"Chunk ranges: {chunk_ranges}")
|
|
659
|
-
|
|
744
|
+
|
|
660
745
|
if "processing_time" in doc.metadata:
|
|
661
|
-
metadata_info.append(
|
|
662
|
-
|
|
746
|
+
metadata_info.append(
|
|
747
|
+
f"Processing time: {doc.metadata['processing_time']:.2f}s")
|
|
748
|
+
|
|
663
749
|
if metadata_info:
|
|
664
750
|
info += f" ({'; '.join(metadata_info)})"
|
|
665
|
-
|
|
751
|
+
|
|
666
752
|
final_relevant_docs_info.append(info)
|
|
667
753
|
|
|
668
754
|
if final_relevant_docs_info:
|
|
669
755
|
logger.info(
|
|
670
756
|
f"Final documents to be sent to model:"
|
|
671
757
|
+ "".join([f"\n * {info}" for info in final_relevant_docs_info])
|
|
672
|
-
|
|
758
|
+
)
|
|
673
759
|
|
|
674
760
|
# 记录令牌统计
|
|
675
761
|
request_tokens = sum([doc.tokens for doc in relevant_docs])
|
|
@@ -680,7 +766,18 @@ class LongContextRAG:
|
|
|
680
766
|
f" * Total tokens: {request_tokens}"
|
|
681
767
|
)
|
|
682
768
|
|
|
683
|
-
logger.info(
|
|
769
|
+
logger.info(
|
|
770
|
+
f"Start to send to model {target_model} with {request_tokens} tokens")
|
|
771
|
+
|
|
772
|
+
yield ("", SingleOutputMeta(input_tokens_count=rag_stat.recall_stat.total_input_tokens + rag_stat.chunk_stat.total_input_tokens,
|
|
773
|
+
generated_tokens_count=rag_stat.recall_stat.total_generated_tokens +
|
|
774
|
+
rag_stat.chunk_stat.total_generated_tokens,
|
|
775
|
+
reasoning_content=get_message_with_format_and_newline(
|
|
776
|
+
"send_to_model",
|
|
777
|
+
model=target_model,
|
|
778
|
+
tokens=request_tokens
|
|
779
|
+
)
|
|
780
|
+
))
|
|
684
781
|
|
|
685
782
|
if LLMComputeEngine is not None and not self.args.disable_inference_enhance:
|
|
686
783
|
llm_compute_engine = LLMComputeEngine(
|
|
@@ -692,33 +789,42 @@ class LongContextRAG:
|
|
|
692
789
|
debug=False,
|
|
693
790
|
)
|
|
694
791
|
new_conversations = llm_compute_engine.process_conversation(
|
|
695
|
-
conversations, query, [
|
|
792
|
+
conversations, query, [
|
|
793
|
+
doc.source_code for doc in relevant_docs]
|
|
696
794
|
)
|
|
697
795
|
chunks = llm_compute_engine.stream_chat_oai(
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
796
|
+
conversations=new_conversations,
|
|
797
|
+
model=model,
|
|
798
|
+
role_mapping=role_mapping,
|
|
799
|
+
llm_config=llm_config,
|
|
800
|
+
delta_mode=True,
|
|
801
|
+
)
|
|
802
|
+
|
|
705
803
|
for chunk in chunks:
|
|
706
|
-
yield chunk
|
|
707
804
|
if chunk[1] is not None:
|
|
708
805
|
rag_stat.answer_stat.total_input_tokens += chunk[1].input_tokens_count
|
|
709
|
-
rag_stat.answer_stat.total_generated_tokens += chunk[1].generated_tokens_count
|
|
710
|
-
|
|
711
|
-
|
|
806
|
+
rag_stat.answer_stat.total_generated_tokens += chunk[1].generated_tokens_count
|
|
807
|
+
chunk[1].input_tokens_count = rag_stat.recall_stat.total_input_tokens + \
|
|
808
|
+
rag_stat.chunk_stat.total_input_tokens + \
|
|
809
|
+
rag_stat.answer_stat.total_input_tokens
|
|
810
|
+
chunk[1].generated_tokens_count = rag_stat.recall_stat.total_generated_tokens + \
|
|
811
|
+
rag_stat.chunk_stat.total_generated_tokens + \
|
|
812
|
+
rag_stat.answer_stat.total_generated_tokens
|
|
813
|
+
yield chunk
|
|
814
|
+
|
|
815
|
+
self._print_rag_stats(rag_stat)
|
|
816
|
+
else:
|
|
712
817
|
new_conversations = conversations[:-1] + [
|
|
713
818
|
{
|
|
714
819
|
"role": "user",
|
|
715
820
|
"content": self._answer_question.prompt(
|
|
716
821
|
query=query,
|
|
717
|
-
relevant_docs=[
|
|
822
|
+
relevant_docs=[
|
|
823
|
+
doc.source_code for doc in relevant_docs],
|
|
718
824
|
),
|
|
719
825
|
}
|
|
720
826
|
]
|
|
721
|
-
|
|
827
|
+
|
|
722
828
|
chunks = target_llm.stream_chat_oai(
|
|
723
829
|
conversations=new_conversations,
|
|
724
830
|
model=model,
|
|
@@ -727,17 +833,22 @@ class LongContextRAG:
|
|
|
727
833
|
delta_mode=True,
|
|
728
834
|
extra_request_params=extra_request_params
|
|
729
835
|
)
|
|
730
|
-
|
|
836
|
+
|
|
731
837
|
for chunk in chunks:
|
|
732
|
-
yield chunk
|
|
733
838
|
if chunk[1] is not None:
|
|
734
839
|
rag_stat.answer_stat.total_input_tokens += chunk[1].input_tokens_count
|
|
735
|
-
rag_stat.answer_stat.total_generated_tokens += chunk[1].generated_tokens_count
|
|
736
|
-
|
|
840
|
+
rag_stat.answer_stat.total_generated_tokens += chunk[1].generated_tokens_count
|
|
841
|
+
chunk[1].input_tokens_count = rag_stat.recall_stat.total_input_tokens + \
|
|
842
|
+
rag_stat.chunk_stat.total_input_tokens + \
|
|
843
|
+
rag_stat.answer_stat.total_input_tokens
|
|
844
|
+
chunk[1].generated_tokens_count = rag_stat.recall_stat.total_generated_tokens + \
|
|
845
|
+
rag_stat.chunk_stat.total_generated_tokens + \
|
|
846
|
+
rag_stat.answer_stat.total_generated_tokens
|
|
847
|
+
yield chunk
|
|
737
848
|
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
849
|
+
self._print_rag_stats(rag_stat)
|
|
850
|
+
|
|
851
|
+
return generate_sream(), context
|
|
741
852
|
|
|
742
853
|
def _print_rag_stats(self, rag_stat: RAGStat) -> None:
|
|
743
854
|
"""打印RAG执行的详细统计信息"""
|
|
@@ -748,19 +859,22 @@ class LongContextRAG:
|
|
|
748
859
|
)
|
|
749
860
|
total_generated_tokens = (
|
|
750
861
|
rag_stat.recall_stat.total_generated_tokens +
|
|
751
|
-
rag_stat.chunk_stat.total_generated_tokens +
|
|
862
|
+
rag_stat.chunk_stat.total_generated_tokens +
|
|
752
863
|
rag_stat.answer_stat.total_generated_tokens
|
|
753
864
|
)
|
|
754
865
|
total_tokens = total_input_tokens + total_generated_tokens
|
|
755
|
-
|
|
866
|
+
|
|
756
867
|
# 避免除以零错误
|
|
757
868
|
if total_tokens == 0:
|
|
758
869
|
recall_percent = chunk_percent = answer_percent = 0
|
|
759
870
|
else:
|
|
760
|
-
recall_percent = (rag_stat.recall_stat.total_input_tokens +
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
871
|
+
recall_percent = (rag_stat.recall_stat.total_input_tokens +
|
|
872
|
+
rag_stat.recall_stat.total_generated_tokens) / total_tokens * 100
|
|
873
|
+
chunk_percent = (rag_stat.chunk_stat.total_input_tokens +
|
|
874
|
+
rag_stat.chunk_stat.total_generated_tokens) / total_tokens * 100
|
|
875
|
+
answer_percent = (rag_stat.answer_stat.total_input_tokens +
|
|
876
|
+
rag_stat.answer_stat.total_generated_tokens) / total_tokens * 100
|
|
877
|
+
|
|
764
878
|
logger.info(
|
|
765
879
|
f"=== RAG 执行统计信息 ===\n"
|
|
766
880
|
f"总令牌使用: {total_tokens} 令牌\n"
|
|
@@ -791,21 +905,22 @@ class LongContextRAG:
|
|
|
791
905
|
f" - 文档分块: {chunk_percent:.1f}%\n"
|
|
792
906
|
f" - 答案生成: {answer_percent:.1f}%\n"
|
|
793
907
|
)
|
|
794
|
-
|
|
908
|
+
|
|
795
909
|
# 记录原始统计数据,以便调试
|
|
796
910
|
logger.debug(f"RAG Stat 原始数据: {rag_stat}")
|
|
797
|
-
|
|
911
|
+
|
|
798
912
|
# 返回成本估算
|
|
799
|
-
estimated_cost = self._estimate_token_cost(
|
|
913
|
+
estimated_cost = self._estimate_token_cost(
|
|
914
|
+
total_input_tokens, total_generated_tokens)
|
|
800
915
|
if estimated_cost > 0:
|
|
801
916
|
logger.info(f"估计成本: 约 ${estimated_cost:.4f} 人民币")
|
|
802
917
|
|
|
803
918
|
def _estimate_token_cost(self, input_tokens: int, output_tokens: int) -> float:
    """Estimate the token cost of the current request, in RMB.

    Args:
        input_tokens: Number of input (prompt) tokens consumed.
        output_tokens: Number of generated (completion) tokens.

    Returns:
        The estimated cost for the given token counts.
    """
    # Flat placeholder prices; a real deployment could set different prices
    # per model. Both values are expressed per one million tokens, so the
    # formula below must divide by 1,000,000 exactly once. (Previously the
    # constants were pre-divided as well, and the output price used 100,000
    # by mistake, which made the estimate effectively zero.)
    input_cost_per_1m = 2.0  # cost per million input tokens
    output_cost_per_1m = 8.0  # cost per million output tokens
    tokens_per_million = 1_000_000

    cost = (input_tokens * input_cost_per_1m / tokens_per_million) + \
        (output_tokens * output_cost_per_1m / tokens_per_million)
    return cost
|
|
811
|
-
|
autocoder/rag/relevant_utils.py
CHANGED
|
@@ -34,6 +34,16 @@ class DocFilterResult(BaseModel):
|
|
|
34
34
|
model_name: str = "unknown"
|
|
35
35
|
|
|
36
36
|
|
|
37
|
+
class ProgressUpdate:
    """A progress update emitted while documents are being processed."""

    def __init__(self, phase: str, completed: int, total: int, relevant_count: int, message: str):
        self.phase = phase  # current processing phase, e.g. doc_filter, token_check
        self.completed = completed  # number of tasks finished so far
        self.total = total  # total number of tasks
        self.relevant_count = relevant_count  # number of relevant documents found
        self.message = message  # human-readable progress message

    def __repr__(self) -> str:
        # Makes progress objects readable in logs and debuggers.
        return (
            f"{type(self).__name__}("
            f"phase={self.phase!r}, "
            f"completed={self.completed!r}, "
            f"total={self.total!r}, "
            f"relevant_count={self.relevant_count!r}, "
            f"message={self.message!r})"
        )
|
|
45
|
+
|
|
46
|
+
|
|
37
47
|
def parse_relevance(text: Optional[str]) -> Optional[DocRelevance]:
|
|
38
48
|
if text is None:
|
|
39
49
|
return None
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
import inspect
|
|
2
|
+
|
|
3
|
+
def stream_with_thinking(response):
    """
    Process an OpenAI streaming response that may contain regular content and reasoning_content.
    Returns a generator that yields the formatted output.

    Reasoning text is wrapped between a "<thinking>\\n" start mark and a
    "\\n</thinking>\\n" end mark; regular content is passed through unchanged.
    Chunks whose ``choices`` list is empty are skipped.

    Args:
        response: An OpenAI streaming response (generator)

    Yields:
        str: Formatted output with thinking sections marked
    """
    start_mark = "<thinking>\n"
    end_mark = "\n</thinking>\n"
    is_thinking = False  # tracks whether we are currently inside a thinking section

    for chunk in response:
        # A streamed chunk may carry no choices at all (for example a
        # usage-only chunk); indexing choices[0] would raise IndexError.
        if not chunk.choices:
            continue

        delta = chunk.choices[0].delta
        content = getattr(delta, "content", None)
        reasoning = getattr(delta, "reasoning_content", None)

        if content:
            # Regular content ends any thinking section that is still open.
            if is_thinking:
                yield end_mark
                is_thinking = False
            yield content
        elif reasoning:
            # Emit the start mark only once per run of reasoning chunks.
            if not is_thinking:
                yield start_mark
                is_thinking = True
            yield reasoning

    # Close the thinking section if the stream ended while still reasoning.
    if is_thinking:
        yield end_mark
|
|
40
|
+
|
|
41
|
+
async def stream_with_thinking_async(response):
    """
    Process an OpenAI async streaming response that may contain regular content and reasoning_content.
    Returns an async generator that yields the formatted output.

    Reasoning text is wrapped between a "<thinking>\\n" start mark and a
    "\\n</thinking>\\n" end mark; regular content is passed through unchanged.
    Chunks whose ``choices`` list is empty are skipped.

    Args:
        response: An OpenAI async streaming response

    Yields:
        str: Formatted output with thinking sections marked
    """
    start_mark = "<thinking>\n"
    end_mark = "\n</thinking>\n"
    is_thinking = False  # tracks whether we are currently inside a thinking section

    async for chunk in response:
        # A streamed chunk may carry no choices at all (for example a
        # usage-only chunk); indexing choices[0] would raise IndexError.
        if not chunk.choices:
            continue

        delta = chunk.choices[0].delta
        content = getattr(delta, "content", None)
        reasoning = getattr(delta, "reasoning_content", None)

        if content:
            # Regular content ends any thinking section that is still open.
            if is_thinking:
                yield end_mark
                is_thinking = False
            yield content
        elif reasoning:
            # Emit the start mark only once per run of reasoning chunks.
            if not is_thinking:
                yield start_mark
                is_thinking = True
            yield reasoning

    # Close the thinking section if the stream ended while still reasoning.
    if is_thinking:
        yield end_mark
|
|
78
|
+
|
|
79
|
+
def process_streaming_response(response):
    """
    Process an OpenAI streaming response, dispatching on whether it is a
    synchronous or an asynchronous stream.

    Any object that implements ``__aiter__`` is treated as asynchronous. This
    covers async generator objects as well as async iterables that are not
    generators (which ``inspect.isasyncgen`` alone would not recognise).

    Args:
        response: An OpenAI streaming response

    Returns:
        A generator (iterate with ``for``) or an async generator (iterate
        with ``async for`` inside an async context) that yields formatted output
    """
    if hasattr(response, "__aiter__"):
        return stream_with_thinking_async(response)
    return stream_with_thinking(response)
|
|
94
|
+
|
|
95
|
+
def print_streaming_response(response):
    """
    Write a streaming response to stdout as it arrives, with thinking
    sections marked by ``stream_with_thinking``.

    Args:
        response: An OpenAI streaming response
    """
    formatted = stream_with_thinking(response)
    for piece in formatted:
        # No trailing newline, and flush each piece so output appears incrementally.
        print(piece, end="", flush=True)
|
|
104
|
+
|
|
105
|
+
async def print_streaming_response_async(response):
    """
    Write an async streaming response to stdout as it arrives, with thinking
    sections marked by ``stream_with_thinking_async``.

    Args:
        response: An OpenAI async streaming response
    """
    formatted = stream_with_thinking_async(response)
    async for piece in formatted:
        # No trailing newline, and flush each piece so output appears incrementally.
        print(piece, end="", flush=True)
|
|
114
|
+
|
|
115
|
+
def separate_stream_thinking(response):
    """
    Process an OpenAI streaming response and return two separate generators:
    one for thinking content and one for normal content.

    Both generators read from the same underlying stream. The thinking
    generator stops at the first regular-content chunk and hands that chunk
    over to the content generator, so consume the thinking generator before
    starting the content generator. Chunks with an empty ``choices`` list are
    skipped by both.

    Args:
        response: An OpenAI streaming response (generator)

    Returns:
        tuple: (thinking_generator, content_generator)
    """
    # Both generators must advance one shared iterator. Without this, a
    # re-iterable input (e.g. a list) would be restarted from the beginning
    # by the content generator. For an iterator, iter() returns it unchanged.
    response = iter(response)
    pending_content_chunk = None

    def thinking_generator():
        nonlocal pending_content_chunk

        for chunk in response:
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta
            reasoning = getattr(delta, "reasoning_content", None)
            if reasoning:
                yield reasoning
            elif getattr(delta, "content", None):
                # First regular content: keep it for the content generator
                # and stop without consuming any further chunks.
                pending_content_chunk = chunk
                break

    def content_generator():
        nonlocal pending_content_chunk

        # First yield the content chunk the thinking generator stopped on, if any.
        if pending_content_chunk is not None:
            yield pending_content_chunk.choices[0].delta.content
            pending_content_chunk = None

        # Continue with the rest of the response.
        for chunk in response:
            if not chunk.choices:
                continue
            content = getattr(chunk.choices[0].delta, "content", None)
            if content:
                yield content

    return thinking_generator(), content_generator()
|
|
154
|
+
|
|
155
|
+
async def separate_stream_thinking_async(response):
    """
    Process an OpenAI async streaming response and return two separate async generators:
    one for thinking content and one for normal content.

    This is a coroutine function: await it to obtain the tuple. Both
    generators read from the same underlying stream. The thinking generator
    stops at the first regular-content chunk and hands that chunk over to the
    content generator, so consume the thinking generator before starting the
    content generator. Chunks with an empty ``choices`` list are skipped by both.

    Args:
        response: An OpenAI async streaming response

    Returns:
        tuple: (thinking_generator, content_generator)
    """
    pending_content_chunk = None

    async def thinking_generator():
        nonlocal pending_content_chunk

        async for chunk in response:
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta
            reasoning = getattr(delta, "reasoning_content", None)
            if reasoning:
                yield reasoning
            elif getattr(delta, "content", None):
                # First regular content: keep it for the content generator
                # and stop without consuming any further chunks.
                pending_content_chunk = chunk
                break

    async def content_generator():
        nonlocal pending_content_chunk

        # First yield the content chunk the thinking generator stopped on, if any.
        if pending_content_chunk is not None:
            yield pending_content_chunk.choices[0].delta.content
            pending_content_chunk = None

        # Continue with the rest of the response.
        async for chunk in response:
            if not chunk.choices:
                continue
            content = getattr(chunk.choices[0].delta, "content", None)
            if content:
                yield content

    return thinking_generator(), content_generator()
|
autocoder/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.1.
|
|
1
|
+
__version__ = "0.1.280"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|