auto-coder 0.1.278-py3-none-any.whl → 0.1.280-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of auto-coder might be problematic.

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: auto-coder
- Version: 0.1.278
+ Version: 0.1.280
  Summary: AutoCoder: AutoCoder
  Author: allwefantasy
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
@@ -26,7 +26,7 @@ Requires-Dist: tabulate
  Requires-Dist: jupyter-client
  Requires-Dist: prompt-toolkit
  Requires-Dist: tokenizers
- Requires-Dist: byzerllm[saas] >=0.1.169
+ Requires-Dist: byzerllm[saas] >=0.1.170
  Requires-Dist: patch
  Requires-Dist: diff-match-patch
  Requires-Dist: GitPython
@@ -12,7 +12,7 @@ autocoder/chat_auto_coder_lang.py,sha256=ShOQVOnMA-WlT-fB9OrOer-xQkbcWxJGl-WMPuZ
  autocoder/command_args.py,sha256=9aYJ-AmPxP1sQh6ciw04FWHjSn31f2W9afXFwo8wgx4,30441
  autocoder/lang.py,sha256=U6AjVV8Rs1uLyjFCZ8sT6WWuNUxMBqkXXIOs4S120uk,14511
  autocoder/models.py,sha256=PlG1tKHSHwB57cKLOl5gTl5yTzFUDzCgeHPJU3N9F6Q,9106
- autocoder/version.py,sha256=Q3OPt1PiXvKx5xTUCPxUFZP2zsuajLbxdpiOEm_K2L4,23
+ autocoder/version.py,sha256=mNnPow60dgdANkDcEoYTXr9_lpoMQZSEy1-LRu7QFHs,23
  autocoder/agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  autocoder/agent/auto_demand_organizer.py,sha256=NWSAEsEk94vT3lGjfo25kKLMwYdPcpy9e-i21txPasQ,6942
  autocoder/agent/auto_filegroup.py,sha256=CW7bqp0FW1GIEMnl-blyAc2UGT7O9Mom0q66ITz1ckM,6635
@@ -107,15 +107,16 @@ autocoder/privacy/__init__.py,sha256=LnIVvGu_K66zCE-yhN_-dPO8R80pQyedCsXJ7wRqQaI
  autocoder/privacy/model_filter.py,sha256=-N9ZvxxDKpxU7hkn-tKv-QHyXjvkCopUaKgvJwTOGQs,3369
  autocoder/pyproject/__init__.py,sha256=ms-A_pocgGv0oZPEW8JAdXi7G-VSVhkQ6CnWFe535Ec,14477
  autocoder/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- autocoder/rag/api_server.py,sha256=dRbhAZVRAOlZ64Cnxf4_rKb4iJwHnrWS9Zr67IVORw0,7288
- autocoder/rag/doc_filter.py,sha256=yEXaBw1XJH57Gtvk4-RFQtd5eawA6SBjzxeRZrIsQew,11623
+ autocoder/rag/api_server.py,sha256=xiypCkdbclY0Z3Cmq5FTvtKrfQUV7yKcDaFFUttA2n0,7242
+ autocoder/rag/doc_filter.py,sha256=UduVO2mlrngwJICrefjDJTYfdmQ4GcRXrfWDQ7xXksk,14206
  autocoder/rag/document_retriever.py,sha256=5oThtxukGuRFF96o3pHKsk306a8diXbhgSrbqyU2BvM,8894
- autocoder/rag/llm_wrapper.py,sha256=wf56ofQNOaBkLhnoxK9VoVnHWD0gsj0pP8mUBfS92RI,2737
- autocoder/rag/long_context_rag.py,sha256=qFlNmbgQnstCSCb0SxfkMEYtZRr8p6YEc6u0jpve4Q0,32002
+ autocoder/rag/lang.py,sha256=TVNx5m7OtBcdfahzI29tMj9m1yrEm32G1c1zc4ZNIPs,3130
+ autocoder/rag/llm_wrapper.py,sha256=Ht5GF5yJtrztoliujsZzx_ooWZmHkd5xLZKcGEiicZw,4303
+ autocoder/rag/long_context_rag.py,sha256=3CAlf7GM-LgewS5j9XGKvsKSO4MM6M8TTkKxAGzqVY0,39308
  autocoder/rag/rag_config.py,sha256=8LwFcTd8OJWWwi1_WY4IzjqgtT6RyE2j4PjxS5cCTDE,802
  autocoder/rag/rag_entry.py,sha256=6TKtErZ0Us9XSV6HgRKXA6yR3SiZGPHpynOKSaR1wgE,2463
  autocoder/rag/raw_rag.py,sha256=BOr0YGf3umjqXOIDVO1LXQ0bIHx8hzBdiubND2ezyxc,2946
- autocoder/rag/relevant_utils.py,sha256=tgTKGbojCrxuZ7dKbyPh2rCw9TIhwE6ltRxJosaA97U,1267
+ autocoder/rag/relevant_utils.py,sha256=tnv_g25DDWYPGT-mpfubIyZv86_g2gPXjM4FPvdeIEE,1739
  autocoder/rag/simple_directory_reader.py,sha256=LkKreCkNdEOoL4fNhc3_hDoyyWTQUte4uqextISRz4U,24485
  autocoder/rag/simple_rag.py,sha256=I902EUqOK1WM0Y2WFd7RzDJYofElvTZNLVCBtX5A9rc,14885
  autocoder/rag/token_checker.py,sha256=jc76x6KWmvVxds6W8juZfQGaoErudc2HenG3sNQfSLs,2819
@@ -159,15 +160,16 @@ autocoder/utils/queue_communicate.py,sha256=buyEzdvab1QA4i2QKbq35rG5v_9x9PWVLWWM
  autocoder/utils/request_event_queue.py,sha256=r3lo5qGsB1dIjzVQ05dnr0z_9Z3zOkBdP1vmRciKdi4,2095
  autocoder/utils/request_queue.py,sha256=nwp6PMtgTCiuwJI24p8OLNZjUiprC-TsefQrhMI-yPE,3889
  autocoder/utils/rest.py,sha256=hLBhr78y-WVnV0oQf9Rxc22EwqF78KINkScvYa1MuYA,6435
+ autocoder/utils/stream_thinking.py,sha256=vbDObflBFW53eWEjMTEHf3nyL167_cqpDLh9zRx7Yk8,7015
  autocoder/utils/tests.py,sha256=BqphrwyycGAvs-5mhH8pKtMZdObwhFtJ5MC_ZAOiLq8,1340
  autocoder/utils/thread_utils.py,sha256=tv9fhFZOjI18AxVUJbpe_xjBGMpkqgDcOlz9pnDtNik,8583
  autocoder/utils/types.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  autocoder/utils/auto_coder_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  autocoder/utils/auto_coder_utils/chat_stream_out.py,sha256=lkJ_A-sYU36JMzjFWkk3pR6uos8oZHYt9GPsPe_CPAo,11766
  autocoder/utils/chat_auto_coder_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- auto_coder-0.1.278.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
- auto_coder-0.1.278.dist-info/METADATA,sha256=W8ANCnQ9mFraLjuXIUaggoO_jH5Yx-bmm4FNxHLfOvo,2643
- auto_coder-0.1.278.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
- auto_coder-0.1.278.dist-info/entry_points.txt,sha256=0nzHtHH4pNcM7xq4EBA2toS28Qelrvcbrr59GqD_0Ak,350
- auto_coder-0.1.278.dist-info/top_level.txt,sha256=Jqc0_uJSw2GwoFQAa9iJxYns-2mWla-9ok_Y3Gcznjk,10
- auto_coder-0.1.278.dist-info/RECORD,,
+ auto_coder-0.1.280.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
+ auto_coder-0.1.280.dist-info/METADATA,sha256=SDBMvUk6v6YP7RSwlAWHFGfa3LTOUj3fky1Yz0hlFB0,2643
+ auto_coder-0.1.280.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+ auto_coder-0.1.280.dist-info/entry_points.txt,sha256=0nzHtHH4pNcM7xq4EBA2toS28Qelrvcbrr59GqD_0Ak,350
+ auto_coder-0.1.280.dist-info/top_level.txt,sha256=Jqc0_uJSw2GwoFQAa9iJxYns-2mWla-9ok_Y3Gcznjk,10
+ auto_coder-0.1.280.dist-info/RECORD,,
@@ -103,9 +103,7 @@ async def create_chat_completion(
  NOTE: Currently we do not support the following features:
  - function_call (Users should implement this by themselves)
  - logit_bias (to be supported by vLLM engine)
- """
- # async with async_timeout.timeout(TIMEOUT):
-
+ """
  generator = await openai_serving_chat.create_chat_completion(body, request)
  if isinstance(generator, ErrorResponse):
  return JSONResponse(
@@ -1,13 +1,15 @@
  import time
- from typing import List, Dict, Optional
+ from typing import List, Dict, Optional, Generator, Tuple
  from loguru import logger
  from concurrent.futures import ThreadPoolExecutor, as_completed
+ from autocoder.rag.lang import get_message_with_format_and_newline

  from autocoder.rag.relevant_utils import (
  parse_relevance,
  FilterDoc,
  TaskTiming,
- DocFilterResult
+ DocFilterResult,
+ ProgressUpdate
  )

  from autocoder.common import SourceCode, AutoCoderArgs
@@ -49,6 +51,7 @@ def _check_relevance_with_conversation(
  其中, <relevant> 是你认为文档中和问题的相关度,0-10之间的数字,数字越大表示相关度越高。
  """

+
  class DocFilter:
  def __init__(
  self,
@@ -73,10 +76,10 @@ class DocFilter:
  ) -> DocFilterResult:
  return self.filter_docs_with_threads(conversations, documents)

- def filter_docs_with_threads(
+ def filter_docs_with_progress(
  self, conversations: List[Dict[str, str]], documents: List[SourceCode]
- ) -> DocFilterResult:
-
+ ) -> Generator[Tuple[ProgressUpdate, Optional[DocFilterResult]], None, DocFilterResult]:
+ """使用线程过滤文档,同时产生进度更新"""
  start_time = time.time()
  logger.info(f"=== DocFilter Starting ===")
  logger.info(
@@ -93,6 +96,16 @@ class DocFilter:
  relevant_count = 0
  model_name = self.recall_llm.default_model_name or "unknown"

+ doc_filter_result = DocFilterResult(
+ docs=[],
+ raw_docs=[],
+ input_tokens_counts=[],
+ generated_tokens_counts=[],
+ durations=[],
+ model_name=model_name
+ )
+ relevant_docs = doc_filter_result.docs
+
  with ThreadPoolExecutor(
  max_workers=self.args.index_filter_workers or 5
  ) as executor:
@@ -141,16 +154,19 @@ class DocFilter:
  logger.info(
  f"Submitted {submitted_tasks} document filtering tasks to thread pool")

+ # 发送初始进度更新
+ yield (ProgressUpdate(
+ phase="doc_filter",
+ completed=0,
+ total=len(documents),
+ relevant_count=0,
+ message=get_message_with_format_and_newline(
+ "doc_filter_start",
+ total=len(documents)
+ )
+ ), None)
+
  # 处理完成的任务
- doc_filter_result = DocFilterResult(
- docs=[],
- raw_docs=[],
- input_tokens_counts=[],
- generated_tokens_counts=[],
- durations=[],
- model_name=model_name
- )
- relevant_docs = doc_filter_result.docs
  for future in as_completed(list(future_to_doc.keys())):
  try:
  doc, submit_time = future_to_doc[future]
@@ -194,32 +210,50 @@ class DocFilter:
  f"\n - Timing: Duration={task_timing.duration:.2f}s, Processing={task_timing.real_duration:.2f}s, Queue={queue_time:.2f}s"
  f"\n - Response: {v}"
  )
-
+
  if "rag" not in doc.metadata:
  doc.metadata["rag"] = {}
  doc.metadata["rag"]["recall"] = {
  "input_tokens_count": input_tokens_count,
  "generated_tokens_count": generated_tokens_count,
  "recall_model": model_name,
- "duration": task_timing.real_duration
+ "duration": task_timing.real_duration
  }
-
- doc_filter_result.input_tokens_counts.append(input_tokens_count)
- doc_filter_result.generated_tokens_counts.append(generated_tokens_count)
- doc_filter_result.durations.append(task_timing.real_duration)
-
+
+ doc_filter_result.input_tokens_counts.append(
+ input_tokens_count)
+ doc_filter_result.generated_tokens_counts.append(
+ generated_tokens_count)
+ doc_filter_result.durations.append(
+ task_timing.real_duration)
+
  new_filter_doc = FilterDoc(
- source_code=doc,
- relevance=relevance,
- task_timing=task_timing,
- )
-
+ source_code=doc,
+ relevance=relevance,
+ task_timing=task_timing,
+ )
+
  doc_filter_result.raw_docs.append(new_filter_doc)

  if is_relevant:
  relevant_docs.append(
  new_filter_doc
  )
+
+ # 产生进度更新
+ yield (ProgressUpdate(
+ phase="doc_filter",
+ completed=completed_tasks,
+ total=len(documents),
+ relevant_count=relevant_count,
+ message=get_message_with_format_and_newline(
+ "doc_filter_progress",
+ progress_percent=progress_percent,
+ relevant_count=relevant_count,
+ total=len(documents)
+ )
+ ), None)
+
  except Exception as exc:
  try:
  doc, submit_time = future_to_doc[future]
@@ -236,7 +270,7 @@ class DocFilter:
  FilterDoc(
  source_code=doc,
  relevance=None,
- task_timing=TaskTiming(),
+ task_timing=TaskTiming(),
  )
  )
  except Exception as e:
@@ -244,6 +278,18 @@ class DocFilter:
  f"Document filtering error in task tracking: {exc}"
  )

+ # 报告错误进度
+ yield (ProgressUpdate(
+ phase="doc_filter",
+ completed=completed_tasks,
+ total=len(documents),
+ relevant_count=relevant_count,
+ message=get_message_with_format_and_newline(
+ "doc_filter_error",
+ error=str(exc)
+ )
+ ), None)
+
  # Sort relevant_docs by relevance score in descending order
  relevant_docs.sort(
  key=lambda x: x.relevance.relevant_score, reverse=True)
@@ -254,7 +300,7 @@ class DocFilter:
  doc.task_timing.real_duration for doc in relevant_docs) / len(relevant_docs) if relevant_docs else 0
  avg_queue_time = sum(doc.task_timing.real_start_time -
  doc.task_timing.submit_time for doc in relevant_docs) / len(relevant_docs) if relevant_docs else 0
-
+
  total_input_tokens = sum(doc_filter_result.input_tokens_counts)
  total_generated_tokens = sum(doc_filter_result.generated_tokens_counts)

@@ -278,4 +324,33 @@ class DocFilter:
  else:
  logger.warning("No relevant documents found!")

- return doc_filter_result
+ # 返回最终结果
+ yield (ProgressUpdate(
+ phase="doc_filter",
+ completed=len(documents),
+ total=len(documents),
+ relevant_count=relevant_count,
+ message=get_message_with_format_and_newline(
+ "doc_filter_complete",
+ total_time=total_time,
+ relevant_count=relevant_count
+ )
+ ), doc_filter_result)
+
+ def filter_docs_with_threads(
+ self, conversations: List[Dict[str, str]], documents: List[SourceCode]
+ ) -> DocFilterResult:
+ # 保持兼容性的接口
+ for _, result in self.filter_docs_with_progress(conversations, documents):
+ if result is not None:
+ return result
+
+ # 这是一个应急情况,不应该到达这里
+ return DocFilterResult(
+ docs=[],
+ raw_docs=[],
+ input_tokens_counts=[],
+ generated_tokens_counts=[],
+ durations=[],
+ model_name=self.recall_llm.default_model_name or "unknown"
+ )
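
The doc_filter.py changes above turn document filtering into a generator, filter_docs_with_progress, that yields (ProgressUpdate, Optional[DocFilterResult]) tuples; only the final tuple carries the DocFilterResult, and filter_docs_with_threads is kept as a compatibility wrapper. The sketch below is not part of the package: the helper name run_filter_with_progress and the on_progress callback are illustrative, and it assumes ProgressUpdate exposes its fields (phase, completed, total, relevant_count, message) as attributes.

    # Hypothetical consumer of DocFilter.filter_docs_with_progress (sketch only).
    def run_filter_with_progress(doc_filter, conversations, documents, on_progress=print):
        final_result = None
        for progress, result in doc_filter.filter_docs_with_progress(conversations, documents):
            # Every yield carries a ProgressUpdate; only the last one carries a result.
            on_progress(
                f"[{progress.phase}] {progress.completed}/{progress.total} "
                f"relevant={progress.relevant_count} {progress.message.strip()}"
            )
            if result is not None:
                final_result = result
        return final_result
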
autocoder/rag/lang.py ADDED
@@ -0,0 +1,50 @@
+ import locale
+ from byzerllm.utils import format_str_jinja2
+
+ MESSAGES = {
+ "en": {
+ "rag_error_title": "RAG Error",
+ "rag_error_message": "Failed to generate response: {{error}}",
+ "rag_searching_docs": "Searching documents with {{model}}...",
+ "rag_docs_filter_result": "{{model}} processed {{docs_num}} documents, cost {{filter_time}} seconds, input tokens: {{input_tokens}}, output tokens: {{output_tokens}}",
+ "dynamic_chunking_start": "Dynamic chunking start with {{model}}",
+ "dynamic_chunking_result": "Dynamic chunking result with {{model}}, first round cost {{first_round_time}} seconds, second round cost {{sencond_round_time}} seconds, input tokens: {{input_tokens}}, output tokens: {{output_tokens}}, first round full docs: {{first_round_full_docs}}, second round extracted docs: {{second_round_extracted_docs}}",
+ "send_to_model": "Send to model {{model}} with {{tokens}} tokens",
+ "doc_filter_start": "Document filtering start, total {{total}} documents",
+ "doc_filter_progress": "Document filtering progress: {{progress_percent}}% processed {{relevant_count}}/{{total}} documents",
+ "doc_filter_error": "Document filtering error: {{error}}",
+ "doc_filter_complete": "Document filtering complete, cost {{total_time}} seconds, found {{relevant_count}} relevant documents"
+ },
+ "zh": {
+ "rag_error_title": "RAG 错误",
+ "rag_error_message": "生成响应失败: {{error}}",
+ "rag_searching_docs": "正在使用 {{model}} 搜索文档...",
+ "rag_docs_filter_result": "{{model}} 处理了 {{docs_num}} 个文档, 耗时 {{filter_time}} 秒, 输入 tokens: {{input_tokens}}, 输出 tokens: {{output_tokens}}",
+ "dynamic_chunking_start": "使用 {{model}} 进行动态分块",
+ "dynamic_chunking_result": "使用 {{model}} 进行动态分块, 第一轮耗时 {{first_round_time}} 秒, 第二轮耗时 {{sencond_round_time}} 秒, 输入 tokens: {{input_tokens}}, 输出 tokens: {{output_tokens}}, 第一轮全量文档: {{first_round_full_docs}}, 第二轮提取文档: {{second_round_extracted_docs}}",
+ "send_to_model": "发送给模型 {{model}} 的 tokens 数量预估为 {{tokens}}",
+ "doc_filter_start": "开始过滤文档,共 {{total}} 个文档",
+ "doc_filter_progress": "文档过滤进度:{{progress_percent}}%,处理了 {{relevant_count}}/{{total}} 个文档",
+ "doc_filter_error": "文档过滤错误:{{error}}",
+ "doc_filter_complete": "文档过滤完成,耗时 {{total_time}} 秒,找到 {{relevant_count}} 个相关文档"
+ }
+ }
+
+
+ def get_system_language():
+ try:
+ return locale.getdefaultlocale()[0][:2]
+ except:
+ return 'en'
+
+
+ def get_message(key):
+ lang = get_system_language()
+ return MESSAGES.get(lang, MESSAGES['en']).get(key, MESSAGES['en'][key])
+
+
+ def get_message_with_format(msg_key: str, **kwargs):
+ return format_str_jinja2(get_message(msg_key), **kwargs)
+
+ def get_message_with_format_and_newline(msg_key: str, **kwargs):
+ return format_str_jinja2(get_message(msg_key), **kwargs) + "\n"
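
The new autocoder/rag/lang.py picks the "en" or "zh" message set from the system locale and renders it with byzerllm's format_str_jinja2. A short usage sketch (not part of the package; the keys and template variables come from MESSAGES above, the argument values are illustrative):

    # Illustrative only: render two of the message templates defined above.
    from autocoder.rag.lang import (
        get_message_with_format,
        get_message_with_format_and_newline,
    )

    # Language is chosen by get_system_language(), falling back to English.
    banner = get_message_with_format_and_newline("doc_filter_start", total=42)
    summary = get_message_with_format(
        "doc_filter_complete", total_time=3.5, relevant_count=7
    )
    print(banner + summary)
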
@@ -1,61 +1,91 @@
- from typing import Any, Dict, List, Optional, Union,Callable
- from byzerllm.utils.client.types import (
+ from typing import Any, Dict, List, Optional, Union, Callable
+ from byzerllm.utils.client.types import (
  LLMFunctionCallResponse,
- LLMClassResponse,LLMResponse
+ LLMClassResponse, LLMResponse
  )
  import pydantic
  from byzerllm import ByzerLLM
  from byzerllm.utils.client import LLMResponse
  from byzerllm.utils.types import SingleOutputMeta
  from autocoder.rag.simple_rag import SimpleRAG
+ from autocoder.rag.long_context_rag import LongContextRAG
  from loguru import logger
  from byzerllm.utils.langutil import asyncfy_with_semaphore

+
  class LLWrapper:

- def __init__(self,llm:ByzerLLM,rag:SimpleRAG):
+ def __init__(self, llm: ByzerLLM, rag: Union[SimpleRAG, LongContextRAG]):
  self.llm = llm
  self.rag = rag

  def chat_oai(self,
  conversations,
- tools:List[Union[Callable,str]]=[],
- tool_choice:Optional[Union[Callable,str]]=None,
- execute_tool:bool=False,
- impl_func:Optional[Callable]=None,
- execute_impl_func:bool=False,
- impl_func_params:Optional[Dict[str,Any]]=None,
- func_params:Optional[Dict[str,Any]]=None,
- response_class:Optional[Union[pydantic.BaseModel,str]] = None,
- response_after_chat:Optional[Union[pydantic.BaseModel,str]] = False,
- enable_default_sys_message:bool=True,
- model:Optional[str] = None,
- role_mapping=None,llm_config:Dict[str,Any]={}
- )->Union[List[LLMResponse],List[LLMFunctionCallResponse],List[LLMClassResponse]]:
- res,contexts = self.rag.stream_chat_oai(conversations,llm_config=llm_config)
- s = "".join(res)
- return [LLMResponse(output=s,metadata={},input="")]
-
- def stream_chat_oai(self,conversations,
- model:Optional[str]=None,
- role_mapping=None,
- delta_mode=False,
- llm_config:Dict[str,Any]={}):
- res,contexts = self.rag.stream_chat_oai(conversations,llm_config=llm_config)
- for t in res:
- yield (t,SingleOutputMeta(0,0))
-
-
- async def async_stream_chat_oai(self,conversations,
- model:Optional[str]=None,
+ tools: List[Union[Callable, str]] = [],
+ tool_choice: Optional[Union[Callable, str]] = None,
+ execute_tool: bool = False,
+ impl_func: Optional[Callable] = None,
+ execute_impl_func: bool = False,
+ impl_func_params: Optional[Dict[str, Any]] = None,
+ func_params: Optional[Dict[str, Any]] = None,
+ response_class: Optional[Union[pydantic.BaseModel, str]] = None,
+ response_after_chat: Optional[Union[pydantic.BaseModel, str]] = False,
+ enable_default_sys_message: bool = True,
+ model: Optional[str] = None,
+ role_mapping=None,
+ llm_config: Dict[str, Any] = {},
+ only_return_prompt: bool = False,
+ extra_request_params: Dict[str, Any] = {}
+ ) -> Union[List[LLMResponse], List[LLMFunctionCallResponse], List[LLMClassResponse]]:
+ res, contexts = self.rag.stream_chat_oai(
+ conversations, llm_config=llm_config, extra_request_params=extra_request_params)
+ metadata = {"request_id":""}
+ output = ""
+ for chunk in res:
+ output += chunk[0]
+ metadata["input_tokens_count"] = chunk[1].input_tokens_count
+ metadata["generated_tokens_count"] = chunk[1].generated_tokens_count
+ metadata["reasoning_content"] = chunk[1].reasoning_content
+ metadata["finish_reason"] = chunk[1].finish_reason
+ metadata["first_token_time"] = chunk[1].first_token_time
+
+ return [LLMResponse(output=output, metadata=metadata, input="")]
+
+ def stream_chat_oai(self, conversations,
+ model: Optional[str] = None,
  role_mapping=None,
  delta_mode=False,
- llm_config:Dict[str,Any]={}):
- res,contexts = await asyncfy_with_semaphore(lambda: self.rag.stream_chat_oai(conversations,llm_config=llm_config))()
+ llm_config: Dict[str, Any] = {},
+ extra_request_params: Dict[str, Any] = {}
+ ):
+ res, contexts = self.rag.stream_chat_oai(
+ conversations, llm_config=llm_config, extra_request_params=extra_request_params)
+
+ if isinstance(res, tuple):
+ for (t, metadata) in res:
+ yield (t, SingleOutputMeta(
+ input_tokens_count=metadata.get("input_tokens_count", 0),
+ generated_tokens_count=metadata.get(
+ "generated_tokens_count", 0),
+ reasoning_content=metadata.get("reasoning_content", ""),
+ finish_reason=metadata.get("finish_reason", "stop"),
+ first_token_time=metadata.get("first_token_time", 0)
+ ))
+ else:
+ for t in res:
+ yield (t, SingleOutputMeta(0, 0))
+
+ async def async_stream_chat_oai(self, conversations,
+ model: Optional[str] = None,
+ role_mapping=None,
+ delta_mode=False,
+ llm_config: Dict[str, Any] = {},
+ extra_request_params: Dict[str, Any] = {}
+ ):
+ res, contexts = await asyncfy_with_semaphore(lambda: self.rag.stream_chat_oai(conversations, llm_config=llm_config, extra_request_params=extra_request_params))()
  # res,contexts = await self.llm.async_stream_chat_oai(conversations,llm_config=llm_config)
- for t in res:
- yield (t,SingleOutputMeta(0,0))
-
+ for t in res:
+ yield t

- def __getattr__(self, name):
- return getattr(self.llm, name)
+ def __getattr__(self, name):
+ return getattr(self.llm, name)
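
The llm_wrapper.py changes thread extra_request_params through to the RAG backend and make stream_chat_oai yield (text_chunk, SingleOutputMeta) pairs carrying token counts, reasoning_content and finish_reason. A hedged consumer sketch follows; it is not from the package, and the wrapper construction, helper name stream_answer, and conversation content are assumptions for illustration.

    # Sketch of a caller draining LLWrapper.stream_chat_oai (illustrative only).
    def stream_answer(wrapper, question: str) -> None:
        conversations = [{"role": "user", "content": question}]
        generated_tokens = 0
        for chunk, meta in wrapper.stream_chat_oai(conversations):
            print(chunk, end="", flush=True)  # incremental text
            generated_tokens = meta.generated_tokens_count
        print(f"\n[generated tokens: {generated_tokens}]")
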