kweaver-dolphin 0.1.0 (kweaver_dolphin-0.1.0-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199)
  1. DolphinLanguageSDK/__init__.py +58 -0
  2. dolphin/__init__.py +62 -0
  3. dolphin/cli/__init__.py +20 -0
  4. dolphin/cli/args/__init__.py +9 -0
  5. dolphin/cli/args/parser.py +567 -0
  6. dolphin/cli/builtin_agents/__init__.py +22 -0
  7. dolphin/cli/commands/__init__.py +4 -0
  8. dolphin/cli/interrupt/__init__.py +8 -0
  9. dolphin/cli/interrupt/handler.py +205 -0
  10. dolphin/cli/interrupt/keyboard.py +82 -0
  11. dolphin/cli/main.py +49 -0
  12. dolphin/cli/multimodal/__init__.py +34 -0
  13. dolphin/cli/multimodal/clipboard.py +327 -0
  14. dolphin/cli/multimodal/handler.py +249 -0
  15. dolphin/cli/multimodal/image_processor.py +214 -0
  16. dolphin/cli/multimodal/input_parser.py +149 -0
  17. dolphin/cli/runner/__init__.py +8 -0
  18. dolphin/cli/runner/runner.py +989 -0
  19. dolphin/cli/ui/__init__.py +10 -0
  20. dolphin/cli/ui/console.py +2795 -0
  21. dolphin/cli/ui/input.py +340 -0
  22. dolphin/cli/ui/layout.py +425 -0
  23. dolphin/cli/ui/stream_renderer.py +302 -0
  24. dolphin/cli/utils/__init__.py +8 -0
  25. dolphin/cli/utils/helpers.py +135 -0
  26. dolphin/cli/utils/version.py +49 -0
  27. dolphin/core/__init__.py +107 -0
  28. dolphin/core/agent/__init__.py +10 -0
  29. dolphin/core/agent/agent_state.py +69 -0
  30. dolphin/core/agent/base_agent.py +970 -0
  31. dolphin/core/code_block/__init__.py +0 -0
  32. dolphin/core/code_block/agent_init_block.py +0 -0
  33. dolphin/core/code_block/assign_block.py +98 -0
  34. dolphin/core/code_block/basic_code_block.py +1865 -0
  35. dolphin/core/code_block/explore_block.py +1327 -0
  36. dolphin/core/code_block/explore_block_v2.py +712 -0
  37. dolphin/core/code_block/explore_strategy.py +672 -0
  38. dolphin/core/code_block/judge_block.py +220 -0
  39. dolphin/core/code_block/prompt_block.py +32 -0
  40. dolphin/core/code_block/skill_call_deduplicator.py +291 -0
  41. dolphin/core/code_block/tool_block.py +129 -0
  42. dolphin/core/common/__init__.py +17 -0
  43. dolphin/core/common/constants.py +176 -0
  44. dolphin/core/common/enums.py +1173 -0
  45. dolphin/core/common/exceptions.py +133 -0
  46. dolphin/core/common/multimodal.py +539 -0
  47. dolphin/core/common/object_type.py +165 -0
  48. dolphin/core/common/output_format.py +432 -0
  49. dolphin/core/common/types.py +36 -0
  50. dolphin/core/config/__init__.py +16 -0
  51. dolphin/core/config/global_config.py +1289 -0
  52. dolphin/core/config/ontology_config.py +133 -0
  53. dolphin/core/context/__init__.py +12 -0
  54. dolphin/core/context/context.py +1580 -0
  55. dolphin/core/context/context_manager.py +161 -0
  56. dolphin/core/context/var_output.py +82 -0
  57. dolphin/core/context/variable_pool.py +356 -0
  58. dolphin/core/context_engineer/__init__.py +41 -0
  59. dolphin/core/context_engineer/config/__init__.py +5 -0
  60. dolphin/core/context_engineer/config/settings.py +402 -0
  61. dolphin/core/context_engineer/core/__init__.py +7 -0
  62. dolphin/core/context_engineer/core/budget_manager.py +327 -0
  63. dolphin/core/context_engineer/core/context_assembler.py +583 -0
  64. dolphin/core/context_engineer/core/context_manager.py +637 -0
  65. dolphin/core/context_engineer/core/tokenizer_service.py +260 -0
  66. dolphin/core/context_engineer/example/incremental_example.py +267 -0
  67. dolphin/core/context_engineer/example/traditional_example.py +334 -0
  68. dolphin/core/context_engineer/services/__init__.py +5 -0
  69. dolphin/core/context_engineer/services/compressor.py +399 -0
  70. dolphin/core/context_engineer/utils/__init__.py +6 -0
  71. dolphin/core/context_engineer/utils/context_utils.py +441 -0
  72. dolphin/core/context_engineer/utils/message_formatter.py +270 -0
  73. dolphin/core/context_engineer/utils/token_utils.py +139 -0
  74. dolphin/core/coroutine/__init__.py +15 -0
  75. dolphin/core/coroutine/context_snapshot.py +154 -0
  76. dolphin/core/coroutine/context_snapshot_profile.py +922 -0
  77. dolphin/core/coroutine/context_snapshot_store.py +268 -0
  78. dolphin/core/coroutine/execution_frame.py +145 -0
  79. dolphin/core/coroutine/execution_state_registry.py +161 -0
  80. dolphin/core/coroutine/resume_handle.py +101 -0
  81. dolphin/core/coroutine/step_result.py +101 -0
  82. dolphin/core/executor/__init__.py +18 -0
  83. dolphin/core/executor/debug_controller.py +630 -0
  84. dolphin/core/executor/dolphin_executor.py +1063 -0
  85. dolphin/core/executor/executor.py +624 -0
  86. dolphin/core/flags/__init__.py +27 -0
  87. dolphin/core/flags/definitions.py +49 -0
  88. dolphin/core/flags/manager.py +113 -0
  89. dolphin/core/hook/__init__.py +95 -0
  90. dolphin/core/hook/expression_evaluator.py +499 -0
  91. dolphin/core/hook/hook_dispatcher.py +380 -0
  92. dolphin/core/hook/hook_types.py +248 -0
  93. dolphin/core/hook/isolated_variable_pool.py +284 -0
  94. dolphin/core/interfaces.py +53 -0
  95. dolphin/core/llm/__init__.py +0 -0
  96. dolphin/core/llm/llm.py +495 -0
  97. dolphin/core/llm/llm_call.py +100 -0
  98. dolphin/core/llm/llm_client.py +1285 -0
  99. dolphin/core/llm/message_sanitizer.py +120 -0
  100. dolphin/core/logging/__init__.py +20 -0
  101. dolphin/core/logging/logger.py +526 -0
  102. dolphin/core/message/__init__.py +8 -0
  103. dolphin/core/message/compressor.py +749 -0
  104. dolphin/core/parser/__init__.py +8 -0
  105. dolphin/core/parser/parser.py +405 -0
  106. dolphin/core/runtime/__init__.py +10 -0
  107. dolphin/core/runtime/runtime_graph.py +926 -0
  108. dolphin/core/runtime/runtime_instance.py +446 -0
  109. dolphin/core/skill/__init__.py +14 -0
  110. dolphin/core/skill/context_retention.py +157 -0
  111. dolphin/core/skill/skill_function.py +686 -0
  112. dolphin/core/skill/skill_matcher.py +282 -0
  113. dolphin/core/skill/skillkit.py +700 -0
  114. dolphin/core/skill/skillset.py +72 -0
  115. dolphin/core/trajectory/__init__.py +10 -0
  116. dolphin/core/trajectory/recorder.py +189 -0
  117. dolphin/core/trajectory/trajectory.py +522 -0
  118. dolphin/core/utils/__init__.py +9 -0
  119. dolphin/core/utils/cache_kv.py +212 -0
  120. dolphin/core/utils/tools.py +340 -0
  121. dolphin/lib/__init__.py +93 -0
  122. dolphin/lib/debug/__init__.py +8 -0
  123. dolphin/lib/debug/visualizer.py +409 -0
  124. dolphin/lib/memory/__init__.py +28 -0
  125. dolphin/lib/memory/async_processor.py +220 -0
  126. dolphin/lib/memory/llm_calls.py +195 -0
  127. dolphin/lib/memory/manager.py +78 -0
  128. dolphin/lib/memory/sandbox.py +46 -0
  129. dolphin/lib/memory/storage.py +245 -0
  130. dolphin/lib/memory/utils.py +51 -0
  131. dolphin/lib/ontology/__init__.py +12 -0
  132. dolphin/lib/ontology/basic/__init__.py +0 -0
  133. dolphin/lib/ontology/basic/base.py +102 -0
  134. dolphin/lib/ontology/basic/concept.py +130 -0
  135. dolphin/lib/ontology/basic/object.py +11 -0
  136. dolphin/lib/ontology/basic/relation.py +63 -0
  137. dolphin/lib/ontology/datasource/__init__.py +27 -0
  138. dolphin/lib/ontology/datasource/datasource.py +66 -0
  139. dolphin/lib/ontology/datasource/oracle_datasource.py +338 -0
  140. dolphin/lib/ontology/datasource/sql.py +845 -0
  141. dolphin/lib/ontology/mapping.py +177 -0
  142. dolphin/lib/ontology/ontology.py +733 -0
  143. dolphin/lib/ontology/ontology_context.py +16 -0
  144. dolphin/lib/ontology/ontology_manager.py +107 -0
  145. dolphin/lib/skill_results/__init__.py +31 -0
  146. dolphin/lib/skill_results/cache_backend.py +559 -0
  147. dolphin/lib/skill_results/result_processor.py +181 -0
  148. dolphin/lib/skill_results/result_reference.py +179 -0
  149. dolphin/lib/skill_results/skillkit_hook.py +324 -0
  150. dolphin/lib/skill_results/strategies.py +328 -0
  151. dolphin/lib/skill_results/strategy_registry.py +150 -0
  152. dolphin/lib/skillkits/__init__.py +44 -0
  153. dolphin/lib/skillkits/agent_skillkit.py +155 -0
  154. dolphin/lib/skillkits/cognitive_skillkit.py +82 -0
  155. dolphin/lib/skillkits/env_skillkit.py +250 -0
  156. dolphin/lib/skillkits/mcp_adapter.py +616 -0
  157. dolphin/lib/skillkits/mcp_skillkit.py +771 -0
  158. dolphin/lib/skillkits/memory_skillkit.py +650 -0
  159. dolphin/lib/skillkits/noop_skillkit.py +31 -0
  160. dolphin/lib/skillkits/ontology_skillkit.py +89 -0
  161. dolphin/lib/skillkits/plan_act_skillkit.py +452 -0
  162. dolphin/lib/skillkits/resource/__init__.py +52 -0
  163. dolphin/lib/skillkits/resource/models/__init__.py +6 -0
  164. dolphin/lib/skillkits/resource/models/skill_config.py +109 -0
  165. dolphin/lib/skillkits/resource/models/skill_meta.py +127 -0
  166. dolphin/lib/skillkits/resource/resource_skillkit.py +393 -0
  167. dolphin/lib/skillkits/resource/skill_cache.py +215 -0
  168. dolphin/lib/skillkits/resource/skill_loader.py +395 -0
  169. dolphin/lib/skillkits/resource/skill_validator.py +406 -0
  170. dolphin/lib/skillkits/resource_skillkit.py +11 -0
  171. dolphin/lib/skillkits/search_skillkit.py +163 -0
  172. dolphin/lib/skillkits/sql_skillkit.py +274 -0
  173. dolphin/lib/skillkits/system_skillkit.py +509 -0
  174. dolphin/lib/skillkits/vm_skillkit.py +65 -0
  175. dolphin/lib/utils/__init__.py +9 -0
  176. dolphin/lib/utils/data_process.py +207 -0
  177. dolphin/lib/utils/handle_progress.py +178 -0
  178. dolphin/lib/utils/security.py +139 -0
  179. dolphin/lib/utils/text_retrieval.py +462 -0
  180. dolphin/lib/vm/__init__.py +11 -0
  181. dolphin/lib/vm/env_executor.py +895 -0
  182. dolphin/lib/vm/python_session_manager.py +453 -0
  183. dolphin/lib/vm/vm.py +610 -0
  184. dolphin/sdk/__init__.py +60 -0
  185. dolphin/sdk/agent/__init__.py +12 -0
  186. dolphin/sdk/agent/agent_factory.py +236 -0
  187. dolphin/sdk/agent/dolphin_agent.py +1106 -0
  188. dolphin/sdk/api/__init__.py +4 -0
  189. dolphin/sdk/runtime/__init__.py +8 -0
  190. dolphin/sdk/runtime/env.py +363 -0
  191. dolphin/sdk/skill/__init__.py +10 -0
  192. dolphin/sdk/skill/global_skills.py +706 -0
  193. dolphin/sdk/skill/traditional_toolkit.py +260 -0
  194. kweaver_dolphin-0.1.0.dist-info/METADATA +521 -0
  195. kweaver_dolphin-0.1.0.dist-info/RECORD +199 -0
  196. kweaver_dolphin-0.1.0.dist-info/WHEEL +5 -0
  197. kweaver_dolphin-0.1.0.dist-info/entry_points.txt +27 -0
  198. kweaver_dolphin-0.1.0.dist-info/licenses/LICENSE.txt +201 -0
  199. kweaver_dolphin-0.1.0.dist-info/top_level.txt +2 -0
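
A quick way to cross-check this listing is to read the wheel archive directly, since a wheel is a plain ZIP file. The sketch below is illustrative only and is not part of the package; it assumes the wheel has been downloaded as kweaver_dolphin-0.1.0-py3-none-any.whl into the current directory.

import zipfile

# List every file packaged in the wheel; the names should match the table above
# plus the kweaver_dolphin-0.1.0.dist-info/ metadata entries.
with zipfile.ZipFile("kweaver_dolphin-0.1.0-py3-none-any.whl") as whl:
    for name in sorted(whl.namelist()):
        print(name)
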
dolphin/core/llm/llm_client.py
@@ -0,0 +1,1285 @@
1
+ import asyncio
2
+ import json
3
+ import re
4
+ from typing import Optional
5
+
6
+ from dolphin.core.common.exceptions import ModelException
7
+ import aiohttp
8
+
9
+ from dolphin.core.common.enums import Messages
10
+ from dolphin.core.config.global_config import TypeAPI
11
+ from dolphin.core.context.context import Context
12
+ from dolphin.core.llm.llm import LLMModelFactory, LLMOpenai
13
+ from dolphin.core.config.global_config import (
14
+ LLMInstanceConfig,
15
+ ContextConstraints,
16
+ )
17
+ from dolphin.core.message.compressor import MessageCompressor
18
+ from dolphin.core.common.constants import (
19
+ CHINESE_CHAR_TO_TOKEN_RATIO,
20
+ COUNT_TO_PROVE_DUPLICATE_OUTPUT,
21
+ DUPLICATE_PATTERN_LENGTH,
22
+ MIN_LENGTH_TO_DETECT_DUPLICATE_OUTPUT,
23
+ count_overlapping_occurrences,
24
+ get_msg_duplicate_output,
25
+ )
26
+ from dolphin.core.logging.logger import console
27
+ from dolphin.core.llm.message_sanitizer import sanitize_and_log
28
+
29
+ """1. Calculate token usage and update the global variable pool; if the usage variable does not exist in the global variable pool, add it first.
30
+ 2. Unified calls to models supported by the anydata model factory:
31
+ Supports streaming and non-streaming (mf_chat, mf_chat_stream)
32
+ 3. Call the DeepSeek native interface:
33
+ Supports streaming and non-streaming (deepseek_chat, deepseek_chat_stream)
34
+ Supports structured output (response_format=True/False); if structured output is to be used, the instruction must include the word "json"
35
+ """
36
+
37
+
38
+ class LLMClient:
39
+ Retry_Count = 3
40
+
41
+ def __init__(self, context: Context):
42
+ self.context = context
43
+ # Get compressor configuration from global configuration
44
+ global_config = self.context.get_config()
45
+ compressor_config = None
46
+ if hasattr(global_config, "message_compressor_config"):
47
+ compressor_config = global_config.message_compressor_config
48
+ elif hasattr(global_config, "context_engineer_config"):
49
+ compressor_config = global_config.context_engineer_config
50
+ assert compressor_config, "message_compressor_config/context_engineer_config is None"
51
+ # Initialize message compressor (originally ContextEngineer)
52
+ self.message_compressor = MessageCompressor(compressor_config, context)
53
+
54
+ @property
55
+ def config(self):
56
+ return self.context.get_config()
57
+
58
+ def get_model_config(self, model_name: Optional[str]) -> LLMInstanceConfig:
59
+ return self.config.get_model_config(model_name)
60
+
61
+ def set_context_strategy(self, strategy_name: str):
62
+ """Set the default context compression strategy"""
63
+ self.message_compressor.config.default_strategy = strategy_name
64
+
65
+ def register_context_strategy(self, name: str, strategy):
66
+ """Register a new context compression strategy"""
67
+ self.message_compressor.register_strategy(name, strategy)
68
+
69
+ def get_available_strategies(self) -> list:
70
+ """Get the list of available compression strategies"""
71
+ return self.message_compressor.get_available_strategies()
72
+
73
+ def set_context_constraints(self, constraints: ContextConstraints):
74
+ """Set context constraints"""
75
+ self.message_compressor.config.constraints = constraints
76
+
77
+ async def update_usage(self, final_chunk):
78
+ self.context.update_usage(final_chunk)
79
+
80
+ def check_error(self, line, model_name="unknown"):
81
+ try:
82
+ line_str = line.decode()
83
+ error = json.loads(line_str, strict=False)
84
+ raise Exception(f"LLM {model_name} request error: {error}")
85
+ except Exception as e:
86
+ console(f"check_error error: {e}")
87
+ raise e
88
+
89
+ # TTC Custom Prompts
90
+ def get_reflection_prompt(self, mode: str) -> str:
91
+ """Get prompts corresponding to different reflection modes"""
92
+ prompts = {
93
+ "反思": "请对你的回答进行反思,找出可能存在的问题或不足并改进(注意,有可能第一次回答就是正确的,此时重复你上一次的答案即可),回答需要忠于用户原始需求。最终只需要输出改进后的答案,不要输出任何其他内容。",
94
+ "验证": "请仔细检查你的回答是否正确,找出并修正所有可能的错误并改正(注意,有可能第一次回答就是正确的,此时重复你上一次的答案即可),回答需要忠于用户原始需求。注意,你只需要给出最终的正确答案,不要输出任何其他内容。",
95
+ "修正": "请重新审视你的回答,进行必要的修正和完善(注意,有可能第一次回答就是正确的,此时重复你上一次的答案即可),使其更加准确、全面和有条理。回答需要忠于用户原始需求。你只需要输出完善后的答案,不要输出任何其他内容。",
96
+ "精调": "请对你的回答进行精细调整,使表达更加清晰、内容更加丰富、逻辑更加严密。回答需要忠于用户原始需求。你只需要输出调整后的答案,不要输出任何其他内容。",
97
+ }
98
+ return prompts.get(mode, mode) # If it's a custom prompt, return directly
99
+
100
+ # Basic mf_chat_stream call, for internal use in TTC mode
101
+ async def _basic_mf_chat_stream(
102
+ self,
103
+ messages: Messages,
104
+ model=None,
105
+ temperature=None,
106
+ strategy_name=None,
107
+ **kwargs,
108
+ ):
109
+ """Basic streaming LLM call
110
+
111
+ Args:
112
+ messages: List of messages
113
+ model: Name of the model
114
+ temperature: Temperature parameter
115
+ strategy_name: Name of the compression strategy
116
+
117
+ Returns:
118
+ async generator yielding content chunks
119
+ """
120
+ model_config = self.get_model_config(model)
121
+
122
+ # Debug log: Records detailed request parameters
123
+ request_info = {
124
+ "model": model_config.model_name,
125
+ "temperature": (
126
+ temperature if temperature is not None else model_config.temperature
127
+ ),
128
+ "top_p": model_config.top_p,
129
+ "top_k": model_config.top_k,
130
+ "max_tokens": model_config.max_tokens,
131
+ "strategy_name": strategy_name,
132
+ "messages_count": len(messages) if messages else 0,
133
+ "messages_preview": [
134
+ {
135
+ "role": msg.role.value if hasattr(msg, 'role') else msg.get("role", "unknown"),
136
+ "content_preview": (
137
+ msg.get_content_preview() if hasattr(msg, 'get_content_preview')
138
+ else {"type": "text", "length": len(str(msg.get("content", "")))}
139
+ ),
140
+ }
141
+ for msg in (
142
+ messages[-3:] if messages else []
143
+ ) # Show preview of only the last 3 messages
144
+ ],
145
+ }
146
+ self.context.debug(f"LLM request started: {request_info}")
147
+
148
+ try:
149
+ # Use a message compressor to process messages, pass model_config so it can automatically adjust constraints
150
+ compression_result = self.message_compressor.compress_messages(
151
+ messages,
152
+ strategy_name=strategy_name,
153
+ model_config=model_config,
154
+ **kwargs,
155
+ )
156
+
157
+ # Sanitize messages for OpenAI compatibility
158
+ sanitized_messages = sanitize_and_log(
159
+ compression_result.compressed_messages.get_messages_as_dict(),
160
+ self.context.warn,
161
+ )
162
+
163
+ # Build request payload
164
+ payload = {
165
+ "model": model_config.model_name,
166
+ "temperature": (
167
+ temperature if temperature is not None else model_config.temperature
168
+ ),
169
+ "top_p": model_config.top_p,
170
+ "top_k": model_config.top_k,
171
+ "messages": sanitized_messages,
172
+ "max_tokens": model_config.max_tokens,
173
+ "stream": True,
174
+ }
175
+
176
+ line_json = {}
177
+ accu_content = ""
178
+ reasoning_content = ""
179
+
180
+ timeout = aiohttp.ClientTimeout(
181
+ total=1800, # Overall timeout of 30 minutes for the full streamed response
182
+ sock_connect=30, # Keep connection timeout
183
+ sock_read=300, # Single read timeout (for slow streaming data)
184
+ )
185
+ async with aiohttp.ClientSession(timeout=timeout) as session:
186
+ async with session.post(
187
+ model_config.api,
188
+ json=payload,
189
+ headers=model_config.headers,
190
+ ssl=False,
191
+ ) as response:
192
+ if response.status != 200:
193
+ error_str = await response.text()
194
+ # Error log: Records detailed request and error information
195
+ error_info = {
196
+ "status_code": response.status,
197
+ "model": model_config.model_name,
198
+ "api_endpoint": model_config.api,
199
+ "payload_summary": {
200
+ "model": payload.get("model"),
201
+ "temperature": payload.get("temperature"),
202
+ "messages_count": len(payload.get("messages", [])),
203
+ "max_tokens": payload.get("max_tokens"),
204
+ },
205
+ "error_response": (
206
+ error_str[:1000] if error_str else "No error details"
207
+ ), # Limit error message length
208
+ "request_headers": {
209
+ k: v
210
+ for k, v in model_config.headers.items()
211
+ if k.lower() not in ["authorization"]
212
+ }, # Filter sensitive information
213
+ }
214
+ self.context.error(f"LLM HTTP error: {error_info}")
215
+ raise RuntimeError(
216
+ f"LLM {model_config.model_name} request error (status {response.status}): {error_str}"
217
+ )
218
+ async for line in response.content:
219
+ if not line.startswith(b"data"):
220
+ if not line.strip():
221
+ continue
222
+ self.check_error(line, model_config.model_name)
223
+ continue
224
+ line_decoded = line.decode().split("data:", 1)[1]
225
+ if "[DONE]" in line_decoded:
226
+ break
227
+ try:
228
+ line_json = json.loads(line_decoded, strict=False)
229
+ except json.JSONDecodeError as e:
230
+ raise ValueError(
231
+ f"LLM {model_config.model_name} response JSON decode error: {line_decoded}"
232
+ ) from e
233
+ if line_json.get("choices"):
234
+ # Accumulate content
235
+ delta_content = (
236
+ line_json["choices"][0].get("delta", {}).get("content")
237
+ or ""
238
+ )
239
+ delta_reasoning = (
240
+ line_json["choices"][0]
241
+ .get("delta", {})
242
+ .get("reasoning_content")
243
+ or ""
244
+ )
245
+
246
+ accu_content += delta_content
247
+ reasoning_content += delta_reasoning
248
+
249
+ if line_json.get("usage") or line_json["choices"][0].get(
250
+ "usage"
251
+ ):
252
+ await self.update_usage(line_json)
253
+
254
+ yield {
255
+ "content": accu_content,
256
+ "reasoning_content": reasoning_content,
257
+ }
258
+
259
+ # After the stream ends, record usage from the final chunk if it carried choices
260
+ if line_json.get("choices"):
261
+ await self.update_usage(line_json)
262
+ except aiohttp.ClientError as e:
263
+ # Error log: Records network connection errors
264
+ error_info = {
265
+ "error_type": "ClientError",
266
+ "model": model_config.model_name,
267
+ "api_endpoint": model_config.api,
268
+ "error_message": str(e),
269
+ "error_class": type(e).__name__,
270
+ "request_config": (
271
+ model_config.to_dict()
272
+ if hasattr(model_config, "to_dict")
273
+ else str(model_config)
274
+ ),
275
+ }
276
+ self.context.error(f"LLM client connection error: {error_info}")
277
+ raise ConnectionError(
278
+ f"LLM {model_config.model_name} connection error: {repr(e)}"
279
+ ) from e
280
+ except Exception as e:
281
+ # Error log: records other unexpected errors
282
+ error_info = {
283
+ "error_type": "UnexpectedError",
284
+ "model": model_config.model_name,
285
+ "error_message": str(e),
286
+ "error_class": type(e).__name__,
287
+ "request_config": (
288
+ model_config.to_dict()
289
+ if hasattr(model_config, "to_dict")
290
+ else str(model_config)
291
+ ),
292
+ }
293
+ self.context.error(f"LLM unexpected error: {error_info}")
294
+ raise
295
+
296
+ # Streaming, calling anydata model factory-supported models
297
+ async def mf_chat_stream(
298
+ self,
299
+ messages: Messages,
300
+ continous_content: Optional[str] = None,
301
+ model: Optional[str] = None,
302
+ temperature=None,
303
+ ttc_mode=None,
304
+ output_var=None,
305
+ lang_mode=None,
306
+ context_strategy=None,
307
+ no_cache=False,
308
+ **kwargs,
309
+ ):
310
+ """Stream LLM calls, supporting TTC (Test-Time Computing) mode
311
+
312
+ Args:
313
+ messages: List of messages
314
+ model: Name of the model; if not provided, use the default model
315
+ temperature: Temperature parameter controlling the randomness of the output
316
+ ttc_mode: TTC mode configuration, including parameters such as name and control_vars
317
+ output_var: Name of the output variable for storing results
318
+ lang_mode: Language mode, such as "prompt", "judge", or "explore"
319
+ context_strategy: Name of the context compression strategy, such as "truncation" or "sliding_window_10"
320
+
321
+ Returns:
322
+ async generator providing content chunks
323
+ """
324
+ # If TTC mode is not specified, directly call the base method.
325
+ if not ttc_mode:
326
+ async for chunk in self._chat_stream(
327
+ messages=messages,
328
+ model=model,
329
+ temperature=temperature,
330
+ continous_content=continous_content,
331
+ strategy_name=context_strategy,
332
+ no_cache=no_cache,
333
+ **kwargs,
334
+ ):
335
+ yield chunk
336
+ return
337
+
338
+ # Handle according to TTC mode name
339
+ ttc_name = ttc_mode.get("name", "")
340
+ if ttc_name == "self-reflection":
341
+ # Self-Reflection Mode
342
+ console(f"使用自我反思模式,参数: {ttc_mode}")
343
+
344
+ # Extract parameters from ttc_mode
345
+ control_vars = ttc_mode.get("control_vars", "反思") # Default uses "Reflection" mode
346
+ max_iterations = ttc_mode.get("max_iterations")
347
+ token_budget = ttc_mode.get("token_budget")
348
+ special_token = ttc_mode.get("special_token")
349
+
350
+ # Calling Self-Reflection Streaming Implementation
351
+
352
+ async for chunk in self.run_self_reflection_stream(
353
+ messages,
354
+ model,
355
+ control_vars,
356
+ max_iterations,
357
+ token_budget,
358
+ special_token,
359
+ output_var,
360
+ lang_mode,
361
+ ):
362
+ yield chunk
363
+
364
+ elif ttc_name == "bon":
365
+ # Best-of-N (BoN) mode
366
+ console(f"使用最佳选择模式,参数: {ttc_mode}")
367
+
368
+ # Extract control variables and evaluate models
369
+ control_vars = ttc_mode.get("control_vars", [])
370
+ eval_str = ttc_mode.get("eval", "")
371
+
372
+ # Modify the extraction logic of eval_model
373
+ # Check whether it is in the llm-as-a-judge format and contains a specific model name
374
+ if "llm-as-a-judge(" in eval_str and ")" in eval_str:
375
+ # Extract the model name within parentheses
376
+ eval_model = eval_str.replace("llm-as-a-judge(", "").replace(")", "")
377
+ # If the extracted model name is empty, use the default model.
378
+ if not eval_model.strip():
379
+ eval_model = model
380
+ else:
381
+ # Not in LLM-as-a-judge format or model not specified, use default model
382
+ eval_model = model
383
+
384
+ # Best choice streaming implementation
385
+ async for chunk in self.run_bon_stream(
386
+ messages, control_vars, eval_model, output_var, lang_mode
387
+ ):
388
+ yield chunk
389
+
390
+ elif ttc_name == "majority-voting":
391
+ # Majority Voting Pattern
392
+ console(f"使用多数投票模式,参数: {ttc_mode}")
393
+
394
+ # Extract control variables and evaluate models
395
+ control_vars = ttc_mode.get("control_vars", [])
396
+ eval_str = ttc_mode.get("eval", "")
397
+
398
+ # Modify the extraction logic of eval_model to be consistent with bon mode.
399
+ if "llm-as-a-judge(" in eval_str and ")" in eval_str:
400
+ # Extract the model name within parentheses
401
+ eval_model = eval_str.replace("llm-as-a-judge(", "").replace(")", "")
402
+ # If the extracted model name is empty, use the default model.
403
+ if not eval_model.strip():
404
+ eval_model = model
405
+ else:
406
+ # Not in the LLM-as-a-judge format or no model specified, use default model
407
+ eval_model = model
408
+ # Majority Voting Streaming Implementation
409
+ async for chunk in self.run_majority_voting_stream(
410
+ messages, control_vars, eval_model, output_var, lang_mode
411
+ ):
412
+ yield chunk
413
+
414
+ else:
415
+ # Unknown TTC type, falling back to basic streaming call
416
+ console(f"未知的TTC模式类型: {ttc_name},使用基础流式调用作为后备方案")
417
+ async for chunk in self._chat_stream(
418
+ messages=messages,
419
+ model=model,
420
+ temperature=temperature,
421
+ continous_content=continous_content,
422
+ strategy_name=context_strategy,
423
+ ):
424
+ yield chunk
425
+
426
+ def mf_chat(
427
+ self,
428
+ messages: Messages,
429
+ model=None,
430
+ temperature=None,
431
+ ttc_mode=None,
432
+ output_var=None,
433
+ lang_mode=None,
434
+ context_strategy=None,
435
+ no_cache=False,
436
+ **kwargs,
437
+ ):
438
+ """Synchronously call LLM without streaming, implemented based on mf_chat_stream, supports TTC mode.
439
+
440
+ Args:
441
+ messages: List of messages
442
+ model: Name of the model, uses default model if not provided
443
+ temperature: Temperature parameter, controls randomness of output
444
+ ttc_mode: TTC mode configuration, including name, control_vars, etc.
445
+ output_var: Name of the output variable for storing results
446
+ lang_mode: Language mode, such as "prompt", "judge", or "explore"
447
+ context_strategy: Name of the context compression strategy
448
+
449
+ Returns:
450
+ string: Final content returned by LLM
451
+ """
452
+
453
+ async def get_result():
454
+ final_content = ""
455
+ # continous_content is for streaming, so it's None here.
456
+ async for chunk in self.mf_chat_stream(
457
+ messages=messages,
458
+ continous_content=None,
459
+ model=model,
460
+ temperature=temperature,
461
+ ttc_mode=ttc_mode,
462
+ output_var=output_var,
463
+ lang_mode=lang_mode,
464
+ context_strategy=context_strategy,
465
+ no_cache=no_cache,
466
+ **kwargs,
467
+ ):
468
+ if chunk and "content" in chunk:
469
+ final_content = chunk["content"]
470
+ return final_content
471
+
472
+ try:
473
+ loop = asyncio.get_running_loop()
474
+ except RuntimeError: # 'There is no current event loop...'
475
+ loop = None
476
+
477
+ if loop and loop.is_running():
478
+ # If there's a running loop, we need to avoid deadlock
479
+ # Create a new thread to run the coroutine
480
+ import concurrent.futures
481
+
482
+ def run_in_new_loop():
483
+ # Create a new event loop in this thread
484
+ new_loop = asyncio.new_event_loop()
485
+ asyncio.set_event_loop(new_loop)
486
+ try:
487
+ return new_loop.run_until_complete(get_result())
488
+ finally:
489
+ new_loop.close()
490
+
491
+ with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
492
+ future = executor.submit(run_in_new_loop)
493
+ return future.result(timeout=60) # 60 second timeout
494
+ else:
495
+ # If there is no running loop, we can use asyncio.run()
496
+ return asyncio.run(get_result())
497
+
498
+ # Streaming self-reflection implementation
499
+ async def run_self_reflection_stream(
500
+ self,
501
+ messages,
502
+ model,
503
+ control_vars,
504
+ max_iterations=3,
505
+ token_budget=None,
506
+ special_token=None,
507
+ output_var=None,
508
+ lang_mode=None,
509
+ ):
510
+ """Streaming version of the self-reflective TTC pattern implementation
511
+
512
+ Args:
513
+ messages: List of messages
514
+ model: Name of the model
515
+ control_vars: Control variables, can be "reflection", "verification", "correction", "fine-tuning", or custom instructions
516
+ max_iterations: Maximum number of iterations, one of the termination conditions
517
+ token_budget: Token budget limit, one of the termination conditions
518
+ special_token: Special end marker, one of the termination conditions
519
+ output_var: Name of the output variable
520
+ lang_mode: Language mode, indicating prompt/judge/explore
521
+
522
+ Returns:
523
+ async generator, yielding content chunks
524
+ """
525
+ # Determine the actual termination condition
526
+ stop_condition = None
527
+ if max_iterations is not None:
528
+ stop_condition = "max_iterations"
529
+ elif token_budget is not None:
530
+ stop_condition = "token_budget"
531
+ elif special_token is not None:
532
+ stop_condition = "special_token"
533
+ else:
534
+ stop_condition = "max_iterations" # Default use of iteration count limit
535
+ max_iterations = 3 # Default 3 iterations
536
+
537
+ ttc_result = {
538
+ "processing": [], # Historical Information in Dialogue Form
539
+ "control_vars": stop_condition, # Ways to stop iteration
540
+ "ttc_mode": "self-reflection", # TTC Mode Name
541
+ "final_answer": "", # Final answer
542
+ }
543
+
544
+ # Set/Update Global Variables
545
+ ttc_var_name = f"{lang_mode}_ttc_mode_{output_var}"
546
+ if not self.context.get_var_value(ttc_var_name):
547
+ self.context.set_variable(ttc_var_name, ttc_result)
548
+ # yield empty content to notify the upper layer each time ttc_result is updated
549
+ yield {"content": "", "reasoning_content": ""}
550
+
551
+ # Use the original messages list to build the conversation history by appending messages
552
+ # To avoid modifying the original messages, we create a copy.
553
+ conversation_messages = messages.copy()
554
+
555
+ # Initialize ttc_result["processing"] as conversation history
556
+ ttc_result["processing"] = conversation_messages.copy()
557
+ self.context.set_variable(ttc_var_name, ttc_result)
558
+ yield {"content": "", "reasoning_content": ""}
559
+
560
+ # Get initial response
561
+ current_content = ""
562
+ current_reasoning = ""
563
+
564
+ # Initialize token counting - now calculates total token consumption for input and output
565
+ total_tokens = 0
566
+
567
+ # Streaming output of the initial response - using original messages
568
+ # Add a placeholder as the assistant's response for real-time updates
569
+ temp_assistant_msg = {"role": "assistant", "content": ""}
570
+ ttc_result["processing"] = conversation_messages.copy() + [temp_assistant_msg]
571
+ self.context.set_variable(ttc_var_name, ttc_result)
572
+
573
+ # The first call will definitely incur token usage - estimate the token count for all input messages
574
+ for msg in conversation_messages:
575
+ # Estimate the number of tokens per message (using a constant ratio of Chinese characters to tokens)
576
+
577
+ total_tokens += int(
578
+ len(msg.get("content", "")) / CHINESE_CHAR_TO_TOKEN_RATIO
579
+ )
580
+
581
+ async for chunk in self._basic_mf_chat_stream(conversation_messages, model):
582
+ current_content = chunk["content"]
583
+ current_reasoning = chunk.get("reasoning_content", "")
584
+
585
+ # Update the last message in ttc_result["processing"] in real time
586
+ temp_assistant_msg["content"] = current_content
587
+ ttc_result["processing"][-1] = temp_assistant_msg.copy()
588
+ self.context.set_variable(ttc_var_name, ttc_result)
589
+
590
+ # yield empty content to notify the upper layer each time ttc_result is updated
591
+ yield {"content": "", "reasoning_content": ""}
592
+
593
+ # First response ends, replacing the temporary placeholder with the official assistant message
594
+ conversation_messages.append({"role": "assistant", "content": current_content})
595
+ ttc_result["processing"] = conversation_messages.copy()
596
+ self.context.set_variable(ttc_var_name, ttc_result)
597
+ yield {"content": "", "reasoning_content": ""}
598
+
599
+ # Update the token count for the initial response
600
+ total_tokens += int(len(current_content) / CHINESE_CHAR_TO_TOKEN_RATIO)
601
+
602
+ # Construct reflection prompts based on control_vars
603
+ # control_vars can be "reflection", "verification", "revision", "fine-tuning", or custom instructions
604
+ reflection_prompt = self.get_reflection_prompt(control_vars)
605
+ iteration_count = 0
606
+ # Start iterating the reflection process
607
+ while True:
608
+ iteration_count += 1
609
+
610
+ # Check if the iteration count limit is reached (Termination condition 1: max_iterations)
611
+ if stop_condition == "max_iterations" and iteration_count > max_iterations:
612
+ # Set the final answer after the last iteration
613
+ ttc_result["final_answer"] = current_content
614
+ self.context.set_variable(ttc_var_name, ttc_result)
615
+ yield {"content": "", "reasoning_content": ""}
616
+ break
617
+
618
+ # Add reflection prompt as user message
619
+ conversation_messages.append({"role": "user", "content": reflection_prompt})
620
+ ttc_result["processing"] = conversation_messages.copy()
621
+ self.context.set_variable(ttc_var_name, ttc_result)
622
+ yield {"content": "", "reasoning_content": ""}
623
+
624
+ # Update token consumption - calculate tokens for all input messages
625
+ # Due to the lack of caching, the token consumption of the entire conversation history needs to be recalculated on each call.
626
+ input_tokens = 0
627
+ for msg in conversation_messages:
628
+ input_tokens += int(
629
+ len(msg.get("content", "")) / CHINESE_CHAR_TO_TOKEN_RATIO
630
+ )
631
+ total_tokens += input_tokens
632
+
633
+ # Check if the token budget has been exceeded (termination condition 2: token_budget)
634
+ if (
635
+ stop_condition == "token_budget"
636
+ and token_budget is not None
637
+ and total_tokens > token_budget
638
+ ):
639
+ # Exceeded token budget limit prematurely, set final answer and exit
640
+ ttc_result["final_answer"] = current_content
641
+ self.context.set_variable(ttc_var_name, ttc_result)
642
+ yield {"content": "", "reasoning_content": ""}
643
+ break
644
+
645
+ # Get reflection results - Add temporary placeholders for real-time updates
646
+ temp_assistant_msg = {"role": "assistant", "content": ""}
647
+ ttc_result["processing"] = conversation_messages.copy() + [
648
+ temp_assistant_msg
649
+ ]
650
+ self.context.set_variable(ttc_var_name, ttc_result)
651
+ yield {"content": "", "reasoning_content": ""}
652
+
653
+ # Stream the reflection results
654
+ reflection_result = ""
655
+ async for chunk in self._basic_mf_chat_stream(conversation_messages, model):
656
+ reflection_result = chunk["content"]
657
+
658
+ # Update the last message in ttc_result["processing"] in real time
659
+ temp_assistant_msg["content"] = reflection_result
660
+ ttc_result["processing"][-1] = temp_assistant_msg.copy()
661
+ self.context.set_variable(ttc_var_name, ttc_result)
662
+ # Yield empty content each time to notify the upper layer
663
+ yield {"content": "", "reasoning_content": ""}
664
+
665
+ # Reflection result output completed, updated to conversation_messages
666
+ conversation_messages.append(
667
+ {"role": "assistant", "content": reflection_result}
668
+ )
669
+ ttc_result["processing"] = conversation_messages.copy()
670
+ self.context.set_variable(ttc_var_name, ttc_result)
671
+ yield {"content": "", "reasoning_content": ""}
672
+
673
+ # Update the token count for reflection results - count only the output portion
674
+ output_tokens = int(len(reflection_result) / CHINESE_CHAR_TO_TOKEN_RATIO)
675
+ total_tokens += output_tokens
676
+
677
+ # Check if the token budget has been exceeded (termination condition 2: token_budget)
678
+ if (
679
+ stop_condition == "token_budget"
680
+ and token_budget is not None
681
+ and total_tokens > token_budget
682
+ ):
683
+ # Reached token budget limit, set final answer
684
+ ttc_result["final_answer"] = current_content
685
+ self.context.set_variable(ttc_var_name, ttc_result)
686
+ yield {"content": "", "reasoning_content": ""}
687
+ break
688
+
689
+ # Check whether it contains special tokens (termination condition 3: special_token)
690
+ if (
691
+ stop_condition == "special_token"
692
+ and special_token is not None
693
+ and special_token in reflection_result
694
+ ):
695
+ ttc_result["final_answer"] = current_content
696
+ self.context.set_variable(ttc_var_name, ttc_result)
697
+ yield {"content": "", "reasoning_content": ""}
698
+ break
699
+
700
+ # Update current content and reasoning - Reflective results directly become current content
701
+ current_content = reflection_result
702
+ current_reasoning = ""
703
+
704
+ # Set the final answer and return
705
+ ttc_result["final_answer"] = current_content
706
+ self.context.set_variable(ttc_var_name, ttc_result)
707
+
708
+ # Return the final result
709
+ yield {"content": current_content, "reasoning_content": current_reasoning}
710
+
711
+ async def run_bon_stream(
712
+ self, messages, control_vars, eval_model, output_var=None, lang_mode=None
713
+ ):
714
+ """Best-of-N TTC mode implementation with streaming, using concurrent methods to obtain candidate answers.
715
+
716
+ Args:
717
+ messages: List of messages
718
+ control_vars: Control variables, which can be a temperature list [0, 0.5, 1.0] or a model name list ["R1", "qwen-max", "deepseek-v3"]
719
+ eval_model: Evaluation model, usually extracted from "llm-as-a-judge(model name)"
720
+ output_var: Name of the output variable
721
+ lang_mode: Language mode, indicating prompt/judge/explore
722
+
723
+ Returns:
724
+ async generator providing content chunks
725
+ """
726
+
727
+ # Check the type of control_vars to determine whether it's a temperature change or a model change.
728
+ mode_type = (
729
+ "temperature"
730
+ if all(isinstance(x, (int, float)) for x in control_vars)
731
+ else "model"
732
+ )
733
+
734
+ # Initialize ttc_result, dynamically add key-value pairs for each model or temperature
735
+ ttc_result = {
736
+ "ttc_mode": "bon", # TTC Mode Name
737
+ "final_answer": "", # Final answer, generated by the evaluation model
738
+ }
739
+
740
+ # Set/Update Global Variables
741
+ ttc_var_name = f"{lang_mode}_ttc_mode_{output_var}"
742
+ self.context.set_variable(ttc_var_name, ttc_result)
743
+ # yield empty content to notify the upper layer every time ttc_result is updated
744
+ yield {"content": "", "reasoning_content": ""}
745
+
746
+ # Define an asynchronous function to retrieve a single candidate answer
747
+ async def get_candidate_answer(var, is_temperature=True):
748
+ response_content = ""
749
+ response_reasoning = ""
750
+ temp_key = str(var) if is_temperature else var
751
+ model_to_use = self.model_name if is_temperature else var
752
+ temperature_to_use = var if is_temperature else None
753
+
754
+ async for chunk in self._basic_mf_chat_stream(
755
+ messages, model_to_use, temperature_to_use
756
+ ):
757
+ response_content = chunk["content"]
758
+ response_reasoning = chunk.get("reasoning_content", "")
759
+
760
+ # Update ttc_result in real time
761
+ ttc_result[temp_key] = {
762
+ "content": response_content,
763
+ "reasoning_content": response_reasoning,
764
+ }
765
+ self.context.set_variable(ttc_var_name, ttc_result)
766
+
767
+ # Cannot yield from inside this helper: it runs under asyncio.gather, so the result is returned instead
768
+
769
+ result = {
770
+ "content": response_content,
771
+ "reasoning_content": response_reasoning,
772
+ }
773
+ return result
774
+
775
+ # Get all candidate answers using asynchronous concurrency
776
+ is_temperature_mode = mode_type == "temperature"
777
+ tasks = [get_candidate_answer(var, is_temperature_mode) for var in control_vars]
778
+ # Notify the start of concurrent fetching of candidate answers
779
+ yield {"content": "", "reasoning_content": ""}
780
+ # Wait for all tasks to complete
781
+ candidates = await asyncio.gather(*tasks)
782
+ # Re-notification: Candidate answer retrieval completed
783
+ yield {"content": "", "reasoning_content": ""}
784
+
785
+ # Construct evaluation prompts, explicitly requiring the selection of the best answer and output in a fixed format.
786
+ candidate_texts = "\n\n".join(
787
+ [
788
+ f"候选答案 {i + 1}:\n{cand['content']}"
789
+ for i, cand in enumerate(candidates)
790
+ ]
791
+ )
792
+ """You are a fair judge. Please evaluate the following candidate answers and select the best one.
793
+
794
+ Original question: {messages[-1]["content"]}
795
+
796
+ {candidate_texts}
797
+
798
+ Please analyze the pros and cons of each candidate answer and choose the best answer.
799
+
800
+ Evaluation criteria:
801
+ 1. Accuracy and completeness of the response
802
+ 2. Clarity and logical structure of the response
803
+ 3. Usefulness and relevance of the response
804
+ 4. Innovation and depth of the response
805
+
806
+ **Please output your conclusion using the following fixed format**:
807
+
808
+ Analysis: [Your analysis of each answer]
809
+
810
+ Best answer number: [number, e.g., 1, 2, 3, etc., output only the number]
811
+
812
+ You must explicitly select one answer, and you must strictly follow the above format for output.
813
+ """
814
+
815
+ # Evaluate Candidate Answers
816
+ evaluation_messages = [{"role": "user", "content": evaluation_prompt}]
817
+
818
+ evaluation_result = ""
819
+ evaluation_reasoning = ""
820
+
821
+ async for chunk in self._basic_mf_chat_stream(evaluation_messages, eval_model):
822
+ evaluation_result = chunk["content"]
823
+ evaluation_reasoning = chunk.get("reasoning_content", "")
824
+
825
+ # Real-time update ttc_result - Modified to simultaneously save content and reasoning_content
826
+ ttc_result["eval_result"] = {
827
+ "content": evaluation_result,
828
+ "reasoning_content": evaluation_reasoning,
829
+ }
830
+ self.context.set_variable(ttc_var_name, ttc_result)
831
+
832
+ # yield empty content to notify the upper layer every time ttc_result is updated
833
+ yield {"content": "", "reasoning_content": ""}
834
+
835
+ # Extract the best answer index from evaluation results
836
+ best_idx = 0
837
+ try:
838
+ # Use a stricter format parsing logic - get content from the modified structure
839
+ evaluation_content = ttc_result["eval_result"]["content"]
840
+ if "最佳答案编号:" in evaluation_content:
841
+ parts = evaluation_content.split("最佳答案编号:")
842
+ best_str = parts[1].strip().split()[0].strip()
843
+ best_idx = int(best_str) - 1
844
+ elif "最佳答案编号:" in evaluation_content:
845
+ parts = evaluation_content.split("最佳答案编号:")
846
+ best_str = parts[1].strip().split()[0].strip()
847
+ best_idx = int(best_str) - 1
848
+
849
+ # Ensure the index is valid
850
+ if best_idx < 0 or best_idx >= len(candidates):
851
+ best_idx = 0
852
+ except Exception:
853
+ # Use the first candidate answer when parsing fails
854
+ best_idx = 0
855
+
856
+ # Use the selected candidate answer directly as the final answer
857
+ final_answer = candidates[best_idx]["content"]
858
+ final_reasoning = candidates[best_idx].get("reasoning_content", "")
859
+
860
+ # Update TTC results
861
+ ttc_result["final_answer"] = final_answer
862
+ self.context.set_variable(ttc_var_name, ttc_result)
863
+ # yield empty content to notify the upper layer each time ttc_result is updated
864
+ yield {"content": "", "reasoning_content": ""}
865
+ # Only the final yield carries the complete content.
866
+ yield {"content": final_answer, "reasoning_content": final_reasoning}
867
+
868
+ async def run_majority_voting_stream(
869
+ self, messages, control_vars, eval_model, output_var=None, lang_mode=None
870
+ ):
871
+ """Streaming version of Majority Voting TTC mode implementation, using concurrent methods to obtain multiple answers.
872
+
873
+ Args:
874
+ messages: List of messages
875
+ control_vars: Control variables, which can be a temperature list [0, 0.5, 1.0] or a model name list ["R1", "qwen-max", "deepseek-v3"]
876
+ eval_model: Evaluation model, usually extracted from "llm-as-a-judge(model name)"
877
+ output_var: Name of the output variable
878
+ lang_mode: Language mode, indicating prompt/judge/explore
879
+
880
+ Returns:
881
+ async generator providing content chunks
882
+ """
883
+ # Check the type of control_vars to determine whether it's a temperature change or a model change.
884
+ mode_type = (
885
+ "temperature"
886
+ if all(isinstance(x, (int, float)) for x in control_vars)
887
+ else "model"
888
+ )
889
+
890
+ # Initialize ttc_result, dynamically add key-value pairs for each model or temperature
891
+ ttc_result = {
892
+ "ttc_mode": "majority-voting", # TTC Mode Name
893
+ "final_answer": "", # Final answer, generated by the evaluation model
894
+ }
895
+
896
+ # Set/Update Global Variables
897
+ ttc_var_name = f"{lang_mode}_ttc_mode_{output_var}"
898
+ self.context.set_variable(ttc_var_name, ttc_result)
899
+ # yield empty content to notify the upper layer each time ttc_result is updated
900
+ yield {"content": "", "reasoning_content": ""}
901
+
902
+ # Define an asynchronous function to retrieve individual voting results
903
+ async def get_vote_answer(var, is_temperature=True):
904
+ response_content = ""
905
+ response_reasoning = ""
906
+ temp_key = str(var) if is_temperature else var
907
+ model_to_use = self.model_name if is_temperature else var
908
+ temperature_to_use = var if is_temperature else None
909
+
910
+ async for chunk in self._basic_mf_chat_stream(
911
+ messages, model_to_use, temperature_to_use
912
+ ):
913
+ response_content = chunk["content"]
914
+ response_reasoning = chunk.get("reasoning_content", "")
915
+
916
+ # Real-time update ttc_result
917
+ ttc_result[temp_key] = {
918
+ "content": response_content,
919
+ "reasoning_content": response_reasoning,
920
+ }
921
+ self.context.set_variable(ttc_var_name, ttc_result)
922
+
923
+ # Cannot yield from inside this helper: it runs under asyncio.gather, so the result is returned instead
924
+
925
+ result = {
926
+ "content": response_content,
927
+ "reasoning_content": response_reasoning,
928
+ }
929
+ return result
930
+
931
+ # Get all voting answers using asynchronous concurrency
932
+ is_temperature_mode = mode_type == "temperature"
933
+ tasks = [get_vote_answer(var, is_temperature_mode) for var in control_vars]
934
+
935
+ # Notify the start of concurrent fetching of voting answers
936
+ yield {"content": "", "reasoning_content": ""}
937
+
938
+ # Wait for all tasks to complete
939
+ votes = await asyncio.gather(*tasks)
940
+
941
+ # Reminder: Vote answer retrieval completed
942
+ yield {"content": "", "reasoning_content": ""}
943
+
944
+ # Construct a summary prompt, explicitly requiring the generation of a comprehensive answer and output in a fixed format.
945
+ vote_texts = "\n\n".join(
946
+ [f"答案 {i + 1}:\n{v['content']}" for i, v in enumerate(votes)]
947
+ )
948
+ """You are a fair summarizer responsible for improving result quality by leveraging collective intelligence. Please analyze the multiple answers provided for the same question below and apply different processing strategies based on the question type.
949
+
950
+ Original question: {messages[-1]["content"]}
951
+
952
+ {vote_texts}
953
+
954
+ First, determine whether this question has a clear, objective correct answer (e.g., factual questions, problems with standard solutions, etc.).
955
+
956
+ Processing strategy:
957
+ 1. If the question has a clear answer: Identify the parts that are consistent or highly similar across the majority of answers, and take the most frequently occurring answer as the final answer.
958
+ 2. If the question does not have a clear answer (e.g., open-ended questions, creative tasks, subjective evaluations, etc.): Integrate consensus points and valuable unique insights from all answers to form a comprehensive synthesized answer.
959
+
960
+ **Please output your conclusion using the following fixed format**:
961
+
962
+ Question type: [Objective question/Open-ended question]
963
+
964
+ Analysis: [Briefly analyze similarities and differences among the answers, consensus, and disagreements]
965
+
966
+ Final answer:
967
+ [Provide your final answer, which can be the majority-consensus answer or a synthesis of the best elements from all answers]
968
+
969
+ You must provide a clear final answer and strictly follow the above format for output.
970
+ """
971
+
972
+ # Aggregate voting results
973
+ summary_messages = [{"role": "user", "content": summary_prompt}]
974
+
975
+ eval_result = ""
976
+ summary_reasoning = ""
977
+
978
+ async for chunk in self._basic_mf_chat_stream(summary_messages, eval_model):
979
+ eval_result = chunk["content"]
980
+ summary_reasoning = chunk.get("reasoning_content", "")
981
+
982
+ # Real-time update ttc_result - Modified to simultaneously save content and reasoning_content
983
+ ttc_result["eval_result"] = {
984
+ "content": eval_result,
985
+ "reasoning_content": summary_reasoning,
986
+ }
987
+ self.context.set_variable(ttc_var_name, ttc_result)
988
+
989
+ # yield empty content to notify the upper layer every time ttc_result is updated
990
+ yield {"content": "", "reasoning_content": ""}
991
+
992
+ # Extract the final answer from the summary results
993
+ final_answer = ""
994
+ try:
995
+ # Use a stricter format parsing logic - get content from the modified structure
996
+ summary_content = ttc_result["eval_result"]["content"]
997
+ if "最终答案:" in summary_content:
998
+ parts = summary_content.split("最终答案:")
999
+ final_answer = parts[1].strip()
1000
+ elif "最终答案:" in summary_content:
1001
+ parts = summary_content.split("最终答案:")
1002
+ final_answer = parts[1].strip()
1003
+ else:
1004
+ # If no explicit marker is found, use the entire summary result.
1005
+ final_answer = summary_content
1006
+
1007
+ # Extract question type information (if exists)
1008
+ question_type = ""
1009
+ if "问题类型:" in summary_content:
1010
+ type_parts = summary_content.split("问题类型:")
1011
+ if "分析:" in type_parts[1]:
1012
+ question_type = type_parts[1].split("分析:")[0].strip()
1013
+ else:
1014
+ question_type = type_parts[1].strip().split("\n")[0].strip()
1015
+ elif "问题类型:" in summary_content:
1016
+ type_parts = summary_content.split("问题类型:")
1017
+ if "分析:" in type_parts[1]:
1018
+ question_type = type_parts[1].split("分析:")[0].strip()
1019
+ else:
1020
+ question_type = type_parts[1].strip().split("\n")[0].strip()
1021
+
1022
+ # If the question type is successfully extracted, add it to the result.
1023
+ if question_type:
1024
+ ttc_result["question_type"] = question_type
1025
+
1026
+ except Exception as e:
1027
+ # Use the entire summary result when parsing fails
1028
+ console(f"解析多数投票结果时出错: {str(e)}")
1029
+ final_answer = (
1030
+ summary_content if "summary_content" in locals() else eval_result
1031
+ )
1032
+
1033
+ # Update TTC results
1034
+ ttc_result["final_answer"] = final_answer
1035
+ self.context.set_variable(ttc_var_name, ttc_result)
1036
+ # yield empty content to notify the upper layer each time ttc_result is updated
1037
+ yield {"content": "", "reasoning_content": ""}
1038
+
1039
+ # Only the final yield carries the complete content.
1040
+ yield {"content": final_answer, "reasoning_content": summary_reasoning}
1041
+
1042
+ async def _chat_stream(
1043
+ self,
1044
+ messages: Messages,
1045
+ model: Optional[str] = None,
1046
+ temperature: Optional[float] = None,
1047
+ continous_content: Optional[str] = None,
1048
+ strategy_name=None,
1049
+ no_cache=False,
1050
+ **kwargs,
1051
+ ):
1052
+ llm_instance_config = self.get_model_config(model)
1053
+
1054
+ # Use a message compressor to process messages, pass model_config so it can automatically adjust constraints
1055
+ compression_result = self.message_compressor.compress_messages(
1056
+ messages,
1057
+ strategy_name=strategy_name,
1058
+ model_config=llm_instance_config,
1059
+ **kwargs,
1060
+ )
1061
+
1062
+ compression_result.compressed_messages.set_max_tokens(
1063
+ llm_instance_config.max_tokens
1064
+ )
1065
+ self.context.set_messages(compression_result.compressed_messages)
1066
+
1067
+ llm = None
1068
+ if llm_instance_config.type_api == TypeAPI.OPENAI:
1069
+ llm = LLMOpenai(self.context)
1070
+ elif llm_instance_config.type_api == TypeAPI.AISHU_MODEL_FACTORY:
1071
+ llm = LLMModelFactory(self.context)
1072
+ else:
1073
+ raise ValueError(f"不支持的API类型: {llm_instance_config.type_api}")
1074
+
1075
+ for i in range(self.Retry_Count):
1076
+ self.accu_content = ""
1077
+
1078
+ # Debug log: records retry information
1079
+ retry_info = {
1080
+ "retry_attempt": i + 1,
1081
+ "max_retries": self.Retry_Count,
1082
+ "model": llm_instance_config.model_name,
1083
+ "model_config": (
1084
+ llm_instance_config.to_dict()
1085
+ if hasattr(llm_instance_config, "to_dict")
1086
+ else str(llm_instance_config)
1087
+ ),
1088
+ }
1089
+ self.context.debug(f"LLM call attempt: {retry_info}")
1090
+
1091
+ try:
1092
+ async for chunk in llm.chat(
1093
+ llm_instance_config=llm_instance_config,
1094
+ messages=compression_result.compressed_messages,
1095
+ temperature=temperature,
1096
+ continous_content=continous_content,
1097
+ no_cache=no_cache,
1098
+ **kwargs,
1099
+ ):
1100
+ # Detect duplicate output - prevents LLM infinite loops
1101
+ # Performance: Optimized 6.8x faster (2026-01-18), see PERFORMANCE_OPTIMIZATION_REPORT.md
1102
+ if chunk is not None and "content" in chunk:
1103
+ self.accu_content = chunk.get("content", "")
1104
+
1105
+ # Only check after MIN_LENGTH threshold to avoid false positives on short content
1106
+ # Default: 2KB, configurable via DOLPHIN_DUPLICATE_MIN_LENGTH env var
1107
+ if len(self.accu_content) > MIN_LENGTH_TO_DETECT_DUPLICATE_OUTPUT:
1108
+ # Check if the last N chars appear repeatedly in previous content
1109
+ # Pattern length configurable via DOLPHIN_DUPLICATE_PATTERN_LENGTH (default: 50)
1110
+ recent = self.accu_content[-DUPLICATE_PATTERN_LENGTH:]
1111
+ previous = self.accu_content[:-DUPLICATE_PATTERN_LENGTH]
1112
+
1113
+ # Count overlapping occurrences using optimized regex (6.8x faster than loop)
1114
+ # Uses lookahead assertion for accurate loop detection
1115
+ count = count_overlapping_occurrences(previous, recent)
1116
+
1117
+ # Trigger if pattern repeats >= threshold times (default: 50)
1118
+ # This allows legitimate repeated content (e.g., 30 SVG cards with same CSS)
1119
+ # while catching infinite loops (e.g., same card repeated 150+ times)
1120
+ if count >= COUNT_TO_PROVE_DUPLICATE_OUTPUT:
1121
+ self.context.warn(
1122
+ f"duplicate output detected: pattern repeated {count} times "
1123
+ f"(threshold: {COUNT_TO_PROVE_DUPLICATE_OUTPUT})"
1124
+ )
1125
+ yield {
1126
+ "content": self.accu_content + get_msg_duplicate_output(),
1127
+ "reasoning_content": "",
1128
+ }
1129
+ raise IOError(
1130
+ f"duplicate output detected: pattern repeated {count} times"
1131
+ )
1132
+ yield chunk
1133
+
1134
+ # Debug log: Records successful completion of requests
1135
+ success_info = {
1136
+ "retry_attempts": i + 1,
1137
+ "model": llm_instance_config.model_name,
1138
+ "final_content_length": (
1139
+ len(self.accu_content) if self.accu_content else 0
1140
+ ),
1141
+ "compression_strategy": strategy_name,
1142
+ "messages_processed": (
1143
+ len(compression_result.compressed_messages)
1144
+ if compression_result.compressed_messages
1145
+ else 0
1146
+ ),
1147
+ }
1148
+ self.context.debug(
1149
+ f"LLM request completed successfully: {success_info}"
1150
+ )
1151
+ return
1152
+ except ModelException as e:
1153
+ # Error log: Records detailed information about ModelException
1154
+ error_info = {
1155
+ "error_type": "ModelException",
1156
+ "retry_attempt": i + 1,
1157
+ "max_retries": self.Retry_Count,
1158
+ "model": llm_instance_config.model_name,
1159
+ "error_message": str(e),
1160
+ "error_class": type(e).__name__,
1161
+ "llm_instance_config": (
1162
+ llm_instance_config.to_dict()
1163
+ if hasattr(llm_instance_config, "to_dict")
1164
+ else str(llm_instance_config)
1165
+ ),
1166
+ "compression_strategy": strategy_name,
1167
+ "messages_count": (
1168
+ len(compression_result.compressed_messages)
1169
+ if compression_result.compressed_messages
1170
+ else 0
1171
+ ),
1172
+ }
1173
+ self.context.error(f"LLM ModelException: {error_info}")
1174
+ raise e
1175
+ except AttributeError as e:
1176
+ # Handle specific AttributeError for NoneType object has no attribute 'name'
1177
+ if "'NoneType' object has no attribute 'name'" in str(e):
1178
+ self.context.debug(f"LLM response parsing warning (non-fatal): {e}")
1179
+ # Continue to next retry or return if this is last retry
1180
+ if i == self.Retry_Count - 1:
1181
+ self.context.warn(
1182
+ "LLM call finally failed with attribute error"
1183
+ )
1184
+ yield {
1185
+ "content": "failed to call LLM[{}]".format(model),
1186
+ "reasoning_content": "",
1187
+ }
1188
+ return
1189
+ else:
1190
+ self.context.warn(
1191
+ f"LLM call failed with AttributeError retry: {i}, error: {e}"
1192
+ )
1193
+ except (
1194
+ aiohttp.ClientError,
1195
+ asyncio.TimeoutError,
1196
+ TimeoutError,
1197
+ ValueError,
1198
+ RuntimeError,
1199
+ ) as e:
1200
+ # Check if this is a multimodal-related error
1201
+ error_str = str(e)
1202
+ is_multimodal_error = (
1203
+ "image_url" in error_str or
1204
+ "unknown variant" in error_str or
1205
+ "multimodal" in error_str.lower()
1206
+ )
1207
+
1208
+ # Warn log: Records detailed information about general exceptions
1209
+ error_info = {
1210
+ "error_type": "GeneralException",
1211
+ "retry_attempt": i + 1,
1212
+ "max_retries": self.Retry_Count,
1213
+ "model": llm_instance_config.model_name,
1214
+ "error_message": str(e),
1215
+ "error_class": type(e).__name__,
1216
+ "llm_instance_config": (
1217
+ llm_instance_config.to_dict()
1218
+ if hasattr(llm_instance_config, "to_dict")
1219
+ else str(llm_instance_config)
1220
+ ),
1221
+ "compression_strategy": strategy_name,
1222
+ "messages_count": (
1223
+ len(compression_result.compressed_messages)
1224
+ if compression_result.compressed_messages
1225
+ else 0
1226
+ ),
1227
+ "traceback_available": "yes", # Indicates that there is complete stack trace information
1228
+ }
1229
+ self.context.warn(
1230
+ f"LLM general exception on retry {i + 1}: {error_info}"
1231
+ )
1232
+
1233
+ # For multimodal errors, don't retry - the model simply doesn't support it
1234
+ if is_multimodal_error:
1235
+ console(f"❌ 模型 '{llm_instance_config.model_name}' 不支持图片输入(多模态)。")
1236
+ console(" 请切换到支持多模态的模型,例如:")
1237
+ console(" • GPT-4o / GPT-4-Vision (OpenAI)")
1238
+ console(" • Claude 3 系列 (Anthropic)")
1239
+ console(" • Qwen-VL 系列 (阿里云)")
1240
+ console(" • Gemini Pro Vision (Google)")
1241
+ yield {
1242
+ "content": f"⚠️ 当前模型 '{llm_instance_config.model_name}' 不支持图片输入。请在配置文件中切换到支持多模态的模型。",
1243
+ "reasoning_content": "",
1244
+ }
1245
+ return
1246
+ except Exception as e:
1247
+ error_info = {
1248
+ "error_type": "UnexpectedExceptionNotRetried",
1249
+ "retry_attempt": i + 1,
1250
+ "max_retries": self.Retry_Count,
1251
+ "model": llm_instance_config.model_name,
1252
+ "error_message": str(e),
1253
+ "error_class": type(e).__name__,
1254
+ }
1255
+ self.context.error(f"LLM unexpected exception (not retried): {error_info}")
1256
+ raise
1257
+
1258
+ # Error log: Records detailed information about final failures
1259
+ final_failure_info = {
1260
+ "error_type": "FinalFailure",
1261
+ "total_retries_attempted": self.Retry_Count,
1262
+ "model": llm_instance_config.model_name,
1263
+ "api_type": llm_instance_config.type_api,
1264
+ "api_endpoint": llm_instance_config.api,
1265
+ "llm_instance_config": (
1266
+ llm_instance_config.to_dict()
1267
+ if hasattr(llm_instance_config, "to_dict")
1268
+ else str(llm_instance_config)
1269
+ ),
1270
+ "compression_strategy": strategy_name,
1271
+ "suggested_actions": [
1272
+ "Check network connectivity",
1273
+ "Verify API credentials and endpoints",
1274
+ "Review model configuration parameters",
1275
+ "Check service availability and rate limits",
1276
+ ],
1277
+ }
1278
+ self.context.error(
1279
+ f"LLM call finally failed after {self.Retry_Count} retries: {final_failure_info}"
1280
+ )
1281
+
1282
+ yield {
1283
+ "content": f"❌ LLM 调用失败 (模型: {llm_instance_config.model_name})。请检查日志文件获取详细信息。",
1284
+ "reasoning_content": "",
1285
+ }
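
For orientation, the sketch below shows roughly how the LLMClient added in dolphin/core/llm/llm_client.py could be driven, based only on the signatures visible in this diff. It is a hypothetical usage example, not shipped code: the Context instance, the message payload, and the TTC parameters are assumptions, and constructing a properly configured Context is outside the scope of this file.

import asyncio

from dolphin.core.llm.llm_client import LLMClient


async def demo(ctx) -> str:
    # `ctx` is assumed to be a configured dolphin Context whose get_config()
    # exposes message_compressor_config, as required by LLMClient.__init__.
    client = LLMClient(ctx)

    # Placeholder input; the concrete Messages type accepted here is defined in
    # dolphin.core.common.enums and is not shown in this diff.
    messages = [{"role": "user", "content": "Briefly summarize this package."}]

    # Plain streaming call: each chunk carries the accumulated "content" so far.
    final = ""
    async for chunk in client.mf_chat_stream(messages=messages):
        final = chunk["content"]

    # Self-reflection TTC mode, terminated after two reflection iterations.
    # "反思" is one of the built-in modes handled by get_reflection_prompt.
    ttc_mode = {"name": "self-reflection", "control_vars": "反思", "max_iterations": 2}
    async for chunk in client.mf_chat_stream(
        messages=messages,
        ttc_mode=ttc_mode,
        output_var="answer",
        lang_mode="prompt",
    ):
        final = chunk["content"] or final  # many intermediate chunks are empty update signals

    return final


# asyncio.run(demo(ctx))  # requires a real, configured dolphin Context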