kweaver_dolphin-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199)
  1. DolphinLanguageSDK/__init__.py +58 -0
  2. dolphin/__init__.py +62 -0
  3. dolphin/cli/__init__.py +20 -0
  4. dolphin/cli/args/__init__.py +9 -0
  5. dolphin/cli/args/parser.py +567 -0
  6. dolphin/cli/builtin_agents/__init__.py +22 -0
  7. dolphin/cli/commands/__init__.py +4 -0
  8. dolphin/cli/interrupt/__init__.py +8 -0
  9. dolphin/cli/interrupt/handler.py +205 -0
  10. dolphin/cli/interrupt/keyboard.py +82 -0
  11. dolphin/cli/main.py +49 -0
  12. dolphin/cli/multimodal/__init__.py +34 -0
  13. dolphin/cli/multimodal/clipboard.py +327 -0
  14. dolphin/cli/multimodal/handler.py +249 -0
  15. dolphin/cli/multimodal/image_processor.py +214 -0
  16. dolphin/cli/multimodal/input_parser.py +149 -0
  17. dolphin/cli/runner/__init__.py +8 -0
  18. dolphin/cli/runner/runner.py +989 -0
  19. dolphin/cli/ui/__init__.py +10 -0
  20. dolphin/cli/ui/console.py +2795 -0
  21. dolphin/cli/ui/input.py +340 -0
  22. dolphin/cli/ui/layout.py +425 -0
  23. dolphin/cli/ui/stream_renderer.py +302 -0
  24. dolphin/cli/utils/__init__.py +8 -0
  25. dolphin/cli/utils/helpers.py +135 -0
  26. dolphin/cli/utils/version.py +49 -0
  27. dolphin/core/__init__.py +107 -0
  28. dolphin/core/agent/__init__.py +10 -0
  29. dolphin/core/agent/agent_state.py +69 -0
  30. dolphin/core/agent/base_agent.py +970 -0
  31. dolphin/core/code_block/__init__.py +0 -0
  32. dolphin/core/code_block/agent_init_block.py +0 -0
  33. dolphin/core/code_block/assign_block.py +98 -0
  34. dolphin/core/code_block/basic_code_block.py +1865 -0
  35. dolphin/core/code_block/explore_block.py +1327 -0
  36. dolphin/core/code_block/explore_block_v2.py +712 -0
  37. dolphin/core/code_block/explore_strategy.py +672 -0
  38. dolphin/core/code_block/judge_block.py +220 -0
  39. dolphin/core/code_block/prompt_block.py +32 -0
  40. dolphin/core/code_block/skill_call_deduplicator.py +291 -0
  41. dolphin/core/code_block/tool_block.py +129 -0
  42. dolphin/core/common/__init__.py +17 -0
  43. dolphin/core/common/constants.py +176 -0
  44. dolphin/core/common/enums.py +1173 -0
  45. dolphin/core/common/exceptions.py +133 -0
  46. dolphin/core/common/multimodal.py +539 -0
  47. dolphin/core/common/object_type.py +165 -0
  48. dolphin/core/common/output_format.py +432 -0
  49. dolphin/core/common/types.py +36 -0
  50. dolphin/core/config/__init__.py +16 -0
  51. dolphin/core/config/global_config.py +1289 -0
  52. dolphin/core/config/ontology_config.py +133 -0
  53. dolphin/core/context/__init__.py +12 -0
  54. dolphin/core/context/context.py +1580 -0
  55. dolphin/core/context/context_manager.py +161 -0
  56. dolphin/core/context/var_output.py +82 -0
  57. dolphin/core/context/variable_pool.py +356 -0
  58. dolphin/core/context_engineer/__init__.py +41 -0
  59. dolphin/core/context_engineer/config/__init__.py +5 -0
  60. dolphin/core/context_engineer/config/settings.py +402 -0
  61. dolphin/core/context_engineer/core/__init__.py +7 -0
  62. dolphin/core/context_engineer/core/budget_manager.py +327 -0
  63. dolphin/core/context_engineer/core/context_assembler.py +583 -0
  64. dolphin/core/context_engineer/core/context_manager.py +637 -0
  65. dolphin/core/context_engineer/core/tokenizer_service.py +260 -0
  66. dolphin/core/context_engineer/example/incremental_example.py +267 -0
  67. dolphin/core/context_engineer/example/traditional_example.py +334 -0
  68. dolphin/core/context_engineer/services/__init__.py +5 -0
  69. dolphin/core/context_engineer/services/compressor.py +399 -0
  70. dolphin/core/context_engineer/utils/__init__.py +6 -0
  71. dolphin/core/context_engineer/utils/context_utils.py +441 -0
  72. dolphin/core/context_engineer/utils/message_formatter.py +270 -0
  73. dolphin/core/context_engineer/utils/token_utils.py +139 -0
  74. dolphin/core/coroutine/__init__.py +15 -0
  75. dolphin/core/coroutine/context_snapshot.py +154 -0
  76. dolphin/core/coroutine/context_snapshot_profile.py +922 -0
  77. dolphin/core/coroutine/context_snapshot_store.py +268 -0
  78. dolphin/core/coroutine/execution_frame.py +145 -0
  79. dolphin/core/coroutine/execution_state_registry.py +161 -0
  80. dolphin/core/coroutine/resume_handle.py +101 -0
  81. dolphin/core/coroutine/step_result.py +101 -0
  82. dolphin/core/executor/__init__.py +18 -0
  83. dolphin/core/executor/debug_controller.py +630 -0
  84. dolphin/core/executor/dolphin_executor.py +1063 -0
  85. dolphin/core/executor/executor.py +624 -0
  86. dolphin/core/flags/__init__.py +27 -0
  87. dolphin/core/flags/definitions.py +49 -0
  88. dolphin/core/flags/manager.py +113 -0
  89. dolphin/core/hook/__init__.py +95 -0
  90. dolphin/core/hook/expression_evaluator.py +499 -0
  91. dolphin/core/hook/hook_dispatcher.py +380 -0
  92. dolphin/core/hook/hook_types.py +248 -0
  93. dolphin/core/hook/isolated_variable_pool.py +284 -0
  94. dolphin/core/interfaces.py +53 -0
  95. dolphin/core/llm/__init__.py +0 -0
  96. dolphin/core/llm/llm.py +495 -0
  97. dolphin/core/llm/llm_call.py +100 -0
  98. dolphin/core/llm/llm_client.py +1285 -0
  99. dolphin/core/llm/message_sanitizer.py +120 -0
  100. dolphin/core/logging/__init__.py +20 -0
  101. dolphin/core/logging/logger.py +526 -0
  102. dolphin/core/message/__init__.py +8 -0
  103. dolphin/core/message/compressor.py +749 -0
  104. dolphin/core/parser/__init__.py +8 -0
  105. dolphin/core/parser/parser.py +405 -0
  106. dolphin/core/runtime/__init__.py +10 -0
  107. dolphin/core/runtime/runtime_graph.py +926 -0
  108. dolphin/core/runtime/runtime_instance.py +446 -0
  109. dolphin/core/skill/__init__.py +14 -0
  110. dolphin/core/skill/context_retention.py +157 -0
  111. dolphin/core/skill/skill_function.py +686 -0
  112. dolphin/core/skill/skill_matcher.py +282 -0
  113. dolphin/core/skill/skillkit.py +700 -0
  114. dolphin/core/skill/skillset.py +72 -0
  115. dolphin/core/trajectory/__init__.py +10 -0
  116. dolphin/core/trajectory/recorder.py +189 -0
  117. dolphin/core/trajectory/trajectory.py +522 -0
  118. dolphin/core/utils/__init__.py +9 -0
  119. dolphin/core/utils/cache_kv.py +212 -0
  120. dolphin/core/utils/tools.py +340 -0
  121. dolphin/lib/__init__.py +93 -0
  122. dolphin/lib/debug/__init__.py +8 -0
  123. dolphin/lib/debug/visualizer.py +409 -0
  124. dolphin/lib/memory/__init__.py +28 -0
  125. dolphin/lib/memory/async_processor.py +220 -0
  126. dolphin/lib/memory/llm_calls.py +195 -0
  127. dolphin/lib/memory/manager.py +78 -0
  128. dolphin/lib/memory/sandbox.py +46 -0
  129. dolphin/lib/memory/storage.py +245 -0
  130. dolphin/lib/memory/utils.py +51 -0
  131. dolphin/lib/ontology/__init__.py +12 -0
  132. dolphin/lib/ontology/basic/__init__.py +0 -0
  133. dolphin/lib/ontology/basic/base.py +102 -0
  134. dolphin/lib/ontology/basic/concept.py +130 -0
  135. dolphin/lib/ontology/basic/object.py +11 -0
  136. dolphin/lib/ontology/basic/relation.py +63 -0
  137. dolphin/lib/ontology/datasource/__init__.py +27 -0
  138. dolphin/lib/ontology/datasource/datasource.py +66 -0
  139. dolphin/lib/ontology/datasource/oracle_datasource.py +338 -0
  140. dolphin/lib/ontology/datasource/sql.py +845 -0
  141. dolphin/lib/ontology/mapping.py +177 -0
  142. dolphin/lib/ontology/ontology.py +733 -0
  143. dolphin/lib/ontology/ontology_context.py +16 -0
  144. dolphin/lib/ontology/ontology_manager.py +107 -0
  145. dolphin/lib/skill_results/__init__.py +31 -0
  146. dolphin/lib/skill_results/cache_backend.py +559 -0
  147. dolphin/lib/skill_results/result_processor.py +181 -0
  148. dolphin/lib/skill_results/result_reference.py +179 -0
  149. dolphin/lib/skill_results/skillkit_hook.py +324 -0
  150. dolphin/lib/skill_results/strategies.py +328 -0
  151. dolphin/lib/skill_results/strategy_registry.py +150 -0
  152. dolphin/lib/skillkits/__init__.py +44 -0
  153. dolphin/lib/skillkits/agent_skillkit.py +155 -0
  154. dolphin/lib/skillkits/cognitive_skillkit.py +82 -0
  155. dolphin/lib/skillkits/env_skillkit.py +250 -0
  156. dolphin/lib/skillkits/mcp_adapter.py +616 -0
  157. dolphin/lib/skillkits/mcp_skillkit.py +771 -0
  158. dolphin/lib/skillkits/memory_skillkit.py +650 -0
  159. dolphin/lib/skillkits/noop_skillkit.py +31 -0
  160. dolphin/lib/skillkits/ontology_skillkit.py +89 -0
  161. dolphin/lib/skillkits/plan_act_skillkit.py +452 -0
  162. dolphin/lib/skillkits/resource/__init__.py +52 -0
  163. dolphin/lib/skillkits/resource/models/__init__.py +6 -0
  164. dolphin/lib/skillkits/resource/models/skill_config.py +109 -0
  165. dolphin/lib/skillkits/resource/models/skill_meta.py +127 -0
  166. dolphin/lib/skillkits/resource/resource_skillkit.py +393 -0
  167. dolphin/lib/skillkits/resource/skill_cache.py +215 -0
  168. dolphin/lib/skillkits/resource/skill_loader.py +395 -0
  169. dolphin/lib/skillkits/resource/skill_validator.py +406 -0
  170. dolphin/lib/skillkits/resource_skillkit.py +11 -0
  171. dolphin/lib/skillkits/search_skillkit.py +163 -0
  172. dolphin/lib/skillkits/sql_skillkit.py +274 -0
  173. dolphin/lib/skillkits/system_skillkit.py +509 -0
  174. dolphin/lib/skillkits/vm_skillkit.py +65 -0
  175. dolphin/lib/utils/__init__.py +9 -0
  176. dolphin/lib/utils/data_process.py +207 -0
  177. dolphin/lib/utils/handle_progress.py +178 -0
  178. dolphin/lib/utils/security.py +139 -0
  179. dolphin/lib/utils/text_retrieval.py +462 -0
  180. dolphin/lib/vm/__init__.py +11 -0
  181. dolphin/lib/vm/env_executor.py +895 -0
  182. dolphin/lib/vm/python_session_manager.py +453 -0
  183. dolphin/lib/vm/vm.py +610 -0
  184. dolphin/sdk/__init__.py +60 -0
  185. dolphin/sdk/agent/__init__.py +12 -0
  186. dolphin/sdk/agent/agent_factory.py +236 -0
  187. dolphin/sdk/agent/dolphin_agent.py +1106 -0
  188. dolphin/sdk/api/__init__.py +4 -0
  189. dolphin/sdk/runtime/__init__.py +8 -0
  190. dolphin/sdk/runtime/env.py +363 -0
  191. dolphin/sdk/skill/__init__.py +10 -0
  192. dolphin/sdk/skill/global_skills.py +706 -0
  193. dolphin/sdk/skill/traditional_toolkit.py +260 -0
  194. kweaver_dolphin-0.1.0.dist-info/METADATA +521 -0
  195. kweaver_dolphin-0.1.0.dist-info/RECORD +199 -0
  196. kweaver_dolphin-0.1.0.dist-info/WHEEL +5 -0
  197. kweaver_dolphin-0.1.0.dist-info/entry_points.txt +27 -0
  198. kweaver_dolphin-0.1.0.dist-info/licenses/LICENSE.txt +201 -0
  199. kweaver_dolphin-0.1.0.dist-info/top_level.txt +2 -0

dolphin/core/llm/llm.py
@@ -0,0 +1,495 @@
+ from abc import abstractmethod
+ import json
+ from typing import Any, Optional
+ from dolphin.core.common.exceptions import ModelException
+ from dolphin.core import flags
+ import aiohttp
+ from openai import AsyncOpenAI
+
+ from dolphin.core.common.enums import MessageRole, Messages
+ from dolphin.core.config.global_config import LLMInstanceConfig
+ from dolphin.core.common.constants import (
+     MSG_CONTINUOUS_CONTENT,
+     TOOL_CALL_ID_PREFIX,
+     is_msg_duplicate_skill_call,
+ )
+ from dolphin.core.context.context import Context
+ from dolphin.core.logging.logger import get_logger
+ from dolphin.core.llm.message_sanitizer import sanitize_and_log
+
+ logger = get_logger("llm")
+
+
+ class ToolCallsParser:
+     """Helper class for parsing tool calls from LLM streaming responses.
+
+     This class consolidates the tool_calls parsing logic used by both
+     LLMModelFactory (raw HTTP) and LLMOpenai (SDK) implementations.
+     """
+
+     def __init__(self):
+         self.tool_calls_data: dict = {}
+         self.func_name: str | None = None
+         self.func_args: list = []
+
+     def parse_delta_dict(self, delta: dict, tool_calls_key: str = "tool_calls"):
+         """Parse tool_calls from a dict delta (used by LLMModelFactory).
+
+         Args:
+             delta: The delta dict from streaming response
+             tool_calls_key: Key name for tool_calls in delta
+         """
+         if tool_calls_key not in delta or not delta[tool_calls_key]:
+             return
+
+         for tool_call in delta[tool_calls_key]:
+             index = self._normalize_index(tool_call.get("index", 0))
+
+             if index not in self.tool_calls_data:
+                 self.tool_calls_data[index] = {"id": None, "name": None, "arguments": []}
+
+             # Preserve tool_call_id from LLM
+             if tool_call.get("id"):
+                 self.tool_calls_data[index]["id"] = tool_call["id"]
+
+             if "function" in tool_call:
+                 if tool_call["function"].get("name"):
+                     self.tool_calls_data[index]["name"] = tool_call["function"]["name"]
+                 if tool_call["function"].get("arguments"):
+                     self.tool_calls_data[index]["arguments"].append(
+                         tool_call["function"]["arguments"]
+                     )
+
+             # Legacy single tool call: update from index 0 for backward compat
+             if index == 0:
+                 self._update_legacy_fields(tool_call)
+
+     def parse_delta_object(self, delta):
+         """Parse tool_calls from an OpenAI SDK delta object (used by LLMOpenai).
+
+         Args:
+             delta: The delta object from OpenAI SDK streaming response
+         """
+         if not hasattr(delta, "tool_calls") or delta.tool_calls is None:
+             return
+
+         for tool_call in delta.tool_calls:
+             # Get index from OpenAI SDK object
+             index = getattr(tool_call, "index", 0) or 0
+
+             if index not in self.tool_calls_data:
+                 self.tool_calls_data[index] = {"id": None, "name": None, "arguments": []}
+
+             # Preserve tool_call_id from LLM
+             if getattr(tool_call, "id", None):
+                 self.tool_calls_data[index]["id"] = tool_call.id
+
+             if hasattr(tool_call, "function") and tool_call.function is not None:
+                 if tool_call.function.name is not None:
+                     self.tool_calls_data[index]["name"] = tool_call.function.name
+                 if tool_call.function.arguments is not None:
+                     self.tool_calls_data[index]["arguments"].append(tool_call.function.arguments)
+
+             # Legacy single tool call: update from index 0 for backward compat
+             if index == 0:
+                 self._update_legacy_fields_from_object(tool_call)
+
+     def _normalize_index(self, raw_index) -> int:
+         """Normalize index to integer, defaulting to 0 on error."""
+         try:
+             return int(raw_index)
+         except (ValueError, TypeError):
+             return 0
+
+     def _update_legacy_fields(self, tool_call: dict):
+         """Update legacy single tool call fields from dict."""
+         if "function" in tool_call:
+             if tool_call["function"].get("name"):
+                 self.func_name = tool_call["function"]["name"]
+             if tool_call["function"].get("arguments"):
+                 self.func_args.append(tool_call["function"]["arguments"])
+
+     def _update_legacy_fields_from_object(self, tool_call):
+         """Update legacy single tool call fields from SDK object."""
+         if hasattr(tool_call, "function") and tool_call.function is not None:
+             if tool_call.function.name is not None:
+                 self.func_name = tool_call.function.name
+             if tool_call.function.arguments is not None:
+                 self.func_args.append(tool_call.function.arguments)
+
+     def get_result(self) -> dict:
+         """Get the parsed result as a dict to merge into the response."""
+         result = {}
+         if self.func_name:
+             result["func_name"] = self.func_name
+         if self.func_args:
+             result["func_args"] = self.func_args
+         if self.tool_calls_data:
+             result["tool_calls_data"] = self.tool_calls_data
+         return result
+
+
+ class LLM:
+     def __init__(self, context: Context):
+         self.context = context
+
+     @abstractmethod
+     async def chat(
+         self,
+         llm_instance_config: LLMInstanceConfig,
+         messages: Messages,
+         continous_content: Optional[str] = None,
+         temperature: Optional[float] = None,
+         no_cache: bool = False,
+         **kwargs,
+     ):
+         pass
+
+     async def update_usage(self, final_chunk):
+         await self.context.update_usage(final_chunk)
+
+     def set_messages(self, messages: Messages, continous_content: Optional[str] = None):
+         if continous_content:
+             to_be_added = (
+                 MSG_CONTINUOUS_CONTENT
+                 if is_msg_duplicate_skill_call(continous_content)
+                 else ""
+             )
+             if messages[-1].role == MessageRole.ASSISTANT:
+                 messages[-1].content += continous_content + to_be_added
+                 messages[-1].metadata["prefix"] = True
+             else:
+                 messages.append_message(
+                     MessageRole.ASSISTANT,
+                     continous_content + to_be_added,
+                     metadata={"prefix": True},
+                 )
+
+         self.context.set_messages(messages)
+
+     def set_cache(self, llm: str, cache_key: Messages, cache_value: Any):
+         self.context.get_config().set_llm_cache(llm, cache_key, cache_value)
+
+     def get_cache(self, llm: str, cache_key: Messages):
+         return self.context.get_config().get_llm_cache(llm, cache_key)
+
+     def set_cache_by_dict(self, llm: str, cache_key: list, cache_value: Any):
+         """Set cache using dict list as key (for sanitized messages)."""
+         self.context.get_config().set_llm_cache_by_dict(llm, cache_key, cache_value)
+
+     def get_cache_by_dict(self, llm: str, cache_key: list):
+         """Get cache using dict list as key (for sanitized messages)."""
+         return self.context.get_config().get_llm_cache_by_dict(llm, cache_key)
+
+     def log_request(self, messages: Messages, continous_content: Optional[str] = None):
+         self.context.debug(
+             "LLM chat messages[{}] length[{}] continous_content[{}]".format(
+                 messages.str_summary(),
+                 messages.length(),
+                 continous_content.replace("\n", "\\n") if continous_content else "",
+             )
+         )
+
+
+ class LLMModelFactory(LLM):
+     def __init__(self, context: Context):
+         super().__init__(context)
+
+     async def chat(
+         self,
+         llm_instance_config: LLMInstanceConfig,
+         messages: Messages,
+         continous_content: Optional[str] = None,
+         temperature: Optional[float] = None,
+         no_cache: bool = False,
+         **kwargs,
+     ):
+         self.log_request(messages, continous_content)
+
+         self.set_messages(messages, continous_content)
+
+         # Sanitize messages BEFORE cache check to ensure consistent cache keys
+         sanitized_messages = sanitize_and_log(
+             messages.get_messages_as_dict(), logger.warning
+         )
+
+         if not no_cache and not flags.is_enabled(flags.DISABLE_LLM_CACHE):
+             # Use sanitized messages for cache key to ensure consistency
+             cache_value = self.get_cache_by_dict(llm_instance_config.model_name, sanitized_messages)
+             if cache_value is not None:
+                 yield cache_value
+                 return
+         try:
+             # Reuse sanitized messages from cache key generation (no need to sanitize again)
+
+             # Build request payload
+             payload = {
+                 "model": llm_instance_config.model_name,
+                 "temperature": (
+                     temperature
+                     if temperature is not None
+                     else llm_instance_config.temperature
+                 ),
+                 "top_p": llm_instance_config.top_p,
+                 "top_k": llm_instance_config.top_k,
+                 "messages": sanitized_messages,
+                 "max_tokens": llm_instance_config.max_tokens,
+                 "stream": True,
+             }
+             # If there is a tools parameter, add it to the API call, and support custom tool_choice.
+             if "tools" in kwargs and kwargs["tools"]:
+                 payload["tools"] = kwargs["tools"]
+                 # Support tool_choice: auto|none|required or provider-specific
+                 tool_choice = kwargs.get("tool_choice")
+                 payload["tool_choice"] = tool_choice if tool_choice else "auto"
+
+             line_json = ""
+             accu_content = ""
+             reasoning_content = ""
+             finish_reason = None
+             # Use ToolCallsParser to handle tool calls parsing
+             tool_parser = ToolCallsParser()
+
+             timeout = aiohttp.ClientTimeout(
+                 total=1800,  # 30-minute overall timeout (use with caution)
+                 sock_connect=30,  # Keep connection timeout
+                 # sock_read=60  # Timeout for single read (for slow streaming data)
+             )
+
+             # Extract valid key-value pairs from the input headers (excluding those with None values).
+             # This is because aiohttp request headers (headers) must comply with standard HTTP
+             # protocol requirements. If headers contain None values, calling aiohttp.ClientSession.post()
+             # will raise an error.
+             req_headers = {
+                 key: value
+                 for key, value in llm_instance_config.headers.items()
+                 if value is not None
+             }
+
+             async with aiohttp.ClientSession(timeout=timeout) as session:
+                 async with session.post(
+                     llm_instance_config.api,
+                     json=payload,
+                     headers=req_headers,
+                     ssl=False,
+                 ) as response:
+                     if not response.ok:
+                         try:
+                             content = await response.content.read()
+                             json_content = json.loads(content)
+                             raise ModelException(
+                                 code=json_content.get("code"),
+                                 message=json_content.get(
+                                     "description", content.decode(errors="ignore")
+                                 ),
+                             )
+                         except ModelException as e:
+                             raise e
+                         except Exception:
+                             raise ModelException(
+                                 f"LLM {llm_instance_config.model_name} call error: {response.text}"
+                             )
+
+                     result = None
+                     async for line in response.content:
+                         if not line.startswith(b"data"):
+                             continue
+
+                         try:
+                             line_decoded = line.decode().split("data:")[1]
+                             if "[DONE]" in line_decoded:
+                                 break
+                             line_json = json.loads(line_decoded, strict=False)
+                             if "choices" not in line_json:
+                                 raise Exception(
+                                     f"-----------------{line_json}---------------------------"
+                                 )
+                             else:
+                                 if len(line_json["choices"]) > 0:
+                                     # Accumulate content
+                                     delta_content = (
+                                         line_json["choices"][0]["delta"].get("content")
+                                         or ""
+                                     )
+                                     delta_reasoning = (
+                                         line_json["choices"][0]["delta"].get(
+                                             "reasoning_content"
+                                         )
+                                         or ""
+                                     )
+
+                                     accu_content += delta_content
+                                     reasoning_content += delta_reasoning
+
+                                     # Capture finish_reason
+                                     chunk_finish_reason = line_json["choices"][0].get("finish_reason")
+                                     if chunk_finish_reason:
+                                         finish_reason = chunk_finish_reason
+
+                                     # Parse tool_calls using ToolCallsParser
+                                     delta = line_json["choices"][0]["delta"]
+                                     tool_parser.parse_delta_dict(delta)
+
+                                     if line_json.get("usage") or line_json["choices"][
+                                         0
+                                     ].get("usage"):
+                                         await self.update_usage(line_json)
+
+                                     result = {
+                                         "content": accu_content,
+                                         "reasoning_content": reasoning_content,
+                                     }
+
+                                     # Add token usage information
+                                     # {"completion_tokens": 26, "prompt_tokens": 159, "total_tokens": 185, "prompt_tokens_details": {"cached_tokens": 0, "uncached_tokens": 159}, "completion_tokens_details": {"reasoning_tokens": 0}}
+                                     result["usage"] = line_json.get("usage", {})
+
+                                     # Add tool call information using ToolCallsParser
+                                     result.update(tool_parser.get_result())
+
+                                     # Add finish_reason for downstream tool call validation
+                                     if finish_reason:
+                                         result["finish_reason"] = finish_reason
+
+                                     yield result
+                         except Exception as e:
+                             raise Exception(
+                                 f"LLM {llm_instance_config.model_name} decode error: {repr(e)} content:\n{line}"
+                             )
+
+                     if result:
+                         # Use sanitized messages for cache key to ensure consistency
+                         self.set_cache_by_dict(
+                             llm_instance_config.model_name,
+                             sanitized_messages,
+                             result
+                         )
+
+                     if "choices" in line_json:
+                         await self.update_usage(line_json)
+
+         except ModelException as e:
+             raise e
+         except Exception as e:
+             raise e
+
+
+ class LLMOpenai(LLM):
+     def __init__(self, context: Context):
+         super().__init__(context)
+
+     async def chat(
+         self,
+         llm_instance_config: LLMInstanceConfig,
+         messages: Messages,
+         continous_content: Optional[str] = None,
+         temperature: Optional[float] = None,
+         no_cache: bool = False,
+         **kwargs,
+     ):
+         self.log_request(messages, continous_content)
+
+         # Verify whether the API key exists and is not empty
+         if not llm_instance_config.api_key:
+             llm_instance_config.set_api_key("dummy_api_key")
+
+         # For OpenAI-compatible APIs, ensure that base_url does not contain the full path
+         # AsyncOpenAI will automatically add paths such as /chat/completions
+         api_url = llm_instance_config.api
+         if api_url.endswith("/chat/completions"):
+             base_url = api_url.replace("/chat/completions", "")
+         elif api_url.endswith("/v1/chat/completions"):
+             base_url = api_url.replace("/v1/chat/completions", "/v1")
+         else:
+             # If the URL format does not match expectations, keep it as is, but it may cause errors.
+             base_url = api_url
+
+         client = AsyncOpenAI(
+             base_url=base_url,
+             api_key=llm_instance_config.api_key,
+             default_headers=llm_instance_config.headers,
+         )
+
+         self.set_messages(messages, continous_content)
+
+         # Sanitize messages BEFORE cache check to ensure consistent cache keys
+         sanitized_messages = sanitize_and_log(
+             messages.get_messages_as_dict(), logger.warning
+         )
+
+         if not no_cache and not flags.is_enabled(flags.DISABLE_LLM_CACHE):
+             # Use sanitized messages for cache key to ensure consistency
+             cache_value = self.get_cache_by_dict(llm_instance_config.model_name, sanitized_messages)
+             if cache_value is not None:
+                 yield cache_value
+                 return
+
+         # Reuse sanitized messages from cache key generation (no need to sanitize again)
+
+         # Prepare API call parameters
+         api_params = {
+             "model": llm_instance_config.model_name,
+             "messages": sanitized_messages,
+             "stream": True,
+             "max_tokens": llm_instance_config.max_tokens,
+             "temperature": temperature,
+         }
+
+         # If there is a tools parameter, add it to the API call, and support custom tool_choice.
+         if "tools" in kwargs and kwargs["tools"]:
+             api_params["tools"] = kwargs["tools"]
+             tool_choice = kwargs.get("tool_choice")
+             # When tool_choice is provided, inherit it; otherwise, default to auto
+             api_params["tool_choice"] = tool_choice if tool_choice else "auto"
+
+         response = await client.chat.completions.create(**api_params)
+
+         accu_answer = ""
+         accu_reasoning = ""
+         finish_reason = None
+         result = None
+         # Use ToolCallsParser to handle tool calls parsing
+         tool_parser = ToolCallsParser()
+
+         async for chunk in response:
+             delta = chunk.choices[0].delta
+             if hasattr(delta, "content") and delta.content is not None:
+                 accu_answer += delta.content
+
+             if (
+                 hasattr(delta, "reasoning_content")
+                 and delta.reasoning_content is not None
+             ):
+                 accu_reasoning += delta.reasoning_content
+
+             # Capture finish_reason
+             chunk_finish_reason = chunk.choices[0].finish_reason
+             if chunk_finish_reason:
+                 finish_reason = chunk_finish_reason
+
+             # Parse tool_calls using ToolCallsParser
+             tool_parser.parse_delta_object(delta)
+
+             await self.update_usage(chunk)
+
+             result = {
+                 "content": accu_answer,
+                 "reasoning_content": accu_reasoning,
+             }
+
+             # Add tool call information using ToolCallsParser
+             result.update(tool_parser.get_result())
+
+             # Add finish_reason for downstream tool call validation
+             if finish_reason:
+                 result["finish_reason"] = finish_reason
+
+             yield result
+
+         if result:
+             # Use sanitized messages for cache key to ensure consistency
+             self.set_cache_by_dict(
+                 llm_instance_config.model_name,
+                 sanitized_messages,
+                 result
+             )
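
For orientation, a minimal sketch of how the ToolCallsParser above accumulates streamed tool-call fragments. It assumes the hunk above is dolphin/core/llm/llm.py (as the file list suggests) and that the package and its dependencies are installed; the delta dicts and the "search" tool call are hand-written stand-ins for streaming chunks, not real API output.

# Illustrative driver for ToolCallsParser; the deltas below are invented examples.
from dolphin.core.llm.llm import ToolCallsParser

# Two chunks of one streamed tool call: the first carries the id and function
# name plus the opening argument fragment, the second carries the rest.
deltas = [
    {"tool_calls": [{"index": 0, "id": "call_1",
                     "function": {"name": "search", "arguments": '{"query": "dol'}}]},
    {"tool_calls": [{"index": 0,
                     "function": {"arguments": 'phin"}'}}]},
]

parser = ToolCallsParser()
for delta in deltas:
    parser.parse_delta_dict(delta)

# get_result() merges the fragments: func_name/func_args mirror the legacy
# single-call fields (index 0), while tool_calls_data keeps every indexed call
# with its id, name, and argument pieces in arrival order.
print(parser.get_result())
# {'func_name': 'search', 'func_args': ['{"query": "dol', 'phin"}'],
#  'tool_calls_data': {0: {'id': 'call_1', 'name': 'search',
#                          'arguments': ['{"query": "dol', 'phin"}']}}
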
dolphin/core/llm/llm_call.py
@@ -0,0 +1,100 @@
+ import abc
+ import time
+ import traceback
+ from typing import Any
+
+ from dolphin.core.common.enums import MessageRole, Messages
+ from dolphin.core.logging.logger import get_logger
+
+ logger = get_logger("llm")
+
+
+ class LLMCall(abc.ABC):
+     """Abstract base class for LLM operations in the memory system."""
+
+     def __init__(self, llm_client, memory_config):
+         """
+         Initialize the LLM call.
+
+         :param llm_client: LLM client instance
+         :param memory_config: Memory configuration
+         """
+         self.llm_client = llm_client
+         self.config = memory_config
+         self.model = self.llm_client.config.get_fast_model_config().name
+
+     def execute(self, llm_args: dict, **kwargs) -> Any:
+         """Execute knowledge merging for a list of knowledge points."""
+         try:
+             start_time = time.time()
+             prompt = self._build_prompt(**kwargs)
+             if prompt:
+                 llm_output = self._call_llm_with_retry(prompt, **llm_args)
+                 result = self._post_process(llm_output, **kwargs)
+             else:
+                 result = self._no_merge_result(**kwargs)
+
+             end_time = time.time()
+             self._log(end_time - start_time, **kwargs)
+             return result
+
+         except Exception as e:
+             logger.error(f"llm_call execution failed: {e}")
+             raise
+
+     @abc.abstractmethod
+     def _log(self, time_cost: float, **kwargs) -> str:
+         """Log the execution result."""
+         raise NotImplementedError
+
+     @abc.abstractmethod
+     def _no_merge_result(self, **kwargs) -> Any:
+         """No merge result."""
+         raise NotImplementedError
+
+     @abc.abstractmethod
+     def _build_prompt(self, **kwargs) -> str:
+         """Build the prompt for the LLM call."""
+         raise NotImplementedError
+
+     @abc.abstractmethod
+     def _post_process(self, llm_output: str, **kwargs) -> Any:
+         """Post-process the LLM output."""
+         raise NotImplementedError
+
+     def _call_llm_with_retry(self, prompt: str, **kwargs) -> str:
+         """Call LLM with retry logic."""
+         max_retries = getattr(self.config, "max_extraction_retries", 2)
+
+         for attempt in range(max_retries):
+             try:
+                 # Use asyncio to call the async LLM client
+                 return self._call_llm_sync(prompt, **kwargs)
+             except Exception as e:
+                 logger.warning(
+                     f"LLM call attempt {attempt + 1} failed: {e} traceback: {traceback.format_exc()}"
+                 )
+                 if attempt == max_retries - 1:
+                     raise Exception(
+                         f"LLM call failed after {max_retries} attempts: {e}"
+                     )
+
+     def _call_llm_sync(self, prompt: str, **kwargs) -> str:
+         """Sync LLM call implementation."""
+         try:
+             # Prepare messages for LLM client
+             messages = Messages()
+             messages.append_message(MessageRole.USER, prompt)
+
+             response = self.llm_client.mf_chat(
+                 messages=messages,
+                 model=self.model,  # Use default model
+                 temperature=0.1,  # Low temperature for consistent extraction
+                 no_cache=True,  # Memory extraction should not use cache
+                 **kwargs,
+             )
+
+             return response
+         except Exception as e:
+             logger.error(f"LLM sync call failed: {e}")
+             raise
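
For orientation, a hypothetical LLMCall subclass run against hand-written stubs. It assumes the hunk above is dolphin/core/llm/llm_call.py (as the file list suggests); KnowledgeMergeCall, the stub client, and the stub config are invented for illustration and are not part of the package.

# Illustrative subclass of LLMCall; all names below except LLMCall are invented.
from types import SimpleNamespace

from dolphin.core.llm.llm_call import LLMCall


class KnowledgeMergeCall(LLMCall):
    """Merges a list of knowledge points into one prompt and parses the reply."""

    def _build_prompt(self, **kwargs) -> str:
        points = kwargs.get("points", [])
        if not points:
            return ""  # empty prompt -> execute() falls back to _no_merge_result
        return "Merge these knowledge points into one sentence:\n" + "\n".join(points)

    def _post_process(self, llm_output: str, **kwargs):
        return llm_output.strip()

    def _no_merge_result(self, **kwargs):
        return kwargs.get("points", [])

    def _log(self, time_cost: float, **kwargs) -> str:
        msg = f"merged {len(kwargs.get('points', []))} points in {time_cost:.3f}s"
        print(msg)
        return msg


# Stub client: mf_chat() echoes a canned answer instead of calling a real model,
# and config.get_fast_model_config().name supplies the model name the base class reads.
stub_client = SimpleNamespace(
    config=SimpleNamespace(get_fast_model_config=lambda: SimpleNamespace(name="fast-model")),
    mf_chat=lambda **kw: "Dolphin parses tool calls and caches LLM responses.",
)
stub_config = SimpleNamespace(max_extraction_retries=2)

call = KnowledgeMergeCall(stub_client, stub_config)
print(call.execute({}, points=["Dolphin parses tool calls", "Dolphin caches LLM responses"]))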