kweaver-dolphin 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- DolphinLanguageSDK/__init__.py +58 -0
- dolphin/__init__.py +62 -0
- dolphin/cli/__init__.py +20 -0
- dolphin/cli/args/__init__.py +9 -0
- dolphin/cli/args/parser.py +567 -0
- dolphin/cli/builtin_agents/__init__.py +22 -0
- dolphin/cli/commands/__init__.py +4 -0
- dolphin/cli/interrupt/__init__.py +8 -0
- dolphin/cli/interrupt/handler.py +205 -0
- dolphin/cli/interrupt/keyboard.py +82 -0
- dolphin/cli/main.py +49 -0
- dolphin/cli/multimodal/__init__.py +34 -0
- dolphin/cli/multimodal/clipboard.py +327 -0
- dolphin/cli/multimodal/handler.py +249 -0
- dolphin/cli/multimodal/image_processor.py +214 -0
- dolphin/cli/multimodal/input_parser.py +149 -0
- dolphin/cli/runner/__init__.py +8 -0
- dolphin/cli/runner/runner.py +989 -0
- dolphin/cli/ui/__init__.py +10 -0
- dolphin/cli/ui/console.py +2795 -0
- dolphin/cli/ui/input.py +340 -0
- dolphin/cli/ui/layout.py +425 -0
- dolphin/cli/ui/stream_renderer.py +302 -0
- dolphin/cli/utils/__init__.py +8 -0
- dolphin/cli/utils/helpers.py +135 -0
- dolphin/cli/utils/version.py +49 -0
- dolphin/core/__init__.py +107 -0
- dolphin/core/agent/__init__.py +10 -0
- dolphin/core/agent/agent_state.py +69 -0
- dolphin/core/agent/base_agent.py +970 -0
- dolphin/core/code_block/__init__.py +0 -0
- dolphin/core/code_block/agent_init_block.py +0 -0
- dolphin/core/code_block/assign_block.py +98 -0
- dolphin/core/code_block/basic_code_block.py +1865 -0
- dolphin/core/code_block/explore_block.py +1327 -0
- dolphin/core/code_block/explore_block_v2.py +712 -0
- dolphin/core/code_block/explore_strategy.py +672 -0
- dolphin/core/code_block/judge_block.py +220 -0
- dolphin/core/code_block/prompt_block.py +32 -0
- dolphin/core/code_block/skill_call_deduplicator.py +291 -0
- dolphin/core/code_block/tool_block.py +129 -0
- dolphin/core/common/__init__.py +17 -0
- dolphin/core/common/constants.py +176 -0
- dolphin/core/common/enums.py +1173 -0
- dolphin/core/common/exceptions.py +133 -0
- dolphin/core/common/multimodal.py +539 -0
- dolphin/core/common/object_type.py +165 -0
- dolphin/core/common/output_format.py +432 -0
- dolphin/core/common/types.py +36 -0
- dolphin/core/config/__init__.py +16 -0
- dolphin/core/config/global_config.py +1289 -0
- dolphin/core/config/ontology_config.py +133 -0
- dolphin/core/context/__init__.py +12 -0
- dolphin/core/context/context.py +1580 -0
- dolphin/core/context/context_manager.py +161 -0
- dolphin/core/context/var_output.py +82 -0
- dolphin/core/context/variable_pool.py +356 -0
- dolphin/core/context_engineer/__init__.py +41 -0
- dolphin/core/context_engineer/config/__init__.py +5 -0
- dolphin/core/context_engineer/config/settings.py +402 -0
- dolphin/core/context_engineer/core/__init__.py +7 -0
- dolphin/core/context_engineer/core/budget_manager.py +327 -0
- dolphin/core/context_engineer/core/context_assembler.py +583 -0
- dolphin/core/context_engineer/core/context_manager.py +637 -0
- dolphin/core/context_engineer/core/tokenizer_service.py +260 -0
- dolphin/core/context_engineer/example/incremental_example.py +267 -0
- dolphin/core/context_engineer/example/traditional_example.py +334 -0
- dolphin/core/context_engineer/services/__init__.py +5 -0
- dolphin/core/context_engineer/services/compressor.py +399 -0
- dolphin/core/context_engineer/utils/__init__.py +6 -0
- dolphin/core/context_engineer/utils/context_utils.py +441 -0
- dolphin/core/context_engineer/utils/message_formatter.py +270 -0
- dolphin/core/context_engineer/utils/token_utils.py +139 -0
- dolphin/core/coroutine/__init__.py +15 -0
- dolphin/core/coroutine/context_snapshot.py +154 -0
- dolphin/core/coroutine/context_snapshot_profile.py +922 -0
- dolphin/core/coroutine/context_snapshot_store.py +268 -0
- dolphin/core/coroutine/execution_frame.py +145 -0
- dolphin/core/coroutine/execution_state_registry.py +161 -0
- dolphin/core/coroutine/resume_handle.py +101 -0
- dolphin/core/coroutine/step_result.py +101 -0
- dolphin/core/executor/__init__.py +18 -0
- dolphin/core/executor/debug_controller.py +630 -0
- dolphin/core/executor/dolphin_executor.py +1063 -0
- dolphin/core/executor/executor.py +624 -0
- dolphin/core/flags/__init__.py +27 -0
- dolphin/core/flags/definitions.py +49 -0
- dolphin/core/flags/manager.py +113 -0
- dolphin/core/hook/__init__.py +95 -0
- dolphin/core/hook/expression_evaluator.py +499 -0
- dolphin/core/hook/hook_dispatcher.py +380 -0
- dolphin/core/hook/hook_types.py +248 -0
- dolphin/core/hook/isolated_variable_pool.py +284 -0
- dolphin/core/interfaces.py +53 -0
- dolphin/core/llm/__init__.py +0 -0
- dolphin/core/llm/llm.py +495 -0
- dolphin/core/llm/llm_call.py +100 -0
- dolphin/core/llm/llm_client.py +1285 -0
- dolphin/core/llm/message_sanitizer.py +120 -0
- dolphin/core/logging/__init__.py +20 -0
- dolphin/core/logging/logger.py +526 -0
- dolphin/core/message/__init__.py +8 -0
- dolphin/core/message/compressor.py +749 -0
- dolphin/core/parser/__init__.py +8 -0
- dolphin/core/parser/parser.py +405 -0
- dolphin/core/runtime/__init__.py +10 -0
- dolphin/core/runtime/runtime_graph.py +926 -0
- dolphin/core/runtime/runtime_instance.py +446 -0
- dolphin/core/skill/__init__.py +14 -0
- dolphin/core/skill/context_retention.py +157 -0
- dolphin/core/skill/skill_function.py +686 -0
- dolphin/core/skill/skill_matcher.py +282 -0
- dolphin/core/skill/skillkit.py +700 -0
- dolphin/core/skill/skillset.py +72 -0
- dolphin/core/trajectory/__init__.py +10 -0
- dolphin/core/trajectory/recorder.py +189 -0
- dolphin/core/trajectory/trajectory.py +522 -0
- dolphin/core/utils/__init__.py +9 -0
- dolphin/core/utils/cache_kv.py +212 -0
- dolphin/core/utils/tools.py +340 -0
- dolphin/lib/__init__.py +93 -0
- dolphin/lib/debug/__init__.py +8 -0
- dolphin/lib/debug/visualizer.py +409 -0
- dolphin/lib/memory/__init__.py +28 -0
- dolphin/lib/memory/async_processor.py +220 -0
- dolphin/lib/memory/llm_calls.py +195 -0
- dolphin/lib/memory/manager.py +78 -0
- dolphin/lib/memory/sandbox.py +46 -0
- dolphin/lib/memory/storage.py +245 -0
- dolphin/lib/memory/utils.py +51 -0
- dolphin/lib/ontology/__init__.py +12 -0
- dolphin/lib/ontology/basic/__init__.py +0 -0
- dolphin/lib/ontology/basic/base.py +102 -0
- dolphin/lib/ontology/basic/concept.py +130 -0
- dolphin/lib/ontology/basic/object.py +11 -0
- dolphin/lib/ontology/basic/relation.py +63 -0
- dolphin/lib/ontology/datasource/__init__.py +27 -0
- dolphin/lib/ontology/datasource/datasource.py +66 -0
- dolphin/lib/ontology/datasource/oracle_datasource.py +338 -0
- dolphin/lib/ontology/datasource/sql.py +845 -0
- dolphin/lib/ontology/mapping.py +177 -0
- dolphin/lib/ontology/ontology.py +733 -0
- dolphin/lib/ontology/ontology_context.py +16 -0
- dolphin/lib/ontology/ontology_manager.py +107 -0
- dolphin/lib/skill_results/__init__.py +31 -0
- dolphin/lib/skill_results/cache_backend.py +559 -0
- dolphin/lib/skill_results/result_processor.py +181 -0
- dolphin/lib/skill_results/result_reference.py +179 -0
- dolphin/lib/skill_results/skillkit_hook.py +324 -0
- dolphin/lib/skill_results/strategies.py +328 -0
- dolphin/lib/skill_results/strategy_registry.py +150 -0
- dolphin/lib/skillkits/__init__.py +44 -0
- dolphin/lib/skillkits/agent_skillkit.py +155 -0
- dolphin/lib/skillkits/cognitive_skillkit.py +82 -0
- dolphin/lib/skillkits/env_skillkit.py +250 -0
- dolphin/lib/skillkits/mcp_adapter.py +616 -0
- dolphin/lib/skillkits/mcp_skillkit.py +771 -0
- dolphin/lib/skillkits/memory_skillkit.py +650 -0
- dolphin/lib/skillkits/noop_skillkit.py +31 -0
- dolphin/lib/skillkits/ontology_skillkit.py +89 -0
- dolphin/lib/skillkits/plan_act_skillkit.py +452 -0
- dolphin/lib/skillkits/resource/__init__.py +52 -0
- dolphin/lib/skillkits/resource/models/__init__.py +6 -0
- dolphin/lib/skillkits/resource/models/skill_config.py +109 -0
- dolphin/lib/skillkits/resource/models/skill_meta.py +127 -0
- dolphin/lib/skillkits/resource/resource_skillkit.py +393 -0
- dolphin/lib/skillkits/resource/skill_cache.py +215 -0
- dolphin/lib/skillkits/resource/skill_loader.py +395 -0
- dolphin/lib/skillkits/resource/skill_validator.py +406 -0
- dolphin/lib/skillkits/resource_skillkit.py +11 -0
- dolphin/lib/skillkits/search_skillkit.py +163 -0
- dolphin/lib/skillkits/sql_skillkit.py +274 -0
- dolphin/lib/skillkits/system_skillkit.py +509 -0
- dolphin/lib/skillkits/vm_skillkit.py +65 -0
- dolphin/lib/utils/__init__.py +9 -0
- dolphin/lib/utils/data_process.py +207 -0
- dolphin/lib/utils/handle_progress.py +178 -0
- dolphin/lib/utils/security.py +139 -0
- dolphin/lib/utils/text_retrieval.py +462 -0
- dolphin/lib/vm/__init__.py +11 -0
- dolphin/lib/vm/env_executor.py +895 -0
- dolphin/lib/vm/python_session_manager.py +453 -0
- dolphin/lib/vm/vm.py +610 -0
- dolphin/sdk/__init__.py +60 -0
- dolphin/sdk/agent/__init__.py +12 -0
- dolphin/sdk/agent/agent_factory.py +236 -0
- dolphin/sdk/agent/dolphin_agent.py +1106 -0
- dolphin/sdk/api/__init__.py +4 -0
- dolphin/sdk/runtime/__init__.py +8 -0
- dolphin/sdk/runtime/env.py +363 -0
- dolphin/sdk/skill/__init__.py +10 -0
- dolphin/sdk/skill/global_skills.py +706 -0
- dolphin/sdk/skill/traditional_toolkit.py +260 -0
- kweaver_dolphin-0.1.0.dist-info/METADATA +521 -0
- kweaver_dolphin-0.1.0.dist-info/RECORD +199 -0
- kweaver_dolphin-0.1.0.dist-info/WHEEL +5 -0
- kweaver_dolphin-0.1.0.dist-info/entry_points.txt +27 -0
- kweaver_dolphin-0.1.0.dist-info/licenses/LICENSE.txt +201 -0
- kweaver_dolphin-0.1.0.dist-info/top_level.txt +2 -0
dolphin/core/llm/llm.py
ADDED
@@ -0,0 +1,495 @@
from abc import abstractmethod
import json
from typing import Any, Optional
from dolphin.core.common.exceptions import ModelException
from dolphin.core import flags
import aiohttp
from openai import AsyncOpenAI

from dolphin.core.common.enums import MessageRole, Messages
from dolphin.core.config.global_config import LLMInstanceConfig
from dolphin.core.common.constants import (
    MSG_CONTINUOUS_CONTENT,
    TOOL_CALL_ID_PREFIX,
    is_msg_duplicate_skill_call,
)
from dolphin.core.context.context import Context
from dolphin.core.logging.logger import get_logger
from dolphin.core.llm.message_sanitizer import sanitize_and_log

logger = get_logger("llm")


class ToolCallsParser:
    """Helper class for parsing tool calls from LLM streaming responses.

    This class consolidates the tool_calls parsing logic used by both
    LLMModelFactory (raw HTTP) and LLMOpenai (SDK) implementations.
    """

    def __init__(self):
        self.tool_calls_data: dict = {}
        self.func_name: str | None = None
        self.func_args: list = []

    def parse_delta_dict(self, delta: dict, tool_calls_key: str = "tool_calls"):
        """Parse tool_calls from a dict delta (used by LLMModelFactory).

        Args:
            delta: The delta dict from streaming response
            tool_calls_key: Key name for tool_calls in delta
        """
        if tool_calls_key not in delta or not delta[tool_calls_key]:
            return

        for tool_call in delta[tool_calls_key]:
            index = self._normalize_index(tool_call.get("index", 0))

            if index not in self.tool_calls_data:
                self.tool_calls_data[index] = {"id": None, "name": None, "arguments": []}

            # Preserve tool_call_id from LLM
            if tool_call.get("id"):
                self.tool_calls_data[index]["id"] = tool_call["id"]

            if "function" in tool_call:
                if tool_call["function"].get("name"):
                    self.tool_calls_data[index]["name"] = tool_call["function"]["name"]
                if tool_call["function"].get("arguments"):
                    self.tool_calls_data[index]["arguments"].append(
                        tool_call["function"]["arguments"]
                    )

            # Legacy single tool call: update from index 0 for backward compat
            if index == 0:
                self._update_legacy_fields(tool_call)

    def parse_delta_object(self, delta):
        """Parse tool_calls from an OpenAI SDK delta object (used by LLMOpenai).

        Args:
            delta: The delta object from OpenAI SDK streaming response
        """
        if not hasattr(delta, "tool_calls") or delta.tool_calls is None:
            return

        for tool_call in delta.tool_calls:
            # Get index from OpenAI SDK object
            index = getattr(tool_call, "index", 0) or 0

            if index not in self.tool_calls_data:
                self.tool_calls_data[index] = {"id": None, "name": None, "arguments": []}

            # Preserve tool_call_id from LLM
            if getattr(tool_call, "id", None):
                self.tool_calls_data[index]["id"] = tool_call.id

            if hasattr(tool_call, "function") and tool_call.function is not None:
                if tool_call.function.name is not None:
                    self.tool_calls_data[index]["name"] = tool_call.function.name
                if tool_call.function.arguments is not None:
                    self.tool_calls_data[index]["arguments"].append(tool_call.function.arguments)

            # Legacy single tool call: update from index 0 for backward compat
            if index == 0:
                self._update_legacy_fields_from_object(tool_call)

    def _normalize_index(self, raw_index) -> int:
        """Normalize index to integer, defaulting to 0 on error."""
        try:
            return int(raw_index)
        except (ValueError, TypeError):
            return 0

    def _update_legacy_fields(self, tool_call: dict):
        """Update legacy single tool call fields from dict."""
        if "function" in tool_call:
            if tool_call["function"].get("name"):
                self.func_name = tool_call["function"]["name"]
            if tool_call["function"].get("arguments"):
                self.func_args.append(tool_call["function"]["arguments"])

    def _update_legacy_fields_from_object(self, tool_call):
        """Update legacy single tool call fields from SDK object."""
        if hasattr(tool_call, "function") and tool_call.function is not None:
            if tool_call.function.name is not None:
                self.func_name = tool_call.function.name
            if tool_call.function.arguments is not None:
                self.func_args.append(tool_call.function.arguments)

    def get_result(self) -> dict:
        """Get the parsed result as a dict to merge into the response."""
        result = {}
        if self.func_name:
            result["func_name"] = self.func_name
        if self.func_args:
            result["func_args"] = self.func_args
        if self.tool_calls_data:
            result["tool_calls_data"] = self.tool_calls_data
        return result


class LLM:
    def __init__(self, context: Context):
        self.context = context

    @abstractmethod
    async def chat(
        self,
        llm_instance_config: LLMInstanceConfig,
        messages: Messages,
        continous_content: Optional[str] = None,
        temperature: Optional[float] = None,
        no_cache: bool = False,
        **kwargs,
    ):
        pass

    async def update_usage(self, final_chunk):
        await self.context.update_usage(final_chunk)

    def set_messages(self, messages: Messages, continous_content: Optional[str] = None):
        if continous_content:
            to_be_added = (
                MSG_CONTINUOUS_CONTENT
                if is_msg_duplicate_skill_call(continous_content)
                else ""
            )
            if messages[-1].role == MessageRole.ASSISTANT:
                messages[-1].content += continous_content + to_be_added
                messages[-1].metadata["prefix"] = True
            else:
                messages.append_message(
                    MessageRole.ASSISTANT,
                    continous_content + to_be_added,
                    metadata={"prefix": True},
                )

        self.context.set_messages(messages)

    def set_cache(self, llm: str, cache_key: Messages, cache_value: Any):
        self.context.get_config().set_llm_cache(llm, cache_key, cache_value)

    def get_cache(self, llm: str, cache_key: Messages):
        return self.context.get_config().get_llm_cache(llm, cache_key)

    def set_cache_by_dict(self, llm: str, cache_key: list, cache_value: Any):
        """Set cache using dict list as key (for sanitized messages)."""
        self.context.get_config().set_llm_cache_by_dict(llm, cache_key, cache_value)

    def get_cache_by_dict(self, llm: str, cache_key: list):
        """Get cache using dict list as key (for sanitized messages)."""
        return self.context.get_config().get_llm_cache_by_dict(llm, cache_key)

    def log_request(self, messages: Messages, continous_content: Optional[str] = None):
        self.context.debug(
            "LLM chat messages[{}] length[{}] continous_content[{}]".format(
                messages.str_summary(),
                messages.length(),
                continous_content.replace("\n", "\\n") if continous_content else "",
            )
        )


class LLMModelFactory(LLM):
    def __init__(self, context: Context):
        super().__init__(context)

    async def chat(
        self,
        llm_instance_config: LLMInstanceConfig,
        messages: Messages,
        continous_content: Optional[str] = None,
        temperature: Optional[float] = None,
        no_cache: bool = False,
        **kwargs,
    ):
        self.log_request(messages, continous_content)

        self.set_messages(messages, continous_content)

        # Sanitize messages BEFORE cache check to ensure consistent cache keys
        sanitized_messages = sanitize_and_log(
            messages.get_messages_as_dict(), logger.warning
        )

        if not no_cache and not flags.is_enabled(flags.DISABLE_LLM_CACHE):
            # Use sanitized messages for cache key to ensure consistency
            cache_value = self.get_cache_by_dict(llm_instance_config.model_name, sanitized_messages)
            if cache_value is not None:
                yield cache_value
                return
        try:
            # Reuse sanitized messages from cache key generation (no need to sanitize again)

            # Build request payload
            payload = {
                "model": llm_instance_config.model_name,
                "temperature": (
                    temperature
                    if temperature is not None
                    else llm_instance_config.temperature
                ),
                "top_p": llm_instance_config.top_p,
                "top_k": llm_instance_config.top_k,
                "messages": sanitized_messages,
                "max_tokens": llm_instance_config.max_tokens,
                "stream": True,
            }
            # If there is a tools parameter, add it to the API call, and support custom tool_choice.
            if "tools" in kwargs and kwargs["tools"]:
                payload["tools"] = kwargs["tools"]
                # Support tool_choice: auto|none|required or provider-specific
                tool_choice = kwargs.get("tool_choice")
                payload["tool_choice"] = tool_choice if tool_choice else "auto"

            line_json = ""
            accu_content = ""
            reasoning_content = ""
            finish_reason = None
            # Use ToolCallsParser to handle tool calls parsing
            tool_parser = ToolCallsParser()

            timeout = aiohttp.ClientTimeout(
                total=1800,  # Disable overall timeout (use with caution)
                sock_connect=30,  # Keep connection timeout
                # sock_read=60  # Timeout for single read (for slow streaming data)
            )

            # Extract valid key-value pairs from the input headers (excluding those with None values).
            # This is because aiohttp request headers (headers) must comply with standard HTTP
            # protocol requirements. If headers contain None values, calling aiohttp.ClientSession.post()
            # will raise an error.
            req_headers = {
                key: value
                for key, value in llm_instance_config.headers.items()
                if value is not None
            }

            async with aiohttp.ClientSession(timeout=timeout) as session:
                async with session.post(
                    llm_instance_config.api,
                    json=payload,
                    headers=req_headers,
                    ssl=False,
                ) as response:
                    if not response.ok:
                        try:
                            content = await response.content.read()
                            json_content = json.loads(content)
                            raise ModelException(
                                code=json_content.get("code"),
                                message=json_content.get(
                                    "description", content.decode(errors="ignore")
                                ),
                            )
                        except ModelException as e:
                            raise e
                        except Exception:
                            raise ModelException(
                                f"LLM {llm_instance_config.model_name} call error: {response.text}"
                            )

                    result = None
                    async for line in response.content:
                        if not line.startswith(b"data"):
                            continue

                        try:
                            line_decoded = line.decode().split("data:")[1]
                            if "[DONE]" in line_decoded:
                                break
                            line_json = json.loads(line_decoded, strict=False)
                            if "choices" not in line_json:
                                raise Exception(
                                    f"-----------------{line_json}---------------------------"
                                )
                            else:
                                if len(line_json["choices"]) > 0:
                                    # Accumulate content
                                    delta_content = (
                                        line_json["choices"][0]["delta"].get("content")
                                        or ""
                                    )
                                    delta_reasoning = (
                                        line_json["choices"][0]["delta"].get(
                                            "reasoning_content"
                                        )
                                        or ""
                                    )

                                    accu_content += delta_content
                                    reasoning_content += delta_reasoning

                                    # Capture finish_reason
                                    chunk_finish_reason = line_json["choices"][0].get("finish_reason")
                                    if chunk_finish_reason:
                                        finish_reason = chunk_finish_reason

                                    # Parse tool_calls using ToolCallsParser
                                    delta = line_json["choices"][0]["delta"]
                                    tool_parser.parse_delta_dict(delta)

                                if line_json.get("usage") or line_json["choices"][
                                    0
                                ].get("usage"):
                                    await self.update_usage(line_json)

                                result = {
                                    "content": accu_content,
                                    "reasoning_content": reasoning_content,
                                }

                                # Add token usage information
                                # {"completion_tokens": 26, "prompt_tokens": 159, "total_tokens": 185, "prompt_tokens_details": {"cached_tokens": 0, "uncached_tokens": 159}, "completion_tokens_details": {"reasoning_tokens": 0}}
                                result["usage"] = line_json.get("usage", {})

                                # Add tool call information using ToolCallsParser
                                result.update(tool_parser.get_result())

                                # Add finish_reason for downstream tool call validation
                                if finish_reason:
                                    result["finish_reason"] = finish_reason

                                yield result
                        except Exception as e:
                            raise Exception(
                                f"LLM {llm_instance_config.model_name} decode error: {repr(e)} content:\n{line}"
                            )

                    if result:
                        # Use sanitized messages for cache key to ensure consistency
                        self.set_cache_by_dict(
                            llm_instance_config.model_name,
                            sanitized_messages,
                            result
                        )

                    if "choices" in line_json:
                        await self.update_usage(line_json)

        except ModelException as e:
            raise e
        except Exception as e:
            raise e


class LLMOpenai(LLM):
    def __init__(self, context: Context):
        super().__init__(context)

    async def chat(
        self,
        llm_instance_config: LLMInstanceConfig,
        messages: Messages,
        continous_content: Optional[str] = None,
        temperature: Optional[float] = None,
        no_cache: bool = False,
        **kwargs,
    ):
        self.log_request(messages, continous_content)

        # Verify whether the API key exists and is not empty
        if not llm_instance_config.api_key:
            llm_instance_config.set_api_key("dummy_api_key")

        # For OpenAI-compatible APIs, ensure that base_url does not contain the full path
        # AsyncOpenAI will automatically add paths such as /chat/completions
        api_url = llm_instance_config.api
        if api_url.endswith("/chat/completions"):
            base_url = api_url.replace("/chat/completions", "")
        elif api_url.endswith("/v1/chat/completions"):
            base_url = api_url.replace("/v1/chat/completions", "/v1")
        else:
            # If the URL format does not match expectations, keep it as is, but it may cause errors.
            base_url = api_url

        client = AsyncOpenAI(
            base_url=base_url,
            api_key=llm_instance_config.api_key,
            default_headers=llm_instance_config.headers,
        )

        self.set_messages(messages, continous_content)

        # Sanitize messages BEFORE cache check to ensure consistent cache keys
        sanitized_messages = sanitize_and_log(
            messages.get_messages_as_dict(), logger.warning
        )

        if not no_cache and not flags.is_enabled(flags.DISABLE_LLM_CACHE):
            # Use sanitized messages for cache key to ensure consistency
            cache_value = self.get_cache_by_dict(llm_instance_config.model_name, sanitized_messages)
            if cache_value is not None:
                yield cache_value
                return

        # Reuse sanitized messages from cache key generation (no need to sanitize again)

        # Prepare API call parameters
        api_params = {
            "model": llm_instance_config.model_name,
            "messages": sanitized_messages,
            "stream": True,
            "max_tokens": llm_instance_config.max_tokens,
            "temperature": temperature,
        }

        # If there is a tools parameter, add it to the API call, and support custom tool_choice.
        if "tools" in kwargs and kwargs["tools"]:
            api_params["tools"] = kwargs["tools"]
            tool_choice = kwargs.get("tool_choice")
            # When tool_choice is provided, inherit it; otherwise, default to auto
            api_params["tool_choice"] = tool_choice if tool_choice else "auto"

        response = await client.chat.completions.create(**api_params)

        accu_answer = ""
        accu_reasoning = ""
        finish_reason = None
        result = None
        # Use ToolCallsParser to handle tool calls parsing
        tool_parser = ToolCallsParser()

        async for chunk in response:
            delta = chunk.choices[0].delta
            if hasattr(delta, "content") and delta.content is not None:
                accu_answer += delta.content

            if (
                hasattr(delta, "reasoning_content")
                and delta.reasoning_content is not None
            ):
                accu_reasoning += delta.reasoning_content

            # Capture finish_reason
            chunk_finish_reason = chunk.choices[0].finish_reason
            if chunk_finish_reason:
                finish_reason = chunk_finish_reason

            # Parse tool_calls using ToolCallsParser
            tool_parser.parse_delta_object(delta)

            await self.update_usage(chunk)

            result = {
                "content": accu_answer,
                "reasoning_content": accu_reasoning,
            }

            # Add tool call information using ToolCallsParser
            result.update(tool_parser.get_result())

            # Add finish_reason for downstream tool call validation
            if finish_reason:
                result["finish_reason"] = finish_reason

            yield result

        if result:
            # Use sanitized messages for cache key to ensure consistency
            self.set_cache_by_dict(
                llm_instance_config.model_name,
                sanitized_messages,
                result
            )
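The ToolCallsParser above merges tool-call fragments that arrive across several stream chunks, keyed by their index field. A minimal sketch of that accumulation (an editor's illustration, not part of the package; the delta payloads below are hypothetical OpenAI-style stream chunks):

# Editor's sketch: two hypothetical streaming deltas fed into ToolCallsParser.
parser = ToolCallsParser()

# First chunk carries the tool-call id, the function name, and the start of the arguments.
parser.parse_delta_dict(
    {"tool_calls": [{"index": 0, "id": "call_1",
                     "function": {"name": "search", "arguments": '{"query": '}}]}
)

# A later chunk carries only the remainder of the JSON arguments for the same index.
parser.parse_delta_dict(
    {"tool_calls": [{"index": 0, "function": {"arguments": '"dolphin"}'}}]}
)

merged = parser.get_result()
# merged["tool_calls_data"][0] == {"id": "call_1", "name": "search",
#                                  "arguments": ['{"query": ', '"dolphin"}']}
# merged["func_name"] and merged["func_args"] mirror index 0 for the legacy single-call path.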
dolphin/core/llm/llm_call.py
ADDED
@@ -0,0 +1,100 @@
import abc
import time
import traceback
from typing import Any

from dolphin.core.common.enums import MessageRole, Messages
from dolphin.core.logging.logger import get_logger

logger = get_logger("llm")


class LLMCall(abc.ABC):
    """Abstract base class for LLM operations in the memory system."""

    def __init__(self, llm_client, memory_config):
        """
        Initialize the LLM call.

        :param llm_client: LLM client instance
        :param config: Memory configuration
        """
        self.llm_client = llm_client
        self.config = memory_config
        self.model = self.llm_client.config.get_fast_model_config().name

    def execute(self, llm_args: dict, **kwargs) -> Any:
        """Execute knowledge merging for a list of knowledge points."""
        try:
            start_time = time.time()
            prompt = self._build_prompt(**kwargs)
            if prompt:
                llm_output = self._call_llm_with_retry(prompt, **llm_args)
                result = self._post_process(llm_output, **kwargs)
            else:
                result = self._no_merge_result(**kwargs)

            end_time = time.time()
            self._log(end_time - start_time, **kwargs)
            return result

        except Exception as e:
            logger.error(f"llm_call execution failed: {e}")
            raise

    @abc.abstractmethod
    def _log(self, time_cost: float, **kwargs) -> str:
        """Log the execution result."""
        raise NotImplementedError

    @abc.abstractmethod
    def _no_merge_result(self, **kwargs) -> Any:
        """No merge result."""
        raise NotImplementedError

    @abc.abstractmethod
    def _build_prompt(self, **kwargs) -> str:
        """Build the prompt for the LLM call."""
        raise NotImplementedError

    @abc.abstractmethod
    def _post_process(self, llm_output: str, **kwargs) -> Any:
        """Post-process the LLM output."""
        raise NotImplementedError

    def _call_llm_with_retry(self, prompt: str, **kwargs) -> str:
        """Call LLM with retry logic."""
        max_retries = getattr(self.config, "max_extraction_retries", 2)

        for attempt in range(max_retries):
            try:
                # Use asyncio to call the async LLM client
                return self._call_llm_sync(prompt, **kwargs)
            except Exception as e:
                logger.warning(
                    f"LLM call attempt {attempt + 1} failed: {e} traceback: {traceback.format_exc()}"
                )
                if attempt == max_retries - 1:
                    raise Exception(
                        f"LLM call failed after {max_retries} attempts: {e}"
                    )

    def _call_llm_sync(self, prompt: str, **kwargs) -> str:
        """Sync LLM call implementation."""
        try:
            # Prepare messages for LLM client
            messages = Messages()
            messages.append_message(MessageRole.USER, prompt)

            response = self.llm_client.mf_chat(
                messages=messages,
                model=self.model,  # Use default model
                temperature=0.1,  # Low temperature for consistent extraction
                no_cache=True,  # Memory extraction should not use cache
                **kwargs,
            )

            return response
        except Exception as e:
            logger.error(f"LLM sync call failed: {e}")
            raise
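LLMCall leaves _build_prompt, _post_process, _no_merge_result, and _log abstract; execute() wires them around the retrying mf_chat call. A minimal concrete subclass, sketched by the editor under the assumption that llm_client and memory_config come from the surrounding memory system (the class name, prompt wording, and log message are illustrative only):

# Editor's sketch of a concrete LLMCall; names and prompt wording are hypothetical.
class SummarizeCall(LLMCall):
    def _build_prompt(self, **kwargs) -> str:
        # An empty prompt makes execute() fall through to _no_merge_result().
        text = kwargs.get("text", "")
        return f"Summarize the following text in one sentence:\n{text}" if text else ""

    def _post_process(self, llm_output: str, **kwargs) -> str:
        return llm_output.strip()

    def _no_merge_result(self, **kwargs) -> str:
        return ""

    def _log(self, time_cost: float, **kwargs) -> str:
        logger.info(f"SummarizeCall finished in {time_cost:.2f}s")
        return ""

# Hypothetical usage, given an initialized llm_client and memory_config:
# summary = SummarizeCall(llm_client, memory_config).execute({}, text="some long passage")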