kweaver-dolphin 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- DolphinLanguageSDK/__init__.py +58 -0
- dolphin/__init__.py +62 -0
- dolphin/cli/__init__.py +20 -0
- dolphin/cli/args/__init__.py +9 -0
- dolphin/cli/args/parser.py +567 -0
- dolphin/cli/builtin_agents/__init__.py +22 -0
- dolphin/cli/commands/__init__.py +4 -0
- dolphin/cli/interrupt/__init__.py +8 -0
- dolphin/cli/interrupt/handler.py +205 -0
- dolphin/cli/interrupt/keyboard.py +82 -0
- dolphin/cli/main.py +49 -0
- dolphin/cli/multimodal/__init__.py +34 -0
- dolphin/cli/multimodal/clipboard.py +327 -0
- dolphin/cli/multimodal/handler.py +249 -0
- dolphin/cli/multimodal/image_processor.py +214 -0
- dolphin/cli/multimodal/input_parser.py +149 -0
- dolphin/cli/runner/__init__.py +8 -0
- dolphin/cli/runner/runner.py +989 -0
- dolphin/cli/ui/__init__.py +10 -0
- dolphin/cli/ui/console.py +2795 -0
- dolphin/cli/ui/input.py +340 -0
- dolphin/cli/ui/layout.py +425 -0
- dolphin/cli/ui/stream_renderer.py +302 -0
- dolphin/cli/utils/__init__.py +8 -0
- dolphin/cli/utils/helpers.py +135 -0
- dolphin/cli/utils/version.py +49 -0
- dolphin/core/__init__.py +107 -0
- dolphin/core/agent/__init__.py +10 -0
- dolphin/core/agent/agent_state.py +69 -0
- dolphin/core/agent/base_agent.py +970 -0
- dolphin/core/code_block/__init__.py +0 -0
- dolphin/core/code_block/agent_init_block.py +0 -0
- dolphin/core/code_block/assign_block.py +98 -0
- dolphin/core/code_block/basic_code_block.py +1865 -0
- dolphin/core/code_block/explore_block.py +1327 -0
- dolphin/core/code_block/explore_block_v2.py +712 -0
- dolphin/core/code_block/explore_strategy.py +672 -0
- dolphin/core/code_block/judge_block.py +220 -0
- dolphin/core/code_block/prompt_block.py +32 -0
- dolphin/core/code_block/skill_call_deduplicator.py +291 -0
- dolphin/core/code_block/tool_block.py +129 -0
- dolphin/core/common/__init__.py +17 -0
- dolphin/core/common/constants.py +176 -0
- dolphin/core/common/enums.py +1173 -0
- dolphin/core/common/exceptions.py +133 -0
- dolphin/core/common/multimodal.py +539 -0
- dolphin/core/common/object_type.py +165 -0
- dolphin/core/common/output_format.py +432 -0
- dolphin/core/common/types.py +36 -0
- dolphin/core/config/__init__.py +16 -0
- dolphin/core/config/global_config.py +1289 -0
- dolphin/core/config/ontology_config.py +133 -0
- dolphin/core/context/__init__.py +12 -0
- dolphin/core/context/context.py +1580 -0
- dolphin/core/context/context_manager.py +161 -0
- dolphin/core/context/var_output.py +82 -0
- dolphin/core/context/variable_pool.py +356 -0
- dolphin/core/context_engineer/__init__.py +41 -0
- dolphin/core/context_engineer/config/__init__.py +5 -0
- dolphin/core/context_engineer/config/settings.py +402 -0
- dolphin/core/context_engineer/core/__init__.py +7 -0
- dolphin/core/context_engineer/core/budget_manager.py +327 -0
- dolphin/core/context_engineer/core/context_assembler.py +583 -0
- dolphin/core/context_engineer/core/context_manager.py +637 -0
- dolphin/core/context_engineer/core/tokenizer_service.py +260 -0
- dolphin/core/context_engineer/example/incremental_example.py +267 -0
- dolphin/core/context_engineer/example/traditional_example.py +334 -0
- dolphin/core/context_engineer/services/__init__.py +5 -0
- dolphin/core/context_engineer/services/compressor.py +399 -0
- dolphin/core/context_engineer/utils/__init__.py +6 -0
- dolphin/core/context_engineer/utils/context_utils.py +441 -0
- dolphin/core/context_engineer/utils/message_formatter.py +270 -0
- dolphin/core/context_engineer/utils/token_utils.py +139 -0
- dolphin/core/coroutine/__init__.py +15 -0
- dolphin/core/coroutine/context_snapshot.py +154 -0
- dolphin/core/coroutine/context_snapshot_profile.py +922 -0
- dolphin/core/coroutine/context_snapshot_store.py +268 -0
- dolphin/core/coroutine/execution_frame.py +145 -0
- dolphin/core/coroutine/execution_state_registry.py +161 -0
- dolphin/core/coroutine/resume_handle.py +101 -0
- dolphin/core/coroutine/step_result.py +101 -0
- dolphin/core/executor/__init__.py +18 -0
- dolphin/core/executor/debug_controller.py +630 -0
- dolphin/core/executor/dolphin_executor.py +1063 -0
- dolphin/core/executor/executor.py +624 -0
- dolphin/core/flags/__init__.py +27 -0
- dolphin/core/flags/definitions.py +49 -0
- dolphin/core/flags/manager.py +113 -0
- dolphin/core/hook/__init__.py +95 -0
- dolphin/core/hook/expression_evaluator.py +499 -0
- dolphin/core/hook/hook_dispatcher.py +380 -0
- dolphin/core/hook/hook_types.py +248 -0
- dolphin/core/hook/isolated_variable_pool.py +284 -0
- dolphin/core/interfaces.py +53 -0
- dolphin/core/llm/__init__.py +0 -0
- dolphin/core/llm/llm.py +495 -0
- dolphin/core/llm/llm_call.py +100 -0
- dolphin/core/llm/llm_client.py +1285 -0
- dolphin/core/llm/message_sanitizer.py +120 -0
- dolphin/core/logging/__init__.py +20 -0
- dolphin/core/logging/logger.py +526 -0
- dolphin/core/message/__init__.py +8 -0
- dolphin/core/message/compressor.py +749 -0
- dolphin/core/parser/__init__.py +8 -0
- dolphin/core/parser/parser.py +405 -0
- dolphin/core/runtime/__init__.py +10 -0
- dolphin/core/runtime/runtime_graph.py +926 -0
- dolphin/core/runtime/runtime_instance.py +446 -0
- dolphin/core/skill/__init__.py +14 -0
- dolphin/core/skill/context_retention.py +157 -0
- dolphin/core/skill/skill_function.py +686 -0
- dolphin/core/skill/skill_matcher.py +282 -0
- dolphin/core/skill/skillkit.py +700 -0
- dolphin/core/skill/skillset.py +72 -0
- dolphin/core/trajectory/__init__.py +10 -0
- dolphin/core/trajectory/recorder.py +189 -0
- dolphin/core/trajectory/trajectory.py +522 -0
- dolphin/core/utils/__init__.py +9 -0
- dolphin/core/utils/cache_kv.py +212 -0
- dolphin/core/utils/tools.py +340 -0
- dolphin/lib/__init__.py +93 -0
- dolphin/lib/debug/__init__.py +8 -0
- dolphin/lib/debug/visualizer.py +409 -0
- dolphin/lib/memory/__init__.py +28 -0
- dolphin/lib/memory/async_processor.py +220 -0
- dolphin/lib/memory/llm_calls.py +195 -0
- dolphin/lib/memory/manager.py +78 -0
- dolphin/lib/memory/sandbox.py +46 -0
- dolphin/lib/memory/storage.py +245 -0
- dolphin/lib/memory/utils.py +51 -0
- dolphin/lib/ontology/__init__.py +12 -0
- dolphin/lib/ontology/basic/__init__.py +0 -0
- dolphin/lib/ontology/basic/base.py +102 -0
- dolphin/lib/ontology/basic/concept.py +130 -0
- dolphin/lib/ontology/basic/object.py +11 -0
- dolphin/lib/ontology/basic/relation.py +63 -0
- dolphin/lib/ontology/datasource/__init__.py +27 -0
- dolphin/lib/ontology/datasource/datasource.py +66 -0
- dolphin/lib/ontology/datasource/oracle_datasource.py +338 -0
- dolphin/lib/ontology/datasource/sql.py +845 -0
- dolphin/lib/ontology/mapping.py +177 -0
- dolphin/lib/ontology/ontology.py +733 -0
- dolphin/lib/ontology/ontology_context.py +16 -0
- dolphin/lib/ontology/ontology_manager.py +107 -0
- dolphin/lib/skill_results/__init__.py +31 -0
- dolphin/lib/skill_results/cache_backend.py +559 -0
- dolphin/lib/skill_results/result_processor.py +181 -0
- dolphin/lib/skill_results/result_reference.py +179 -0
- dolphin/lib/skill_results/skillkit_hook.py +324 -0
- dolphin/lib/skill_results/strategies.py +328 -0
- dolphin/lib/skill_results/strategy_registry.py +150 -0
- dolphin/lib/skillkits/__init__.py +44 -0
- dolphin/lib/skillkits/agent_skillkit.py +155 -0
- dolphin/lib/skillkits/cognitive_skillkit.py +82 -0
- dolphin/lib/skillkits/env_skillkit.py +250 -0
- dolphin/lib/skillkits/mcp_adapter.py +616 -0
- dolphin/lib/skillkits/mcp_skillkit.py +771 -0
- dolphin/lib/skillkits/memory_skillkit.py +650 -0
- dolphin/lib/skillkits/noop_skillkit.py +31 -0
- dolphin/lib/skillkits/ontology_skillkit.py +89 -0
- dolphin/lib/skillkits/plan_act_skillkit.py +452 -0
- dolphin/lib/skillkits/resource/__init__.py +52 -0
- dolphin/lib/skillkits/resource/models/__init__.py +6 -0
- dolphin/lib/skillkits/resource/models/skill_config.py +109 -0
- dolphin/lib/skillkits/resource/models/skill_meta.py +127 -0
- dolphin/lib/skillkits/resource/resource_skillkit.py +393 -0
- dolphin/lib/skillkits/resource/skill_cache.py +215 -0
- dolphin/lib/skillkits/resource/skill_loader.py +395 -0
- dolphin/lib/skillkits/resource/skill_validator.py +406 -0
- dolphin/lib/skillkits/resource_skillkit.py +11 -0
- dolphin/lib/skillkits/search_skillkit.py +163 -0
- dolphin/lib/skillkits/sql_skillkit.py +274 -0
- dolphin/lib/skillkits/system_skillkit.py +509 -0
- dolphin/lib/skillkits/vm_skillkit.py +65 -0
- dolphin/lib/utils/__init__.py +9 -0
- dolphin/lib/utils/data_process.py +207 -0
- dolphin/lib/utils/handle_progress.py +178 -0
- dolphin/lib/utils/security.py +139 -0
- dolphin/lib/utils/text_retrieval.py +462 -0
- dolphin/lib/vm/__init__.py +11 -0
- dolphin/lib/vm/env_executor.py +895 -0
- dolphin/lib/vm/python_session_manager.py +453 -0
- dolphin/lib/vm/vm.py +610 -0
- dolphin/sdk/__init__.py +60 -0
- dolphin/sdk/agent/__init__.py +12 -0
- dolphin/sdk/agent/agent_factory.py +236 -0
- dolphin/sdk/agent/dolphin_agent.py +1106 -0
- dolphin/sdk/api/__init__.py +4 -0
- dolphin/sdk/runtime/__init__.py +8 -0
- dolphin/sdk/runtime/env.py +363 -0
- dolphin/sdk/skill/__init__.py +10 -0
- dolphin/sdk/skill/global_skills.py +706 -0
- dolphin/sdk/skill/traditional_toolkit.py +260 -0
- kweaver_dolphin-0.1.0.dist-info/METADATA +521 -0
- kweaver_dolphin-0.1.0.dist-info/RECORD +199 -0
- kweaver_dolphin-0.1.0.dist-info/WHEEL +5 -0
- kweaver_dolphin-0.1.0.dist-info/entry_points.txt +27 -0
- kweaver_dolphin-0.1.0.dist-info/licenses/LICENSE.txt +201 -0
- kweaver_dolphin-0.1.0.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
"""Tokenizer service for unified tokenization and length estimation.
|
|
2
|
+
|
|
3
|
+
Unified tokenizer service for tokenization and length estimation.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import re
|
|
7
|
+
from typing import Dict, Union, List
|
|
8
|
+
from abc import ABC, abstractmethod
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class BaseTokenizer(ABC):
    """Interface that every tokenizer backend must satisfy.

    Concrete implementations provide both an exact token count and a
    cheaper estimate, letting callers trade accuracy for speed.
    """

    @abstractmethod
    def count_tokens(self, text: str) -> int:
        """Return the exact number of tokens in ``text``.

        Args:
            text (str): The text to count tokens for.

        Returns:
            int: The number of tokens in the text.
        """

    @abstractmethod
    def estimate_tokens(self, text: str) -> int:
        """Return a fast, approximate token count for ``text``.

        Args:
            text (str): The text to estimate the token count for.

        Returns:
            int: The estimated token count.
        """
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class SimpleTokenizer(BaseTokenizer):
    """Lightweight tokenizer based on a regular-expression word split.

    A token is either a run of word characters or a single
    non-word, non-space character (punctuation). Estimation divides the
    character length by an average characters-per-token ratio.
    """

    def __init__(self, avg_chars_per_token: float = 4.0):
        """Set up the splitter.

        Args:
            avg_chars_per_token (float): Average characters per token
                used by :meth:`estimate_tokens`. Defaults to 4.0.
        """
        self.avg_chars_per_token = avg_chars_per_token
        # Matches word runs or individual punctuation marks.
        self.word_pattern = re.compile(r"\w+|[^\w\s]")

    def count_tokens(self, text: str) -> int:
        """Count tokens by regex word splitting.

        Args:
            text (str): The text to count tokens for.

        Returns:
            int: The number of tokens in the text (0 for empty input).
        """
        return len(self.word_pattern.findall(text)) if text else 0

    def estimate_tokens(self, text: str) -> int:
        """Approximate the token count from the character length.

        Args:
            text (str): The text to estimate the token count for.

        Returns:
            int: Estimated token count (0 for empty input).
        """
        return int(len(text) / self.avg_chars_per_token) if text else 0
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
class TiktokenTokenizer(BaseTokenizer):
    """Tokenizer backed by the optional ``tiktoken`` library.

    If ``tiktoken`` is not installed, or the encoding cannot be loaded
    (e.g. the encoding-file download fails without network access), the
    instance transparently falls back to :class:`SimpleTokenizer`.
    """

    def __init__(self, encoding_name: str = "cl100k_base"):
        """Load the tiktoken encoding, falling back on any failure.

        Args:
            encoding_name (str): tiktoken encoding name, default is
                "cl100k_base".
        """
        # False until the encoding is successfully loaded.
        self.available = False
        try:
            import tiktoken

            # get_encoding() may trigger a network request to download
            # the encoding file; any failure marks the backend as
            # unavailable so the service keeps working offline.
            self.encoding = tiktoken.get_encoding(encoding_name)
            self.available = True
        except ImportError:
            # tiktoken is not installed.
            self.fallback = SimpleTokenizer()
        except Exception:
            # Fix: the original caught (OSError, ConnectionError,
            # TimeoutError, Exception) — a redundant tuple, since
            # Exception already subsumes the others. A single broad
            # handler states the intent (best-effort load) honestly.
            self.fallback = SimpleTokenizer()

    def count_tokens(self, text: str) -> int:
        """Count tokens using the tiktoken encoding (or the fallback).

        Args:
            text (str): The text to count tokens for.

        Returns:
            int: The number of tokens in the text.
        """
        if not self.available:
            return self.fallback.count_tokens(text)
        return len(self.encoding.encode(text))

    def estimate_tokens(self, text: str) -> int:
        """Estimate token count (identical to count_tokens for tiktoken).

        Args:
            text (str): The text to estimate the token count for.

        Returns:
            int: Estimated number of tokens.
        """
        # tiktoken counting is already fast; no cheaper path exists.
        return self.count_tokens(text)
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
class TokenizerService:
    """Facade that unifies several tokenizer backends behind one API.

    Accepts plain strings, lists of strings, or dicts of strings and
    reports exact or estimated token counts.
    """

    def __init__(self, backend: str = "auto", **kwargs):
        """Select and construct the underlying tokenizer.

        Args:
            backend: Tokenizer backend ('simple', 'tiktoken', 'auto').
            **kwargs: Forwarded to the chosen tokenizer's constructor.

        Raises:
            ValueError: If ``backend`` is not a recognized name.
        """
        if backend in ("auto", "simple"):
            # 'auto' deliberately resolves to SimpleTokenizer so no
            # tiktoken encoding download is attempted when offline.
            self.tokenizer = SimpleTokenizer(**kwargs)
        elif backend == "tiktoken":
            self.tokenizer = TiktokenTokenizer(**kwargs)
        else:
            raise ValueError(f"Unknown tokenizer backend: {backend}")

    def _total(self, measure, text) -> int:
        """Apply ``measure`` across a str/list/dict payload and sum the counts."""
        if isinstance(text, str):
            return measure(text)
        if isinstance(text, list):
            return sum(measure(item) for item in text)
        if isinstance(text, dict):
            # Both keys and values contribute to the total.
            return sum(
                measure(str(key)) + measure(str(value))
                for key, value in text.items()
            )
        return measure(str(text))

    def count_tokens(self, text: Union[str, List[str], Dict[str, str]]) -> int:
        """Count tokens exactly for any supported payload shape.

        Args:
            text: string, list of strings, or dictionary of strings.

        Returns:
            Total number of tokens.
        """
        return self._total(self.tokenizer.count_tokens, text)

    def estimate_tokens(self, text: Union[str, List[str], Dict[str, str]]) -> int:
        """Estimate tokens (faster but less accurate) for any payload shape.

        Args:
            text: string, list of strings, or dictionary of strings.

        Returns:
            Estimated number of tokens.
        """
        return self._total(self.tokenizer.estimate_tokens, text)

    def count_tokens_with_breakdown(self, text: Dict[str, str]) -> Dict[str, int]:
        """Count tokens per dictionary entry, adding a 'total' summary key.

        Args:
            text: Dictionary of text parts.

        Returns:
            Dictionary of token counts for each part plus a 'total' key.
        """
        breakdown = {key: self.count_tokens(value) for key, value in text.items()}
        breakdown["total"] = sum(breakdown.values())
        return breakdown

    def get_tokenizer_info(self) -> Dict[str, Union[str, bool]]:
        """Describe the active backend.

        Returns:
            A dictionary with the backend class name, its availability,
            and (for a working tiktoken backend) the encoding name.
        """
        info: Dict[str, Union[str, bool]] = {
            "backend": type(self.tokenizer).__name__,
            "available": True,
        }
        # Only TiktokenTokenizer tracks a separate availability flag.
        if isinstance(self.tokenizer, TiktokenTokenizer):
            info["available"] = self.tokenizer.available
            if self.tokenizer.available and hasattr(self.tokenizer, "encoding"):
                info["encoding_name"] = self.tokenizer.encoding.name
        return info
|
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
"""ContextManager Usage Example
|
|
2
|
+
|
|
3
|
+
Demonstrates how to dynamically manage context buckets, supporting incremental updates and controllable compression.
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import sys
|
|
8
|
+
import os
|
|
9
|
+
from dolphin.core.common.enums import Messages, MessageRole
|
|
10
|
+
from tabulate import tabulate
|
|
11
|
+
|
|
12
|
+
# Add item path
|
|
13
|
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "src"))
|
|
14
|
+
|
|
15
|
+
from dolphin.core.context_engineer.core.context_manager import (
|
|
16
|
+
ContextManager,
|
|
17
|
+
)
|
|
18
|
+
from dolphin.core.context_engineer.config.settings import (
|
|
19
|
+
ContextConfig,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def get_context_config() -> ContextConfig:
    """Build the demo configuration: a tiny context window with three buckets.

    Returns:
        ContextConfig: a config with a 100-token model limit,
        'system'/'task'/'history' buckets, and a default layout policy.
    """
    model_spec = {"name": "gpt-4", "context_limit": 100, "output_target": 10}
    bucket_specs = {
        "system": {
            "name": "system",
            "min_tokens": 10,
            "max_tokens": 30,
            "weight": 2.0,
            "message_role": "system",
        },
        "task": {
            "name": "task",
            "min_tokens": 5,
            "max_tokens": 30,
            "weight": 2.0,
            "message_role": "user",
        },
        "history": {
            "name": "history",
            "min_tokens": 5,
            "max_tokens": 200,
            "weight": 1.0,
            "message_role": "user",
        },
    }
    policy_specs = {
        "default": {
            "drop_order": [],
            "bucket_order": ["system", "_query", "tool_response", "task", "history"],
        }
    }
    return ContextConfig.from_dict(
        {"model": model_spec, "buckets": bucket_specs, "policies": policy_specs}
    )
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def print_stats_table(stats: dict, title: str = "Token Statistics") -> None:
    """Render a token-stats dict as grid tables.

    Prints an overview table (totals), then — when any buckets exist —
    a per-bucket detail table.

    Args:
        stats: Statistics mapping with 'total_tokens', 'bucket_count',
            'compression_needed', and a 'buckets' sub-mapping.
        title: Heading shown above the overview table.
    """
    print(f"\n=== {title} ===")

    # Overview table: totals across all buckets.
    overview_rows = [
        ["总token数", stats["total_tokens"]],
        ["桶数量", stats["bucket_count"]],
        ["需要压缩", stats["compression_needed"]],
    ]
    print(tabulate(overview_rows, headers=["项目", "值"], tablefmt="grid"))

    # Guard clause: nothing further to show without buckets.
    if not stats["buckets"]:
        return

    print("\n=== 桶详细信息 ===")
    detail_headers = [
        "桶名称",
        "Tokens",
        "分配Tokens",
        "优先级",
        "压缩状态",
        "利用率",
        "需要压缩",
    ]
    detail_rows = [
        [
            name,
            info["tokens"],
            info["allocated"],
            info["priority"],
            "是" if info["is_compressed"] else "否",
            f"{info['utilization']:.2%}",
            "是" if info["needs_compression"] else "否",
        ]
        for name, info in stats["buckets"].items()
    ]
    print(tabulate(detail_rows, headers=detail_headers, tablefmt="grid"))
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def main():
    """Demonstrate the basic usage of ContextManager.

    Walks through the full bucket lifecycle: create a manager, add
    buckets, update content incrementally, compress on demand, assemble
    the final context, convert to messages, remove a bucket, and clear.
    """

    # Create incremental context manager
    context_config = get_context_config()
    manager = ContextManager(context_config=context_config)
    manager.set_layout_policy("default")
    print("=== 初始状态 ===")
    stats = manager.get_token_stats()
    print_stats_table(stats, "初始状态统计")

    # Add system prompt bucket
    print("\n=== 添加系统提示桶 ===")
    manager.add_bucket(
        bucket_name="system",
        content="你是一个有用的AI助手,请帮助用户解决问题。",
        allocated_tokens=50,
    )

    # Add task description bucket
    manager.add_bucket(
        bucket_name="task",
        content="用户想要了解如何提高编程技能。",
        allocated_tokens=20,
    )

    stats = manager.get_token_stats()
    print_stats_table(stats, "添加桶后统计")

    # Update bucket contents (incremental update): replaces the 'task'
    # bucket's text without rebuilding the other buckets.
    print("\n=== 增量更新桶内容 ===")
    manager.update_bucket_content(
        "task", "用户想要了解如何提高编程技能 ,特别是Python和机器学习方面的技能。"
    )

    stats = manager.get_token_stats()
    print_stats_table(stats, "更新内容后统计")

    # Dynamically add new bucket holding a Messages object rather than a
    # plain string.
    print("\n=== 动态添加对话历史桶 ===")
    history_messages = Messages()
    history_messages.add_message("用户对话历史1")
    history_messages.add_message("用户对话历史2")
    history_messages.add_message("用户对话历史3")
    history_messages.add_message("用户对话历史4")

    manager.add_bucket(
        "history",
        history_messages,
        # allocated_tokens=10,
    )

    # NOTE(review): no allocated_tokens passed here — presumably the
    # manager auto-allocates from the bucket config; confirm.
    manager.add_bucket(
        bucket_name="tool_response",
        content="这是调用工具的结果的",
        message_role=MessageRole.TOOL,
    )

    stats = manager.get_token_stats()
    print_stats_table(stats, "添加历史桶后统计")

    # Check if compression is needed
    if manager.needs_compression():
        print("\n=== 检测到需要压缩 ===")
        # Compress all buckets that need to be compressed
        results = manager.compress_all()
        print("压缩结果:", results)

        print("\n=== 压缩后统计 ===")
        stats = manager.get_token_stats()
        # NOTE(review): table title reuses the earlier "添加桶后统计"
        # label — looks like a copy-paste; confirm intended.
        print_stats_table(stats, "添加桶后统计")

    # Assemble final context according to the active layout policy.
    print("\n=== 组装最终上下文 ===")
    context = manager.assemble_context()
    print(f"上下文桶顺序: {context['bucket_order']}")
    print(f"布局策略: {context['layout_policy']}")

    # Convert to message format (dolphin Messages), truncating previews
    # to 50 characters for display.
    print("\n=== 转换为消息格式 ===")

    messages = manager.to_dph_messages()
    for i, message in enumerate(messages.messages):
        print(
            f"消息 {i + 1}: {message.role.value} - {message.content[:50]}{'...' if len(message.content) > 50 else ''}"
        )

    # Remove Bucket
    print("\n=== 移除历史桶 ===")
    manager.remove_bucket("history")
    stats = manager.get_token_stats()
    print_stats_table(stats, "移除历史桶后统计")

    # Clear all contexts
    print("\n=== 清空所有上下文 ===")
    manager.clear()
    stats = manager.get_token_stats()
    print_stats_table(stats, "清空后统计")
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def performance_comparison():
    """Performance Comparison: Traditional Full Assembly vs Incremental Management.

    Contrasts the full-assembly API (ContextAssembler + BudgetAllocation)
    with the incremental ContextManager. Output is narrative print
    statements; no timing is actually measured here.
    """

    print("\n=== 性能对比演示 ===")

    # Traditional approach: Full assembly each time. Imports are local
    # because these classes are only needed by this demo path.
    from dolphin.core.context_engineer.core.context_assembler import (
        ContextAssembler,
    )
    from dolphin.core.context_engineer.core.budget_manager import (
        BudgetAllocation,
    )

    # Simulate multi-turn dialogue scenarios
    content_sections = {
        "system": "你是一个有用的AI助手。",
        "task": "用户的问题描述。",
        "history": "之前的对话历史。",
    }

    # NOTE(review): budget_allocations is built but never used below —
    # it only illustrates the traditional API's required inputs.
    budget_allocations = [
        BudgetAllocation("system", 50, 2.0),
        BudgetAllocation("task", 100, 1.5),
        BudgetAllocation("history", 200, 1.0),
    ]

    # Traditional approach: Process all data in full each time.
    # NOTE(review): assembler is instantiated but never invoked — the
    # traditional path is only described, not executed.
    assembler = ContextAssembler()

    # Incremental mode: dynamic management
    incremental_manager = ContextManager()

    # Initial Setup: positional args appear to be (name, content,
    # weight, allocated_tokens) — TODO confirm against add_bucket.
    incremental_manager.add_bucket("system", content_sections["system"], 2.0, 50)
    incremental_manager.add_bucket("task", content_sections["task"], 1.5, 100)
    incremental_manager.add_bucket("history", content_sections["history"], 1.0, 200)

    print("传统方式需要每次传入完整内容进行全量处理")
    print("增量方式支持动态更新,性能更优")

    # Demonstrate the advantages of incremental updates
    print("\n=== 增量更新优势 ===")

    # Update a single bucket (incremental mode)
    incremental_manager.update_bucket_content("task", "更新后的问题描述。")
    print("增量方式:只更新变化的桶,无需全量处理")

    # Check compression requirements; compress only the history bucket.
    if incremental_manager.needs_compression():
        print("检测到需要压缩,可以按需压缩")
        incremental_manager.compress_bucket("history")

    print("增量管理支持细粒度的控制,避免不必要的计算")
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
if __name__ == "__main__":
    # Run the basic incremental-management demo; the alternative
    # performance-comparison demo is left disabled below.
    main()
    # performance_comparison()
|