kweaver-dolphin 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- DolphinLanguageSDK/__init__.py +58 -0
- dolphin/__init__.py +62 -0
- dolphin/cli/__init__.py +20 -0
- dolphin/cli/args/__init__.py +9 -0
- dolphin/cli/args/parser.py +567 -0
- dolphin/cli/builtin_agents/__init__.py +22 -0
- dolphin/cli/commands/__init__.py +4 -0
- dolphin/cli/interrupt/__init__.py +8 -0
- dolphin/cli/interrupt/handler.py +205 -0
- dolphin/cli/interrupt/keyboard.py +82 -0
- dolphin/cli/main.py +49 -0
- dolphin/cli/multimodal/__init__.py +34 -0
- dolphin/cli/multimodal/clipboard.py +327 -0
- dolphin/cli/multimodal/handler.py +249 -0
- dolphin/cli/multimodal/image_processor.py +214 -0
- dolphin/cli/multimodal/input_parser.py +149 -0
- dolphin/cli/runner/__init__.py +8 -0
- dolphin/cli/runner/runner.py +989 -0
- dolphin/cli/ui/__init__.py +10 -0
- dolphin/cli/ui/console.py +2795 -0
- dolphin/cli/ui/input.py +340 -0
- dolphin/cli/ui/layout.py +425 -0
- dolphin/cli/ui/stream_renderer.py +302 -0
- dolphin/cli/utils/__init__.py +8 -0
- dolphin/cli/utils/helpers.py +135 -0
- dolphin/cli/utils/version.py +49 -0
- dolphin/core/__init__.py +107 -0
- dolphin/core/agent/__init__.py +10 -0
- dolphin/core/agent/agent_state.py +69 -0
- dolphin/core/agent/base_agent.py +970 -0
- dolphin/core/code_block/__init__.py +0 -0
- dolphin/core/code_block/agent_init_block.py +0 -0
- dolphin/core/code_block/assign_block.py +98 -0
- dolphin/core/code_block/basic_code_block.py +1865 -0
- dolphin/core/code_block/explore_block.py +1327 -0
- dolphin/core/code_block/explore_block_v2.py +712 -0
- dolphin/core/code_block/explore_strategy.py +672 -0
- dolphin/core/code_block/judge_block.py +220 -0
- dolphin/core/code_block/prompt_block.py +32 -0
- dolphin/core/code_block/skill_call_deduplicator.py +291 -0
- dolphin/core/code_block/tool_block.py +129 -0
- dolphin/core/common/__init__.py +17 -0
- dolphin/core/common/constants.py +176 -0
- dolphin/core/common/enums.py +1173 -0
- dolphin/core/common/exceptions.py +133 -0
- dolphin/core/common/multimodal.py +539 -0
- dolphin/core/common/object_type.py +165 -0
- dolphin/core/common/output_format.py +432 -0
- dolphin/core/common/types.py +36 -0
- dolphin/core/config/__init__.py +16 -0
- dolphin/core/config/global_config.py +1289 -0
- dolphin/core/config/ontology_config.py +133 -0
- dolphin/core/context/__init__.py +12 -0
- dolphin/core/context/context.py +1580 -0
- dolphin/core/context/context_manager.py +161 -0
- dolphin/core/context/var_output.py +82 -0
- dolphin/core/context/variable_pool.py +356 -0
- dolphin/core/context_engineer/__init__.py +41 -0
- dolphin/core/context_engineer/config/__init__.py +5 -0
- dolphin/core/context_engineer/config/settings.py +402 -0
- dolphin/core/context_engineer/core/__init__.py +7 -0
- dolphin/core/context_engineer/core/budget_manager.py +327 -0
- dolphin/core/context_engineer/core/context_assembler.py +583 -0
- dolphin/core/context_engineer/core/context_manager.py +637 -0
- dolphin/core/context_engineer/core/tokenizer_service.py +260 -0
- dolphin/core/context_engineer/example/incremental_example.py +267 -0
- dolphin/core/context_engineer/example/traditional_example.py +334 -0
- dolphin/core/context_engineer/services/__init__.py +5 -0
- dolphin/core/context_engineer/services/compressor.py +399 -0
- dolphin/core/context_engineer/utils/__init__.py +6 -0
- dolphin/core/context_engineer/utils/context_utils.py +441 -0
- dolphin/core/context_engineer/utils/message_formatter.py +270 -0
- dolphin/core/context_engineer/utils/token_utils.py +139 -0
- dolphin/core/coroutine/__init__.py +15 -0
- dolphin/core/coroutine/context_snapshot.py +154 -0
- dolphin/core/coroutine/context_snapshot_profile.py +922 -0
- dolphin/core/coroutine/context_snapshot_store.py +268 -0
- dolphin/core/coroutine/execution_frame.py +145 -0
- dolphin/core/coroutine/execution_state_registry.py +161 -0
- dolphin/core/coroutine/resume_handle.py +101 -0
- dolphin/core/coroutine/step_result.py +101 -0
- dolphin/core/executor/__init__.py +18 -0
- dolphin/core/executor/debug_controller.py +630 -0
- dolphin/core/executor/dolphin_executor.py +1063 -0
- dolphin/core/executor/executor.py +624 -0
- dolphin/core/flags/__init__.py +27 -0
- dolphin/core/flags/definitions.py +49 -0
- dolphin/core/flags/manager.py +113 -0
- dolphin/core/hook/__init__.py +95 -0
- dolphin/core/hook/expression_evaluator.py +499 -0
- dolphin/core/hook/hook_dispatcher.py +380 -0
- dolphin/core/hook/hook_types.py +248 -0
- dolphin/core/hook/isolated_variable_pool.py +284 -0
- dolphin/core/interfaces.py +53 -0
- dolphin/core/llm/__init__.py +0 -0
- dolphin/core/llm/llm.py +495 -0
- dolphin/core/llm/llm_call.py +100 -0
- dolphin/core/llm/llm_client.py +1285 -0
- dolphin/core/llm/message_sanitizer.py +120 -0
- dolphin/core/logging/__init__.py +20 -0
- dolphin/core/logging/logger.py +526 -0
- dolphin/core/message/__init__.py +8 -0
- dolphin/core/message/compressor.py +749 -0
- dolphin/core/parser/__init__.py +8 -0
- dolphin/core/parser/parser.py +405 -0
- dolphin/core/runtime/__init__.py +10 -0
- dolphin/core/runtime/runtime_graph.py +926 -0
- dolphin/core/runtime/runtime_instance.py +446 -0
- dolphin/core/skill/__init__.py +14 -0
- dolphin/core/skill/context_retention.py +157 -0
- dolphin/core/skill/skill_function.py +686 -0
- dolphin/core/skill/skill_matcher.py +282 -0
- dolphin/core/skill/skillkit.py +700 -0
- dolphin/core/skill/skillset.py +72 -0
- dolphin/core/trajectory/__init__.py +10 -0
- dolphin/core/trajectory/recorder.py +189 -0
- dolphin/core/trajectory/trajectory.py +522 -0
- dolphin/core/utils/__init__.py +9 -0
- dolphin/core/utils/cache_kv.py +212 -0
- dolphin/core/utils/tools.py +340 -0
- dolphin/lib/__init__.py +93 -0
- dolphin/lib/debug/__init__.py +8 -0
- dolphin/lib/debug/visualizer.py +409 -0
- dolphin/lib/memory/__init__.py +28 -0
- dolphin/lib/memory/async_processor.py +220 -0
- dolphin/lib/memory/llm_calls.py +195 -0
- dolphin/lib/memory/manager.py +78 -0
- dolphin/lib/memory/sandbox.py +46 -0
- dolphin/lib/memory/storage.py +245 -0
- dolphin/lib/memory/utils.py +51 -0
- dolphin/lib/ontology/__init__.py +12 -0
- dolphin/lib/ontology/basic/__init__.py +0 -0
- dolphin/lib/ontology/basic/base.py +102 -0
- dolphin/lib/ontology/basic/concept.py +130 -0
- dolphin/lib/ontology/basic/object.py +11 -0
- dolphin/lib/ontology/basic/relation.py +63 -0
- dolphin/lib/ontology/datasource/__init__.py +27 -0
- dolphin/lib/ontology/datasource/datasource.py +66 -0
- dolphin/lib/ontology/datasource/oracle_datasource.py +338 -0
- dolphin/lib/ontology/datasource/sql.py +845 -0
- dolphin/lib/ontology/mapping.py +177 -0
- dolphin/lib/ontology/ontology.py +733 -0
- dolphin/lib/ontology/ontology_context.py +16 -0
- dolphin/lib/ontology/ontology_manager.py +107 -0
- dolphin/lib/skill_results/__init__.py +31 -0
- dolphin/lib/skill_results/cache_backend.py +559 -0
- dolphin/lib/skill_results/result_processor.py +181 -0
- dolphin/lib/skill_results/result_reference.py +179 -0
- dolphin/lib/skill_results/skillkit_hook.py +324 -0
- dolphin/lib/skill_results/strategies.py +328 -0
- dolphin/lib/skill_results/strategy_registry.py +150 -0
- dolphin/lib/skillkits/__init__.py +44 -0
- dolphin/lib/skillkits/agent_skillkit.py +155 -0
- dolphin/lib/skillkits/cognitive_skillkit.py +82 -0
- dolphin/lib/skillkits/env_skillkit.py +250 -0
- dolphin/lib/skillkits/mcp_adapter.py +616 -0
- dolphin/lib/skillkits/mcp_skillkit.py +771 -0
- dolphin/lib/skillkits/memory_skillkit.py +650 -0
- dolphin/lib/skillkits/noop_skillkit.py +31 -0
- dolphin/lib/skillkits/ontology_skillkit.py +89 -0
- dolphin/lib/skillkits/plan_act_skillkit.py +452 -0
- dolphin/lib/skillkits/resource/__init__.py +52 -0
- dolphin/lib/skillkits/resource/models/__init__.py +6 -0
- dolphin/lib/skillkits/resource/models/skill_config.py +109 -0
- dolphin/lib/skillkits/resource/models/skill_meta.py +127 -0
- dolphin/lib/skillkits/resource/resource_skillkit.py +393 -0
- dolphin/lib/skillkits/resource/skill_cache.py +215 -0
- dolphin/lib/skillkits/resource/skill_loader.py +395 -0
- dolphin/lib/skillkits/resource/skill_validator.py +406 -0
- dolphin/lib/skillkits/resource_skillkit.py +11 -0
- dolphin/lib/skillkits/search_skillkit.py +163 -0
- dolphin/lib/skillkits/sql_skillkit.py +274 -0
- dolphin/lib/skillkits/system_skillkit.py +509 -0
- dolphin/lib/skillkits/vm_skillkit.py +65 -0
- dolphin/lib/utils/__init__.py +9 -0
- dolphin/lib/utils/data_process.py +207 -0
- dolphin/lib/utils/handle_progress.py +178 -0
- dolphin/lib/utils/security.py +139 -0
- dolphin/lib/utils/text_retrieval.py +462 -0
- dolphin/lib/vm/__init__.py +11 -0
- dolphin/lib/vm/env_executor.py +895 -0
- dolphin/lib/vm/python_session_manager.py +453 -0
- dolphin/lib/vm/vm.py +610 -0
- dolphin/sdk/__init__.py +60 -0
- dolphin/sdk/agent/__init__.py +12 -0
- dolphin/sdk/agent/agent_factory.py +236 -0
- dolphin/sdk/agent/dolphin_agent.py +1106 -0
- dolphin/sdk/api/__init__.py +4 -0
- dolphin/sdk/runtime/__init__.py +8 -0
- dolphin/sdk/runtime/env.py +363 -0
- dolphin/sdk/skill/__init__.py +10 -0
- dolphin/sdk/skill/global_skills.py +706 -0
- dolphin/sdk/skill/traditional_toolkit.py +260 -0
- kweaver_dolphin-0.1.0.dist-info/METADATA +521 -0
- kweaver_dolphin-0.1.0.dist-info/RECORD +199 -0
- kweaver_dolphin-0.1.0.dist-info/WHEEL +5 -0
- kweaver_dolphin-0.1.0.dist-info/entry_points.txt +27 -0
- kweaver_dolphin-0.1.0.dist-info/licenses/LICENSE.txt +201 -0
- kweaver_dolphin-0.1.0.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,637 @@
|
|
|
1
|
+
"""Incremental context manager for dynamic context assembly and compression.
|
|
2
|
+
|
|
3
|
+
Incremental context manager supporting dynamic updates, incremental assembly, and controllable compression.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from typing import Dict, List, Optional, Set, Any, Union
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
from dolphin.core.common.enums import Messages, MessageRole
|
|
9
|
+
from dolphin.core.context_engineer.config.settings import (
|
|
10
|
+
get_default_config,
|
|
11
|
+
ContextConfig,
|
|
12
|
+
)
|
|
13
|
+
from dolphin.core.context_engineer.core.context_assembler import (
|
|
14
|
+
AssembledContext,
|
|
15
|
+
)
|
|
16
|
+
from dolphin.core.context_engineer.services.compressor import Compressor
|
|
17
|
+
from dolphin.core.context_engineer.utils.token_utils import truncate_to_tokens
|
|
18
|
+
from dolphin.core.context_engineer.utils.message_formatter import (
|
|
19
|
+
MessageFormatter,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
from dolphin.core.context_engineer.core.tokenizer_service import (
|
|
23
|
+
TokenizerService,
|
|
24
|
+
)
|
|
25
|
+
from dolphin.core.common.exceptions import ContextEngineerException
|
|
26
|
+
from dolphin.core.logging.logger import get_logger
|
|
27
|
+
|
|
28
|
+
logger = get_logger("context_engineer.context_manager")
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
DEFAULT_ALLOCATED_TOKENS = 1024
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass
class ContextBucket:
    """A named, prioritised unit of context content.

    Buckets are the building blocks the context manager sorts and selects
    from when it assembles the final context. Each bucket carries its
    content together with the bookkeeping needed for ordering and for
    deciding whether the content exceeds its token budget.

    Attributes:
        name: Identifier of the bucket.
        content: Either plain text or a ``Messages`` object.
        priority: Ordering weight; buckets with larger values sort first
            when the manager orders by priority.
        token_count: Number of tokens currently measured for ``content``.
        allocated_tokens: Token budget assigned to this bucket.
        message_role: Role attached to the content when it is turned into
            messages.
        is_dirty: Set when the content (or budget) changed since the last
            time the bucket was processed.
        is_compressed: Set once the content has been compressed.
    """

    name: str
    content: Union[str, Messages]
    priority: float
    token_count: int = 0
    allocated_tokens: int = 0
    message_role: MessageRole = MessageRole.USER
    is_dirty: bool = True
    is_compressed: bool = False

    def _is_messages_type(self) -> bool:
        """Return True when the bucket currently holds a ``Messages`` object."""
        return isinstance(self.content, Messages)

    def _get_content_text(self) -> str:
        """Return the bucket content flattened into a single string.

        For ``Messages`` content, the non-empty ``content`` of every message
        is stringified and joined with newlines; any other content is simply
        passed through ``str``.
        """
        if not isinstance(self.content, Messages):
            return str(self.content)

        return "\n".join(
            str(message.content)
            for message in self.content.messages
            if hasattr(message, "content") and message.content
        )

    def _merge_messages(
        self, new_content: Union[str, Messages]
    ) -> Union[str, Messages]:
        """Combine the current content with ``new_content``.

        Merging only happens when both the existing content and
        ``new_content`` are ``Messages``; in every other case ``new_content``
        is returned untouched so the caller can use it as a replacement.

        Returns:
            A fresh ``Messages`` extended first with the existing content and
            then with ``new_content``, or ``new_content`` itself when no
            merge applies.
        """
        both_are_messages = self._is_messages_type() and isinstance(
            new_content, Messages
        )
        if both_are_messages:
            combined = Messages()
            combined.extend_messages(self.content)
            combined.extend_messages(new_content)
            return combined

        return new_content
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
@dataclass
class ContextState:
    """Mutable snapshot of everything the context manager is tracking.

    Every field has a default, so ``ContextState()`` yields an empty state
    with its own fresh containers.

    Attributes:
        buckets: Registered buckets keyed by bucket name.
        total_tokens: Sum of the token counts across the buckets.
        layout_policy: Name of the layout policy currently in effect.
        bucket_order: Bucket names in the order the layout policy wants
            them; empty when no explicit order is set.
        dirty_buckets: Names of buckets whose token counts need refreshing.
    """

    # Containers use default_factory so instances never share state.
    buckets: Dict[str, ContextBucket] = field(default_factory=dict)
    total_tokens: int = 0
    layout_policy: str = "default"
    bucket_order: List[str] = field(default_factory=list)
    dirty_buckets: Set[str] = field(default_factory=set)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
class SimpleContextSection:
    """Lightweight record describing one section handed to the formatter.

    Attributes:
        name: Name of the bucket the section was built from.
        content: The section content as stored in the bucket.
        message_role: Role to attach to the section's content.
    """

    def __init__(self, name, content, message_role):
        """Store the three values unchanged on the instance."""
        self.name, self.content, self.message_role = name, content, message_role
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
class SimpleAssembledContext(AssembledContext):
    """Minimal ``AssembledContext`` built from an assembly result dict.

    Only the three attributes set below are populated; the base class
    initialiser is intentionally not invoked here.

    Attributes:
        sections: The section objects passed in by the caller.
        total_tokens: Copied from ``assembled_context["total_tokens"]``.
        placement_map: ``{"ordered": [...]}`` listing section names in order.
    """

    def __init__(self, sections, assembled_context, bucket_order):
        """Populate the context from pre-built sections.

        Args:
            sections: Section objects, already in their final order.
            assembled_context: Mapping with at least ``"total_tokens"`` and
                ``"sections"`` keys.
            bucket_order: Explicit ordering of names; when empty, the key
                order of ``assembled_context["sections"]`` is used instead.
        """
        self.sections = sections
        self.total_tokens = assembled_context["total_tokens"]

        # Fall back to the sections' own key order when no order was given.
        ordered_names = bucket_order or list(assembled_context["sections"].keys())
        self.placement_map = {"ordered": ordered_names}
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
class ContextManager:
    """Manage named context buckets with incremental updates and compression.

    The manager keeps a ``ContextState`` of buckets, tracks their token
    counts as content changes, can compress buckets that exceed their token
    budget, and assembles the buckets (in layout-policy or priority order)
    into message lists via a ``MessageFormatter``.
    """

    def __init__(
        self,
        tokenizer_service: Optional[TokenizerService] = None,
        compressor_service: Optional[Any] = None,
        context_config: Optional[ContextConfig] = None,
    ):
        """Initialize the incremental context manager.

        Args:
            tokenizer_service: Token counter; a new ``TokenizerService`` is
                created when omitted.
            compressor_service: Compression backend; a new ``Compressor`` is
                created when omitted.
            context_config: Bucket and policy configuration; the default
                configuration is used when omitted.
        """
        self.tokenizer = tokenizer_service or TokenizerService()
        self.compressor = compressor_service or Compressor()
        self.context_config = context_config or get_default_config()
        self.state = ContextState()
        self._message_formatter = MessageFormatter()

        # Start from the "default" layout policy when the config defines one.
        if "default" in self.context_config.policies:
            self.set_layout_policy("default")

    def add_bucket(
        self,
        bucket_name: str,
        content: Union[str, Messages],
        priority: float = 1.0,
        allocated_tokens: Optional[int] = None,
        message_role: Optional[MessageRole] = None,
    ) -> None:
        """Add a new bucket or update an existing one.

        When the bucket already exists and both the stored and the new
        content are ``Messages``, the new messages are merged into the
        existing ones; otherwise differing content replaces the old content.

        Args:
            bucket_name: Bucket name.
            content: Content, either a string or a ``Messages`` object.
            priority: Priority to store on the bucket.
            allocated_tokens: Token budget. Defaults to the bucket config's
                ``max_tokens`` or, failing that, ``DEFAULT_ALLOCATED_TOKENS``.
            message_role: Message role. Defaults to the bucket config's role
                or ``MessageRole.USER`` for new buckets. For an existing
                bucket the role is only changed when passed explicitly.
        """
        bucket_config = self.context_config.buckets.get(bucket_name)
        # Remember whether the caller chose a role before defaults kick in,
        # so an update without a role keeps the bucket's current one.
        role_was_given = message_role is not None

        if allocated_tokens is None:
            allocated_tokens = (
                getattr(bucket_config, "max_tokens", DEFAULT_ALLOCATED_TOKENS)
                if bucket_config
                else DEFAULT_ALLOCATED_TOKENS
            )

        if message_role is None:
            message_role = (
                bucket_config.message_role if bucket_config else MessageRole.USER
            )

        if bucket_name in self.state.buckets:
            bucket = self.state.buckets[bucket_name]

            if bucket.content != content:
                self._store_content(bucket, content, merge=True)

            bucket.priority = priority
            if bucket.allocated_tokens != allocated_tokens:
                bucket.allocated_tokens = allocated_tokens or 0
                bucket.is_dirty = True
            if role_was_given:
                bucket.message_role = message_role
        else:
            bucket = ContextBucket(
                name=bucket_name,
                content=content,
                priority=priority,
                allocated_tokens=allocated_tokens or 0,
                message_role=message_role,
            )
            # Count tokens immediately so totals are right before assembly.
            bucket.token_count = self._count_tokens_for_content(content)
            self.state.buckets[bucket_name] = bucket

        self.state.dirty_buckets.add(bucket_name)
        self._calculate_total_tokens()

    def _store_content(
        self, bucket: ContextBucket, content: Union[str, Messages], *, merge: bool
    ) -> None:
        """Write ``content`` into ``bucket`` and refresh its bookkeeping.

        With ``merge=True``, ``Messages`` content is merged into existing
        ``Messages`` content; in every other case the content is replaced.
        The bucket is flagged dirty and uncompressed, and its token count is
        recomputed right away.
        """
        if merge and bucket._is_messages_type() and isinstance(content, Messages):
            bucket.content = bucket._merge_messages(content)
        else:
            bucket.content = content
        bucket.is_dirty = True
        bucket.is_compressed = False
        bucket.token_count = self._count_tokens_for_content(bucket.content)

    def _count_tokens_for_content(self, content: Union[str, Messages]) -> int:
        """Count the tokens in ``content``.

        ``Messages`` are flattened by joining the non-empty ``content`` of
        each message with newlines; anything else is stringified as-is.
        """
        if isinstance(content, Messages):
            text = "\n".join(
                str(msg.content)
                for msg in content.messages
                if hasattr(msg, "content") and msg.content
            )
        else:
            text = str(content)
        return self.tokenizer.count_tokens(text)

    def remove_bucket(self, bucket_name: str) -> None:
        """Remove a bucket; unknown names are ignored.

        Args:
            bucket_name: Bucket name.
        """
        if bucket_name in self.state.buckets:
            del self.state.buckets[bucket_name]
            self.state.dirty_buckets.discard(bucket_name)
            self._calculate_total_tokens()

    def update_bucket_content(
        self, bucket_name: str, content: Union[str, Messages]
    ) -> None:
        """Update the content of an existing bucket, merging ``Messages``.

        Nothing happens when the bucket does not exist or when ``content``
        equals the stored content.

        Args:
            bucket_name: Bucket name.
            content: New content, either a string or a ``Messages`` object.
        """
        bucket = self.state.buckets.get(bucket_name)
        if bucket is None:
            return

        if bucket.content != content:
            self._store_content(bucket, content, merge=True)
            self.state.dirty_buckets.add(bucket_name)
            self._calculate_total_tokens()

    def replace_bucket_content(
        self, bucket_name: str, content: Union[str, Messages]
    ) -> None:
        """Replace the content of an existing bucket without merging.

        Intended for content that must mirror external state exactly
        (e.g. history). Unknown bucket names are ignored.

        Args:
            bucket_name: Bucket name.
            content: New content, either a string or a ``Messages`` object.
        """
        bucket = self.state.buckets.get(bucket_name)
        if bucket is None:
            return

        self._store_content(bucket, content, merge=False)
        self.state.dirty_buckets.add(bucket_name)
        self._calculate_total_tokens()

    def set_layout_policy(self, policy_name: str) -> None:
        """Activate a layout policy and adopt its bucket order.

        Unknown policy names leave the state untouched.

        Args:
            policy_name: Policy name.
        """
        policy = self.context_config.policies.get(policy_name)
        if not policy:
            return

        self.state.layout_policy = policy_name
        # Copy so later changes to the state never leak into the config.
        self.state.bucket_order = list(policy.bucket_order or [])
        # Recompute right away: the lazy refresh in _update_dirty_buckets is
        # skipped when no bucket is dirty, which would leave a stale zero.
        self._calculate_total_tokens()

    def get_token_stats(self) -> Dict[str, Any]:
        """Return current token usage statistics.

        Returns:
            A dict with ``total_tokens``, ``bucket_count``, a per-bucket
            ``buckets`` mapping (``tokens``, ``allocated``, ``priority``,
            ``is_compressed``, ``utilization``, ``needs_compression``) and an
            overall ``compression_needed`` flag. Buckets without a positive
            budget report zero utilization and never need compression.
        """
        self._update_dirty_buckets()

        per_bucket: Dict[str, Any] = {}
        for name, bucket in self.state.buckets.items():
            has_budget = bucket.allocated_tokens > 0
            per_bucket[name] = {
                "tokens": bucket.token_count,
                "allocated": bucket.allocated_tokens,
                "priority": bucket.priority,
                "is_compressed": bucket.is_compressed,
                "utilization": (
                    bucket.token_count / bucket.allocated_tokens if has_budget else 0
                ),
                "needs_compression": (
                    has_budget and bucket.token_count > bucket.allocated_tokens
                ),
            }

        return {
            "total_tokens": self.state.total_tokens,
            "bucket_count": len(self.state.buckets),
            "buckets": per_bucket,
            "compression_needed": any(
                entry["needs_compression"] for entry in per_bucket.values()
            ),
        }

    def needs_compression(self) -> bool:
        """Return whether any bucket exceeds its token budget."""
        return self.get_token_stats()["compression_needed"]

    def compress_bucket(self, bucket_name: str, method: Optional[str] = None) -> bool:
        """Compress one bucket down to its allocated token budget.

        Args:
            bucket_name: Bucket name.
            method: Compression method; defaults to the ``compress`` setting
                of the bucket's configuration. Without a method (or without a
                compressor) the content is truncated instead.

        Returns:
            ``False`` when the bucket does not exist; ``True`` when the
            bucket was compressed or did not need compressing.

        Raises:
            ContextEngineerException: If compression or truncation fails.
        """
        if bucket_name not in self.state.buckets:
            logger.warning(f"Bucket {bucket_name} not found.")
            return False

        bucket = self.state.buckets[bucket_name]
        # A non-positive budget means "no limit", matching get_token_stats,
        # so such buckets are never squeezed down to zero tokens.
        if (
            bucket.allocated_tokens <= 0
            or bucket.token_count <= bucket.allocated_tokens
        ):
            return True

        if method is None:
            bucket_config = self.context_config.buckets.get(bucket_name)
            method = getattr(bucket_config, "compress", None) if bucket_config else None

        try:
            if self.compressor and method:
                result = self.compressor.compress(
                    content=bucket.content,
                    target_tokens=bucket.allocated_tokens,
                    method=method,
                )
                bucket.content = result.compressed_content
            else:
                # Fall back to plain truncation.
                # NOTE(review): confirm truncate_to_tokens accepts Messages
                # content as well as strings.
                bucket.content = truncate_to_tokens(
                    bucket.content, bucket.allocated_tokens, self.tokenizer
                )

            bucket.token_count = self._count_tokens_for_content(bucket.content)
            bucket.is_compressed = True
            bucket.is_dirty = False
            self.state.dirty_buckets.discard(bucket_name)
            self._calculate_total_tokens()
            return True

        except Exception as e:
            logger.error(f"Failed to compress bucket '{bucket_name}': {e}")
            raise ContextEngineerException(message="Failed to compress bucket") from e

    def compress_all(self) -> Dict[str, bool]:
        """Run ``compress_bucket`` on every bucket.

        Returns:
            Mapping of bucket name to the result of its compression.
        """
        return {
            bucket_name: self.compress_bucket(bucket_name)
            for bucket_name in self.state.buckets
        }

    def assemble_context(self) -> Dict[str, Any]:
        """Assemble the final context from all non-empty buckets.

        Buckets holding an empty ``Messages`` object or a whitespace-only
        string are left out of ``sections``.

        Returns:
            A dict with ``sections`` (bucket name -> content, in order),
            ``total_tokens``, ``bucket_order`` (names of all buckets in
            order) and ``layout_policy``.
        """
        self._update_dirty_buckets()

        ordered_buckets = self._get_ordered_buckets()

        content_sections = {}
        for bucket in ordered_buckets:
            if isinstance(bucket.content, Messages):
                is_empty = not bucket.content.messages
            else:
                is_empty = not str(bucket.content).strip()
            if not is_empty:
                content_sections[bucket.name] = bucket.content

        return {
            "sections": content_sections,
            "total_tokens": self.state.total_tokens,
            "bucket_order": [b.name for b in ordered_buckets],
            "layout_policy": self.state.layout_policy,
        }

    def _build_simple_context(self) -> SimpleAssembledContext:
        """Assemble the context and wrap it for the message formatter.

        Sections follow the assembled ``bucket_order``; if that order is
        empty, the insertion order of the assembled sections is used.
        """
        assembled_context = self.assemble_context()
        sections = assembled_context["sections"]
        bucket_order = assembled_context.get("bucket_order", [])

        if bucket_order:
            names = [name for name in bucket_order if name in sections]
        else:
            names = list(sections)

        sections_list = []
        for name in names:
            bucket = self.state.buckets.get(name)
            if bucket is not None:
                sections_list.append(
                    SimpleContextSection(name, sections[name], bucket.message_role)
                )

        return SimpleAssembledContext(sections_list, assembled_context, bucket_order)

    def to_messages(
        self, user_buckets: Optional[List[str]] = None
    ) -> List[Dict[str, str]]:
        """Convert the assembled context to a list of message dicts.

        Args:
            user_buckets: Bucket names forwarded to the formatter as
                ``user_sections``.

        Returns:
            The formatter's ``to_openai_messages_simple`` result.
        """
        return self._message_formatter.to_openai_messages_simple(
            self._build_simple_context(),
            user_sections=user_buckets,
        )

    def to_dph_messages(self) -> Messages:
        """Convert the assembled context to a DPH ``Messages`` object.

        Returns:
            The formatter's ``to_dph_messages_simple`` result.
        """
        return self._message_formatter.to_dph_messages_simple(
            self._build_simple_context(),
        )

    def _update_dirty_buckets(self) -> None:
        """Refresh token counts of dirty buckets and the running total."""
        if not self.state.dirty_buckets:
            return

        # Iterate over a copy; the set is cleared once all are processed.
        for bucket_name in list(self.state.dirty_buckets):
            bucket = self.state.buckets.get(bucket_name)
            if bucket is not None:
                bucket.token_count = self._count_tokens_for_content(bucket.content)
                bucket.is_dirty = False

        self.state.dirty_buckets.clear()
        self._calculate_total_tokens()

    def _calculate_total_tokens(self) -> None:
        """Store the sum of all bucket token counts in the state."""
        self.state.total_tokens = sum(
            bucket.token_count for bucket in self.state.buckets.values()
        )

    def _get_ordered_buckets(self) -> List[ContextBucket]:
        """Return the buckets in layout order.

        Without a bucket order, buckets are sorted by descending priority.
        With one, buckets are sorted by their position in that order; names
        missing from the order come after all listed ones, and ties are
        broken by descending priority.
        """
        buckets = list(self.state.buckets.values())

        if not self.state.bucket_order:
            return sorted(buckets, key=lambda b: -b.priority)

        position = {name: idx for idx, name in enumerate(self.state.bucket_order)}
        unlisted = len(self.state.bucket_order)
        return sorted(
            buckets,
            key=lambda b: (position.get(b.name, unlisted), -b.priority),
        )

    def clear(self) -> None:
        """Clear all context data.

        The state is replaced by a fresh ``ContextState``, so the layout
        policy name and bucket order also return to their defaults.
        """
        self.state = ContextState()

    def get_bucket_names(self) -> List[str]:
        """Return the names of all buckets."""
        return list(self.state.buckets)

    def has_bucket(self, bucket_name: str) -> bool:
        """Return whether a bucket with this name exists."""
        return bucket_name in self.state.buckets
|