kweaver-dolphin 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- DolphinLanguageSDK/__init__.py +58 -0
- dolphin/__init__.py +62 -0
- dolphin/cli/__init__.py +20 -0
- dolphin/cli/args/__init__.py +9 -0
- dolphin/cli/args/parser.py +567 -0
- dolphin/cli/builtin_agents/__init__.py +22 -0
- dolphin/cli/commands/__init__.py +4 -0
- dolphin/cli/interrupt/__init__.py +8 -0
- dolphin/cli/interrupt/handler.py +205 -0
- dolphin/cli/interrupt/keyboard.py +82 -0
- dolphin/cli/main.py +49 -0
- dolphin/cli/multimodal/__init__.py +34 -0
- dolphin/cli/multimodal/clipboard.py +327 -0
- dolphin/cli/multimodal/handler.py +249 -0
- dolphin/cli/multimodal/image_processor.py +214 -0
- dolphin/cli/multimodal/input_parser.py +149 -0
- dolphin/cli/runner/__init__.py +8 -0
- dolphin/cli/runner/runner.py +989 -0
- dolphin/cli/ui/__init__.py +10 -0
- dolphin/cli/ui/console.py +2795 -0
- dolphin/cli/ui/input.py +340 -0
- dolphin/cli/ui/layout.py +425 -0
- dolphin/cli/ui/stream_renderer.py +302 -0
- dolphin/cli/utils/__init__.py +8 -0
- dolphin/cli/utils/helpers.py +135 -0
- dolphin/cli/utils/version.py +49 -0
- dolphin/core/__init__.py +107 -0
- dolphin/core/agent/__init__.py +10 -0
- dolphin/core/agent/agent_state.py +69 -0
- dolphin/core/agent/base_agent.py +970 -0
- dolphin/core/code_block/__init__.py +0 -0
- dolphin/core/code_block/agent_init_block.py +0 -0
- dolphin/core/code_block/assign_block.py +98 -0
- dolphin/core/code_block/basic_code_block.py +1865 -0
- dolphin/core/code_block/explore_block.py +1327 -0
- dolphin/core/code_block/explore_block_v2.py +712 -0
- dolphin/core/code_block/explore_strategy.py +672 -0
- dolphin/core/code_block/judge_block.py +220 -0
- dolphin/core/code_block/prompt_block.py +32 -0
- dolphin/core/code_block/skill_call_deduplicator.py +291 -0
- dolphin/core/code_block/tool_block.py +129 -0
- dolphin/core/common/__init__.py +17 -0
- dolphin/core/common/constants.py +176 -0
- dolphin/core/common/enums.py +1173 -0
- dolphin/core/common/exceptions.py +133 -0
- dolphin/core/common/multimodal.py +539 -0
- dolphin/core/common/object_type.py +165 -0
- dolphin/core/common/output_format.py +432 -0
- dolphin/core/common/types.py +36 -0
- dolphin/core/config/__init__.py +16 -0
- dolphin/core/config/global_config.py +1289 -0
- dolphin/core/config/ontology_config.py +133 -0
- dolphin/core/context/__init__.py +12 -0
- dolphin/core/context/context.py +1580 -0
- dolphin/core/context/context_manager.py +161 -0
- dolphin/core/context/var_output.py +82 -0
- dolphin/core/context/variable_pool.py +356 -0
- dolphin/core/context_engineer/__init__.py +41 -0
- dolphin/core/context_engineer/config/__init__.py +5 -0
- dolphin/core/context_engineer/config/settings.py +402 -0
- dolphin/core/context_engineer/core/__init__.py +7 -0
- dolphin/core/context_engineer/core/budget_manager.py +327 -0
- dolphin/core/context_engineer/core/context_assembler.py +583 -0
- dolphin/core/context_engineer/core/context_manager.py +637 -0
- dolphin/core/context_engineer/core/tokenizer_service.py +260 -0
- dolphin/core/context_engineer/example/incremental_example.py +267 -0
- dolphin/core/context_engineer/example/traditional_example.py +334 -0
- dolphin/core/context_engineer/services/__init__.py +5 -0
- dolphin/core/context_engineer/services/compressor.py +399 -0
- dolphin/core/context_engineer/utils/__init__.py +6 -0
- dolphin/core/context_engineer/utils/context_utils.py +441 -0
- dolphin/core/context_engineer/utils/message_formatter.py +270 -0
- dolphin/core/context_engineer/utils/token_utils.py +139 -0
- dolphin/core/coroutine/__init__.py +15 -0
- dolphin/core/coroutine/context_snapshot.py +154 -0
- dolphin/core/coroutine/context_snapshot_profile.py +922 -0
- dolphin/core/coroutine/context_snapshot_store.py +268 -0
- dolphin/core/coroutine/execution_frame.py +145 -0
- dolphin/core/coroutine/execution_state_registry.py +161 -0
- dolphin/core/coroutine/resume_handle.py +101 -0
- dolphin/core/coroutine/step_result.py +101 -0
- dolphin/core/executor/__init__.py +18 -0
- dolphin/core/executor/debug_controller.py +630 -0
- dolphin/core/executor/dolphin_executor.py +1063 -0
- dolphin/core/executor/executor.py +624 -0
- dolphin/core/flags/__init__.py +27 -0
- dolphin/core/flags/definitions.py +49 -0
- dolphin/core/flags/manager.py +113 -0
- dolphin/core/hook/__init__.py +95 -0
- dolphin/core/hook/expression_evaluator.py +499 -0
- dolphin/core/hook/hook_dispatcher.py +380 -0
- dolphin/core/hook/hook_types.py +248 -0
- dolphin/core/hook/isolated_variable_pool.py +284 -0
- dolphin/core/interfaces.py +53 -0
- dolphin/core/llm/__init__.py +0 -0
- dolphin/core/llm/llm.py +495 -0
- dolphin/core/llm/llm_call.py +100 -0
- dolphin/core/llm/llm_client.py +1285 -0
- dolphin/core/llm/message_sanitizer.py +120 -0
- dolphin/core/logging/__init__.py +20 -0
- dolphin/core/logging/logger.py +526 -0
- dolphin/core/message/__init__.py +8 -0
- dolphin/core/message/compressor.py +749 -0
- dolphin/core/parser/__init__.py +8 -0
- dolphin/core/parser/parser.py +405 -0
- dolphin/core/runtime/__init__.py +10 -0
- dolphin/core/runtime/runtime_graph.py +926 -0
- dolphin/core/runtime/runtime_instance.py +446 -0
- dolphin/core/skill/__init__.py +14 -0
- dolphin/core/skill/context_retention.py +157 -0
- dolphin/core/skill/skill_function.py +686 -0
- dolphin/core/skill/skill_matcher.py +282 -0
- dolphin/core/skill/skillkit.py +700 -0
- dolphin/core/skill/skillset.py +72 -0
- dolphin/core/trajectory/__init__.py +10 -0
- dolphin/core/trajectory/recorder.py +189 -0
- dolphin/core/trajectory/trajectory.py +522 -0
- dolphin/core/utils/__init__.py +9 -0
- dolphin/core/utils/cache_kv.py +212 -0
- dolphin/core/utils/tools.py +340 -0
- dolphin/lib/__init__.py +93 -0
- dolphin/lib/debug/__init__.py +8 -0
- dolphin/lib/debug/visualizer.py +409 -0
- dolphin/lib/memory/__init__.py +28 -0
- dolphin/lib/memory/async_processor.py +220 -0
- dolphin/lib/memory/llm_calls.py +195 -0
- dolphin/lib/memory/manager.py +78 -0
- dolphin/lib/memory/sandbox.py +46 -0
- dolphin/lib/memory/storage.py +245 -0
- dolphin/lib/memory/utils.py +51 -0
- dolphin/lib/ontology/__init__.py +12 -0
- dolphin/lib/ontology/basic/__init__.py +0 -0
- dolphin/lib/ontology/basic/base.py +102 -0
- dolphin/lib/ontology/basic/concept.py +130 -0
- dolphin/lib/ontology/basic/object.py +11 -0
- dolphin/lib/ontology/basic/relation.py +63 -0
- dolphin/lib/ontology/datasource/__init__.py +27 -0
- dolphin/lib/ontology/datasource/datasource.py +66 -0
- dolphin/lib/ontology/datasource/oracle_datasource.py +338 -0
- dolphin/lib/ontology/datasource/sql.py +845 -0
- dolphin/lib/ontology/mapping.py +177 -0
- dolphin/lib/ontology/ontology.py +733 -0
- dolphin/lib/ontology/ontology_context.py +16 -0
- dolphin/lib/ontology/ontology_manager.py +107 -0
- dolphin/lib/skill_results/__init__.py +31 -0
- dolphin/lib/skill_results/cache_backend.py +559 -0
- dolphin/lib/skill_results/result_processor.py +181 -0
- dolphin/lib/skill_results/result_reference.py +179 -0
- dolphin/lib/skill_results/skillkit_hook.py +324 -0
- dolphin/lib/skill_results/strategies.py +328 -0
- dolphin/lib/skill_results/strategy_registry.py +150 -0
- dolphin/lib/skillkits/__init__.py +44 -0
- dolphin/lib/skillkits/agent_skillkit.py +155 -0
- dolphin/lib/skillkits/cognitive_skillkit.py +82 -0
- dolphin/lib/skillkits/env_skillkit.py +250 -0
- dolphin/lib/skillkits/mcp_adapter.py +616 -0
- dolphin/lib/skillkits/mcp_skillkit.py +771 -0
- dolphin/lib/skillkits/memory_skillkit.py +650 -0
- dolphin/lib/skillkits/noop_skillkit.py +31 -0
- dolphin/lib/skillkits/ontology_skillkit.py +89 -0
- dolphin/lib/skillkits/plan_act_skillkit.py +452 -0
- dolphin/lib/skillkits/resource/__init__.py +52 -0
- dolphin/lib/skillkits/resource/models/__init__.py +6 -0
- dolphin/lib/skillkits/resource/models/skill_config.py +109 -0
- dolphin/lib/skillkits/resource/models/skill_meta.py +127 -0
- dolphin/lib/skillkits/resource/resource_skillkit.py +393 -0
- dolphin/lib/skillkits/resource/skill_cache.py +215 -0
- dolphin/lib/skillkits/resource/skill_loader.py +395 -0
- dolphin/lib/skillkits/resource/skill_validator.py +406 -0
- dolphin/lib/skillkits/resource_skillkit.py +11 -0
- dolphin/lib/skillkits/search_skillkit.py +163 -0
- dolphin/lib/skillkits/sql_skillkit.py +274 -0
- dolphin/lib/skillkits/system_skillkit.py +509 -0
- dolphin/lib/skillkits/vm_skillkit.py +65 -0
- dolphin/lib/utils/__init__.py +9 -0
- dolphin/lib/utils/data_process.py +207 -0
- dolphin/lib/utils/handle_progress.py +178 -0
- dolphin/lib/utils/security.py +139 -0
- dolphin/lib/utils/text_retrieval.py +462 -0
- dolphin/lib/vm/__init__.py +11 -0
- dolphin/lib/vm/env_executor.py +895 -0
- dolphin/lib/vm/python_session_manager.py +453 -0
- dolphin/lib/vm/vm.py +610 -0
- dolphin/sdk/__init__.py +60 -0
- dolphin/sdk/agent/__init__.py +12 -0
- dolphin/sdk/agent/agent_factory.py +236 -0
- dolphin/sdk/agent/dolphin_agent.py +1106 -0
- dolphin/sdk/api/__init__.py +4 -0
- dolphin/sdk/runtime/__init__.py +8 -0
- dolphin/sdk/runtime/env.py +363 -0
- dolphin/sdk/skill/__init__.py +10 -0
- dolphin/sdk/skill/global_skills.py +706 -0
- dolphin/sdk/skill/traditional_toolkit.py +260 -0
- kweaver_dolphin-0.1.0.dist-info/METADATA +521 -0
- kweaver_dolphin-0.1.0.dist-info/RECORD +199 -0
- kweaver_dolphin-0.1.0.dist-info/WHEEL +5 -0
- kweaver_dolphin-0.1.0.dist-info/entry_points.txt +27 -0
- kweaver_dolphin-0.1.0.dist-info/licenses/LICENSE.txt +201 -0
- kweaver_dolphin-0.1.0.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,583 @@
|
|
|
1
|
+
"""Context assembler for position-aware message assembly.
|
|
2
|
+
|
|
3
|
+
Context assembler for position-aware message assembly, avoiding the "Lost in the Middle" problem.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from typing import Dict, List, Optional, Tuple, Any
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from dolphin.core.common.enums import Messages, MessageRole
|
|
9
|
+
from dolphin.core.context_engineer.config.settings import (
|
|
10
|
+
get_default_config,
|
|
11
|
+
ContextConfig,
|
|
12
|
+
BucketConfig,
|
|
13
|
+
)
|
|
14
|
+
from dolphin.core.context_engineer.services.compressor import Compressor
|
|
15
|
+
from ..core.tokenizer_service import TokenizerService
|
|
16
|
+
from ..core.budget_manager import BudgetAllocation
|
|
17
|
+
from dolphin.core.logging.logger import get_logger
|
|
18
|
+
|
|
19
|
+
# Module-level logger used by the context assembler code in this file.
logger = get_logger("context_engineer.assembler")
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass
class ContextSection:
    """A named piece of context plus the metadata used to order and budget it.

    The class defines its own ``__init__``, so ``@dataclass`` does not
    generate one. Optional fields are only assigned on the instance when the
    caller supplies them; otherwise the class-level defaults below apply.

    Attributes:
        name (str): Section name
        content (str): Section text
        priority (float): Priority
        token_count (int): Number of tokens, default is 0
        allocated_tokens (int): Token budget assigned to the section, used as
            the precise compression target, default is 0
        message_role (MessageRole): Message role, default is MessageRole.USER
        placement (str): Placement label, kept for test compatibility
    """

    name: str
    content: str
    priority: float
    token_count: int = 0
    allocated_tokens: int = 0  # Budgeted token count; the precise compression target.
    message_role: MessageRole = MessageRole.USER
    placement: str = ""  # Kept for test compatibility

    def __init__(self, name: str, content: str, priority: float, *args, **kwargs):
        """Initialize a section, accepting the legacy positional layouts.

        Args:
            name: Section name
            content: Section text
            priority: Priority
            *args: Optional extras. A leading ``str`` is taken as ``placement``;
                the next two values are ``token_count`` and
                ``allocated_tokens``. Anything after that is ignored.
            **kwargs: Attribute overrides. Keys that are not already
                attributes of the instance are silently ignored.
        """
        self.name = name
        self.content = content
        self.priority = priority

        # A leading string positional is the placement label.
        extras = list(args)
        if extras and isinstance(extras[0], str):
            self.placement = extras.pop(0)

        # zip() stops at the shorter input, so missing extras keep their defaults
        # and surplus extras are dropped.
        for attr_name, value in zip(("token_count", "allocated_tokens"), extras):
            setattr(self, attr_name, value)

        # Keyword arguments may only override attributes that already exist.
        for attr_name, value in kwargs.items():
            if hasattr(self, attr_name):
                setattr(self, attr_name, value)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@dataclass
class AssembledContext:
    """Outcome of a single context assembly run.

    Attributes:
        sections (List[ContextSection]): The context sections that were assembled
        total_tokens (int): Total number of tokens
        placement_map (Dict[str, List[str]]): Section names grouped under a
            label (bucket-order groups or placement positions)
        dropped_sections (List[str]): Names of the sections reported as dropped
        full_context (str): Full context string, kept for test compatibility
    """

    # Field order defines the positional order of the generated __init__.
    sections: List[ContextSection]
    total_tokens: int
    placement_map: Dict[str, List[str]]
    dropped_sections: List[str]
    full_context: str = ""  # Kept for test compatibility
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class ContextAssembler:
|
|
91
|
+
"""Assembles context according to position strategies to avoid "Lost in the Middle".
|
|
92
|
+
|
|
93
|
+
Assembles context according to position strategies to avoid "Lost in the Middle".
|
|
94
|
+
"""
|
|
95
|
+
|
|
96
|
+
def __init__(
    self,
    tokenizer_service: Optional[TokenizerService] = None,
    compressor_service: Optional[Any] = None,
    context_config: Optional[ContextConfig] = None,
):
    """Set up the assembler, creating a default for any collaborator not supplied.

    Args:
        tokenizer_service: TokenizerService instance used for token counting;
            a falsy value is replaced by a new TokenizerService
        compressor_service: Compressor service used for content compression;
            a falsy value is replaced by a new Compressor
        context_config: Context configuration; a falsy value is replaced by
            the result of get_default_config()
    """
    self.tokenizer = tokenizer_service if tokenizer_service else TokenizerService()
    self.compressor = compressor_service if compressor_service else Compressor()

    # Nothing has been assembled yet; assemble_context() stores its result here.
    self._last_result = None

    self.context_config = context_config if context_config else get_default_config()

    # Attribute kept for test compatibility
    self.placement_strategy = {
        position: [] for position in ("head", "middle", "tail")
    }
|
|
116
|
+
|
|
117
|
+
def assemble_context(
    self,
    content_sections: Dict[str, str],
    budget_allocations: List[BudgetAllocation],
    placement_policy: Optional[Dict[str, List[str]]] = None,
    bucket_order: Optional[List[str]] = None,
    layout_policy: Optional[str] = None,
) -> AssembledContext:
    """Assemble context sections according to bucket order.

    The result is also stored on the instance as ``_last_result`` so that
    later calls (statistics, message conversion) can use it.

    Args:
        content_sections: Dictionary mapping section names to their content
        budget_allocations: List of budget allocations for each section
        placement_policy: Optional mapping from a position label to the
            section names placed there. When given, every section's
            ``placement`` attribute is set to the first position whose list
            contains the section name ("middle" if none does) and the
            returned ``placement_map`` is keyed by position. Position labels
            other than "head", "middle" and "tail" are accepted.
        bucket_order: Global order of buckets. If None, the order is taken
            from the layout policy named by ``layout_policy`` (or the
            "default" policy); if that policy is missing, no bucket order
            is applied.
        layout_policy: Name of the layout policy to read ``bucket_order``
            from when ``bucket_order`` is None

    Returns:
        AssembledContext ordered as specified. ``total_tokens`` and
        ``full_context`` are computed over every created section; sections
        reported in ``dropped_sections`` still appear in ``sections``.
    """
    # An explicit bucket_order wins; otherwise fall back to the layout policy.
    if bucket_order is None:
        policy = self.context_config.policies.get(layout_policy or "default")
        bucket_order = policy.bucket_order if policy else None

    bucket_configs = self.context_config.buckets

    # Create sections, order them, then compress anything over budget.
    sections = self._create_sections(
        content_sections, budget_allocations, bucket_configs
    )
    sections = self._sort_sections_by_bucket_order(sections, bucket_order)
    sections = self._apply_token_limits(sections, bucket_configs)

    bucket_order_map, dropped_sections = self._build_context_by_order(
        sections, bucket_order
    )

    total_tokens = sum(section.token_count for section in sections)

    # Full context string, kept for compatibility
    full_context = self._join_context_parts(
        [section.content for section in sections]
    )

    placement_map = bucket_order_map
    if placement_policy:
        # A placement policy replaces the ordered/unordered map with a
        # position-keyed one.
        placement_map = {"head": [], "middle": [], "tail": []}
        for section in sections:
            position = next(
                (
                    label
                    for label, bucket_names in placement_policy.items()
                    if section.name in bucket_names
                ),
                "middle",  # default placement when no position lists the section
            )
            section.placement = position
            # setdefault: a custom position label must not raise KeyError.
            placement_map.setdefault(position, []).append(section.name)

    result = AssembledContext(
        sections=sections,
        total_tokens=total_tokens,
        placement_map=placement_map,
        dropped_sections=dropped_sections,
        full_context=full_context,
    )

    self._last_result = result  # Saved for later use
    return result
|
|
200
|
+
|
|
201
|
+
def _create_sections(
    self,
    content_sections: Dict[str, str],
    budget_allocations: List[BudgetAllocation],
    bucket_configs: Optional[Dict[str, BucketConfig]] = None,
) -> List[ContextSection]:
    """Build one ContextSection per content entry that has a budget allocation.

    Args:
        content_sections: dictionary of content sections
        budget_allocations: list of budget allocations
        bucket_configs: optional bucket configurations; when one exists for a
            section name, its ``message_role`` is used instead of
            MessageRole.USER

    Returns:
        list of ContextSection, in the iteration order of ``content_sections``;
        entries without a matching allocation are skipped
    """
    # Index allocations by bucket name.
    allocations_by_bucket = {}
    for entry in budget_allocations:
        allocations_by_bucket[entry.bucket_name] = entry

    created = []
    for bucket_name, text in content_sections.items():
        # The role comes from the bucket configuration when one is available.
        configured = bucket_configs.get(bucket_name) if bucket_configs else None
        role = MessageRole.USER if configured is None else configured.message_role

        budget = allocations_by_bucket.get(bucket_name)
        if not budget:
            continue

        created.append(
            ContextSection(
                name=bucket_name,
                content=text,
                priority=budget.priority,
                token_count=self.tokenizer.count_tokens(text),
                allocated_tokens=budget.allocated_tokens,  # budgeted token count
                message_role=role,
            )
        )

    return created
|
|
243
|
+
|
|
244
|
+
def _sort_sections_by_bucket_order(
    self, sections: List[ContextSection], bucket_order: Optional[List[str]]
) -> List[ContextSection]:
    """Return the sections sorted by bucket position, then by descending priority.

    Args:
        sections: list of context sections
        bucket_order: global order of buckets; when empty or None the
            sections are sorted by descending priority only

    Returns:
        a new sorted list of context sections; sections whose name is not in
        ``bucket_order`` come after all listed ones
    """

    def by_priority(section):
        return -section.priority

    if not bucket_order:
        return sorted(sections, key=by_priority)

    # Position of each bucket name in the requested order.
    position_of = {}
    for position, bucket_name in enumerate(bucket_order):
        position_of[bucket_name] = position
    after_all_listed = len(bucket_order)

    def by_bucket_then_priority(section):
        return (
            position_of.get(section.name, after_all_listed),
            -section.priority,
        )

    return sorted(sections, key=by_bucket_then_priority)
|
|
268
|
+
|
|
269
|
+
def _apply_token_limits(
    self,
    sections: List[ContextSection],
    bucket_configs: Optional[Dict[str, Any]] = None,
) -> List[ContextSection]:
    """Compress every section whose token count exceeds its allocated budget.

    A section needs compression when ``token_count`` is greater than
    ``allocated_tokens`` and greater than zero. Such sections are updated
    in place (``content`` and ``token_count``); all others are left alone.

    Args:
        sections: List of context sections
        bucket_configs: Dictionary of bucket configurations; the ``compress``
            attribute of a section's bucket config, if any, selects the
            compression method

    Returns:
        A new list holding the same section objects in the same order
    """
    handled = []

    for section in sections:
        over_budget = (
            section.token_count > section.allocated_tokens
            and section.token_count > 0
        )
        if not over_budget:
            # Within budget or empty: nothing to do.
            handled.append(section)
            continue

        # Look up the compression method in the bucket configuration, if any.
        method = None
        if bucket_configs and section.name in bucket_configs:
            method = getattr(bucket_configs[section.name], "compress", None)

        shortened = self._compress_section(section, method)
        section.content = shortened
        section.token_count = self.tokenizer.count_tokens(shortened)
        handled.append(section)

    return handled
|
|
310
|
+
|
|
311
|
+
def _compress_section(
    self, section: ContextSection, compression_method: Optional[str] = None
) -> str:
    """Return the section's content reduced to its allocated token budget.

    The target is exactly ``section.allocated_tokens``. The configured
    compressor is tried first when a compression method is given; if it
    raises, a warning is logged and the content is truncated instead.

    Args:
        section: The context section
        compression_method: The compression method

    Returns:
        The compressed content, or the unchanged content when the section is
        empty or already within budget
    """
    # Empty sections are returned untouched.
    if section.token_count <= 0:
        return section.content

    budget = section.allocated_tokens
    if section.token_count <= budget:
        # Already within budget.
        return section.content

    if self.compressor and compression_method:
        try:
            return self.compressor.compress(
                content=section.content,
                target_tokens=budget,
                method=compression_method,
            ).compressed_content
        except Exception as e:
            # Compression failed; fall through to plain truncation.
            logger.warning(
                f"Warning: Compression method '{compression_method}' failed: {e}"
            )

    # Fallback: truncate to exactly the allocated number of tokens.
    from ..utils.token_utils import truncate_to_tokens

    return truncate_to_tokens(section.content, budget, self.tokenizer)
|
|
354
|
+
|
|
355
|
+
def _build_context_by_order(
    self, sections: List[ContextSection], bucket_order: Optional[List[str]]
) -> Tuple[Dict[str, List[str]], List[str]]:
    """Group section names by bucket order and report empty sections as dropped.

    Args:
        sections: List of context sections
        bucket_order: Global order of buckets

    Returns:
        Tuple(bucket order mapping, list of dropped section names). The
        mapping has two keys: "ordered" holds the names of non-empty sections
        in ``bucket_order`` order (or in their given order when there is no
        ``bucket_order``), and "unordered" holds non-empty sections that are
        not named in ``bucket_order``. Every section with empty content is
        reported in the dropped list, whether or not ``bucket_order`` names it.
    """
    bucket_order_map = {"ordered": [], "unordered": []}
    dropped_sections = []

    if not bucket_order:
        # No bucket order: keep every non-empty section in its given order.
        for section in sections:
            if section.content:
                bucket_order_map["ordered"].append(section.name)
            else:
                dropped_sections.append(section.name)
        return bucket_order_map, dropped_sections

    # Index sections by name once; setdefault keeps the first section seen
    # for a name.
    first_by_name = {}
    for section in sections:
        first_by_name.setdefault(section.name, section)

    # Sections named in bucket_order come first, in that order.
    for bucket_name in bucket_order:
        section = first_by_name.get(bucket_name)
        if section is None:
            continue
        if section.content:
            bucket_order_map["ordered"].append(bucket_name)
        else:
            dropped_sections.append(section.name)

    # Sections not named in bucket_order: non-empty ones are "unordered",
    # empty ones are dropped, consistent with the no-bucket_order branch.
    listed = set(bucket_order)
    for section in sections:
        if section.name in listed:
            continue
        if section.content:
            bucket_order_map["unordered"].append(section.name)
        else:
            dropped_sections.append(section.name)

    return bucket_order_map, dropped_sections
|
|
397
|
+
|
|
398
|
+
def _join_context_parts(self, context_parts: List[str]) -> str:
|
|
399
|
+
"""Join context parts with appropriate separators.
|
|
400
|
+
|
|
401
|
+
Args:
|
|
402
|
+
context_parts: List of context parts
|
|
403
|
+
|
|
404
|
+
Returns:
|
|
405
|
+
Concatenated context string
|
|
406
|
+
"""
|
|
407
|
+
if not context_parts:
|
|
408
|
+
return ""
|
|
409
|
+
|
|
410
|
+
# Use double newlines to separate major sections
|
|
411
|
+
separator = "\n\n"
|
|
412
|
+
return separator.join(part.strip() for part in context_parts if part.strip())
|
|
413
|
+
|
|
414
|
+
def get_context_stats(
    self, assembled_context: Optional[AssembledContext] = None
) -> Dict[str, Any]:
    """Summarize an assembled context as a dictionary of statistics.

    Args:
        assembled_context: The assembled context; if None, the result of the
            last assembly (``_last_result``) is used

    Returns:
        A dictionary with total tokens, section counts (overall, by
        ordered/unordered group, by head/middle/tail placement), the number
        of dropped sections and per-section details. An empty dictionary is
        returned when no context is given and none has been assembled yet.
    """
    context = assembled_context
    if context is None:
        context = self._last_result
        if context is None:
            return {}

    # The map may use the bucket-order keys or the older placement keys.
    placement_map = context.placement_map

    # Per-position counts (kept for test compatibility); placements other
    # than head/middle/tail are not counted.
    sections_by_placement = {"head": 0, "middle": 0, "tail": 0}
    for section in context.sections:
        for position in sections_by_placement:
            if section.placement == position:
                sections_by_placement[position] += 1
                break

    sections_by_order = {
        group: len(placement_map.get(group, []))
        for group in ("ordered", "unordered")
    }

    def describe(section):
        # Utilization is reported as 0 when nothing was allocated.
        if section.allocated_tokens > 0:
            utilization = section.token_count / section.allocated_tokens
        else:
            utilization = 0
        return {
            "name": section.name,
            "message_role": section.message_role,
            "tokens": section.token_count,
            "priority": section.priority,
            "allocated_tokens": section.allocated_tokens,
            "budget_utilization": utilization,
        }

    return {
        "total_tokens": context.total_tokens,
        "total_sections": len(context.sections),
        "sections_by_order": sections_by_order,
        "sections_by_placement": sections_by_placement,  # compatibility field
        "dropped_sections": len(context.dropped_sections),
        "section_details": [describe(section) for section in context.sections],
    }
|
|
477
|
+
|
|
478
|
+
def to_messages(
    self,
    user_sections: Optional[List[str]] = None,
) -> List[Dict[str, str]]:
    """Convert the last assembled context into a simplified message list.

    The conversion is delegated to
    ``MessageFormatter.to_openai_messages_simple``, which is given the last
    assembly result and ``user_sections``.

    Args:
        user_sections: Specifies which sections should be treated as user
            role; passed through to the formatter unchanged

    Returns:
        The message list produced by the formatter

    Raises:
        RuntimeError: If no context has been assembled yet
    """
    if getattr(self, "_last_result", None) is None:
        raise RuntimeError(
            "No assembled context available. Call assemble_context() first."
        )

    # Imported here rather than at module level to avoid a circular dependency.
    from ..utils.message_formatter import MessageFormatter

    # The formatter is created on first use and then reused.
    try:
        formatter = self._message_formatter
    except AttributeError:
        formatter = self._message_formatter = MessageFormatter()

    return formatter.to_openai_messages_simple(
        self._last_result,
        user_sections=user_sections,
    )
|
|
516
|
+
|
|
517
|
+
def to_dph_messages(
    self,
) -> Messages:
    """Convert the last assembled context into a ``Messages`` object.

    The conversion is delegated to
    ``MessageFormatter.to_dph_messages_simple``, which is given the last
    assembly result.

    NOTE(review): unlike ``to_messages``, there is no check that a context
    has been assembled, so the formatter may receive ``None`` -- confirm the
    formatter handles that.

    Returns:
        The value returned by the formatter
    """
    # Imported here rather than at module level, as in to_messages.
    from ..utils.message_formatter import MessageFormatter

    # The formatter is created on first use and then reused.
    try:
        formatter = self._message_formatter
    except AttributeError:
        formatter = self._message_formatter = MessageFormatter()

    return formatter.to_dph_messages_simple(
        self._last_result,
    )
|
|
528
|
+
|
|
529
|
+
# The following methods are used for compatibility testing
|
|
530
|
+
def _apply_placement_policy(self, sections, placement_policy):
|
|
531
|
+
"""A compatibility method applying positional strategies."""
|
|
532
|
+
# Set the placement attribute of the section according to placement_policy
|
|
533
|
+
for section in sections:
|
|
534
|
+
# Set the placement attribute of the section according to placement_policy
|
|
535
|
+
for position, bucket_names in placement_policy.items():
|
|
536
|
+
if section.name in bucket_names:
|
|
537
|
+
section.placement = position
|
|
538
|
+
break
|
|
539
|
+
else:
|
|
540
|
+
# If no match is found, set the default placement.
|
|
541
|
+
section.placement = "middle"
|
|
542
|
+
return sections
|
|
543
|
+
|
|
544
|
+
def _sort_sections(self, sections):
|
|
545
|
+
"""Compatibility methods for the sorting section."""
|
|
546
|
+
# This is a blank implementation, used only for testing compatibility.
|
|
547
|
+
return sorted(sections, key=lambda s: -s.priority)
|
|
548
|
+
|
|
549
|
+
def _build_context(self, sections):
|
|
550
|
+
"""A method for building context compatibility."""
|
|
551
|
+
# This is a blank implementation, used only for testing compatibility.
|
|
552
|
+
context = "\n\n".join(section.content for section in sections)
|
|
553
|
+
placement_map = {"head": [], "middle": [], "tail": []}
|
|
554
|
+
|
|
555
|
+
# Fill the placement_map according to the placement attribute of section
|
|
556
|
+
for section in sections:
|
|
557
|
+
if section.placement == "head":
|
|
558
|
+
placement_map["head"].append(section.name)
|
|
559
|
+
elif section.placement == "middle":
|
|
560
|
+
placement_map["middle"].append(section.name)
|
|
561
|
+
elif section.placement == "tail":
|
|
562
|
+
placement_map["tail"].append(section.name)
|
|
563
|
+
|
|
564
|
+
dropped = []
|
|
565
|
+
return context, placement_map, dropped
|
|
566
|
+
|
|
567
|
+
def apply_lost_in_middle_mitigation(self, content_sections, key_sections):
    """Compatibility method: prefix the content of key sections with a marker.

    Args:
        content_sections: mapping from section name to content
        key_sections: names of the sections to mark

    Returns:
        A new dictionary with the same keys, where the content of every
        section named in ``key_sections`` is prefixed with "IMPORTANT: "
    """
    # Kept for test compatibility.
    return {
        name: ("IMPORTANT: " + content) if name in key_sections else content
        for name, content in content_sections.items()
    }
|
|
577
|
+
|
|
578
|
+
def create_excerpts_with_summary(self, content, excerpt_ratio=0.5):
    """Compatibility method: return a fixed placeholder excerpt and summary.

    Args:
        content: not used
        excerpt_ratio: not used

    Returns:
        Tuple(excerpt, summary) of constant placeholder strings
    """
    # Placeholder implementation, kept only for test compatibility.
    return "... [content truncated] ...", "Summary of paragraphs"
|