kweaver-dolphin 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199) hide show
  1. DolphinLanguageSDK/__init__.py +58 -0
  2. dolphin/__init__.py +62 -0
  3. dolphin/cli/__init__.py +20 -0
  4. dolphin/cli/args/__init__.py +9 -0
  5. dolphin/cli/args/parser.py +567 -0
  6. dolphin/cli/builtin_agents/__init__.py +22 -0
  7. dolphin/cli/commands/__init__.py +4 -0
  8. dolphin/cli/interrupt/__init__.py +8 -0
  9. dolphin/cli/interrupt/handler.py +205 -0
  10. dolphin/cli/interrupt/keyboard.py +82 -0
  11. dolphin/cli/main.py +49 -0
  12. dolphin/cli/multimodal/__init__.py +34 -0
  13. dolphin/cli/multimodal/clipboard.py +327 -0
  14. dolphin/cli/multimodal/handler.py +249 -0
  15. dolphin/cli/multimodal/image_processor.py +214 -0
  16. dolphin/cli/multimodal/input_parser.py +149 -0
  17. dolphin/cli/runner/__init__.py +8 -0
  18. dolphin/cli/runner/runner.py +989 -0
  19. dolphin/cli/ui/__init__.py +10 -0
  20. dolphin/cli/ui/console.py +2795 -0
  21. dolphin/cli/ui/input.py +340 -0
  22. dolphin/cli/ui/layout.py +425 -0
  23. dolphin/cli/ui/stream_renderer.py +302 -0
  24. dolphin/cli/utils/__init__.py +8 -0
  25. dolphin/cli/utils/helpers.py +135 -0
  26. dolphin/cli/utils/version.py +49 -0
  27. dolphin/core/__init__.py +107 -0
  28. dolphin/core/agent/__init__.py +10 -0
  29. dolphin/core/agent/agent_state.py +69 -0
  30. dolphin/core/agent/base_agent.py +970 -0
  31. dolphin/core/code_block/__init__.py +0 -0
  32. dolphin/core/code_block/agent_init_block.py +0 -0
  33. dolphin/core/code_block/assign_block.py +98 -0
  34. dolphin/core/code_block/basic_code_block.py +1865 -0
  35. dolphin/core/code_block/explore_block.py +1327 -0
  36. dolphin/core/code_block/explore_block_v2.py +712 -0
  37. dolphin/core/code_block/explore_strategy.py +672 -0
  38. dolphin/core/code_block/judge_block.py +220 -0
  39. dolphin/core/code_block/prompt_block.py +32 -0
  40. dolphin/core/code_block/skill_call_deduplicator.py +291 -0
  41. dolphin/core/code_block/tool_block.py +129 -0
  42. dolphin/core/common/__init__.py +17 -0
  43. dolphin/core/common/constants.py +176 -0
  44. dolphin/core/common/enums.py +1173 -0
  45. dolphin/core/common/exceptions.py +133 -0
  46. dolphin/core/common/multimodal.py +539 -0
  47. dolphin/core/common/object_type.py +165 -0
  48. dolphin/core/common/output_format.py +432 -0
  49. dolphin/core/common/types.py +36 -0
  50. dolphin/core/config/__init__.py +16 -0
  51. dolphin/core/config/global_config.py +1289 -0
  52. dolphin/core/config/ontology_config.py +133 -0
  53. dolphin/core/context/__init__.py +12 -0
  54. dolphin/core/context/context.py +1580 -0
  55. dolphin/core/context/context_manager.py +161 -0
  56. dolphin/core/context/var_output.py +82 -0
  57. dolphin/core/context/variable_pool.py +356 -0
  58. dolphin/core/context_engineer/__init__.py +41 -0
  59. dolphin/core/context_engineer/config/__init__.py +5 -0
  60. dolphin/core/context_engineer/config/settings.py +402 -0
  61. dolphin/core/context_engineer/core/__init__.py +7 -0
  62. dolphin/core/context_engineer/core/budget_manager.py +327 -0
  63. dolphin/core/context_engineer/core/context_assembler.py +583 -0
  64. dolphin/core/context_engineer/core/context_manager.py +637 -0
  65. dolphin/core/context_engineer/core/tokenizer_service.py +260 -0
  66. dolphin/core/context_engineer/example/incremental_example.py +267 -0
  67. dolphin/core/context_engineer/example/traditional_example.py +334 -0
  68. dolphin/core/context_engineer/services/__init__.py +5 -0
  69. dolphin/core/context_engineer/services/compressor.py +399 -0
  70. dolphin/core/context_engineer/utils/__init__.py +6 -0
  71. dolphin/core/context_engineer/utils/context_utils.py +441 -0
  72. dolphin/core/context_engineer/utils/message_formatter.py +270 -0
  73. dolphin/core/context_engineer/utils/token_utils.py +139 -0
  74. dolphin/core/coroutine/__init__.py +15 -0
  75. dolphin/core/coroutine/context_snapshot.py +154 -0
  76. dolphin/core/coroutine/context_snapshot_profile.py +922 -0
  77. dolphin/core/coroutine/context_snapshot_store.py +268 -0
  78. dolphin/core/coroutine/execution_frame.py +145 -0
  79. dolphin/core/coroutine/execution_state_registry.py +161 -0
  80. dolphin/core/coroutine/resume_handle.py +101 -0
  81. dolphin/core/coroutine/step_result.py +101 -0
  82. dolphin/core/executor/__init__.py +18 -0
  83. dolphin/core/executor/debug_controller.py +630 -0
  84. dolphin/core/executor/dolphin_executor.py +1063 -0
  85. dolphin/core/executor/executor.py +624 -0
  86. dolphin/core/flags/__init__.py +27 -0
  87. dolphin/core/flags/definitions.py +49 -0
  88. dolphin/core/flags/manager.py +113 -0
  89. dolphin/core/hook/__init__.py +95 -0
  90. dolphin/core/hook/expression_evaluator.py +499 -0
  91. dolphin/core/hook/hook_dispatcher.py +380 -0
  92. dolphin/core/hook/hook_types.py +248 -0
  93. dolphin/core/hook/isolated_variable_pool.py +284 -0
  94. dolphin/core/interfaces.py +53 -0
  95. dolphin/core/llm/__init__.py +0 -0
  96. dolphin/core/llm/llm.py +495 -0
  97. dolphin/core/llm/llm_call.py +100 -0
  98. dolphin/core/llm/llm_client.py +1285 -0
  99. dolphin/core/llm/message_sanitizer.py +120 -0
  100. dolphin/core/logging/__init__.py +20 -0
  101. dolphin/core/logging/logger.py +526 -0
  102. dolphin/core/message/__init__.py +8 -0
  103. dolphin/core/message/compressor.py +749 -0
  104. dolphin/core/parser/__init__.py +8 -0
  105. dolphin/core/parser/parser.py +405 -0
  106. dolphin/core/runtime/__init__.py +10 -0
  107. dolphin/core/runtime/runtime_graph.py +926 -0
  108. dolphin/core/runtime/runtime_instance.py +446 -0
  109. dolphin/core/skill/__init__.py +14 -0
  110. dolphin/core/skill/context_retention.py +157 -0
  111. dolphin/core/skill/skill_function.py +686 -0
  112. dolphin/core/skill/skill_matcher.py +282 -0
  113. dolphin/core/skill/skillkit.py +700 -0
  114. dolphin/core/skill/skillset.py +72 -0
  115. dolphin/core/trajectory/__init__.py +10 -0
  116. dolphin/core/trajectory/recorder.py +189 -0
  117. dolphin/core/trajectory/trajectory.py +522 -0
  118. dolphin/core/utils/__init__.py +9 -0
  119. dolphin/core/utils/cache_kv.py +212 -0
  120. dolphin/core/utils/tools.py +340 -0
  121. dolphin/lib/__init__.py +93 -0
  122. dolphin/lib/debug/__init__.py +8 -0
  123. dolphin/lib/debug/visualizer.py +409 -0
  124. dolphin/lib/memory/__init__.py +28 -0
  125. dolphin/lib/memory/async_processor.py +220 -0
  126. dolphin/lib/memory/llm_calls.py +195 -0
  127. dolphin/lib/memory/manager.py +78 -0
  128. dolphin/lib/memory/sandbox.py +46 -0
  129. dolphin/lib/memory/storage.py +245 -0
  130. dolphin/lib/memory/utils.py +51 -0
  131. dolphin/lib/ontology/__init__.py +12 -0
  132. dolphin/lib/ontology/basic/__init__.py +0 -0
  133. dolphin/lib/ontology/basic/base.py +102 -0
  134. dolphin/lib/ontology/basic/concept.py +130 -0
  135. dolphin/lib/ontology/basic/object.py +11 -0
  136. dolphin/lib/ontology/basic/relation.py +63 -0
  137. dolphin/lib/ontology/datasource/__init__.py +27 -0
  138. dolphin/lib/ontology/datasource/datasource.py +66 -0
  139. dolphin/lib/ontology/datasource/oracle_datasource.py +338 -0
  140. dolphin/lib/ontology/datasource/sql.py +845 -0
  141. dolphin/lib/ontology/mapping.py +177 -0
  142. dolphin/lib/ontology/ontology.py +733 -0
  143. dolphin/lib/ontology/ontology_context.py +16 -0
  144. dolphin/lib/ontology/ontology_manager.py +107 -0
  145. dolphin/lib/skill_results/__init__.py +31 -0
  146. dolphin/lib/skill_results/cache_backend.py +559 -0
  147. dolphin/lib/skill_results/result_processor.py +181 -0
  148. dolphin/lib/skill_results/result_reference.py +179 -0
  149. dolphin/lib/skill_results/skillkit_hook.py +324 -0
  150. dolphin/lib/skill_results/strategies.py +328 -0
  151. dolphin/lib/skill_results/strategy_registry.py +150 -0
  152. dolphin/lib/skillkits/__init__.py +44 -0
  153. dolphin/lib/skillkits/agent_skillkit.py +155 -0
  154. dolphin/lib/skillkits/cognitive_skillkit.py +82 -0
  155. dolphin/lib/skillkits/env_skillkit.py +250 -0
  156. dolphin/lib/skillkits/mcp_adapter.py +616 -0
  157. dolphin/lib/skillkits/mcp_skillkit.py +771 -0
  158. dolphin/lib/skillkits/memory_skillkit.py +650 -0
  159. dolphin/lib/skillkits/noop_skillkit.py +31 -0
  160. dolphin/lib/skillkits/ontology_skillkit.py +89 -0
  161. dolphin/lib/skillkits/plan_act_skillkit.py +452 -0
  162. dolphin/lib/skillkits/resource/__init__.py +52 -0
  163. dolphin/lib/skillkits/resource/models/__init__.py +6 -0
  164. dolphin/lib/skillkits/resource/models/skill_config.py +109 -0
  165. dolphin/lib/skillkits/resource/models/skill_meta.py +127 -0
  166. dolphin/lib/skillkits/resource/resource_skillkit.py +393 -0
  167. dolphin/lib/skillkits/resource/skill_cache.py +215 -0
  168. dolphin/lib/skillkits/resource/skill_loader.py +395 -0
  169. dolphin/lib/skillkits/resource/skill_validator.py +406 -0
  170. dolphin/lib/skillkits/resource_skillkit.py +11 -0
  171. dolphin/lib/skillkits/search_skillkit.py +163 -0
  172. dolphin/lib/skillkits/sql_skillkit.py +274 -0
  173. dolphin/lib/skillkits/system_skillkit.py +509 -0
  174. dolphin/lib/skillkits/vm_skillkit.py +65 -0
  175. dolphin/lib/utils/__init__.py +9 -0
  176. dolphin/lib/utils/data_process.py +207 -0
  177. dolphin/lib/utils/handle_progress.py +178 -0
  178. dolphin/lib/utils/security.py +139 -0
  179. dolphin/lib/utils/text_retrieval.py +462 -0
  180. dolphin/lib/vm/__init__.py +11 -0
  181. dolphin/lib/vm/env_executor.py +895 -0
  182. dolphin/lib/vm/python_session_manager.py +453 -0
  183. dolphin/lib/vm/vm.py +610 -0
  184. dolphin/sdk/__init__.py +60 -0
  185. dolphin/sdk/agent/__init__.py +12 -0
  186. dolphin/sdk/agent/agent_factory.py +236 -0
  187. dolphin/sdk/agent/dolphin_agent.py +1106 -0
  188. dolphin/sdk/api/__init__.py +4 -0
  189. dolphin/sdk/runtime/__init__.py +8 -0
  190. dolphin/sdk/runtime/env.py +363 -0
  191. dolphin/sdk/skill/__init__.py +10 -0
  192. dolphin/sdk/skill/global_skills.py +706 -0
  193. dolphin/sdk/skill/traditional_toolkit.py +260 -0
  194. kweaver_dolphin-0.1.0.dist-info/METADATA +521 -0
  195. kweaver_dolphin-0.1.0.dist-info/RECORD +199 -0
  196. kweaver_dolphin-0.1.0.dist-info/WHEEL +5 -0
  197. kweaver_dolphin-0.1.0.dist-info/entry_points.txt +27 -0
  198. kweaver_dolphin-0.1.0.dist-info/licenses/LICENSE.txt +201 -0
  199. kweaver_dolphin-0.1.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,583 @@
1
+ """Context assembler for position-aware message assembly.
2
+
3
+ Context assembler for position-aware message assembly, avoiding the "Lost in the Middle" problem.
4
+ """
5
+
6
+ from typing import Dict, List, Optional, Tuple, Any
7
+ from dataclasses import dataclass
8
+ from dolphin.core.common.enums import Messages, MessageRole
9
+ from dolphin.core.context_engineer.config.settings import (
10
+ get_default_config,
11
+ ContextConfig,
12
+ BucketConfig,
13
+ )
14
+ from dolphin.core.context_engineer.services.compressor import Compressor
15
+ from ..core.tokenizer_service import TokenizerService
16
+ from ..core.budget_manager import BudgetAllocation
17
+ from dolphin.core.logging.logger import get_logger
18
+
19
+ logger = get_logger("context_engineer.assembler")
20
+
21
+
22
@dataclass
class ContextSection:
    """One named piece of context plus the metadata used to budget and place it.

    The class defines its own ``__init__`` (kept for test compatibility), so the
    initializer that ``@dataclass`` would normally generate is not used.

    Attributes:
        name (str): Section name.
        content (str): Section text.
        priority (float): Priority of the section.
        token_count (int): Number of tokens in ``content``; defaults to 0.
        allocated_tokens (int): Token budget assigned to the section, used as the
            exact compression target; defaults to 0.
        message_role (MessageRole): Message role; defaults to ``MessageRole.USER``.
        placement (str): Placement label, kept for test compatibility.
    """

    name: str
    content: str
    priority: float
    token_count: int = 0
    allocated_tokens: int = 0  # Budget share; serves as the precise compression target.
    message_role: MessageRole = MessageRole.USER
    placement: str = ""  # Kept for test compatibility

    def __init__(self, name: str, content: str, priority: float, *args, **kwargs):
        """Build a section, accepting the legacy positional layouts used by tests.

        Extra positional arguments are read as ``[placement,] token_count,
        allocated_tokens``: a leading string is taken as ``placement``, and the
        next one or two values become ``token_count`` and ``allocated_tokens``.
        Keyword arguments are applied only when the instance already exposes an
        attribute of that name; other keywords are ignored.
        """
        self.name = name
        self.content = content
        self.priority = priority

        # Optional leading placement string, then the numeric extras.
        extras = list(args)
        if extras and isinstance(extras[0], str):
            self.placement = extras.pop(0)

        if extras:
            self.token_count = extras[0]
        if len(extras) > 1:
            self.allocated_tokens = extras[1]

        # Keyword overrides for attributes the instance already has.
        for attr_name, attr_value in kwargs.items():
            if hasattr(self, attr_name):
                setattr(self, attr_name, attr_value)
67
+
68
+
69
@dataclass
class AssembledContext:
    """Outcome of a single context assembly pass.

    Attributes:
        sections (List[ContextSection]): The context sections that were assembled.
        total_tokens (int): Total number of tokens.
        placement_map (Dict[str, List[str]]): Section names grouped under a label.
        dropped_sections (List[str]): Names of sections that were dropped.
        full_context (str): Full context string, kept for test compatibility.
    """

    sections: List[ContextSection]
    total_tokens: int
    placement_map: Dict[str, List[str]]
    dropped_sections: List[str]
    full_context: str = ""  # Kept for test compatibility
88
+
89
+
90
class ContextAssembler:
    """Assemble context sections in a chosen order to avoid "Lost in the Middle".

    The assembler turns raw section texts plus budget allocations into
    ``ContextSection`` objects, orders them by bucket order and priority,
    compresses any section that exceeds its allocated budget, and keeps the
    last ``AssembledContext`` so it can be converted to messages afterwards.
    """

    def __init__(
        self,
        tokenizer_service: Optional[TokenizerService] = None,
        compressor_service: Optional[Any] = None,
        context_config: Optional[ContextConfig] = None,
    ):
        """Initialize the context assembler.

        Args:
            tokenizer_service: TokenizerService instance used for token counting;
                a new ``TokenizerService()`` is created when omitted.
            compressor_service: Compressor service used for content compression;
                a new ``Compressor()`` is created when omitted.
            context_config: Context configuration; ``get_default_config()`` is
                used when omitted.
        """
        self.tokenizer = tokenizer_service or TokenizerService()
        self.compressor = compressor_service or Compressor()
        self._last_result = None  # Result of the most recent assemble_context() call.
        self.context_config = context_config or get_default_config()
        # Property kept for test compatibility.
        self.placement_strategy = {"head": [], "middle": [], "tail": []}

    def assemble_context(
        self,
        content_sections: Dict[str, str],
        budget_allocations: List[BudgetAllocation],
        placement_policy: Optional[Dict[str, List[str]]] = None,
        bucket_order: Optional[List[str]] = None,
        layout_policy: Optional[str] = None,
    ) -> AssembledContext:
        """Assemble context sections according to bucket order.

        Args:
            content_sections: Dictionary mapping section names to their content.
            budget_allocations: List of budget allocations, matched to sections
                by ``bucket_name``.
            placement_policy: Optional mapping of position label to section
                names. When given, the result's ``placement_map`` is built from
                it (sections matching no label go to ``"middle"``) instead of
                from the bucket order.
            bucket_order: Global order of buckets. When None, the order is taken
                from the layout policy named by ``layout_policy`` (or the
                ``"default"`` policy) in the context configuration.
            layout_policy: Name of the layout policy to read the bucket order
                from when ``bucket_order`` is None.

        Returns:
            The AssembledContext, which is also stored for ``to_messages()``,
            ``to_dph_messages()`` and ``get_context_stats()``.
        """
        # An explicit bucket_order wins; otherwise consult the layout policy.
        if bucket_order is None:
            policy = self.context_config.policies.get(layout_policy or "default")
            bucket_order = policy.bucket_order if policy else None

        bucket_configs = self.context_config.buckets

        # Create, order, then fit sections to their budgets.
        sections = self._create_sections(
            content_sections, budget_allocations, bucket_configs
        )
        sections = self._sort_sections_by_bucket_order(sections, bucket_order)
        sections = self._apply_token_limits(sections, bucket_configs)

        bucket_order_map, dropped_sections = self._build_context_by_order(
            sections, bucket_order
        )

        total_tokens = sum(section.token_count for section in sections)

        # Full context string, kept for compatibility.
        full_context = self._join_context_parts(
            [section.content for section in sections]
        )

        # placement_policy, when supplied, replaces the bucket-order map.
        placement_map = bucket_order_map
        if placement_policy:
            placement_map = self._build_placement_map(sections, placement_policy)

        result = AssembledContext(
            sections=sections,
            total_tokens=total_tokens,
            placement_map=placement_map,
            dropped_sections=dropped_sections,
            full_context=full_context,
        )

        self._last_result = result  # Saved for later conversion / stats.
        return result

    def _build_placement_map(
        self, sections: List[Any], placement_policy: Dict[str, List[str]]
    ) -> Dict[str, List[str]]:
        """Tag sections with their placement and group their names by position.

        The map always contains ``"head"``, ``"middle"`` and ``"tail"``. Any
        other position label used by ``placement_policy`` gets its own entry
        rather than raising ``KeyError``.

        Args:
            sections: Sections to tag; their ``placement`` attribute is updated.
            placement_policy: Mapping of position label to section names.

        Returns:
            Mapping of position label to the names of the sections placed there.
        """
        self._apply_placement_policy(sections, placement_policy)

        placement_map: Dict[str, List[str]] = {"head": [], "middle": [], "tail": []}
        for section in sections:
            placement_map.setdefault(section.placement, []).append(section.name)
        return placement_map

    def _create_sections(
        self,
        content_sections: Dict[str, str],
        budget_allocations: List[BudgetAllocation],
        bucket_configs: Optional[Dict[str, BucketConfig]] = None,
    ) -> List[ContextSection]:
        """Create context sections with metadata from budget allocations.

        Sections without a matching budget allocation are skipped.

        Args:
            content_sections: Dictionary of section name to content.
            budget_allocations: List of budget allocations.
            bucket_configs: Optional bucket configurations; when a section has
                one, its ``message_role`` is used instead of ``MessageRole.USER``.

        Returns:
            List of ContextSection, in the iteration order of ``content_sections``.
        """
        allocation_map = {alloc.bucket_name: alloc for alloc in budget_allocations}
        sections = []

        for section_name, content in content_sections.items():
            allocation = allocation_map.get(section_name)
            if not allocation:
                continue

            message_role = MessageRole.USER
            if bucket_configs:
                bucket_config = bucket_configs.get(section_name)
                if bucket_config is not None:
                    message_role = bucket_config.message_role

            sections.append(
                ContextSection(
                    name=section_name,
                    content=content,
                    priority=allocation.priority,
                    token_count=self.tokenizer.count_tokens(content),
                    allocated_tokens=allocation.allocated_tokens,
                    message_role=message_role,
                )
            )

        return sections

    def _sort_sections_by_bucket_order(
        self, sections: List[ContextSection], bucket_order: Optional[List[str]]
    ) -> List[ContextSection]:
        """Sort sections by bucket order and priority.

        Args:
            sections: List of context sections.
            bucket_order: Global order of buckets; when empty or None the
                sections are sorted by descending priority only.

        Returns:
            A new sorted list. Sections whose name is not in ``bucket_order``
            come after all listed ones, by descending priority.
        """
        if not bucket_order:
            return sorted(sections, key=lambda s: -s.priority)

        # Name -> position lookup; unlisted names share the trailing position.
        bucket_index = {name: idx for idx, name in enumerate(bucket_order)}
        trailing = len(bucket_order)

        return sorted(
            sections,
            key=lambda s: (bucket_index.get(s.name, trailing), -s.priority),
        )

    def _apply_token_limits(
        self,
        sections: List[ContextSection],
        bucket_configs: Optional[Dict[str, Any]] = None,
    ) -> List[ContextSection]:
        """Apply token limits and compression to sections.

        A section is compressed when its ``token_count`` is positive and exceeds
        its ``allocated_tokens``. Sections are updated in place.

        Args:
            sections: List of context sections.
            bucket_configs: Dictionary of bucket configurations; the ``compress``
                attribute of a section's bucket selects the compression method.

        Returns:
            A new list holding the same (possibly updated) section objects.
        """
        processed_sections = []

        for section in sections:
            over_budget = (
                section.token_count > section.allocated_tokens
                and section.token_count > 0
            )
            if over_budget:
                compression_method = None
                if bucket_configs and section.name in bucket_configs:
                    compression_method = getattr(
                        bucket_configs[section.name], "compress", None
                    )

                section.content = self._compress_section(section, compression_method)
                section.token_count = self.tokenizer.count_tokens(section.content)

            processed_sections.append(section)

        return processed_sections

    def _compress_section(
        self, section: ContextSection, compression_method: Optional[str] = None
    ) -> str:
        """Compress a section down to its allocated token budget.

        Args:
            section: The context section.
            compression_method: The compression method passed to the compressor;
                when None, or when the compressor raises, the content is
                truncated instead.

        Returns:
            The compressed content, or the original content when it is empty
            or already within ``allocated_tokens``.
        """
        if section.token_count <= 0:
            return section.content

        # The budget allocation is used directly as the compression target.
        target_tokens = section.allocated_tokens
        if section.token_count <= target_tokens:
            return section.content

        if self.compressor and compression_method:
            try:
                result = self.compressor.compress(
                    content=section.content,
                    target_tokens=target_tokens,
                    method=compression_method,
                )
                return result.compressed_content
            except Exception as e:
                # Deliberate best-effort: log and fall through to truncation.
                logger.warning(
                    f"Warning: Compression method '{compression_method}' failed: {e}"
                )

        # Fallback: simple truncation to allocated_tokens.
        from ..utils.token_utils import truncate_to_tokens

        return truncate_to_tokens(section.content, target_tokens, self.tokenizer)

    def _build_context_by_order(
        self, sections: List[ContextSection], bucket_order: Optional[List[str]]
    ) -> Tuple[Dict[str, List[str]], List[str]]:
        """Classify section names as ordered, unordered or dropped.

        Args:
            sections: List of context sections.
            bucket_order: Global order of buckets.

        Returns:
            Tuple ``(bucket_order_map, dropped_sections)``. ``bucket_order_map``
            has the keys ``"ordered"`` (non-empty sections named in
            ``bucket_order``, or all non-empty sections when there is no bucket
            order) and ``"unordered"`` (non-empty sections not named in
            ``bucket_order``). ``dropped_sections`` lists every section whose
            content is empty, whether or not it is named in ``bucket_order``.
        """
        bucket_order_map: Dict[str, List[str]] = {"ordered": [], "unordered": []}
        dropped_sections: List[str] = []

        if not bucket_order:
            # No bucket order: keep every non-empty section in its given order.
            for section in sections:
                if section.content:
                    bucket_order_map["ordered"].append(section.name)
                else:
                    dropped_sections.append(section.name)
            return bucket_order_map, dropped_sections

        # First section per name, so each bucket lookup is O(1).
        first_by_name: Dict[str, Any] = {}
        for section in sections:
            first_by_name.setdefault(section.name, section)

        for bucket_name in bucket_order:
            section = first_by_name.get(bucket_name)
            if section is None:
                continue
            if section.content:
                bucket_order_map["ordered"].append(bucket_name)
            else:
                dropped_sections.append(section.name)

        # Sections outside bucket_order: keep the non-empty ones, report the rest.
        listed = set(bucket_order)
        for section in sections:
            if section.name in listed:
                continue
            if section.content:
                bucket_order_map["unordered"].append(section.name)
            else:
                dropped_sections.append(section.name)

        return bucket_order_map, dropped_sections

    def _join_context_parts(self, context_parts: List[str]) -> str:
        """Join context parts with a blank line between them.

        Args:
            context_parts: List of context parts.

        Returns:
            The stripped, non-empty parts joined by a double newline; an empty
            string when there is nothing to join.
        """
        stripped_parts = (part.strip() for part in context_parts)
        return "\n\n".join(text for text in stripped_parts if text)

    def get_context_stats(
        self, assembled_context: Optional[AssembledContext] = None
    ) -> Dict[str, Any]:
        """Get statistics about an assembled context.

        Args:
            assembled_context: The assembled context; when None the result of
                the last ``assemble_context()`` call is used.

        Returns:
            A dictionary of statistics, or an empty dict when no context is
            available.
        """
        if assembled_context is None:
            if self._last_result is None:
                return {}
            assembled_context = self._last_result

        # The map may be in bucket-order form or in head/middle/tail form.
        placement_map = assembled_context.placement_map

        # Per-position counts, kept for test compatibility.
        sections_by_placement = {"head": 0, "middle": 0, "tail": 0}
        for section in assembled_context.sections:
            if section.placement in sections_by_placement:
                sections_by_placement[section.placement] += 1

        sections_by_order = {
            "ordered": len(placement_map.get("ordered", [])),
            "unordered": len(placement_map.get("unordered", [])),
        }

        section_details = [
            {
                "name": section.name,
                "message_role": section.message_role,
                "tokens": section.token_count,
                "priority": section.priority,
                "allocated_tokens": section.allocated_tokens,
                # Guard against division by zero for unbudgeted sections.
                "budget_utilization": (
                    section.token_count / section.allocated_tokens
                    if section.allocated_tokens > 0
                    else 0
                ),
            }
            for section in assembled_context.sections
        ]

        return {
            "total_tokens": assembled_context.total_tokens,
            "total_sections": len(assembled_context.sections),
            "sections_by_order": sections_by_order,
            "sections_by_placement": sections_by_placement,
            "dropped_sections": len(assembled_context.dropped_sections),
            "section_details": section_details,
        }

    def _get_message_formatter(self):
        """Return the lazily created MessageFormatter shared by the converters."""
        # Delayed import avoids circular dependencies.
        from ..utils.message_formatter import MessageFormatter

        if not hasattr(self, "_message_formatter"):
            self._message_formatter = MessageFormatter()
        return self._message_formatter

    def to_messages(
        self,
        user_sections: Optional[List[str]] = None,
    ) -> List[Dict[str, str]]:
        """Convert the last assembled context into a simplified message list.

        Delegates to ``MessageFormatter.to_openai_messages_simple``.

        Args:
            user_sections: Section names to treat as user role; passed through
                to the formatter.

        Returns:
            The message list produced by the formatter.

        Raises:
            RuntimeError: If ``assemble_context()`` has not produced a result yet.
        """
        if getattr(self, "_last_result", None) is None:
            raise RuntimeError(
                "No assembled context available. Call assemble_context() first."
            )

        return self._get_message_formatter().to_openai_messages_simple(
            self._last_result,
            user_sections=user_sections,
        )

    def to_dph_messages(
        self,
    ) -> Messages:
        """Convert the last assembled context into a ``Messages`` object.

        Delegates to ``MessageFormatter.to_dph_messages_simple``.
        """
        # NOTE(review): unlike to_messages(), this does not check that
        # assemble_context() has run, so _last_result may be None here.
        # Confirm the formatter tolerates that.
        return self._get_message_formatter().to_dph_messages_simple(
            self._last_result,
        )

    # The following methods are kept for test compatibility.
    def _apply_placement_policy(self, sections, placement_policy):
        """Set each section's ``placement`` from ``placement_policy``.

        A section gets the first position whose name list contains it, or
        ``"middle"`` when no position lists it. Returns the same ``sections``.
        """
        for section in sections:
            for position, bucket_names in placement_policy.items():
                if section.name in bucket_names:
                    section.placement = position
                    break
            else:
                # No position lists this section: default placement.
                section.placement = "middle"
        return sections

    def _sort_sections(self, sections):
        """Return the sections sorted by descending priority (compatibility)."""
        return sorted(sections, key=lambda s: -s.priority)

    def _build_context(self, sections):
        """Join section contents and group names by placement (compatibility).

        Returns:
            Tuple ``(context, placement_map, dropped)``: the contents joined by
            double newlines, the names grouped under ``"head"`` / ``"middle"`` /
            ``"tail"`` (sections with any other placement are not listed), and
            an always-empty dropped list.
        """
        context = "\n\n".join(section.content for section in sections)
        placement_map = {"head": [], "middle": [], "tail": []}

        for section in sections:
            if section.placement in placement_map:
                placement_map[section.placement].append(section.name)

        dropped = []
        return context, placement_map, dropped

    def apply_lost_in_middle_mitigation(self, content_sections, key_sections):
        """Prefix the content of key sections with ``"IMPORTANT: "`` (compatibility).

        Returns a new dict; sections not in ``key_sections`` are copied unchanged.
        """
        return {
            name: ("IMPORTANT: " + content if name in key_sections else content)
            for name, content in content_sections.items()
        }

    def create_excerpts_with_summary(self, content, excerpt_ratio=0.5):
        """Return fixed placeholder ``(excerpt, summary)`` strings (compatibility).

        Both arguments are currently ignored.
        """
        excerpt = "... [content truncated] ..."
        summary = "Summary of paragraphs"
        return excerpt, summary