kweaver-dolphin 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199) hide show
  1. DolphinLanguageSDK/__init__.py +58 -0
  2. dolphin/__init__.py +62 -0
  3. dolphin/cli/__init__.py +20 -0
  4. dolphin/cli/args/__init__.py +9 -0
  5. dolphin/cli/args/parser.py +567 -0
  6. dolphin/cli/builtin_agents/__init__.py +22 -0
  7. dolphin/cli/commands/__init__.py +4 -0
  8. dolphin/cli/interrupt/__init__.py +8 -0
  9. dolphin/cli/interrupt/handler.py +205 -0
  10. dolphin/cli/interrupt/keyboard.py +82 -0
  11. dolphin/cli/main.py +49 -0
  12. dolphin/cli/multimodal/__init__.py +34 -0
  13. dolphin/cli/multimodal/clipboard.py +327 -0
  14. dolphin/cli/multimodal/handler.py +249 -0
  15. dolphin/cli/multimodal/image_processor.py +214 -0
  16. dolphin/cli/multimodal/input_parser.py +149 -0
  17. dolphin/cli/runner/__init__.py +8 -0
  18. dolphin/cli/runner/runner.py +989 -0
  19. dolphin/cli/ui/__init__.py +10 -0
  20. dolphin/cli/ui/console.py +2795 -0
  21. dolphin/cli/ui/input.py +340 -0
  22. dolphin/cli/ui/layout.py +425 -0
  23. dolphin/cli/ui/stream_renderer.py +302 -0
  24. dolphin/cli/utils/__init__.py +8 -0
  25. dolphin/cli/utils/helpers.py +135 -0
  26. dolphin/cli/utils/version.py +49 -0
  27. dolphin/core/__init__.py +107 -0
  28. dolphin/core/agent/__init__.py +10 -0
  29. dolphin/core/agent/agent_state.py +69 -0
  30. dolphin/core/agent/base_agent.py +970 -0
  31. dolphin/core/code_block/__init__.py +0 -0
  32. dolphin/core/code_block/agent_init_block.py +0 -0
  33. dolphin/core/code_block/assign_block.py +98 -0
  34. dolphin/core/code_block/basic_code_block.py +1865 -0
  35. dolphin/core/code_block/explore_block.py +1327 -0
  36. dolphin/core/code_block/explore_block_v2.py +712 -0
  37. dolphin/core/code_block/explore_strategy.py +672 -0
  38. dolphin/core/code_block/judge_block.py +220 -0
  39. dolphin/core/code_block/prompt_block.py +32 -0
  40. dolphin/core/code_block/skill_call_deduplicator.py +291 -0
  41. dolphin/core/code_block/tool_block.py +129 -0
  42. dolphin/core/common/__init__.py +17 -0
  43. dolphin/core/common/constants.py +176 -0
  44. dolphin/core/common/enums.py +1173 -0
  45. dolphin/core/common/exceptions.py +133 -0
  46. dolphin/core/common/multimodal.py +539 -0
  47. dolphin/core/common/object_type.py +165 -0
  48. dolphin/core/common/output_format.py +432 -0
  49. dolphin/core/common/types.py +36 -0
  50. dolphin/core/config/__init__.py +16 -0
  51. dolphin/core/config/global_config.py +1289 -0
  52. dolphin/core/config/ontology_config.py +133 -0
  53. dolphin/core/context/__init__.py +12 -0
  54. dolphin/core/context/context.py +1580 -0
  55. dolphin/core/context/context_manager.py +161 -0
  56. dolphin/core/context/var_output.py +82 -0
  57. dolphin/core/context/variable_pool.py +356 -0
  58. dolphin/core/context_engineer/__init__.py +41 -0
  59. dolphin/core/context_engineer/config/__init__.py +5 -0
  60. dolphin/core/context_engineer/config/settings.py +402 -0
  61. dolphin/core/context_engineer/core/__init__.py +7 -0
  62. dolphin/core/context_engineer/core/budget_manager.py +327 -0
  63. dolphin/core/context_engineer/core/context_assembler.py +583 -0
  64. dolphin/core/context_engineer/core/context_manager.py +637 -0
  65. dolphin/core/context_engineer/core/tokenizer_service.py +260 -0
  66. dolphin/core/context_engineer/example/incremental_example.py +267 -0
  67. dolphin/core/context_engineer/example/traditional_example.py +334 -0
  68. dolphin/core/context_engineer/services/__init__.py +5 -0
  69. dolphin/core/context_engineer/services/compressor.py +399 -0
  70. dolphin/core/context_engineer/utils/__init__.py +6 -0
  71. dolphin/core/context_engineer/utils/context_utils.py +441 -0
  72. dolphin/core/context_engineer/utils/message_formatter.py +270 -0
  73. dolphin/core/context_engineer/utils/token_utils.py +139 -0
  74. dolphin/core/coroutine/__init__.py +15 -0
  75. dolphin/core/coroutine/context_snapshot.py +154 -0
  76. dolphin/core/coroutine/context_snapshot_profile.py +922 -0
  77. dolphin/core/coroutine/context_snapshot_store.py +268 -0
  78. dolphin/core/coroutine/execution_frame.py +145 -0
  79. dolphin/core/coroutine/execution_state_registry.py +161 -0
  80. dolphin/core/coroutine/resume_handle.py +101 -0
  81. dolphin/core/coroutine/step_result.py +101 -0
  82. dolphin/core/executor/__init__.py +18 -0
  83. dolphin/core/executor/debug_controller.py +630 -0
  84. dolphin/core/executor/dolphin_executor.py +1063 -0
  85. dolphin/core/executor/executor.py +624 -0
  86. dolphin/core/flags/__init__.py +27 -0
  87. dolphin/core/flags/definitions.py +49 -0
  88. dolphin/core/flags/manager.py +113 -0
  89. dolphin/core/hook/__init__.py +95 -0
  90. dolphin/core/hook/expression_evaluator.py +499 -0
  91. dolphin/core/hook/hook_dispatcher.py +380 -0
  92. dolphin/core/hook/hook_types.py +248 -0
  93. dolphin/core/hook/isolated_variable_pool.py +284 -0
  94. dolphin/core/interfaces.py +53 -0
  95. dolphin/core/llm/__init__.py +0 -0
  96. dolphin/core/llm/llm.py +495 -0
  97. dolphin/core/llm/llm_call.py +100 -0
  98. dolphin/core/llm/llm_client.py +1285 -0
  99. dolphin/core/llm/message_sanitizer.py +120 -0
  100. dolphin/core/logging/__init__.py +20 -0
  101. dolphin/core/logging/logger.py +526 -0
  102. dolphin/core/message/__init__.py +8 -0
  103. dolphin/core/message/compressor.py +749 -0
  104. dolphin/core/parser/__init__.py +8 -0
  105. dolphin/core/parser/parser.py +405 -0
  106. dolphin/core/runtime/__init__.py +10 -0
  107. dolphin/core/runtime/runtime_graph.py +926 -0
  108. dolphin/core/runtime/runtime_instance.py +446 -0
  109. dolphin/core/skill/__init__.py +14 -0
  110. dolphin/core/skill/context_retention.py +157 -0
  111. dolphin/core/skill/skill_function.py +686 -0
  112. dolphin/core/skill/skill_matcher.py +282 -0
  113. dolphin/core/skill/skillkit.py +700 -0
  114. dolphin/core/skill/skillset.py +72 -0
  115. dolphin/core/trajectory/__init__.py +10 -0
  116. dolphin/core/trajectory/recorder.py +189 -0
  117. dolphin/core/trajectory/trajectory.py +522 -0
  118. dolphin/core/utils/__init__.py +9 -0
  119. dolphin/core/utils/cache_kv.py +212 -0
  120. dolphin/core/utils/tools.py +340 -0
  121. dolphin/lib/__init__.py +93 -0
  122. dolphin/lib/debug/__init__.py +8 -0
  123. dolphin/lib/debug/visualizer.py +409 -0
  124. dolphin/lib/memory/__init__.py +28 -0
  125. dolphin/lib/memory/async_processor.py +220 -0
  126. dolphin/lib/memory/llm_calls.py +195 -0
  127. dolphin/lib/memory/manager.py +78 -0
  128. dolphin/lib/memory/sandbox.py +46 -0
  129. dolphin/lib/memory/storage.py +245 -0
  130. dolphin/lib/memory/utils.py +51 -0
  131. dolphin/lib/ontology/__init__.py +12 -0
  132. dolphin/lib/ontology/basic/__init__.py +0 -0
  133. dolphin/lib/ontology/basic/base.py +102 -0
  134. dolphin/lib/ontology/basic/concept.py +130 -0
  135. dolphin/lib/ontology/basic/object.py +11 -0
  136. dolphin/lib/ontology/basic/relation.py +63 -0
  137. dolphin/lib/ontology/datasource/__init__.py +27 -0
  138. dolphin/lib/ontology/datasource/datasource.py +66 -0
  139. dolphin/lib/ontology/datasource/oracle_datasource.py +338 -0
  140. dolphin/lib/ontology/datasource/sql.py +845 -0
  141. dolphin/lib/ontology/mapping.py +177 -0
  142. dolphin/lib/ontology/ontology.py +733 -0
  143. dolphin/lib/ontology/ontology_context.py +16 -0
  144. dolphin/lib/ontology/ontology_manager.py +107 -0
  145. dolphin/lib/skill_results/__init__.py +31 -0
  146. dolphin/lib/skill_results/cache_backend.py +559 -0
  147. dolphin/lib/skill_results/result_processor.py +181 -0
  148. dolphin/lib/skill_results/result_reference.py +179 -0
  149. dolphin/lib/skill_results/skillkit_hook.py +324 -0
  150. dolphin/lib/skill_results/strategies.py +328 -0
  151. dolphin/lib/skill_results/strategy_registry.py +150 -0
  152. dolphin/lib/skillkits/__init__.py +44 -0
  153. dolphin/lib/skillkits/agent_skillkit.py +155 -0
  154. dolphin/lib/skillkits/cognitive_skillkit.py +82 -0
  155. dolphin/lib/skillkits/env_skillkit.py +250 -0
  156. dolphin/lib/skillkits/mcp_adapter.py +616 -0
  157. dolphin/lib/skillkits/mcp_skillkit.py +771 -0
  158. dolphin/lib/skillkits/memory_skillkit.py +650 -0
  159. dolphin/lib/skillkits/noop_skillkit.py +31 -0
  160. dolphin/lib/skillkits/ontology_skillkit.py +89 -0
  161. dolphin/lib/skillkits/plan_act_skillkit.py +452 -0
  162. dolphin/lib/skillkits/resource/__init__.py +52 -0
  163. dolphin/lib/skillkits/resource/models/__init__.py +6 -0
  164. dolphin/lib/skillkits/resource/models/skill_config.py +109 -0
  165. dolphin/lib/skillkits/resource/models/skill_meta.py +127 -0
  166. dolphin/lib/skillkits/resource/resource_skillkit.py +393 -0
  167. dolphin/lib/skillkits/resource/skill_cache.py +215 -0
  168. dolphin/lib/skillkits/resource/skill_loader.py +395 -0
  169. dolphin/lib/skillkits/resource/skill_validator.py +406 -0
  170. dolphin/lib/skillkits/resource_skillkit.py +11 -0
  171. dolphin/lib/skillkits/search_skillkit.py +163 -0
  172. dolphin/lib/skillkits/sql_skillkit.py +274 -0
  173. dolphin/lib/skillkits/system_skillkit.py +509 -0
  174. dolphin/lib/skillkits/vm_skillkit.py +65 -0
  175. dolphin/lib/utils/__init__.py +9 -0
  176. dolphin/lib/utils/data_process.py +207 -0
  177. dolphin/lib/utils/handle_progress.py +178 -0
  178. dolphin/lib/utils/security.py +139 -0
  179. dolphin/lib/utils/text_retrieval.py +462 -0
  180. dolphin/lib/vm/__init__.py +11 -0
  181. dolphin/lib/vm/env_executor.py +895 -0
  182. dolphin/lib/vm/python_session_manager.py +453 -0
  183. dolphin/lib/vm/vm.py +610 -0
  184. dolphin/sdk/__init__.py +60 -0
  185. dolphin/sdk/agent/__init__.py +12 -0
  186. dolphin/sdk/agent/agent_factory.py +236 -0
  187. dolphin/sdk/agent/dolphin_agent.py +1106 -0
  188. dolphin/sdk/api/__init__.py +4 -0
  189. dolphin/sdk/runtime/__init__.py +8 -0
  190. dolphin/sdk/runtime/env.py +363 -0
  191. dolphin/sdk/skill/__init__.py +10 -0
  192. dolphin/sdk/skill/global_skills.py +706 -0
  193. dolphin/sdk/skill/traditional_toolkit.py +260 -0
  194. kweaver_dolphin-0.1.0.dist-info/METADATA +521 -0
  195. kweaver_dolphin-0.1.0.dist-info/RECORD +199 -0
  196. kweaver_dolphin-0.1.0.dist-info/WHEEL +5 -0
  197. kweaver_dolphin-0.1.0.dist-info/entry_points.txt +27 -0
  198. kweaver_dolphin-0.1.0.dist-info/licenses/LICENSE.txt +201 -0
  199. kweaver_dolphin-0.1.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,441 @@
1
+ """Context utility functions."""
2
+
3
+ import re
4
+ from typing import List, Dict, Optional
5
+ from ..core.tokenizer_service import TokenizerService
6
+
7
+
8
def extract_key_info(
    text: str, max_sentences: int = 3, keyword_weight: float = 2.0
) -> str:
    """
    Extract key information from text using simple heuristics.

    Sentences are split on ``.``, ``!`` and ``?``. Each sentence is scored by
    the repeated, non-trivial words it contains, plus a fixed bonus for the
    first and last sentence. The best ``max_sentences`` sentences are returned
    in their original order, joined with ". " and terminated with ".".

    Args:
        text: Input text
        max_sentences: Maximum number of sentences to extract. Values <= 0
            yield an empty string.
        keyword_weight: Weight for keyword-containing sentences

    Returns:
        Extracted key information, or "" when there is nothing to extract
    """
    # A non-positive budget means "select nothing". Without this guard the
    # slice below would produce a lone "." (0) or drop the *lowest*-ranked
    # sentences from the end of the ranking (negative values).
    if not text or max_sentences <= 0:
        return ""

    # Split into sentences, discarding empty fragments
    sentences = [part.strip() for part in re.split(r"[.!?]+", text) if part.strip()]
    if not sentences:
        return ""

    # Simple keyword extraction: frequency of words longer than 3 characters
    word_freq = {}
    for word in re.findall(r"\b\w+\b", text.lower()):
        if len(word) > 3:  # Filter short words
            word_freq[word] = word_freq.get(word, 0) + 1

    # Common words that must never count as keywords
    common_words = {
        "the", "and", "or", "but", "in", "on", "at", "to", "for", "of",
        "with", "by", "is", "are", "was", "were", "be", "been", "have",
        "has", "had", "do", "does", "did", "will", "would", "could",
        "should", "may", "might", "can", "this", "that", "these", "those",
        "a", "an",
    }

    # Only words that repeat are treated as keywords
    keywords = {
        word: freq
        for word, freq in word_freq.items()
        if word not in common_words and freq > 1
    }

    # Score sentences based on keywords and position
    last_index = len(sentences) - 1
    sentence_scores = []
    for index, sentence in enumerate(sentences):
        sentence_lower = sentence.lower()

        # Keyword matches are substring matches against the lowered sentence
        score = sum(
            keyword_weight * freq
            for keyword, freq in keywords.items()
            if keyword in sentence_lower
        )

        # Position bonus (first and last sentences get higher scores)
        if index == 0 or index == last_index:
            score += 1.0

        sentence_scores.append((index, sentence, score))

    # Rank by score (stable sort keeps document order among ties), keep the
    # best ones, then restore document order for readability
    sentence_scores.sort(key=lambda item: item[2], reverse=True)
    top_sentences = sorted(sentence_scores[:max_sentences], key=lambda item: item[0])

    return ". ".join(sentence for _, sentence, _ in top_sentences) + "."
109
+
110
+
111
def summarize_content(
    text: str, target_ratio: float = 0.3, preserve_keywords: bool = True
) -> str:
    """
    Summarize content by extracting key sentences and compressing.

    The sentence budget handed to ``extract_key_info`` is
    ``int(1 / target_ratio)``, clamped to at least one sentence. When
    ``preserve_keywords`` is set, the first sentence containing each of the
    top repeated keywords is appended if the keyword is missing from the
    extracted sentences.

    Args:
        text: Input text
        target_ratio: Target compression ratio; must be greater than 0
        preserve_keywords: Whether to preserve important keywords

    Returns:
        Summarized content

    Raises:
        ValueError: If ``target_ratio`` is not greater than 0 (for non-empty
            text).
    """
    if not text:
        return ""

    # Previously 0 raised ZeroDivisionError and negative values produced a
    # negative sentence budget; fail with an explicit message instead.
    if target_ratio <= 0:
        raise ValueError("target_ratio must be greater than 0")

    # Ratios above 1 would truncate to a budget of 0 sentences; always keep
    # at least one sentence.
    max_sentences = max(1, int(1 / target_ratio))

    # Extract key information first
    key_info = extract_key_info(text, max_sentences=max_sentences)

    if not preserve_keywords:
        return key_info

    # Extract keywords from original text (words longer than 4 characters)
    word_freq = {}
    for word in re.findall(r"\b\w+\b", text.lower()):
        if len(word) > 4:  # Filter short words
            word_freq[word] = word_freq.get(word, 0) + 1

    # Get top keywords: ten most frequent, keeping only those that repeat
    ranked = sorted(word_freq.items(), key=lambda item: item[1], reverse=True)[:10]
    keyword_list = [word for word, freq in ranked if freq > 1]

    # Ensure keywords are included in summary
    summary_parts = [key_info]

    # Loop-invariant work hoisted out of the keyword loop
    key_info_lower = key_info.lower()
    sentences = re.split(r"[.!?]+", text)

    # Add keyword context if not already included
    for keyword in keyword_list[:5]:  # Top 5 keywords
        if keyword in key_info_lower:
            continue
        # Find the first sentence containing this keyword
        for sentence in sentences:
            if keyword in sentence.lower():
                summary_parts.append(sentence.strip())
                break

    return " ".join(summary_parts)
159
+
160
+
161
def extract_entities(text: str) -> Dict[str, List[str]]:
    """
    Simple entity extraction using regex patterns.

    Args:
        text: Input text

    Returns:
        Dictionary with the keys "dates", "emails", "urls", "numbers" and
        "cap_words". Each value is a list of unique matches in order of first
        appearance (all lists are empty for empty input).
    """
    entities = {"dates": [], "emails": [], "urls": [], "numbers": [], "cap_words": []}

    if not text:
        return entities

    # Date patterns
    date_patterns = [
        r"\b\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b",  # MM/DD/YYYY or DD-MM-YYYY
        r"\b\d{4}[/-]\d{1,2}[/-]\d{1,2}\b",  # YYYY/MM/DD
        r"\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\.?\s+\d{1,2},?\s+\d{4}\b",  # Month DD, YYYY
    ]

    for pattern in date_patterns:
        entities["dates"].extend(re.findall(pattern, text, re.IGNORECASE))

    # Email patterns. The TLD class is [A-Za-z]: inside a character class a
    # "|" is a literal pipe, not alternation, so "[A-Z|a-z]" wrongly accepted
    # addresses such as "x@y.c|m".
    entities["emails"] = re.findall(
        r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b", text
    )

    # URL patterns
    entities["urls"] = re.findall(
        r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+",
        text,
    )

    # Number patterns (integers and simple decimals)
    entities["numbers"] = re.findall(r"\b\d+(?:\.\d+)?\b", text)

    # Capitalized words (potential proper nouns), including runs of them
    entities["cap_words"] = re.findall(r"\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b", text)

    # Remove duplicates once, for every category. dict.fromkeys keeps the
    # first occurrence of each match, so the result is deterministic (a set
    # round-trip yields arbitrary ordering).
    return {key: list(dict.fromkeys(matches)) for key, matches in entities.items()}
209
+
210
+
211
def calculate_relevance_score(
    query: str, content: str, tokenizer_service: Optional["TokenizerService"] = None
) -> float:
    """
    Calculate relevance score between query and content.

    The score is the Jaccard similarity of the two keyword sets (lower-cased
    words minus common words), plus 0.2 for every double-quoted phrase in the
    query that occurs in the content (case-insensitive), capped at 1.0.

    Args:
        query: Search query or task description
        content: Content to score
        tokenizer_service: TokenizerService instance. Accepted for interface
            compatibility; this implementation does not use it.

    Returns:
        Relevance score (0-1)
    """
    if not query or not content:
        return 0.0

    # Lower-case once; reused for tokenization and for phrase matching
    query_lower = query.lower()
    content_lower = content.lower()

    # Simple keyword-based relevance
    query_words = set(re.findall(r"\b\w+\b", query_lower))
    content_words = set(re.findall(r"\b\w+\b", content_lower))

    # Remove common words
    common_words = {
        "the", "and", "or", "but", "in", "on", "at", "to", "for", "of",
        "with", "by", "is", "are", "was", "were", "be", "been", "have",
        "has", "had", "do", "does", "did", "will", "would", "could",
        "should", "may", "might", "can", "this", "that", "these", "those",
        "a", "an", "it", "its", "they", "them", "their", "we", "us", "our",
        "you", "your",
    }

    query_keywords = query_words - common_words
    content_keywords = content_words - common_words

    # A query made only of common words carries no signal
    if not query_keywords:
        return 0.0

    # Jaccard similarity: |intersection| / |union|
    overlap = len(query_keywords & content_keywords)
    total_unique = len(query_keywords | content_keywords)
    jaccard = overlap / total_unique if total_unique > 0 else 0.0

    # Bonus for exact phrase matches: phrases are taken from the original
    # query so that their inner text is matched as typed (modulo case)
    phrase_bonus = 0.0
    for phrase in re.findall(r'"([^"]+)"', query):
        if phrase.lower() in content_lower:
            phrase_bonus += 0.2

    return min(1.0, jaccard + phrase_bonus)
304
+
305
+
306
def extract_task_keywords(task_description: str) -> List[str]:
    """
    Extract keywords from task description.

    A word is a keyword when, lower-cased, it appears in the built-in
    vocabulary of action verbs or technical terms.

    Args:
        task_description: Task description text

    Returns:
        List of up to 10 unique keywords, in order of first appearance
    """
    if not task_description:
        return []

    # Extract action verbs and important nouns
    words = re.findall(r"\b\w+\b", task_description.lower())

    # Common action verbs in tasks
    action_verbs = {
        "analyze", "create", "generate", "find", "search", "extract",
        "summarize", "compare", "evaluate", "assess", "review", "examine",
        "investigate", "determine", "identify", "classify", "organize",
        "optimize", "improve", "fix", "solve", "calculate", "compute",
        "predict", "recommend", "suggest",
    }

    # Technical terms (expand as needed)
    tech_terms = {
        "data", "algorithm", "model", "function", "variable", "database",
        "api", "json", "xml", "html", "css", "javascript", "python", "code",
        "program", "software", "system", "architecture", "design",
        "implementation", "testing", "debugging",
    }

    # Membership in the curated vocabulary is the only filter. The previous
    # additional "len(word) > 3" check made the listed three-letter entries
    # ("fix", "api", "xml", "css") impossible to match.
    vocabulary = action_verbs | tech_terms

    # Remove duplicates while preserving order (dict keys keep insertion order)
    unique_keywords = list(dict.fromkeys(word for word in words if word in vocabulary))

    return unique_keywords[:10]  # Return top 10 keywords
392
+
393
+
394
def format_context_section(
    name: str, content: str, max_length: Optional[int] = None
) -> str:
    """
    Format a context section with proper headers and structure.

    Args:
        name: Section name (upper-cased in the header)
        content: Section content
        max_length: Maximum length for content (will truncate if exceeded).
            ``None`` or a non-positive value means no limit. The limit
            applies to the content only, not to the header.

    Returns:
        Formatted section, or "" when content is empty
    """
    if not content:
        return ""

    # Truncate if necessary
    if max_length is not None and 0 < max_length < len(content):
        if max_length >= 3:
            # Reserve three characters for the ellipsis marker
            content = content[: max_length - 3] + "..."
        else:
            # No room for an ellipsis: a negative slice bound here would
            # yield output longer than the limit, so cut hard instead
            content = content[:max_length]

    # Format with header
    return f"### {name.upper()} ###\n{content}"
419
+
420
+
421
def merge_context_sections(sections: Dict[str, str], separator: str = "\n\n") -> str:
    """
    Merge multiple context sections into a single string.

    Sections whose content is empty or whitespace-only are skipped; the rest
    are stripped, formatted with ``format_context_section`` and joined in the
    mapping's iteration order.

    Args:
        sections: Dictionary of section names to content
        separator: Separator between sections

    Returns:
        Merged context string
    """
    if not sections:
        return ""

    rendered = [
        format_context_section(title, body.strip())
        for title, body in sections.items()
        if body and body.strip()
    ]

    return separator.join(rendered)
@@ -0,0 +1,270 @@
1
+ from __future__ import annotations
2
+ """
3
+ Message formatter for converting AssembledContext to various LLM message formats.
4
+ """
5
+
6
+ from typing import List, Dict, Any, Optional
7
+
8
+ from dolphin.core.common.enums import Messages, MessageRole
9
+ from ..core.context_assembler import AssembledContext
10
+
11
+
12
class MessageFormatter:
    """Converts AssembledContext to different LLM message formats."""

    # Placement buckets are always emitted in this order: head -> middle -> tail
    _PLACEMENT_ORDER = ("head", "middle", "tail")

    def __init__(self):
        """Initialize message formatter."""
        # Default section to role mapping
        self.default_role_mapping = {
            "system": "system",
            "user": "user",
            "assistant": "assistant",
            "task": "user",  # Task descriptions are usually provided as user input.
            "tools": "system",  # Tool information as system context
            "history": "user",  # Historical conversation as user input
            "memory": "system",  # Memory information as system context
            "rag": "system",  # RAG information as system context
            "fewshot": "assistant",  # few-shot examples as assistant responses
            "scratchpad": "assistant",  # Thought process as internal state of the assistant
        }

    @staticmethod
    def _iter_placed_sections(assembled_context: "AssembledContext"):
        """Yield (placement, section_name, stripped_content) in placement order.

        Names listed in the placement map that have no matching section, and
        sections whose content is empty after stripping, are skipped.
        """
        # First section wins when several share a name
        first_by_name = {}
        for section in assembled_context.sections:
            first_by_name.setdefault(section.name, section)

        for placement in MessageFormatter._PLACEMENT_ORDER:
            if placement not in assembled_context.placement_map:
                continue
            for section_name in assembled_context.placement_map[placement]:
                section = first_by_name.get(section_name)
                if not section:
                    continue
                content = section.content.strip()
                if not content:
                    continue
                yield placement, section_name, content

    @staticmethod
    def _add_or_extend(messages: "Messages", role, content) -> None:
        """Merge ``content`` if it is already a Messages object, else add it under ``role``."""
        if isinstance(content, Messages):
            # If it is a Messages type, merge the messages directly.
            messages.extend_messages(content)
        else:
            messages.add_message(role=role, content=content)

    def to_openai_messages(
        self,
        assembled_context: "AssembledContext",
        role_mapping: Optional[Dict[str, str]] = None,
        include_placement: bool = False,
    ) -> List[Dict[str, str]]:
        """Convert AssembledContext to OpenAI message format.

        Args:
            assembled_context: assembled context
            role_mapping: custom mapping from section to role; when omitted
                (or empty) the default mapping is used
            include_placement: whether to prefix each message with its
                placement, e.g. "[head] ..."

        Returns:
            list of messages in OpenAI format, ordered head -> middle -> tail
        """
        if not assembled_context.sections:
            return []

        # Use custom mapping or default mapping
        mapping = role_mapping or self.default_role_mapping

        messages = []
        for placement, section_name, content in self._iter_placed_sections(
            assembled_context
        ):
            # Sections without an explicit mapping default to the system role
            role = mapping.get(section_name, "system")
            if include_placement:
                content = f"[{placement}] {content}"
            messages.append({"role": role, "content": content})

        return messages

    def to_openai_messages_simple(
        self,
        assembled_context: "AssembledContext",
        user_sections: Optional[List[str]] = None,
    ) -> List[Dict[str, str]]:
        """Minimal OpenAI message format: a single system message + other messages.

        Strategy:
        - Decide the role of each section based on its message_role
        - Merge all system sections into a single leading system message
        - Keep user/assistant/tool sections in section order
        - Sections with any other message_role are skipped

        Args:
            assembled_context: Assembled context
            user_sections: Accepted for interface compatibility; currently
                unused because roles are taken from each section's message_role

        Returns:
            Simplified message list [system, user, ...] or similar structure
        """
        if not assembled_context.sections:
            return []

        messages = []
        system_parts = []

        for section in assembled_context.sections:
            role = section.message_role
            if role == MessageRole.SYSTEM:
                system_parts.append(section.content)
            elif role == MessageRole.USER:
                messages.append({"role": "user", "content": section.content})
            elif role == MessageRole.ASSISTANT:
                messages.append({"role": "assistant", "content": section.content})
            elif role == MessageRole.TOOL:
                # This branch used to re-test ASSISTANT, which made it
                # unreachable and silently dropped tool sections.
                messages.append({"role": "tool", "content": section.content})

        # Build final message
        if system_parts:
            # Merge all system messages into one, placed first
            combined_system = "\n\n".join(system_parts)
            messages.insert(0, {"role": "system", "content": combined_system})
        return messages

    def to_dph_messages_simple(
        self,
        assembled_context: Optional["AssembledContext"],
        user_sections: Optional[List[str]] = None,
    ) -> "Messages":
        """The most simplified DolphinLanguage message format: a single system message + other messages.

        Strategy:
        - Decide the role of each section based on its message_role
        - Section content that is already a Messages object is merged as-is
        - Plain system content is combined into one system message
        - Sections with any other message_role are skipped

        Args:
            assembled_context: The assembled context (None yields empty Messages)
            user_sections: Accepted for interface compatibility; currently
                unused because roles are taken from each section's message_role

        Returns:
            A simplified Messages object [system, user, ...] or similar structure
        """
        if not assembled_context or not assembled_context.sections:
            return Messages()

        messages = Messages()
        system_parts = []

        for section in assembled_context.sections:
            role = section.message_role
            content = section.content
            if role == MessageRole.SYSTEM:
                if isinstance(content, Messages):
                    # If it is a Messages type, merge the messages directly.
                    messages.extend_messages(content)
                else:
                    # Plain system text is collected and merged below
                    system_parts.append(content)
            elif role == MessageRole.USER:
                self._add_or_extend(messages, MessageRole.USER, content)
            elif role == MessageRole.ASSISTANT:
                self._add_or_extend(messages, MessageRole.ASSISTANT, content)
            elif role == MessageRole.TOOL:
                self._add_or_extend(messages, MessageRole.TOOL, content)

        # Build final message
        if system_parts:
            # Merge all system messages into one.
            # NOTE(review): insert_message presumably places it at the front
            # of the list - confirm against Messages.insert_message.
            combined_system = "\n\n".join(system_parts)
            messages.insert_message(role=MessageRole.SYSTEM, content=combined_system)

        return messages

    def to_anthropic_messages(
        self,
        assembled_context: "AssembledContext",
        role_mapping: Optional[Dict[str, str]] = None,
    ) -> List[Dict[str, Any]]:
        """Convert to Anthropic Claude message format.

        Args:
            assembled_context: Assembled context
            role_mapping: Custom mapping from section to role; when omitted
                (or empty), "system" and "user" map to "user" and
                "assistant" maps to "assistant"

        Returns:
            List of messages in Anthropic format, ordered head -> middle -> tail
        """
        if not assembled_context.sections:
            return []

        # Anthropic mainly uses user and assistant, merging system content into user.
        mapping = role_mapping or {
            "system": "user",
            "user": "user",
            "assistant": "assistant",
        }

        # Sections without an explicit mapping default to the user role
        return [
            {"role": mapping.get(section_name, "user"), "content": content}
            for _, section_name, content in self._iter_placed_sections(
                assembled_context
            )
        ]

    def create_custom_mapping(self, section_roles: Dict[str, str]) -> Dict[str, str]:
        """Create a custom section-to-role mapping.

        Args:
            section_roles: User-defined section-to-role mapping; entries
                override the defaults

        Returns:
            Complete mapping dictionary (a new dict; the defaults are not mutated)
        """
        # Allow users to customize based on the default mapping
        custom_mapping = self.default_role_mapping.copy()
        custom_mapping.update(section_roles)
        return custom_mapping