kweaver-dolphin 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199) hide show
  1. DolphinLanguageSDK/__init__.py +58 -0
  2. dolphin/__init__.py +62 -0
  3. dolphin/cli/__init__.py +20 -0
  4. dolphin/cli/args/__init__.py +9 -0
  5. dolphin/cli/args/parser.py +567 -0
  6. dolphin/cli/builtin_agents/__init__.py +22 -0
  7. dolphin/cli/commands/__init__.py +4 -0
  8. dolphin/cli/interrupt/__init__.py +8 -0
  9. dolphin/cli/interrupt/handler.py +205 -0
  10. dolphin/cli/interrupt/keyboard.py +82 -0
  11. dolphin/cli/main.py +49 -0
  12. dolphin/cli/multimodal/__init__.py +34 -0
  13. dolphin/cli/multimodal/clipboard.py +327 -0
  14. dolphin/cli/multimodal/handler.py +249 -0
  15. dolphin/cli/multimodal/image_processor.py +214 -0
  16. dolphin/cli/multimodal/input_parser.py +149 -0
  17. dolphin/cli/runner/__init__.py +8 -0
  18. dolphin/cli/runner/runner.py +989 -0
  19. dolphin/cli/ui/__init__.py +10 -0
  20. dolphin/cli/ui/console.py +2795 -0
  21. dolphin/cli/ui/input.py +340 -0
  22. dolphin/cli/ui/layout.py +425 -0
  23. dolphin/cli/ui/stream_renderer.py +302 -0
  24. dolphin/cli/utils/__init__.py +8 -0
  25. dolphin/cli/utils/helpers.py +135 -0
  26. dolphin/cli/utils/version.py +49 -0
  27. dolphin/core/__init__.py +107 -0
  28. dolphin/core/agent/__init__.py +10 -0
  29. dolphin/core/agent/agent_state.py +69 -0
  30. dolphin/core/agent/base_agent.py +970 -0
  31. dolphin/core/code_block/__init__.py +0 -0
  32. dolphin/core/code_block/agent_init_block.py +0 -0
  33. dolphin/core/code_block/assign_block.py +98 -0
  34. dolphin/core/code_block/basic_code_block.py +1865 -0
  35. dolphin/core/code_block/explore_block.py +1327 -0
  36. dolphin/core/code_block/explore_block_v2.py +712 -0
  37. dolphin/core/code_block/explore_strategy.py +672 -0
  38. dolphin/core/code_block/judge_block.py +220 -0
  39. dolphin/core/code_block/prompt_block.py +32 -0
  40. dolphin/core/code_block/skill_call_deduplicator.py +291 -0
  41. dolphin/core/code_block/tool_block.py +129 -0
  42. dolphin/core/common/__init__.py +17 -0
  43. dolphin/core/common/constants.py +176 -0
  44. dolphin/core/common/enums.py +1173 -0
  45. dolphin/core/common/exceptions.py +133 -0
  46. dolphin/core/common/multimodal.py +539 -0
  47. dolphin/core/common/object_type.py +165 -0
  48. dolphin/core/common/output_format.py +432 -0
  49. dolphin/core/common/types.py +36 -0
  50. dolphin/core/config/__init__.py +16 -0
  51. dolphin/core/config/global_config.py +1289 -0
  52. dolphin/core/config/ontology_config.py +133 -0
  53. dolphin/core/context/__init__.py +12 -0
  54. dolphin/core/context/context.py +1580 -0
  55. dolphin/core/context/context_manager.py +161 -0
  56. dolphin/core/context/var_output.py +82 -0
  57. dolphin/core/context/variable_pool.py +356 -0
  58. dolphin/core/context_engineer/__init__.py +41 -0
  59. dolphin/core/context_engineer/config/__init__.py +5 -0
  60. dolphin/core/context_engineer/config/settings.py +402 -0
  61. dolphin/core/context_engineer/core/__init__.py +7 -0
  62. dolphin/core/context_engineer/core/budget_manager.py +327 -0
  63. dolphin/core/context_engineer/core/context_assembler.py +583 -0
  64. dolphin/core/context_engineer/core/context_manager.py +637 -0
  65. dolphin/core/context_engineer/core/tokenizer_service.py +260 -0
  66. dolphin/core/context_engineer/example/incremental_example.py +267 -0
  67. dolphin/core/context_engineer/example/traditional_example.py +334 -0
  68. dolphin/core/context_engineer/services/__init__.py +5 -0
  69. dolphin/core/context_engineer/services/compressor.py +399 -0
  70. dolphin/core/context_engineer/utils/__init__.py +6 -0
  71. dolphin/core/context_engineer/utils/context_utils.py +441 -0
  72. dolphin/core/context_engineer/utils/message_formatter.py +270 -0
  73. dolphin/core/context_engineer/utils/token_utils.py +139 -0
  74. dolphin/core/coroutine/__init__.py +15 -0
  75. dolphin/core/coroutine/context_snapshot.py +154 -0
  76. dolphin/core/coroutine/context_snapshot_profile.py +922 -0
  77. dolphin/core/coroutine/context_snapshot_store.py +268 -0
  78. dolphin/core/coroutine/execution_frame.py +145 -0
  79. dolphin/core/coroutine/execution_state_registry.py +161 -0
  80. dolphin/core/coroutine/resume_handle.py +101 -0
  81. dolphin/core/coroutine/step_result.py +101 -0
  82. dolphin/core/executor/__init__.py +18 -0
  83. dolphin/core/executor/debug_controller.py +630 -0
  84. dolphin/core/executor/dolphin_executor.py +1063 -0
  85. dolphin/core/executor/executor.py +624 -0
  86. dolphin/core/flags/__init__.py +27 -0
  87. dolphin/core/flags/definitions.py +49 -0
  88. dolphin/core/flags/manager.py +113 -0
  89. dolphin/core/hook/__init__.py +95 -0
  90. dolphin/core/hook/expression_evaluator.py +499 -0
  91. dolphin/core/hook/hook_dispatcher.py +380 -0
  92. dolphin/core/hook/hook_types.py +248 -0
  93. dolphin/core/hook/isolated_variable_pool.py +284 -0
  94. dolphin/core/interfaces.py +53 -0
  95. dolphin/core/llm/__init__.py +0 -0
  96. dolphin/core/llm/llm.py +495 -0
  97. dolphin/core/llm/llm_call.py +100 -0
  98. dolphin/core/llm/llm_client.py +1285 -0
  99. dolphin/core/llm/message_sanitizer.py +120 -0
  100. dolphin/core/logging/__init__.py +20 -0
  101. dolphin/core/logging/logger.py +526 -0
  102. dolphin/core/message/__init__.py +8 -0
  103. dolphin/core/message/compressor.py +749 -0
  104. dolphin/core/parser/__init__.py +8 -0
  105. dolphin/core/parser/parser.py +405 -0
  106. dolphin/core/runtime/__init__.py +10 -0
  107. dolphin/core/runtime/runtime_graph.py +926 -0
  108. dolphin/core/runtime/runtime_instance.py +446 -0
  109. dolphin/core/skill/__init__.py +14 -0
  110. dolphin/core/skill/context_retention.py +157 -0
  111. dolphin/core/skill/skill_function.py +686 -0
  112. dolphin/core/skill/skill_matcher.py +282 -0
  113. dolphin/core/skill/skillkit.py +700 -0
  114. dolphin/core/skill/skillset.py +72 -0
  115. dolphin/core/trajectory/__init__.py +10 -0
  116. dolphin/core/trajectory/recorder.py +189 -0
  117. dolphin/core/trajectory/trajectory.py +522 -0
  118. dolphin/core/utils/__init__.py +9 -0
  119. dolphin/core/utils/cache_kv.py +212 -0
  120. dolphin/core/utils/tools.py +340 -0
  121. dolphin/lib/__init__.py +93 -0
  122. dolphin/lib/debug/__init__.py +8 -0
  123. dolphin/lib/debug/visualizer.py +409 -0
  124. dolphin/lib/memory/__init__.py +28 -0
  125. dolphin/lib/memory/async_processor.py +220 -0
  126. dolphin/lib/memory/llm_calls.py +195 -0
  127. dolphin/lib/memory/manager.py +78 -0
  128. dolphin/lib/memory/sandbox.py +46 -0
  129. dolphin/lib/memory/storage.py +245 -0
  130. dolphin/lib/memory/utils.py +51 -0
  131. dolphin/lib/ontology/__init__.py +12 -0
  132. dolphin/lib/ontology/basic/__init__.py +0 -0
  133. dolphin/lib/ontology/basic/base.py +102 -0
  134. dolphin/lib/ontology/basic/concept.py +130 -0
  135. dolphin/lib/ontology/basic/object.py +11 -0
  136. dolphin/lib/ontology/basic/relation.py +63 -0
  137. dolphin/lib/ontology/datasource/__init__.py +27 -0
  138. dolphin/lib/ontology/datasource/datasource.py +66 -0
  139. dolphin/lib/ontology/datasource/oracle_datasource.py +338 -0
  140. dolphin/lib/ontology/datasource/sql.py +845 -0
  141. dolphin/lib/ontology/mapping.py +177 -0
  142. dolphin/lib/ontology/ontology.py +733 -0
  143. dolphin/lib/ontology/ontology_context.py +16 -0
  144. dolphin/lib/ontology/ontology_manager.py +107 -0
  145. dolphin/lib/skill_results/__init__.py +31 -0
  146. dolphin/lib/skill_results/cache_backend.py +559 -0
  147. dolphin/lib/skill_results/result_processor.py +181 -0
  148. dolphin/lib/skill_results/result_reference.py +179 -0
  149. dolphin/lib/skill_results/skillkit_hook.py +324 -0
  150. dolphin/lib/skill_results/strategies.py +328 -0
  151. dolphin/lib/skill_results/strategy_registry.py +150 -0
  152. dolphin/lib/skillkits/__init__.py +44 -0
  153. dolphin/lib/skillkits/agent_skillkit.py +155 -0
  154. dolphin/lib/skillkits/cognitive_skillkit.py +82 -0
  155. dolphin/lib/skillkits/env_skillkit.py +250 -0
  156. dolphin/lib/skillkits/mcp_adapter.py +616 -0
  157. dolphin/lib/skillkits/mcp_skillkit.py +771 -0
  158. dolphin/lib/skillkits/memory_skillkit.py +650 -0
  159. dolphin/lib/skillkits/noop_skillkit.py +31 -0
  160. dolphin/lib/skillkits/ontology_skillkit.py +89 -0
  161. dolphin/lib/skillkits/plan_act_skillkit.py +452 -0
  162. dolphin/lib/skillkits/resource/__init__.py +52 -0
  163. dolphin/lib/skillkits/resource/models/__init__.py +6 -0
  164. dolphin/lib/skillkits/resource/models/skill_config.py +109 -0
  165. dolphin/lib/skillkits/resource/models/skill_meta.py +127 -0
  166. dolphin/lib/skillkits/resource/resource_skillkit.py +393 -0
  167. dolphin/lib/skillkits/resource/skill_cache.py +215 -0
  168. dolphin/lib/skillkits/resource/skill_loader.py +395 -0
  169. dolphin/lib/skillkits/resource/skill_validator.py +406 -0
  170. dolphin/lib/skillkits/resource_skillkit.py +11 -0
  171. dolphin/lib/skillkits/search_skillkit.py +163 -0
  172. dolphin/lib/skillkits/sql_skillkit.py +274 -0
  173. dolphin/lib/skillkits/system_skillkit.py +509 -0
  174. dolphin/lib/skillkits/vm_skillkit.py +65 -0
  175. dolphin/lib/utils/__init__.py +9 -0
  176. dolphin/lib/utils/data_process.py +207 -0
  177. dolphin/lib/utils/handle_progress.py +178 -0
  178. dolphin/lib/utils/security.py +139 -0
  179. dolphin/lib/utils/text_retrieval.py +462 -0
  180. dolphin/lib/vm/__init__.py +11 -0
  181. dolphin/lib/vm/env_executor.py +895 -0
  182. dolphin/lib/vm/python_session_manager.py +453 -0
  183. dolphin/lib/vm/vm.py +610 -0
  184. dolphin/sdk/__init__.py +60 -0
  185. dolphin/sdk/agent/__init__.py +12 -0
  186. dolphin/sdk/agent/agent_factory.py +236 -0
  187. dolphin/sdk/agent/dolphin_agent.py +1106 -0
  188. dolphin/sdk/api/__init__.py +4 -0
  189. dolphin/sdk/runtime/__init__.py +8 -0
  190. dolphin/sdk/runtime/env.py +363 -0
  191. dolphin/sdk/skill/__init__.py +10 -0
  192. dolphin/sdk/skill/global_skills.py +706 -0
  193. dolphin/sdk/skill/traditional_toolkit.py +260 -0
  194. kweaver_dolphin-0.1.0.dist-info/METADATA +521 -0
  195. kweaver_dolphin-0.1.0.dist-info/RECORD +199 -0
  196. kweaver_dolphin-0.1.0.dist-info/WHEEL +5 -0
  197. kweaver_dolphin-0.1.0.dist-info/entry_points.txt +27 -0
  198. kweaver_dolphin-0.1.0.dist-info/licenses/LICENSE.txt +201 -0
  199. kweaver_dolphin-0.1.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,133 @@
1
class DolphinException(Exception):
    """Base class for Dolphin errors that carry an error code and a message.

    Attributes:
        code: Error code identifying the failure.
        message: Human-readable description of the failure.
        extra: Keyword arguments supplied by the raiser, kept as a dict.
    """

    def __init__(
        self,
        code: str,
        message: str = "",
        *args,
        **kwargs,
    ):
        """Initialize the exception.

        Args:
            code: Error code identifying the failure.
            message: Human-readable description of the failure.
            *args: Extra positional values appended to ``Exception.args``.
            **kwargs: Extra keyword values, stored on ``self.extra``.
        """
        # BaseException.__init__ rejects keyword arguments with TypeError, so
        # they are kept on the instance instead of being forwarded.
        super().__init__(message, *args)
        self.code = code
        self.message = message
        self.extra = dict(kwargs)

    def __str__(self):
        return f"DolphinException: {self.code}, {self.message}"
17
+
18
+
19
class ModelException(DolphinException):
    """Error signalling that a model call was interrupted.

    Behaves like ``DolphinException`` but supplies a model-specific default
    message and string prefix.
    """

    def __init__(self, code: str, message: str = "The model was interrupted.", *args, **kwargs):
        # All state (code / message) is recorded by the base class.
        super().__init__(code, message, *args, **kwargs)

    def __str__(self):
        return f"ModelException: {self.code}, {self.message}"
33
+
34
+
35
class SkillException(DolphinException):
    """Error signalling that a skill was interrupted."""

    def __init__(self, code: str, message="The skill was interrupted.", *args, **kwargs):
        # All state (code / message) is recorded by the base class.
        super().__init__(code, message, *args, **kwargs)

    def __str__(self):
        # Single-line messages use the compact "name: code, message" form;
        # multi-line messages get a header line followed by the message body.
        is_multiline = '\n' in self.message
        if not is_multiline:
            return f"SkillException: {self.code}, {self.message}"
        return f"SkillException [{self.code}]:\n{self.message}"
52
+
53
+
54
class ContextEngineerException(DolphinException):
    """Error signalling that the context engineer was interrupted.

    Unlike most sibling exceptions, ``code`` is optional here and defaults to
    an empty string.
    """

    def __init__(
        self,
        code: str = "",
        message: str = "The context engineer was interrupted.",
        *args,
        **kwargs,
    ):
        # All state (code / message) is recorded by the base class.
        super().__init__(code, message, *args, **kwargs)

    def __str__(self):
        return f"ContextEngineerException: {self.code}, {self.message}"
68
+
69
+
70
class AgentLifecycleException(DolphinException):
    """Error related to the agent lifecycle.

    Accepts only ``code`` and ``message``; no extra positional or keyword
    arguments are forwarded to the base class.
    """

    def __init__(
        self,
        code: str,
        message: str = "Agent lifecycle error",
    ):
        super().__init__(code, message)

    def __str__(self):
        return f"AgentLifecycleException: {self.code}, {self.message}"
78
+
79
+
80
class DolphinAgentException(DolphinException):
    """Error raised by the Dolphin agent.

    Accepts only ``code`` and ``message``; no extra positional or keyword
    arguments are forwarded to the base class.
    """

    def __init__(self, code: str, message: str = "Dolphin agent error"):
        super().__init__(code, message)

    def __str__(self):
        # Every sibling exception labels itself with its own class name;
        # without this override the base-class "DolphinException" label would
        # be used and the error would be misattributed in logs.
        return f"DolphinAgentException: {self.code}, {self.message}"
85
+
86
+
87
class SyncError(DolphinException):
    """Error raised when message synchronization fails.

    The error code is fixed to ``"SYNC_ERROR"``; callers supply only the
    message.
    """

    def __init__(
        self,
        message: str = "Message synchronization failed.",
    ):
        super().__init__("SYNC_ERROR", message)

    def __str__(self):
        # The fixed code is omitted from the rendered text.
        return f"SyncError: {self.message}"
95
+
96
+
97
class DebuggerQuitException(Exception):
    """Signal that the user chose to quit the debugger.

    Derives directly from ``Exception`` and adds no state or behavior.
    """
100
+
101
+
102
class UserInterrupt(DolphinException):
    """Interrupt triggered by the user to supply new input mid-execution.

    Raised when the user actively stops the agent while it is running
    (e.g., by pressing ESC) in order to give new instructions or corrections.

    How this differs from ToolInterrupt:
        - ToolInterrupt: a tool asks for user input and execution resumes
          from the breakpoint.
        - UserInterrupt: the user interrupts on their own initiative, which
          triggers re-reasoning with the new context.

    Typical use cases:
        - The agent is heading in the wrong direction and needs correcting.
        - The user wants to add more context information.
        - The user wants to insert new requirements at the current step.

    Attributes:
        partial_output: Optional partial LLM output captured at interrupt time
        interrupted_at: Timestamp when interrupt occurred
    """

    def __init__(self, message: str = "User interrupted execution", partial_output: str = None):
        # Imported locally, as in the original module, so the module itself
        # carries no top-level datetime import.
        from datetime import datetime

        # The error code is fixed; only the message varies per instance.
        super().__init__("USER_INTERRUPT", message)
        self.interrupted_at = datetime.now()
        self.partial_output = partial_output

    def __str__(self):
        # The fixed code is omitted from the rendered text.
        return f"UserInterrupt: {self.message}"
@@ -0,0 +1,539 @@
1
+ """
2
+ Multimodal support module for Dolphin Language.
3
+
4
+ This module provides types, utilities, and exceptions for handling
5
+ multimodal content (text + images) in the message system.
6
+
7
+ Design based on: docs/core/multimodal_support_design.md
8
+ """
9
+
10
+ import math
11
+ from dataclasses import dataclass, field
12
+ from enum import Enum
13
+ from typing import Any, Dict, List, Optional, Union
14
+
15
+
16
+ # =============================================================================
17
+ # Type Definitions
18
+ # =============================================================================
19
+
20
+ # MessageContent can be either a plain string or a list of ContentBlocks
21
+ MessageContent = Union[str, List[Dict[str, Any]]]
22
+
23
+
24
+ # =============================================================================
25
+ # ContentBlock Helpers
26
+ # =============================================================================
27
+
28
def text_block(text: str) -> Dict[str, Any]:
    """Build a content block that carries plain text.

    Args:
        text: The text to wrap

    Returns:
        A dict with ``type`` set to ``"text"`` and the text under ``text``
        (OpenAI content-block format)
    """
    block: Dict[str, Any] = {"type": "text"}
    block["text"] = text
    return block
38
+
39
+
40
def image_url_block(url: str, detail: str = "auto") -> Dict[str, Any]:
    """Build a content block that references an image by URL.

    Args:
        url: Image location (https:// or a data:image/... base64 URL)
        detail: Resolution level - "auto", "low", or "high"

    Returns:
        A dict with ``type`` set to ``"image_url"`` and a nested ``image_url``
        dict holding ``url`` and ``detail`` (OpenAI content-block format)
    """
    image_payload = {"url": url, "detail": detail}
    return {"type": "image_url", "image_url": image_payload}
51
+
52
+
53
def normalize_content(content: MessageContent) -> List[Dict[str, Any]]:
    """Return content in list-of-blocks form.

    Args:
        content: Either a string or list of content blocks

    Returns:
        A single-element list holding a text block when given a string;
        otherwise the input object itself, returned unchanged (not copied)
    """
    if not isinstance(content, str):
        return content
    return [text_block(content)]
65
+
66
+
67
def extract_text(content: MessageContent) -> str:
    """Return only the textual part of message content.

    Useful for logging or for falling back to models without vision support.

    Args:
        content: Either a string or list of content blocks

    Returns:
        The string itself, or the text of every ``"text"`` block concatenated
        with no separator (non-text blocks such as images are skipped)
    """
    if isinstance(content, str):
        return content

    pieces = []
    for block in content:
        if block.get("type") == "text":
            pieces.append(block.get("text", ""))
    return "".join(pieces)
85
+
86
+
87
def count_images(content: MessageContent) -> int:
    """Count how many image blocks the content holds.

    Args:
        content: Either a string or list of content blocks

    Returns:
        0 for a plain string; otherwise the number of blocks whose ``type``
        is ``"image_url"``
    """
    if isinstance(content, str):
        return 0

    total = 0
    for block in content:
        if block.get("type") == "image_url":
            total += 1
    return total
99
+
100
+
101
def has_images(content: MessageContent) -> bool:
    """Tell whether the content includes at least one image block.

    Args:
        content: Either a string or list of content blocks

    Returns:
        True when ``count_images`` reports one or more images
    """
    image_total = count_images(content)
    return image_total > 0
111
+
112
+
113
def get_content_preview(content: MessageContent) -> Dict[str, Any]:
    """Summarize content for logging without exposing the payload itself.

    Only sizes and counts are reported, so base64 data and full URLs never
    reach the log.

    Args:
        content: Either a string or list of content blocks

    Returns:
        ``{"type": "text", "length": ...}`` for a string; otherwise
        ``{"type": "multimodal", "text_length": ..., "image_count": ...}``
    """
    if isinstance(content, str):
        return {"type": "text", "length": len(content)}

    image_count = count_images(content)

    # Add up the length of every text block; image blocks contribute nothing.
    text_length = 0
    for block in content:
        if block.get("type") == "text":
            text_length += len(block.get("text", ""))

    preview: Dict[str, Any] = {"type": "multimodal"}
    preview["text_length"] = text_length
    preview["image_count"] = image_count
    return preview
138
+
139
+
140
def calculate_content_length(content: MessageContent) -> int:
    """Measure how many characters of text the content holds.

    Args:
        content: Either a string or list of content blocks

    Returns:
        ``len(content)`` for a string; otherwise the summed length of the
        ``text`` of every ``"text"`` block (images are not counted)
    """
    if isinstance(content, str):
        return len(content)

    total = 0
    for block in content:
        if block.get("type") == "text":
            total += len(block.get("text", ""))
    return total
156
+
157
+
158
+ # =============================================================================
159
+ # Image Token Estimation
160
+ # =============================================================================
161
+
162
@dataclass
class ImageTokenConfig:
    """Simplified image token estimation configuration.

    Design Decision:
    - Uses OpenAI-style tile-based algorithm as universal estimate
    - Does not differentiate by provider (±20% error acceptable for compression)
    - Server-side usage is the authoritative source for billing/limits
    """

    # Tile-based estimation parameters (OpenAI style, universally applicable)
    base_tokens: int = 85  # Base overhead
    tokens_per_tile: int = 170  # Tokens per tile
    tile_size: int = 512  # Tile side length in pixels

    # Fallback values when dimensions unknown
    fallback_tokens: Dict[str, int] = field(default_factory=lambda: {
        "low": 85,  # Low resolution mode
        "auto": 600,  # Default conservative estimate
        "high": 1500,  # High resolution conservative estimate
    })

    # Last-resort estimate used when a custom ``fallback_tokens`` table has
    # neither the requested detail level nor an "auto" entry. Deliberately
    # left un-annotated so the dataclass machinery does not treat it as a field.
    _DEFAULT_AUTO_FALLBACK = 600

    def estimate_tokens(
        self,
        width: Optional[int] = None,
        height: Optional[int] = None,
        detail: str = "auto"
    ) -> int:
        """Estimate token count for an image.

        Args:
            width: Image width in pixels (optional)
            height: Image height in pixels (optional)
            detail: Resolution level ("low", "auto", "high")

        Returns:
            Estimated token count
        """
        # Low mode returns fixed base overhead
        if detail == "low":
            return self.base_tokens

        # Use fallback when dimensions unknown
        if width is None or height is None:
            fallbacks = self.fallback_tokens
            if detail in fallbacks:
                return fallbacks[detail]
            # Look up "auto" lazily: a custom table is not required to define
            # it, and an eager fallbacks["auto"] would raise KeyError even for
            # detail levels that are present.
            return fallbacks.get("auto", self._DEFAULT_AUTO_FALLBACK)

        # Tile-based calculation: base + tiles × tokens_per_tile
        tiles_x = math.ceil(width / self.tile_size)
        tiles_y = math.ceil(height / self.tile_size)
        return self.base_tokens + self.tokens_per_tile * tiles_x * tiles_y

    @classmethod
    def for_provider(cls, provider: str) -> "ImageTokenConfig":
        """Create provider-specific token estimation config.

        Args:
            provider: LLM provider name (openai, gemini, anthropic, etc.);
                matched case-insensitively. ``None`` or an unrecognized name
                yields the default config.

        Returns:
            ImageTokenConfig optimized for the provider

        Note:
            This is an optional enhancement for improved accuracy.
            The default OpenAI-style config is sufficient for most use cases.
        """
        # Tolerate a missing provider name instead of failing on None.lower().
        provider = (provider or "").strip().lower()

        if provider == "openai":
            return cls(base_tokens=85, tokens_per_tile=170, tile_size=512)
        if provider == "gemini":
            # Gemini uses 258 tokens per 768x768 tile, no base overhead
            return cls(base_tokens=0, tokens_per_tile=258, tile_size=768)
        if provider == "anthropic":
            # Anthropic uses pixel-based calculation: (width × height) / 750
            # We approximate with very fine tiles (27 × 27 = 729 ≈ 750 pixels)
            return cls(base_tokens=0, tokens_per_tile=1, tile_size=27)
        # Default to OpenAI style
        return cls()
241
+
242
+
243
+ # Global default config instance
244
+ _default_image_token_config = ImageTokenConfig()
245
+
246
+
247
def estimate_image_tokens(
    width: Optional[int] = None,
    height: Optional[int] = None,
    detail: str = "auto",
    config: Optional[ImageTokenConfig] = None
) -> int:
    """Estimate image tokens using the given config or the module default.

    Args:
        width: Image width in pixels (optional)
        height: Image height in pixels (optional)
        detail: Resolution level ("low", "auto", "high")
        config: Custom config; the module-level default is used when omitted

    Returns:
        Estimated token count as computed by the config's ``estimate_tokens``
    """
    if config:
        return config.estimate_tokens(width, height, detail)
    return _default_image_token_config.estimate_tokens(width, height, detail)
266
+
267
+
268
+ # =============================================================================
269
+ # Image Size Constraints
270
+ # =============================================================================
271
+
272
@dataclass
class ImageConstraints:
    """Size and count limits for images, intended to guard against memory issues.

    Design Rationale:
    - Several 2MB base64 images together can cause OOM
    - Base64 strings grow by ~33% during serialization
    - Both per-image and aggregate limits are needed
    """

    # Largest allowed base64 payload for a single image (2MB)
    max_base64_bytes_per_image: int = 2 * 1024 * 1024
    # Largest allowed combined base64 payload within one message (5MB)
    max_base64_bytes_per_message: int = 5 * 1024 * 1024
    # Largest allowed number of images within one message
    max_images_per_message: int = 5
    # Largest allowed number of images across all messages
    max_images_in_context: int = 20

    def validate_base64_size(self, base64_data: str, image_index: int = 0) -> None:
        """Check one base64 image against the per-image size limit.

        Args:
            base64_data: Base64 encoded image data, optionally as a ``data:`` URL
            image_index: Index reported in the error message

        Raises:
            ImagePayloadTooLargeError: If the payload is larger than
                ``max_base64_bytes_per_image``
        """
        payload = base64_data
        if payload.startswith("data:"):
            # Measure only what follows the first comma of the data URL.
            payload = payload.split(",", 1)[-1]

        size_bytes = len(payload.encode('utf-8'))
        if size_bytes <= self.max_base64_bytes_per_image:
            return

        raise ImagePayloadTooLargeError(
            f"Base64 image #{image_index} size ({size_bytes / 1024 / 1024:.2f}MB) "
            f"exceeds limit ({self.max_base64_bytes_per_image / 1024 / 1024:.2f}MB)"
        )
307
+
308
+
309
+ # =============================================================================
310
+ # Multimodal Compression Configuration
311
+ # =============================================================================
312
+
313
class MultimodalCompressionMode(Enum):
    """Strategies for compressing multimodal messages.

    Members:
    - TEXT_ONLY: When over the limit, drop images and keep text (default,
      safest for information preservation)
    - ATOMIC: Keep or drop the message as a whole (suits scenarios where
      image and text are bound together)
    - LATEST_IMAGE: Keep only the latest N images (a balance between the
      other two modes)
    """

    # Drop images when over limit, keep text (default)
    TEXT_ONLY = "text_only"
    # Keep or drop entire message
    ATOMIC = "atomic"
    # Keep only the latest N images
    LATEST_IMAGE = "latest_image"
324
+
325
+
326
@dataclass
class MultimodalCompressionConfig:
    """Settings that control how multimodal messages are compressed."""

    # Compression strategy to apply
    mode: MultimodalCompressionMode = MultimodalCompressionMode.TEXT_ONLY
    # Number of images retained (for LATEST_IMAGE mode)
    max_images_to_keep: int = 3
    # Whether text blocks may be truncated
    allow_truncate_text_blocks: bool = True
332
+
333
+
334
+ # =============================================================================
335
+ # Exceptions
336
+ # =============================================================================
337
+
338
class MultimodalError(Exception):
    """Root of the multimodal error hierarchy."""


class MultimodalNotSupportedError(MultimodalError):
    """The model does not accept multimodal input."""


class TooManyImagesError(MultimodalError):
    """The number of images is above the allowed limit."""


class UnsupportedImageFormatError(MultimodalError):
    """The image format is not supported."""


class UnsupportedContentBlockTypeError(MultimodalError):
    """The content block type is not supported."""


class EmptyMultimodalContentError(MultimodalError):
    """The multimodal content list has no blocks."""


class InvalidTextBlockError(MultimodalError):
    """A text block is invalid."""


class InvalidImageUrlError(MultimodalError):
    """An image URL is invalid."""


class InvalidImageDetailError(MultimodalError):
    """An image detail level is invalid."""


class ImagePayloadTooLargeError(MultimodalError):
    """A base64 image payload is above the size limit."""


class ClipboardEmptyError(MultimodalError):
    """The clipboard does not hold an image."""
391
+
392
+
393
+ # =============================================================================
394
+ # Validation
395
+ # =============================================================================
396
+
397
def validate_content_block(block: Dict[str, Any]) -> None:
    """Validate a single content block.

    Args:
        block: The content block to validate

    Raises:
        UnsupportedContentBlockTypeError: If block is not a dict or its type
            is unknown
        InvalidTextBlockError: If text block is malformed
        InvalidImageUrlError: If the image_url payload is not a dict or its
            url is missing, empty, or not a string
        InvalidImageDetailError: If image detail level is invalid
    """
    # Malformed input must surface as a validation error, not as an
    # AttributeError from calling .get() on a non-dict.
    if not isinstance(block, dict):
        raise UnsupportedContentBlockTypeError(
            f"Unsupported content block type: {type(block).__name__}"
        )

    block_type = block.get("type")

    if block_type == "text":
        if not isinstance(block.get("text"), str):
            raise InvalidTextBlockError("Text block requires 'text: str'.")
        return

    if block_type == "image_url":
        image_url = block.get("image_url") or {}
        # A bare string (or any other non-dict) here has no url/detail fields
        # to read; report it as an invalid image URL.
        if not isinstance(image_url, dict):
            raise InvalidImageUrlError(
                "image_url block requires an 'image_url' object with a non-empty url."
            )
        url = image_url.get("url")
        detail = image_url.get("detail", "auto")

        # Detail is checked before the URL, so an invalid detail is reported
        # first when both are wrong.
        if detail not in ("auto", "low", "high"):
            raise InvalidImageDetailError(f"Invalid image detail: {detail}")
        if not isinstance(url, str) or not url:
            raise InvalidImageUrlError("image_url block requires non-empty url.")
        return

    raise UnsupportedContentBlockTypeError(f"Unsupported content block type: {block_type}")
428
+
429
+
430
def validate_multimodal_content(content: MessageContent) -> None:
    """Check that message content is well-formed.

    A plain string always passes. A list must be non-empty, and each of its
    blocks is checked with ``validate_content_block``.

    Args:
        content: The content to check

    Raises:
        EmptyMultimodalContentError: If the content list has no blocks
        Other validation errors raised by validate_content_block
    """
    if not isinstance(content, str):
        if len(content) == 0:
            raise EmptyMultimodalContentError("Multimodal content list must not be empty.")

        for item in content:
            validate_content_block(item)
448
+
449
+
450
class MultimodalValidator:
    """Checks multimodal messages against a model's capabilities and limits."""

    @staticmethod
    def validate(
        messages,  # Messages type, left un-annotated to avoid a circular import
        supports_vision: bool = True,
        max_images_per_request: int = 10,
        model_name: str = "unknown",
        image_constraints: Optional[ImageConstraints] = None
    ) -> None:
        """Check every message against capability, count, and size limits.

        Only messages whose ``content`` is a list are inspected; other
        messages are skipped.

        Args:
            messages: Iterable of message objects exposing ``content``
            supports_vision: Whether the model accepts image input
            max_images_per_request: Image limit for the whole request
            model_name: Model name used in error messages
            image_constraints: Size/count limits; defaults to ``ImageConstraints()``

        Raises:
            MultimodalNotSupportedError: If images are present but the model
                lacks vision support
            TooManyImagesError: If a per-message, context-wide, or
                per-request image count limit is exceeded
            ImagePayloadTooLargeError: If base64 images exceed size limits
            Other validation errors raised by validate_multimodal_content
        """
        constraints = image_constraints or ImageConstraints()
        total_images = 0

        for msg in messages:
            content = msg.content
            if not isinstance(content, list):
                continue

            # Structural validation of the blocks comes first.
            validate_multimodal_content(content)

            # Per-message tallies.
            img_count = 0
            total_base64_bytes = 0

            for position, block in enumerate(content):
                if block.get("type") != "image_url":
                    continue

                img_count += 1
                total_images += 1

                # Only data: URLs carry an inline payload that needs sizing.
                url = block.get("image_url", {}).get("url", "")
                if not url.startswith("data:"):
                    continue

                payload = url.split(",", 1)[-1] if "," in url else url
                total_base64_bytes += len(payload.encode('utf-8'))

                # Per-image limit; the index passed is the block's position
                # within the message content.
                constraints.validate_base64_size(url, position)

            # Per-message limits
            if img_count > constraints.max_images_per_message:
                raise TooManyImagesError(
                    f"Message contains {img_count} images, exceeding limit of "
                    f"{constraints.max_images_per_message} images per message"
                )

            if total_base64_bytes > constraints.max_base64_bytes_per_message:
                raise ImagePayloadTooLargeError(
                    f"Message base64 images total {total_base64_bytes / 1024 / 1024:.2f}MB, "
                    f"exceeding limit of {constraints.max_base64_bytes_per_message / 1024 / 1024:.2f}MB"
                )

        # Vision support is checked once all messages have been scanned.
        if total_images > 0 and not supports_vision:
            raise MultimodalNotSupportedError(
                f"Model '{model_name}' does not support vision input. "
                f"Please use a vision-capable model like gpt-4o or claude-3-5-sonnet."
            )

        # Context-wide image count limit
        if total_images > constraints.max_images_in_context:
            raise TooManyImagesError(
                f"Context contains {total_images} images, exceeding limit of "
                f"{constraints.max_images_in_context} images across all messages"
            )

        # Model-specific limit (backward compatibility)
        if total_images > max_images_per_request:
            raise TooManyImagesError(
                f"Request contains {total_images} images, but model limit is "
                f"{max_images_per_request}"
            )