kweaver-dolphin 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199) hide show
  1. DolphinLanguageSDK/__init__.py +58 -0
  2. dolphin/__init__.py +62 -0
  3. dolphin/cli/__init__.py +20 -0
  4. dolphin/cli/args/__init__.py +9 -0
  5. dolphin/cli/args/parser.py +567 -0
  6. dolphin/cli/builtin_agents/__init__.py +22 -0
  7. dolphin/cli/commands/__init__.py +4 -0
  8. dolphin/cli/interrupt/__init__.py +8 -0
  9. dolphin/cli/interrupt/handler.py +205 -0
  10. dolphin/cli/interrupt/keyboard.py +82 -0
  11. dolphin/cli/main.py +49 -0
  12. dolphin/cli/multimodal/__init__.py +34 -0
  13. dolphin/cli/multimodal/clipboard.py +327 -0
  14. dolphin/cli/multimodal/handler.py +249 -0
  15. dolphin/cli/multimodal/image_processor.py +214 -0
  16. dolphin/cli/multimodal/input_parser.py +149 -0
  17. dolphin/cli/runner/__init__.py +8 -0
  18. dolphin/cli/runner/runner.py +989 -0
  19. dolphin/cli/ui/__init__.py +10 -0
  20. dolphin/cli/ui/console.py +2795 -0
  21. dolphin/cli/ui/input.py +340 -0
  22. dolphin/cli/ui/layout.py +425 -0
  23. dolphin/cli/ui/stream_renderer.py +302 -0
  24. dolphin/cli/utils/__init__.py +8 -0
  25. dolphin/cli/utils/helpers.py +135 -0
  26. dolphin/cli/utils/version.py +49 -0
  27. dolphin/core/__init__.py +107 -0
  28. dolphin/core/agent/__init__.py +10 -0
  29. dolphin/core/agent/agent_state.py +69 -0
  30. dolphin/core/agent/base_agent.py +970 -0
  31. dolphin/core/code_block/__init__.py +0 -0
  32. dolphin/core/code_block/agent_init_block.py +0 -0
  33. dolphin/core/code_block/assign_block.py +98 -0
  34. dolphin/core/code_block/basic_code_block.py +1865 -0
  35. dolphin/core/code_block/explore_block.py +1327 -0
  36. dolphin/core/code_block/explore_block_v2.py +712 -0
  37. dolphin/core/code_block/explore_strategy.py +672 -0
  38. dolphin/core/code_block/judge_block.py +220 -0
  39. dolphin/core/code_block/prompt_block.py +32 -0
  40. dolphin/core/code_block/skill_call_deduplicator.py +291 -0
  41. dolphin/core/code_block/tool_block.py +129 -0
  42. dolphin/core/common/__init__.py +17 -0
  43. dolphin/core/common/constants.py +176 -0
  44. dolphin/core/common/enums.py +1173 -0
  45. dolphin/core/common/exceptions.py +133 -0
  46. dolphin/core/common/multimodal.py +539 -0
  47. dolphin/core/common/object_type.py +165 -0
  48. dolphin/core/common/output_format.py +432 -0
  49. dolphin/core/common/types.py +36 -0
  50. dolphin/core/config/__init__.py +16 -0
  51. dolphin/core/config/global_config.py +1289 -0
  52. dolphin/core/config/ontology_config.py +133 -0
  53. dolphin/core/context/__init__.py +12 -0
  54. dolphin/core/context/context.py +1580 -0
  55. dolphin/core/context/context_manager.py +161 -0
  56. dolphin/core/context/var_output.py +82 -0
  57. dolphin/core/context/variable_pool.py +356 -0
  58. dolphin/core/context_engineer/__init__.py +41 -0
  59. dolphin/core/context_engineer/config/__init__.py +5 -0
  60. dolphin/core/context_engineer/config/settings.py +402 -0
  61. dolphin/core/context_engineer/core/__init__.py +7 -0
  62. dolphin/core/context_engineer/core/budget_manager.py +327 -0
  63. dolphin/core/context_engineer/core/context_assembler.py +583 -0
  64. dolphin/core/context_engineer/core/context_manager.py +637 -0
  65. dolphin/core/context_engineer/core/tokenizer_service.py +260 -0
  66. dolphin/core/context_engineer/example/incremental_example.py +267 -0
  67. dolphin/core/context_engineer/example/traditional_example.py +334 -0
  68. dolphin/core/context_engineer/services/__init__.py +5 -0
  69. dolphin/core/context_engineer/services/compressor.py +399 -0
  70. dolphin/core/context_engineer/utils/__init__.py +6 -0
  71. dolphin/core/context_engineer/utils/context_utils.py +441 -0
  72. dolphin/core/context_engineer/utils/message_formatter.py +270 -0
  73. dolphin/core/context_engineer/utils/token_utils.py +139 -0
  74. dolphin/core/coroutine/__init__.py +15 -0
  75. dolphin/core/coroutine/context_snapshot.py +154 -0
  76. dolphin/core/coroutine/context_snapshot_profile.py +922 -0
  77. dolphin/core/coroutine/context_snapshot_store.py +268 -0
  78. dolphin/core/coroutine/execution_frame.py +145 -0
  79. dolphin/core/coroutine/execution_state_registry.py +161 -0
  80. dolphin/core/coroutine/resume_handle.py +101 -0
  81. dolphin/core/coroutine/step_result.py +101 -0
  82. dolphin/core/executor/__init__.py +18 -0
  83. dolphin/core/executor/debug_controller.py +630 -0
  84. dolphin/core/executor/dolphin_executor.py +1063 -0
  85. dolphin/core/executor/executor.py +624 -0
  86. dolphin/core/flags/__init__.py +27 -0
  87. dolphin/core/flags/definitions.py +49 -0
  88. dolphin/core/flags/manager.py +113 -0
  89. dolphin/core/hook/__init__.py +95 -0
  90. dolphin/core/hook/expression_evaluator.py +499 -0
  91. dolphin/core/hook/hook_dispatcher.py +380 -0
  92. dolphin/core/hook/hook_types.py +248 -0
  93. dolphin/core/hook/isolated_variable_pool.py +284 -0
  94. dolphin/core/interfaces.py +53 -0
  95. dolphin/core/llm/__init__.py +0 -0
  96. dolphin/core/llm/llm.py +495 -0
  97. dolphin/core/llm/llm_call.py +100 -0
  98. dolphin/core/llm/llm_client.py +1285 -0
  99. dolphin/core/llm/message_sanitizer.py +120 -0
  100. dolphin/core/logging/__init__.py +20 -0
  101. dolphin/core/logging/logger.py +526 -0
  102. dolphin/core/message/__init__.py +8 -0
  103. dolphin/core/message/compressor.py +749 -0
  104. dolphin/core/parser/__init__.py +8 -0
  105. dolphin/core/parser/parser.py +405 -0
  106. dolphin/core/runtime/__init__.py +10 -0
  107. dolphin/core/runtime/runtime_graph.py +926 -0
  108. dolphin/core/runtime/runtime_instance.py +446 -0
  109. dolphin/core/skill/__init__.py +14 -0
  110. dolphin/core/skill/context_retention.py +157 -0
  111. dolphin/core/skill/skill_function.py +686 -0
  112. dolphin/core/skill/skill_matcher.py +282 -0
  113. dolphin/core/skill/skillkit.py +700 -0
  114. dolphin/core/skill/skillset.py +72 -0
  115. dolphin/core/trajectory/__init__.py +10 -0
  116. dolphin/core/trajectory/recorder.py +189 -0
  117. dolphin/core/trajectory/trajectory.py +522 -0
  118. dolphin/core/utils/__init__.py +9 -0
  119. dolphin/core/utils/cache_kv.py +212 -0
  120. dolphin/core/utils/tools.py +340 -0
  121. dolphin/lib/__init__.py +93 -0
  122. dolphin/lib/debug/__init__.py +8 -0
  123. dolphin/lib/debug/visualizer.py +409 -0
  124. dolphin/lib/memory/__init__.py +28 -0
  125. dolphin/lib/memory/async_processor.py +220 -0
  126. dolphin/lib/memory/llm_calls.py +195 -0
  127. dolphin/lib/memory/manager.py +78 -0
  128. dolphin/lib/memory/sandbox.py +46 -0
  129. dolphin/lib/memory/storage.py +245 -0
  130. dolphin/lib/memory/utils.py +51 -0
  131. dolphin/lib/ontology/__init__.py +12 -0
  132. dolphin/lib/ontology/basic/__init__.py +0 -0
  133. dolphin/lib/ontology/basic/base.py +102 -0
  134. dolphin/lib/ontology/basic/concept.py +130 -0
  135. dolphin/lib/ontology/basic/object.py +11 -0
  136. dolphin/lib/ontology/basic/relation.py +63 -0
  137. dolphin/lib/ontology/datasource/__init__.py +27 -0
  138. dolphin/lib/ontology/datasource/datasource.py +66 -0
  139. dolphin/lib/ontology/datasource/oracle_datasource.py +338 -0
  140. dolphin/lib/ontology/datasource/sql.py +845 -0
  141. dolphin/lib/ontology/mapping.py +177 -0
  142. dolphin/lib/ontology/ontology.py +733 -0
  143. dolphin/lib/ontology/ontology_context.py +16 -0
  144. dolphin/lib/ontology/ontology_manager.py +107 -0
  145. dolphin/lib/skill_results/__init__.py +31 -0
  146. dolphin/lib/skill_results/cache_backend.py +559 -0
  147. dolphin/lib/skill_results/result_processor.py +181 -0
  148. dolphin/lib/skill_results/result_reference.py +179 -0
  149. dolphin/lib/skill_results/skillkit_hook.py +324 -0
  150. dolphin/lib/skill_results/strategies.py +328 -0
  151. dolphin/lib/skill_results/strategy_registry.py +150 -0
  152. dolphin/lib/skillkits/__init__.py +44 -0
  153. dolphin/lib/skillkits/agent_skillkit.py +155 -0
  154. dolphin/lib/skillkits/cognitive_skillkit.py +82 -0
  155. dolphin/lib/skillkits/env_skillkit.py +250 -0
  156. dolphin/lib/skillkits/mcp_adapter.py +616 -0
  157. dolphin/lib/skillkits/mcp_skillkit.py +771 -0
  158. dolphin/lib/skillkits/memory_skillkit.py +650 -0
  159. dolphin/lib/skillkits/noop_skillkit.py +31 -0
  160. dolphin/lib/skillkits/ontology_skillkit.py +89 -0
  161. dolphin/lib/skillkits/plan_act_skillkit.py +452 -0
  162. dolphin/lib/skillkits/resource/__init__.py +52 -0
  163. dolphin/lib/skillkits/resource/models/__init__.py +6 -0
  164. dolphin/lib/skillkits/resource/models/skill_config.py +109 -0
  165. dolphin/lib/skillkits/resource/models/skill_meta.py +127 -0
  166. dolphin/lib/skillkits/resource/resource_skillkit.py +393 -0
  167. dolphin/lib/skillkits/resource/skill_cache.py +215 -0
  168. dolphin/lib/skillkits/resource/skill_loader.py +395 -0
  169. dolphin/lib/skillkits/resource/skill_validator.py +406 -0
  170. dolphin/lib/skillkits/resource_skillkit.py +11 -0
  171. dolphin/lib/skillkits/search_skillkit.py +163 -0
  172. dolphin/lib/skillkits/sql_skillkit.py +274 -0
  173. dolphin/lib/skillkits/system_skillkit.py +509 -0
  174. dolphin/lib/skillkits/vm_skillkit.py +65 -0
  175. dolphin/lib/utils/__init__.py +9 -0
  176. dolphin/lib/utils/data_process.py +207 -0
  177. dolphin/lib/utils/handle_progress.py +178 -0
  178. dolphin/lib/utils/security.py +139 -0
  179. dolphin/lib/utils/text_retrieval.py +462 -0
  180. dolphin/lib/vm/__init__.py +11 -0
  181. dolphin/lib/vm/env_executor.py +895 -0
  182. dolphin/lib/vm/python_session_manager.py +453 -0
  183. dolphin/lib/vm/vm.py +610 -0
  184. dolphin/sdk/__init__.py +60 -0
  185. dolphin/sdk/agent/__init__.py +12 -0
  186. dolphin/sdk/agent/agent_factory.py +236 -0
  187. dolphin/sdk/agent/dolphin_agent.py +1106 -0
  188. dolphin/sdk/api/__init__.py +4 -0
  189. dolphin/sdk/runtime/__init__.py +8 -0
  190. dolphin/sdk/runtime/env.py +363 -0
  191. dolphin/sdk/skill/__init__.py +10 -0
  192. dolphin/sdk/skill/global_skills.py +706 -0
  193. dolphin/sdk/skill/traditional_toolkit.py +260 -0
  194. kweaver_dolphin-0.1.0.dist-info/METADATA +521 -0
  195. kweaver_dolphin-0.1.0.dist-info/RECORD +199 -0
  196. kweaver_dolphin-0.1.0.dist-info/WHEEL +5 -0
  197. kweaver_dolphin-0.1.0.dist-info/entry_points.txt +27 -0
  198. kweaver_dolphin-0.1.0.dist-info/licenses/LICENSE.txt +201 -0
  199. kweaver_dolphin-0.1.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,249 @@
1
+ """
2
+ Multimodal input handler for CLI.
3
+
4
+ Integrates parsing, image reading, and processing to convert user input
5
+ with multimodal markers into proper multimodal content.
6
+ """
7
+
8
+ import os
9
+ import base64
10
+ from typing import Union, List, Dict, Any, Optional
11
+
12
+ from dolphin.cli.multimodal.input_parser import (
13
+ MultimodalInputParser,
14
+ ParsedMultimodalInput,
15
+ ImageSourceType,
16
+ )
17
+ from dolphin.cli.multimodal.clipboard import ClipboardImageReader
18
+ from dolphin.cli.multimodal.image_processor import ImageProcessor, ImageProcessConfig
19
+ from dolphin.core.common.multimodal import (
20
+ text_block,
21
+ image_url_block,
22
+ ClipboardEmptyError,
23
+ )
24
+
25
+
26
+ # Type alias for message content
27
+ MessageContent = Union[str, List[Dict[str, Any]]]
28
+
29
+
30
class MultimodalInputHandler:
    """Handler that turns multimodal CLI input into LLM message content.

    User input may contain ``@paste``, ``@image:<path>`` and ``@url:<url>``
    markers.  The handler parses them, loads the referenced images and
    returns a list of text/image content blocks; input without any image
    reference is returned unchanged as a plain string.

    Usage:
        handler = MultimodalInputHandler()
        content = handler.process("@paste Please describe this image")
        # Returns List[Dict] with image and text blocks
    """

    def __init__(
        self,
        # Quoted so the annotation is not evaluated at definition time.
        image_config: Optional["ImageProcessConfig"] = None,
        verbose: bool = False
    ):
        """Initialize the handler.

        Args:
            image_config: Configuration for image processing
            verbose: If True, print status messages
        """
        self.parser = MultimodalInputParser()
        self.clipboard = ClipboardImageReader()
        self.processor = ImageProcessor(image_config)
        self.verbose = verbose

    def process(self, raw_input: str) -> "MessageContent":
        """Process user input and convert to message content.

        Args:
            raw_input: Raw user input string

        Returns:
            str if no multimodal markers, List[Dict] if multimodal

        Raises:
            ClipboardEmptyError: If @paste used but clipboard empty
            FileNotFoundError: If @image: file doesn't exist
            Other exceptions from image processing
        """
        # Quick check - if no markers, return as plain text
        if not self.parser.has_multimodal_markers(raw_input):
            return raw_input

        parsed = self.parser.parse(raw_input)
        if not parsed.has_images():
            return raw_input

        content: List[Dict[str, Any]] = []
        for i, text_part in enumerate(parsed.text_parts):
            # text_parts[i] is the text that precedes image_refs[i]; the
            # final text part has no image after it.
            stripped = text_part.strip()
            if stripped:
                content.append(text_block(stripped))

            if i < len(parsed.image_refs):
                image_url = self._resolve_image_ref(parsed.image_refs[i])
                content.append(image_url_block(image_url, detail="auto"))

        # Fall back to the raw string if no block was produced
        return content or raw_input

    def _resolve_image_ref(self, ref) -> str:
        """Resolve an image reference to a usable URL.

        Args:
            ref: ImageReference object

        Returns:
            Image URL (data: URL for local/clipboard, https: for web)

        Raises:
            ValueError: If the reference has an unknown source type
        """
        if ref.source_type == ImageSourceType.CLIPBOARD:
            return self._read_clipboard()

        if ref.source_type == ImageSourceType.FILE:
            return self._read_file(ref.source)

        if ref.source_type == ImageSourceType.URL:
            # URL is passed through directly
            return ref.source

        raise ValueError(f"Unknown source type: {ref.source_type}")

    def _read_clipboard(self) -> str:
        """Read and process clipboard image.

        Returns:
            Base64 data URL

        Raises:
            ClipboardEmptyError: If the clipboard holds no image
        """
        data = self.clipboard.read()
        if data is None:
            raise ClipboardEmptyError(
                "No image found in clipboard. "
                "Please copy an image first (Cmd/Ctrl+C on an image)."
            )

        if self.verbose:
            info = self.processor.get_image_info(data)
            print(f"📎 Read clipboard image: {info.get('width', '?')}x{info.get('height', '?')}, "
                  f"{info.get('size_bytes', 0) // 1024}KB")

        # Process the image (resize, compress if needed)
        processed = self.processor.process(data)

        if self.verbose and len(processed) != len(data):
            print(f" Compressed to {len(processed) // 1024}KB")

        # NOTE(review): to_base64_url is defined outside this file; confirm
        # that the MIME type it emits matches the processed bytes.
        return self.clipboard.to_base64_url(processed)

    def _read_file(self, path: str) -> str:
        """Read and process image file.

        Args:
            path: File path (can be relative or use ~)

        Returns:
            Base64 data URL

        Raises:
            FileNotFoundError: If the path does not exist
        """
        # Expand user home and resolve path
        expanded_path = os.path.expanduser(path)
        if not os.path.isabs(expanded_path):
            expanded_path = os.path.abspath(expanded_path)

        if not os.path.exists(expanded_path):
            raise FileNotFoundError(f"Image file not found: {path}")

        if self.verbose:
            print(f"📁 Reading image file: {path}")

        with open(expanded_path, "rb") as f:
            data = f.read()

        # Process the image
        processed = self.processor.process(data)

        # Detect the MIME type from the *processed* bytes: the processor may
        # re-encode the image (e.g. WEBP/GIF in, JPEG/PNG out), so the type
        # of the file on disk can differ from what is actually sent.
        mime_type = self._detect_mime_type(processed)

        if self.verbose:
            info = self.processor.get_image_info(processed)
            print(f" Image: {info.get('width', '?')}x{info.get('height', '?')}, "
                  f"{len(processed) // 1024}KB")

        return self._to_base64_url(processed, mime_type)

    def _detect_mime_type(self, data: bytes) -> str:
        """Detect MIME type from image data.

        Args:
            data: Image bytes

        Returns:
            MIME type string ("image/png" when the signature is unknown)
        """
        # Check magic bytes
        if data[:8] == b'\x89PNG\r\n\x1a\n':
            return "image/png"
        if data[:2] == b'\xff\xd8':
            return "image/jpeg"
        if data[:6] in (b'GIF87a', b'GIF89a'):
            return "image/gif"
        if data[:4] == b'RIFF' and data[8:12] == b'WEBP':
            return "image/webp"
        # Default to PNG
        return "image/png"

    def _to_base64_url(self, data: bytes, mime_type: str = "image/png") -> str:
        """Convert image data to base64 data URL.

        Args:
            data: Image bytes
            mime_type: MIME type

        Returns:
            Data URL string
        """
        b64 = base64.b64encode(data).decode('utf-8')
        return f"data:{mime_type};base64,{b64}"

    def check_clipboard_status(self) -> dict:
        """Check if clipboard contains an image.

        Returns:
            Status dict with has_image and optional info
        """
        data = self.clipboard.read()
        if data is None:
            return {"has_image": False}

        info = self.processor.get_image_info(data)
        return {
            "has_image": True,
            "info": info
        }
235
+
236
+
237
+ # Convenience function for quick processing
238
def process_multimodal_input(raw_input: str, verbose: bool = False) -> MessageContent:
    """One-shot helper: build a handler and run it on a single input string.

    Args:
        raw_input: Raw user input string
        verbose: If True, the handler prints status messages

    Returns:
        str if no multimodal markers, List[Dict] if multimodal
    """
    return MultimodalInputHandler(verbose=verbose).process(raw_input)
@@ -0,0 +1,214 @@
1
+ """
2
+ Image processor for multimodal CLI input.
3
+
4
+ Handles image validation, format conversion, and compression.
5
+ """
6
+
7
+ import io
8
+ from dataclasses import dataclass, field
9
+ from typing import Tuple, Optional
10
+
11
+ from dolphin.core.common.multimodal import (
12
+ UnsupportedImageFormatError,
13
+ ImagePayloadTooLargeError,
14
+ )
15
+
16
+
17
@dataclass
class ImageProcessConfig:
    """Limits and encoding options consumed by the image processor."""

    # Largest accepted encoded image, in bytes (4MB).
    max_size_bytes: int = 4 * 1024 * 1024
    # Longest allowed edge, in pixels.
    max_dimension: int = 2048
    # Quality setting passed to the JPEG encoder.
    quality: int = 85
    # Accepted source formats. MPO (Multi-Picture Object) is JPEG-based and
    # is produced by some cameras/phones.
    allowed_formats: Tuple[str, ...] = ("PNG", "JPEG", "GIF", "WEBP", "MPO")
    # When True, oversized images are shrunk instead of rejected.
    auto_compress: bool = True
26
+
27
+
28
class ImageProcessor:
    """Image preprocessor for CLI multimodal input.

    Handles:
        - Format validation
        - Size checking and compression
        - Format conversion (output is always PNG or JPEG when Pillow is
          available)

    Usage:
        processor = ImageProcessor()
        processed_data = processor.process(raw_image_data)
    """

    def __init__(self, config: Optional["ImageProcessConfig"] = None):
        """Initialize the processor with configuration.

        Args:
            config: Processing configuration (uses defaults if not provided)
        """
        self.config = config or ImageProcessConfig()

    def process(self, image_data: bytes) -> bytes:
        """Process image data: validate, resize if needed, optimize.

        Args:
            image_data: Raw image bytes

        Returns:
            Processed image bytes (unchanged input when Pillow is missing)

        Raises:
            UnsupportedImageFormatError: If format not allowed
            ImagePayloadTooLargeError: If size exceeds limit (after compression attempt)
        """
        try:
            from PIL import Image
        except ImportError:
            # Pillow not installed, return data as-is but check size
            if len(image_data) > self.config.max_size_bytes:
                raise ImagePayloadTooLargeError(
                    f"Image size {len(image_data)} exceeds limit {self.config.max_size_bytes}. "
                    f"Install Pillow for automatic compression."
                )
            return image_data

        # Open and validate image
        img = Image.open(io.BytesIO(image_data))

        # Check format (MPO, a JPEG-based multi-picture format, is allowed
        # through the default configuration)
        img_format = img.format.upper() if img.format else None
        if img_format and img_format not in self.config.allowed_formats:
            raise UnsupportedImageFormatError(
                f"Image format '{img.format}' not supported. "
                f"Allowed formats: {', '.join(self.config.allowed_formats)}"
            )

        # Check and resize if needed
        if max(img.size) > self.config.max_dimension:
            if not self.config.auto_compress:
                raise ImagePayloadTooLargeError(
                    f"Image dimensions {img.size} exceed limit {self.config.max_dimension}. "
                    f"Enable auto_compress to automatically resize."
                )
            img = self._resize(img)

        # Convert to output format
        output = io.BytesIO()

        # Choose format based on image mode
        if img.mode in ('RGBA', 'LA', 'P'):
            # Preserve transparency with PNG
            if img.mode == 'P' and 'transparency' in img.info:
                img = img.convert('RGBA')
            output_format = "PNG"
            img.save(output, format=output_format, optimize=True)
        else:
            # Use JPEG for photos (smaller size)
            if img.mode != 'RGB':
                img = img.convert('RGB')
            output_format = "JPEG"
            img.save(output, format=output_format, quality=self.config.quality, optimize=True)

        result = output.getvalue()

        # Final size check
        if len(result) > self.config.max_size_bytes:
            if self.config.auto_compress:
                # Try more aggressive compression
                result = self._aggressive_compress(img, output_format)

            if len(result) > self.config.max_size_bytes:
                raise ImagePayloadTooLargeError(
                    f"Image size {len(result)} exceeds limit {self.config.max_size_bytes} "
                    f"even after compression."
                )

        return result

    @staticmethod
    def _fit_size(size: Tuple[int, int], max_dim: int) -> Tuple[int, int]:
        """Scale ``size`` so its longest edge is about ``max_dim``.

        Each edge is clamped to at least 1 pixel: for extreme aspect ratios
        the short edge would otherwise truncate to 0, which Pillow's
        ``resize`` rejects.

        Args:
            size: (width, height) of the source image
            max_dim: Target length of the longest edge

        Returns:
            (width, height) tuple with the aspect ratio preserved
        """
        width, height = size
        ratio = max_dim / max(width, height)
        return (max(1, int(width * ratio)), max(1, int(height * ratio)))

    def _resize(self, img, max_dimension: Optional[int] = None) -> "Image.Image":
        """Resize image to fit within a maximum edge length, keeping aspect ratio.

        Args:
            img: PIL Image object
            max_dimension: Longest allowed edge; defaults to
                ``config.max_dimension``

        Returns:
            Resized PIL Image
        """
        from PIL import Image

        limit = self.config.max_dimension if max_dimension is None else max_dimension
        # Image.Resampling was added in Pillow 9.1; older releases expose
        # the filter constants directly on the Image module.
        resampling = getattr(Image, "Resampling", Image)
        return img.resize(self._fit_size(img.size, limit), resampling.LANCZOS)

    def _aggressive_compress(self, img, output_format: str) -> bytes:
        """Apply aggressive compression to reduce file size.

        Shrinks the image to at most 1024 pixels on its longest edge (or
        ``config.max_dimension`` if that is smaller) and, for JPEG output,
        re-encodes at a quality no higher than 60.

        Args:
            img: PIL Image object
            output_format: Target format ("JPEG"; anything else is saved as PNG)

        Returns:
            Compressed image bytes
        """
        # Reduce dimensions further
        max_dim = min(self.config.max_dimension, 1024)
        if max(img.size) > max_dim:
            img = self._resize(img, max_dim)

        output = io.BytesIO()
        if output_format == "JPEG":
            if img.mode != 'RGB':
                img = img.convert('RGB')
            # Never exceed the configured quality: a fixed 60 would be a
            # weaker setting than the first pass when config.quality < 60.
            img.save(output, format="JPEG", quality=min(self.config.quality, 60), optimize=True)
        else:
            img.save(output, format="PNG", optimize=True)

        return output.getvalue()

    def get_image_info(self, image_data: bytes) -> dict:
        """Get information about an image.

        Args:
            image_data: Raw image bytes

        Returns:
            Dict with format, mode, width, height and size_bytes; on any
            failure (including Pillow not being installed) a dict with
            error and size_bytes instead
        """
        try:
            from PIL import Image

            img = Image.open(io.BytesIO(image_data))
            return {
                "format": img.format,
                "mode": img.mode,
                "width": img.width,
                "height": img.height,
                "size_bytes": len(image_data),
            }
        except Exception as e:
            # Best-effort: this is only used for status output, so report
            # the problem instead of raising.
            return {
                "error": str(e),
                "size_bytes": len(image_data),
            }

    def validate_file(self, file_path: str) -> bool:
        """Validate that a file is a valid image.

        Args:
            file_path: Path to image file

        Returns:
            True if Pillow can open and verify the file, False otherwise
            (including when Pillow is not installed)
        """
        try:
            from PIL import Image

            with Image.open(file_path) as img:
                img.verify()
            return True
        except Exception:
            return False
@@ -0,0 +1,149 @@
1
+ """
2
+ Input parser for multimodal CLI input.
3
+
4
+ Parses user input to extract multimodal references like @paste, @image:<path>, @url:<url>.
5
+ """
6
+
7
+ import re
8
+ from dataclasses import dataclass
9
+ from enum import Enum
10
+ from typing import List, Tuple
11
+
12
+
13
class ImageSourceType(Enum):
    """Where an image referenced in user input comes from."""

    # Marker: @paste
    CLIPBOARD = "clipboard"
    # Marker: @image:<path>
    FILE = "file"
    # Marker: @url:<url>
    URL = "url"
18
+
19
+
20
@dataclass
class ImageReference:
    """A single image marker found in the user's input."""

    # Kind of source the marker points at.
    source_type: ImageSourceType
    # Path, URL, or the literal "clipboard".
    source: str
    # Start offset of the marker in the original text.
    position: int
    # The marker exactly as typed (e.g., "@paste", "@image:./foo.png").
    original_text: str
27
+
28
+
29
@dataclass
class ParsedMultimodalInput:
    """Outcome of splitting user input into text and image references."""

    # Text fragments left once the image markers are removed.
    text_parts: List[str]
    # Image markers found in the input.
    image_refs: List[ImageReference]
    # The input exactly as received.
    original_input: str

    def has_images(self) -> bool:
        """Return True when at least one image reference was found."""
        return bool(self.image_refs)

    def get_combined_text(self) -> str:
        """Join the non-blank text fragments with single spaces."""
        trimmed = (fragment.strip() for fragment in self.text_parts)
        return " ".join(fragment for fragment in trimmed if fragment)
43
+
44
+
45
class MultimodalInputParser:
    """Parser for multimodal CLI input.

    Supports:
        - @paste: Read image from clipboard
        - @image:<path>: Read image from local file
        - @url:<url>: Reference image by URL (https only)

    Markers are matched case-insensitively.  A marker that lies inside
    another marker (for example ``@paste`` inside the URL of an ``@url:``
    marker) is treated as part of the outer one.

    Example:
        parser = MultimodalInputParser()
        result = parser.parse("@paste Please describe this image")
        # result.has_images() == True
        # result.image_refs[0].source_type == ImageSourceType.CLIPBOARD
    """

    # Pattern definitions
    PASTE_PATTERN = r"@paste"
    IMAGE_PATTERN = r"@image:([^\s]+)"
    URL_PATTERN = r"@url:(https://[^\s]+)"

    def __init__(self):
        """Initialize the parser with compiled regex patterns."""
        self._patterns = [
            (re.compile(self.PASTE_PATTERN, re.IGNORECASE), ImageSourceType.CLIPBOARD),
            (re.compile(self.IMAGE_PATTERN, re.IGNORECASE), ImageSourceType.FILE),
            (re.compile(self.URL_PATTERN, re.IGNORECASE), ImageSourceType.URL),
        ]

    @staticmethod
    def _drop_overlaps(matches):
        """Order matches by start offset and discard nested/overlapping ones.

        Each pattern is scanned independently, so one marker can match
        inside the span of another.  Only the earliest-starting match of
        each overlapping group is kept.

        Args:
            matches: Iterable of (start, end, payload) tuples

        Returns:
            List of the kept tuples, sorted by start offset
        """
        kept = []
        last_end = 0
        for entry in sorted(matches, key=lambda item: item[0]):
            start, end = entry[0], entry[1]
            if start < last_end:
                # Starts inside the previously accepted marker
                continue
            kept.append(entry)
            last_end = end
        return kept

    def parse(self, raw_input: str) -> "ParsedMultimodalInput":
        """Parse raw input to extract multimodal references.

        Args:
            raw_input: User's raw input string

        Returns:
            ParsedMultimodalInput containing text parts and image references.
            ``text_parts`` always has one more element than ``image_refs``:
            ``text_parts[i]`` is the text before ``image_refs[i]`` and the
            last element is the text after the final reference.
        """
        if not raw_input:
            return ParsedMultimodalInput(
                text_parts=[""],
                image_refs=[],
                original_input=raw_input
            )

        # Find all matches with their positions
        matches: List[Tuple[int, int, ImageReference]] = []

        for pattern, source_type in self._patterns:
            for match in pattern.finditer(raw_input):
                start, end = match.span()

                if source_type == ImageSourceType.CLIPBOARD:
                    source = "clipboard"
                elif source_type in (ImageSourceType.FILE, ImageSourceType.URL):
                    source = match.group(1)  # The path or the URL
                else:
                    continue

                ref = ImageReference(
                    source_type=source_type,
                    source=source,
                    position=start,
                    original_text=match.group(0)
                )
                matches.append((start, end, ref))

        # Extract text parts and image references
        text_parts: List[str] = []
        image_refs: List[ImageReference] = []
        last_end = 0

        for start, end, ref in self._drop_overlaps(matches):
            # Add text before this match
            text_parts.append(raw_input[last_end:start])
            image_refs.append(ref)
            last_end = end

        # Add remaining text after last match
        text_parts.append(raw_input[last_end:])

        return ParsedMultimodalInput(
            text_parts=text_parts,
            image_refs=image_refs,
            original_input=raw_input
        )

    def has_multimodal_markers(self, text: str) -> bool:
        """Quick check if text contains any multimodal markers.

        Args:
            text: Text to check

        Returns:
            True if text contains @paste, @image:, or @url:
        """
        return any(pattern.search(text) for pattern, _ in self._patterns)
@@ -0,0 +1,8 @@
1
+ # -*- coding: utf-8 -*-
2
+ """Runner 模块 - CLI 运行器"""
3
+
4
+ from dolphin.cli.runner.runner import runDolphinAgent
5
+
6
+ __all__ = [
7
+ "runDolphinAgent",
8
+ ]