kweaver-dolphin 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- DolphinLanguageSDK/__init__.py +58 -0
- dolphin/__init__.py +62 -0
- dolphin/cli/__init__.py +20 -0
- dolphin/cli/args/__init__.py +9 -0
- dolphin/cli/args/parser.py +567 -0
- dolphin/cli/builtin_agents/__init__.py +22 -0
- dolphin/cli/commands/__init__.py +4 -0
- dolphin/cli/interrupt/__init__.py +8 -0
- dolphin/cli/interrupt/handler.py +205 -0
- dolphin/cli/interrupt/keyboard.py +82 -0
- dolphin/cli/main.py +49 -0
- dolphin/cli/multimodal/__init__.py +34 -0
- dolphin/cli/multimodal/clipboard.py +327 -0
- dolphin/cli/multimodal/handler.py +249 -0
- dolphin/cli/multimodal/image_processor.py +214 -0
- dolphin/cli/multimodal/input_parser.py +149 -0
- dolphin/cli/runner/__init__.py +8 -0
- dolphin/cli/runner/runner.py +989 -0
- dolphin/cli/ui/__init__.py +10 -0
- dolphin/cli/ui/console.py +2795 -0
- dolphin/cli/ui/input.py +340 -0
- dolphin/cli/ui/layout.py +425 -0
- dolphin/cli/ui/stream_renderer.py +302 -0
- dolphin/cli/utils/__init__.py +8 -0
- dolphin/cli/utils/helpers.py +135 -0
- dolphin/cli/utils/version.py +49 -0
- dolphin/core/__init__.py +107 -0
- dolphin/core/agent/__init__.py +10 -0
- dolphin/core/agent/agent_state.py +69 -0
- dolphin/core/agent/base_agent.py +970 -0
- dolphin/core/code_block/__init__.py +0 -0
- dolphin/core/code_block/agent_init_block.py +0 -0
- dolphin/core/code_block/assign_block.py +98 -0
- dolphin/core/code_block/basic_code_block.py +1865 -0
- dolphin/core/code_block/explore_block.py +1327 -0
- dolphin/core/code_block/explore_block_v2.py +712 -0
- dolphin/core/code_block/explore_strategy.py +672 -0
- dolphin/core/code_block/judge_block.py +220 -0
- dolphin/core/code_block/prompt_block.py +32 -0
- dolphin/core/code_block/skill_call_deduplicator.py +291 -0
- dolphin/core/code_block/tool_block.py +129 -0
- dolphin/core/common/__init__.py +17 -0
- dolphin/core/common/constants.py +176 -0
- dolphin/core/common/enums.py +1173 -0
- dolphin/core/common/exceptions.py +133 -0
- dolphin/core/common/multimodal.py +539 -0
- dolphin/core/common/object_type.py +165 -0
- dolphin/core/common/output_format.py +432 -0
- dolphin/core/common/types.py +36 -0
- dolphin/core/config/__init__.py +16 -0
- dolphin/core/config/global_config.py +1289 -0
- dolphin/core/config/ontology_config.py +133 -0
- dolphin/core/context/__init__.py +12 -0
- dolphin/core/context/context.py +1580 -0
- dolphin/core/context/context_manager.py +161 -0
- dolphin/core/context/var_output.py +82 -0
- dolphin/core/context/variable_pool.py +356 -0
- dolphin/core/context_engineer/__init__.py +41 -0
- dolphin/core/context_engineer/config/__init__.py +5 -0
- dolphin/core/context_engineer/config/settings.py +402 -0
- dolphin/core/context_engineer/core/__init__.py +7 -0
- dolphin/core/context_engineer/core/budget_manager.py +327 -0
- dolphin/core/context_engineer/core/context_assembler.py +583 -0
- dolphin/core/context_engineer/core/context_manager.py +637 -0
- dolphin/core/context_engineer/core/tokenizer_service.py +260 -0
- dolphin/core/context_engineer/example/incremental_example.py +267 -0
- dolphin/core/context_engineer/example/traditional_example.py +334 -0
- dolphin/core/context_engineer/services/__init__.py +5 -0
- dolphin/core/context_engineer/services/compressor.py +399 -0
- dolphin/core/context_engineer/utils/__init__.py +6 -0
- dolphin/core/context_engineer/utils/context_utils.py +441 -0
- dolphin/core/context_engineer/utils/message_formatter.py +270 -0
- dolphin/core/context_engineer/utils/token_utils.py +139 -0
- dolphin/core/coroutine/__init__.py +15 -0
- dolphin/core/coroutine/context_snapshot.py +154 -0
- dolphin/core/coroutine/context_snapshot_profile.py +922 -0
- dolphin/core/coroutine/context_snapshot_store.py +268 -0
- dolphin/core/coroutine/execution_frame.py +145 -0
- dolphin/core/coroutine/execution_state_registry.py +161 -0
- dolphin/core/coroutine/resume_handle.py +101 -0
- dolphin/core/coroutine/step_result.py +101 -0
- dolphin/core/executor/__init__.py +18 -0
- dolphin/core/executor/debug_controller.py +630 -0
- dolphin/core/executor/dolphin_executor.py +1063 -0
- dolphin/core/executor/executor.py +624 -0
- dolphin/core/flags/__init__.py +27 -0
- dolphin/core/flags/definitions.py +49 -0
- dolphin/core/flags/manager.py +113 -0
- dolphin/core/hook/__init__.py +95 -0
- dolphin/core/hook/expression_evaluator.py +499 -0
- dolphin/core/hook/hook_dispatcher.py +380 -0
- dolphin/core/hook/hook_types.py +248 -0
- dolphin/core/hook/isolated_variable_pool.py +284 -0
- dolphin/core/interfaces.py +53 -0
- dolphin/core/llm/__init__.py +0 -0
- dolphin/core/llm/llm.py +495 -0
- dolphin/core/llm/llm_call.py +100 -0
- dolphin/core/llm/llm_client.py +1285 -0
- dolphin/core/llm/message_sanitizer.py +120 -0
- dolphin/core/logging/__init__.py +20 -0
- dolphin/core/logging/logger.py +526 -0
- dolphin/core/message/__init__.py +8 -0
- dolphin/core/message/compressor.py +749 -0
- dolphin/core/parser/__init__.py +8 -0
- dolphin/core/parser/parser.py +405 -0
- dolphin/core/runtime/__init__.py +10 -0
- dolphin/core/runtime/runtime_graph.py +926 -0
- dolphin/core/runtime/runtime_instance.py +446 -0
- dolphin/core/skill/__init__.py +14 -0
- dolphin/core/skill/context_retention.py +157 -0
- dolphin/core/skill/skill_function.py +686 -0
- dolphin/core/skill/skill_matcher.py +282 -0
- dolphin/core/skill/skillkit.py +700 -0
- dolphin/core/skill/skillset.py +72 -0
- dolphin/core/trajectory/__init__.py +10 -0
- dolphin/core/trajectory/recorder.py +189 -0
- dolphin/core/trajectory/trajectory.py +522 -0
- dolphin/core/utils/__init__.py +9 -0
- dolphin/core/utils/cache_kv.py +212 -0
- dolphin/core/utils/tools.py +340 -0
- dolphin/lib/__init__.py +93 -0
- dolphin/lib/debug/__init__.py +8 -0
- dolphin/lib/debug/visualizer.py +409 -0
- dolphin/lib/memory/__init__.py +28 -0
- dolphin/lib/memory/async_processor.py +220 -0
- dolphin/lib/memory/llm_calls.py +195 -0
- dolphin/lib/memory/manager.py +78 -0
- dolphin/lib/memory/sandbox.py +46 -0
- dolphin/lib/memory/storage.py +245 -0
- dolphin/lib/memory/utils.py +51 -0
- dolphin/lib/ontology/__init__.py +12 -0
- dolphin/lib/ontology/basic/__init__.py +0 -0
- dolphin/lib/ontology/basic/base.py +102 -0
- dolphin/lib/ontology/basic/concept.py +130 -0
- dolphin/lib/ontology/basic/object.py +11 -0
- dolphin/lib/ontology/basic/relation.py +63 -0
- dolphin/lib/ontology/datasource/__init__.py +27 -0
- dolphin/lib/ontology/datasource/datasource.py +66 -0
- dolphin/lib/ontology/datasource/oracle_datasource.py +338 -0
- dolphin/lib/ontology/datasource/sql.py +845 -0
- dolphin/lib/ontology/mapping.py +177 -0
- dolphin/lib/ontology/ontology.py +733 -0
- dolphin/lib/ontology/ontology_context.py +16 -0
- dolphin/lib/ontology/ontology_manager.py +107 -0
- dolphin/lib/skill_results/__init__.py +31 -0
- dolphin/lib/skill_results/cache_backend.py +559 -0
- dolphin/lib/skill_results/result_processor.py +181 -0
- dolphin/lib/skill_results/result_reference.py +179 -0
- dolphin/lib/skill_results/skillkit_hook.py +324 -0
- dolphin/lib/skill_results/strategies.py +328 -0
- dolphin/lib/skill_results/strategy_registry.py +150 -0
- dolphin/lib/skillkits/__init__.py +44 -0
- dolphin/lib/skillkits/agent_skillkit.py +155 -0
- dolphin/lib/skillkits/cognitive_skillkit.py +82 -0
- dolphin/lib/skillkits/env_skillkit.py +250 -0
- dolphin/lib/skillkits/mcp_adapter.py +616 -0
- dolphin/lib/skillkits/mcp_skillkit.py +771 -0
- dolphin/lib/skillkits/memory_skillkit.py +650 -0
- dolphin/lib/skillkits/noop_skillkit.py +31 -0
- dolphin/lib/skillkits/ontology_skillkit.py +89 -0
- dolphin/lib/skillkits/plan_act_skillkit.py +452 -0
- dolphin/lib/skillkits/resource/__init__.py +52 -0
- dolphin/lib/skillkits/resource/models/__init__.py +6 -0
- dolphin/lib/skillkits/resource/models/skill_config.py +109 -0
- dolphin/lib/skillkits/resource/models/skill_meta.py +127 -0
- dolphin/lib/skillkits/resource/resource_skillkit.py +393 -0
- dolphin/lib/skillkits/resource/skill_cache.py +215 -0
- dolphin/lib/skillkits/resource/skill_loader.py +395 -0
- dolphin/lib/skillkits/resource/skill_validator.py +406 -0
- dolphin/lib/skillkits/resource_skillkit.py +11 -0
- dolphin/lib/skillkits/search_skillkit.py +163 -0
- dolphin/lib/skillkits/sql_skillkit.py +274 -0
- dolphin/lib/skillkits/system_skillkit.py +509 -0
- dolphin/lib/skillkits/vm_skillkit.py +65 -0
- dolphin/lib/utils/__init__.py +9 -0
- dolphin/lib/utils/data_process.py +207 -0
- dolphin/lib/utils/handle_progress.py +178 -0
- dolphin/lib/utils/security.py +139 -0
- dolphin/lib/utils/text_retrieval.py +462 -0
- dolphin/lib/vm/__init__.py +11 -0
- dolphin/lib/vm/env_executor.py +895 -0
- dolphin/lib/vm/python_session_manager.py +453 -0
- dolphin/lib/vm/vm.py +610 -0
- dolphin/sdk/__init__.py +60 -0
- dolphin/sdk/agent/__init__.py +12 -0
- dolphin/sdk/agent/agent_factory.py +236 -0
- dolphin/sdk/agent/dolphin_agent.py +1106 -0
- dolphin/sdk/api/__init__.py +4 -0
- dolphin/sdk/runtime/__init__.py +8 -0
- dolphin/sdk/runtime/env.py +363 -0
- dolphin/sdk/skill/__init__.py +10 -0
- dolphin/sdk/skill/global_skills.py +706 -0
- dolphin/sdk/skill/traditional_toolkit.py +260 -0
- kweaver_dolphin-0.1.0.dist-info/METADATA +521 -0
- kweaver_dolphin-0.1.0.dist-info/RECORD +199 -0
- kweaver_dolphin-0.1.0.dist-info/WHEEL +5 -0
- kweaver_dolphin-0.1.0.dist-info/entry_points.txt +27 -0
- kweaver_dolphin-0.1.0.dist-info/licenses/LICENSE.txt +201 -0
- kweaver_dolphin-0.1.0.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
class DolphinException(Exception):
    """Base exception for Dolphin interruptions.

    Carries a machine-readable error ``code`` alongside the human-readable
    ``message``; all Dolphin-specific exceptions derive from this class.
    """

    def __init__(self, code: str, message: str = "", *args, **kwargs):
        super().__init__(message, *args, **kwargs)
        self.code = code
        self.message = message

    def __str__(self):
        return f"DolphinException: {self.code}, {self.message}"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class ModelException(DolphinException):
    """Raised when a model invocation is interrupted."""

    def __init__(self, code: str, message: str = "The model was interrupted.", *args, **kwargs):
        super().__init__(code, message, *args, **kwargs)

    def __str__(self):
        return f"ModelException: {self.code}, {self.message}"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class SkillException(DolphinException):
    """Raised when a skill invocation is interrupted."""

    def __init__(self, code: str, message="The skill was interrupted.", *args, **kwargs):
        super().__init__(code, message, *args, **kwargs)

    def __str__(self):
        # Multi-line messages get a block layout for readability.
        if '\n' not in self.message:
            return f"SkillException: {self.code}, {self.message}"
        return f"SkillException [{self.code}]:\n{self.message}"
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class ContextEngineerException(DolphinException):
    """Raised when the context engineer is interrupted."""

    def __init__(self, code: str = "", message: str = "The context engineer was interrupted.", *args, **kwargs):
        super().__init__(code, message, *args, **kwargs)

    def __str__(self):
        return f"ContextEngineerException: {self.code}, {self.message}"
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class AgentLifecycleException(DolphinException):
    """Raised for errors in the agent lifecycle."""

    def __init__(self, code: str, message: str = "Agent lifecycle error"):
        super().__init__(code, message)

    def __str__(self):
        return f"AgentLifecycleException: {self.code}, {self.message}"
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class DolphinAgentException(DolphinException):
    """Raised for general Dolphin agent errors.

    Uses the inherited ``DolphinException`` string form.
    """

    def __init__(self, code: str, message: str = "Dolphin agent error"):
        super().__init__(code, message)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class SyncError(DolphinException):
    """Raised when message synchronization fails.

    The error code is fixed to ``"SYNC_ERROR"``; only the message varies.
    """

    def __init__(self, message: str = "Message synchronization failed."):
        super().__init__("SYNC_ERROR", message)

    def __str__(self):
        return f"SyncError: {self.message}"
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
class DebuggerQuitException(Exception):
    """Raised when the user quits the debugger."""
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
class UserInterrupt(DolphinException):
    """User-initiated interrupt delivering new input mid-execution.

    Raised when the user actively interrupts the agent's execution
    (e.g. by pressing ESC) to supply new instructions or corrections.

    Contrast with ToolInterrupt:
    - ToolInterrupt: a tool requests user input and resumes from a breakpoint.
    - UserInterrupt: the user interrupts, triggering re-reasoning with the
      new context.

    Typical scenarios: steering an agent heading in the wrong direction,
    adding extra context information, or injecting new requirements at the
    current step.

    Attributes:
        partial_output: Partial LLM output captured at interrupt time, if any.
        interrupted_at: Timestamp recorded when the interrupt occurred.
    """

    def __init__(self, message: str = "User interrupted execution", partial_output: str = None):
        super().__init__("USER_INTERRUPT", message)
        self.partial_output = partial_output
        # Local import mirrors the original: datetime is only needed here.
        from datetime import datetime
        self.interrupted_at = datetime.now()

    def __str__(self):
        return f"UserInterrupt: {self.message}"
|
|
@@ -0,0 +1,539 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Multimodal support module for Dolphin Language.
|
|
3
|
+
|
|
4
|
+
This module provides types, utilities, and exceptions for handling
|
|
5
|
+
multimodal content (text + images) in the message system.
|
|
6
|
+
|
|
7
|
+
Design based on: docs/core/multimodal_support_design.md
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import math
|
|
11
|
+
from dataclasses import dataclass, field
|
|
12
|
+
from enum import Enum
|
|
13
|
+
from typing import Any, Dict, List, Optional, Union
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# =============================================================================
|
|
17
|
+
# Type Definitions
|
|
18
|
+
# =============================================================================
|
|
19
|
+
|
|
20
|
+
# MessageContent can be either a plain string or a list of ContentBlocks
|
|
21
|
+
MessageContent = Union[str, List[Dict[str, Any]]]
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# =============================================================================
|
|
25
|
+
# ContentBlock Helpers
|
|
26
|
+
# =============================================================================
|
|
27
|
+
|
|
28
|
+
def text_block(text: str) -> Dict[str, Any]:
    """Build an OpenAI-format text content block.

    Args:
        text: The text payload.

    Returns:
        A ``{"type": "text", "text": ...}`` content block.
    """
    block: Dict[str, Any] = {"type": "text"}
    block["text"] = text
    return block
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def image_url_block(url: str, detail: str = "auto") -> Dict[str, Any]:
    """Build an OpenAI-format image_url content block.

    Args:
        url: Image location (https:// or data:image/... for base64).
        detail: Resolution hint — "auto", "low", or "high".

    Returns:
        An ``image_url`` content block in OpenAI format.
    """
    payload = {"url": url, "detail": detail}
    return {"type": "image_url", "image_url": payload}
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def normalize_content(content: MessageContent) -> List[Dict[str, Any]]:
    """Coerce any supported content format into a list of content blocks.

    Args:
        content: A plain string or a list of content blocks.

    Returns:
        The content expressed as a list of content blocks; a list input is
        returned as-is (no copy).
    """
    if not isinstance(content, str):
        return content
    # Wrap a bare string in a single text block (same shape as text_block()).
    return [{"type": "text", "text": content}]
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def extract_text(content: MessageContent) -> str:
    """Pull the plain text out of multimodal content.

    Useful for logging and falling back to non-vision models; image blocks
    are silently skipped.

    Args:
        content: A plain string or a list of content blocks.

    Returns:
        The concatenated text of every text block (images omitted).
    """
    if isinstance(content, str):
        return content
    pieces = [b.get("text", "") for b in content if b.get("type") == "text"]
    return "".join(pieces)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def count_images(content: MessageContent) -> int:
    """Count the image blocks in content.

    Args:
        content: A plain string or a list of content blocks.

    Returns:
        Number of ``image_url`` blocks (always 0 for a plain string).
    """
    if isinstance(content, str):
        return 0
    total = 0
    for block in content:
        if block.get("type") == "image_url":
            total += 1
    return total
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def has_images(content: MessageContent) -> bool:
    """Report whether content contains at least one image block.

    Args:
        content: A plain string or a list of content blocks.

    Returns:
        True if any ``image_url`` block is present.
    """
    if isinstance(content, str):
        return False
    return any(block.get("type") == "image_url" for block in content)
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def get_content_preview(content: MessageContent) -> Dict[str, Any]:
    """Summarize content for logging without leaking payloads.

    Avoids logging sensitive data such as base64 blobs or full URLs.

    Args:
        content: A plain string or a list of content blocks.

    Returns:
        A small dict describing the content's shape.
    """
    if isinstance(content, str):
        return {"type": "text", "length": len(content)}

    # Single pass over the blocks tallies both images and text length.
    images = 0
    chars = 0
    for block in content:
        kind = block.get("type")
        if kind == "image_url":
            images += 1
        elif kind == "text":
            chars += len(block.get("text", ""))
    return {
        "type": "multimodal",
        "text_length": chars,
        "image_count": images
    }
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def calculate_content_length(content: MessageContent) -> int:
    """Total text length of content, ignoring images.

    Args:
        content: A plain string or a list of content blocks.

    Returns:
        Combined character count of all text blocks.
    """
    if isinstance(content, str):
        return len(content)
    lengths = (len(b.get("text", "")) for b in content if b.get("type") == "text")
    return sum(lengths)
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
# =============================================================================
|
|
159
|
+
# Image Token Estimation
|
|
160
|
+
# =============================================================================
|
|
161
|
+
|
|
162
|
+
@dataclass
class ImageTokenConfig:
    """Simplified image token estimation configuration.

    Design decision: a single OpenAI-style tile algorithm serves as the
    universal estimate. Providers are not differentiated by default
    (±20% error is acceptable for compression decisions); the server-side
    usage report remains authoritative for billing and limits.
    """

    # Tile-based estimation parameters (OpenAI style, universally applicable).
    base_tokens: int = 85        # Fixed per-image overhead
    tokens_per_tile: int = 170   # Cost of each 512x512 tile
    tile_size: int = 512         # Tile side length in pixels

    # Conservative estimates used when image dimensions are unknown.
    fallback_tokens: Dict[str, int] = field(default_factory=lambda: {
        "low": 85,      # Low resolution mode
        "auto": 600,    # Default conservative estimate
        "high": 1500,   # High resolution conservative estimate
    })

    def estimate_tokens(
        self,
        width: Optional[int] = None,
        height: Optional[int] = None,
        detail: str = "auto"
    ) -> int:
        """Estimate the token cost of one image.

        Args:
            width: Image width in pixels, if known.
            height: Image height in pixels, if known.
            detail: Resolution level ("low", "auto", "high").

        Returns:
            Estimated token count.
        """
        # "low" detail always costs the flat base overhead.
        if detail == "low":
            return self.base_tokens

        # Without dimensions, fall back to a conservative constant.
        if width is None or height is None:
            return self.fallback_tokens.get(detail, self.fallback_tokens["auto"])

        # Otherwise: base overhead plus one charge per covering tile.
        horizontal = math.ceil(width / self.tile_size)
        vertical = math.ceil(height / self.tile_size)
        return self.base_tokens + self.tokens_per_tile * horizontal * vertical

    @classmethod
    def for_provider(cls, provider: str) -> "ImageTokenConfig":
        """Build a provider-tuned config (optional accuracy enhancement).

        Args:
            provider: LLM provider name (openai, gemini, anthropic, etc.).

        Returns:
            An ImageTokenConfig approximating that provider's accounting.
            The default OpenAI-style config is sufficient for most uses.
        """
        name = provider.lower()
        if name == "openai":
            return cls(base_tokens=85, tokens_per_tile=170, tile_size=512)
        if name == "gemini":
            # Gemini: 258 tokens per 768x768 tile, no base overhead.
            return cls(base_tokens=0, tokens_per_tile=258, tile_size=768)
        if name == "anthropic":
            # Anthropic charges (width x height) / 750 tokens; approximated
            # with 27x27-pixel (~750 px) tiles at 1 token each.
            return cls(base_tokens=0, tokens_per_tile=1, tile_size=27)
        # Unknown providers default to the OpenAI-style estimate.
        return cls()
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
# Global default config instance
|
|
244
|
+
_default_image_token_config = ImageTokenConfig()


def estimate_image_tokens(
    width: Optional[int] = None,
    height: Optional[int] = None,
    detail: str = "auto",
    config: Optional[ImageTokenConfig] = None
) -> int:
    """Convenience wrapper around :meth:`ImageTokenConfig.estimate_tokens`.

    Args:
        width: Image width in pixels, if known.
        height: Image height in pixels, if known.
        detail: Resolution level ("low", "auto", "high").
        config: Custom configuration; the module-level default is used
            when omitted.

    Returns:
        Estimated token count.
    """
    chosen = config or _default_image_token_config
    return chosen.estimate_tokens(width, height, detail)
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
# =============================================================================
|
|
269
|
+
# Image Size Constraints
|
|
270
|
+
# =============================================================================
|
|
271
|
+
|
|
272
|
+
@dataclass
class ImageConstraints:
    """Size and count limits for images, guarding against memory blowups.

    Rationale: several multi-megabyte base64 images can cause OOM, and
    base64 inflates payloads by roughly a third during serialization, so
    both per-image and aggregate caps are enforced.
    """

    max_base64_bytes_per_image: int = 2 * 1024 * 1024    # 2MB per image
    max_base64_bytes_per_message: int = 5 * 1024 * 1024  # 5MB per message
    max_images_per_message: int = 5                      # images per message
    max_images_in_context: int = 20                      # images across all messages

    def validate_base64_size(self, base64_data: str, image_index: int = 0) -> None:
        """Check one base64 image against the per-image size cap.

        Args:
            base64_data: Base64 payload, with or without a ``data:`` prefix.
            image_index: Position of the image, used in error messages.

        Raises:
            ImagePayloadTooLargeError: If the payload exceeds the limit.
        """
        payload = base64_data
        # A data: URL carries its payload after the first comma.
        if payload.startswith("data:"):
            payload = payload.split(",", 1)[-1]

        size_bytes = len(payload.encode('utf-8'))
        if size_bytes <= self.max_base64_bytes_per_image:
            return
        raise ImagePayloadTooLargeError(
            f"Base64 image #{image_index} size ({size_bytes / 1024 / 1024:.2f}MB) "
            f"exceeds limit ({self.max_base64_bytes_per_image / 1024 / 1024:.2f}MB)"
        )
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
# =============================================================================
|
|
310
|
+
# Multimodal Compression Configuration
|
|
311
|
+
# =============================================================================
|
|
312
|
+
|
|
313
|
+
class MultimodalCompressionMode(Enum):
    """How multimodal messages are compressed when over budget.

    - TEXT_ONLY: drop images, keep text (default; safest for information
      preservation).
    - ATOMIC: keep or drop the entire message (for tight image-text binding).
    - LATEST_IMAGE: retain only the newest N images (middle ground).
    """

    TEXT_ONLY = "text_only"
    ATOMIC = "atomic"
    LATEST_IMAGE = "latest_image"
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
@dataclass
class MultimodalCompressionConfig:
    """Settings governing multimodal message compression."""

    # Compression strategy; TEXT_ONLY preserves the most information.
    mode: MultimodalCompressionMode = MultimodalCompressionMode.TEXT_ONLY
    # Number of most-recent images retained in LATEST_IMAGE mode.
    max_images_to_keep: int = 3
    # Whether text blocks may themselves be truncated.
    allow_truncate_text_blocks: bool = True
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
# =============================================================================
|
|
335
|
+
# Exceptions
|
|
336
|
+
# =============================================================================
|
|
337
|
+
|
|
338
|
+
class MultimodalError(Exception):
    """Root of the multimodal error hierarchy."""
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
class MultimodalNotSupportedError(MultimodalError):
    """Raised when the model does not accept multimodal input."""
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
class TooManyImagesError(MultimodalError):
    """Raised when the image count exceeds a model or context limit."""
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
class UnsupportedImageFormatError(MultimodalError):
    """Raised for an image format that is not supported."""
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
class UnsupportedContentBlockTypeError(MultimodalError):
    """Raised for a content block type that is not supported."""
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
class EmptyMultimodalContentError(MultimodalError):
    """Raised when a multimodal content list is empty."""
|
|
366
|
+
|
|
367
|
+
|
|
368
|
+
class InvalidTextBlockError(MultimodalError):
    """Raised for a malformed text block."""
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
class InvalidImageUrlError(MultimodalError):
    """Raised for an invalid or missing image URL."""
|
|
376
|
+
|
|
377
|
+
|
|
378
|
+
class InvalidImageDetailError(MultimodalError):
    """Raised for an invalid image detail level."""
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
class ImagePayloadTooLargeError(MultimodalError):
    """Raised when a base64 image payload exceeds its size limit."""
|
|
386
|
+
|
|
387
|
+
|
|
388
|
+
class ClipboardEmptyError(MultimodalError):
    """Raised when the clipboard does not contain an image."""
|
|
391
|
+
|
|
392
|
+
|
|
393
|
+
# =============================================================================
|
|
394
|
+
# Validation
|
|
395
|
+
# =============================================================================
|
|
396
|
+
|
|
397
|
+
def validate_content_block(block: Dict[str, Any]) -> None:
    """Validate one content block against the supported shapes.

    Args:
        block: The content block to check.

    Raises:
        UnsupportedContentBlockTypeError: For an unknown block type.
        InvalidTextBlockError: For a malformed text block.
        InvalidImageUrlError: For a missing or non-string image URL.
        InvalidImageDetailError: For an unknown image detail level.
    """
    kind = block.get("type")

    if kind == "text":
        if isinstance(block.get("text"), str):
            return
        raise InvalidTextBlockError("Text block requires 'text: str'.")

    if kind == "image_url":
        spec = block.get("image_url") or {}
        detail = spec.get("detail", "auto")
        # Detail is checked before the URL, matching the original order.
        if detail not in ("auto", "low", "high"):
            raise InvalidImageDetailError(f"Invalid image detail: {detail}")
        url = spec.get("url")
        if not isinstance(url, str) or not url:
            raise InvalidImageUrlError("image_url block requires non-empty url.")
        return

    raise UnsupportedContentBlockTypeError(f"Unsupported content block type: {kind}")
|
|
428
|
+
|
|
429
|
+
|
|
430
|
+
def validate_multimodal_content(content: MessageContent) -> None:
    """Validate multimodal content end to end.

    Args:
        content: The content to validate.

    Raises:
        EmptyMultimodalContentError: If the block list is empty.
        MultimodalError subclasses: Propagated from validate_content_block.
    """
    # A bare string is always acceptable.
    if isinstance(content, str):
        return

    if len(content) == 0:
        raise EmptyMultimodalContentError("Multimodal content list must not be empty.")

    for item in content:
        validate_content_block(item)
|
|
448
|
+
|
|
449
|
+
|
|
450
|
+
class MultimodalValidator:
    """Validator for multimodal messages against model capabilities."""

    @staticmethod
    def validate(
        messages,  # Messages type, but avoiding circular import
        supports_vision: bool = True,
        max_images_per_request: int = 10,
        model_name: str = "unknown",
        image_constraints: Optional[ImageConstraints] = None
    ) -> None:
        """Validate messages against model capabilities.

        Per-message checks (block shape, per-message image count, per-message
        base64 bytes) run inside the loop; context-wide checks (vision
        support, total image count, model request limit) run after it.

        Args:
            messages: Messages object to validate
            supports_vision: Whether model supports vision input
            max_images_per_request: Maximum images allowed per request
            model_name: Name of the model (for error messages)
            image_constraints: Optional size/count constraints for images

        Raises:
            MultimodalNotSupportedError: If model doesn't support vision
            TooManyImagesError: If image count exceeds limit
            ImagePayloadTooLargeError: If base64 images exceed size limits
            Other validation errors
        """
        total_images = 0
        has_any_images = False
        constraints = image_constraints or ImageConstraints()

        # NOTE(review): assumes each msg exposes a .content attribute that is
        # either a str or a list of content blocks — confirm against Messages.
        for msg in messages:
            content = msg.content
            if isinstance(content, list):
                # Validate content blocks (raises on malformed blocks)
                validate_multimodal_content(content)

                # Count and validate images in this message
                img_count = 0
                total_base64_bytes = 0

                for idx, block in enumerate(content):
                    if block.get("type") == "image_url":
                        img_count += 1
                        has_any_images = True
                        total_images += 1

                        # Validate base64 size if applicable
                        url = block.get("image_url", {}).get("url", "")
                        if url.startswith("data:"):
                            # Extract base64 data (payload follows the first comma)
                            base64_data = url.split(",", 1)[-1] if "," in url else url
                            base64_bytes = len(base64_data.encode('utf-8'))
                            total_base64_bytes += base64_bytes

                            # Check per-image limit (validate_base64_size strips
                            # the data: prefix itself, so the full url is passed)
                            constraints.validate_base64_size(url, idx)

                # Check per-message limits
                if img_count > constraints.max_images_per_message:
                    raise TooManyImagesError(
                        f"Message contains {img_count} images, exceeding limit of "
                        f"{constraints.max_images_per_message} images per message"
                    )

                if total_base64_bytes > constraints.max_base64_bytes_per_message:
                    raise ImagePayloadTooLargeError(
                        f"Message base64 images total {total_base64_bytes / 1024 / 1024:.2f}MB, "
                        f"exceeding limit of {constraints.max_base64_bytes_per_message / 1024 / 1024:.2f}MB"
                    )

        # Check vision support
        if has_any_images and not supports_vision:
            raise MultimodalNotSupportedError(
                f"Model '{model_name}' does not support vision input. "
                f"Please use a vision-capable model like gpt-4o or claude-3-5-sonnet."
            )

        # Check context-wide image count limit
        if total_images > constraints.max_images_in_context:
            raise TooManyImagesError(
                f"Context contains {total_images} images, exceeding limit of "
                f"{constraints.max_images_in_context} images across all messages"
            )

        # Check model-specific limit (backward compatibility)
        if total_images > max_images_per_request:
            raise TooManyImagesError(
                f"Request contains {total_images} images, but model limit is "
                f"{max_images_per_request}"
            )
|