jaf-py 2.4.1__py3-none-any.whl → 2.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jaf/__init__.py +15 -0
- jaf/core/agent_tool.py +6 -4
- jaf/core/analytics.py +4 -3
- jaf/core/engine.py +401 -37
- jaf/core/state.py +156 -0
- jaf/core/tracing.py +114 -23
- jaf/core/types.py +113 -3
- jaf/memory/approval_storage.py +306 -0
- jaf/memory/providers/postgres.py +10 -4
- jaf/memory/types.py +1 -0
- jaf/memory/utils.py +1 -1
- jaf/providers/model.py +277 -17
- jaf/server/__init__.py +2 -0
- jaf/server/server.py +665 -22
- jaf/server/types.py +149 -4
- jaf/utils/__init__.py +50 -0
- jaf/utils/attachments.py +401 -0
- jaf/utils/document_processor.py +561 -0
- {jaf_py-2.4.1.dist-info → jaf_py-2.4.3.dist-info}/METADATA +10 -2
- {jaf_py-2.4.1.dist-info → jaf_py-2.4.3.dist-info}/RECORD +24 -19
- {jaf_py-2.4.1.dist-info → jaf_py-2.4.3.dist-info}/WHEEL +0 -0
- {jaf_py-2.4.1.dist-info → jaf_py-2.4.3.dist-info}/entry_points.txt +0 -0
- {jaf_py-2.4.1.dist-info → jaf_py-2.4.3.dist-info}/licenses/LICENSE +0 -0
- {jaf_py-2.4.1.dist-info → jaf_py-2.4.3.dist-info}/top_level.txt +0 -0
jaf/server/types.py
CHANGED
|
@@ -7,22 +7,118 @@ for the JAF HTTP server implementation.
|
|
|
7
7
|
|
|
8
8
|
from dataclasses import dataclass
|
|
9
9
|
from typing import Any, Dict, Generic, List, Literal, Optional, TypeVar, Union
|
|
10
|
+
import base64
|
|
10
11
|
|
|
11
|
-
from pydantic import BaseModel, Field
|
|
12
|
+
from pydantic import BaseModel, Field, field_validator, model_validator
|
|
12
13
|
|
|
13
|
-
from ..core.types import Agent, RunConfig
|
|
14
|
+
from ..core.types import Agent, RunConfig, Attachment, MessageContentPart, get_text_content
|
|
14
15
|
from ..memory.types import MemoryProvider
|
|
15
16
|
|
|
16
17
|
Ctx = TypeVar('Ctx')
|
|
17
18
|
|
|
19
|
+
# Pydantic models for attachments to work with HTTP API
|
|
20
|
+
class HttpAttachment(BaseModel):
    """HTTP attachment format for API requests.

    An attachment carries its content by reference (``url``), inline
    (``data``, base64 text), or both. The validators below enforce:

    * at least one of ``url`` / ``data`` is present;
    * ``data``, when given, is strictly valid base64 that decodes to at
      least one byte;
    * ``mime_type``, when given, has a prefix compatible with ``kind``
      for the 'image' and 'document' kinds ('file' is unrestricted).
    """

    kind: Literal['image', 'document', 'file']
    mime_type: Optional[str] = None
    name: Optional[str] = None
    url: Optional[str] = None
    data: Optional[str] = None  # Base64 encoded data
    format: Optional[str] = None
    use_litellm_format: Optional[bool] = None

    @model_validator(mode='after')
    def validate_url_or_data_present(self) -> 'HttpAttachment':
        """Validate that at least one of url or data is present.

        Raises:
            ValueError: If both ``url`` and ``data`` are None.
        """
        if self.url is None and self.data is None:
            raise ValueError("At least one of 'url' or 'data' must be provided")
        return self

    @field_validator('data')
    @classmethod
    def validate_base64_data(cls, v: Optional[str]) -> Optional[str]:
        """Validate that data is proper, non-empty base64.

        Args:
            v: The raw ``data`` field value, or None when absent.

        Returns:
            ``v`` unchanged when it is None or valid.

        Raises:
            ValueError: If ``v`` is not valid base64, or decodes to zero bytes.
        """
        if v is None:
            return v

        try:
            # validate=True makes the decoder reject characters outside the
            # base64 alphabet; the default mode silently discards them, which
            # would let arbitrary garbage pass as "valid".
            decoded = base64.b64decode(v, validate=True)
        except ValueError as e:  # binascii.Error is a ValueError subclass
            raise ValueError(f"Invalid base64 encoding: {str(e)}") from e

        # Checked outside the try block so this error is reported as-is
        # instead of being re-wrapped as an "Invalid base64 encoding" error.
        if not decoded:
            raise ValueError("Base64 data decodes to empty content")

        return v

    @model_validator(mode='after')
    def validate_mime_type_consistency(self) -> 'HttpAttachment':
        """Validate that mime_type is consistent with kind.

        Raises:
            ValueError: If ``kind`` is 'image' and ``mime_type`` does not start
                with 'image/', or ``kind`` is 'document' and ``mime_type`` does
                not start with 'application/', 'text/' or 'document/'.
        """
        if self.mime_type is None:
            return self

        if self.kind == 'image' and not self.mime_type.startswith('image/'):
            raise ValueError(f"For kind='image', mime_type must start with 'image/'. Got: {self.mime_type}")

        if self.kind == 'document' and not self.mime_type.startswith(
            ('application/', 'text/', 'document/')
        ):
            raise ValueError(
                f"For kind='document', mime_type must start with 'application/', 'text/', "
                f"or 'document/'. Got: {self.mime_type}"
            )

        return self
|
|
69
|
+
|
|
70
|
+
class HttpMessageContentPart(BaseModel):
    """A single typed part of a multi-part HTTP message body.

    ``type`` names which of the three payload fields (``text``,
    ``image_url``, ``file``) carries the content of this part.
    """

    type: Literal['text', 'image_url', 'file']
    text: Optional[str] = None
    image_url: Optional[Dict[str, Any]] = None
    file: Optional[Dict[str, Any]] = None

    @model_validator(mode='after')
    def validate_content_consistency(self) -> 'HttpMessageContentPart':
        """Require exactly one payload field, and that it matches ``type``.

        Raises:
            ValueError: If zero or several payload fields are set, or the
                single populated field differs from the declared ``type``.
        """
        # Collect the names of the payload fields that were supplied,
        # keeping the text / image_url / file order.
        candidates = (
            ('text', self.text),
            ('image_url', self.image_url),
            ('file', self.file),
        )
        populated_fields = [label for label, value in candidates if value is not None]

        if len(populated_fields) != 1:
            raise ValueError(f"Exactly one content field must be populated. Found {len(populated_fields)}: {populated_fields}")

        (populated_field,) = populated_fields

        # Nothing more to check when the lone populated field is the declared one.
        if populated_field == self.type:
            return self

        if self.type == 'text':
            raise ValueError(f"For type='text', the 'text' field must be populated, but found '{populated_field}' instead")
        if self.type == 'image_url':
            raise ValueError(f"For type='image_url', the 'image_url' field must be populated, but found '{populated_field}' instead")
        if self.type == 'file':
            raise ValueError(f"For type='file', the 'file' field must be populated, but found '{populated_field}' instead")

        return self
|
|
103
|
+
|
|
18
104
|
# HTTP Message types
|
|
19
105
|
class HttpMessage(BaseModel):
    """HTTP message format for API requests."""
    # Message role; only the four listed literals are accepted.
    role: Literal['user', 'assistant', 'system', 'tool']
    # Either a plain string or a list of typed content parts.
    content: Union[str, List[HttpMessageContentPart]]
    # Optional list of attachments; None when omitted.
    attachments: Optional[List[HttpAttachment]] = None
    tool_call_id: Optional[str] = None
    tool_calls: Optional[List[Dict[str, Any]]] = None
|
|
25
112
|
|
|
113
|
+
# Approval types for HITL
|
|
114
|
+
class ApprovalMessage(BaseModel):
    """Approval message for tool execution."""
    # Discriminator; fixed to the single literal 'approval'.
    type: Literal['approval'] = 'approval'
    # The three fields below use Field(...), i.e. they are required.
    session_id: str = Field(..., description="Session ID for the approval")
    tool_call_id: str = Field(..., description="ID of the tool call being approved")
    approved: bool = Field(..., description="Whether the tool execution is approved")
    # Optional free-form mapping; defaults to None.
    additional_context: Optional[Dict[str, Any]] = Field(default=None, description="Additional context for the approval")
|
|
121
|
+
|
|
26
122
|
# Request types
|
|
27
123
|
class ChatRequest(BaseModel):
|
|
28
124
|
"""Request format for chat endpoints."""
|
|
@@ -33,6 +129,33 @@ class ChatRequest(BaseModel):
|
|
|
33
129
|
stream: bool = Field(default=False, description="Whether to stream the response")
|
|
34
130
|
conversation_id: Optional[str] = Field(default=None, description="Conversation ID for memory persistence")
|
|
35
131
|
memory: Optional[Dict[str, Any]] = Field(default=None, description="Memory configuration override")
|
|
132
|
+
store_on_completion: Optional[bool] = Field(default=None, description="Whether to store conversation on completion")
|
|
133
|
+
approvals: Optional[List[ApprovalMessage]] = Field(default=None, description="Approval decisions for tool calls")
|
|
134
|
+
|
|
135
|
+
# Interruption types for HITL
|
|
136
|
+
class ToolCallInterruption(BaseModel):
    """Tool call interruption data."""
    id: str
    # Fixed to the single literal 'function'.
    type: Literal['function'] = 'function'
    # String-to-string mapping.
    function: Dict[str, str]  # name and arguments
|
|
141
|
+
|
|
142
|
+
class InterruptionData(BaseModel):
    """Interruption information."""
    # Fixed to the single literal 'tool_approval'.
    type: Literal['tool_approval'] = 'tool_approval'
    # May be None, but no default is declared for this field here.
    tool_call: Optional[ToolCallInterruption]
    session_id: str
|
|
147
|
+
|
|
148
|
+
# Base outcome types
|
|
149
|
+
class BaseOutcomeData(BaseModel):
    """Base outcome data."""
    # One of the four listed status literals; no default.
    status: Literal['completed', 'error', 'max_turns', 'interrupted']
    # Both optional and default to None.
    output: Optional[str] = None
    error: Optional[Any] = None
|
|
154
|
+
|
|
155
|
+
class InterruptedOutcomeData(BaseOutcomeData):
    """Outcome data for interrupted runs."""
    # Narrows the inherited status field to the single literal 'interrupted'.
    status: Literal['interrupted'] = 'interrupted'
    # Optional list of interruption records; defaults to None.
    interruptions: Optional[List[InterruptionData]] = None
|
|
36
159
|
|
|
37
160
|
# Response types
|
|
38
161
|
class CompletedChatData(BaseModel):
|
|
@@ -40,7 +163,7 @@ class CompletedChatData(BaseModel):
|
|
|
40
163
|
run_id: str
|
|
41
164
|
trace_id: str
|
|
42
165
|
messages: List[HttpMessage]
|
|
43
|
-
outcome:
|
|
166
|
+
outcome: BaseOutcomeData
|
|
44
167
|
turn_count: int
|
|
45
168
|
execution_time_ms: int
|
|
46
169
|
conversation_id: Optional[str] = None
|
|
@@ -123,6 +246,28 @@ class ServerConfig(Generic[Ctx]):
|
|
|
123
246
|
cors: Union[bool, Dict[str, Any]] = True
|
|
124
247
|
default_memory_provider: Optional[MemoryProvider] = None
|
|
125
248
|
|
|
249
|
+
# Approval response types
|
|
250
|
+
class PendingApprovalData(BaseModel):
    """Data for a pending approval."""
    # Required fields (no defaults).
    conversation_id: str
    tool_call_id: str
    tool_name: str
    args: Dict[str, Any]
    signature: Optional[str] = None
    # Fixed to the single literal 'pending'.
    status: Literal['pending'] = 'pending'
    session_id: Optional[str] = None
|
|
259
|
+
|
|
260
|
+
class PendingApprovalsData(BaseModel):
    """Data for pending approvals response."""
    # Required list of PendingApprovalData entries.
    pending: List[PendingApprovalData]
|
|
263
|
+
|
|
264
|
+
class PendingApprovalsResponse(BaseModel):
    """Response format for pending approvals endpoint."""
    # Required flag; data and error are both optional and default to None.
    success: bool
    data: Optional[PendingApprovalsData] = None
    error: Optional[str] = None
|
|
269
|
+
|
|
270
|
+
|
|
126
271
|
# Validation schemas
|
|
127
272
|
def validate_chat_request(data: Dict[str, Any]) -> ChatRequest:
|
|
128
273
|
"""Validate and parse a chat request."""
|
jaf/utils/__init__.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Utility modules for the JAF framework.
|
|
3
|
+
|
|
4
|
+
This package provides various utility functions and classes for working
|
|
5
|
+
with attachments, document processing, and other common tasks.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
# Import attachment utilities
|
|
9
|
+
from .attachments import (
|
|
10
|
+
make_image_attachment,
|
|
11
|
+
make_file_attachment,
|
|
12
|
+
make_document_attachment,
|
|
13
|
+
validate_attachment,
|
|
14
|
+
assert_non_empty_attachment,
|
|
15
|
+
AttachmentValidationError,
|
|
16
|
+
ATTACHMENT_LIMITS,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
# Import document processing utilities
|
|
20
|
+
from .document_processor import (
|
|
21
|
+
extract_document_content,
|
|
22
|
+
is_document_supported,
|
|
23
|
+
get_document_description,
|
|
24
|
+
get_missing_dependencies,
|
|
25
|
+
check_dependencies,
|
|
26
|
+
ProcessedDocument,
|
|
27
|
+
DocumentProcessingError,
|
|
28
|
+
NetworkError,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
__all__ = [
|
|
32
|
+
# Attachment utilities
|
|
33
|
+
'make_image_attachment',
|
|
34
|
+
'make_file_attachment',
|
|
35
|
+
'make_document_attachment',
|
|
36
|
+
'validate_attachment',
|
|
37
|
+
'assert_non_empty_attachment',
|
|
38
|
+
'AttachmentValidationError',
|
|
39
|
+
'ATTACHMENT_LIMITS',
|
|
40
|
+
|
|
41
|
+
# Document processing
|
|
42
|
+
'extract_document_content',
|
|
43
|
+
'is_document_supported',
|
|
44
|
+
'get_document_description',
|
|
45
|
+
'get_missing_dependencies',
|
|
46
|
+
'check_dependencies',
|
|
47
|
+
'ProcessedDocument',
|
|
48
|
+
'DocumentProcessingError',
|
|
49
|
+
'NetworkError',
|
|
50
|
+
]
|
jaf/utils/attachments.py
ADDED
|
@@ -0,0 +1,401 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Attachment validation and utility functions for the JAF framework.
|
|
3
|
+
|
|
4
|
+
This module provides type-safe attachment creation and validation with
|
|
5
|
+
comprehensive error handling and security checks.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import base64
|
|
9
|
+
import re
|
|
10
|
+
from typing import Union, Optional, List
|
|
11
|
+
from urllib.parse import urlparse
|
|
12
|
+
|
|
13
|
+
from ..core.types import Attachment
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class AttachmentValidationError(Exception):
    """Signals that an attachment failed validation.

    Attributes:
        field: Name of the attachment field at fault, or None when the
            raiser did not specify one.
    """

    def __init__(self, message: str, field: Optional[str] = None):
        # Record the offending field first, then let Exception store the message.
        self.field = field
        super().__init__(message)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# Constants
|
|
25
|
+
# Size and length limits applied by the validators in this module.
MAX_ATTACHMENT_SIZE = 10 * 1024 ** 2  # 10MB
MAX_FILENAME_LENGTH = 255
BASE64_SIZE_RATIO = 3 / 4  # Base64 decoded size is approximately 3/4 of the encoded size
MAX_FORMAT_LENGTH = 10

# MIME types accepted for attachments of kind 'image'.
ALLOWED_IMAGE_MIME_TYPES = [
    'image/jpeg',
    'image/jpg',
    'image/png',
    'image/gif',
    'image/webp',
    'image/bmp',
    'image/svg+xml',
]

# MIME types accepted for attachments of kind 'document'.
ALLOWED_DOCUMENT_MIME_TYPES = [
    'application/pdf',
    'text/plain',
    'text/csv',
    'application/json',
    'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
]
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _validate_base64(data: str) -> bool:
|
|
43
|
+
"""Validate base64 string format."""
|
|
44
|
+
try:
|
|
45
|
+
# Basic base64 pattern check
|
|
46
|
+
base64_pattern = re.compile(r'^[A-Za-z0-9+/]*={0,2}$')
|
|
47
|
+
if not base64_pattern.match(data):
|
|
48
|
+
return False
|
|
49
|
+
|
|
50
|
+
# Try to decode to verify it's valid base64
|
|
51
|
+
decoded = base64.b64decode(data)
|
|
52
|
+
reencoded = base64.b64encode(decoded).decode('ascii')
|
|
53
|
+
|
|
54
|
+
# Account for padding differences
|
|
55
|
+
normalized_input = data.rstrip('=')
|
|
56
|
+
normalized_reencoded = reencoded.rstrip('=')
|
|
57
|
+
|
|
58
|
+
return normalized_input == normalized_reencoded
|
|
59
|
+
except Exception:
|
|
60
|
+
return False
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _validate_attachment_size(data: Optional[str]) -> None:
|
|
64
|
+
"""Validate attachment size doesn't exceed limits."""
|
|
65
|
+
if data:
|
|
66
|
+
# Calculate exact decoded size for base64 data
|
|
67
|
+
# Remove padding to get accurate count
|
|
68
|
+
data_without_padding = data.rstrip('=')
|
|
69
|
+
# Each 4 base64 chars encode 3 bytes, with the last group potentially having padding
|
|
70
|
+
exact_groups = len(data_without_padding) // 4
|
|
71
|
+
remaining_chars = len(data_without_padding) % 4
|
|
72
|
+
|
|
73
|
+
decoded_size = exact_groups * 3
|
|
74
|
+
if remaining_chars == 2:
|
|
75
|
+
decoded_size += 1 # 2 chars = 1 byte
|
|
76
|
+
elif remaining_chars == 3:
|
|
77
|
+
decoded_size += 2 # 3 chars = 2 bytes
|
|
78
|
+
|
|
79
|
+
if decoded_size > MAX_ATTACHMENT_SIZE:
|
|
80
|
+
size_mb = round(decoded_size / 1024 / 1024, 2)
|
|
81
|
+
max_mb = MAX_ATTACHMENT_SIZE // 1024 // 1024
|
|
82
|
+
raise AttachmentValidationError(
|
|
83
|
+
f"Attachment size ({size_mb}MB) exceeds maximum allowed size ({max_mb}MB)"
|
|
84
|
+
)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _validate_filename(name: Optional[str]) -> None:
|
|
88
|
+
"""Validate filename for security and length constraints."""
|
|
89
|
+
if not name:
|
|
90
|
+
return
|
|
91
|
+
|
|
92
|
+
if len(name) > MAX_FILENAME_LENGTH:
|
|
93
|
+
raise AttachmentValidationError(
|
|
94
|
+
f"Filename length ({len(name)}) exceeds maximum allowed length ({MAX_FILENAME_LENGTH})"
|
|
95
|
+
)
|
|
96
|
+
|
|
97
|
+
# Check for dangerous characters and control characters
|
|
98
|
+
dangerous_chars = re.compile(r'[<>:"|?*]')
|
|
99
|
+
control_chars = re.compile(r'[\x00-\x1f]')
|
|
100
|
+
|
|
101
|
+
if dangerous_chars.search(name) or control_chars.search(name):
|
|
102
|
+
raise AttachmentValidationError('Filename contains invalid characters')
|
|
103
|
+
|
|
104
|
+
# Check for path traversal attempts
|
|
105
|
+
if '..' in name or '/' in name or '\\' in name:
|
|
106
|
+
raise AttachmentValidationError(
|
|
107
|
+
'Filename cannot contain path separators or traversal sequences'
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _validate_mime_type(mime_type: Optional[str], allowed_types: List[str], kind: str) -> None:
|
|
112
|
+
"""Validate MIME type against allowed types."""
|
|
113
|
+
if mime_type:
|
|
114
|
+
# Normalize the input mime_type
|
|
115
|
+
normalized_mime_type = mime_type.lower().strip()
|
|
116
|
+
|
|
117
|
+
# Normalize the allowed types list
|
|
118
|
+
normalized_allowed_types = {t.lower().strip() for t in allowed_types}
|
|
119
|
+
|
|
120
|
+
if normalized_mime_type not in normalized_allowed_types:
|
|
121
|
+
raise AttachmentValidationError(
|
|
122
|
+
f"MIME type '{mime_type}' is not allowed for {kind} attachments. "
|
|
123
|
+
f"Allowed types: {', '.join(allowed_types)}"
|
|
124
|
+
)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def _validate_url(url: Optional[str]) -> None:
|
|
128
|
+
"""Validate URL format and protocol."""
|
|
129
|
+
if not url:
|
|
130
|
+
return
|
|
131
|
+
|
|
132
|
+
try:
|
|
133
|
+
parsed = urlparse(url)
|
|
134
|
+
allowed_protocols = ['http', 'https', 'data']
|
|
135
|
+
|
|
136
|
+
if parsed.scheme not in allowed_protocols:
|
|
137
|
+
raise AttachmentValidationError(
|
|
138
|
+
f"URL protocol '{parsed.scheme}' is not allowed. "
|
|
139
|
+
f"Allowed protocols: {', '.join(allowed_protocols)}"
|
|
140
|
+
)
|
|
141
|
+
|
|
142
|
+
# Additional validation for data URLs
|
|
143
|
+
if parsed.scheme == 'data':
|
|
144
|
+
# For data URLs, the "path" component in urlparse contains the mediatype and data
|
|
145
|
+
# Proper data URL format: mediatype[;charset][;base64],data
|
|
146
|
+
data_content_pattern = re.compile(r'^([^;,]+)(;[^;,]+)*(;base64)?,(.+)$')
|
|
147
|
+
data_content = parsed.path
|
|
148
|
+
|
|
149
|
+
# Some URLs might have query components that are part of the data
|
|
150
|
+
if parsed.query:
|
|
151
|
+
data_content += "?" + parsed.query
|
|
152
|
+
|
|
153
|
+
if not data_content_pattern.match(data_content):
|
|
154
|
+
raise AttachmentValidationError(
|
|
155
|
+
'Invalid data URL format: must match mediatype[;charset][;base64],data pattern'
|
|
156
|
+
)
|
|
157
|
+
|
|
158
|
+
except ValueError as e:
|
|
159
|
+
raise AttachmentValidationError(f"Invalid URL: {e}")
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def _process_base64_data(data: Union[bytes, str, None]) -> Optional[str]:
|
|
163
|
+
"""Process and validate base64 data."""
|
|
164
|
+
if not data:
|
|
165
|
+
return None
|
|
166
|
+
|
|
167
|
+
if isinstance(data, bytes):
|
|
168
|
+
base64_str = base64.b64encode(data).decode('ascii')
|
|
169
|
+
else:
|
|
170
|
+
base64_str = data
|
|
171
|
+
|
|
172
|
+
# Validate base64 format if it was provided as string
|
|
173
|
+
if isinstance(data, str) and not _validate_base64(base64_str):
|
|
174
|
+
raise AttachmentValidationError('Invalid base64 data format')
|
|
175
|
+
|
|
176
|
+
return base64_str
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def make_image_attachment(
    data: Union[bytes, str, None] = None,
    url: Optional[str] = None,
    mime_type: Optional[str] = None,
    name: Optional[str] = None
) -> Attachment:
    """
    Build an Attachment of kind 'image' after validating every input.

    Args:
        data: Raw bytes or base64 string
        url: Remote or data URL
        mime_type: MIME type (e.g., 'image/png'); must be one of
            ALLOWED_IMAGE_MIME_TYPES when given
        name: Optional filename

    Returns:
        Validated Attachment object

    Raises:
        AttachmentValidationError: If any input is invalid, or neither
            url nor data is supplied
    """
    # Checks on the metadata come first, in filename / URL / MIME type order.
    _validate_filename(name)
    _validate_url(url)
    _validate_mime_type(mime_type, ALLOWED_IMAGE_MIME_TYPES, 'image')

    # Normalise to base64 text so the size check covers bytes and str alike.
    encoded = _process_base64_data(data)

    # The size validator is a no-op for None/empty, so no guard is needed.
    _validate_attachment_size(encoded)

    # An attachment with no content source is meaningless.
    if not (url or encoded):
        raise AttachmentValidationError('Image attachment must have either url or data')

    return Attachment(
        kind='image',
        mime_type=mime_type,
        name=name,
        url=url,
        data=encoded
    )
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
def make_file_attachment(
    data: Union[bytes, str, None] = None,
    url: Optional[str] = None,
    mime_type: Optional[str] = None,
    name: Optional[str] = None,
    format: Optional[str] = None
) -> Attachment:
    """
    Create a validated file attachment.

    Unlike image and document attachments, the MIME type of a file
    attachment is not checked against an allow-list.

    Args:
        data: Raw bytes or base64 string
        url: Remote or data URL
        mime_type: MIME type
        name: Optional filename
        format: Optional format identifier (e.g., 'pdf', 'txt'); at most
            MAX_FORMAT_LENGTH characters

    Returns:
        Validated Attachment object

    Raises:
        AttachmentValidationError: If validation fails
    """
    # Validate inputs
    _validate_filename(name)
    _validate_url(url)

    # Process data to base64 first, so we can validate size for both bytes and string inputs
    base64_data = _process_base64_data(data)

    # Validate size if we have data
    if base64_data:
        _validate_attachment_size(base64_data)

    # Ensure at least one content source
    if not url and not base64_data:
        raise AttachmentValidationError('File attachment must have either url or data')

    # Validate format if provided. The limit in the message is taken from
    # MAX_FORMAT_LENGTH so the text cannot drift from the enforced value.
    if format and len(format) > MAX_FORMAT_LENGTH:
        raise AttachmentValidationError(
            f'File format must be {MAX_FORMAT_LENGTH} characters or less'
        )

    return Attachment(
        kind='file',
        mime_type=mime_type,
        name=name,
        url=url,
        data=base64_data,
        format=format
    )
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
def make_document_attachment(
    data: Union[bytes, str, None] = None,
    url: Optional[str] = None,
    mime_type: Optional[str] = None,
    name: Optional[str] = None,
    format: Optional[str] = None,
    use_litellm_format: Optional[bool] = None
) -> Attachment:
    """
    Build an Attachment of kind 'document' after validating every input.

    The MIME type is checked against ALLOWED_DOCUMENT_MIME_TYPES; all
    remaining checks are delegated to make_file_attachment.

    Args:
        data: Raw bytes or base64 string
        url: Remote or data URL
        mime_type: MIME type
        name: Optional filename
        format: Optional format identifier
        use_litellm_format: Whether to use LiteLLM native format

    Returns:
        Validated Attachment object

    Raises:
        AttachmentValidationError: If validation fails
    """
    # Document-specific rule: the MIME type must be on the document allow-list.
    _validate_mime_type(mime_type, ALLOWED_DOCUMENT_MIME_TYPES, 'document')

    # Reuse the file factory for the shared validation and normalisation.
    validated = make_file_attachment(
        data=data, url=url, mime_type=mime_type, name=name, format=format
    )

    # Re-wrap the validated fields as a document, adding the LiteLLM flag.
    return Attachment(
        kind='document',
        mime_type=validated.mime_type,
        name=validated.name,
        url=validated.url,
        data=validated.data,
        format=validated.format,
        use_litellm_format=use_litellm_format
    )
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
def validate_attachment(attachment: Attachment) -> None:
    """
    Validate an existing attachment object.

    Checks, in order: that url or data is present; the filename; the URL;
    the size and base64 format of the data; the MIME type for the 'image'
    and 'document' kinds; and the format length for the 'file' and
    'document' kinds. Each failure is re-raised with ``field`` set to the
    attachment field at fault.

    Args:
        attachment: Attachment to validate

    Raises:
        AttachmentValidationError: If validation fails
    """
    try:
        if not attachment.url and not attachment.data:
            raise AttachmentValidationError(
                'Attachment must have either url or data',
                field='url/data'
            )

        if attachment.name:
            try:
                _validate_filename(attachment.name)
            except AttachmentValidationError as e:
                raise AttachmentValidationError(f"Invalid filename: {e}", field='name') from e

        if attachment.url:
            try:
                _validate_url(attachment.url)
            except AttachmentValidationError as e:
                raise AttachmentValidationError(f"Invalid URL: {e}", field='url') from e

        if attachment.data:
            try:
                _validate_attachment_size(attachment.data)
            except AttachmentValidationError as e:
                raise AttachmentValidationError(f"Size validation failed: {e}", field='data') from e

            if not _validate_base64(attachment.data):
                raise AttachmentValidationError(
                    'Invalid base64 data format in attachment',
                    field='data'
                )

        # Validate MIME type based on attachment kind
        # (files can have any MIME type, so 'file' has no branch here)
        if attachment.kind == 'image':
            try:
                _validate_mime_type(attachment.mime_type, ALLOWED_IMAGE_MIME_TYPES, 'image')
            except AttachmentValidationError as e:
                raise AttachmentValidationError(f"Image MIME type validation failed: {e}", field='mime_type') from e
        elif attachment.kind == 'document':
            try:
                _validate_mime_type(attachment.mime_type, ALLOWED_DOCUMENT_MIME_TYPES, 'document')
            except AttachmentValidationError as e:
                raise AttachmentValidationError(f"Document MIME type validation failed: {e}", field='mime_type') from e

        # make_document_attachment enforces the format-length limit through
        # make_file_attachment, so documents are held to it here as well as files.
        if (
            attachment.kind in ('file', 'document')
            and attachment.format
            and len(attachment.format) > MAX_FORMAT_LENGTH
        ):
            raise AttachmentValidationError(
                f'File format "{attachment.format}" exceeds maximum length of {MAX_FORMAT_LENGTH} characters',
                field='format'
            )
    except AttachmentValidationError:
        # Already in the public error type; pass through untouched.
        raise
    except Exception as e:
        # Anything else (e.g. a missing attribute on the object) is reported
        # in the same error type so callers only need to catch one exception.
        raise AttachmentValidationError(f"Unexpected validation error: {e}") from e
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
# Legacy function for backwards compatibility
|
|
390
|
+
def assert_non_empty_attachment(attachment: Attachment) -> None:
    """Backwards-compatible alias that forwards to validate_attachment.

    Args:
        attachment: Attachment to validate

    Raises:
        AttachmentValidationError: If validation fails
    """
    validate_attachment(attachment=attachment)
|
|
393
|
+
|
|
394
|
+
|
|
395
|
+
# Export validation constants for external use
|
|
396
|
+
# Read-only view of this module's limits, keyed by name, for external callers.
ATTACHMENT_LIMITS = dict(
    MAX_SIZE=MAX_ATTACHMENT_SIZE,
    MAX_FILENAME_LENGTH=MAX_FILENAME_LENGTH,
    ALLOWED_IMAGE_MIME_TYPES=ALLOWED_IMAGE_MIME_TYPES,
    ALLOWED_DOCUMENT_MIME_TYPES=ALLOWED_DOCUMENT_MIME_TYPES,
)
|