letta-nightly 0.8.3.dev20250612104349__py3-none-any.whl → 0.8.4.dev20250614104137__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/__init__.py +1 -1
- letta/agent.py +11 -1
- letta/agents/base_agent.py +11 -4
- letta/agents/ephemeral_summary_agent.py +3 -2
- letta/agents/letta_agent.py +109 -78
- letta/agents/letta_agent_batch.py +4 -3
- letta/agents/voice_agent.py +3 -3
- letta/agents/voice_sleeptime_agent.py +3 -2
- letta/client/client.py +6 -3
- letta/constants.py +6 -0
- letta/data_sources/connectors.py +3 -5
- letta/functions/async_composio_toolset.py +4 -1
- letta/functions/function_sets/files.py +4 -3
- letta/functions/schema_generator.py +5 -2
- letta/groups/sleeptime_multi_agent_v2.py +4 -3
- letta/helpers/converters.py +7 -1
- letta/helpers/message_helper.py +31 -11
- letta/helpers/tool_rule_solver.py +69 -4
- letta/interfaces/anthropic_streaming_interface.py +8 -1
- letta/interfaces/openai_streaming_interface.py +4 -1
- letta/llm_api/anthropic_client.py +4 -4
- letta/llm_api/openai_client.py +56 -11
- letta/local_llm/utils.py +3 -20
- letta/orm/sqlalchemy_base.py +7 -1
- letta/otel/metric_registry.py +26 -0
- letta/otel/metrics.py +78 -14
- letta/schemas/letta_message_content.py +64 -3
- letta/schemas/letta_request.py +5 -1
- letta/schemas/message.py +61 -14
- letta/schemas/openai/chat_completion_request.py +1 -1
- letta/schemas/providers.py +41 -14
- letta/schemas/tool_rule.py +67 -0
- letta/schemas/user.py +2 -2
- letta/server/rest_api/routers/v1/agents.py +22 -12
- letta/server/rest_api/routers/v1/sources.py +13 -25
- letta/server/server.py +10 -5
- letta/services/agent_manager.py +5 -1
- letta/services/file_manager.py +219 -0
- letta/services/file_processor/chunker/line_chunker.py +119 -14
- letta/services/file_processor/file_processor.py +8 -8
- letta/services/file_processor/file_types.py +303 -0
- letta/services/file_processor/parser/mistral_parser.py +2 -11
- letta/services/helpers/agent_manager_helper.py +6 -0
- letta/services/message_manager.py +32 -0
- letta/services/organization_manager.py +4 -6
- letta/services/passage_manager.py +1 -0
- letta/services/source_manager.py +0 -208
- letta/services/tool_executor/composio_tool_executor.py +5 -1
- letta/services/tool_executor/files_tool_executor.py +291 -15
- letta/services/user_manager.py +8 -8
- letta/system.py +3 -1
- letta/utils.py +7 -13
- {letta_nightly-0.8.3.dev20250612104349.dist-info → letta_nightly-0.8.4.dev20250614104137.dist-info}/METADATA +2 -2
- {letta_nightly-0.8.3.dev20250612104349.dist-info → letta_nightly-0.8.4.dev20250614104137.dist-info}/RECORD +57 -55
- {letta_nightly-0.8.3.dev20250612104349.dist-info → letta_nightly-0.8.4.dev20250614104137.dist-info}/LICENSE +0 -0
- {letta_nightly-0.8.3.dev20250612104349.dist-info → letta_nightly-0.8.4.dev20250614104137.dist-info}/WHEEL +0 -0
- {letta_nightly-0.8.3.dev20250612104349.dist-info → letta_nightly-0.8.4.dev20250614104137.dist-info}/entry_points.txt +0 -0
letta/services/file_processor/chunker/line_chunker.py
@@ -1,34 +1,139 @@
+import re
 from typing import List, Optional

 from letta.log import get_logger
+from letta.schemas.file import FileMetadata
+from letta.services.file_processor.file_types import ChunkingStrategy, file_type_registry

 logger = get_logger(__name__)


 class LineChunker:
-    """
+    """Content-aware line chunker that adapts chunking strategy based on file type"""

     def __init__(self):
-
+        self.file_type_registry = file_type_registry

-
-
-
-
-
+    def _determine_chunking_strategy(self, file_metadata: FileMetadata) -> ChunkingStrategy:
+        """Determine the best chunking strategy based on file metadata"""
+        # Try to get strategy from MIME type first
+        if file_metadata.file_type:
+            try:
+                return self.file_type_registry.get_chunking_strategy_by_mime_type(file_metadata.file_type)
+            except Exception:
+                pass

-
+        # Fallback to filename extension
+        if file_metadata.file_name:
+            try:
+                # Extract extension from filename
+                import os
+
+                _, ext = os.path.splitext(file_metadata.file_name)
+                if ext:
+                    return self.file_type_registry.get_chunking_strategy_by_extension(ext)
+            except Exception:
+                pass
+
+        # Default fallback
+        return ChunkingStrategy.LINE_BASED
+
+    def _chunk_by_lines(self, text: str, preserve_indentation: bool = False) -> List[str]:
+        """Traditional line-based chunking for code and structured data"""
+        lines = []
+        for line in text.splitlines():
+            if preserve_indentation:
+                # For code: preserve leading whitespace (indentation), remove trailing whitespace
+                line = line.rstrip()
+                # Only skip completely empty lines
+                if line:
+                    lines.append(line)
+            else:
+                # For structured data: strip all whitespace
+                line = line.strip()
+                if line:
+                    lines.append(line)
+        return lines
+
+    def _chunk_by_sentences(self, text: str) -> List[str]:
+        """Sentence-based chunking for documentation and markup"""
+        # Simple sentence splitting on periods, exclamation marks, and question marks
+        # followed by whitespace or end of string
+        sentence_pattern = r"(?<=[.!?])\s+(?=[A-Z])"
+
+        # Split text into sentences
+        sentences = re.split(sentence_pattern, text.strip())
+
+        # Clean up sentences - remove extra whitespace and empty sentences
+        cleaned_sentences = []
+        for sentence in sentences:
+            sentence = re.sub(r"\s+", " ", sentence.strip())  # Normalize whitespace
+            if sentence:
+                cleaned_sentences.append(sentence)
+
+        return cleaned_sentences
+
+    def _chunk_by_characters(self, text: str, target_line_length: int = 100) -> List[str]:
+        """Character-based wrapping for prose text"""
+        words = text.split()
+        lines = []
+        current_line = []
+        current_length = 0
+
+        for word in words:
+            # Check if adding this word would exceed the target length
+            word_length = len(word)
+            if current_length + word_length + len(current_line) > target_line_length and current_line:
+                # Start a new line
+                lines.append(" ".join(current_line))
+                current_line = [word]
+                current_length = word_length
+            else:
+                current_line.append(word)
+                current_length += word_length
+
+        # Add the last line if there's content
+        if current_line:
+            lines.append(" ".join(current_line))
+
+        return [line for line in lines if line.strip()]
+
+    def chunk_text(
+        self, text: str, file_metadata: FileMetadata, start: Optional[int] = None, end: Optional[int] = None, add_metadata: bool = True
+    ) -> List[str]:
+        """Content-aware text chunking based on file type"""
+        strategy = self._determine_chunking_strategy(file_metadata)
+
+        # Apply the appropriate chunking strategy
+        if strategy == ChunkingStrategy.DOCUMENTATION:
+            content_lines = self._chunk_by_sentences(text)
+        elif strategy == ChunkingStrategy.PROSE:
+            content_lines = self._chunk_by_characters(text)
+        elif strategy == ChunkingStrategy.CODE:
+            content_lines = self._chunk_by_lines(text, preserve_indentation=True)
+        else:  # STRUCTURED_DATA or LINE_BASED
+            content_lines = self._chunk_by_lines(text, preserve_indentation=False)
+
+        total_chunks = len(content_lines)
+
+        # Handle start/end slicing
+        if start is not None and end is not None:
             content_lines = content_lines[start:end]
             line_offset = start
         else:
             line_offset = 0

-
+        # Add line numbers for all strategies
+        content_lines = [f"{i + line_offset}: {line}" for i, line in enumerate(content_lines)]

-        # Add metadata about total
-        if
-
-
-
+        # Add metadata about total chunks
+        if add_metadata:
+            chunk_type = (
+                "sentences" if strategy == ChunkingStrategy.DOCUMENTATION else "chunks" if strategy == ChunkingStrategy.PROSE else "lines"
+            )
+            if start is not None and end is not None:
+                content_lines.insert(0, f"[Viewing {chunk_type} {start} to {end-1} (out of {total_chunks} {chunk_type})]")
+            else:
+                content_lines.insert(0, f"[Viewing file start (out of {total_chunks} {chunk_type})]")

         return content_lines
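To make the new content-aware behavior concrete, the sketch below mirrors the DOCUMENTATION branch of chunk_text in plain Python. It is a standalone illustration rather than a call into the Letta classes (constructing a real FileMetadata requires fields this diff does not show), but the regex, the per-chunk numbering, and the header format are taken directly from the hunk above.

import re

def chunk_markdown(text: str) -> list[str]:
    # DOCUMENTATION strategy: split on sentence boundaries, then normalize whitespace
    sentence_pattern = r"(?<=[.!?])\s+(?=[A-Z])"
    sentences = [re.sub(r"\s+", " ", s.strip()) for s in re.split(sentence_pattern, text.strip())]
    chunks = [s for s in sentences if s]
    # Number each chunk and prepend the metadata header, as chunk_text does with add_metadata=True
    numbered = [f"{i}: {s}" for i, s in enumerate(chunks)]
    return [f"[Viewing file start (out of {len(chunks)} sentences)]"] + numbered

print("\n".join(chunk_markdown("Letta agents read files. Chunks are numbered! Headers describe the view.")))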
letta/services/file_processor/file_processor.py
@@ -11,6 +11,7 @@ from letta.schemas.job import Job, JobUpdate
 from letta.schemas.passage import Passage
 from letta.schemas.user import User
 from letta.server.server import SyncServer
+from letta.services.file_manager import FileManager
 from letta.services.file_processor.chunker.line_chunker import LineChunker
 from letta.services.file_processor.chunker.llama_index_chunker import LlamaIndexChunker
 from letta.services.file_processor.embedder.openai_embedder import OpenAIEmbedder
@@ -38,6 +39,7 @@ class FileProcessor:
         self.line_chunker = LineChunker()
         self.embedder = embedder
         self.max_file_size = max_file_size
+        self.file_manager = FileManager()
         self.source_manager = SourceManager()
         self.passage_manager = PassageManager()
         self.job_manager = JobManager()
@@ -58,7 +60,7 @@ class FileProcessor:

         # Create file as early as possible with no content
         file_metadata.processing_status = FileProcessingStatus.PARSING  # Parsing now
-        file_metadata = await self.
+        file_metadata = await self.file_manager.create_file(file_metadata, self.actor)

         try:
             # Ensure we're working with bytes
@@ -73,16 +75,14 @@ class FileProcessor:

             # update file with raw text
             raw_markdown_text = "".join([page.markdown for page in ocr_response.pages])
-            file_metadata = await self.
-
-            )
-            file_metadata = await self.source_manager.update_file_status(
+            file_metadata = await self.file_manager.upsert_file_content(file_id=file_metadata.id, text=raw_markdown_text, actor=self.actor)
+            file_metadata = await self.file_manager.update_file_status(
                 file_id=file_metadata.id, actor=self.actor, processing_status=FileProcessingStatus.EMBEDDING
             )

             # Insert to agent context window
             # TODO: Rethink this line chunking mechanism
-            content_lines = self.line_chunker.chunk_text(text=raw_markdown_text)
+            content_lines = self.line_chunker.chunk_text(text=raw_markdown_text, file_metadata=file_metadata)
             visible_content = "\n".join(content_lines)

             await server.insert_file_into_context_windows(
@@ -123,7 +123,7 @@ class FileProcessor:
            job.metadata["num_passages"] = len(all_passages)
            await self.job_manager.update_job_by_id_async(job_id=job.id, job_update=JobUpdate(**job.model_dump()), actor=self.actor)

-           await self.
+           await self.file_manager.update_file_status(
                file_id=file_metadata.id, actor=self.actor, processing_status=FileProcessingStatus.COMPLETED
            )

@@ -138,7 +138,7 @@ class FileProcessor:
            job.metadata["error"] = str(e)
            await self.job_manager.update_job_by_id_async(job_id=job.id, job_update=JobUpdate(**job.model_dump()), actor=self.actor)

-           await self.
+           await self.file_manager.update_file_status(
                file_id=file_metadata.id, actor=self.actor, processing_status=FileProcessingStatus.ERROR, error_message=str(e)
            )

letta/services/file_processor/file_types.py
@@ -0,0 +1,303 @@
+"""
+Centralized file type configuration for supported file formats.
+
+This module provides a single source of truth for file type definitions,
+mime types, and file processing capabilities across the Letta codebase.
+"""
+
+import mimetypes
+from dataclasses import dataclass
+from enum import Enum
+from typing import Dict, Set
+
+
+class ChunkingStrategy(str, Enum):
+    """Enum for different file chunking strategies."""
+
+    CODE = "code"  # Line-based chunking for code files
+    STRUCTURED_DATA = "structured_data"  # Line-based chunking for JSON, XML, etc.
+    DOCUMENTATION = "documentation"  # Paragraph-aware chunking for Markdown, HTML
+    PROSE = "prose"  # Character-based wrapping for plain text
+    LINE_BASED = "line_based"  # Default line-based chunking
+
+
+@dataclass
+class FileTypeInfo:
+    """Information about a supported file type."""
+
+    extension: str
+    mime_type: str
+    is_simple_text: bool
+    description: str
+    chunking_strategy: ChunkingStrategy = ChunkingStrategy.LINE_BASED
+
+
+class FileTypeRegistry:
+    """Central registry for supported file types."""
+
+    def __init__(self):
+        """Initialize the registry with default supported file types."""
+        self._file_types: Dict[str, FileTypeInfo] = {}
+        self._register_default_types()
+
+    def _register_default_types(self) -> None:
+        """Register all default supported file types."""
+        # Document formats
+        self.register(".pdf", "application/pdf", False, "PDF document", ChunkingStrategy.LINE_BASED)
+        self.register(".txt", "text/plain", True, "Plain text file", ChunkingStrategy.PROSE)
+        self.register(".md", "text/markdown", True, "Markdown document", ChunkingStrategy.DOCUMENTATION)
+        self.register(".markdown", "text/markdown", True, "Markdown document", ChunkingStrategy.DOCUMENTATION)
+        self.register(".json", "application/json", True, "JSON data file", ChunkingStrategy.STRUCTURED_DATA)
+        self.register(".jsonl", "application/jsonl", True, "JSON Lines file", ChunkingStrategy.STRUCTURED_DATA)
+
+        # Programming languages
+        self.register(".py", "text/x-python", True, "Python source code", ChunkingStrategy.CODE)
+        self.register(".js", "text/javascript", True, "JavaScript source code", ChunkingStrategy.CODE)
+        self.register(".ts", "text/x-typescript", True, "TypeScript source code", ChunkingStrategy.CODE)
+        self.register(".java", "text/x-java-source", True, "Java source code", ChunkingStrategy.CODE)
+        self.register(".cpp", "text/x-c++", True, "C++ source code", ChunkingStrategy.CODE)
+        self.register(".cxx", "text/x-c++", True, "C++ source code", ChunkingStrategy.CODE)
+        self.register(".c", "text/x-c", True, "C source code", ChunkingStrategy.CODE)
+        self.register(".h", "text/x-c", True, "C/C++ header file", ChunkingStrategy.CODE)
+        self.register(".cs", "text/x-csharp", True, "C# source code", ChunkingStrategy.CODE)
+        self.register(".php", "text/x-php", True, "PHP source code", ChunkingStrategy.CODE)
+        self.register(".rb", "text/x-ruby", True, "Ruby source code", ChunkingStrategy.CODE)
+        self.register(".go", "text/x-go", True, "Go source code", ChunkingStrategy.CODE)
+        self.register(".rs", "text/x-rust", True, "Rust source code", ChunkingStrategy.CODE)
+        self.register(".swift", "text/x-swift", True, "Swift source code", ChunkingStrategy.CODE)
+        self.register(".kt", "text/x-kotlin", True, "Kotlin source code", ChunkingStrategy.CODE)
+        self.register(".scala", "text/x-scala", True, "Scala source code", ChunkingStrategy.CODE)
+        self.register(".r", "text/x-r", True, "R source code", ChunkingStrategy.CODE)
+        self.register(".m", "text/x-objective-c", True, "Objective-C source code", ChunkingStrategy.CODE)
+
+        # Web technologies
+        self.register(".html", "text/html", True, "HTML document", ChunkingStrategy.CODE)
+        self.register(".htm", "text/html", True, "HTML document", ChunkingStrategy.CODE)
+        self.register(".css", "text/css", True, "CSS stylesheet", ChunkingStrategy.STRUCTURED_DATA)
+        self.register(".scss", "text/x-scss", True, "SCSS stylesheet", ChunkingStrategy.STRUCTURED_DATA)
+        self.register(".sass", "text/x-sass", True, "Sass stylesheet", ChunkingStrategy.STRUCTURED_DATA)
+        self.register(".less", "text/x-less", True, "Less stylesheet", ChunkingStrategy.STRUCTURED_DATA)
+        self.register(".vue", "text/x-vue", True, "Vue.js component", ChunkingStrategy.CODE)
+        self.register(".jsx", "text/x-jsx", True, "JSX source code", ChunkingStrategy.CODE)
+        self.register(".tsx", "text/x-tsx", True, "TSX source code", ChunkingStrategy.CODE)
+
+        # Configuration and data formats
+        self.register(".xml", "application/xml", True, "XML document", ChunkingStrategy.STRUCTURED_DATA)
+        self.register(".yaml", "text/x-yaml", True, "YAML configuration", ChunkingStrategy.STRUCTURED_DATA)
+        self.register(".yml", "text/x-yaml", True, "YAML configuration", ChunkingStrategy.STRUCTURED_DATA)
+        self.register(".toml", "application/toml", True, "TOML configuration", ChunkingStrategy.STRUCTURED_DATA)
+        self.register(".ini", "text/x-ini", True, "INI configuration", ChunkingStrategy.STRUCTURED_DATA)
+        self.register(".cfg", "text/x-conf", True, "Configuration file", ChunkingStrategy.STRUCTURED_DATA)
+        self.register(".conf", "text/x-conf", True, "Configuration file", ChunkingStrategy.STRUCTURED_DATA)
+
+        # Scripts and SQL
+        self.register(".sh", "text/x-shellscript", True, "Shell script", ChunkingStrategy.CODE)
+        self.register(".bash", "text/x-shellscript", True, "Bash script", ChunkingStrategy.CODE)
+        self.register(".ps1", "text/x-powershell", True, "PowerShell script", ChunkingStrategy.CODE)
+        self.register(".bat", "text/x-batch", True, "Batch script", ChunkingStrategy.CODE)
+        self.register(".cmd", "text/x-batch", True, "Command script", ChunkingStrategy.CODE)
+        self.register(".dockerfile", "text/x-dockerfile", True, "Dockerfile", ChunkingStrategy.CODE)
+        self.register(".sql", "text/x-sql", True, "SQL script", ChunkingStrategy.CODE)
+
+    def register(
+        self,
+        extension: str,
+        mime_type: str,
+        is_simple_text: bool,
+        description: str,
+        chunking_strategy: ChunkingStrategy = ChunkingStrategy.LINE_BASED,
+    ) -> None:
+        """
+        Register a new file type.
+
+        Args:
+            extension: File extension (with leading dot, e.g., '.py')
+            mime_type: MIME type for the file
+            is_simple_text: Whether this is a simple text file that can be read directly
+            description: Human-readable description of the file type
+            chunking_strategy: Strategy for chunking this file type
+        """
+        if not extension.startswith("."):
+            extension = f".{extension}"
+
+        self._file_types[extension] = FileTypeInfo(
+            extension=extension,
+            mime_type=mime_type,
+            is_simple_text=is_simple_text,
+            description=description,
+            chunking_strategy=chunking_strategy,
+        )
+
+    def register_mime_types(self) -> None:
+        """Register all file types with Python's mimetypes module."""
+        for file_type in self._file_types.values():
+            mimetypes.add_type(file_type.mime_type, file_type.extension)
+
+        # Also register some additional MIME type aliases that may be encountered
+        mimetypes.add_type("text/x-markdown", ".md")
+        mimetypes.add_type("application/x-jsonlines", ".jsonl")
+        mimetypes.add_type("text/xml", ".xml")
+
+    def get_allowed_media_types(self) -> Set[str]:
+        """
+        Get set of all allowed MIME types.
+
+        Returns:
+            Set of MIME type strings that are supported for upload
+        """
+        allowed_types = {file_type.mime_type for file_type in self._file_types.values()}
+
+        # Add additional MIME type aliases
+        allowed_types.update(
+            {
+                "text/x-markdown",  # Alternative markdown MIME type
+                "application/x-jsonlines",  # Alternative JSONL MIME type
+                "text/xml",  # Alternative XML MIME type
+            }
+        )
+
+        return allowed_types
+
+    def get_extension_to_mime_type_map(self) -> Dict[str, str]:
+        """
+        Get mapping from file extensions to MIME types.
+
+        Returns:
+            Dictionary mapping extensions (with leading dot) to MIME types
+        """
+        return {file_type.extension: file_type.mime_type for file_type in self._file_types.values()}
+
+    def get_simple_text_mime_types(self) -> Set[str]:
+        """
+        Get set of MIME types that represent simple text files.
+
+        Returns:
+            Set of MIME type strings for files that can be read as plain text
+        """
+        return {file_type.mime_type for file_type in self._file_types.values() if file_type.is_simple_text}
+
+    def is_simple_text_mime_type(self, mime_type: str) -> bool:
+        """
+        Check if a MIME type represents simple text that can be read directly.
+
+        Args:
+            mime_type: MIME type to check
+
+        Returns:
+            True if the MIME type represents simple text
+        """
+        # Check if it's in our registered simple text types
+        if mime_type in self.get_simple_text_mime_types():
+            return True
+
+        # Check for text/* types
+        if mime_type.startswith("text/"):
+            return True
+
+        # Check for known aliases that represent simple text
+        simple_text_aliases = {
+            "application/x-jsonlines",  # Alternative JSONL MIME type
+            "text/xml",  # Alternative XML MIME type
+        }
+        return mime_type in simple_text_aliases
+
+    def get_supported_extensions(self) -> Set[str]:
+        """
+        Get set of all supported file extensions.
+
+        Returns:
+            Set of file extensions (with leading dots)
+        """
+        return set(self._file_types.keys())
+
+    def is_supported_extension(self, extension: str) -> bool:
+        """
+        Check if a file extension is supported.
+
+        Args:
+            extension: File extension (with or without leading dot)
+
+        Returns:
+            True if the extension is supported
+        """
+        if not extension.startswith("."):
+            extension = f".{extension}"
+        return extension in self._file_types
+
+    def get_file_type_info(self, extension: str) -> FileTypeInfo:
+        """
+        Get information about a file type by extension.
+
+        Args:
+            extension: File extension (with or without leading dot)
+
+        Returns:
+            FileTypeInfo object with details about the file type
+
+        Raises:
+            KeyError: If the extension is not supported
+        """
+        if not extension.startswith("."):
+            extension = f".{extension}"
+        return self._file_types[extension]
+
+    def get_chunking_strategy_by_extension(self, extension: str) -> ChunkingStrategy:
+        """
+        Get the chunking strategy for a file based on its extension.
+
+        Args:
+            extension: File extension (with or without leading dot)
+
+        Returns:
+            ChunkingStrategy enum value for the file type
+
+        Raises:
+            KeyError: If the extension is not supported
+        """
+        file_type_info = self.get_file_type_info(extension)
+        return file_type_info.chunking_strategy
+
+    def get_chunking_strategy_by_mime_type(self, mime_type: str) -> ChunkingStrategy:
+        """
+        Get the chunking strategy for a file based on its MIME type.
+
+        Args:
+            mime_type: MIME type of the file
+
+        Returns:
+            ChunkingStrategy enum value for the file type, or LINE_BASED if not found
+        """
+        for file_type in self._file_types.values():
+            if file_type.mime_type == mime_type:
+                return file_type.chunking_strategy
+        return ChunkingStrategy.LINE_BASED
+
+
+# Global registry instance
+file_type_registry = FileTypeRegistry()
+
+
+# Convenience functions for backward compatibility and ease of use
+def register_mime_types() -> None:
+    """Register all supported file types with Python's mimetypes module."""
+    file_type_registry.register_mime_types()
+
+
+def get_allowed_media_types() -> Set[str]:
+    """Get set of all allowed MIME types for file uploads."""
+    return file_type_registry.get_allowed_media_types()
+
+
+def get_extension_to_mime_type_map() -> Dict[str, str]:
+    """Get mapping from file extensions to MIME types."""
+    return file_type_registry.get_extension_to_mime_type_map()
+
+
+def get_simple_text_mime_types() -> Set[str]:
+    """Get set of MIME types that represent simple text files."""
+    return file_type_registry.get_simple_text_mime_types()
+
+
+def is_simple_text_mime_type(mime_type: str) -> bool:
+    """Check if a MIME type represents simple text."""
+    return file_type_registry.is_simple_text_mime_type(mime_type)
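For orientation, the registry above is used both through the module-level file_type_registry instance and through the convenience wrappers; a short usage sketch built only from the definitions in this hunk:

from letta.services.file_processor.file_types import (
    ChunkingStrategy,
    file_type_registry,
    register_mime_types,
)

register_mime_types()  # teach Python's mimetypes module the extensions and aliases registered above

assert file_type_registry.get_chunking_strategy_by_extension("py") is ChunkingStrategy.CODE
assert file_type_registry.get_chunking_strategy_by_mime_type("text/markdown") is ChunkingStrategy.DOCUMENTATION
assert file_type_registry.is_supported_extension(".jsonl")  # the leading dot is optional
assert ".pdf" in file_type_registry.get_supported_extensions()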
letta/services/file_processor/parser/mistral_parser.py
@@ -3,22 +3,13 @@ import base64
 from mistralai import Mistral, OCRPageObject, OCRResponse, OCRUsageInfo

 from letta.log import get_logger
+from letta.services.file_processor.file_types import is_simple_text_mime_type
 from letta.services.file_processor.parser.base_parser import FileParser
 from letta.settings import settings

 logger = get_logger(__name__)


-SIMPLE_TEXT_MIME_TYPES = {
-    "text/plain",
-    "text/markdown",
-    "text/x-markdown",
-    "application/json",
-    "application/jsonl",
-    "application/x-jsonlines",
-}
-
-
 class MistralFileParser(FileParser):
     """Mistral-based OCR extraction"""

@@ -33,7 +24,7 @@ class MistralFileParser(FileParser):

         # TODO: Kind of hacky...we try to exit early here?
         # TODO: Create our internal file parser representation we return instead of OCRResponse
-        if mime_type
+        if is_simple_text_mime_type(mime_type):
             text = content.decode("utf-8", errors="replace")
             return OCRResponse(
                 model=self.model,
letta/services/helpers/agent_manager_helper.py
@@ -229,6 +229,7 @@ def compile_system_message(
     template_format: Literal["f-string", "mustache", "jinja2"] = "f-string",
     previous_message_count: int = 0,
     archival_memory_size: int = 0,
+    tool_rules_solver: Optional[ToolRulesSolver] = None,
 ) -> str:
     """Prepare the final/full system message that will be fed into the LLM API

@@ -237,6 +238,11 @@ def compile_system_message(
     The following are reserved variables:
       - CORE_MEMORY: the in-context memory of the LLM
     """
+    # Add tool rule constraints if available
+    if tool_rules_solver is not None:
+        tool_constraint_block = tool_rules_solver.compile_tool_rule_prompts()
+        if tool_constraint_block:  # There may not be any depending on if there are tool rules attached
+            in_context_memory.blocks.append(tool_constraint_block)

     if user_defined_variables is not None:
         # TODO eventually support the user defining their own variables to inject
letta/services/message_manager.py
@@ -1,4 +1,5 @@
 import json
+import uuid
 from typing import List, Optional, Sequence

 from sqlalchemy import delete, exists, func, select, text
@@ -10,10 +11,12 @@ from letta.orm.message import Message as MessageModel
 from letta.otel.tracing import trace_method
 from letta.schemas.enums import MessageRole
 from letta.schemas.letta_message import LettaMessageUpdateUnion
+from letta.schemas.letta_message_content import ImageSourceType, LettaImage, MessageContentType
 from letta.schemas.message import Message as PydanticMessage
 from letta.schemas.message import MessageUpdate
 from letta.schemas.user import User as PydanticUser
 from letta.server.db import db_registry
+from letta.services.file_manager import FileManager
 from letta.utils import enforce_types

 logger = get_logger(__name__)
@@ -22,6 +25,10 @@ logger = get_logger(__name__)
 class MessageManager:
     """Manager class to handle business logic related to Messages."""

+    def __init__(self):
+        """Initialize the MessageManager."""
+        self.file_manager = FileManager()
+
     @enforce_types
     @trace_method
     def get_message_by_id(self, message_id: str, actor: PydanticUser) -> Optional[PydanticMessage]:
@@ -131,6 +138,31 @@ class MessageManager:
         if not pydantic_msgs:
             return []

+        for message in pydantic_msgs:
+            if isinstance(message.content, list):
+                for content in message.content:
+                    if content.type == MessageContentType.image and content.source.type == ImageSourceType.base64:
+                        # TODO: actually persist image files in db
+                        # file = await self.file_manager.create_file(  # TODO: use batch create to prevent multiple db round trips
+                        #     db_session=session,
+                        #     image_create=FileMetadata(
+                        #         user_id=actor.id,  # TODO: add field
+                        #         source_id= ''  # TODO: make optional
+                        #         organization_id=actor.organization_id,
+                        #         file_type=content.source.media_type,
+                        #         processing_status=FileProcessingStatus.COMPLETED,
+                        #         content= ''  # TODO: should content be added here or in top level text field?
+                        #     ),
+                        #     actor=actor,
+                        #     text=content.source.data,
+                        # )
+                        file_id_placeholder = "file-" + str(uuid.uuid4())
+                        content.source = LettaImage(
+                            file_id=file_id_placeholder,
+                            data=content.source.data,
+                            media_type=content.source.media_type,
+                            detail=content.source.detail,
+                        )
         orm_messages = self._create_many_preprocess(pydantic_msgs, actor)
         async with db_registry.async_session() as session:
             created_messages = await MessageModel.batch_create_async(orm_messages, session, actor=actor)
letta/services/organization_manager.py
@@ -1,5 +1,6 @@
 from typing import List, Optional

+from letta.constants import DEFAULT_ORG_ID, DEFAULT_ORG_NAME
 from letta.orm.errors import NoResultFound
 from letta.orm.organization import Organization as OrganizationModel
 from letta.otel.tracing import trace_method
@@ -12,14 +13,11 @@ from letta.utils import enforce_types
 class OrganizationManager:
     """Manager class to handle business logic related to Organizations."""

-    DEFAULT_ORG_ID = "org-00000000-0000-4000-8000-000000000000"
-    DEFAULT_ORG_NAME = "default_org"
-
     @enforce_types
     @trace_method
     async def get_default_organization_async(self) -> PydanticOrganization:
         """Fetch the default organization."""
-        return await self.get_organization_by_id_async(
+        return await self.get_organization_by_id_async(DEFAULT_ORG_ID)

     @enforce_types
     @trace_method
@@ -72,14 +70,14 @@ class OrganizationManager:
     @trace_method
     def create_default_organization(self) -> PydanticOrganization:
         """Create the default organization."""
-        pydantic_org = PydanticOrganization(name=
+        pydantic_org = PydanticOrganization(name=DEFAULT_ORG_NAME, id=DEFAULT_ORG_ID)
         return self.create_organization(pydantic_org)

     @enforce_types
     @trace_method
     async def create_default_organization_async(self) -> PydanticOrganization:
         """Create the default organization."""
-        return await self.create_organization_async(PydanticOrganization(name=
+        return await self.create_organization_async(PydanticOrganization(name=DEFAULT_ORG_NAME, id=DEFAULT_ORG_ID))

     @enforce_types
     @trace_method