letta-nightly 0.8.3.dev20250612104349__py3-none-any.whl → 0.8.4.dev20250614104137__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57):
  1. letta/__init__.py +1 -1
  2. letta/agent.py +11 -1
  3. letta/agents/base_agent.py +11 -4
  4. letta/agents/ephemeral_summary_agent.py +3 -2
  5. letta/agents/letta_agent.py +109 -78
  6. letta/agents/letta_agent_batch.py +4 -3
  7. letta/agents/voice_agent.py +3 -3
  8. letta/agents/voice_sleeptime_agent.py +3 -2
  9. letta/client/client.py +6 -3
  10. letta/constants.py +6 -0
  11. letta/data_sources/connectors.py +3 -5
  12. letta/functions/async_composio_toolset.py +4 -1
  13. letta/functions/function_sets/files.py +4 -3
  14. letta/functions/schema_generator.py +5 -2
  15. letta/groups/sleeptime_multi_agent_v2.py +4 -3
  16. letta/helpers/converters.py +7 -1
  17. letta/helpers/message_helper.py +31 -11
  18. letta/helpers/tool_rule_solver.py +69 -4
  19. letta/interfaces/anthropic_streaming_interface.py +8 -1
  20. letta/interfaces/openai_streaming_interface.py +4 -1
  21. letta/llm_api/anthropic_client.py +4 -4
  22. letta/llm_api/openai_client.py +56 -11
  23. letta/local_llm/utils.py +3 -20
  24. letta/orm/sqlalchemy_base.py +7 -1
  25. letta/otel/metric_registry.py +26 -0
  26. letta/otel/metrics.py +78 -14
  27. letta/schemas/letta_message_content.py +64 -3
  28. letta/schemas/letta_request.py +5 -1
  29. letta/schemas/message.py +61 -14
  30. letta/schemas/openai/chat_completion_request.py +1 -1
  31. letta/schemas/providers.py +41 -14
  32. letta/schemas/tool_rule.py +67 -0
  33. letta/schemas/user.py +2 -2
  34. letta/server/rest_api/routers/v1/agents.py +22 -12
  35. letta/server/rest_api/routers/v1/sources.py +13 -25
  36. letta/server/server.py +10 -5
  37. letta/services/agent_manager.py +5 -1
  38. letta/services/file_manager.py +219 -0
  39. letta/services/file_processor/chunker/line_chunker.py +119 -14
  40. letta/services/file_processor/file_processor.py +8 -8
  41. letta/services/file_processor/file_types.py +303 -0
  42. letta/services/file_processor/parser/mistral_parser.py +2 -11
  43. letta/services/helpers/agent_manager_helper.py +6 -0
  44. letta/services/message_manager.py +32 -0
  45. letta/services/organization_manager.py +4 -6
  46. letta/services/passage_manager.py +1 -0
  47. letta/services/source_manager.py +0 -208
  48. letta/services/tool_executor/composio_tool_executor.py +5 -1
  49. letta/services/tool_executor/files_tool_executor.py +291 -15
  50. letta/services/user_manager.py +8 -8
  51. letta/system.py +3 -1
  52. letta/utils.py +7 -13
  53. {letta_nightly-0.8.3.dev20250612104349.dist-info → letta_nightly-0.8.4.dev20250614104137.dist-info}/METADATA +2 -2
  54. {letta_nightly-0.8.3.dev20250612104349.dist-info → letta_nightly-0.8.4.dev20250614104137.dist-info}/RECORD +57 -55
  55. {letta_nightly-0.8.3.dev20250612104349.dist-info → letta_nightly-0.8.4.dev20250614104137.dist-info}/LICENSE +0 -0
  56. {letta_nightly-0.8.3.dev20250612104349.dist-info → letta_nightly-0.8.4.dev20250614104137.dist-info}/WHEEL +0 -0
  57. {letta_nightly-0.8.3.dev20250612104349.dist-info → letta_nightly-0.8.4.dev20250614104137.dist-info}/entry_points.txt +0 -0
@@ -1,34 +1,139 @@
1
+ import re
1
2
  from typing import List, Optional
2
3
 
3
4
  from letta.log import get_logger
5
+ from letta.schemas.file import FileMetadata
6
+ from letta.services.file_processor.file_types import ChunkingStrategy, file_type_registry
4
7
 
5
8
  logger = get_logger(__name__)
6
9
 
7
10
 
class LineChunker:
    """Content-aware line chunker that adapts chunking strategy based on file type.

    The strategy is looked up in the file type registry (first by MIME type,
    then by filename extension) and decides how raw text is split into the
    numbered "lines" returned by :meth:`chunk_text`.
    """

    # Sentence boundary: whitespace that follows '.', '!' or '?' AND is
    # followed by an uppercase ASCII letter. Text after the final terminator,
    # or a sentence starting with a lowercase letter/digit, is not split off.
    _SENTENCE_BOUNDARY = r"(?<=[.!?])\s+(?=[A-Z])"

    def __init__(self):
        self.file_type_registry = file_type_registry

    def _determine_chunking_strategy(self, file_metadata: FileMetadata) -> ChunkingStrategy:
        """Determine the best chunking strategy based on file metadata.

        Resolution order:
            1. ``file_metadata.file_type`` (MIME type), when the registry knows it.
            2. The extension of ``file_metadata.file_name``.
            3. ``ChunkingStrategy.LINE_BASED``.

        Lookup failures are logged at debug level and never propagate.
        """
        import os

        registry = self.file_type_registry

        mime_type = file_metadata.file_type
        if mime_type:
            try:
                strategy = registry.get_chunking_strategy_by_mime_type(mime_type)
                # The registry answers LINE_BASED for unknown MIME types instead
                # of raising, so a LINE_BASED answer is only trusted when the
                # MIME type is actually registered. Otherwise fall through to
                # the extension lookup rather than returning the default early.
                if strategy != ChunkingStrategy.LINE_BASED or mime_type in registry.get_extension_to_mime_type_map().values():
                    return strategy
            except Exception as e:
                logger.debug(f"Could not resolve chunking strategy from MIME type {mime_type!r}: {e}")

        file_name = file_metadata.file_name
        if file_name:
            try:
                _, ext = os.path.splitext(file_name)
                if ext:
                    # Registered extensions are lowercase; fold case so that
                    # e.g. "README.MD" or "Main.PY" are still recognised.
                    return registry.get_chunking_strategy_by_extension(ext.lower())
            except KeyError:
                # Unsupported extension: expected, use the default below.
                pass
            except Exception as e:
                logger.debug(f"Could not resolve chunking strategy from file name {file_name!r}: {e}")

        return ChunkingStrategy.LINE_BASED

    def _chunk_by_lines(self, text: str, preserve_indentation: bool = False) -> List[str]:
        """Traditional line-based chunking for code and structured data.

        Args:
            text: Raw text to split on line boundaries.
            preserve_indentation: When True only trailing whitespace is removed
                (leading indentation is kept, as needed for code); when False
                both ends are stripped.

        Returns:
            The cleaned lines, with lines that are empty after cleaning dropped.
        """
        clean = str.rstrip if preserve_indentation else str.strip
        return [line for line in map(clean, text.splitlines()) if line]

    def _chunk_by_sentences(self, text: str) -> List[str]:
        """Sentence-based chunking for documentation and markup.

        Splits on whitespace that follows '.', '!' or '?' and precedes an
        uppercase letter, then collapses every whitespace run inside a
        sentence (including newlines) to a single space.
        """
        sentences = re.split(self._SENTENCE_BOUNDARY, text.strip())

        cleaned_sentences = []
        for sentence in sentences:
            sentence = re.sub(r"\s+", " ", sentence.strip())  # Normalize whitespace
            if sentence:
                cleaned_sentences.append(sentence)
        return cleaned_sentences

    def _chunk_by_characters(self, text: str, target_line_length: int = 100) -> List[str]:
        """Character-based wrapping for prose text.

        Greedily packs whitespace-separated words into lines of at most
        ``target_line_length`` characters (counting the joining spaces). A
        single word longer than the target is kept whole on its own line.
        """
        lines = []
        current_line = []
        current_length = 0  # characters in current_line, excluding joining spaces

        for word in text.split():
            word_length = len(word)
            # len(current_line) is the number of spaces the joined line would
            # contain once this word is appended.
            if current_line and current_length + word_length + len(current_line) > target_line_length:
                lines.append(" ".join(current_line))
                current_line = [word]
                current_length = word_length
            else:
                current_line.append(word)
                current_length += word_length

        if current_line:
            lines.append(" ".join(current_line))

        return lines

    def chunk_text(
        self, text: str, file_metadata: FileMetadata, start: Optional[int] = None, end: Optional[int] = None, add_metadata: bool = True
    ) -> List[str]:
        """Content-aware text chunking based on file type.

        Args:
            text: Raw file content.
            file_metadata: Metadata used to pick the chunking strategy.
            start: First chunk index to return (0-based, inclusive). Only
                honoured when ``end`` is also given.
            end: Chunk index to stop at (exclusive). Only honoured when
                ``start`` is also given.
            add_metadata: When True, a bracketed header describing the viewed
                range and the total chunk count is inserted as the first item.

        Returns:
            Chunks prefixed with their 0-based index (``"<index>: <chunk>"``),
            optionally preceded by the header line.
        """
        strategy = self._determine_chunking_strategy(file_metadata)

        # Apply the appropriate chunking strategy
        if strategy == ChunkingStrategy.DOCUMENTATION:
            content_lines = self._chunk_by_sentences(text)
        elif strategy == ChunkingStrategy.PROSE:
            content_lines = self._chunk_by_characters(text)
        elif strategy == ChunkingStrategy.CODE:
            content_lines = self._chunk_by_lines(text, preserve_indentation=True)
        else:  # STRUCTURED_DATA or LINE_BASED
            content_lines = self._chunk_by_lines(text, preserve_indentation=False)

        # Counted before slicing so the header reports the size of the whole file.
        total_chunks = len(content_lines)

        # `is not None` (rather than truthiness) so that start=0 is a valid range start.
        has_range = start is not None and end is not None
        if has_range:
            content_lines = content_lines[start:end]
            line_offset = start
        else:
            line_offset = 0

        # Add line numbers for all strategies
        content_lines = [f"{i + line_offset}: {line}" for i, line in enumerate(content_lines)]

        # Add metadata about total chunks
        if add_metadata:
            if strategy == ChunkingStrategy.DOCUMENTATION:
                chunk_type = "sentences"
            elif strategy == ChunkingStrategy.PROSE:
                chunk_type = "chunks"
            else:
                chunk_type = "lines"

            if has_range:
                # `end` is exclusive, so the last displayed index is end - 1.
                content_lines.insert(0, f"[Viewing {chunk_type} {start} to {end-1} (out of {total_chunks} {chunk_type})]")
            else:
                content_lines.insert(0, f"[Viewing file start (out of {total_chunks} {chunk_type})]")

        return content_lines
@@ -11,6 +11,7 @@ from letta.schemas.job import Job, JobUpdate
11
11
  from letta.schemas.passage import Passage
12
12
  from letta.schemas.user import User
13
13
  from letta.server.server import SyncServer
14
+ from letta.services.file_manager import FileManager
14
15
  from letta.services.file_processor.chunker.line_chunker import LineChunker
15
16
  from letta.services.file_processor.chunker.llama_index_chunker import LlamaIndexChunker
16
17
  from letta.services.file_processor.embedder.openai_embedder import OpenAIEmbedder
@@ -38,6 +39,7 @@ class FileProcessor:
38
39
  self.line_chunker = LineChunker()
39
40
  self.embedder = embedder
40
41
  self.max_file_size = max_file_size
42
+ self.file_manager = FileManager()
41
43
  self.source_manager = SourceManager()
42
44
  self.passage_manager = PassageManager()
43
45
  self.job_manager = JobManager()
@@ -58,7 +60,7 @@ class FileProcessor:
58
60
 
59
61
  # Create file as early as possible with no content
60
62
  file_metadata.processing_status = FileProcessingStatus.PARSING # Parsing now
61
- file_metadata = await self.source_manager.create_file(file_metadata, self.actor)
63
+ file_metadata = await self.file_manager.create_file(file_metadata, self.actor)
62
64
 
63
65
  try:
64
66
  # Ensure we're working with bytes
@@ -73,16 +75,14 @@ class FileProcessor:
73
75
 
74
76
  # update file with raw text
75
77
  raw_markdown_text = "".join([page.markdown for page in ocr_response.pages])
76
- file_metadata = await self.source_manager.upsert_file_content(
77
- file_id=file_metadata.id, text=raw_markdown_text, actor=self.actor
78
- )
79
- file_metadata = await self.source_manager.update_file_status(
78
+ file_metadata = await self.file_manager.upsert_file_content(file_id=file_metadata.id, text=raw_markdown_text, actor=self.actor)
79
+ file_metadata = await self.file_manager.update_file_status(
80
80
  file_id=file_metadata.id, actor=self.actor, processing_status=FileProcessingStatus.EMBEDDING
81
81
  )
82
82
 
83
83
  # Insert to agent context window
84
84
  # TODO: Rethink this line chunking mechanism
85
- content_lines = self.line_chunker.chunk_text(text=raw_markdown_text)
85
+ content_lines = self.line_chunker.chunk_text(text=raw_markdown_text, file_metadata=file_metadata)
86
86
  visible_content = "\n".join(content_lines)
87
87
 
88
88
  await server.insert_file_into_context_windows(
@@ -123,7 +123,7 @@ class FileProcessor:
123
123
  job.metadata["num_passages"] = len(all_passages)
124
124
  await self.job_manager.update_job_by_id_async(job_id=job.id, job_update=JobUpdate(**job.model_dump()), actor=self.actor)
125
125
 
126
- await self.source_manager.update_file_status(
126
+ await self.file_manager.update_file_status(
127
127
  file_id=file_metadata.id, actor=self.actor, processing_status=FileProcessingStatus.COMPLETED
128
128
  )
129
129
 
@@ -138,7 +138,7 @@ class FileProcessor:
138
138
  job.metadata["error"] = str(e)
139
139
  await self.job_manager.update_job_by_id_async(job_id=job.id, job_update=JobUpdate(**job.model_dump()), actor=self.actor)
140
140
 
141
- await self.source_manager.update_file_status(
141
+ await self.file_manager.update_file_status(
142
142
  file_id=file_metadata.id, actor=self.actor, processing_status=FileProcessingStatus.ERROR, error_message=str(e)
143
143
  )
144
144
 
@@ -0,0 +1,303 @@
1
+ """
2
+ Centralized file type configuration for supported file formats.
3
+
4
+ This module provides a single source of truth for file type definitions,
5
+ mime types, and file processing capabilities across the Letta codebase.
6
+ """
7
+
8
+ import mimetypes
9
+ from dataclasses import dataclass
10
+ from enum import Enum
11
+ from typing import Dict, Set
12
+
13
+
14
class ChunkingStrategy(str, Enum):
    """Enum for different file chunking strategies."""

    CODE = "code"  # Line-based chunking for code files
    STRUCTURED_DATA = "structured_data"  # Line-based chunking for JSON, XML, etc.
    DOCUMENTATION = "documentation"  # Chunking for documentation such as Markdown
    PROSE = "prose"  # Character-based wrapping for plain text
    LINE_BASED = "line_based"  # Default line-based chunking


@dataclass
class FileTypeInfo:
    """Information about a supported file type."""

    extension: str  # Normalized extension: lowercase, with leading dot
    mime_type: str  # Canonical MIME type registered for the extension
    is_simple_text: bool  # True when the file can be read directly as text
    description: str  # Human-readable description
    chunking_strategy: ChunkingStrategy = ChunkingStrategy.LINE_BASED


class FileTypeRegistry:
    """Central registry for supported file types.

    Extensions are matched case-insensitively. MIME types are matched
    case-insensitively and without parameters (``; charset=...``), and the
    known alias MIME types resolve to their canonical registered type.
    """

    # Alternative MIME types that may be encountered -> canonical registered type.
    _MIME_TYPE_ALIASES: Dict[str, str] = {
        "text/x-markdown": "text/markdown",  # Alternative markdown MIME type
        "application/x-jsonlines": "application/jsonl",  # Alternative JSONL MIME type
        "text/xml": "application/xml",  # Alternative XML MIME type
    }

    # Extension each alias is registered under in Python's mimetypes module.
    _MIME_TYPE_ALIAS_EXTENSIONS: Dict[str, str] = {
        "text/x-markdown": ".md",
        "application/x-jsonlines": ".jsonl",
        "text/xml": ".xml",
    }

    def __init__(self):
        """Initialize the registry with default supported file types."""
        self._file_types: Dict[str, FileTypeInfo] = {}
        self._register_default_types()

    @staticmethod
    def _normalize_extension(extension: str) -> str:
        """Return ``extension`` lowercased, trimmed and with a leading dot."""
        extension = extension.strip().lower()
        return extension if extension.startswith(".") else f".{extension}"

    @staticmethod
    def _normalize_mime_type(mime_type: str) -> str:
        """Return ``mime_type`` lowercased with any ``;``-parameters removed."""
        return mime_type.split(";", 1)[0].strip().lower()

    def _canonical_mime_type(self, mime_type: str) -> str:
        """Normalize ``mime_type`` and map known aliases to their canonical type."""
        normalized = self._normalize_mime_type(mime_type)
        return self._MIME_TYPE_ALIASES.get(normalized, normalized)

    def _register_default_types(self) -> None:
        """Register all default supported file types."""
        # Document formats
        self.register(".pdf", "application/pdf", False, "PDF document", ChunkingStrategy.LINE_BASED)
        self.register(".txt", "text/plain", True, "Plain text file", ChunkingStrategy.PROSE)
        self.register(".md", "text/markdown", True, "Markdown document", ChunkingStrategy.DOCUMENTATION)
        self.register(".markdown", "text/markdown", True, "Markdown document", ChunkingStrategy.DOCUMENTATION)
        self.register(".json", "application/json", True, "JSON data file", ChunkingStrategy.STRUCTURED_DATA)
        self.register(".jsonl", "application/jsonl", True, "JSON Lines file", ChunkingStrategy.STRUCTURED_DATA)

        # Programming languages
        self.register(".py", "text/x-python", True, "Python source code", ChunkingStrategy.CODE)
        self.register(".js", "text/javascript", True, "JavaScript source code", ChunkingStrategy.CODE)
        self.register(".ts", "text/x-typescript", True, "TypeScript source code", ChunkingStrategy.CODE)
        self.register(".java", "text/x-java-source", True, "Java source code", ChunkingStrategy.CODE)
        self.register(".cpp", "text/x-c++", True, "C++ source code", ChunkingStrategy.CODE)
        self.register(".cxx", "text/x-c++", True, "C++ source code", ChunkingStrategy.CODE)
        self.register(".c", "text/x-c", True, "C source code", ChunkingStrategy.CODE)
        self.register(".h", "text/x-c", True, "C/C++ header file", ChunkingStrategy.CODE)
        self.register(".cs", "text/x-csharp", True, "C# source code", ChunkingStrategy.CODE)
        self.register(".php", "text/x-php", True, "PHP source code", ChunkingStrategy.CODE)
        self.register(".rb", "text/x-ruby", True, "Ruby source code", ChunkingStrategy.CODE)
        self.register(".go", "text/x-go", True, "Go source code", ChunkingStrategy.CODE)
        self.register(".rs", "text/x-rust", True, "Rust source code", ChunkingStrategy.CODE)
        self.register(".swift", "text/x-swift", True, "Swift source code", ChunkingStrategy.CODE)
        self.register(".kt", "text/x-kotlin", True, "Kotlin source code", ChunkingStrategy.CODE)
        self.register(".scala", "text/x-scala", True, "Scala source code", ChunkingStrategy.CODE)
        self.register(".r", "text/x-r", True, "R source code", ChunkingStrategy.CODE)
        self.register(".m", "text/x-objective-c", True, "Objective-C source code", ChunkingStrategy.CODE)

        # Web technologies
        self.register(".html", "text/html", True, "HTML document", ChunkingStrategy.CODE)
        self.register(".htm", "text/html", True, "HTML document", ChunkingStrategy.CODE)
        self.register(".css", "text/css", True, "CSS stylesheet", ChunkingStrategy.STRUCTURED_DATA)
        self.register(".scss", "text/x-scss", True, "SCSS stylesheet", ChunkingStrategy.STRUCTURED_DATA)
        self.register(".sass", "text/x-sass", True, "Sass stylesheet", ChunkingStrategy.STRUCTURED_DATA)
        self.register(".less", "text/x-less", True, "Less stylesheet", ChunkingStrategy.STRUCTURED_DATA)
        self.register(".vue", "text/x-vue", True, "Vue.js component", ChunkingStrategy.CODE)
        self.register(".jsx", "text/x-jsx", True, "JSX source code", ChunkingStrategy.CODE)
        self.register(".tsx", "text/x-tsx", True, "TSX source code", ChunkingStrategy.CODE)

        # Configuration and data formats
        self.register(".xml", "application/xml", True, "XML document", ChunkingStrategy.STRUCTURED_DATA)
        self.register(".yaml", "text/x-yaml", True, "YAML configuration", ChunkingStrategy.STRUCTURED_DATA)
        self.register(".yml", "text/x-yaml", True, "YAML configuration", ChunkingStrategy.STRUCTURED_DATA)
        self.register(".toml", "application/toml", True, "TOML configuration", ChunkingStrategy.STRUCTURED_DATA)
        self.register(".ini", "text/x-ini", True, "INI configuration", ChunkingStrategy.STRUCTURED_DATA)
        self.register(".cfg", "text/x-conf", True, "Configuration file", ChunkingStrategy.STRUCTURED_DATA)
        self.register(".conf", "text/x-conf", True, "Configuration file", ChunkingStrategy.STRUCTURED_DATA)

        # Scripts and SQL
        self.register(".sh", "text/x-shellscript", True, "Shell script", ChunkingStrategy.CODE)
        self.register(".bash", "text/x-shellscript", True, "Bash script", ChunkingStrategy.CODE)
        self.register(".ps1", "text/x-powershell", True, "PowerShell script", ChunkingStrategy.CODE)
        self.register(".bat", "text/x-batch", True, "Batch script", ChunkingStrategy.CODE)
        self.register(".cmd", "text/x-batch", True, "Command script", ChunkingStrategy.CODE)
        self.register(".dockerfile", "text/x-dockerfile", True, "Dockerfile", ChunkingStrategy.CODE)
        self.register(".sql", "text/x-sql", True, "SQL script", ChunkingStrategy.CODE)

    def register(
        self,
        extension: str,
        mime_type: str,
        is_simple_text: bool,
        description: str,
        chunking_strategy: ChunkingStrategy = ChunkingStrategy.LINE_BASED,
    ) -> None:
        """
        Register a new file type, replacing any existing entry for the extension.

        Args:
            extension: File extension, with or without leading dot, any case (e.g., '.py')
            mime_type: MIME type for the file
            is_simple_text: Whether this is a simple text file that can be read directly
            description: Human-readable description of the file type
            chunking_strategy: Strategy for chunking this file type
        """
        extension = self._normalize_extension(extension)

        self._file_types[extension] = FileTypeInfo(
            extension=extension,
            mime_type=mime_type,
            is_simple_text=is_simple_text,
            description=description,
            chunking_strategy=chunking_strategy,
        )

    def register_mime_types(self) -> None:
        """Register all file types with Python's mimetypes module."""
        for file_type in self._file_types.values():
            mimetypes.add_type(file_type.mime_type, file_type.extension)

        # Also register the alias MIME types that may be encountered.
        # NOTE: mimetypes keeps the most recent type added for an extension, so
        # after this call guess_type() reports the alias for .md/.jsonl/.xml.
        # The registry lookups below resolve those aliases to canonical types.
        for alias, extension in self._MIME_TYPE_ALIAS_EXTENSIONS.items():
            mimetypes.add_type(alias, extension)

    def get_allowed_media_types(self) -> Set[str]:
        """
        Get set of all allowed MIME types.

        Returns:
            Set of MIME type strings that are supported for upload
            (registered types plus their known aliases)
        """
        allowed_types = {file_type.mime_type for file_type in self._file_types.values()}
        allowed_types.update(self._MIME_TYPE_ALIASES)
        return allowed_types

    def get_extension_to_mime_type_map(self) -> Dict[str, str]:
        """
        Get mapping from file extensions to MIME types.

        Returns:
            Dictionary mapping extensions (with leading dot) to MIME types
        """
        return {file_type.extension: file_type.mime_type for file_type in self._file_types.values()}

    def get_simple_text_mime_types(self) -> Set[str]:
        """
        Get set of MIME types that represent simple text files.

        Returns:
            Set of MIME type strings for files that can be read as plain text
        """
        return {file_type.mime_type for file_type in self._file_types.values() if file_type.is_simple_text}

    def is_simple_text_mime_type(self, mime_type: str) -> bool:
        """
        Check if a MIME type represents simple text that can be read directly.

        Args:
            mime_type: MIME type to check; parameters such as '; charset=utf-8'
                and letter case are ignored

        Returns:
            True if the MIME type is any text/* type, a registered simple text
            type, or an alias of one; False otherwise (including empty input)
        """
        if not mime_type:
            return False

        normalized = self._normalize_mime_type(mime_type)

        # Any text/* type is readable as text, registered or not.
        if normalized.startswith("text/"):
            return True

        canonical = self._MIME_TYPE_ALIASES.get(normalized, normalized)
        return canonical in self.get_simple_text_mime_types()

    def get_supported_extensions(self) -> Set[str]:
        """
        Get set of all supported file extensions.

        Returns:
            Set of file extensions (with leading dots)
        """
        return set(self._file_types.keys())

    def is_supported_extension(self, extension: str) -> bool:
        """
        Check if a file extension is supported.

        Args:
            extension: File extension (with or without leading dot, any case)

        Returns:
            True if the extension is supported
        """
        return self._normalize_extension(extension) in self._file_types

    def get_file_type_info(self, extension: str) -> FileTypeInfo:
        """
        Get information about a file type by extension.

        Args:
            extension: File extension (with or without leading dot, any case)

        Returns:
            FileTypeInfo object with details about the file type

        Raises:
            KeyError: If the extension is not supported
        """
        return self._file_types[self._normalize_extension(extension)]

    def get_chunking_strategy_by_extension(self, extension: str) -> ChunkingStrategy:
        """
        Get the chunking strategy for a file based on its extension.

        Args:
            extension: File extension (with or without leading dot, any case)

        Returns:
            ChunkingStrategy enum value for the file type

        Raises:
            KeyError: If the extension is not supported
        """
        return self.get_file_type_info(extension).chunking_strategy

    def get_chunking_strategy_by_mime_type(self, mime_type: str) -> ChunkingStrategy:
        """
        Get the chunking strategy for a file based on its MIME type.

        Args:
            mime_type: MIME type of the file; parameters and letter case are
                ignored and known aliases map to their canonical type

        Returns:
            ChunkingStrategy enum value for the file type, or LINE_BASED if not found
        """
        if not mime_type:
            return ChunkingStrategy.LINE_BASED

        canonical = self._canonical_mime_type(mime_type)
        # Several extensions can share a MIME type; the first registered one wins.
        for file_type in self._file_types.values():
            if file_type.mime_type == canonical:
                return file_type.chunking_strategy
        return ChunkingStrategy.LINE_BASED


# Global registry instance
file_type_registry = FileTypeRegistry()


# Convenience functions for backward compatibility and ease of use
def register_mime_types() -> None:
    """Register all supported file types with Python's mimetypes module."""
    file_type_registry.register_mime_types()


def get_allowed_media_types() -> Set[str]:
    """Get set of all allowed MIME types for file uploads."""
    return file_type_registry.get_allowed_media_types()


def get_extension_to_mime_type_map() -> Dict[str, str]:
    """Get mapping from file extensions to MIME types."""
    return file_type_registry.get_extension_to_mime_type_map()


def get_simple_text_mime_types() -> Set[str]:
    """Get set of MIME types that represent simple text files."""
    return file_type_registry.get_simple_text_mime_types()


def is_simple_text_mime_type(mime_type: str) -> bool:
    """Check if a MIME type represents simple text."""
    return file_type_registry.is_simple_text_mime_type(mime_type)
@@ -3,22 +3,13 @@ import base64
3
3
  from mistralai import Mistral, OCRPageObject, OCRResponse, OCRUsageInfo
4
4
 
5
5
  from letta.log import get_logger
6
+ from letta.services.file_processor.file_types import is_simple_text_mime_type
6
7
  from letta.services.file_processor.parser.base_parser import FileParser
7
8
  from letta.settings import settings
8
9
 
9
10
  logger = get_logger(__name__)
10
11
 
11
12
 
12
- SIMPLE_TEXT_MIME_TYPES = {
13
- "text/plain",
14
- "text/markdown",
15
- "text/x-markdown",
16
- "application/json",
17
- "application/jsonl",
18
- "application/x-jsonlines",
19
- }
20
-
21
-
22
13
  class MistralFileParser(FileParser):
23
14
  """Mistral-based OCR extraction"""
24
15
 
@@ -33,7 +24,7 @@ class MistralFileParser(FileParser):
33
24
 
34
25
  # TODO: Kind of hacky...we try to exit early here?
35
26
  # TODO: Create our internal file parser representation we return instead of OCRResponse
36
- if mime_type in SIMPLE_TEXT_MIME_TYPES or mime_type.startswith("text/"):
27
+ if is_simple_text_mime_type(mime_type):
37
28
  text = content.decode("utf-8", errors="replace")
38
29
  return OCRResponse(
39
30
  model=self.model,
@@ -229,6 +229,7 @@ def compile_system_message(
229
229
  template_format: Literal["f-string", "mustache", "jinja2"] = "f-string",
230
230
  previous_message_count: int = 0,
231
231
  archival_memory_size: int = 0,
232
+ tool_rules_solver: Optional[ToolRulesSolver] = None,
232
233
  ) -> str:
233
234
  """Prepare the final/full system message that will be fed into the LLM API
234
235
 
@@ -237,6 +238,11 @@ def compile_system_message(
237
238
  The following are reserved variables:
238
239
  - CORE_MEMORY: the in-context memory of the LLM
239
240
  """
241
+ # Add tool rule constraints if available
242
+ if tool_rules_solver is not None:
243
+ tool_constraint_block = tool_rules_solver.compile_tool_rule_prompts()
244
+ if tool_constraint_block: # There may not be any depending on if there are tool rules attached
245
+ in_context_memory.blocks.append(tool_constraint_block)
240
246
 
241
247
  if user_defined_variables is not None:
242
248
  # TODO eventually support the user defining their own variables to inject
@@ -1,4 +1,5 @@
1
1
  import json
2
+ import uuid
2
3
  from typing import List, Optional, Sequence
3
4
 
4
5
  from sqlalchemy import delete, exists, func, select, text
@@ -10,10 +11,12 @@ from letta.orm.message import Message as MessageModel
10
11
  from letta.otel.tracing import trace_method
11
12
  from letta.schemas.enums import MessageRole
12
13
  from letta.schemas.letta_message import LettaMessageUpdateUnion
14
+ from letta.schemas.letta_message_content import ImageSourceType, LettaImage, MessageContentType
13
15
  from letta.schemas.message import Message as PydanticMessage
14
16
  from letta.schemas.message import MessageUpdate
15
17
  from letta.schemas.user import User as PydanticUser
16
18
  from letta.server.db import db_registry
19
+ from letta.services.file_manager import FileManager
17
20
  from letta.utils import enforce_types
18
21
 
19
22
  logger = get_logger(__name__)
@@ -22,6 +25,10 @@ logger = get_logger(__name__)
22
25
  class MessageManager:
23
26
  """Manager class to handle business logic related to Messages."""
24
27
 
28
+ def __init__(self):
29
+ """Initialize the MessageManager."""
30
+ self.file_manager = FileManager()
31
+
25
32
  @enforce_types
26
33
  @trace_method
27
34
  def get_message_by_id(self, message_id: str, actor: PydanticUser) -> Optional[PydanticMessage]:
@@ -131,6 +138,31 @@ class MessageManager:
131
138
  if not pydantic_msgs:
132
139
  return []
133
140
 
141
+ for message in pydantic_msgs:
142
+ if isinstance(message.content, list):
143
+ for content in message.content:
144
+ if content.type == MessageContentType.image and content.source.type == ImageSourceType.base64:
145
+ # TODO: actually persist image files in db
146
+ # file = await self.file_manager.create_file( # TODO: use batch create to prevent multiple db round trips
147
+ # db_session=session,
148
+ # image_create=FileMetadata(
149
+ # user_id=actor.id, # TODO: add field
150
+ # source_id= '' # TODO: make optional
151
+ # organization_id=actor.organization_id,
152
+ # file_type=content.source.media_type,
153
+ # processing_status=FileProcessingStatus.COMPLETED,
154
+ # content= '' # TODO: should content be added here or in top level text field?
155
+ # ),
156
+ # actor=actor,
157
+ # text=content.source.data,
158
+ # )
159
+ file_id_placeholder = "file-" + str(uuid.uuid4())
160
+ content.source = LettaImage(
161
+ file_id=file_id_placeholder,
162
+ data=content.source.data,
163
+ media_type=content.source.media_type,
164
+ detail=content.source.detail,
165
+ )
134
166
  orm_messages = self._create_many_preprocess(pydantic_msgs, actor)
135
167
  async with db_registry.async_session() as session:
136
168
  created_messages = await MessageModel.batch_create_async(orm_messages, session, actor=actor)
@@ -1,5 +1,6 @@
1
1
  from typing import List, Optional
2
2
 
3
+ from letta.constants import DEFAULT_ORG_ID, DEFAULT_ORG_NAME
3
4
  from letta.orm.errors import NoResultFound
4
5
  from letta.orm.organization import Organization as OrganizationModel
5
6
  from letta.otel.tracing import trace_method
@@ -12,14 +13,11 @@ from letta.utils import enforce_types
12
13
  class OrganizationManager:
13
14
  """Manager class to handle business logic related to Organizations."""
14
15
 
15
- DEFAULT_ORG_ID = "org-00000000-0000-4000-8000-000000000000"
16
- DEFAULT_ORG_NAME = "default_org"
17
-
18
16
  @enforce_types
19
17
  @trace_method
20
18
  async def get_default_organization_async(self) -> PydanticOrganization:
21
19
  """Fetch the default organization."""
22
- return await self.get_organization_by_id_async(self.DEFAULT_ORG_ID)
20
+ return await self.get_organization_by_id_async(DEFAULT_ORG_ID)
23
21
 
24
22
  @enforce_types
25
23
  @trace_method
@@ -72,14 +70,14 @@ class OrganizationManager:
72
70
  @trace_method
73
71
  def create_default_organization(self) -> PydanticOrganization:
74
72
  """Create the default organization."""
75
- pydantic_org = PydanticOrganization(name=self.DEFAULT_ORG_NAME, id=self.DEFAULT_ORG_ID)
73
+ pydantic_org = PydanticOrganization(name=DEFAULT_ORG_NAME, id=DEFAULT_ORG_ID)
76
74
  return self.create_organization(pydantic_org)
77
75
 
78
76
  @enforce_types
79
77
  @trace_method
80
78
  async def create_default_organization_async(self) -> PydanticOrganization:
81
79
  """Create the default organization."""
82
- return await self.create_organization_async(PydanticOrganization(name=self.DEFAULT_ORG_NAME, id=self.DEFAULT_ORG_ID))
80
+ return await self.create_organization_async(PydanticOrganization(name=DEFAULT_ORG_NAME, id=DEFAULT_ORG_ID))
83
81
 
84
82
  @enforce_types
85
83
  @trace_method