letta-nightly 0.8.3.dev20250612104349__py3-none-any.whl → 0.8.4.dev20250614104137__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/__init__.py +1 -1
- letta/agent.py +11 -1
- letta/agents/base_agent.py +11 -4
- letta/agents/ephemeral_summary_agent.py +3 -2
- letta/agents/letta_agent.py +109 -78
- letta/agents/letta_agent_batch.py +4 -3
- letta/agents/voice_agent.py +3 -3
- letta/agents/voice_sleeptime_agent.py +3 -2
- letta/client/client.py +6 -3
- letta/constants.py +6 -0
- letta/data_sources/connectors.py +3 -5
- letta/functions/async_composio_toolset.py +4 -1
- letta/functions/function_sets/files.py +4 -3
- letta/functions/schema_generator.py +5 -2
- letta/groups/sleeptime_multi_agent_v2.py +4 -3
- letta/helpers/converters.py +7 -1
- letta/helpers/message_helper.py +31 -11
- letta/helpers/tool_rule_solver.py +69 -4
- letta/interfaces/anthropic_streaming_interface.py +8 -1
- letta/interfaces/openai_streaming_interface.py +4 -1
- letta/llm_api/anthropic_client.py +4 -4
- letta/llm_api/openai_client.py +56 -11
- letta/local_llm/utils.py +3 -20
- letta/orm/sqlalchemy_base.py +7 -1
- letta/otel/metric_registry.py +26 -0
- letta/otel/metrics.py +78 -14
- letta/schemas/letta_message_content.py +64 -3
- letta/schemas/letta_request.py +5 -1
- letta/schemas/message.py +61 -14
- letta/schemas/openai/chat_completion_request.py +1 -1
- letta/schemas/providers.py +41 -14
- letta/schemas/tool_rule.py +67 -0
- letta/schemas/user.py +2 -2
- letta/server/rest_api/routers/v1/agents.py +22 -12
- letta/server/rest_api/routers/v1/sources.py +13 -25
- letta/server/server.py +10 -5
- letta/services/agent_manager.py +5 -1
- letta/services/file_manager.py +219 -0
- letta/services/file_processor/chunker/line_chunker.py +119 -14
- letta/services/file_processor/file_processor.py +8 -8
- letta/services/file_processor/file_types.py +303 -0
- letta/services/file_processor/parser/mistral_parser.py +2 -11
- letta/services/helpers/agent_manager_helper.py +6 -0
- letta/services/message_manager.py +32 -0
- letta/services/organization_manager.py +4 -6
- letta/services/passage_manager.py +1 -0
- letta/services/source_manager.py +0 -208
- letta/services/tool_executor/composio_tool_executor.py +5 -1
- letta/services/tool_executor/files_tool_executor.py +291 -15
- letta/services/user_manager.py +8 -8
- letta/system.py +3 -1
- letta/utils.py +7 -13
- {letta_nightly-0.8.3.dev20250612104349.dist-info → letta_nightly-0.8.4.dev20250614104137.dist-info}/METADATA +2 -2
- {letta_nightly-0.8.3.dev20250612104349.dist-info → letta_nightly-0.8.4.dev20250614104137.dist-info}/RECORD +57 -55
- {letta_nightly-0.8.3.dev20250612104349.dist-info → letta_nightly-0.8.4.dev20250614104137.dist-info}/LICENSE +0 -0
- {letta_nightly-0.8.3.dev20250612104349.dist-info → letta_nightly-0.8.4.dev20250614104137.dist-info}/WHEEL +0 -0
- {letta_nightly-0.8.3.dev20250612104349.dist-info → letta_nightly-0.8.4.dev20250614104137.dist-info}/entry_points.txt +0 -0
letta/services/file_processor/chunker/line_chunker.py
@@ -1,34 +1,139 @@
+import re
 from typing import List, Optional

 from letta.log import get_logger
+from letta.schemas.file import FileMetadata
+from letta.services.file_processor.file_types import ChunkingStrategy, file_type_registry

 logger = get_logger(__name__)


 class LineChunker:
-    """
+    """Content-aware line chunker that adapts chunking strategy based on file type"""

     def __init__(self):
-
+        self.file_type_registry = file_type_registry

-
-
-
-
-
+    def _determine_chunking_strategy(self, file_metadata: FileMetadata) -> ChunkingStrategy:
+        """Determine the best chunking strategy based on file metadata"""
+        # Try to get strategy from MIME type first
+        if file_metadata.file_type:
+            try:
+                return self.file_type_registry.get_chunking_strategy_by_mime_type(file_metadata.file_type)
+            except Exception:
+                pass

-
+        # Fallback to filename extension
+        if file_metadata.file_name:
+            try:
+                # Extract extension from filename
+                import os
+
+                _, ext = os.path.splitext(file_metadata.file_name)
+                if ext:
+                    return self.file_type_registry.get_chunking_strategy_by_extension(ext)
+            except Exception:
+                pass
+
+        # Default fallback
+        return ChunkingStrategy.LINE_BASED
+
+    def _chunk_by_lines(self, text: str, preserve_indentation: bool = False) -> List[str]:
+        """Traditional line-based chunking for code and structured data"""
+        lines = []
+        for line in text.splitlines():
+            if preserve_indentation:
+                # For code: preserve leading whitespace (indentation), remove trailing whitespace
+                line = line.rstrip()
+                # Only skip completely empty lines
+                if line:
+                    lines.append(line)
+            else:
+                # For structured data: strip all whitespace
+                line = line.strip()
+                if line:
+                    lines.append(line)
+        return lines
+
+    def _chunk_by_sentences(self, text: str) -> List[str]:
+        """Sentence-based chunking for documentation and markup"""
+        # Simple sentence splitting on periods, exclamation marks, and question marks
+        # followed by whitespace or end of string
+        sentence_pattern = r"(?<=[.!?])\s+(?=[A-Z])"
+
+        # Split text into sentences
+        sentences = re.split(sentence_pattern, text.strip())
+
+        # Clean up sentences - remove extra whitespace and empty sentences
+        cleaned_sentences = []
+        for sentence in sentences:
+            sentence = re.sub(r"\s+", " ", sentence.strip())  # Normalize whitespace
+            if sentence:
+                cleaned_sentences.append(sentence)
+
+        return cleaned_sentences
+
+    def _chunk_by_characters(self, text: str, target_line_length: int = 100) -> List[str]:
+        """Character-based wrapping for prose text"""
+        words = text.split()
+        lines = []
+        current_line = []
+        current_length = 0
+
+        for word in words:
+            # Check if adding this word would exceed the target length
+            word_length = len(word)
+            if current_length + word_length + len(current_line) > target_line_length and current_line:
+                # Start a new line
+                lines.append(" ".join(current_line))
+                current_line = [word]
+                current_length = word_length
+            else:
+                current_line.append(word)
+                current_length += word_length
+
+        # Add the last line if there's content
+        if current_line:
+            lines.append(" ".join(current_line))
+
+        return [line for line in lines if line.strip()]
+
+    def chunk_text(
+        self, text: str, file_metadata: FileMetadata, start: Optional[int] = None, end: Optional[int] = None, add_metadata: bool = True
+    ) -> List[str]:
+        """Content-aware text chunking based on file type"""
+        strategy = self._determine_chunking_strategy(file_metadata)
+
+        # Apply the appropriate chunking strategy
+        if strategy == ChunkingStrategy.DOCUMENTATION:
+            content_lines = self._chunk_by_sentences(text)
+        elif strategy == ChunkingStrategy.PROSE:
+            content_lines = self._chunk_by_characters(text)
+        elif strategy == ChunkingStrategy.CODE:
+            content_lines = self._chunk_by_lines(text, preserve_indentation=True)
+        else:  # STRUCTURED_DATA or LINE_BASED
+            content_lines = self._chunk_by_lines(text, preserve_indentation=False)
+
+        total_chunks = len(content_lines)
+
+        # Handle start/end slicing
+        if start is not None and end is not None:
             content_lines = content_lines[start:end]
             line_offset = start
         else:
             line_offset = 0

-
+        # Add line numbers for all strategies
+        content_lines = [f"{i + line_offset}: {line}" for i, line in enumerate(content_lines)]

-        # Add metadata about total
-        if
-
-
-
+        # Add metadata about total chunks
+        if add_metadata:
+            chunk_type = (
+                "sentences" if strategy == ChunkingStrategy.DOCUMENTATION else "chunks" if strategy == ChunkingStrategy.PROSE else "lines"
+            )
+            if start is not None and end is not None:
+                content_lines.insert(0, f"[Viewing {chunk_type} {start} to {end-1} (out of {total_chunks} {chunk_type})]")
+            else:
+                content_lines.insert(0, f"[Viewing file start (out of {total_chunks} {chunk_type})]")

         return content_lines
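To make the new content-aware behavior concrete, the sketch below mirrors the DOCUMENTATION branch of chunk_text in plain Python. It is a standalone illustration rather than a call into the Letta classes (constructing a real FileMetadata requires fields this diff does not show), but the regex, the per-chunk numbering, and the header format are taken directly from the hunk above.

import re

def chunk_markdown(text: str) -> list[str]:
    # DOCUMENTATION strategy: split on sentence boundaries, then normalize whitespace
    sentence_pattern = r"(?<=[.!?])\s+(?=[A-Z])"
    sentences = [re.sub(r"\s+", " ", s.strip()) for s in re.split(sentence_pattern, text.strip())]
    chunks = [s for s in sentences if s]
    # Number each chunk and prepend the metadata header, as chunk_text does with add_metadata=True
    numbered = [f"{i}: {s}" for i, s in enumerate(chunks)]
    return [f"[Viewing file start (out of {len(chunks)} sentences)]"] + numbered

print("\n".join(chunk_markdown("Letta agents read files. Chunks are numbered! Headers describe the view.")))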
letta/services/file_processor/file_processor.py
@@ -11,6 +11,7 @@ from letta.schemas.job import Job, JobUpdate
 from letta.schemas.passage import Passage
 from letta.schemas.user import User
 from letta.server.server import SyncServer
+from letta.services.file_manager import FileManager
 from letta.services.file_processor.chunker.line_chunker import LineChunker
 from letta.services.file_processor.chunker.llama_index_chunker import LlamaIndexChunker
 from letta.services.file_processor.embedder.openai_embedder import OpenAIEmbedder
@@ -38,6 +39,7 @@ class FileProcessor:
         self.line_chunker = LineChunker()
         self.embedder = embedder
         self.max_file_size = max_file_size
+        self.file_manager = FileManager()
         self.source_manager = SourceManager()
         self.passage_manager = PassageManager()
         self.job_manager = JobManager()
@@ -58,7 +60,7 @@ class FileProcessor:

         # Create file as early as possible with no content
         file_metadata.processing_status = FileProcessingStatus.PARSING  # Parsing now
-        file_metadata = await self.
+        file_metadata = await self.file_manager.create_file(file_metadata, self.actor)

         try:
             # Ensure we're working with bytes
@@ -73,16 +75,14 @@ class FileProcessor:

             # update file with raw text
             raw_markdown_text = "".join([page.markdown for page in ocr_response.pages])
-            file_metadata = await self.
-
-            )
-            file_metadata = await self.source_manager.update_file_status(
+            file_metadata = await self.file_manager.upsert_file_content(file_id=file_metadata.id, text=raw_markdown_text, actor=self.actor)
+            file_metadata = await self.file_manager.update_file_status(
                 file_id=file_metadata.id, actor=self.actor, processing_status=FileProcessingStatus.EMBEDDING
             )

             # Insert to agent context window
             # TODO: Rethink this line chunking mechanism
-            content_lines = self.line_chunker.chunk_text(text=raw_markdown_text)
+            content_lines = self.line_chunker.chunk_text(text=raw_markdown_text, file_metadata=file_metadata)
             visible_content = "\n".join(content_lines)

             await server.insert_file_into_context_windows(
@@ -123,7 +123,7 @@ class FileProcessor:
            job.metadata["num_passages"] = len(all_passages)
            await self.job_manager.update_job_by_id_async(job_id=job.id, job_update=JobUpdate(**job.model_dump()), actor=self.actor)

-           await self.
+           await self.file_manager.update_file_status(
                file_id=file_metadata.id, actor=self.actor, processing_status=FileProcessingStatus.COMPLETED
            )

@@ -138,7 +138,7 @@ class FileProcessor:
            job.metadata["error"] = str(e)
            await self.job_manager.update_job_by_id_async(job_id=job.id, job_update=JobUpdate(**job.model_dump()), actor=self.actor)

-           await self.
+           await self.file_manager.update_file_status(
                file_id=file_metadata.id, actor=self.actor, processing_status=FileProcessingStatus.ERROR, error_message=str(e)
            )

letta/services/file_processor/file_types.py
@@ -0,0 +1,303 @@
+"""
+Centralized file type configuration for supported file formats.
+
+This module provides a single source of truth for file type definitions,
+mime types, and file processing capabilities across the Letta codebase.
+"""
+
+import mimetypes
+from dataclasses import dataclass
+from enum import Enum
+from typing import Dict, Set
+
+
+class ChunkingStrategy(str, Enum):
+    """Enum for different file chunking strategies."""
+
+    CODE = "code"  # Line-based chunking for code files
+    STRUCTURED_DATA = "structured_data"  # Line-based chunking for JSON, XML, etc.
+    DOCUMENTATION = "documentation"  # Paragraph-aware chunking for Markdown, HTML
+    PROSE = "prose"  # Character-based wrapping for plain text
+    LINE_BASED = "line_based"  # Default line-based chunking
+
+
+@dataclass
+class FileTypeInfo:
+    """Information about a supported file type."""
+
+    extension: str
+    mime_type: str
+    is_simple_text: bool
+    description: str
+    chunking_strategy: ChunkingStrategy = ChunkingStrategy.LINE_BASED
+
+
+class FileTypeRegistry:
+    """Central registry for supported file types."""
+
+    def __init__(self):
+        """Initialize the registry with default supported file types."""
+        self._file_types: Dict[str, FileTypeInfo] = {}
+        self._register_default_types()
+
+    def _register_default_types(self) -> None:
+        """Register all default supported file types."""
+        # Document formats
+        self.register(".pdf", "application/pdf", False, "PDF document", ChunkingStrategy.LINE_BASED)
+        self.register(".txt", "text/plain", True, "Plain text file", ChunkingStrategy.PROSE)
+        self.register(".md", "text/markdown", True, "Markdown document", ChunkingStrategy.DOCUMENTATION)
+        self.register(".markdown", "text/markdown", True, "Markdown document", ChunkingStrategy.DOCUMENTATION)
+        self.register(".json", "application/json", True, "JSON data file", ChunkingStrategy.STRUCTURED_DATA)
+        self.register(".jsonl", "application/jsonl", True, "JSON Lines file", ChunkingStrategy.STRUCTURED_DATA)
+
+        # Programming languages
+        self.register(".py", "text/x-python", True, "Python source code", ChunkingStrategy.CODE)
+        self.register(".js", "text/javascript", True, "JavaScript source code", ChunkingStrategy.CODE)
+        self.register(".ts", "text/x-typescript", True, "TypeScript source code", ChunkingStrategy.CODE)
+        self.register(".java", "text/x-java-source", True, "Java source code", ChunkingStrategy.CODE)
+        self.register(".cpp", "text/x-c++", True, "C++ source code", ChunkingStrategy.CODE)
+        self.register(".cxx", "text/x-c++", True, "C++ source code", ChunkingStrategy.CODE)
+        self.register(".c", "text/x-c", True, "C source code", ChunkingStrategy.CODE)
+        self.register(".h", "text/x-c", True, "C/C++ header file", ChunkingStrategy.CODE)
+        self.register(".cs", "text/x-csharp", True, "C# source code", ChunkingStrategy.CODE)
+        self.register(".php", "text/x-php", True, "PHP source code", ChunkingStrategy.CODE)
+        self.register(".rb", "text/x-ruby", True, "Ruby source code", ChunkingStrategy.CODE)
+        self.register(".go", "text/x-go", True, "Go source code", ChunkingStrategy.CODE)
+        self.register(".rs", "text/x-rust", True, "Rust source code", ChunkingStrategy.CODE)
+        self.register(".swift", "text/x-swift", True, "Swift source code", ChunkingStrategy.CODE)
+        self.register(".kt", "text/x-kotlin", True, "Kotlin source code", ChunkingStrategy.CODE)
+        self.register(".scala", "text/x-scala", True, "Scala source code", ChunkingStrategy.CODE)
+        self.register(".r", "text/x-r", True, "R source code", ChunkingStrategy.CODE)
+        self.register(".m", "text/x-objective-c", True, "Objective-C source code", ChunkingStrategy.CODE)
+
+        # Web technologies
+        self.register(".html", "text/html", True, "HTML document", ChunkingStrategy.CODE)
+        self.register(".htm", "text/html", True, "HTML document", ChunkingStrategy.CODE)
+        self.register(".css", "text/css", True, "CSS stylesheet", ChunkingStrategy.STRUCTURED_DATA)
+        self.register(".scss", "text/x-scss", True, "SCSS stylesheet", ChunkingStrategy.STRUCTURED_DATA)
+        self.register(".sass", "text/x-sass", True, "Sass stylesheet", ChunkingStrategy.STRUCTURED_DATA)
+        self.register(".less", "text/x-less", True, "Less stylesheet", ChunkingStrategy.STRUCTURED_DATA)
+        self.register(".vue", "text/x-vue", True, "Vue.js component", ChunkingStrategy.CODE)
+        self.register(".jsx", "text/x-jsx", True, "JSX source code", ChunkingStrategy.CODE)
+        self.register(".tsx", "text/x-tsx", True, "TSX source code", ChunkingStrategy.CODE)
+
+        # Configuration and data formats
+        self.register(".xml", "application/xml", True, "XML document", ChunkingStrategy.STRUCTURED_DATA)
+        self.register(".yaml", "text/x-yaml", True, "YAML configuration", ChunkingStrategy.STRUCTURED_DATA)
+        self.register(".yml", "text/x-yaml", True, "YAML configuration", ChunkingStrategy.STRUCTURED_DATA)
+        self.register(".toml", "application/toml", True, "TOML configuration", ChunkingStrategy.STRUCTURED_DATA)
+        self.register(".ini", "text/x-ini", True, "INI configuration", ChunkingStrategy.STRUCTURED_DATA)
+        self.register(".cfg", "text/x-conf", True, "Configuration file", ChunkingStrategy.STRUCTURED_DATA)
+        self.register(".conf", "text/x-conf", True, "Configuration file", ChunkingStrategy.STRUCTURED_DATA)
+
+        # Scripts and SQL
+        self.register(".sh", "text/x-shellscript", True, "Shell script", ChunkingStrategy.CODE)
+        self.register(".bash", "text/x-shellscript", True, "Bash script", ChunkingStrategy.CODE)
+        self.register(".ps1", "text/x-powershell", True, "PowerShell script", ChunkingStrategy.CODE)
+        self.register(".bat", "text/x-batch", True, "Batch script", ChunkingStrategy.CODE)
+        self.register(".cmd", "text/x-batch", True, "Command script", ChunkingStrategy.CODE)
+        self.register(".dockerfile", "text/x-dockerfile", True, "Dockerfile", ChunkingStrategy.CODE)
+        self.register(".sql", "text/x-sql", True, "SQL script", ChunkingStrategy.CODE)
+
+    def register(
+        self,
+        extension: str,
+        mime_type: str,
+        is_simple_text: bool,
+        description: str,
+        chunking_strategy: ChunkingStrategy = ChunkingStrategy.LINE_BASED,
+    ) -> None:
+        """
+        Register a new file type.
+
+        Args:
+            extension: File extension (with leading dot, e.g., '.py')
+            mime_type: MIME type for the file
+            is_simple_text: Whether this is a simple text file that can be read directly
+            description: Human-readable description of the file type
+            chunking_strategy: Strategy for chunking this file type
+        """
+        if not extension.startswith("."):
+            extension = f".{extension}"
+
+        self._file_types[extension] = FileTypeInfo(
+            extension=extension,
+            mime_type=mime_type,
+            is_simple_text=is_simple_text,
+            description=description,
+            chunking_strategy=chunking_strategy,
+        )
+
+    def register_mime_types(self) -> None:
+        """Register all file types with Python's mimetypes module."""
+        for file_type in self._file_types.values():
+            mimetypes.add_type(file_type.mime_type, file_type.extension)
+
+        # Also register some additional MIME type aliases that may be encountered
+        mimetypes.add_type("text/x-markdown", ".md")
+        mimetypes.add_type("application/x-jsonlines", ".jsonl")
+        mimetypes.add_type("text/xml", ".xml")
+
+    def get_allowed_media_types(self) -> Set[str]:
+        """
+        Get set of all allowed MIME types.
+
+        Returns:
+            Set of MIME type strings that are supported for upload
+        """
+        allowed_types = {file_type.mime_type for file_type in self._file_types.values()}
+
+        # Add additional MIME type aliases
+        allowed_types.update(
+            {
+                "text/x-markdown",  # Alternative markdown MIME type
+                "application/x-jsonlines",  # Alternative JSONL MIME type
+                "text/xml",  # Alternative XML MIME type
+            }
+        )
+
+        return allowed_types
+
+    def get_extension_to_mime_type_map(self) -> Dict[str, str]:
+        """
+        Get mapping from file extensions to MIME types.
+
+        Returns:
+            Dictionary mapping extensions (with leading dot) to MIME types
+        """
+        return {file_type.extension: file_type.mime_type for file_type in self._file_types.values()}
+
+    def get_simple_text_mime_types(self) -> Set[str]:
+        """
+        Get set of MIME types that represent simple text files.
+
+        Returns:
+            Set of MIME type strings for files that can be read as plain text
+        """
+        return {file_type.mime_type for file_type in self._file_types.values() if file_type.is_simple_text}
+
+    def is_simple_text_mime_type(self, mime_type: str) -> bool:
+        """
+        Check if a MIME type represents simple text that can be read directly.
+
+        Args:
+            mime_type: MIME type to check
+
+        Returns:
+            True if the MIME type represents simple text
+        """
+        # Check if it's in our registered simple text types
+        if mime_type in self.get_simple_text_mime_types():
+            return True
+
+        # Check for text/* types
+        if mime_type.startswith("text/"):
+            return True
+
+        # Check for known aliases that represent simple text
+        simple_text_aliases = {
+            "application/x-jsonlines",  # Alternative JSONL MIME type
+            "text/xml",  # Alternative XML MIME type
+        }
+        return mime_type in simple_text_aliases
+
+    def get_supported_extensions(self) -> Set[str]:
+        """
+        Get set of all supported file extensions.
+
+        Returns:
+            Set of file extensions (with leading dots)
+        """
+        return set(self._file_types.keys())
+
+    def is_supported_extension(self, extension: str) -> bool:
+        """
+        Check if a file extension is supported.
+
+        Args:
+            extension: File extension (with or without leading dot)
+
+        Returns:
+            True if the extension is supported
+        """
+        if not extension.startswith("."):
+            extension = f".{extension}"
+        return extension in self._file_types
+
+    def get_file_type_info(self, extension: str) -> FileTypeInfo:
+        """
+        Get information about a file type by extension.
+
+        Args:
+            extension: File extension (with or without leading dot)
+
+        Returns:
+            FileTypeInfo object with details about the file type
+
+        Raises:
+            KeyError: If the extension is not supported
+        """
+        if not extension.startswith("."):
+            extension = f".{extension}"
+        return self._file_types[extension]
+
+    def get_chunking_strategy_by_extension(self, extension: str) -> ChunkingStrategy:
+        """
+        Get the chunking strategy for a file based on its extension.
+
+        Args:
+            extension: File extension (with or without leading dot)
+
+        Returns:
+            ChunkingStrategy enum value for the file type
+
+        Raises:
+            KeyError: If the extension is not supported
+        """
+        file_type_info = self.get_file_type_info(extension)
+        return file_type_info.chunking_strategy
+
+    def get_chunking_strategy_by_mime_type(self, mime_type: str) -> ChunkingStrategy:
+        """
+        Get the chunking strategy for a file based on its MIME type.
+
+        Args:
+            mime_type: MIME type of the file
+
+        Returns:
+            ChunkingStrategy enum value for the file type, or LINE_BASED if not found
+        """
+        for file_type in self._file_types.values():
+            if file_type.mime_type == mime_type:
+                return file_type.chunking_strategy
+        return ChunkingStrategy.LINE_BASED
+
+
+# Global registry instance
+file_type_registry = FileTypeRegistry()
+
+
+# Convenience functions for backward compatibility and ease of use
+def register_mime_types() -> None:
+    """Register all supported file types with Python's mimetypes module."""
+    file_type_registry.register_mime_types()
+
+
+def get_allowed_media_types() -> Set[str]:
+    """Get set of all allowed MIME types for file uploads."""
+    return file_type_registry.get_allowed_media_types()
+
+
+def get_extension_to_mime_type_map() -> Dict[str, str]:
+    """Get mapping from file extensions to MIME types."""
+    return file_type_registry.get_extension_to_mime_type_map()
+
+
+def get_simple_text_mime_types() -> Set[str]:
+    """Get set of MIME types that represent simple text files."""
+    return file_type_registry.get_simple_text_mime_types()
+
+
+def is_simple_text_mime_type(mime_type: str) -> bool:
+    """Check if a MIME type represents simple text."""
+    return file_type_registry.is_simple_text_mime_type(mime_type)
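For orientation, the registry above is used both through the module-level file_type_registry instance and through the convenience wrappers; a short usage sketch built only from the definitions in this hunk:

from letta.services.file_processor.file_types import (
    ChunkingStrategy,
    file_type_registry,
    register_mime_types,
)

register_mime_types()  # teach Python's mimetypes module the extensions and aliases registered above

assert file_type_registry.get_chunking_strategy_by_extension("py") is ChunkingStrategy.CODE
assert file_type_registry.get_chunking_strategy_by_mime_type("text/markdown") is ChunkingStrategy.DOCUMENTATION
assert file_type_registry.is_supported_extension(".jsonl")  # the leading dot is optional
assert ".pdf" in file_type_registry.get_supported_extensions()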
letta/services/file_processor/parser/mistral_parser.py
@@ -3,22 +3,13 @@ import base64
 from mistralai import Mistral, OCRPageObject, OCRResponse, OCRUsageInfo

 from letta.log import get_logger
+from letta.services.file_processor.file_types import is_simple_text_mime_type
 from letta.services.file_processor.parser.base_parser import FileParser
 from letta.settings import settings

 logger = get_logger(__name__)


-SIMPLE_TEXT_MIME_TYPES = {
-    "text/plain",
-    "text/markdown",
-    "text/x-markdown",
-    "application/json",
-    "application/jsonl",
-    "application/x-jsonlines",
-}
-
-
 class MistralFileParser(FileParser):
     """Mistral-based OCR extraction"""

@@ -33,7 +24,7 @@ class MistralFileParser(FileParser):

         # TODO: Kind of hacky...we try to exit early here?
         # TODO: Create our internal file parser representation we return instead of OCRResponse
-        if mime_type
+        if is_simple_text_mime_type(mime_type):
             text = content.decode("utf-8", errors="replace")
             return OCRResponse(
                 model=self.model,
letta/services/helpers/agent_manager_helper.py
@@ -229,6 +229,7 @@ def compile_system_message(
     template_format: Literal["f-string", "mustache", "jinja2"] = "f-string",
     previous_message_count: int = 0,
     archival_memory_size: int = 0,
+    tool_rules_solver: Optional[ToolRulesSolver] = None,
 ) -> str:
     """Prepare the final/full system message that will be fed into the LLM API

@@ -237,6 +238,11 @@ def compile_system_message(
     The following are reserved variables:
       - CORE_MEMORY: the in-context memory of the LLM
     """
+    # Add tool rule constraints if available
+    if tool_rules_solver is not None:
+        tool_constraint_block = tool_rules_solver.compile_tool_rule_prompts()
+        if tool_constraint_block:  # There may not be any depending on if there are tool rules attached
+            in_context_memory.blocks.append(tool_constraint_block)

     if user_defined_variables is not None:
         # TODO eventually support the user defining their own variables to inject
letta/services/message_manager.py
@@ -1,4 +1,5 @@
 import json
+import uuid
 from typing import List, Optional, Sequence

 from sqlalchemy import delete, exists, func, select, text
@@ -10,10 +11,12 @@ from letta.orm.message import Message as MessageModel
 from letta.otel.tracing import trace_method
 from letta.schemas.enums import MessageRole
 from letta.schemas.letta_message import LettaMessageUpdateUnion
+from letta.schemas.letta_message_content import ImageSourceType, LettaImage, MessageContentType
 from letta.schemas.message import Message as PydanticMessage
 from letta.schemas.message import MessageUpdate
 from letta.schemas.user import User as PydanticUser
 from letta.server.db import db_registry
+from letta.services.file_manager import FileManager
 from letta.utils import enforce_types

 logger = get_logger(__name__)
@@ -22,6 +25,10 @@ logger = get_logger(__name__)
 class MessageManager:
     """Manager class to handle business logic related to Messages."""

+    def __init__(self):
+        """Initialize the MessageManager."""
+        self.file_manager = FileManager()
+
     @enforce_types
     @trace_method
     def get_message_by_id(self, message_id: str, actor: PydanticUser) -> Optional[PydanticMessage]:
@@ -131,6 +138,31 @@ class MessageManager:
         if not pydantic_msgs:
             return []

+        for message in pydantic_msgs:
+            if isinstance(message.content, list):
+                for content in message.content:
+                    if content.type == MessageContentType.image and content.source.type == ImageSourceType.base64:
+                        # TODO: actually persist image files in db
+                        # file = await self.file_manager.create_file(  # TODO: use batch create to prevent multiple db round trips
+                        #     db_session=session,
+                        #     image_create=FileMetadata(
+                        #         user_id=actor.id,  # TODO: add field
+                        #         source_id= ''  # TODO: make optional
+                        #         organization_id=actor.organization_id,
+                        #         file_type=content.source.media_type,
+                        #         processing_status=FileProcessingStatus.COMPLETED,
+                        #         content= ''  # TODO: should content be added here or in top level text field?
+                        #     ),
+                        #     actor=actor,
+                        #     text=content.source.data,
+                        # )
+                        file_id_placeholder = "file-" + str(uuid.uuid4())
+                        content.source = LettaImage(
+                            file_id=file_id_placeholder,
+                            data=content.source.data,
+                            media_type=content.source.media_type,
+                            detail=content.source.detail,
+                        )
         orm_messages = self._create_many_preprocess(pydantic_msgs, actor)
         async with db_registry.async_session() as session:
             created_messages = await MessageModel.batch_create_async(orm_messages, session, actor=actor)
letta/services/organization_manager.py
@@ -1,5 +1,6 @@
 from typing import List, Optional

+from letta.constants import DEFAULT_ORG_ID, DEFAULT_ORG_NAME
 from letta.orm.errors import NoResultFound
 from letta.orm.organization import Organization as OrganizationModel
 from letta.otel.tracing import trace_method
@@ -12,14 +13,11 @@ from letta.utils import enforce_types
 class OrganizationManager:
     """Manager class to handle business logic related to Organizations."""

-    DEFAULT_ORG_ID = "org-00000000-0000-4000-8000-000000000000"
-    DEFAULT_ORG_NAME = "default_org"
-
     @enforce_types
     @trace_method
     async def get_default_organization_async(self) -> PydanticOrganization:
         """Fetch the default organization."""
-        return await self.get_organization_by_id_async(
+        return await self.get_organization_by_id_async(DEFAULT_ORG_ID)

     @enforce_types
     @trace_method
@@ -72,14 +70,14 @@ class OrganizationManager:
     @trace_method
     def create_default_organization(self) -> PydanticOrganization:
         """Create the default organization."""
-        pydantic_org = PydanticOrganization(name=
+        pydantic_org = PydanticOrganization(name=DEFAULT_ORG_NAME, id=DEFAULT_ORG_ID)
         return self.create_organization(pydantic_org)

     @enforce_types
     @trace_method
     async def create_default_organization_async(self) -> PydanticOrganization:
         """Create the default organization."""
-        return await self.create_organization_async(PydanticOrganization(name=
+        return await self.create_organization_async(PydanticOrganization(name=DEFAULT_ORG_NAME, id=DEFAULT_ORG_ID))

     @enforce_types
     @trace_method