langroid 0.51.1__py3-none-any.whl → 0.52.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
langroid/agent/base.py CHANGED
@@ -47,6 +47,7 @@ from langroid.language_models.base import (
 )
 from langroid.language_models.openai_gpt import OpenAIGPT, OpenAIGPTConfig
 from langroid.mytypes import Entity
+from langroid.parsing.file_attachment import FileAttachment
 from langroid.parsing.parse_json import extract_top_level_json
 from langroid.parsing.parser import Parser, ParsingConfig
 from langroid.prompts.prompts_config import PromptsConfig
@@ -440,6 +441,7 @@ class Agent(ABC):
     def create_agent_response(
         self,
         content: str | None = None,
+        files: List[FileAttachment] = [],
         content_any: Any = None,
         tool_messages: List[ToolMessage] = [],
         oai_tool_calls: Optional[List[OpenAIToolCall]] = None,
@@ -452,6 +454,7 @@ class Agent(ABC):
         return self.response_template(
             Entity.AGENT,
             content=content,
+            files=files,
             content_any=content_any,
             tool_messages=tool_messages,
             oai_tool_calls=oai_tool_calls,
@@ -689,6 +692,7 @@ class Agent(ABC):
         self,
         e: Entity,
         content: str | None = None,
+        files: List[FileAttachment] = [],
         content_any: Any = None,
         tool_messages: List[ToolMessage] = [],
         oai_tool_calls: Optional[List[OpenAIToolCall]] = None,
@@ -700,6 +704,7 @@ class Agent(ABC):
         """Template for response from entity `e`."""
         return ChatDocument(
             content=content or "",
+            files=files,
             content_any=content_any,
             tool_messages=tool_messages,
             oai_tool_calls=oai_tool_calls,
@@ -714,6 +719,7 @@ class Agent(ABC):
     def create_user_response(
         self,
         content: str | None = None,
+        files: List[FileAttachment] = [],
         content_any: Any = None,
         tool_messages: List[ToolMessage] = [],
         oai_tool_calls: List[OpenAIToolCall] | None = None,
@@ -726,6 +732,7 @@ class Agent(ABC):
         return self.response_template(
             e=Entity.USER,
             content=content,
+            files=files,
             content_any=content_any,
             tool_messages=tool_messages,
             oai_tool_calls=oai_tool_calls,
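
To illustrate: a minimal sketch (not from the package itself) of attaching a file to a user message via the new `files` parameter. The path `report.pdf` is hypothetical, and we assume a `ChatAgent` constructed with a default `ChatAgentConfig`:

```python
import langroid as lr
from langroid.parsing.file_attachment import FileAttachment

# Hypothetical local file; the MIME type is guessed from the extension.
attachment = FileAttachment.from_path("report.pdf")

agent = lr.ChatAgent(lr.ChatAgentConfig())
# `files` is carried on the resulting ChatDocument and, from there,
# into the LLMMessage(s) sent to the API (see chat_document.py below).
user_msg = agent.create_user_response(
    content="Please summarize the attached report.",
    files=[attachment],
)
```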

langroid/agent/chat_agent.py CHANGED
@@ -1511,12 +1511,14 @@ class ChatAgent(Agent):
         output_len = self.config.llm.model_max_output_tokens
         if (
             truncate
-            and self.chat_num_tokens(hist)
-            > self.llm.chat_context_length() - self.config.llm.model_max_output_tokens
+            and output_len > self.llm.chat_context_length() - self.chat_num_tokens(hist)
         ):
             # chat + output > max context length,
-            # so first try to shorten requested output len to fit.
-            output_len = self.llm.chat_context_length() - self.chat_num_tokens(hist)
+            # so first try to shorten requested output len to fit;
+            # use an extra margin of 300 tokens in case our calcs are off
+            output_len = (
+                self.llm.chat_context_length() - self.chat_num_tokens(hist) - 300
+            )
             if output_len < self.config.llm.min_output_tokens:
                 # unacceptably small output len, so drop early parts of conv history
                 # if output_len is still too long, then drop early parts of conv history
@@ -1534,10 +1536,17 @@ class ChatAgent(Agent):
                 # and last message (user msg).
                 raise ValueError(
                     """
-                The message history is longer than the max chat context
-                length allowed, and we have run out of messages to drop.
-                HINT: In your `OpenAIGPTConfig` object, try increasing
-                `chat_context_length` or decreasing `model_max_output_tokens`.
+                The (message history + max_output_tokens) is longer than the
+                max chat context length of this model, and we have tried
+                reducing the requested max output tokens, as well as dropping
+                early parts of the message history, to accommodate the model
+                context length, but we have run out of msgs to drop.
+
+                HINT: In the `llm` field of your `ChatAgentConfig` object,
+                which is of type `LLMConfig/OpenAIGPTConfig`, try
+                - increasing `chat_context_length`
+                  (if accurate for the model), or
+                - decreasing `max_output_tokens`
                     """
                 )
                 # drop the second message, i.e. first msg after the sys msg
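
The rewritten check asks whether history plus the requested output would overflow the context window, then shrinks the output budget with a 300-token safety margin. A self-contained sketch of the arithmetic (plain Python, not langroid code):

```python
def fit_output_len(
    context_length: int,
    history_tokens: int,
    requested_output: int,
    margin: int = 300,
) -> int:
    """Shrink the requested output tokens so history + output fits the window."""
    if requested_output > context_length - history_tokens:
        # history + output would overflow: shorten the output budget,
        # keeping a margin in case token counts are slightly off.
        return context_length - history_tokens - margin
    return requested_output

# An 8192-token window with 7000 tokens of history leaves
# 8192 - 7000 - 300 = 892 tokens for the reply.
assert fit_output_len(8192, 7000, 2000) == 892
```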

langroid/agent/chat_document.py CHANGED
@@ -19,6 +19,7 @@ from langroid.language_models.base import (
 )
 from langroid.mytypes import DocMetaData, Document, Entity
 from langroid.parsing.agent_chats import parse_message
+from langroid.parsing.file_attachment import FileAttachment
 from langroid.parsing.parse_json import extract_top_level_json, top_level_json_field
 from langroid.pydantic_v1 import BaseModel, Extra
 from langroid.utils.object_registry import ObjectRegistry
@@ -119,6 +120,7 @@ class ChatDocument(Document):
 
     reasoning: str = ""  # reasoning produced by a reasoning LLM
    content_any: Any = None  # to hold arbitrary data returned by responders
+    files: List[FileAttachment] = []  # list of file attachments
     oai_tool_calls: Optional[List[OpenAIToolCall]] = None
     oai_tool_id2result: Optional[OrderedDict[str, str]] = None
     oai_tool_choice: ToolChoiceTypes | Dict[str, Dict[str, str] | str] = "auto"
@@ -356,12 +358,8 @@ class ChatDocument(Document):
         Returns:
             List[LLMMessage]: list of LLMMessages corresponding to this ChatDocument.
         """
-        sender_name = None
+
         sender_role = Role.USER
-        fun_call = None
-        oai_tool_calls = None
-        tool_id = ""  # for OpenAI Assistant
-        chat_document_id: str = ""
         if isinstance(message, str):
             message = ChatDocument.from_str(message)
         content = message.content or to_string(message.content_any) or ""
@@ -381,6 +379,8 @@ class ChatDocument(Document):
             # same reasoning as for function-call above
             content += " " + "\n\n".join(str(tc) for tc in oai_tool_calls)
             oai_tool_calls = None
+        # some LLM APIs (e.g. gemini) don't like empty msg
+        content = content or " "
         sender_name = message.metadata.sender_name
         tool_ids = message.metadata.tool_ids
         tool_id = tool_ids[-1] if len(tool_ids) > 0 else ""
@@ -409,6 +409,7 @@ class ChatDocument(Document):
                     role=Role.TOOL,
                     tool_call_id=oai_tools[0].id,
                     content=content,
+                    files=message.files,
                     chat_document_id=chat_document_id,
                 )
             ]
@@ -424,6 +425,7 @@ class ChatDocument(Document):
                     role=Role.TOOL,
                     tool_call_id=message.metadata.oai_tool_id,
                     content=content,
+                    files=message.files,
                     chat_document_id=chat_document_id,
                 )
             ]
@@ -437,7 +439,8 @@ class ChatDocument(Document):
                 LLMMessage(
                     role=Role.TOOL,
                     tool_call_id=tool_id,
-                    content=result,
+                    content=result or " ",
+                    files=message.files,
                    chat_document_id=chat_document_id,
                )
                for tool_id, result in message.oai_tool_id2result.items()
@@ -450,6 +453,7 @@ class ChatDocument(Document):
                 role=sender_role,
                 tool_id=tool_id,  # for OpenAI Assistant
                 content=content,
+                files=message.files,
                 function_call=fun_call,
                 tool_calls=oai_tool_calls,
                 name=sender_name,

langroid/agent/special/doc_chat_agent.py CHANGED
@@ -204,8 +204,8 @@ class DocChatAgentConfig(ChatAgentConfig):
 
     llm: OpenAIGPTConfig = OpenAIGPTConfig(
         type="openai",
-        chat_model=OpenAIChatModel.GPT4,
-        completion_model=OpenAIChatModel.GPT4,
+        chat_model=OpenAIChatModel.GPT4o,
+        completion_model=OpenAIChatModel.GPT4o,
         timeout=40,
     )
     prompts: PromptsConfig = PromptsConfig(

langroid/agent/special/table_chat_agent.py CHANGED
@@ -118,8 +118,8 @@ class TableChatAgentConfig(ChatAgentConfig):
     vecdb: None | VectorStoreConfig = None
     llm: OpenAIGPTConfig = OpenAIGPTConfig(
         type="openai",
-        chat_model=OpenAIChatModel.GPT4,
-        completion_model=OpenAIChatModel.GPT4,
+        chat_model=OpenAIChatModel.GPT4o,
+        completion_model=OpenAIChatModel.GPT4o,
     )
     prompts: PromptsConfig = PromptsConfig(
         max_tokens=1000,

langroid/language_models/base.py CHANGED
@@ -21,6 +21,7 @@ from langroid.cachedb.base import CacheDBConfig
 from langroid.cachedb.redis_cachedb import RedisCacheConfig
 from langroid.language_models.model_info import ModelInfo, get_model_info
 from langroid.parsing.agent_chats import parse_message
+from langroid.parsing.file_attachment import FileAttachment
 from langroid.parsing.parse_json import parse_imperfect_json, top_level_json_field
 from langroid.prompts.dialog import collate_chat_history
 from langroid.pydantic_v1 import BaseModel, BaseSettings, Field
@@ -53,6 +54,13 @@ class StreamEventType(Enum):
     TOOL_ARGS = 5
 
 
+class RetryParams(BaseSettings):
+    max_retries: int = 5
+    initial_delay: float = 1.0
+    exponential_base: float = 1.3
+    jitter: bool = True
+
+
 class LLMConfig(BaseSettings):
     """
     Common configuration for all language models.
@@ -63,7 +71,8 @@ class LLMConfig(BaseSettings):
     streamer_async: Optional[Callable[..., Awaitable[None]]] = async_noop_fn
     api_base: str | None = None
     formatter: None | str = None
-    max_output_tokens: int | None = 8192  # specify None to use model_max_output_tokens
+    # specify None if you want to use the full max output tokens of the model
+    max_output_tokens: int | None = 8192
     timeout: int = 20  # timeout for API requests
     chat_model: str = ""
     completion_model: str = ""
@@ -86,11 +95,13 @@ class LLMConfig(BaseSettings):
     # Dict of model -> (input/prompt cost, output/completion cost)
     chat_cost_per_1k_tokens: Tuple[float, float] = (0.0, 0.0)
     completion_cost_per_1k_tokens: Tuple[float, float] = (0.0, 0.0)
+    retry_params: RetryParams = RetryParams()
 
     @property
     def model_max_output_tokens(self) -> int:
-        return (
-            self.max_output_tokens or get_model_info(self.chat_model).max_output_tokens
+        return min(
+            self.max_output_tokens or get_model_info(self.chat_model).max_output_tokens,
+            get_model_info(self.chat_model).max_output_tokens,
         )
 
 
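A sketch of setting the new `retry_params` knob; the values are illustrative, not recommendations:

```python
import langroid.language_models as lm
from langroid.language_models.base import RetryParams

# Illustrative values: fewer retries, slower start, steeper backoff.
cfg = lm.OpenAIGPTConfig(
    chat_model=lm.OpenAIChatModel.GPT4o,
    retry_params=RetryParams(
        max_retries=3,
        initial_delay=2.0,
        exponential_base=1.5,
        jitter=True,
    ),
)
llm = lm.OpenAIGPT(cfg)
```

Note also that `model_max_output_tokens` is now clamped with `min(...)`, so a configured `max_output_tokens` can no longer exceed the model's own output limit.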

@@ -263,13 +274,14 @@ class LLMMessage(BaseModel):
     tool_call_id: Optional[str] = None  # which OpenAI LLM tool this is a response to
     tool_id: str = ""  # used by OpenAIAssistant
     content: str
+    files: List[FileAttachment] = []
     function_call: Optional[LLMFunctionCall] = None
     tool_calls: Optional[List[OpenAIToolCall]] = None
     timestamp: datetime = Field(default_factory=datetime.utcnow)
     # link to corresponding chat document, for provenance/rewind purposes
     chat_document_id: str = ""
 
-    def api_dict(self, has_system_role: bool = True) -> Dict[str, Any]:
+    def api_dict(self, model: str, has_system_role: bool = True) -> Dict[str, Any]:
         """
         Convert to dictionary for API request, keeping ONLY
         the fields that are expected in an API call!
@@ -283,6 +295,17 @@ class LLMMessage(BaseModel):
             dict: dictionary representation of LLM message
         """
         d = self.dict()
+        files: List[FileAttachment] = d.pop("files")
+        if len(files) > 0 and self.role == Role.USER:
+            # If there are files, then content is an array of
+            # different content-parts
+            d["content"] = [
+                dict(
+                    type="text",
+                    text=self.content,
+                )
+            ] + [f.to_dict(model) for f in self.files]
+
         # if there is a key k = "role" with value "system", change to "user"
         # in case has_system_role is False
         if not has_system_role and "role" in d and d["role"] == "system":
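
Assuming a user message with one PDF attached, the `content` entry produced by `api_dict` becomes a list of content-parts, roughly as follows (shape sketch based on the OpenAI-style fallback branch of `FileAttachment.to_dict`; the data URI is truncated):

```python
content = [
    {"type": "text", "text": "Please summarize the attached report."},
    {
        "type": "file",
        "file": {
            "filename": "report.pdf",
            "file_data": "data:application/pdf;base64,JVBERi0xLjQK...",
        },
    },
]
```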

langroid/language_models/model_info.py CHANGED
@@ -24,13 +24,16 @@ class OpenAIChatModel(ModelName):
     """Enum for OpenAI Chat models"""
 
     GPT3_5_TURBO = "gpt-3.5-turbo-1106"
-    GPT4 = "gpt-4"
+    GPT4 = "gpt-4o"  # avoid deprecated gpt-4
     GPT4_TURBO = "gpt-4-turbo"
     GPT4o = "gpt-4o"
     GPT4o_MINI = "gpt-4o-mini"
     O1 = "o1"
     O1_MINI = "o1-mini"
     O3_MINI = "o3-mini"
+    GPT4_1 = "gpt-4.1"
+    GPT4_1_MINI = "gpt-4.1-mini"
+    GPT4_1_NANO = "gpt-4.1-nano"
 
 
 class OpenAICompletionModel(str, Enum):
@@ -44,6 +47,7 @@ class AnthropicModel(ModelName):
     """Enum for Anthropic models"""
 
     CLAUDE_3_5_SONNET = "claude-3-5-sonnet-latest"
+    CLAUDE_3_7_SONNET = "claude-3-7-sonnet-latest"
     CLAUDE_3_OPUS = "claude-3-opus-latest"
     CLAUDE_3_SONNET = "claude-3-sonnet-20240229"
     CLAUDE_3_HAIKU = "claude-3-haiku-20240307"
@@ -63,6 +67,7 @@ class GeminiModel(ModelName):
     GEMINI_1_5_FLASH = "gemini-1.5-flash"
     GEMINI_1_5_FLASH_8B = "gemini-1.5-flash-8b"
     GEMINI_1_5_PRO = "gemini-1.5-pro"
+    GEMINI_2_5_PRO = "gemini-2.5-pro-exp-02-05"
     GEMINI_2_PRO = "gemini-2.0-pro-exp-02-05"
     GEMINI_2_FLASH = "gemini-2.0-flash"
     GEMINI_2_FLASH_LITE = "gemini-2.0-flash-lite-preview"
@@ -160,6 +165,33 @@ MODEL_INFO: Dict[str, ModelInfo] = {
         output_cost_per_million=30.0,
         description="GPT-4 Turbo",
     ),
+    OpenAIChatModel.GPT4_1_NANO.value: ModelInfo(
+        name=OpenAIChatModel.GPT4_1_NANO.value,
+        provider=ModelProvider.OPENAI,
+        context_length=1_047_576,
+        max_output_tokens=32_768,
+        input_cost_per_million=0.10,
+        output_cost_per_million=0.40,
+        description="GPT-4.1 Nano",
+    ),
+    OpenAIChatModel.GPT4_1_MINI.value: ModelInfo(
+        name=OpenAIChatModel.GPT4_1_MINI.value,
+        provider=ModelProvider.OPENAI,
+        context_length=1_047_576,
+        max_output_tokens=32_768,
+        input_cost_per_million=0.40,
+        output_cost_per_million=1.60,
+        description="GPT-4.1 Mini",
+    ),
+    OpenAIChatModel.GPT4_1.value: ModelInfo(
+        name=OpenAIChatModel.GPT4_1.value,
+        provider=ModelProvider.OPENAI,
+        context_length=1_047_576,
+        max_output_tokens=32_768,
+        input_cost_per_million=2.00,
+        output_cost_per_million=8.00,
+        description="GPT-4.1",
+    ),
     OpenAIChatModel.GPT4o.value: ModelInfo(
         name=OpenAIChatModel.GPT4o.value,
         provider=ModelProvider.OPENAI,
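
The new enum members plug in anywhere a chat model is configured; a minimal sketch:

```python
import langroid.language_models as lm

# GPT-4.1 Mini: ~1M-token context, 32K max output, per the table above.
cfg = lm.OpenAIGPTConfig(chat_model=lm.OpenAIChatModel.GPT4_1_MINI)
mdl = lm.OpenAIGPT(cfg)
print(mdl.info().context_length)  # 1047576
```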

langroid/language_models/openai_gpt.py CHANGED
@@ -91,10 +91,13 @@ LLAMACPP_API_KEY = os.environ.get("LLAMA_API_KEY", DUMMY_API_KEY)
 
 openai_chat_model_pref_list = [
     OpenAIChatModel.GPT4o,
+    OpenAIChatModel.GPT4_1_NANO,
+    OpenAIChatModel.GPT4_1_MINI,
+    OpenAIChatModel.GPT4_1,
     OpenAIChatModel.GPT4o_MINI,
     OpenAIChatModel.O1_MINI,
+    OpenAIChatModel.O3_MINI,
     OpenAIChatModel.O1,
-    OpenAIChatModel.GPT3_5_TURBO,
 ]
 
 openai_completion_model_pref_list = [
@@ -1731,8 +1734,7 @@ class OpenAIGPT(LanguageModel):
             logging.error(friendly_error(e, "Error in OpenAIGPT.achat: "))
             raise e
 
-    @retry_with_exponential_backoff
-    def _chat_completions_with_backoff(self, **kwargs):  # type: ignore
+    def _chat_completions_with_backoff_body(self, **kwargs):  # type: ignore
         cached = False
         hashed_key, result = self._cache_lookup("Completion", **kwargs)
         if result is not None:
@@ -1781,8 +1783,17 @@ class OpenAIGPT(LanguageModel):
             self._cache_store(hashed_key, result.model_dump())
         return cached, hashed_key, result
 
-    @async_retry_with_exponential_backoff
-    async def _achat_completions_with_backoff(self, **kwargs):  # type: ignore
+    def _chat_completions_with_backoff(self, **kwargs):  # type: ignore
+        retry_func = retry_with_exponential_backoff(
+            self._chat_completions_with_backoff_body,
+            initial_delay=self.config.retry_params.initial_delay,
+            max_retries=self.config.retry_params.max_retries,
+            exponential_base=self.config.retry_params.exponential_base,
+            jitter=self.config.retry_params.jitter,
+        )
+        return retry_func(**kwargs)
+
+    async def _achat_completions_with_backoff_body(self, **kwargs):  # type: ignore
         cached = False
         hashed_key, result = self._cache_lookup("Completion", **kwargs)
         if result is not None:
@@ -1836,6 +1847,16 @@ class OpenAIGPT(LanguageModel):
             self._cache_store(hashed_key, result.model_dump())
         return cached, hashed_key, result
 
+    async def _achat_completions_with_backoff(self, **kwargs):  # type: ignore
+        retry_func = async_retry_with_exponential_backoff(
+            self._achat_completions_with_backoff_body,
+            initial_delay=self.config.retry_params.initial_delay,
+            max_retries=self.config.retry_params.max_retries,
+            exponential_base=self.config.retry_params.exponential_base,
+            jitter=self.config.retry_params.jitter,
+        )
+        return await retry_func(**kwargs)
+
     def _prep_chat_completion(
         self,
         messages: Union[str, List[LLMMessage]],
@@ -1876,10 +1897,13 @@ class OpenAIGPT(LanguageModel):
         args: Dict[str, Any] = dict(
             model=chat_model,
             messages=[
-                m.api_dict(has_system_role=self.info().allows_system_message)
+                m.api_dict(
+                    self.config.chat_model,
+                    has_system_role=self.info().allows_system_message,
+                )
                 for m in (llm_messages)
             ],
-            max_tokens=max_tokens,
+            max_completion_tokens=max_tokens,
             stream=self.get_stream(),
         )
         if self.get_stream():
@@ -2073,7 +2097,7 @@ class OpenAIGPT(LanguageModel):
             function_call,
             response_format,
         )
-        cached, hashed_key, response = self._chat_completions_with_backoff(**args)
+        cached, hashed_key, response = self._chat_completions_with_backoff(**args)  # type: ignore
         if self.get_stream() and not cached:
             llm_response, openai_response = self._stream_response(response, chat=True)
             self._cache_store(hashed_key, openai_response)
@@ -2106,7 +2130,7 @@ class OpenAIGPT(LanguageModel):
             function_call,
             response_format,
         )
-        cached, hashed_key, response = await self._achat_completions_with_backoff(
+        cached, hashed_key, response = await self._achat_completions_with_backoff(  # type: ignore
             **args
         )
         if self.get_stream() and not cached:
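
The backoff decorators are replaced by wrappers built at call time, so retry behavior now comes from the instance's `config.retry_params` rather than fixed decorator defaults. A simplified stand-in for the pattern (this is not langroid's actual `retry_with_exponential_backoff`, only a sketch of a decorator factory with the keyword arguments used above):

```python
import random
import time
from typing import Any, Callable


def retry_with_exponential_backoff(
    func: Callable[..., Any],
    *,
    initial_delay: float = 1.0,
    max_retries: int = 5,
    exponential_base: float = 1.3,
    jitter: bool = True,
) -> Callable[..., Any]:
    """Wrap `func` so failures trigger exponentially spaced retries."""

    def wrapper(*args: Any, **kwargs: Any) -> Any:
        delay = initial_delay
        for attempt in range(max_retries + 1):
            try:
                return func(*args, **kwargs)
            except Exception:
                if attempt == max_retries:
                    raise
                # Sleep, optionally jittered, then grow the delay.
                time.sleep(delay * (1 + random.random() * jitter))
                delay *= exponential_base

    return wrapper
```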

langroid/parsing/document_parser.py CHANGED
@@ -31,7 +31,7 @@ if TYPE_CHECKING:
     from PIL import Image
 
 from langroid.mytypes import DocMetaData, Document
-from langroid.parsing.parser import Parser, ParsingConfig
+from langroid.parsing.parser import LLMPdfParserConfig, Parser, ParsingConfig
 
 logger = logging.getLogger(__name__)
 
@@ -1040,7 +1040,8 @@ class LLMPdfParser(DocumentParser):
             raise ValueError(
                 "LLMPdfParser requires a llm-based config in pdf parsing config"
             )
-        self.model_name = config.pdf.llm_parser_config.model_name
+        self.llm_parser_config: LLMPdfParserConfig = config.pdf.llm_parser_config
+        self.model_name = self.llm_parser_config.model_name
 
         # Ensure output directory exists
         self.OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
@@ -1059,9 +1060,7 @@ class LLMPdfParser(DocumentParser):
         temp_file.close()
         self.output_filename = Path(temp_file.name)
 
-        self.max_tokens = (
-            config.pdf.llm_parser_config.max_tokens or self.DEFAULT_MAX_TOKENS
-        )
+        self.max_tokens = self.llm_parser_config.max_tokens or self.DEFAULT_MAX_TOKENS
 
         """
         If True, each PDF page is processed as a separate chunk,
@@ -1069,12 +1068,12 @@ class LLMPdfParser(DocumentParser):
         grouped into chunks based on `max_token_limit` before being sent
         to the LLM.
         """
-        self.split_on_page = config.pdf.llm_parser_config.split_on_page or False
+        self.split_on_page = self.llm_parser_config.split_on_page or False
 
         # Rate limiting parameters
         import asyncio
 
-        self.requests_per_minute = config.pdf.llm_parser_config.requests_per_minute or 5
+        self.requests_per_minute = self.llm_parser_config.requests_per_minute or 5
 
         """
         A semaphore to control the number of concurrent requests to the LLM,
@@ -1231,6 +1230,7 @@ class LLMPdfParser(DocumentParser):
         llm_config = OpenAIGPTConfig(
             chat_model=self.model_name,
             max_output_tokens=self.max_tokens,
+            timeout=self.llm_parser_config.timeout,
         )
         llm = OpenAIGPT(config=llm_config)
         page_nums = self._page_num_str(chunk.get("page_numbers", "?"))
@@ -1242,7 +1242,7 @@ class LLMPdfParser(DocumentParser):
                 image_url=dict(url=data_uri),
             )
         elif "claude" in self.model_name.lower():
-            # optimistrally try this: some API proxies like litellm
+            # optimistically try this: some API proxies like litellm
             # support this, and others may not.
             file_content = dict(
                 type="file",
@@ -1259,27 +1259,32 @@ class LLMPdfParser(DocumentParser):
                     file_data=data_uri,
                 ),
             )
+        prompt = (
+            self.llm_parser_config.prompt
+            or self.LLM_PDF_MD_SYSTEM_INSTRUCTION
+        )
+        system_prompt = (
+            self.llm_parser_config.system_prompt
+            or """
+            You are an expert pdf -> markdown converter.
+            Do NOT use any triple backquotes when you present the
+            markdown content, like ```markdown etc.
+            FAITHFULLY CONVERT THE PDF TO MARKDOWN,
+            retaining ALL content as you find it.
+            """
+        )
 
         # Send the request with PDF content and system instructions
         response = await llm.async_client.chat.completions.create(  # type: ignore
             model=self.model_name.split("/")[-1],
             messages=[
-                dict(
-                    role="system",
-                    content="""
-                    You are an expert pdf -> markdown converter.
-                    Do NOT use any triple backquotes when you present the
-                    markdown content,like ```markdown etc.
-                    FAITHFULLY CONVERT THE PDF TO MARKDOWN,
-                    retaining ALL content as you find it.
-                    """,
-                ),
+                dict(role="system", content=system_prompt),
                 dict(  # type: ignore
                     role="user",
                     content=[
                         dict(
                             type="text",
-                            text=self.LLM_PDF_MD_SYSTEM_INSTRUCTION,
+                            text=prompt,
                        ),
                        file_content,
                    ],

langroid/parsing/file_attachment.py ADDED
@@ -0,0 +1,157 @@
+import base64
+import mimetypes
+import uuid
+from pathlib import Path
+from typing import Any, BinaryIO, Dict, Optional, Union
+
+from langroid.pydantic_v1 import BaseModel
+
+
+class FileAttachment(BaseModel):
+    """Represents a file attachment to be sent to an LLM API."""
+
+    content: bytes
+    filename: Optional[str] = None
+    mime_type: str = "application/octet-stream"
+
+    def __init__(self, **data: Any) -> None:
+        """Initialize with sensible defaults for filename if not provided."""
+        if "filename" not in data or data["filename"] is None:
+            # Generate a more readable unique filename
+            unique_id = str(uuid.uuid4())[:8]
+            data["filename"] = f"attachment_{unique_id}.bin"
+        super().__init__(**data)
+
+    @classmethod
+    def from_path(cls, file_path: Union[str, Path]) -> "FileAttachment":
+        """Create a FileAttachment from a file path.
+
+        Args:
+            file_path: Path to the file to attach
+
+        Returns:
+            FileAttachment instance
+        """
+        path = Path(file_path)
+        with open(path, "rb") as f:
+            content = f.read()
+
+        mime_type, _ = mimetypes.guess_type(path)
+        if mime_type is None:
+            mime_type = "application/octet-stream"
+
+        return cls(content=content, filename=path.name, mime_type=mime_type)
+
+    @classmethod
+    def from_bytes(
+        cls,
+        content: bytes,
+        filename: Optional[str] = None,
+        mime_type: Optional[str] = None,
+    ) -> "FileAttachment":
+        """Create a FileAttachment from bytes content.
+
+        Args:
+            content: Raw bytes content
+            filename: Optional name to use for the file
+            mime_type: MIME type of the content, guessed from filename if provided
+
+        Returns:
+            FileAttachment instance
+        """
+        if mime_type is None and filename is not None:
+            mime_type, _ = mimetypes.guess_type(filename)
+
+        return cls(
+            content=content,
+            filename=filename,
+            mime_type=mime_type or "application/octet-stream",
+        )
+
+    @classmethod
+    def from_io(
+        cls,
+        file_obj: BinaryIO,
+        filename: Optional[str] = None,
+        mime_type: Optional[str] = None,
+    ) -> "FileAttachment":
+        """Create a FileAttachment from a file-like object.
+
+        Args:
+            file_obj: File-like object with binary content
+            filename: Optional name to use for the file
+            mime_type: MIME type of the content, guessed from filename if provided
+
+        Returns:
+            FileAttachment instance
+        """
+        content = file_obj.read()
+        return cls.from_bytes(content, filename, mime_type)
+
+    @classmethod
+    def from_text(
+        cls,
+        text: str,
+        filename: Optional[str] = None,
+        mime_type: str = "text/plain",
+        encoding: str = "utf-8",
+    ) -> "FileAttachment":
+        """Create a FileAttachment from text content.
+
+        Args:
+            text: Text content to include
+            filename: Optional name to use for the file
+            mime_type: MIME type of the content
+            encoding: Text encoding to use
+
+        Returns:
+            FileAttachment instance
+        """
+        content = text.encode(encoding)
+        return cls(content=content, filename=filename, mime_type=mime_type)
+
+    def to_base64(self) -> str:
+        """Convert content to base64 encoding.
+
+        Returns:
+            Base64 encoded string
+        """
+        return base64.b64encode(self.content).decode("utf-8")
+
+    def to_data_uri(self) -> str:
+        """Convert content to a data URI.
+
+        Returns:
+            A data URI string containing the base64-encoded content with MIME type
+        """
+        base64_content = self.to_base64()
+        return f"data:{self.mime_type};base64,{base64_content}"
+
+    def to_dict(self, model: str) -> Dict[str, Any]:
+        """
+        Convert to a dictionary suitable for API requests.
+        Tested only for PDF files.
+
+        Returns:
+            Dictionary with file data
+        """
+        if "gemini" in model.lower():
+            return dict(type="image_url", image_url=dict(url=self.to_data_uri()))
+        elif "claude" in model.lower():
+            # optimistically try this: some API proxies like litellm
+            # support this, and others may not.
+            return dict(
+                type="file",
+                file=dict(
+                    file_data=self.to_data_uri(),
+                ),
+            )
+        else:
+            # fallback: assume file upload is similar to OpenAI API
+            return dict(
+                type="file",
+                file=dict(
+                    filename=self.filename,
+                    file_data=self.to_data_uri(),
+                ),
+            )
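
A quick sketch of the new class in use (the file path is hypothetical):

```python
from langroid.parsing.file_attachment import FileAttachment

pdf = FileAttachment.from_path("report.pdf")  # MIME type guessed: application/pdf
note = FileAttachment.from_text("hello", filename="note.txt")

print(pdf.to_data_uri()[:28])                   # data:application/pdf;base64,
print(pdf.to_dict("gpt-4o")["type"])            # "file" (OpenAI-style fallback)
print(pdf.to_dict("gemini-2.0-flash")["type"])  # "image_url" (data-URI branch)
```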

langroid/parsing/parser.py CHANGED
@@ -43,6 +43,9 @@ class LLMPdfParserConfig(BaseSettings):
     max_tokens: Optional[int] = None
     split_on_page: Optional[bool] = True
     requests_per_minute: Optional[int] = 5
+    timeout: int = 60
+    prompt: str = ""  # override with a domain-specific prompt
+    system_prompt: str = ""  # override with a domain-specific system prompt
 
 
 class MarkerConfig(BaseSettings):
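
A sketch of overriding the new fields; the model choice and prompt text are illustrative:

```python
from langroid.parsing.parser import LLMPdfParserConfig

pdf_parser_cfg = LLMPdfParserConfig(
    model_name="gemini-2.0-flash",  # hypothetical choice of parsing model
    timeout=120,  # allow slower responses on large PDFs
    prompt="Extract only the tables in this PDF, as markdown tables.",
    system_prompt="You are a precise PDF-to-markdown table extractor.",
)
```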

langroid-0.52.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: langroid
-Version: 0.51.1
+Version: 0.52.0
 Summary: Harness LLMs with Multi-Agent Programming
 Author-email: Prasad Chalasani <pchalasani@gmail.com>
 License: MIT
@@ -846,7 +846,7 @@ import langroid.language_models as lm
 
 mdl = lm.OpenAIGPT(
     lm.OpenAIGPTConfig(
-        chat_model=lm.OpenAIChatModel.GPT4,  # or, e.g. "ollama/qwen2.5"
+        chat_model=lm.OpenAIChatModel.GPT4o,  # or, e.g. "ollama/qwen2.5"
     ),
 )
 
langroid-0.52.0.dist-info/RECORD CHANGED
@@ -3,10 +3,10 @@ langroid/exceptions.py,sha256=OPjece_8cwg94DLPcOGA1ddzy5bGh65pxzcHMnssTz8,2995
 langroid/mytypes.py,sha256=HIcYAqGeA9OK0Hlscym2FI5Oax9QFljDZoVgRlomhRk,4014
 langroid/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 langroid/agent/__init__.py,sha256=ll0Cubd2DZ-fsCMl7e10hf9ZjFGKzphfBco396IKITY,786
-langroid/agent/base.py,sha256=bs5OLCf534mhsdR7Rgf27GqVNuSV2bOVbD46Y86mGFA,79829
+langroid/agent/base.py,sha256=lWR4ivX_elTFejpknLhkO-DlAGT3aG6ojQAVkzDOqMc,80090
 langroid/agent/batch.py,sha256=vi1r5i1-vN80WfqHDSwjEym_KfGsqPGUtwktmiK1nuk,20635
-langroid/agent/chat_agent.py,sha256=BBXGx4nYUG4xgeRGoT4HmVJ9lVi6E3kPt0YYBW52e3E,84557
-langroid/agent/chat_document.py,sha256=gWceR8mcggyGbJePJQgVvqzVivYlfPlFp8pUZ7yUZvg,17821
+langroid/agent/chat_agent.py,sha256=mIkf3kq5m1RPXeBb6U52pXB9itum0ChcpXABC_g-Xfs,85082
+langroid/agent/chat_document.py,sha256=6O20Fp4QrquykaF2jFtwNHkvcoDte1LLwVZNk9mVH9c,18057
 langroid/agent/openai_assistant.py,sha256=JkAcs02bIrgPNVvUWVR06VCthc5-ulla2QMBzux_q6o,34340
 langroid/agent/task.py,sha256=HB6N-Jn80HFqCf0ZYOC1v3Bn3oO7NLjShHQJJFwW0q4,90557
 langroid/agent/tool_message.py,sha256=BhjP-_TfQ2tgxuY4Yo_JHLOwwt0mJ4BwjPnREvEY4vk,14744
@@ -14,13 +14,13 @@ langroid/agent/xml_tool_message.py,sha256=6SshYZJKIfi4mkE-gIoSwjkEYekQ8GwcSiCv7a
 langroid/agent/callbacks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 langroid/agent/callbacks/chainlit.py,sha256=UHB6P_J40vsVnssosqkpkOVWRf9NK4TOY0_G2g_Arsg,20900
 langroid/agent/special/__init__.py,sha256=gik_Xtm_zV7U9s30Mn8UX3Gyuy4jTjQe9zjiE3HWmEo,1273
-langroid/agent/special/doc_chat_agent.py,sha256=dOL9Y0xAslkwepCdKU8Dc1m5Vk8qgk-gLbU4JzsmTII,65234
+langroid/agent/special/doc_chat_agent.py,sha256=ALp2rv12J-ChRCxVtflDwz6n0qIbUAymldIy8qpsvrg,65236
 langroid/agent/special/doc_chat_task.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 langroid/agent/special/lance_doc_chat_agent.py,sha256=s8xoRs0gGaFtDYFUSIRchsgDVbS5Q3C2b2mr3V1Fd-Q,10419
 langroid/agent/special/lance_tools.py,sha256=qS8x4wi8mrqfbYV2ztFzrcxyhHQ0ZWOc-zkYiH7awj0,2105
 langroid/agent/special/relevance_extractor_agent.py,sha256=zIx8GUdVo1aGW6ASla0NPQjYYIpmriK_TYMijqAx3F8,4796
 langroid/agent/special/retriever_agent.py,sha256=o2UfqiCGME0t85SZ6qjK041_WZYqXSuV1SeH_3KtVuc,1931
-langroid/agent/special/table_chat_agent.py,sha256=d9v2wsblaRx7oMnKhLV7uO_ujvk9gh59pSGvBXyeyNc,9659
+langroid/agent/special/table_chat_agent.py,sha256=ii-xd7pRLLfRhamFZ04zpSkRO4xPn6Rm5qmA4z4N0HA,9661
 langroid/agent/special/arangodb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 langroid/agent/special/arangodb/arangodb_agent.py,sha256=12Y54c84c9qXV-YXRBcI5HaqyiY75JR4TmqlURYKJAM,25851
 langroid/agent/special/arangodb/system_messages.py,sha256=udwfLleTdyz_DuxHuoiv2wHEZoAPBPbwdF_ivjIfP5c,6867
@@ -69,11 +69,11 @@ langroid/embedding_models/protoc/embeddings_pb2.pyi,sha256=UkNy7BrNsmQm0vLb3NtGX
 langroid/embedding_models/protoc/embeddings_pb2_grpc.py,sha256=9dYQqkW3JPyBpSEjeGXTNpSqAkC-6FPtBHyteVob2Y8,2452
 langroid/language_models/__init__.py,sha256=3aD2qC1lz8v12HX4B-dilv27gNxYdGdeu1QvDlkqqHs,1095
 langroid/language_models/azure_openai.py,sha256=SW0Fp_y6HpERr9l6TtF6CYsKgKwjUf_hSL_2mhTV4wI,5034
-langroid/language_models/base.py,sha256=sCDC02hqIgjY73KnCvc-YGxZJm_LAs4Z1VVQpIFWLyQ,27754
+langroid/language_models/base.py,sha256=pfN3t-BktKmN_4K8pwmpjC9OdcHxsytM5s5TmsJ-nPg,28560
 langroid/language_models/config.py,sha256=9Q8wk5a7RQr8LGMT_0WkpjY8S4ywK06SalVRjXlfCiI,378
 langroid/language_models/mock_lm.py,sha256=5BgHKDVRWFbUwDT_PFgTZXz9-k8wJSA2e3PZmyDgQ1k,4022
-langroid/language_models/model_info.py,sha256=tfBBxL0iUf2mVN6CjcvqflzFUVg2oZqOJZexZ8jHTYA,12216
-langroid/language_models/openai_gpt.py,sha256=FG3eMWedko0kN-n-SkSbwnrm5hSxoW2wmJSBAvOAOYU,84731
+langroid/language_models/model_info.py,sha256=0NE1zWNUHJwcM5jhwNxUqGjbpek-Nq7ljGdWpM8R3RQ,13380
+langroid/language_models/openai_gpt.py,sha256=KFbG6q143CYt6SP8rU1UEFX2mffn8yXfVOdEyuA5IaY,85854
 langroid/language_models/utils.py,sha256=hC5p61P_Qlrowkm5wMap1A1b5ZUCwK_XhPIzAQk1T1s,5483
 langroid/language_models/prompt_formatter/__init__.py,sha256=2-5cdE24XoFDhifOLl8yiscohil1ogbP1ECkYdBlBsk,372
 langroid/language_models/prompt_formatter/base.py,sha256=eDS1sgRNZVnoajwV_ZIha6cba5Dt8xjgzdRbPITwx3Q,1221
@@ -82,11 +82,12 @@ langroid/language_models/prompt_formatter/llama2_formatter.py,sha256=YdcO88qyBeu
 langroid/parsing/__init__.py,sha256=2oUWJJAxIavq9Wtw5RGlkXLq3GF3zgXeVLLW4j7yeb8,1138
 langroid/parsing/agent_chats.py,sha256=sbZRV9ujdM5QXvvuHVjIi2ysYSYlap-uqfMMUKulrW0,1068
 langroid/parsing/code_parser.py,sha256=5ze0MBytrGGkU69pA_bJDjRm6QZz_QYfPcIwkagUa7U,3796
-langroid/parsing/document_parser.py,sha256=7_pHu-_yQOETtDATv5VRdVSvac9kJRuZiwQ6EbJqJ_o,57403
+langroid/parsing/document_parser.py,sha256=cUcp4JKS_LpsjX7OqnGBhHorDHx7FG5pvKGjRBkQoMw,57685
+langroid/parsing/file_attachment.py,sha256=iIMTmAkfu-TgV9CKnDEB-BiDDN6WOepH51sIz6-PnFw,4826
 langroid/parsing/md_parser.py,sha256=JUgsUpCaeAuBndmtDaJR9HMZaje1gmtXtaLXJHst3i8,21340
 langroid/parsing/para_sentence_split.py,sha256=AJBzZojP3zpB-_IMiiHismhqcvkrVBQ3ZINoQyx_bE4,2000
 langroid/parsing/parse_json.py,sha256=aADo38bAHQhC8on4aWZZzVzSDy-dK35vRLZsFI2ewh8,4756
-langroid/parsing/parser.py,sha256=Tbe1mQ7wp6GVx2xMWv1raIkpepTN0qNrqOxakWY6Zkc,15437
+langroid/parsing/parser.py,sha256=uaAITarcGI2504zcP_dLhp3LjNdh9A6R_yS-o_VcaH8,15599
 langroid/parsing/pdf_utils.py,sha256=rmNJ9UzuBgXTAYwj1TtRJcD8h53x7cizhgyYHKO88I4,1513
 langroid/parsing/repo_loader.py,sha256=NpysuyzRHvgL3F4BB_wGo5sCUnZ3FOlVCJmZ7CaUdbs,30202
 langroid/parsing/routing.py,sha256=-FcnlqldzL4ZoxuDwXjQPNHgBe9F9-F4R6q7b_z9CvI,1232
@@ -129,7 +130,7 @@ langroid/vector_store/pineconedb.py,sha256=otxXZNaBKb9f_H75HTaU3lMHiaR2NUp5MqwLZ
 langroid/vector_store/postgres.py,sha256=wHPtIi2qM4fhO4pMQr95pz1ZCe7dTb2hxl4VYspGZoA,16104
 langroid/vector_store/qdrantdb.py,sha256=O6dSBoDZ0jzfeVBd7LLvsXu083xs2fxXtPa9gGX3JX4,18443
 langroid/vector_store/weaviatedb.py,sha256=Yn8pg139gOy3zkaPfoTbMXEEBCiLiYa1MU5d_3UA1K4,11847
-langroid-0.51.1.dist-info/METADATA,sha256=9E0M5JzLk_fuMOLH918i7fIBwWKMm1O6J3VY8DoG3NM,63641
-langroid-0.51.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-langroid-0.51.1.dist-info/licenses/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
-langroid-0.51.1.dist-info/RECORD,,
+langroid-0.52.0.dist-info/METADATA,sha256=xiHZhRlCgRm6s8wRIZ300xBO5gUW1j2wqWZG9rj1npc,63642
+langroid-0.52.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+langroid-0.52.0.dist-info/licenses/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
+langroid-0.52.0.dist-info/RECORD,,