PyPI - langroid - Versions diffs - 0.50.12__py3-none-any.whl → 0.51.1__py3-none-any.whl - Mend

langroid 0.50.12__py3-none-any.whl → 0.51.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

langroid/parsing/document_parser.py CHANGED Viewed

@@ -1,5 +1,6 @@
 from __future__ import annotations
+import base64
 import itertools
 import logging
 import os
@@ -148,8 +149,8 @@ class DocumentParser(Parser):
                 return UnstructuredPDFParser(source, config)
             elif config.pdf.library == "pdf2image":
                 return ImagePdfParser(source, config)
-            elif config.pdf.library == "gemini":
-                return GeminiPdfParser(source, config)
+            elif config.pdf.library == "llm-pdf-parser":
+                return LLMPdfParser(source, config)
             elif config.pdf.library == "marker":
                 return MarkerPdfParser(source, config)
             else:
@@ -993,13 +994,13 @@ class MarkitdownPPTXParser(DocumentParser):
         )
-class GeminiPdfParser(DocumentParser):
+class LLMPdfParser(DocumentParser):
     """
-    This class converts PDFs to Markdown using Gemini multimodal LLMs.
+    This class converts PDFs to Markdown using multimodal LLMs.
     It extracts pages, converts them with the LLM (replacing images with
     detailed descriptions), and outputs Markdown page by page. The
-    conversion follows `GEMINI_SYSTEM_INSTRUCTION`. It employs
+    conversion follows `LLM_PDF_MD_SYSTEM_INSTRUCTION`. It employs
     multiprocessing for speed, async requests with rate limiting, and
     handles errors.
@@ -1008,9 +1009,9 @@ class GeminiPdfParser(DocumentParser):
     """
     DEFAULT_MAX_TOKENS = 7000
-    OUTPUT_DIR = Path(".gemini_pdfparser")  # Fixed output directory
+    OUTPUT_DIR = Path(".llm_pdfparser")  # Fixed output directory
-    GEMINI_SYSTEM_INSTRUCTION = """
+    LLM_PDF_MD_SYSTEM_INSTRUCTION = """
     ### **Convert PDF to Markdown**
     1. **Text:**
         * Preserve structure, formatting (**bold**, *italic*), lists, and indentation.
@@ -1035,11 +1036,11 @@ class GeminiPdfParser(DocumentParser):
     def __init__(self, source: Union[str, bytes], config: ParsingConfig):
         super().__init__(source, config)
-        if not config.pdf.gemini_config:
+        if not config.pdf.llm_parser_config:
             raise ValueError(
-                "GeminiPdfParser requires a Gemini-based config in pdf parsing config"
+                "LLMPdfParser requires a llm-based config in pdf parsing config"
             )
-        self.model_name = config.pdf.gemini_config.model_name
+        self.model_name = config.pdf.llm_parser_config.model_name
         # Ensure output directory exists
         self.OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
@@ -1058,7 +1059,9 @@ class GeminiPdfParser(DocumentParser):
         temp_file.close()
         self.output_filename = Path(temp_file.name)
-        self.max_tokens = config.pdf.gemini_config.max_tokens or self.DEFAULT_MAX_TOKENS
+        self.max_tokens = (
+            config.pdf.llm_parser_config.max_tokens or self.DEFAULT_MAX_TOKENS
+        )
         """
         If True, each PDF page is processed as a separate chunk,
@@ -1066,12 +1069,12 @@ class GeminiPdfParser(DocumentParser):
         grouped into chunks based on `max_token_limit` before being sent
         to the LLM.
         """
-        self.split_on_page = config.pdf.gemini_config.split_on_page or False
+        self.split_on_page = config.pdf.llm_parser_config.split_on_page or False
         # Rate limiting parameters
         import asyncio
-        self.requests_per_minute = config.pdf.gemini_config.requests_per_minute or 5
+        self.requests_per_minute = config.pdf.llm_parser_config.requests_per_minute or 5
         """
         A semaphore to control the number of concurrent requests to the LLM,
@@ -1175,7 +1178,7 @@ class GeminiPdfParser(DocumentParser):
             "page_numbers": page_numbers,  # List of page numbers in this chunk
         }
-    def _prepare_pdf_chunks_for_gemini(
+    def _prepare_pdf_chunks_for_llm(
         self,
         num_workers: Optional[int] = None,
         max_tokens: int = DEFAULT_MAX_TOKENS,
@@ -1198,37 +1201,102 @@ class GeminiPdfParser(DocumentParser):
                 pdf_chunks = pool.map(self._merge_pages_into_pdf_with_metadata, chunks)
             return pdf_chunks
-    async def _send_chunk_to_gemini(
-        self, chunk: Dict[str, Any], gemini_api_key: str
-    ) -> str:
+    @staticmethod
+    def _page_num_str(page_numbers: Any) -> str:
+        """
+        Converts page numbers to a formatted string.
+        """
+        if isinstance(page_numbers, list):
+            if len(page_numbers) == 0:
+                return ""
+            return str(page_numbers[0]) + "-" + str(page_numbers[-1])
+        elif isinstance(page_numbers, int):
+            return str(page_numbers)
+        else:
+            return str(page_numbers).replace(" ", "-")
+    async def _send_chunk_to_llm(self, chunk: Dict[str, Any]) -> str:
         """
-        Sends a PDF chunk to the Gemini API and returns the response text.
+        Sends a PDF chunk to the LLM API and returns the response text.
         Uses retries with exponential backoff to handle transient failures.
         """
         import asyncio
         import logging
-        from google import genai
-        from google.genai import types
+        from langroid.language_models.openai_gpt import OpenAIGPT, OpenAIGPTConfig
         async with self.semaphore:  # Limit concurrent API requests
             for attempt in range(self.max_retries):
                 try:
-                    client = genai.Client(api_key=gemini_api_key)
+                    llm_config = OpenAIGPTConfig(
+                        chat_model=self.model_name,
+                        max_output_tokens=self.max_tokens,
+                    )
+                    llm = OpenAIGPT(config=llm_config)
+                    page_nums = self._page_num_str(chunk.get("page_numbers", "?"))
+                    base64_string = base64.b64encode(chunk["pdf_bytes"]).decode("utf-8")
+                    data_uri = f"data:application/pdf;base64,{base64_string}"
+                    if "gemini" in self.model_name.lower():
+                        file_content = dict(
+                            type="image_url",
+                            image_url=dict(url=data_uri),
+                        )
+                    elif "claude" in self.model_name.lower():
+                        # optimistrally try this: some API proxies like litellm
+                        # support this, and others may not.
+                        file_content = dict(
+                            type="file",
+                            file=dict(
+                                file_data=data_uri,
+                            ),
+                        )
+                    else:
+                        # fallback: assume file upload is similar to OpenAI API
+                        file_content = dict(
+                            type="file",
+                            file=dict(
+                                filename=f"pages-{page_nums}.pdf",
+                                file_data=data_uri,
+                            ),
+                        )
                     # Send the request with PDF content and system instructions
-                    response = await client.aio.models.generate_content(
-                        model=self.model_name,
-                        contents=[
-                            types.Part.from_bytes(
-                                data=chunk["pdf_bytes"], mime_type="application/pdf"
+                    response = await llm.async_client.chat.completions.create(  # type: ignore
+                        model=self.model_name.split("/")[-1],
+                        messages=[
+                            dict(
+                                role="system",
+                                content="""
+                                You are an expert pdf -> markdown converter.
+                                Do NOT use any triple backquotes when you present the
+                                markdown content,like ```markdown etc.
+                                FAITHFULLY CONVERT THE PDF TO MARKDOWN,
+                                retaining ALL content as you find it.
+                                """,
+                            ),
+                            dict(  # type: ignore
+                                role="user",
+                                content=[
+                                    dict(
+                                        type="text",
+                                        text=self.LLM_PDF_MD_SYSTEM_INSTRUCTION,
+                                    ),
+                                    file_content,
+                                ],
                             ),
-                            self.GEMINI_SYSTEM_INSTRUCTION,
                         ],
                     )
                     # Return extracted text if available
-                    return str(response.text) if response.text else ""
+                    return (
+                        ""
+                        if (
+                            response is None
+                            or not hasattr(response, "choices")
+                            or not isinstance(response.choices, list)
+                        )
+                        else (response.choices[0].message.content)
+                    )
                 except Exception as e:
                     # Log error with page numbers for debugging
@@ -1246,33 +1314,34 @@ class GeminiPdfParser(DocumentParser):
                         await asyncio.sleep(delay)
                     else:
                         # Log failure after max retries
+                        page_nums = chunk.get("page_numbers", "Unknown")
                         logging.error(
-                            "Max retries reached for pages %s",
-                            chunk.get("page_numbers", "Unknown"),
+                            f"""
+                            Max retries reached for pages {page_nums}.
+                            It is possible your LLM API provider for
+                            the model {self.model_name} does not support
+                            file uploads via an OpenAI-compatible API.
+                            """,
                         )
                         break
         return ""  # Return empty string if all retries fail
-    async def process_chunks(
-        self, chunks: List[Dict[str, Any]], api_key: str
-    ) -> List[str]:
+    async def process_chunks(self, chunks: List[Dict[str, Any]]) -> List[str]:
         """
-        Processes PDF chunks by sending them to the Gemini API and
+        Processes PDF chunks by sending them to the LLM API and
         collecting the results.
         Args:
             chunks: A list of dictionaries, where each dictionary represents
                 a PDF chunk and contains the PDF data and page numbers.
-            api_key: The Gemini API key.
         """
         # To show nice progress bar
         from tqdm.asyncio import tqdm_asyncio
-        # Create a list of asynchronous tasks to send each chunk to Gemini.
+        # Create a list of asynchronous tasks to send each chunk to the LLM.
         # Chunk in this case might be single page or group of pages returned
         # by prepare_pdf_chunks function
-        tasks = [self._send_chunk_to_gemini(chunk, api_key) for chunk in chunks]
+        tasks = [self._send_chunk_to_llm(chunk) for chunk in chunks]
         # Gather the results from all tasks, allowing exceptions to be returned.
         # tqdm_asyncio is wrapper around asyncio.gather
@@ -1311,7 +1380,7 @@ class GeminiPdfParser(DocumentParser):
     def iterate_pages(self) -> Generator[Tuple[int, Any], None, None]:
         """
         Iterates over the document pages, extracting content using the
-        Gemini API, saves them to a markdown file, and yields page numbers
+        LLM API, saves them to a markdown file, and yields page numbers
         along with their corresponding content.
         Yields:
@@ -1319,14 +1388,8 @@ class GeminiPdfParser(DocumentParser):
             (int) and the page content (Any).
         """
         import asyncio
-        import os
-        # Load environment variables (e.g., GEMINI_API_KEY) from a .env file.
         load_dotenv()
-        gemini_api_key = os.getenv("GEMINI_API_KEY")
-        if not gemini_api_key:
-            raise ValueError("GEMINI_API_KEY not found in environment variables.")
         try:
             # This involves extracting pages, grouping them according to the
             # `max_tokens` limit (if `split_on_page` is False), and
@@ -1335,18 +1398,16 @@ class GeminiPdfParser(DocumentParser):
             # PDF bytes and the associated page numbers or single page if
             # `split_on_page` is true
-            pdf_chunks = self._prepare_pdf_chunks_for_gemini(
+            pdf_chunks = self._prepare_pdf_chunks_for_llm(
                 num_workers=8,
                 max_tokens=self.max_tokens,
                 split_on_page=self.split_on_page,
             )
             # We asynchronously processes each chunk, sending it
-            # to Gemini and retrieving the Markdown output. It handles rate
+            # to the LLM and retrieving the Markdown output. It handles rate
             # limiting and retries.
-            markdown_results = asyncio.run(
-                self.process_chunks(pdf_chunks, gemini_api_key)
-            )
+            markdown_results = asyncio.run(self.process_chunks(pdf_chunks))
             # This file serves as an intermediate storage location for the
             # complete Markdown output.

langroid/parsing/parser.py CHANGED Viewed

@@ -36,10 +36,10 @@ class BaseParsingConfig(BaseSettings):
         extra = "ignore"  # Ignore unknown settings
-class GeminiConfig(BaseSettings):
-    """Configuration for Gemini-based parsing."""
+class LLMPdfParserConfig(BaseSettings):
+    """Configuration for LLM-based parsing."""
-    model_name: str = "gemini-2.0-flash"  # Default model
+    model_name: str = "gemini/gemini-2.0-flash"  # Default model
     max_tokens: Optional[int] = None
     split_on_page: Optional[bool] = True
     requests_per_minute: Optional[int] = 5
@@ -60,10 +60,10 @@ class PdfParsingConfig(BaseParsingConfig):
         "unstructured",
         "pdf2image",
         "markitdown",
-        "gemini",
+        "llm-pdf-parser",
         "marker",
     ] = "pymupdf4llm"
-    gemini_config: Optional[GeminiConfig] = None
+    llm_parser_config: Optional[LLMPdfParserConfig] = None
     marker_config: Optional[MarkerConfig] = None
     @root_validator(pre=True)
@@ -71,10 +71,10 @@ class PdfParsingConfig(BaseParsingConfig):
         """Ensure correct config is set based on library selection."""
         library = values.get("library")
-        if library == "gemini":
-            values.setdefault("gemini_config", GeminiConfig())
+        if library == "llm-pdf-parser":
+            values.setdefault("llm_parser_config", LLMPdfParserConfig())
         else:
-            values["gemini_config"] = None
+            values["llm_parser_config"] = None
         if library == "marker":
             values.setdefault("marker_config", MarkerConfig())

{langroid-0.50.12.dist-info → langroid-0.51.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: langroid
-Version: 0.50.12
+Version: 0.51.1
 Summary: Harness LLMs with Multi-Agent Programming
 Author-email: Prasad Chalasani <pchalasani@gmail.com>
 License: MIT

{langroid-0.50.12.dist-info → langroid-0.51.1.dist-info}/RECORD RENAMED Viewed

@@ -82,11 +82,11 @@ langroid/language_models/prompt_formatter/llama2_formatter.py,sha256=YdcO88qyBeu
 langroid/parsing/__init__.py,sha256=2oUWJJAxIavq9Wtw5RGlkXLq3GF3zgXeVLLW4j7yeb8,1138
 langroid/parsing/agent_chats.py,sha256=sbZRV9ujdM5QXvvuHVjIi2ysYSYlap-uqfMMUKulrW0,1068
 langroid/parsing/code_parser.py,sha256=5ze0MBytrGGkU69pA_bJDjRm6QZz_QYfPcIwkagUa7U,3796
-langroid/parsing/document_parser.py,sha256=XihXwhp--Nxhb8xoh6wth_isJCGUROKiVr3rPDOJodU,54359
+langroid/parsing/document_parser.py,sha256=7_pHu-_yQOETtDATv5VRdVSvac9kJRuZiwQ6EbJqJ_o,57403
 langroid/parsing/md_parser.py,sha256=JUgsUpCaeAuBndmtDaJR9HMZaje1gmtXtaLXJHst3i8,21340
 langroid/parsing/para_sentence_split.py,sha256=AJBzZojP3zpB-_IMiiHismhqcvkrVBQ3ZINoQyx_bE4,2000
 langroid/parsing/parse_json.py,sha256=aADo38bAHQhC8on4aWZZzVzSDy-dK35vRLZsFI2ewh8,4756
-langroid/parsing/parser.py,sha256=YPE6X6efimz2bYbardrhHHKw7V1LZvq-vF0q5p5XzOk,15387
+langroid/parsing/parser.py,sha256=Tbe1mQ7wp6GVx2xMWv1raIkpepTN0qNrqOxakWY6Zkc,15437
 langroid/parsing/pdf_utils.py,sha256=rmNJ9UzuBgXTAYwj1TtRJcD8h53x7cizhgyYHKO88I4,1513
 langroid/parsing/repo_loader.py,sha256=NpysuyzRHvgL3F4BB_wGo5sCUnZ3FOlVCJmZ7CaUdbs,30202
 langroid/parsing/routing.py,sha256=-FcnlqldzL4ZoxuDwXjQPNHgBe9F9-F4R6q7b_z9CvI,1232
@@ -129,7 +129,7 @@ langroid/vector_store/pineconedb.py,sha256=otxXZNaBKb9f_H75HTaU3lMHiaR2NUp5MqwLZ
 langroid/vector_store/postgres.py,sha256=wHPtIi2qM4fhO4pMQr95pz1ZCe7dTb2hxl4VYspGZoA,16104
 langroid/vector_store/qdrantdb.py,sha256=O6dSBoDZ0jzfeVBd7LLvsXu083xs2fxXtPa9gGX3JX4,18443
 langroid/vector_store/weaviatedb.py,sha256=Yn8pg139gOy3zkaPfoTbMXEEBCiLiYa1MU5d_3UA1K4,11847
-langroid-0.50.12.dist-info/METADATA,sha256=b1vQBIkydfimg9r80ud7w07d7540XJAdhpegeqAPPTw,63642
-langroid-0.50.12.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-langroid-0.50.12.dist-info/licenses/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
-langroid-0.50.12.dist-info/RECORD,,
+langroid-0.51.1.dist-info/METADATA,sha256=9E0M5JzLk_fuMOLH918i7fIBwWKMm1O6J3VY8DoG3NM,63641
+langroid-0.51.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+langroid-0.51.1.dist-info/licenses/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
+langroid-0.51.1.dist-info/RECORD,,

{langroid-0.50.12.dist-info → langroid-0.51.1.dist-info}/WHEEL RENAMED Viewed

File without changes

{langroid-0.50.12.dist-info → langroid-0.51.1.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

langroid 0.50.12__py3-none-any.whl → 0.51.1__py3-none-any.whl

langroid 0.50.12__py3-none-any.whl → 0.51.1__py3-none-any.whl