lexoid 0.1.11__py3-none-any.whl → 0.1.12__py3-none-any.whl
This diff shows the changes between publicly available package versions released to one of the supported registries, exactly as they appear in those registries. It is provided for informational purposes only.
- lexoid/api.py +48 -8
- lexoid/core/parse_type/llm_parser.py +34 -26
- lexoid/core/parse_type/static_parser.py +41 -6
- lexoid/core/utils.py +15 -11
- {lexoid-0.1.11.dist-info → lexoid-0.1.12.dist-info}/METADATA +60 -20
- lexoid-0.1.12.dist-info/RECORD +9 -0
- lexoid-0.1.11.dist-info/RECORD +0 -9
- {lexoid-0.1.11.dist-info → lexoid-0.1.12.dist-info}/LICENSE +0 -0
- {lexoid-0.1.11.dist-info → lexoid-0.1.12.dist-info}/WHEEL +0 -0
lexoid/api.py
CHANGED
@@ -1,3 +1,4 @@
+import json
 import os
 import re
 import tempfile
@@ -50,7 +51,8 @@ def parse_chunk(path: str, parser_type: ParserType, **kwargs) -> Dict:
         - token_usage: Dictionary containing token usage statistics
     """
     if parser_type == ParserType.AUTO:
-        parser_type = ParserType[router(path)]
+        router_priority = kwargs.get("router_priority", "speed")
+        parser_type = ParserType[router(path, router_priority)]
        logger.debug(f"Auto-detected parser type: {parser_type}")

    kwargs["start"] = (
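With this change, AUTO routing accepts an optional `router_priority` keyword (defaulting to `"speed"`) that is forwarded to `router()` in `utils.py`. A minimal sketch of opting into accuracy-first routing; the file name is a placeholder:

```python
from lexoid.api import parse

# "speed" (the default) prefers STATIC_PARSE; "accuracy" prefers LLM_PARSE
# for PDFs that contain images. "sample.pdf" is a hypothetical path.
result = parse("sample.pdf", parser_type="AUTO", router_priority="accuracy")
```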
@@ -80,7 +82,7 @@ def parse_chunk_list(
     """
     combined_segments = []
     raw_texts = []
-    token_usage = {"input": 0, "output": 0}
+    token_usage = {"input": 0, "output": 0, "image_count": 0}
     for file_path in file_paths:
         result = parse_chunk(file_path, parser_type, **kwargs)
         combined_segments.extend(result["segments"])
@@ -88,6 +90,7 @@ def parse_chunk_list(
         if "token_usage" in result:
             token_usage["input"] += result["token_usage"]["input"]
             token_usage["output"] += result["token_usage"]["output"]
+            token_usage["image_count"] += len(result["segments"])
     token_usage["total"] = token_usage["input"] + token_usage["output"]

     return {
@@ -135,14 +138,20 @@ def parse(

     if type(parser_type) == str:
         parser_type = ParserType[parser_type]
+    if (
+        path.lower().endswith((".doc", ".docx"))
+        and parser_type != ParserType.STATIC_PARSE
+    ):
+        as_pdf = True
+    if path.lower().endswith(".xlsx") and parser_type == ParserType.LLM_PARSE:
+        logger.warning("LLM_PARSE does not support .xlsx files. Using STATIC_PARSE.")
+        parser_type = ParserType.STATIC_PARSE
+    if path.lower().endswith(".pptx") and parser_type == ParserType.LLM_PARSE:
+        logger.warning("LLM_PARSE does not support .pptx files. Using STATIC_PARSE.")
+        parser_type = ParserType.STATIC_PARSE

     with tempfile.TemporaryDirectory() as temp_dir:
-        if (
-            path.lower().endswith((".doc", ".docx"))
-            and parser_type != ParserType.STATIC_PARSE
-        ):
-            as_pdf = True
-
+        kwargs["temp_dir"] = temp_dir
         if path.startswith(("http://", "https://")):
             kwargs["url"] = path
             download_dir = kwargs.get("save_dir", os.path.join(temp_dir, "downloads/"))
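`parse()` now normalizes Office formats up front: .doc/.docx are flagged for PDF conversion unless STATIC_PARSE is forced, and .xlsx/.pptx requests for LLM_PARSE are downgraded to STATIC_PARSE with a warning. A sketch of the resulting behavior, with hypothetical file names:

```python
from lexoid.api import parse

# .docx is converted to PDF first (as_pdf=True) unless STATIC_PARSE is forced.
docx_result = parse("report.docx", parser_type="LLM_PARSE")

# These two fall back to STATIC_PARSE and log a warning.
xlsx_result = parse("table.xlsx", parser_type="LLM_PARSE")
pptx_result = parse("slides.pptx", parser_type="LLM_PARSE")
```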
@@ -210,9 +219,40 @@ def parse(
             "token_usage": {
                 "input": sum(r["token_usage"]["input"] for r in chunk_results),
                 "output": sum(r["token_usage"]["output"] for r in chunk_results),
+                "image_count": sum(
+                    r["token_usage"]["image_count"] for r in chunk_results
+                ),
                 "total": sum(r["token_usage"]["total"] for r in chunk_results),
             },
         }
+
+        if "api_cost_mapping" in kwargs:
+            api_cost_mapping = kwargs["api_cost_mapping"]
+            if isinstance(api_cost_mapping, dict):
+                api_cost_mapping = api_cost_mapping
+            elif isinstance(api_cost_mapping, str) and os.path.exists(
+                api_cost_mapping
+            ):
+                with open(api_cost_mapping, "r") as f:
+                    api_cost_mapping = json.load(f)
+            else:
+                raise ValueError(f"Unsupported API cost value: {api_cost_mapping}.")
+
+            api_cost = api_cost_mapping.get(
+                kwargs.get("model", "gemini-2.0-flash"), None
+            )
+            if api_cost:
+                token_usage = result["token_usage"]
+                token_cost = {
+                    "input": token_usage["input"] * api_cost["input"] / 1_000_000
+                    + api_cost.get("input-image", 0) * token_usage["image_count"],
+                    "output": token_usage["output"]
+                    * api_cost["output"]
+                    / 1_000_000,
+                }
+                token_cost["total"] = token_cost["input"] + token_cost["output"]
+                result["token_cost"] = token_cost
+
         if as_pdf:
             result["pdf_path"] = path

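The new `api_cost_mapping` kwarg (a dict, or a path to a JSON file of the same shape) lets `parse()` attach a `token_cost` estimate to the result. Per the code above, rates are per million tokens, and an optional `input-image` rate is charged once per counted image. A sketch with made-up rates:

```python
from lexoid.api import parse

# Hypothetical rates: $0.10/M input tokens, $0.40/M output tokens,
# plus a flat $0.0001 per input image.
cost_map = {"gemini-2.0-flash": {"input": 0.10, "output": 0.40, "input-image": 0.0001}}

result = parse("sample.pdf", parser_type="LLM_PARSE", api_cost_mapping=cost_map)

# For, say, 50,000 input tokens, 8,000 output tokens, and 4 images:
#   input  = 50_000 * 0.10 / 1_000_000 + 0.0001 * 4 = 0.0054
#   output =  8_000 * 0.40 / 1_000_000              = 0.0032
print(result["token_cost"])  # {"input": ..., "output": ..., "total": ...}
```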
lexoid/core/parse_type/llm_parser.py
CHANGED
@@ -31,6 +31,7 @@ def retry_on_http_error(func):
             logger.error(f"HTTPError encountered: {e}. Retrying in 10 seconds...")
             time.sleep(10)
             try:
+                logger.debug(f"Retry {func.__name__}")
                 return func(*args, **kwargs)
             except HTTPError as e:
                 logger.error(f"Retry failed: {e}")
@@ -49,6 +50,8 @@ def retry_on_http_error(func):

 @retry_on_http_error
 def parse_llm_doc(path: str, **kwargs) -> List[Dict] | str:
+    if "api_provider" in kwargs and kwargs["api_provider"]:
+        return parse_with_api(path, api=kwargs["api_provider"], **kwargs)
     if "model" not in kwargs:
         kwargs["model"] = "gemini-2.0-flash"
     model = kwargs.get("model")
@@ -57,9 +60,11 @@ def parse_llm_doc(path: str, **kwargs) -> List[Dict] | str:
     if model.startswith("gpt"):
         return parse_with_api(path, api="openai", **kwargs)
     if model.startswith("meta-llama"):
-        if
+        if "Turbo" in model or model == "meta-llama/Llama-Vision-Free":
             return parse_with_api(path, api="together", **kwargs)
         return parse_with_api(path, api="huggingface", **kwargs)
+    if any(model.startswith(prefix) for prefix in ["microsoft", "google", "qwen"]):
+        return parse_with_api(path, api="openrouter", **kwargs)
     raise ValueError(f"Unsupported model: {model}")

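Provider selection can now be pinned explicitly with the `api_provider` kwarg (see the previous hunk), which short-circuits the model-prefix rules above ("gpt" → OpenAI, "meta-llama" → Together or Hugging Face, "microsoft"/"google"/"qwen" → OpenRouter). A sketch, assuming the kwarg is threaded through from `parse()`; the path is hypothetical:

```python
from lexoid.api import parse

# The "qwen" prefix alone routes to OpenRouter...
result = parse("sample.pdf", parser_type="LLM_PARSE",
               model="qwen/qwen-2.5-vl-7b-instruct")

# ...but api_provider makes the choice explicit and skips prefix matching.
result = parse("sample.pdf", parser_type="LLM_PARSE",
               model="qwen/qwen-2.5-vl-7b-instruct", api_provider="openrouter")
```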
@@ -81,20 +86,20 @@ def parse_with_gemini(path: str, **kwargs) -> List[Dict] | str:
         file_content = file.read()
         base64_file = base64.b64encode(file_content).decode("utf-8")

-    # Ideally, we do this ourselves. But, for now this might be a good enough.
-    custom_instruction = f"""- Total number of pages: {kwargs["pages_per_split_"]}. {INSTRUCTIONS_ADD_PG_BREAK}"""
-    if kwargs["pages_per_split_"] == 1:
-        custom_instruction = ""
+    if "system_prompt" in kwargs:
+        prompt = kwargs["system_prompt"]
+    else:
+        # Ideally, we do this ourselves. But, for now this might be a good enough.
+        custom_instruction = f"""- Total number of pages: {kwargs["pages_per_split_"]}. {INSTRUCTIONS_ADD_PG_BREAK}"""
+        if kwargs["pages_per_split_"] == 1:
+            custom_instruction = ""
+        prompt = PARSER_PROMPT.format(custom_instructions=custom_instruction)

     payload = {
         "contents": [
             {
                 "parts": [
-                    {
-                        "text": PARSER_PROMPT.format(
-                            custom_instructions=custom_instruction
-                        )
-                    },
+                    {"text": prompt},
                     {"inline_data": {"mime_type": mime_type, "data": base64_file}},
                 ]
             }
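`parse_with_gemini` now honors a caller-supplied `system_prompt` and only falls back to `PARSER_PROMPT` (with the page-count/page-break instruction) when none is given. A sketch; the prompt text is illustrative:

```python
from lexoid.api import parse

# A custom system_prompt replaces PARSER_PROMPT entirely, so it should carry
# any formatting instructions you still want (e.g., page-break markers).
result = parse(
    "sample.pdf",
    parser_type="LLM_PARSE",
    model="gemini-2.0-flash",
    system_prompt="Transcribe this document into GitHub-flavored Markdown.",
)
```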
@@ -105,9 +110,11 @@ def parse_with_gemini(path: str, **kwargs) -> List[Dict] | str:
     }

     headers = {"Content-Type": "application/json"}
-
-    response = requests.post(url, json=payload, headers=headers)
-    response.raise_for_status()
+    try:
+        response = requests.post(url, json=payload, headers=headers, timeout=120)
+        response.raise_for_status()
+    except requests.Timeout as e:
+        raise HTTPError(f"Timeout error occurred: {e}")

     result = response.json()

@@ -130,7 +137,7 @@ def parse_with_gemini(path: str, **kwargs) -> List[Dict] | str:
     total_tokens = input_tokens + output_tokens

     return {
-        "raw": combined_text,
+        "raw": combined_text.replace("<page-break>", "\n\n"),
         "segments": [
             {"metadata": {"page": kwargs.get("start", 0) + page_no}, "content": page}
             for page_no, page in enumerate(combined_text.split("<page-break>"), start=1)
@@ -181,6 +188,10 @@ def parse_with_api(path: str, api: str, **kwargs) -> List[Dict] | str:
             token=os.environ["HUGGINGFACEHUB_API_TOKEN"]
         ),
         "together": lambda: Together(),
+        "openrouter": lambda: OpenAI(
+            base_url="https://openrouter.ai/api/v1",
+            api_key=os.environ["OPENROUTER_API_KEY"],
+        ),
     }
     assert api in clients, f"Unsupported API: {api}"
     logger.debug(f"Parsing with {api} API and model {kwargs['model']}")
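The OpenRouter client is the standard OpenAI SDK pointed at OpenRouter's OpenAI-compatible endpoint, and the API key is read eagerly from the environment when the client is built. A sketch with a placeholder key:

```python
import os

# Must be set before parsing with an OpenRouter-served model, alongside the
# existing GOOGLE_API_KEY / OPENAI_API_KEY style variables.
os.environ["OPENROUTER_API_KEY"] = "sk-or-..."  # placeholder value
```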
@@ -206,35 +217,32 @@ def parse_with_api(path: str, api: str, **kwargs) -> List[Dict] | str:

     # API-specific message formatting
     def get_messages(page_num: int, image_url: str) -> List[Dict]:
-        base_message = {
-            "type": "text",
-            "text": LLAMA_PARSER_PROMPT,
-        }
         image_message = {
             "type": "image_url",
             "image_url": {"url": image_url},
         }

         if api == "openai":
+            system_prompt = kwargs.get(
+                "system_prompt", PARSER_PROMPT.format(custom_instructions="")
+            )
+            user_prompt = kwargs.get("user_prompt", OPENAI_USER_PROMPT)
             return [
                 {
                     "role": "system",
-                    "content": PARSER_PROMPT.format(
-                        custom_instructions=INSTRUCTIONS_ADD_PG_BREAK
-                    ),
+                    "content": system_prompt,
                 },
                 {
                     "role": "user",
                     "content": [
-                        {
-                            "type": "text",
-                            "text": f"{OPENAI_USER_PROMPT} (Page {page_num + 1})",
-                        },
+                        {"type": "text", "text": user_prompt},
                         image_message,
                     ],
                 },
             ]
         else:
+            prompt = kwargs.get("system_prompt", LLAMA_PARSER_PROMPT)
+            base_message = {"type": "text", "text": prompt}
             return [
                 {
                     "role": "user",
@@ -283,7 +291,7 @@ def parse_with_api(path: str, api: str, **kwargs) -> List[Dict] | str:
     # Sort results by page number and combine
     all_results.sort(key=lambda x: x[0])
     all_texts = [text for _, text, _, _, _ in all_results]
-    combined_text = "
+    combined_text = "\n\n".join(all_texts)

     return {
         "raw": combined_text,
lexoid/core/parse_type/static_parser.py
CHANGED
@@ -1,12 +1,23 @@
+import os
 import tempfile
+from time import time
+from typing import List, Dict
+
 import pandas as pd
 import pdfplumber
-from
-from lexoid.core.utils import get_file_type, get_uri_rect, html_to_markdown, split_pdf
+from docx import Document
 from pdfminer.high_level import extract_pages
 from pdfminer.layout import LTTextContainer
 from pdfplumber.utils import get_bbox_overlap, obj_to_bbox
-from
+from pptx2md import convert, ConversionConfig
+
+from lexoid.core.utils import (
+    get_file_type,
+    get_uri_rect,
+    html_to_markdown,
+    split_pdf,
+    split_md_by_headings,
+)


 def parse_static_doc(path: str, **kwargs) -> Dict:
@@ -47,8 +58,11 @@ def parse_static_doc(path: str, **kwargs) -> Dict:
             "parent_title": kwargs.get("parent_title", ""),
             "recursive_docs": [],
         }
-    elif file_type == "text/csv":
-        df = pd.read_csv(path)
+    elif file_type == "text/csv" or "spreadsheet" in file_type:
+        if "spreadsheet" in file_type:
+            df = pd.read_excel(path)
+        else:
+            df = pd.read_csv(path)
         content = df.to_markdown(index=False)
         return {
             "raw": content,
@@ -58,6 +72,27 @@ def parse_static_doc(path: str, **kwargs) -> Dict:
             "parent_title": kwargs.get("parent_title", ""),
             "recursive_docs": [],
         }
+    elif "presentation" in file_type:
+        md_path = os.path.join(kwargs["temp_dir"], f"{int(time())}.md")
+        convert(
+            ConversionConfig(
+                pptx_path=path,
+                output_path=md_path,
+                image_dir=None,
+                disable_image=True,
+                disable_notes=True,
+            )
+        )
+        with open(md_path, "r") as f:
+            content = f.read()
+        return {
+            "raw": content,
+            "segments": split_md_by_headings(content, "#"),
+            "title": kwargs["title"],
+            "url": kwargs.get("url", ""),
+            "parent_title": kwargs.get("parent_title", ""),
+            "recursive_docs": [],
+        }
     else:
         raise ValueError(f"Unsupported file type: {file_type}")

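The presentation branch converts the deck to Markdown via pptx2md and then segments it by headings. Because the intermediate file lands in `kwargs["temp_dir"]` (which `parse()` now always sets, per the api.py hunk above), a direct call to `parse_static_doc` must supply that key itself. A sketch with a hypothetical file:

```python
import tempfile

from lexoid.core.parse_type.static_parser import parse_static_doc

# parse() provides temp_dir and title automatically; a direct call must not omit them.
with tempfile.TemporaryDirectory() as tmp:
    result = parse_static_doc("slides.pptx", temp_dir=tmp, title="slides.pptx")
    print(len(result["segments"]), "heading-level segments")
```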
@@ -389,7 +424,7 @@ def parse_with_pdfplumber(path: str, **kwargs) -> Dict:
     ]

     return {
-        "raw": "
+        "raw": "\n\n".join(page_texts),
         "segments": segments,
         "title": kwargs["title"],
         "url": kwargs.get("url", ""),
lexoid/core/utils.py
CHANGED
@@ -46,7 +46,7 @@ def split_pdf(input_path: str, output_dir: str, pages_per_split: int):


 def create_sub_pdf(
-    input_path: str, output_path: str, page_nums: Optional[tuple[int, ...]|int] = None
+    input_path: str, output_path: str, page_nums: Optional[tuple[int, ...] | int] = None
 ) -> str:
     if isinstance(page_nums, int):
         page_nums = (page_nums,)
@@ -106,6 +106,8 @@ def is_supported_file_type(path: str) -> bool:
     if (
         file_type == "application/pdf"
         or "wordprocessing" in file_type
+        or "spreadsheet" in file_type
+        or "presentation" in file_type
         or file_type.startswith("image/")
         or file_type.startswith("text")
     ):
@@ -217,7 +219,7 @@ def split_md_by_headings(markdown_content: str, heading_pattern: str) -> List[Di
         pattern = r"^([^\n]+)\n-+$"
         sections = re.split(pattern, markdown_content, flags=re.MULTILINE)
         # Remove empty sections and strip whitespace
-        sections = [section.strip() for section in sections
+        sections = [section.strip() for section in sections]

         # Handle content before first heading if it exists
         if sections and not re.match(r"^[^\n]+\n-+$", sections[0], re.MULTILINE):
@@ -244,7 +246,7 @@ def split_md_by_headings(markdown_content: str, heading_pattern: str) -> List[Di
         headings = re.findall(regex, markdown_content, flags=re.MULTILINE)

         # Remove empty sections and strip whitespace
-        sections = [section.strip() for section in sections
+        sections = [section.strip() for section in sections]

         # Handle content before first heading if it exists
         if len(sections) > len(headings):
@@ -299,6 +301,7 @@ def html_to_markdown(html: str, title: str, url: str) -> str:

     return content

+
 def get_webpage_soup(url: str) -> BeautifulSoup:
     try:
         from playwright.async_api import async_playwright
@@ -473,7 +476,10 @@ def save_webpage_as_pdf(url: str, output_path: str) -> str:
     Returns:
         str: The path to the saved PDF file.
     """
-    app = QApplication(sys.argv)
+    if not QApplication.instance():
+        app = QApplication(sys.argv)
+    else:
+        app = QApplication.instance()
     web = QWebEngineView()
     web.load(QUrl(url))

@@ -546,7 +552,7 @@ def has_hyperlink_in_pdf(path: str):
     )


-def router(path: str, priority: str = "accuracy") -> str:
+def router(path: str, priority: str = "speed") -> str:
     """
     Routes the file path to the appropriate parser based on the file type.

@@ -555,9 +561,9 @@ def router(path: str, priority: str = "accuracy") -> str:
         priority (str): The priority for routing: "accuracy" (preference to LLM_PARSE) or "speed" (preference to STATIC_PARSE).
     """
     file_type = get_file_type(path)
-    if file_type.startswith("text/"):
+    if file_type.startswith("text/") or "spreadsheet" in file_type or "presentation" in file_type:
         return "STATIC_PARSE"
-
+
     if priority == "accuracy":
         # If the file is a PDF without images but has hyperlinks, use STATIC_PARSE
         # Otherwise, use LLM_PARSE
@@ -571,13 +577,11 @@ def router(path: str, priority: str = "accuracy") -> str:
     else:
         # If the file is a PDF without images, use STATIC_PARSE
         # Otherwise, use LLM_PARSE
-        if (
-            file_type == "application/pdf"
-            and not has_image_in_pdf(path)
-        ):
+        if file_type == "application/pdf" and not has_image_in_pdf(path):
             return "STATIC_PARSE"
         return "LLM_PARSE"

+
 def convert_doc_to_pdf(input_path: str, temp_dir: str) -> str:
     temp_path = os.path.join(
         temp_dir, os.path.splitext(os.path.basename(input_path))[0] + ".pdf"
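With the default priority flipped from "accuracy" to "speed", `router()` now also sends spreadsheets and presentations straight to STATIC_PARSE, and image-free PDFs stay static unless the caller opts back into accuracy. A sketch; the path is hypothetical:

```python
from lexoid.core.utils import router

# Default is now priority="speed" (it was "accuracy" in 0.1.11).
print(router("sample.pdf"))                       # STATIC_PARSE if the PDF has no images
print(router("sample.pdf", priority="accuracy"))  # LLM_PARSE, unless the PDF has no
                                                  # images but does contain hyperlinks
```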
{lexoid-0.1.11.dist-info → lexoid-0.1.12.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: lexoid
-Version: 0.1.11
+Version: 0.1.12
 Summary:
 Requires-Python: >=3.10,<4.0
 Classifier: Programming Language :: Python :: 3
@@ -18,10 +18,12 @@ Requires-Dist: markdownify (>=0.13.1,<0.14.0)
 Requires-Dist: nest-asyncio (>=1.6.0,<2.0.0)
 Requires-Dist: openai (>=1.47.0,<2.0.0)
 Requires-Dist: opencv-python (>=4.10.0.84,<5.0.0.0)
+Requires-Dist: openpyxl (>=3.1.5,<4.0.0)
 Requires-Dist: pandas (>=2.2.3,<3.0.0)
 Requires-Dist: pdfplumber (>=0.11.4,<0.12.0)
 Requires-Dist: pikepdf (>=9.3.0,<10.0.0)
 Requires-Dist: playwright (>=1.49.0,<2.0.0)
+Requires-Dist: pptx2md (>=2.0.6,<3.0.0)
 Requires-Dist: pypdfium2 (>=4.30.0,<5.0.0)
 Requires-Dist: pyqt5 (>=5.15.11,<6.0.0) ; platform_system != "debian"
 Requires-Dist: pyqtwebengine (>=5.15.7,<6.0.0) ; platform_system != "debian"
@@ -31,7 +33,20 @@ Requires-Dist: tabulate (>=0.9.0,<0.10.0)
 Requires-Dist: together (>=1.4.0,<2.0.0)
 Description-Content-Type: text/markdown

-
+<div align="center">
+
+```
+ ___      _______  __   __  _______  ___   ______
+|   |    |       ||  |_|  ||       ||   | |      |
+|   |    |    ___||       ||   _   ||   | |  _    |
+|   |    |   |___ |       ||  | |  ||   | | | |   |
+|   |___ |    ___| |     | |  |_|  ||   | | |_|   |
+|       ||   |___ |   _   ||       ||   | |       |
+|_______||_______||__| |__||_______||___| |______|
+
+```
+
+</div>

 [](https://colab.research.google.com/github/oidlabs-com/Lexoid/blob/main/examples/example_notebook_colab.ipynb)
 [](https://github.com/oidlabs-com/Lexoid/blob/main/LICENSE)
@@ -43,54 +58,67 @@ Lexoid is an efficient document parsing library that supports both LLM-based and
 [Documentation](https://oidlabs-com.github.io/Lexoid/)

 ## Motivation:
+
 - Use the multi-modal advancement of LLMs
 - Enable convenience for users
 - Collaborate with a permissive license

 ## Installation
+
 ### Installing with pip
+
 ```
 pip install lexoid
 ```

 To use LLM-based parsing, define the following environment variables or create a `.env` file with the following definitions
+
 ```
 OPENAI_API_KEY=""
 GOOGLE_API_KEY=""
 ```

 Optionally, to use `Playwright` for retrieving web content (instead of the `requests` library):
+
 ```
 playwright install --with-deps --only-shell chromium
 ```

 ### Building `.whl` from source
+
 ```
 make build
 ```

 ### Creating a local installation
+
 To install dependencies:
+
 ```
 make install
 ```
+
 or, to install with dev-dependencies:
+
 ```
 make dev
 ```

 To activate virtual environment:
+
 ```
 source .venv/bin/activate
 ```

 ## Usage
+
 [Example Notebook](https://github.com/oidlabs-com/Lexoid/blob/main/examples/example_notebook.ipynb)

-[Example Colab Notebook](https://
+[Example Colab Notebook](https://colab.research.google.com/github/oidlabs-com/Lexoid/blob/main/examples/example_notebook_colab.ipynb)

 Here's a quick example to parse documents using Lexoid:
-
+
+```python
 from lexoid.api import parse
 from lexoid.api import ParserType

@@ -103,30 +131,42 @@ print(parsed_md)
 ```

 ### Parameters
+
 - path (str): The file path or URL.
 - parser_type (str, optional): The type of parser to use ("LLM_PARSE" or "STATIC_PARSE"). Defaults to "AUTO".
 - pages_per_split (int, optional): Number of pages per split for chunking. Defaults to 4.
 - max_threads (int, optional): Maximum number of threads for parallel processing. Defaults to 4.
-- **kwargs: Additional arguments for the parser.
+- \*\*kwargs: Additional arguments for the parser.
+
+## Supported API Providers
+* Google
+* OpenAI
+* Hugging Face
+* Together AI
+* OpenRouter

 ## Benchmark
+
 Results aggregated across 5 iterations each for 5 documents.

 _Note:_ Benchmarks are currently done in the zero-shot setting.

-| Rank | Model
-|
-| 1
-| 2
-| 3
-| 4
-| 5
-| 6
-| 7
-| 8
-| 9
-| 10
-| 11
-| 12
-| 13
+| Rank | Model                                                 | Mean Similarity | Std. Dev. | Time (s) | Cost($)  |
+| ---- | ----------------------------------------------------- | --------------- | --------- | -------- | -------- |
+| 1    | gemini-2.0-flash                                      | 0.829           | 0.102     | 7.41     | 0.000480 |
+| 2    | gemini-2.0-flash-001                                  | 0.814           | 0.176     | 6.85     | 0.000421 |
+| 3    | gemini-1.5-flash                                      | 0.797           | 0.143     | 9.54     | 0.000238 |
+| 4    | gemini-2.0-pro-exp                                    | 0.764           | 0.227     | 11.95    | TBA      |
+| 5    | gemini-2.0-flash-thinking-exp                         | 0.746           | 0.266     | 10.46    | TBA      |
+| 6    | gemini-1.5-pro                                        | 0.732           | 0.265     | 11.44    | 0.003332 |
+| 7    | gpt-4o                                                | 0.687           | 0.247     | 10.16    | 0.004736 |
+| 8    | gpt-4o-mini                                           | 0.642           | 0.213     | 9.71     | 0.000275 |
+| 9    | gemma-3-27b-it (via OpenRouter)                       | 0.628           | 0.299     | 18.79    | 0.000096 |
+| 10   | gemini-1.5-flash-8b                                   | 0.551           | 0.223     | 3.91     | 0.000055 |
+| 11   | Llama-Vision-Free (via Together AI)                   | 0.531           | 0.198     | 6.93     | 0        |
+| 12   | Llama-3.2-11B-Vision-Instruct-Turbo (via Together AI) | 0.524           | 0.192     | 3.68     | 0.000060 |
+| 13   | qwen/qwen-2.5-vl-7b-instruct (via OpenRouter)         | 0.482           | 0.209     | 11.53    | 0.000052 |
+| 14   | Llama-3.2-90B-Vision-Instruct-Turbo (via Together AI) | 0.461           | 0.306     | 19.26    | 0.000426 |
+| 15   | Llama-3.2-11B-Vision-Instruct (via Hugging Face)      | 0.451           | 0.257     | 4.54     | 0        |
+| 16   | microsoft/phi-4-multimodal-instruct (via OpenRouter)  | 0.366           | 0.287     | 10.80    | 0.000019 |

lexoid-0.1.12.dist-info/RECORD
ADDED
@@ -0,0 +1,9 @@
+lexoid/api.py,sha256=lTkUcbGML29JrWJv4pE_ZqbzeJuHUE8b6OnijoLBEfU,11350
+lexoid/core/parse_type/llm_parser.py,sha256=rrc1Lwp-6ZAi8IVp3672mHAHUs1JefhT2rnYyQ1gA5E,11292
+lexoid/core/parse_type/static_parser.py,sha256=v4GWUmZVBBIF9TnbkhPBt2gspk0Oq_ujtNGnXZHLBr8,15055
+lexoid/core/prompt_templates.py,sha256=svSMH0yhm6ZjtOeTtUUEiCYi81ebVY9EZKPKP0Q921o,6311
+lexoid/core/utils.py,sha256=6s24X3-4Y57u70HzjIS798Tg8qx6Z3mLATf4xtENE-8,19718
+lexoid-0.1.12.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+lexoid-0.1.12.dist-info/METADATA,sha256=XMHFMqwDj2DgSaZcZjXU881NxdPsRGBAsUyPyRsJvyU,6809
+lexoid-0.1.12.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+lexoid-0.1.12.dist-info/RECORD,,
lexoid-0.1.11.dist-info/RECORD
DELETED
@@ -1,9 +0,0 @@
-lexoid/api.py,sha256=CIZBNvh38PJbD0OwK1Mp0qqkWxkAEBw2L_FkoCmagXA,9288
-lexoid/core/parse_type/llm_parser.py,sha256=XfsN6RAtb14p31U2jL-9QyRKpkNAGXXiK3urWJIFi2U,10625
-lexoid/core/parse_type/static_parser.py,sha256=j3khirFnXq2j3IFEu0TsYWA5sHMpe_oQLFM9Uv3hScM,14100
-lexoid/core/prompt_templates.py,sha256=svSMH0yhm6ZjtOeTtUUEiCYi81ebVY9EZKPKP0Q921o,6311
-lexoid/core/utils.py,sha256=1If_3XoUhPQRY5XMzLJBsHdyjtLgD734eYBYvsg8w5Y,19569
-lexoid-0.1.11.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-lexoid-0.1.11.dist-info/METADATA,sha256=kipDZLbUz_wkJUrzPGH2VppBNMHmaJadHR5_BAqHgjU,4838
-lexoid-0.1.11.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
-lexoid-0.1.11.dist-info/RECORD,,
File without changes
|
File without changes
|