langroid 0.50.12__py3-none-any.whl → 0.51.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import base64
3
4
  import itertools
4
5
  import logging
5
6
  import os
@@ -148,8 +149,8 @@ class DocumentParser(Parser):
148
149
  return UnstructuredPDFParser(source, config)
149
150
  elif config.pdf.library == "pdf2image":
150
151
  return ImagePdfParser(source, config)
151
- elif config.pdf.library == "gemini":
152
- return GeminiPdfParser(source, config)
152
+ elif config.pdf.library == "llm-pdf-parser":
153
+ return LLMPdfParser(source, config)
153
154
  elif config.pdf.library == "marker":
154
155
  return MarkerPdfParser(source, config)
155
156
  else:
@@ -993,13 +994,13 @@ class MarkitdownPPTXParser(DocumentParser):
993
994
  )
994
995
 
995
996
 
996
- class GeminiPdfParser(DocumentParser):
997
+ class LLMPdfParser(DocumentParser):
997
998
  """
998
- This class converts PDFs to Markdown using Gemini multimodal LLMs.
999
+ This class converts PDFs to Markdown using multimodal LLMs.
999
1000
 
1000
1001
  It extracts pages, converts them with the LLM (replacing images with
1001
1002
  detailed descriptions), and outputs Markdown page by page. The
1002
- conversion follows `GEMINI_SYSTEM_INSTRUCTION`. It employs
1003
+ conversion follows `LLM_PDF_MD_SYSTEM_INSTRUCTION`. It employs
1003
1004
  multiprocessing for speed, async requests with rate limiting, and
1004
1005
  handles errors.
1005
1006
 
@@ -1008,9 +1009,9 @@ class GeminiPdfParser(DocumentParser):
1008
1009
  """
1009
1010
 
1010
1011
  DEFAULT_MAX_TOKENS = 7000
1011
- OUTPUT_DIR = Path(".gemini_pdfparser") # Fixed output directory
1012
+ OUTPUT_DIR = Path(".llm_pdfparser") # Fixed output directory
1012
1013
 
1013
- GEMINI_SYSTEM_INSTRUCTION = """
1014
+ LLM_PDF_MD_SYSTEM_INSTRUCTION = """
1014
1015
  ### **Convert PDF to Markdown**
1015
1016
  1. **Text:**
1016
1017
  * Preserve structure, formatting (**bold**, *italic*), lists, and indentation.
@@ -1035,11 +1036,11 @@ class GeminiPdfParser(DocumentParser):
1035
1036
 
1036
1037
  def __init__(self, source: Union[str, bytes], config: ParsingConfig):
1037
1038
  super().__init__(source, config)
1038
- if not config.pdf.gemini_config:
1039
+ if not config.pdf.llm_parser_config:
1039
1040
  raise ValueError(
1040
- "GeminiPdfParser requires a Gemini-based config in pdf parsing config"
1041
+ "LLMPdfParser requires a llm-based config in pdf parsing config"
1041
1042
  )
1042
- self.model_name = config.pdf.gemini_config.model_name
1043
+ self.model_name = config.pdf.llm_parser_config.model_name
1043
1044
 
1044
1045
  # Ensure output directory exists
1045
1046
  self.OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
@@ -1058,7 +1059,9 @@ class GeminiPdfParser(DocumentParser):
1058
1059
  temp_file.close()
1059
1060
  self.output_filename = Path(temp_file.name)
1060
1061
 
1061
- self.max_tokens = config.pdf.gemini_config.max_tokens or self.DEFAULT_MAX_TOKENS
1062
+ self.max_tokens = (
1063
+ config.pdf.llm_parser_config.max_tokens or self.DEFAULT_MAX_TOKENS
1064
+ )
1062
1065
 
1063
1066
  """
1064
1067
  If True, each PDF page is processed as a separate chunk,
@@ -1066,12 +1069,12 @@ class GeminiPdfParser(DocumentParser):
1066
1069
  grouped into chunks based on `max_token_limit` before being sent
1067
1070
  to the LLM.
1068
1071
  """
1069
- self.split_on_page = config.pdf.gemini_config.split_on_page or False
1072
+ self.split_on_page = config.pdf.llm_parser_config.split_on_page or False
1070
1073
 
1071
1074
  # Rate limiting parameters
1072
1075
  import asyncio
1073
1076
 
1074
- self.requests_per_minute = config.pdf.gemini_config.requests_per_minute or 5
1077
+ self.requests_per_minute = config.pdf.llm_parser_config.requests_per_minute or 5
1075
1078
 
1076
1079
  """
1077
1080
  A semaphore to control the number of concurrent requests to the LLM,
@@ -1175,7 +1178,7 @@ class GeminiPdfParser(DocumentParser):
1175
1178
  "page_numbers": page_numbers, # List of page numbers in this chunk
1176
1179
  }
1177
1180
 
1178
- def _prepare_pdf_chunks_for_gemini(
1181
+ def _prepare_pdf_chunks_for_llm(
1179
1182
  self,
1180
1183
  num_workers: Optional[int] = None,
1181
1184
  max_tokens: int = DEFAULT_MAX_TOKENS,
@@ -1198,37 +1201,92 @@ class GeminiPdfParser(DocumentParser):
1198
1201
  pdf_chunks = pool.map(self._merge_pages_into_pdf_with_metadata, chunks)
1199
1202
  return pdf_chunks
1200
1203
 
1201
- async def _send_chunk_to_gemini(
1202
- self, chunk: Dict[str, Any], gemini_api_key: str
1203
- ) -> str:
1204
+ async def _send_chunk_to_llm(self, chunk: Dict[str, Any]) -> str:
1204
1205
  """
1205
- Sends a PDF chunk to the Gemini API and returns the response text.
1206
+ Sends a PDF chunk to the LLM API and returns the response text.
1206
1207
  Uses retries with exponential backoff to handle transient failures.
1207
1208
  """
1208
1209
  import asyncio
1209
1210
  import logging
1210
1211
 
1211
- from google import genai
1212
- from google.genai import types
1212
+ from langroid.language_models.openai_gpt import OpenAIGPT, OpenAIGPTConfig
1213
1213
 
1214
1214
  async with self.semaphore: # Limit concurrent API requests
1215
1215
  for attempt in range(self.max_retries):
1216
1216
  try:
1217
- client = genai.Client(api_key=gemini_api_key)
1217
+ llm_config = OpenAIGPTConfig(
1218
+ chat_model=self.model_name,
1219
+ max_output_tokens=self.max_tokens,
1220
+ )
1221
+ llm = OpenAIGPT(config=llm_config)
1222
+ base64_string = base64.b64encode(chunk["pdf_bytes"]).decode("utf-8")
1223
+ data_uri = f"data:application/pdf;base64,{base64_string}"
1224
+ if "gemini" in self.model_name.lower():
1225
+ file_content = dict(
1226
+ type="image_url",
1227
+ image_url=dict(url=data_uri),
1228
+ )
1229
+ elif "claude" in self.model_name.lower() and llm.is_litellm_proxy:
1230
+ file_content = dict(
1231
+ type="file",
1232
+ file=dict(
1233
+ file_data=data_uri,
1234
+ ),
1235
+ )
1236
+ else:
1237
+ if not llm.is_openai_chat_model():
1238
+ logger.warning(
1239
+ f"""
1240
+ File uploads may not be supported for this model
1241
+ {self.model_name}. But attempting to
1242
+ use OpenAI-like file upload.
1243
+ """,
1244
+ )
1245
+ file_content = dict(
1246
+ type="file",
1247
+ file=dict(
1248
+ filename="dummy.pdf",
1249
+ file_data=data_uri,
1250
+ ),
1251
+ )
1218
1252
 
1219
1253
  # Send the request with PDF content and system instructions
1220
- response = await client.aio.models.generate_content(
1221
- model=self.model_name,
1222
- contents=[
1223
- types.Part.from_bytes(
1224
- data=chunk["pdf_bytes"], mime_type="application/pdf"
1254
+ response = await llm.async_client.chat.completions.create( # type: ignore
1255
+ model=self.model_name.split("/")[-1],
1256
+ messages=[
1257
+ dict(
1258
+ role="system",
1259
+ content="""
1260
+ You are an expert pdf -> markdown converter.
1261
+ Do NOT use any triple backquotes when you present the
1262
+ markdown content,like ```markdown etc.
1263
+ FAITHFULLY CONVERT THE PDF TO MARKDOWN,
1264
+ retaining ALL content as you find it.
1265
+ """,
1266
+ ),
1267
+ dict( # type: ignore
1268
+ role="user",
1269
+ content=[
1270
+ dict(
1271
+ type="text",
1272
+ text=self.LLM_PDF_MD_SYSTEM_INSTRUCTION,
1273
+ ),
1274
+ file_content,
1275
+ ],
1225
1276
  ),
1226
- self.GEMINI_SYSTEM_INSTRUCTION,
1227
1277
  ],
1228
1278
  )
1229
1279
 
1230
1280
  # Return extracted text if available
1231
- return str(response.text) if response.text else ""
1281
+ return (
1282
+ ""
1283
+ if (
1284
+ response is None
1285
+ or not hasattr(response, "choices")
1286
+ or not isinstance(response.choices, list)
1287
+ )
1288
+ else (response.choices[0].message.content)
1289
+ )
1232
1290
 
1233
1291
  except Exception as e:
1234
1292
  # Log error with page numbers for debugging
@@ -1251,28 +1309,24 @@ class GeminiPdfParser(DocumentParser):
1251
1309
  chunk.get("page_numbers", "Unknown"),
1252
1310
  )
1253
1311
  break
1254
-
1255
1312
  return "" # Return empty string if all retries fail
1256
1313
 
1257
- async def process_chunks(
1258
- self, chunks: List[Dict[str, Any]], api_key: str
1259
- ) -> List[str]:
1314
+ async def process_chunks(self, chunks: List[Dict[str, Any]]) -> List[str]:
1260
1315
  """
1261
- Processes PDF chunks by sending them to the Gemini API and
1316
+ Processes PDF chunks by sending them to the LLM API and
1262
1317
  collecting the results.
1263
1318
 
1264
1319
  Args:
1265
1320
  chunks: A list of dictionaries, where each dictionary represents
1266
1321
  a PDF chunk and contains the PDF data and page numbers.
1267
- api_key: The Gemini API key.
1268
1322
  """
1269
1323
  # To show nice progress bar
1270
1324
  from tqdm.asyncio import tqdm_asyncio
1271
1325
 
1272
- # Create a list of asynchronous tasks to send each chunk to Gemini.
1326
+ # Create a list of asynchronous tasks to send each chunk to the LLM.
1273
1327
  # Chunk in this case might be single page or group of pages returned
1274
1328
  # by prepare_pdf_chunks function
1275
- tasks = [self._send_chunk_to_gemini(chunk, api_key) for chunk in chunks]
1329
+ tasks = [self._send_chunk_to_llm(chunk) for chunk in chunks]
1276
1330
 
1277
1331
  # Gather the results from all tasks, allowing exceptions to be returned.
1278
1332
  # tqdm_asyncio is wrapper around asyncio.gather
@@ -1311,7 +1365,7 @@ class GeminiPdfParser(DocumentParser):
1311
1365
  def iterate_pages(self) -> Generator[Tuple[int, Any], None, None]:
1312
1366
  """
1313
1367
  Iterates over the document pages, extracting content using the
1314
- Gemini API, saves them to a markdown file, and yields page numbers
1368
+ LLM API, saves them to a markdown file, and yields page numbers
1315
1369
  along with their corresponding content.
1316
1370
 
1317
1371
  Yields:
@@ -1319,14 +1373,8 @@ class GeminiPdfParser(DocumentParser):
1319
1373
  (int) and the page content (Any).
1320
1374
  """
1321
1375
  import asyncio
1322
- import os
1323
1376
 
1324
- # Load environment variables (e.g., GEMINI_API_KEY) from a .env file.
1325
1377
  load_dotenv()
1326
- gemini_api_key = os.getenv("GEMINI_API_KEY")
1327
- if not gemini_api_key:
1328
- raise ValueError("GEMINI_API_KEY not found in environment variables.")
1329
-
1330
1378
  try:
1331
1379
  # This involves extracting pages, grouping them according to the
1332
1380
  # `max_tokens` limit (if `split_on_page` is False), and
@@ -1335,18 +1383,16 @@ class GeminiPdfParser(DocumentParser):
1335
1383
  # PDF bytes and the associated page numbers or single page if
1336
1384
  # `split_on_page` is true
1337
1385
 
1338
- pdf_chunks = self._prepare_pdf_chunks_for_gemini(
1386
+ pdf_chunks = self._prepare_pdf_chunks_for_llm(
1339
1387
  num_workers=8,
1340
1388
  max_tokens=self.max_tokens,
1341
1389
  split_on_page=self.split_on_page,
1342
1390
  )
1343
1391
 
1344
1392
  # We asynchronously processes each chunk, sending it
1345
- # to Gemini and retrieving the Markdown output. It handles rate
1393
+ # to the LLM and retrieving the Markdown output. It handles rate
1346
1394
  # limiting and retries.
1347
- markdown_results = asyncio.run(
1348
- self.process_chunks(pdf_chunks, gemini_api_key)
1349
- )
1395
+ markdown_results = asyncio.run(self.process_chunks(pdf_chunks))
1350
1396
 
1351
1397
  # This file serves as an intermediate storage location for the
1352
1398
  # complete Markdown output.
@@ -36,10 +36,10 @@ class BaseParsingConfig(BaseSettings):
36
36
  extra = "ignore" # Ignore unknown settings
37
37
 
38
38
 
39
- class GeminiConfig(BaseSettings):
40
- """Configuration for Gemini-based parsing."""
39
+ class LLMPdfParserConfig(BaseSettings):
40
+ """Configuration for LLM-based parsing."""
41
41
 
42
- model_name: str = "gemini-2.0-flash" # Default model
42
+ model_name: str = "gemini/gemini-2.0-flash" # Default model
43
43
  max_tokens: Optional[int] = None
44
44
  split_on_page: Optional[bool] = True
45
45
  requests_per_minute: Optional[int] = 5
@@ -60,10 +60,10 @@ class PdfParsingConfig(BaseParsingConfig):
60
60
  "unstructured",
61
61
  "pdf2image",
62
62
  "markitdown",
63
- "gemini",
63
+ "llm-pdf-parser",
64
64
  "marker",
65
65
  ] = "pymupdf4llm"
66
- gemini_config: Optional[GeminiConfig] = None
66
+ llm_parser_config: Optional[LLMPdfParserConfig] = None
67
67
  marker_config: Optional[MarkerConfig] = None
68
68
 
69
69
  @root_validator(pre=True)
@@ -71,10 +71,10 @@ class PdfParsingConfig(BaseParsingConfig):
71
71
  """Ensure correct config is set based on library selection."""
72
72
  library = values.get("library")
73
73
 
74
- if library == "gemini":
75
- values.setdefault("gemini_config", GeminiConfig())
74
+ if library == "llm-pdf-parser":
75
+ values.setdefault("llm_parser_config", LLMPdfParserConfig())
76
76
  else:
77
- values["gemini_config"] = None
77
+ values["llm_parser_config"] = None
78
78
 
79
79
  if library == "marker":
80
80
  values.setdefault("marker_config", MarkerConfig())
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: langroid
3
- Version: 0.50.12
3
+ Version: 0.51.0
4
4
  Summary: Harness LLMs with Multi-Agent Programming
5
5
  Author-email: Prasad Chalasani <pchalasani@gmail.com>
6
6
  License: MIT
@@ -82,11 +82,11 @@ langroid/language_models/prompt_formatter/llama2_formatter.py,sha256=YdcO88qyBeu
82
82
  langroid/parsing/__init__.py,sha256=2oUWJJAxIavq9Wtw5RGlkXLq3GF3zgXeVLLW4j7yeb8,1138
83
83
  langroid/parsing/agent_chats.py,sha256=sbZRV9ujdM5QXvvuHVjIi2ysYSYlap-uqfMMUKulrW0,1068
84
84
  langroid/parsing/code_parser.py,sha256=5ze0MBytrGGkU69pA_bJDjRm6QZz_QYfPcIwkagUa7U,3796
85
- langroid/parsing/document_parser.py,sha256=XihXwhp--Nxhb8xoh6wth_isJCGUROKiVr3rPDOJodU,54359
85
+ langroid/parsing/document_parser.py,sha256=GPCxudLlA9rSjRjseRsoiKlm9f1nIlGTZjSmWKqWj0M,56764
86
86
  langroid/parsing/md_parser.py,sha256=JUgsUpCaeAuBndmtDaJR9HMZaje1gmtXtaLXJHst3i8,21340
87
87
  langroid/parsing/para_sentence_split.py,sha256=AJBzZojP3zpB-_IMiiHismhqcvkrVBQ3ZINoQyx_bE4,2000
88
88
  langroid/parsing/parse_json.py,sha256=aADo38bAHQhC8on4aWZZzVzSDy-dK35vRLZsFI2ewh8,4756
89
- langroid/parsing/parser.py,sha256=YPE6X6efimz2bYbardrhHHKw7V1LZvq-vF0q5p5XzOk,15387
89
+ langroid/parsing/parser.py,sha256=Tbe1mQ7wp6GVx2xMWv1raIkpepTN0qNrqOxakWY6Zkc,15437
90
90
  langroid/parsing/pdf_utils.py,sha256=rmNJ9UzuBgXTAYwj1TtRJcD8h53x7cizhgyYHKO88I4,1513
91
91
  langroid/parsing/repo_loader.py,sha256=NpysuyzRHvgL3F4BB_wGo5sCUnZ3FOlVCJmZ7CaUdbs,30202
92
92
  langroid/parsing/routing.py,sha256=-FcnlqldzL4ZoxuDwXjQPNHgBe9F9-F4R6q7b_z9CvI,1232
@@ -129,7 +129,7 @@ langroid/vector_store/pineconedb.py,sha256=otxXZNaBKb9f_H75HTaU3lMHiaR2NUp5MqwLZ
129
129
  langroid/vector_store/postgres.py,sha256=wHPtIi2qM4fhO4pMQr95pz1ZCe7dTb2hxl4VYspGZoA,16104
130
130
  langroid/vector_store/qdrantdb.py,sha256=O6dSBoDZ0jzfeVBd7LLvsXu083xs2fxXtPa9gGX3JX4,18443
131
131
  langroid/vector_store/weaviatedb.py,sha256=Yn8pg139gOy3zkaPfoTbMXEEBCiLiYa1MU5d_3UA1K4,11847
132
- langroid-0.50.12.dist-info/METADATA,sha256=b1vQBIkydfimg9r80ud7w07d7540XJAdhpegeqAPPTw,63642
133
- langroid-0.50.12.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
134
- langroid-0.50.12.dist-info/licenses/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
135
- langroid-0.50.12.dist-info/RECORD,,
132
+ langroid-0.51.0.dist-info/METADATA,sha256=6VIQRRSez7ZiSVmPH-HCr3MSfxYZqUP-ppHwbdADzjo,63641
133
+ langroid-0.51.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
134
+ langroid-0.51.0.dist-info/licenses/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
135
+ langroid-0.51.0.dist-info/RECORD,,