langroid 0.42.10__py3-none-any.whl → 0.43.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
langroid/agent/chat_agent.py CHANGED
@@ -1069,6 +1069,13 @@ class ChatAgent(Agent):
         was enabled, disables it for the tool, else triggers strict recovery.
         """
         self.tool_error = False
+        most_recent_sent_by_llm = (
+            len(self.message_history) > 0
+            and self.message_history[-1].role == Role.ASSISTANT
+        )
+        was_llm = most_recent_sent_by_llm or (
+            isinstance(msg, ChatDocument) and msg.metadata.sender == Entity.LLM
+        )
         try:
             tools = super().get_tool_messages(msg, all_tools)
         except ValidationError as ve:
@@ -1099,9 +1106,16 @@ class ChatAgent(Agent):
             if isinstance(msg, ChatDocument):
                 self.tool_error = msg.metadata.sender == Entity.LLM
             else:
-                self.tool_error = True
+                self.tool_error = most_recent_sent_by_llm
 
-            raise ve
+            if was_llm:
+                raise ve
+            else:
+                self.tool_error = False
+                return []
+
+        if not was_llm:
+            self.tool_error = False
 
         return tools
 
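The net effect of the two hunks above: a `ValidationError` raised while parsing tool calls now propagates (and triggers strict recovery) only when the offending message actually came from the LLM; errors in non-LLM messages are swallowed and no tools are returned. A minimal, self-contained sketch of that guard, using hypothetical stand-ins rather than langroid's `ChatDocument`/`Role`/`Entity` types:

```python
# Hedged sketch of the was_llm guard; roles/senders are plain strings
# standing in for langroid's Role and Entity enums.
from typing import List, Optional


def parse_tools(history_roles: List[str], msg_sender: Optional[str]) -> list:
    most_recent_sent_by_llm = (
        len(history_roles) > 0 and history_roles[-1] == "assistant"
    )
    was_llm = most_recent_sent_by_llm or msg_sender == "LLM"
    try:
        raise ValueError("simulated tool-parsing ValidationError")
    except ValueError:
        if was_llm:
            raise  # LLM-generated: re-raise so strict recovery can kick in
        return []  # non-LLM sender: suppress the error, return no tools


assert parse_tools(["user"], "USER") == []  # a user's malformed input is swallowed
```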
langroid/parsing/document_parser.py CHANGED
@@ -9,7 +9,9 @@ from enum import Enum
 from io import BytesIO
 from itertools import accumulate
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Dict, Generator, List, Tuple
+from typing import TYPE_CHECKING, Any, Dict, Generator, List, Optional, Tuple, Union
+
+from dotenv import load_dotenv
 
 from langroid.exceptions import LangroidImportError
 from langroid.utils.object_registry import ObjectRegistry
@@ -163,6 +165,8 @@ class DocumentParser(Parser):
             return UnstructuredPDFParser(source, config)
         elif config.pdf.library == "pdf2image":
             return ImagePdfParser(source, config)
+        elif config.pdf.library == "gemini":
+            return GeminiPdfParser(source, config)
         else:
             raise ValueError(
                 f"Unsupported PDF library specified: {config.pdf.library}"
@@ -954,3 +958,409 @@ class MarkitdownPPTXParser(DocumentParser):
             content=self.fix_text(md_content),
             metadata=DocMetaData(source=self.source),
         )
+
+
+class GeminiPdfParser(DocumentParser):
+    """
+    This class converts PDFs to Markdown using Gemini multimodal LLMs.
+
+    It extracts pages, converts them with the LLM (replacing images with
+    detailed descriptions), and outputs Markdown page by page. The
+    conversion follows `GEMINI_SYSTEM_INSTRUCTION`. It employs
+    multiprocessing for speed, async requests with rate limiting, and
+    handles errors.
+
+    It supports page-by-page splitting or chunking multiple pages into
+    one, respecting page boundaries and a `max_token_limit`.
+    """
+
+    DEFAULT_MAX_TOKENS = 7000
+    OUTPUT_DIR = Path(".gemini_pdfparser")  # Fixed output directory
+
+    GEMINI_SYSTEM_INSTRUCTION = """
+    ### **Convert PDF to Markdown**
+    1. **Text:**
+        * Preserve structure, formatting (**bold**, *italic*), lists, and indentation.
+        * **Remove running heads (page numbers, headers/footers).**
+        * Keep section and chapter titles; discard repeated page headers.
+    2. **Images:** Replace with **detailed, creative descriptions**
+       optimized for clarity and understanding.
+    3. **Tables:** Convert to Markdown tables with proper structure.
+    4. **Math:** Use LaTeX ($...$ inline, $$...$$ block).
+    5. **Code:** Wrap in fenced blocks without specifying a language:
+
+        ```
+        code
+        ```
+    6. **Clean Output:**
+        * No system messages, metadata, or artifacts or ```markdown``` identifier.
+        * Do **not** include introductory or explanatory messages
+          like "Here is your output."
+        * Ensure formatting is **consistent and structured**
+          for feeding into a markdown parser.
+    """.strip()
+
+    def __init__(self, source: Union[str, bytes], config: ParsingConfig):
+        super().__init__(source, config)
+        if not config.pdf.gemini_config:
+            raise ValueError(
+                "GeminiPdfParser requires a Gemini-based config in pdf parsing config"
+            )
+        self.model_name = config.pdf.gemini_config.model_name
+
+        # Ensure output directory exists
+        self.OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
+
+        prefix = (
+            Path(source).stem + "_"
+            if isinstance(source, str) and Path(source).exists()
+            else "output_"
+        )
+        temp_file = tempfile.NamedTemporaryFile(
+            suffix=".md",
+            prefix=prefix,
+            dir=str(self.OUTPUT_DIR),
+            delete=False,
+        )
+        temp_file.close()
+        self.output_filename = Path(temp_file.name)
+
+        self.max_tokens = config.pdf.gemini_config.max_tokens or self.DEFAULT_MAX_TOKENS
+
+        """
+        If True, each PDF page is processed as a separate chunk,
+        resulting in one LLM request per page. If False, pages are
+        grouped into chunks based on `max_token_limit` before being sent
+        to the LLM.
+        """
+        self.split_on_page = config.pdf.gemini_config.split_on_page or False
+
+        # Rate limiting parameters
+        import asyncio
+
+        self.requests_per_minute = config.pdf.gemini_config.requests_per_minute or 5
+
+        """
+        A semaphore to control the number of concurrent requests to the LLM,
+        preventing rate limit errors. A semaphore slot is acquired before
+        making an LLM request and released after the request is complete.
+        """
+        self.semaphore = asyncio.Semaphore(self.requests_per_minute)
+        self.retry_delay = 5  # seconds, for exponential backoff
+        self.max_retries = 3
+
+    def _extract_page(self, page_num: int) -> Dict[str, Any]:
+        """
+        Extracts a single page and estimates token count.
+        Opens the PDF from self.doc_bytes (a BytesIO object).
+        """
+        import fitz
+
+        try:
+            # Always open the document from in-memory bytes.
+            doc = fitz.open(stream=self.doc_bytes.getvalue(), filetype="pdf")
+            new_pdf = fitz.open()
+            new_pdf.insert_pdf(doc, from_page=page_num, to_page=page_num)
+            pdf_bytes = new_pdf.write()
+            text = doc[page_num].get_text("text")
+            token_count = len(text) // 4 if text else len(pdf_bytes) // 4
+
+            return {
+                "page_numbers": page_num + 1,
+                "pdf_bytes": pdf_bytes,
+                "token_count": token_count,
+            }
+        except Exception as e:
+            raise ValueError(f"Error processing PDF document: {e}") from e
+
+    def _extract_pdf_pages_parallel(
+        self, num_workers: Optional[int] = None
+    ) -> List[Dict[str, Any]]:
+        """Parallel PDF page extraction using self.doc_bytes."""
+        from multiprocessing import Pool, cpu_count
+
+        import fitz
+        from tqdm import tqdm
+
+        try:
+            doc = fitz.open(stream=self.doc_bytes.getvalue(), filetype="pdf")
+            total_pages = len(doc)
+        except Exception as e:
+            raise ValueError(f"Error opening PDF document: {e}") from e
+
+        num_workers = num_workers or cpu_count()
+        with Pool(num_workers) as pool:
+            with tqdm(total=total_pages, desc="Extracting pages", unit="page") as pbar:
+                results = []
+                for result in pool.imap(self._extract_page, range(total_pages)):
+                    results.append(result)
+                    pbar.update(1)
+
+        return results
+
+    def _group_pages_by_token_limit(
+        self, pages: List[Dict[str, Any]], max_tokens: int = DEFAULT_MAX_TOKENS
+    ) -> List[List[Dict[str, Any]]]:
+        """Groups pages into chunks where each chunk is approximately `max_tokens`."""
+        chunks: List[List[Dict[str, Any]]] = []
+        current_chunk: List[Dict[str, Any]] = []
+        current_tokens = 0
+
+        for page in pages:
+            if current_tokens + page["token_count"] > max_tokens and current_chunk:
+                chunks.append(current_chunk)
+                current_chunk = []
+                current_tokens = 0
+
+            current_chunk.append(page)
+            current_tokens += page["token_count"]
+
+        if current_chunk:  # Add remaining pages
+            chunks.append(current_chunk)
+
+        return chunks
+
+    def _merge_pages_into_pdf_with_metadata(
+        self, page_group: List[Dict[str, Any]]
+    ) -> Dict[str, Any]:
+        """
+        Merges grouped pages into a single binary chunk so that
+        it does not exceed the max token limit.
+        """
+        import fitz
+
+        merged_pdf = fitz.open()
+        page_numbers = []
+
+        for page in page_group:
+            temp_pdf = fitz.open("pdf", page["pdf_bytes"])
+            merged_pdf.insert_pdf(temp_pdf)
+            page_numbers.append(page["page_numbers"])
+
+        return {
+            "pdf_bytes": merged_pdf.write(),  # Binary PDF data
+            "page_numbers": page_numbers,  # List of page numbers in this chunk
+        }
+
+    def _prepare_pdf_chunks_for_gemini(
+        self,
+        num_workers: Optional[int] = None,
+        max_tokens: int = DEFAULT_MAX_TOKENS,
+        split_on_page: bool = False,
+    ) -> List[Dict[str, Any]]:
+        """
+        Extracts, groups, and merges PDF pages into chunks with embedded page markers.
+        """
+        from multiprocessing import Pool
+
+        pages = self._extract_pdf_pages_parallel(num_workers)
+
+        if split_on_page:
+            # Each page becomes its own chunk
+            return pages
+        else:
+            # Group pages based on token limit
+            chunks = self._group_pages_by_token_limit(pages, max_tokens)
+            with Pool(num_workers) as pool:
+                pdf_chunks = pool.map(self._merge_pages_into_pdf_with_metadata, chunks)
+            return pdf_chunks
+
+    async def _send_chunk_to_gemini(
+        self, chunk: Dict[str, Any], gemini_api_key: str
+    ) -> str:
+        """
+        Sends a PDF chunk to the Gemini API and returns the response text.
+        Uses retries with exponential backoff to handle transient failures.
+        """
+        import asyncio
+        import logging
+
+        from google import genai
+        from google.genai import types
+
+        async with self.semaphore:  # Limit concurrent API requests
+            for attempt in range(self.max_retries):
+                try:
+                    client = genai.Client(api_key=gemini_api_key)
+
+                    # Send the request with PDF content and system instructions
+                    response = await client.aio.models.generate_content(
+                        model=self.model_name,
+                        contents=[
+                            types.Part.from_bytes(
+                                data=chunk["pdf_bytes"], mime_type="application/pdf"
+                            ),
+                            self.GEMINI_SYSTEM_INSTRUCTION,
+                        ],
+                    )
+
+                    # Return extracted text if available
+                    return str(response.text) if response.text else ""
+
+                except Exception as e:
+                    # Log error with page numbers for debugging
+                    logging.error(
+                        "Attempt %d failed for pages %s: %s",
+                        attempt + 1,
+                        chunk.get("page_numbers", "Unknown"),
+                        e,
+                    )
+
+                    if attempt < self.max_retries - 1:
+                        # Apply exponential backoff before retrying
+                        delay = self.retry_delay * (2**attempt)
+                        logging.info("Retrying in %s sec...", delay)
+                        await asyncio.sleep(delay)
+                    else:
+                        # Log failure after max retries
+                        logging.error(
+                            "Max retries reached for pages %s",
+                            chunk.get("page_numbers", "Unknown"),
+                        )
+                        break
+
+        return ""  # Return empty string if all retries fail
+
+    async def process_chunks(
+        self, chunks: List[Dict[str, Any]], api_key: str
+    ) -> List[str]:
+        """
+        Processes PDF chunks by sending them to the Gemini API and
+        collecting the results.
+
+        Args:
+            chunks: A list of dictionaries, where each dictionary represents
+                a PDF chunk and contains the PDF data and page numbers.
+            api_key: The Gemini API key.
+        """
+        # To show a nice progress bar
+        from tqdm.asyncio import tqdm_asyncio
+
+        # Create a list of asynchronous tasks to send each chunk to Gemini.
+        # A chunk here may be a single page or a group of pages returned
+        # by the prepare_pdf_chunks function.
+        tasks = [self._send_chunk_to_gemini(chunk, api_key) for chunk in chunks]
+
+        # Gather the results from all tasks, allowing exceptions to be returned.
+        # tqdm_asyncio is a wrapper around asyncio.gather.
+        gathered_results = await tqdm_asyncio.gather(
+            *tasks, desc="Processing chunks(pages)", unit="chunk"
+        )
+        results = []
+        for i, result in enumerate(gathered_results):
+            chunk = chunks[i]  # Get the corresponding chunk.
+
+            if isinstance(result, Exception):
+                # Handle exceptions that occurred during chunk processing.
+                logging.error(
+                    "Failed to process chunk %s: %s",
+                    chunk.get("page_numbers", "Unknown"),
+                    result,
+                )
+                results.append(
+                    "<!----Error: Could not process chunk %s---->"
+                    % chunk.get("page_numbers", "Unknown")
+                )
+            else:
+                # Process successful results and append page/chunk markers.
+                markdown = str(result)
+                if self.split_on_page:
+                    results.append(
+                        markdown + f"<!----Page-{chunk['page_numbers']}---->"
+                    )
+                else:
+                    results.append(
+                        markdown + f"<!----Chunk-{chunk['page_numbers']}---->"
+                    )
+
+        return results  # Return the list of results.
+
+    def iterate_pages(self) -> Generator[Tuple[int, Any], None, None]:
+        """
+        Iterates over the document pages, extracting content using the
+        Gemini API, saves them to a markdown file, and yields page numbers
+        along with their corresponding content.
+
+        Yields:
+            A generator of tuples, where each tuple contains the page number
+            (int) and the page content (Any).
+        """
+        import asyncio
+        import os
+
+        # Load environment variables (e.g., GEMINI_API_KEY) from a .env file.
+        load_dotenv()
+        gemini_api_key = os.getenv("GEMINI_API_KEY")
+        if not gemini_api_key:
+            raise ValueError("GEMINI_API_KEY not found in environment variables.")
+
+        try:
+            # This involves extracting pages, grouping them according to the
+            # `max_tokens` limit (if `split_on_page` is False), and
+            # merging pages into larger PDF chunks. The result
+            # is a list of dictionaries, where each dictionary contains the
+            # PDF bytes and the associated page numbers, or a single page if
+            # `split_on_page` is true.
+
+            pdf_chunks = self._prepare_pdf_chunks_for_gemini(
+                num_workers=8,
+                max_tokens=self.max_tokens,
+                split_on_page=self.split_on_page,
+            )
+
+            # We asynchronously process each chunk, sending it
+            # to Gemini and retrieving the Markdown output. This handles rate
+            # limiting and retries.
+            markdown_results = asyncio.run(
+                self.process_chunks(pdf_chunks, gemini_api_key)
+            )
+
+            # This file serves as an intermediate storage location for the
+            # complete Markdown output.
+            with open(self.output_filename, "w", encoding="utf-8") as outfile:
+                outfile.write("\n\n".join(markdown_results))
+
+            # Read the full Markdown content from the temporary file.
+            with open(self.output_filename, "r", encoding="utf-8") as infile:
+                full_markdown = infile.read()
+
+            # The splitting is based on the `split_on_page` setting. If True,
+            # the Markdown is split using the "Page-" marker. Otherwise, it's
+            # split using the "Chunk-" marker.
+            if self.split_on_page:
+                pages = full_markdown.split("<!----Page-")
+            else:
+                pages = full_markdown.split("<!----Chunk-")
+
+            # Remove the first element if it's empty (due to the split).
+            if pages and pages[0] == "":
+                pages = pages[1:]
+
+            # Iterate over the pages or chunks and yield their content.
+            for i, page in enumerate(pages):
+                # Check for errors during processing.
+                if "<!----Error:" in page:
+                    page_content = page
+                    logging.warning(f"Page {i}: Error processing chunk.")
+                else:
+                    # Extract the actual page content by removing the marker.
+                    page_content = (
+                        page.split("---->", 1)[1]
+                        if len(page.split("---->", 1)) > 1
+                        else page
+                    )
+
+                # Yield the page number and content.
+                yield i, page_content
+
+        except Exception as e:
+            raise ValueError(f"Error processing document: {e}") from e
+
+    def get_document_from_page(self, page: str) -> Document:
+        """
+        Get a Document object from a given markdown page.
+        """
+        return Document(
+            content=page,
+            metadata=DocMetaData(source=self.source),
+        )
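For orientation, a hedged usage sketch (not part of the diff) showing how the new parser is reached through the config plumbing added in `parser.py` below. It assumes `DocumentParser.create(source, config)` is the factory shown in the dispatch hunk above, that `GEMINI_API_KEY` is available in the environment or a `.env` file as `iterate_pages` requires, and uses a placeholder path `report.pdf`:

```python
from langroid.parsing.document_parser import DocumentParser
from langroid.parsing.parser import GeminiConfig, ParsingConfig, PdfParsingConfig

config = ParsingConfig(
    pdf=PdfParsingConfig(
        library="gemini",
        gemini_config=GeminiConfig(
            model_name="gemini-2.0-flash",
            split_on_page=True,  # one LLM request per page
            requests_per_minute=5,  # semaphore size for concurrent requests
        ),
    ),
)

# create() dispatches on config.pdf.library; "gemini" yields a GeminiPdfParser.
parser = DocumentParser.create("report.pdf", config)
for page_num, markdown in parser.iterate_pages():
    print(page_num, markdown[:80])
```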
langroid/parsing/parser.py CHANGED
@@ -1,13 +1,13 @@
 import logging
 import re
 from enum import Enum
-from typing import Dict, List, Literal
+from typing import Any, Dict, List, Literal, Optional
 
 import tiktoken
 
 from langroid.mytypes import Document
 from langroid.parsing.para_sentence_split import create_chunks, remove_extra_whitespace
-from langroid.pydantic_v1 import BaseSettings
+from langroid.pydantic_v1 import BaseSettings, root_validator
 from langroid.utils.object_registry import ObjectRegistry
 
 logger = logging.getLogger(__name__)
@@ -20,7 +20,26 @@ class Splitter(str, Enum):
     SIMPLE = "simple"
 
 
-class PdfParsingConfig(BaseSettings):
+class BaseParsingConfig(BaseSettings):
+    """Base class for document parsing configurations."""
+
+    library: str
+
+    class Config:
+        extra = "ignore"  # Ignore unknown settings
+
+
+class GeminiConfig(BaseSettings):
+    """Configuration for Gemini-based parsing."""
+
+    model_name: str = "gemini-2.0-flash"  # Default model
+    max_tokens: Optional[int] = None
+    split_on_page: Optional[bool] = True
+    requests_per_minute: Optional[int] = 5
+
+
+class PdfParsingConfig(BaseParsingConfig):
+
     library: Literal[
         "fitz",
         "pymupdf4llm",
@@ -29,7 +48,18 @@ class PdfParsingConfig(BaseSettings):
         "unstructured",
         "pdf2image",
         "markitdown",
+        "gemini",
     ] = "pymupdf4llm"
+    gemini_config: Optional[GeminiConfig] = None
+
+    @root_validator(pre=True)
+    def enable_gemini_config(cls, values: Dict[str, Any]) -> Dict[str, Any]:
+        """Ensure GeminiConfig is set only when library is 'gemini'."""
+        if values.get("library") == "gemini":
+            values["gemini_config"] = values.get("gemini_config") or GeminiConfig()
+        else:
+            values["gemini_config"] = None
+        return values
 
 
 class DocxParsingConfig(BaseSettings):
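The `root_validator` above means callers rarely need to construct `GeminiConfig` by hand: it is injected or cleared based on `library`. A small illustration, inferred directly from the validator logic shown:

```python
from langroid.parsing.parser import GeminiConfig, PdfParsingConfig

# library="gemini" with no explicit gemini_config: the pre-validator
# injects a default GeminiConfig().
cfg = PdfParsingConfig(library="gemini")
assert isinstance(cfg.gemini_config, GeminiConfig)
assert cfg.gemini_config.model_name == "gemini-2.0-flash"

# Any other library: gemini_config is forced back to None, even if passed.
cfg = PdfParsingConfig(library="fitz", gemini_config=GeminiConfig())
assert cfg.gemini_config is None
```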
langroid/utils/system.py CHANGED
@@ -14,7 +14,12 @@ from typing import Any, Literal
 
 logger = logging.getLogger(__name__)
 
-DELETION_ALLOWED_PATHS = [".qdrant", ".chroma", ".lancedb", ".weaviate"]
+DELETION_ALLOWED_PATHS = [
+    ".qdrant",
+    ".chroma",
+    ".lancedb",
+    ".weaviate",
+]
 
 
 def pydantic_major_version() -> int:
{langroid-0.42.10.dist-info → langroid-0.43.1.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: langroid
-Version: 0.42.10
+Version: 0.43.1
 Summary: Harness LLMs with Multi-Agent Programming
 Author-email: Prasad Chalasani <pchalasani@gmail.com>
 License: MIT
@@ -86,6 +86,8 @@ Requires-Dist: weaviate-client>=4.9.6; extra == 'all'
 Provides-Extra: arango
 Requires-Dist: arango-datasets<2.0.0,>=1.2.2; extra == 'arango'
 Requires-Dist: python-arango<9.0.0,>=8.1.2; extra == 'arango'
+Provides-Extra: asyncio
+Requires-Dist: asyncio>=3.4.3; extra == 'asyncio'
 Provides-Extra: chainlit
 Requires-Dist: chainlit<3.0.0,>=2.0.1; extra == 'chainlit'
 Requires-Dist: python-socketio<6.0.0,>=5.11.0; extra == 'chainlit'
{langroid-0.42.10.dist-info → langroid-0.43.1.dist-info}/RECORD CHANGED
@@ -5,7 +5,7 @@ langroid/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 langroid/agent/__init__.py,sha256=ll0Cubd2DZ-fsCMl7e10hf9ZjFGKzphfBco396IKITY,786
 langroid/agent/base.py,sha256=0szJ5ZxNSmobFO5805ur2cqKfD6vUP4ooN76Z5qAeyw,78677
 langroid/agent/batch.py,sha256=vi1r5i1-vN80WfqHDSwjEym_KfGsqPGUtwktmiK1nuk,20635
-langroid/agent/chat_agent.py,sha256=yuuEWVFLIN71XUpxdbhwZxEKAbOWG7zAV3ofYX4lCWg,84443
+langroid/agent/chat_agent.py,sha256=be7GlySBCuZ4jGQzk0FdVKlqhGeAuewfDywmHDACjh8,84924
 langroid/agent/chat_document.py,sha256=xzMtrPbaW-Y-BnF7kuhr2dorsD-D5rMWzfOqJ8HAoo8,17885
 langroid/agent/openai_assistant.py,sha256=JkAcs02bIrgPNVvUWVR06VCthc5-ulla2QMBzux_q6o,34340
 langroid/agent/task.py,sha256=HB6N-Jn80HFqCf0ZYOC1v3Bn3oO7NLjShHQJJFwW0q4,90557
@@ -81,10 +81,10 @@ langroid/language_models/prompt_formatter/llama2_formatter.py,sha256=YdcO88qyBeu
 langroid/parsing/__init__.py,sha256=2oUWJJAxIavq9Wtw5RGlkXLq3GF3zgXeVLLW4j7yeb8,1138
 langroid/parsing/agent_chats.py,sha256=sbZRV9ujdM5QXvvuHVjIi2ysYSYlap-uqfMMUKulrW0,1068
 langroid/parsing/code_parser.py,sha256=5ze0MBytrGGkU69pA_bJDjRm6QZz_QYfPcIwkagUa7U,3796
-langroid/parsing/document_parser.py,sha256=Gyz-xEMuPZSFiJhH8MSXDAVOWV2EMP50eTvOIj7rB_s,33733
+langroid/parsing/document_parser.py,sha256=tov34uYB_2ecq7-G7P7CWSOv5alcfwkrrwfsnCCVdIk,49714
 langroid/parsing/para_sentence_split.py,sha256=AJBzZojP3zpB-_IMiiHismhqcvkrVBQ3ZINoQyx_bE4,2000
 langroid/parsing/parse_json.py,sha256=aADo38bAHQhC8on4aWZZzVzSDy-dK35vRLZsFI2ewh8,4756
-langroid/parsing/parser.py,sha256=moJKI5Cn_Pxd7xbNrY220dqQu-0FeEWUI7ogeq63Kec,12842
+langroid/parsing/parser.py,sha256=8MDoKQO60RGXod9E5jMj-k90QNhdim4blVJB9L0rrSA,13789
 langroid/parsing/pdf_utils.py,sha256=rmNJ9UzuBgXTAYwj1TtRJcD8h53x7cizhgyYHKO88I4,1513
 langroid/parsing/repo_loader.py,sha256=3GjvPJS6Vf5L6gV2zOU8s-Tf1oq_fZm-IB_RL_7CTsY,29373
 langroid/parsing/routing.py,sha256=-FcnlqldzL4ZoxuDwXjQPNHgBe9F9-F4R6q7b_z9CvI,1232
@@ -110,7 +110,7 @@ langroid/utils/logging.py,sha256=mwxHimq1wtVQ64PvDyfJJ7Upj-rjHLNHgx8EC2wClvo,402
 langroid/utils/object_registry.py,sha256=iPz9GHzvmCeVoidB3JdAMEKcxJEqTdUr0otQEexDZ5s,2100
 langroid/utils/pandas_utils.py,sha256=UctS986Jtl_MvU5rA7-GfrjEHXP7MNu8ePhepv0bTn0,755
 langroid/utils/pydantic_utils.py,sha256=R7Ps8VP56-eSo-LYHWllFo-SJ2zDmdItuuYpUq2gGJ8,20854
-langroid/utils/system.py,sha256=cJqDgOf9mM82l1GyUeQQdEYAwepYXQwtpJU8Xrz0-MA,8453
+langroid/utils/system.py,sha256=q3QJtTSapIwNe8MMhGEM03wgxPLmZiD47_sF1pKx53I,8472
 langroid/utils/types.py,sha256=-BvyIf_LmAJ5jR9NC7S4CSVNEr3XayAaxJ5o0TiIej0,2992
 langroid/utils/algorithms/__init__.py,sha256=WylYoZymA0fnzpB4vrsH_0n7WsoLhmuZq8qxsOCjUpM,41
 langroid/utils/algorithms/graph.py,sha256=JbdpPnUOhw4-D6O7ou101JLA3xPCD0Lr3qaPoFCaRfo,2866
@@ -127,7 +127,7 @@ langroid/vector_store/pineconedb.py,sha256=otxXZNaBKb9f_H75HTaU3lMHiaR2NUp5MqwLZ
 langroid/vector_store/postgres.py,sha256=DQHd6dt-OcV_QVNm-ymn28rlTfhI6hqgcpLTPCsm0jI,15990
 langroid/vector_store/qdrantdb.py,sha256=v7TAsIoj_vxeKDYS9tpwJLBZA8fuTweTYxHo0X_uawM,17949
 langroid/vector_store/weaviatedb.py,sha256=tjlqEtkwrhykelt-nbr2WIuHWJBuSAGjZuG6gsAMBsc,11753
-langroid-0.42.10.dist-info/METADATA,sha256=NVYWP1mCWLiImvi6R5BVjxX-j-jDv02eGnoUOAg1aqE,61700
-langroid-0.42.10.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-langroid-0.42.10.dist-info/licenses/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
-langroid-0.42.10.dist-info/RECORD,,
+langroid-0.43.1.dist-info/METADATA,sha256=AQaUq3J9kszROM1HO3-8s9us3eGpSt9yJy7SI8eznkU,61773
+langroid-0.43.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+langroid-0.43.1.dist-info/licenses/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
+langroid-0.43.1.dist-info/RECORD,,