langroid 0.1.72__py3-none-any.whl → 0.1.76__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -81,6 +81,7 @@ class DocChatAgentConfig(ChatAgentConfig):
81
81
  # and use the embed(A) to find similar chunks in vecdb.
82
82
  # Referred to as HyDE in the paper:
83
83
  # https://arxiv.org/pdf/2212.10496.pdf
84
+ # It is False by default; its benefits depend on the context.
84
85
  hypothetical_answer: bool = False
85
86
  n_query_rephrases: int = 0
86
87
  use_fuzzy_match: bool = True
@@ -125,7 +126,6 @@ class DocChatAgentConfig(ChatAgentConfig):
125
126
  )
126
127
 
127
128
  vecdb: VectorStoreConfig = QdrantDBConfig(
128
- type="qdrant",
129
129
  collection_name=None,
130
130
  storage_path=".qdrant/data/",
131
131
  embedding=hf_embed_config,
@@ -392,13 +392,14 @@ class DocChatAgent(ChatAgent):
392
392
  if self.config.hypothetical_answer:
393
393
  with console.status("[cyan]LLM generating hypothetical answer..."):
394
394
  with StreamingIfAllowed(self.llm, False):
395
+ # TODO: provide an easy way to
396
+ # adjust this prompt depending on context.
395
397
  answer = self.llm_response_forget(
396
398
  f"""
397
- Give a sample answer to the following query,
399
+ Give an ideal answer to the following query,
398
400
  in up to 3 sentences. Do not explain yourself,
399
401
  and do not apologize, just show
400
- a possible answer. Guess a hypothetical answer
401
- even if you do not have any information.
402
+ a good possible answer, even if you do not have any information.
402
403
  Preface your answer with "HYPOTHETICAL ANSWER: "
403
404
 
404
405
  QUERY: {query}
@@ -505,7 +506,7 @@ class DocChatAgent(ChatAgent):
505
506
 
506
507
  with console.status("[cyan]LLM Extracting verbatim passages..."):
507
508
  with StreamingIfAllowed(self.llm, False):
508
- # these are async calls, one per passage
509
+ # these are async calls, one per passage; turn off streaming
509
510
  extracts = self.llm.get_verbatim_extracts(query, passages)
510
511
  extracts = [e for e in extracts if e.content != NO_ANSWER]
511
512
 
@@ -41,7 +41,6 @@ class RetrieverAgentConfig(DocChatAgentConfig):
41
41
  stream: bool = True # allow streaming where needed
42
42
  max_tokens: int = 10000
43
43
  vecdb: VectorStoreConfig = QdrantDBConfig(
44
- type="qdrant",
45
44
  collection_name=None,
46
45
  storage_path=".qdrant/data/",
47
46
  embedding=OpenAIEmbeddingsConfig(
@@ -1,5 +1,6 @@
1
1
  import re
2
2
  from abc import abstractmethod
3
+ from enum import Enum
3
4
  from io import BytesIO
4
5
  from typing import Any, Generator, List, Tuple
5
6
 
@@ -12,35 +13,56 @@ from langroid.mytypes import DocMetaData, Document
12
13
  from langroid.parsing.parser import Parser, ParsingConfig
13
14
 
14
15
 
15
- class PdfParser(Parser):
16
+ class DocumentType(str, Enum):
17
+ PDF = "pdf"
18
+ DOCX = "docx"
19
+
20
+
21
+ class DocumentParser(Parser):
16
22
  """
17
- Abstract base class for extracting text from PDFs.
23
+ Abstract base class for extracting text from special types of docs
24
+ such as PDFs or Docx.
18
25
 
19
26
  Attributes:
20
- source (str): The PDF source, either a URL or a file path.
21
- pdf_bytes (BytesIO): BytesIO object containing the PDF data.
27
+ source (str): The source, either a URL or a file path.
28
+ doc_bytes (BytesIO): BytesIO object containing the doc data.
22
29
  """
23
30
 
24
31
  @classmethod
25
- def create(cls, source: str, config: ParsingConfig) -> "PdfParser":
32
+ def create(cls, source: str, config: ParsingConfig) -> "DocumentParser":
26
33
  """
27
- Create a PDF Parser instance based on config.library specified.
34
+ Create a DocumentParser instance based on source type
35
+ and config.<source_type>.library specified.
28
36
 
29
37
  Args:
30
38
  source (str): The source of the PDF, either a URL or a file path.
31
39
  config (ParserConfig): The parser configuration.
32
40
 
33
41
  Returns:
34
- PdfParser: An instance of a PDF Parser subclass.
35
- """
36
- if config.pdf.library == "fitz":
37
- return FitzPdfParser(source, config)
38
- elif config.pdf.library == "pypdf":
39
- return PyPdfParser(source, config)
40
- elif config.pdf.library == "pdfplumber":
41
- return PdfPlumberParser(source, config)
42
+ DocumentParser: An instance of a DocumentParser subclass.
43
+ """
44
+ if DocumentParser._document_type(source) == DocumentType.PDF:
45
+ if config.pdf.library == "fitz":
46
+ return FitzPDFParser(source, config)
47
+ elif config.pdf.library == "pypdf":
48
+ return PyPDFParser(source, config)
49
+ elif config.pdf.library == "pdfplumber":
50
+ return PDFPlumberParser(source, config)
51
+ elif config.pdf.library == "unstructured":
52
+ return UnstructuredPDFParser(source, config)
53
+ else:
54
+ raise ValueError(
55
+ f"Unsupported PDF library specified: {config.pdf.library}"
56
+ )
57
+ elif DocumentParser._document_type(source) == DocumentType.DOCX:
58
+ if config.docx.library == "unstructured":
59
+ return UnstructuredDocxParser(source, config)
60
+ else:
61
+ raise ValueError(
62
+ f"Unsupported DOCX library specified: {config.docx.library}"
63
+ )
42
64
  else:
43
- raise ValueError(f"Unsupported library specified: {config.pdf.library}")
65
+ raise ValueError(f"Unsupported document type: {source}")
44
66
 
45
67
  def __init__(self, source: str, config: ParsingConfig):
46
68
  """
@@ -52,14 +74,32 @@ class PdfParser(Parser):
52
74
  super().__init__(config)
53
75
  self.source = source
54
76
  self.config = config
55
- self.pdf_bytes = self._load_pdf_as_bytesio()
77
+ self.doc_bytes = self._load_doc_as_bytesio()
78
+
79
+ @staticmethod
80
+ def _document_type(source: str) -> DocumentType:
81
+ """
82
+ Determine the type of document based on the source.
83
+
84
+ Args:
85
+ source (str): The source of the document, either a URL or a file path.
86
+
87
+ Returns:
88
+ DocumentType: The document type.
89
+ """
90
+ if source.lower().endswith(".pdf"):
91
+ return DocumentType.PDF
92
+ elif source.lower().endswith(".docx"):
93
+ return DocumentType.DOCX
94
+ else:
95
+ raise ValueError(f"Unsupported document type: {source}")
56
96
 
57
- def _load_pdf_as_bytesio(self) -> BytesIO:
97
+ def _load_doc_as_bytesio(self) -> BytesIO:
58
98
  """
59
- Load the PDF into a BytesIO object.
99
+ Load the document into a BytesIO object.
60
100
 
61
101
  Returns:
62
- BytesIO: A BytesIO object containing the PDF data.
102
+ BytesIO: A BytesIO object containing the doc data.
63
103
  """
64
104
  if self.source.startswith(("http://", "https://")):
65
105
  response = requests.get(self.source)
@@ -159,7 +199,7 @@ class PdfParser(Parser):
159
199
  return docs
160
200
 
161
201
 
162
- class FitzPdfParser(PdfParser):
202
+ class FitzPDFParser(DocumentParser):
163
203
  """
164
204
  Parser for processing PDFs using the `fitz` library.
165
205
  """
@@ -171,7 +211,7 @@ class FitzPdfParser(PdfParser):
171
211
  Returns:
172
212
  Generator[fitz.Page]: Generator yielding each page.
173
213
  """
174
- doc = fitz.open(stream=self.pdf_bytes, filetype="pdf")
214
+ doc = fitz.open(stream=self.doc_bytes, filetype="pdf")
175
215
  for i, page in enumerate(doc):
176
216
  yield i, page
177
217
  doc.close()
@@ -189,7 +229,7 @@ class FitzPdfParser(PdfParser):
189
229
  return self.fix_text(page.get_text())
190
230
 
191
231
 
192
- class PyPdfParser(PdfParser):
232
+ class PyPDFParser(DocumentParser):
193
233
  """
194
234
  Parser for processing PDFs using the `pypdf` library.
195
235
  """
@@ -201,7 +241,7 @@ class PyPdfParser(PdfParser):
201
241
  Returns:
202
242
  Generator[pypdf.pdf.PageObject]: Generator yielding each page.
203
243
  """
204
- reader = pypdf.PdfReader(self.pdf_bytes)
244
+ reader = pypdf.PdfReader(self.doc_bytes)
205
245
  for i, page in enumerate(reader.pages):
206
246
  yield i, page
207
247
 
@@ -218,7 +258,7 @@ class PyPdfParser(PdfParser):
218
258
  return self.fix_text(page.extract_text())
219
259
 
220
260
 
221
- class PdfPlumberParser(PdfParser):
261
+ class PDFPlumberParser(DocumentParser):
222
262
  """
223
263
  Parser for processing PDFs using the `pdfplumber` library.
224
264
  """
@@ -232,7 +272,7 @@ class PdfPlumberParser(PdfParser):
232
272
  Returns:
233
273
  Generator[pdfplumber.Page]: Generator yielding each page.
234
274
  """
235
- with pdfplumber.open(self.pdf_bytes) as pdf:
275
+ with pdfplumber.open(self.doc_bytes) as pdf:
236
276
  for i, page in enumerate(pdf.pages):
237
277
  yield i, page
238
278
 
@@ -247,3 +287,60 @@ class PdfPlumberParser(PdfParser):
247
287
  str: Extracted text from the page.
248
288
  """
249
289
  return self.fix_text(page.extract_text())
290
+
291
+
292
+ class UnstructuredPDFParser(DocumentParser):
293
+ """
294
+ Parser for processing PDF files using the `unstructured` library.
295
+ """
296
+
297
+ def iterate_pages(self) -> Generator[Tuple[int, Any], None, None]: # type: ignore
298
+ from unstructured.partition.pdf import partition_pdf
299
+
300
+ elements = partition_pdf(file=self.doc_bytes, include_page_breaks=True)
301
+ for i, el in enumerate(elements):
302
+ yield i, el
303
+
304
+ def extract_text_from_page(self, page: Any) -> str:
305
+ """
306
+ Extract text from a given `unstructured` element.
307
+
308
+ Args:
309
+ page (unstructured element): The `unstructured` element object.
310
+
311
+ Returns:
312
+ str: Extracted text from the element.
313
+ """
314
+ return self.fix_text(str(page))
315
+
316
+
317
+ class UnstructuredDocxParser(DocumentParser):
318
+ """
319
+ Parser for processing DOCX files using the `unstructured` library.
320
+ """
321
+
322
+ def iterate_pages(self) -> Generator[Tuple[int, Any], None, None]: # type: ignore
323
+ from unstructured.partition.docx import partition_docx
324
+
325
+ elements = partition_docx(file=self.doc_bytes)
326
+ for i, el in enumerate(elements):
327
+ yield i, el
328
+
329
+ def extract_text_from_page(self, page: Any) -> str:
330
+ """
331
+ Extract text from a given `unstructured` element.
332
+
333
+ Note:
334
+ The concept of "pages" doesn't actually exist in the .docx file format in
335
+ the same way it does in formats like .pdf. A .docx file is made up of a
336
+ series of elements like paragraphs and tables, but the division into
337
+ pages is done dynamically based on the rendering settings (like the page
338
+ size, margin size, font size, etc.).
339
+
340
+ Args:
341
+ page (unstructured element): The `unstructured` element object.
342
+
343
+ Returns:
344
+ str: Extracted text from the element.
345
+ """
346
+ return self.fix_text(str(page))
@@ -23,6 +23,10 @@ class PdfParsingConfig(BaseSettings):
23
23
  library: str = "pdfplumber"
24
24
 
25
25
 
26
+ class DocxParsingConfig(BaseSettings):
27
+ library: str = "unstructured"
28
+
29
+
26
30
  class ParsingConfig(BaseSettings):
27
31
  splitter: str = Splitter.TOKENS
28
32
  chunk_size: int = 200 # aim for this many tokens per chunk
@@ -35,6 +39,7 @@ class ParsingConfig(BaseSettings):
35
39
  separators: List[str] = ["\n\n", "\n", " ", ""]
36
40
  token_encoding_model: str = "text-embedding-ada-002"
37
41
  pdf: PdfParsingConfig = PdfParsingConfig()
42
+ docx: DocxParsingConfig = DocxParsingConfig()
38
43
 
39
44
 
40
45
  class Parser:
@@ -18,8 +18,8 @@ from github.Repository import Repository
18
18
  from pydantic import BaseSettings
19
19
 
20
20
  from langroid.mytypes import DocMetaData, Document
21
- from langroid.parsing.parser import Parser
22
- from langroid.parsing.pdf_parser import PdfParser
21
+ from langroid.parsing.document_parser import DocumentParser
22
+ from langroid.parsing.parser import Parser, ParsingConfig
23
23
 
24
24
  logger = logging.getLogger(__name__)
25
25
 
@@ -440,7 +440,7 @@ class RepoLoader:
440
440
  @staticmethod
441
441
  def get_documents(
442
442
  path: str,
443
- parser: Parser,
443
+ parser: Parser = Parser(ParsingConfig()),
444
444
  file_types: Optional[List[str]] = None,
445
445
  exclude_dirs: Optional[List[str]] = None,
446
446
  depth: int = -1,
@@ -493,12 +493,12 @@ class RepoLoader:
493
493
 
494
494
  for file_path in file_paths:
495
495
  _, file_extension = os.path.splitext(file_path)
496
- if file_extension.lower() == ".pdf":
497
- pdf_parser = PdfParser.create(
496
+ if file_extension.lower() in [".pdf", ".docx"]:
497
+ doc_parser = DocumentParser.create(
498
498
  file_path,
499
499
  parser.config,
500
500
  )
501
- docs.extend(pdf_parser.get_doc_chunks())
501
+ docs.extend(doc_parser.get_doc_chunks())
502
502
  else:
503
503
  with open(file_path, "r") as f:
504
504
  if lines is not None:
@@ -9,8 +9,8 @@ from trafilatura.downloads import (
9
9
  )
10
10
 
11
11
  from langroid.mytypes import DocMetaData, Document
12
- from langroid.parsing.parser import Parser
13
- from langroid.parsing.pdf_parser import PdfParser
12
+ from langroid.parsing.document_parser import DocumentParser
13
+ from langroid.parsing.parser import Parser, ParsingConfig
14
14
 
15
15
  logging.getLogger("trafilatura").setLevel(logging.ERROR)
16
16
 
@@ -27,7 +27,7 @@ class URLLoader:
27
27
  the "accept" button on the cookie dialog.
28
28
  """
29
29
 
30
- def __init__(self, urls: List[str], parser: Parser):
30
+ def __init__(self, urls: List[str], parser: Parser = Parser(ParsingConfig())):
31
31
  self.urls = urls
32
32
  self.parser = parser
33
33
 
@@ -44,12 +44,12 @@ class URLLoader:
44
44
  sleep_time=5,
45
45
  )
46
46
  for url, result in buffered_downloads(buffer, threads):
47
- if url.lower().endswith(".pdf"):
48
- pdf_parser = PdfParser.create(
47
+ if url.lower().endswith(".pdf") or url.lower().endswith(".docx"):
48
+ doc_parser = DocumentParser.create(
49
49
  url,
50
50
  self.parser.config,
51
51
  )
52
- docs.extend(pdf_parser.get_doc_chunks())
52
+ docs.extend(doc_parser.get_doc_chunks())
53
53
  else:
54
54
  text = trafilatura.extract(
55
55
  result,
@@ -14,6 +14,7 @@ logger = logging.getLogger(__name__)
14
14
 
15
15
 
16
16
  class VectorStoreConfig(BaseSettings):
17
+ type: str = "qdrant" # deprecated, keeping it for backward compatibility
17
18
  collection_name: str | None = None
18
19
  replace_collection: bool = False # replace collection if it already exists
19
20
  storage_path: str = ".qdrant/data"
@@ -23,7 +24,6 @@ class VectorStoreConfig(BaseSettings):
23
24
  model_type="openai",
24
25
  )
25
26
  timeout: int = 60
26
- type: str = "qdrant"
27
27
  host: str = "127.0.0.1"
28
28
  port: int = 6333
29
29
  # compose_file: str = "langroid/vector_store/docker-compose-qdrant.yml"
@@ -38,7 +38,7 @@ class VectorStore(ABC):
38
38
  self.config = config
39
39
 
40
40
  @staticmethod
41
- def create(config: VectorStoreConfig) -> "VectorStore":
41
+ def create(config: VectorStoreConfig) -> Optional["VectorStore"]:
42
42
  from langroid.vector_store.chromadb import ChromaDB, ChromaDBConfig
43
43
  from langroid.vector_store.qdrantdb import QdrantDB, QdrantDBConfig
44
44
 
@@ -47,7 +47,16 @@ class VectorStore(ABC):
47
47
  elif isinstance(config, ChromaDBConfig):
48
48
  return ChromaDB(config)
49
49
  else:
50
- raise ValueError(f"Unknown vector store config: {config.__repr_name__()}")
50
+ logger.warning(
51
+ f"""
52
+ Unknown vector store config: {config.__repr_name__()},
53
+ so skipping vector store creation!
54
+ If you intended to use a vector-store, please set a specific
55
+ vector-store in your script, typically in the `vecdb` field of a
56
+ `ChatAgentConfig`, otherwise set it to None.
57
+ """
58
+ )
59
+ return None
51
60
 
52
61
  @abstractmethod
53
62
  def clear_empty_collections(self) -> int:
@@ -123,4 +132,4 @@ class VectorStore(ABC):
123
132
  def show_if_debug(self, doc_score_pairs: List[Tuple[Document, float]]) -> None:
124
133
  if settings.debug:
125
134
  for i, (d, s) in enumerate(doc_score_pairs):
126
- print_long_text("red", "italic red", f"MATCH-{i}", d.content)
135
+ print_long_text("red", "italic red", f"\nMATCH-{i}\n", d.content)
@@ -17,7 +17,6 @@ logger = logging.getLogger(__name__)
17
17
 
18
18
 
19
19
  class ChromaDBConfig(VectorStoreConfig):
20
- type: str = "chroma"
21
20
  collection_name: str = "chroma-langroid"
22
21
  storage_path: str = ".chroma/data"
23
22
  embedding: EmbeddingModelsConfig = OpenAIEmbeddingsConfig()
@@ -28,9 +28,7 @@ logger = logging.getLogger(__name__)
28
28
 
29
29
 
30
30
  class QdrantDBConfig(VectorStoreConfig):
31
- type: str = "qdrant"
32
31
  cloud: bool = True
33
-
34
32
  collection_name: str | None = None
35
33
  storage_path: str = ".qdrant/data"
36
34
  embedding: EmbeddingModelsConfig = OpenAIEmbeddingsConfig()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: langroid
3
- Version: 0.1.72
3
+ Version: 0.1.76
4
4
  Summary: Harness LLMs with Multi-Agent Programming
5
5
  License: MIT
6
6
  Author: Prasad Chalasani
@@ -70,6 +70,7 @@ Requires-Dist: trafilatura (>=1.5.0,<2.0.0)
70
70
  Requires-Dist: typer (>=0.7.0,<0.8.0)
71
71
  Requires-Dist: types-redis (>=4.5.5.2,<5.0.0.0)
72
72
  Requires-Dist: types-requests (>=2.31.0.1,<3.0.0.0)
73
+ Requires-Dist: unstructured[docx,pdf,pptx] (>=0.10.16,<0.11.0)
73
74
  Requires-Dist: wget (>=3.2,<4.0)
74
75
  Description-Content-Type: text/markdown
75
76
 
@@ -130,8 +131,8 @@ This Multi-Agent paradigm is inspired by the
130
131
  [Actor Framework](https://en.wikipedia.org/wiki/Actor_model)
131
132
  (but you do not need to know anything about this!).
132
133
 
133
- Langroid is a fresh take on LLM app-development, where considerable thought has gone
134
- into simplifying the developer experience. It does not use `Langchain` or `Llama-Index`.
134
+ `Langroid` is a fresh take on LLM app-development, where considerable thought has gone
135
+ into simplifying the developer experience; it does not use `Langchain`.
135
136
 
136
137
  We welcome contributions -- See the [contributions](./CONTRIBUTING.md) document
137
138
  for ideas on what to contribute.
@@ -142,6 +143,8 @@ for ideas on what to contribute.
142
143
  <summary> <b>:fire: Updates/Releases</b></summary>
143
144
 
144
145
  - **Sep 2023:**
146
+ - **0.1.72:** Many improvements to DocChatAgent: better embedding model,
147
+ hybrid search to improve retrieval, better pdf parsing, re-ranking retrieved results with cross-encoders.
145
148
  - **Use with local LLama Models:** see tutorial [here](https://langroid.github.io/langroid/blog/2023/09/14/using-langroid-with-local-llms/)
146
149
  - **Langroid Blog/Newsletter Launched!**: First post is [here](https://substack.com/notes/post/p-136704592) -- Please subscribe to stay updated.
147
150
  - **0.1.56:** Support Azure OpenAI.
@@ -167,7 +170,7 @@ See [this test](tests/main/test_recipient_tool.py) for example usage.
167
170
  - **0.1.27**: Added [support](langroid/cachedb/momento_cachedb.py)
168
171
  for [Momento Serverless Cache](https://www.gomomento.com/) as an alternative to Redis.
169
172
  - **0.1.24**: [`DocChatAgent`](langroid/agent/special/doc_chat_agent.py)
170
- now [accepts](langroid/parsing/pdf_parser.py) PDF files or URLs.
173
+ now [accepts](langroid/parsing/document_parser.py) PDF files or URLs.
171
174
 
172
175
  </details>
173
176
 
@@ -231,9 +234,6 @@ Here is what it looks like in action:
231
234
 
232
235
  # :gear: Installation and Setup
233
236
 
234
- :whale: For a simpler setup, see the Docker section below, which lets you get started just
235
- by setting up environment variables in a `.env` file.
236
-
237
237
  ### Install `langroid`
238
238
  Langroid requires Python 3.11+. We recommend using a virtual environment.
239
239
  Use `pip` to install `langroid` (from PyPi) to your virtual environment:
@@ -663,6 +663,7 @@ First create a `DocChatAgentConfig` instance, with a
663
663
 
664
664
  ```python
665
665
  from langroid.agent.doc_chat_agent import DocChatAgentConfig
666
+ from langroid.vector_store.qdrantdb import QdrantDBConfig
666
667
  config = DocChatAgentConfig(
667
668
  doc_paths = [
668
669
  "https://en.wikipedia.org/wiki/Language_model",
@@ -672,9 +673,7 @@ config = DocChatAgentConfig(
672
673
  llm = OpenAIGPTConfig(
673
674
  chat_model=OpenAIChatModel.GPT4,
674
675
  ),
675
- vecdb=VectorStoreConfig(
676
- type="qdrant",
677
- ),
676
+ vecdb=QdrantDBConfig()
678
677
  )
679
678
  ```
680
679
 
@@ -6,9 +6,9 @@ langroid/agent/chat_document.py,sha256=k7Klav3FIBTf2w95bQtxgqBrf2fMo1ydSlklQvv4R
6
6
  langroid/agent/helpers.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
7
  langroid/agent/junk,sha256=LxfuuW7Cijsg0szAzT81OjWWv1PMNI-6w_-DspVIO2s,339
8
8
  langroid/agent/special/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
- langroid/agent/special/doc_chat_agent.py,sha256=nzodYPy2N5EaS20UlrXI6md4ofIrOVHWFK_EYfpVQQA,23767
9
+ langroid/agent/special/doc_chat_agent.py,sha256=oBy9K6ScT01AWmdSBvKyhuivjv6ZWD6mYcpxY8kGZQk,23897
10
10
  langroid/agent/special/recipient_validator_agent.py,sha256=R3Rit93BNWQar_9stuDBGzmLr2W-IYOQ7oq-tlNNlps,6035
11
- langroid/agent/special/retriever_agent.py,sha256=DeOB5crFjXBvDEZT9k9ZVinOfFM2VgS6tQWWFyXSk9o,7204
11
+ langroid/agent/special/retriever_agent.py,sha256=c4FKTLnMVuHAIDfdKXSHxhvigYeTEccRWVsO_dHrSNg,7181
12
12
  langroid/agent/special/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
13
  langroid/agent/special/sql/sql_chat_agent.py,sha256=Ua_gfK_1k5ct59Zkbe78bzs-2jabtFkEVx76a0pGs9Y,12867
14
14
  langroid/agent/special/sql/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -46,14 +46,14 @@ langroid/parsing/agent_chats.py,sha256=sbZRV9ujdM5QXvvuHVjIi2ysYSYlap-uqfMMUKulr
46
46
  langroid/parsing/code-parsing.md,sha256=--cyyNiSZSDlIwcjAV4-shKrSiRe2ytF3AdSoS_hD2g,3294
47
47
  langroid/parsing/code_parser.py,sha256=BbDAzp35wkYQ9U1dpf1ARL0lVyi0tfqEc6_eox2C090,3727
48
48
  langroid/parsing/config.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
49
+ langroid/parsing/document_parser.py,sha256=w31HvTt8aijusYUk9XN9FpEUo-xc8_-iTK1UGdEM-jg,11212
49
50
  langroid/parsing/json.py,sha256=MVqBUfInALQm1QKbcfEvLzWxBz_UztCIyGk7AK5uFPo,1650
50
51
  langroid/parsing/para_sentence_split.py,sha256=AJBzZojP3zpB-_IMiiHismhqcvkrVBQ3ZINoQyx_bE4,2000
51
- langroid/parsing/parser.py,sha256=PxYsZD2nEPhGjL6czR98DABS6sLnJ_AWdvRbIrZk6PQ,7945
52
- langroid/parsing/pdf_parser.py,sha256=wB5PnNXxhcRssiVUH6nDQ2Fxp0O3VqJcT3vlA7-J38M,7858
53
- langroid/parsing/repo_loader.py,sha256=fOQaQZpjeSNulY4zd1Oplzo4mcOQUfgx-Sxz8V9a9e4,27265
52
+ langroid/parsing/parser.py,sha256=99RE4sQg5CHH4xEznuJOE_yl3lIIehkRyGmUdq4hmuo,8070
53
+ langroid/parsing/repo_loader.py,sha256=2OWCNZg6PjoXpIxCusumCb-LIItXPE9ROx53kXdrxAE,27332
54
54
  langroid/parsing/search.py,sha256=nyJYyKcXZ5fOtT8vLfveejq4AYAOoloTGappU9HMSpM,4414
55
55
  langroid/parsing/table_loader.py,sha256=uqbupGr4y_7os18RtaY5GpD0hWcgzROoNy8dQIHB4kc,1767
56
- langroid/parsing/url_loader.py,sha256=mJZvN9iIDX-lPvxslg_FWIzndXfckyLupXCv0AjJa58,2064
56
+ langroid/parsing/url_loader.py,sha256=dhmUTysS_YZyIXVAekxCGPiCbFsOsHXj_eHMow0xoGQ,2153
57
57
  langroid/parsing/url_loader_cookies.py,sha256=Lg4sNpRz9MByWq2mde6T0hKv68VZSV3mtMjNEHuFeSU,2327
58
58
  langroid/parsing/urls.py,sha256=_Bcf1iRdT7cQrQ8hnbPX0Jtzxc0lVFaucTS5rJoKA14,3709
59
59
  langroid/parsing/utils.py,sha256=zqvZWpZktRJTKx_JAqxaIyoudMdKVdB1zzjnOhVYHS4,2196
@@ -80,11 +80,11 @@ langroid/utils/web/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuF
80
80
  langroid/utils/web/login.py,sha256=1iz9eUAHa87vpKIkzwkmFa00avwFWivDSAr7QUhK7U0,2528
81
81
  langroid/utils/web/selenium_login.py,sha256=mYI6EvVmne34N9RajlsxxRqJQJvV-WG4LGp6sEECHPw,1156
82
82
  langroid/vector_store/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
83
- langroid/vector_store/base.py,sha256=QITt6rpXUUqCb1gYo069r06QMKMox28o00BvTy7LUko,4035
84
- langroid/vector_store/chromadb.py,sha256=Kai27vE6xXrXKtP2hDeNh2z43-8X7M_spzS3Pp-YQvs,5409
83
+ langroid/vector_store/base.py,sha256=mw36zLzdQeG_c1KIWeRmycXnXIzFvqRW2RG7xf6jTGk,4465
84
+ langroid/vector_store/chromadb.py,sha256=2a68iLkgBGoGmuJ80ogJ0rRuoh-Wqdj3rlxVGagMxWk,5384
85
85
  langroid/vector_store/qdrant_cloud.py,sha256=3im4Mip0QXLkR6wiqVsjV1QvhSElfxdFSuDKddBDQ-4,188
86
- langroid/vector_store/qdrantdb.py,sha256=MnC5krrRFHL9tRrOW2SLcOMhn-Lr136co2IGf4TinCw,9883
87
- langroid-0.1.72.dist-info/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
88
- langroid-0.1.72.dist-info/WHEEL,sha256=vVCvjcmxuUltf8cYhJ0sJMRDLr1XsPuxEId8YDzbyCY,88
89
- langroid-0.1.72.dist-info/METADATA,sha256=NVTj1blyVA_tYceuhx9JeWC1yhYX2GHPDC5PMAXdyOo,35946
90
- langroid-0.1.72.dist-info/RECORD,,
86
+ langroid/vector_store/qdrantdb.py,sha256=RxLCLaaampLS-Gi-ccYEydUjzI0qUJC9jEvc8g2OXEE,9857
87
+ langroid-0.1.76.dist-info/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
88
+ langroid-0.1.76.dist-info/WHEEL,sha256=vVCvjcmxuUltf8cYhJ0sJMRDLr1XsPuxEId8YDzbyCY,88
89
+ langroid-0.1.76.dist-info/METADATA,sha256=QPzgocvZ5PQSJYhAA0uVu3RFhw4jHioxlg_cRwVtS0o,36078
90
+ langroid-0.1.76.dist-info/RECORD,,