langroid 0.36.1__py3-none-any.whl → 0.37.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langroid/agent/special/doc_chat_agent.py +3 -3
- langroid/embedding_models/models.py +2 -2
- langroid/exceptions.py +16 -4
- langroid/parsing/code_parser.py +1 -1
- langroid/parsing/document_parser.py +161 -64
- langroid/parsing/parser.py +6 -4
- langroid/parsing/pdf_utils.py +55 -0
- langroid/vector_store/chromadb.py +12 -1
- langroid/vector_store/qdrantdb.py +1 -1
- langroid/vector_store/weaviatedb.py +5 -5
- {langroid-0.36.1.dist-info → langroid-0.37.1.dist-info}/METADATA +21 -11
- {langroid-0.36.1.dist-info → langroid-0.37.1.dist-info}/RECORD +14 -13
- {langroid-0.36.1.dist-info → langroid-0.37.1.dist-info}/WHEEL +0 -0
- {langroid-0.36.1.dist-info → langroid-0.37.1.dist-info}/licenses/LICENSE +0 -0
langroid/agent/special/doc_chat_agent.py
CHANGED
@@ -100,7 +100,7 @@ hf_embed_config = SentenceTransformerEmbeddingsConfig(
 
 oai_embed_config = OpenAIEmbeddingsConfig(
     model_type="openai",
-    model_name="text-embedding-ada-002",
+    model_name="text-embedding-3-small",
     dims=1536,
 )
 
@@ -189,8 +189,8 @@ class DocChatAgentConfig(ChatAgentConfig):
         # NOTE: PDF parsing is extremely challenging, and each library
         # has its own strengths and weaknesses.
         # Try one that works for your use case.
-        # or "unstructured", "pdfplumber", "pypdf"
-        library="fitz",
+        # or "unstructured", "fitz", "pymupdf4llm", "pypdf"
+        library="pymupdf4llm",
     ),
 )
 
langroid/embedding_models/models.py
CHANGED
@@ -18,7 +18,7 @@ AzureADTokenProvider = Callable[[], str]
 
 class OpenAIEmbeddingsConfig(EmbeddingModelsConfig):
     model_type: str = "openai"
-    model_name: str = "text-embedding-ada-002"
+    model_name: str = "text-embedding-3-large"
     api_key: str = ""
     api_base: Optional[str] = None
     organization: str = ""
@@ -28,7 +28,7 @@ class OpenAIEmbeddingsConfig(EmbeddingModelsConfig):
 
 class AzureOpenAIEmbeddingsConfig(EmbeddingModelsConfig):
     model_type: str = "azure-openai"
-    model_name: str = "text-embedding-ada-002"
+    model_name: str = "text-embedding-3-large"
     api_key: str = ""
     api_base: str = ""
     deployment_name: Optional[str] = None
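Net effect of the two hunks above: a freshly constructed OpenAIEmbeddingsConfig or AzureOpenAIEmbeddingsConfig now defaults to text-embedding-3-large. A minimal sketch (field names taken from the diff; note that text-embedding-3-large returns 3072-dim vectors, so pair a model_name override with matching dims, as the doc-chat example above does for the 1536-dim text-embedding-3-small):

from langroid.embedding_models.models import OpenAIEmbeddingsConfig

default_cfg = OpenAIEmbeddingsConfig()  # model_name == "text-embedding-3-large"
small_cfg = OpenAIEmbeddingsConfig(     # smaller/cheaper model, 1536 dims
    model_name="text-embedding-3-small",
    dims=1536,
)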
langroid/exceptions.py
CHANGED
@@ -1,4 +1,4 @@
-from typing import Optional
+from typing import List, Optional
 
 
 class XMLException(Exception):
@@ -15,7 +15,7 @@ class LangroidImportError(ImportError):
     def __init__(
         self,
         package: Optional[str] = None,
-        extra: Optional[str] = None,
+        extra: Optional[str | List[str]] = None,
         error: str = "",
         *args: object,
     ) -> None:
@@ -33,9 +33,21 @@ class LangroidImportError(ImportError):
             error = f"{package} is not installed by default with Langroid.\n"
 
         if extra:
+            if isinstance(extra, list):
+                help_preamble = f"""
+                If you want to use it, please install langroid with one of these
+                extras: {', '.join(extra)}. The examples below use the first one,
+                i.e. {extra[0]}.
+                """
+                extra = extra[0]
+            else:
+                help_preamble = f"""
+                If you want to use it, please install langroid with the
+                `{extra}` extra.
+                """
+
             install_help = f"""
-            If you want to use it, please install langroid
-            with the `{extra}` extra, for example:
+            {help_preamble}
 
             If you are using pip:
             pip install "langroid[{extra}]"
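The upshot: `extra` may now be a single extra or a list of extras; with a list, the error message suggests all of them and the install commands use the first. A sketch of the new call style, mirroring how the PDF parsers below use it:

from langroid.exceptions import LangroidImportError

# single extra, as before: LangroidImportError("pypdf", "pdf-parsers")
# new: a list of extras; "docling" (the first) is used in the pip examples
raise LangroidImportError(
    "docling", ["docling", "pdf-parsers", "all", "doc-chat"]
)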
langroid/parsing/document_parser.py
CHANGED
@@ -5,9 +5,10 @@ import logging
 import re
 from enum import Enum
 from io import BytesIO
-from typing import TYPE_CHECKING, Any, Generator, List, Tuple
+from typing import TYPE_CHECKING, Any, Dict, Generator, List, Tuple
 
 from langroid.exceptions import LangroidImportError
+from langroid.parsing.pdf_utils import pdf_split_pages
 from langroid.utils.object_registry import ObjectRegistry
 
 try:
@@ -15,18 +16,24 @@ try:
 except ImportError:
     if not TYPE_CHECKING:
         fitz = None
+try:
+    import pymupdf4llm
+except ImportError:
+    if not TYPE_CHECKING:
+        pymupdf4llm = None
 
 try:
-    import pypdf
+    import docling
 except ImportError:
     if not TYPE_CHECKING:
-        pypdf = None
+        docling = None
 
 try:
-    import pdfplumber
+    import pypdf
 except ImportError:
     if not TYPE_CHECKING:
-        pdfplumber = None
+        pypdf = None
+
 
 import requests
 from bs4 import BeautifulSoup
@@ -41,6 +48,7 @@ logger = logging.getLogger(__name__)
 
 
 class DocumentType(str, Enum):
+    # TODO add `md` (Markdown) and `html`
     PDF = "pdf"
     DOCX = "docx"
     DOC = "doc"
@@ -139,10 +147,12 @@ class DocumentParser(Parser):
         if inferred_doc_type == DocumentType.PDF:
             if config.pdf.library == "fitz":
                 return FitzPDFParser(source, config)
+            elif config.pdf.library == "pymupdf4llm":
+                return PyMuPDF4LLMParser(source, config)
+            elif config.pdf.library == "docling":
+                return DoclingParser(source, config)
             elif config.pdf.library == "pypdf":
                 return PyPDFParser(source, config)
-            elif config.pdf.library == "pdfplumber":
-                return PDFPlumberParser(source, config)
             elif config.pdf.library == "unstructured":
                 return UnstructuredPDFParser(source, config)
             elif config.pdf.library == "pdf2image":
@@ -307,8 +317,11 @@ class DocumentParser(Parser):
         """Yield each page in the PDF."""
         raise NotImplementedError
 
-    def extract_text_from_page(self, page: Any) -> str:
-        """Extract text from a given page."""
+    def get_document_from_page(self, page: Any) -> Document:
+        """
+        Get Langroid Document object (with possible metadata)
+        corresponding to a given page.
+        """
         raise NotImplementedError
 
     def fix_text(self, text: str) -> str:
@@ -335,7 +348,10 @@ class DocumentParser(Parser):
         """
 
         text = "".join(
-            [self.extract_text_from_page(page) for _, page in self.iterate_pages()]
+            [
+                self.get_document_from_page(page).content
+                for _, page in self.iterate_pages()
+            ]
         )
         return Document(content=text, metadata=DocMetaData(source=self.source))
 
@@ -359,7 +375,10 @@ class DocumentParser(Parser):
         common_id = ObjectRegistry.new_id()
         n_chunks = 0  # how many chunk so far
         for i, page in self.iterate_pages():
-            page_text = self.extract_text_from_page(page)
+            # not used but could be useful, esp to blend the
+            # metadata from the pages into the chunks
+            page_doc = self.get_document_from_page(page)
+            page_text = page_doc.content
             split += self.tokenizer.encode(page_text)
             pages.append(str(i + 1))
             # split could be so long it needs to be split
@@ -422,81 +441,146 @@ class FitzPDFParser(DocumentParser):
             yield i, page
         doc.close()
 
-    def extract_text_from_page(self, page: "fitz.Page") -> str:
+    def get_document_from_page(self, page: "fitz.Page") -> Document:
         """
-        Extract text from a given `fitz` page.
+        Get Document object from a given `fitz` page.
 
         Args:
             page (fitz.Page): The `fitz` page object.
 
         Returns:
-            str: Extracted text from the page.
+            Document: Document object, with content and possible metadata.
         """
-        return self.fix_text(page.get_text())
+        return Document(
+            content=self.fix_text(page.get_text()),
+            metadata=DocMetaData(source=self.source),
+        )
 
 
-class PDFPlumberParser(DocumentParser):
+class PyMuPDF4LLMParser(DocumentParser):
     """
-    Parser for processing PDFs using the `pdfplumber` library.
+    Parser for processing PDFs using the `pymupdf4llm` library.
     """
 
-    def iterate_pages(self) -> Generator[Tuple[int, "pdfplumber.Page"], None, None]:
+    def iterate_pages(self) -> Generator[Tuple[int, "fitz.Page"], None, None]:
         """
-        Yield each page in the PDF using `pdfplumber`.
+        Yield each page in the PDF using `fitz`.
 
         Returns:
-            Generator[pdfplumber.Page]: Generator yielding each page.
+            Generator[fitz.Page]: Generator yielding each page.
         """
-        if pdfplumber is None:
-            raise LangroidImportError("pdfplumber", "pdf-parsers")
-        with pdfplumber.open(self.doc_bytes) as pdf:
-            for i, page in enumerate(pdf.pages):
-                yield i, page
+        if fitz is None:
+            raise LangroidImportError(
+                "pymupdf4llm", ["pymupdf4llm", "all", "pdf-parsers", "doc-chat"]
+            )
+        doc: fitz.Document = fitz.open(stream=self.doc_bytes, filetype="pdf")
+        pages: List[Dict[str, Any]] = pymupdf4llm.to_markdown(doc, page_chunks=True)
+        for i, page in enumerate(pages):
+            yield i, page
+        doc.close()
 
-    def extract_text_from_page(self, page: "pdfplumber.Page") -> str:
+    def get_document_from_page(self, page: Dict[str, Any]) -> Document:
         """
-        Extract text from a given `pdfplumber` page.
+        Get Document object corresponding to a given "page-chunk"
+        dictionary, see:
+        https://pymupdf.readthedocs.io/en/latest/pymupdf4llm/api.html
 
         Args:
-            page (pdfplumber.Page): The `pdfplumber` page object.
+            page (Dict[str,Any]): The "page-chunk" dictionary.
 
         Returns:
-            str: Extracted text from the page.
+            Document: Document object, with content and possible metadata.
         """
-        return self.fix_text(page.extract_text())
+        return Document(
+            content=self.fix_text(page.get("text", "")),
+            # TODO could possible use other metadata from page, see above link.
+            metadata=DocMetaData(source=self.source),
+        )
+
+
+class DoclingParser(DocumentParser):
+    """
+    Parser for processing PDFs using the `docling` library.
+    """
+
+    def iterate_pages(self) -> Generator[Tuple[int, Any], None, None]:
+        """
+        Yield each page in the PDF using `docling`.
+
+        Returns:
+            Generator[docling.Page]: Generator yielding each page.
+        """
+        if docling is None:
+            raise LangroidImportError(
+                "docling", ["docling", "pdf-parsers", "all", "doc-chat"]
+            )
+
+        from docling.document_converter import (  # type: ignore
+            ConversionResult,
+            DocumentConverter,
+        )
+        from docling_core.types.doc import ImageRefMode  # type: ignore
+
+        page_files, tmp_dir = pdf_split_pages(self.doc_bytes)
+        converter = DocumentConverter()
+        for i, page_file in enumerate(page_files):
+            result: ConversionResult = converter.convert(page_file)
+            md_text = result.document.export_to_markdown(
+                image_mode=ImageRefMode.REFERENCED
+            )
+            yield i, md_text
+
+        tmp_dir.cleanup()
+
+    def get_document_from_page(self, page: str) -> Document:
+        """
+        Get Document object from a given `docling` "page" (actually a chunk).
+
+        Args:
+            page (docling.chunking.DocChunk): The `docling` chunk
 
         Returns:
-            str: Extracted text from the page.
+            Document: Document object, with content and possible metadata.
         """
-        return self.fix_text(page.extract_text())
+        return Document(
+            content=self.fix_text(page),
+            metadata=DocMetaData(source=self.source),
+        )
 
 
 class PyPDFParser(DocumentParser):
     """
     Parser for processing PDFs using the `pypdf` library.
     """
 
     def iterate_pages(self) -> Generator[Tuple[int, pypdf.PageObject], None, None]:
         """
         Yield each page in the PDF using `pypdf`.
 
         Returns:
             Generator[pypdf.pdf.PageObject]: Generator yielding each page.
         """
         if pypdf is None:
             raise LangroidImportError("pypdf", "pdf-parsers")
         reader = pypdf.PdfReader(self.doc_bytes)
         for i, page in enumerate(reader.pages):
             yield i, page
 
-    def extract_text_from_page(self, page: pypdf.PageObject) -> str:
+    def get_document_from_page(self, page: pypdf.PageObject) -> Document:
         """
-        Extract text from a given `pypdf` page.
+        Get Document object from a given `pypdf` page.
 
         Args:
             page (pypdf.pdf.PageObject): The `pypdf` page object.
 
         Returns:
-            str: Extracted text from the page.
+            Document: Document object, with content and possible metadata.
         """
-        return self.fix_text(page.extract_text())
+        return Document(
+            content=self.fix_text(page.extract_text()),
+            metadata=DocMetaData(source=self.source),
+        )
 
 
 class ImagePdfParser(DocumentParser):
@@ -516,15 +600,15 @@ class ImagePdfParser(DocumentParser):
         for i, image in enumerate(images):
             yield i, image
 
-    def extract_text_from_page(self, page: "Image") -> str:  # type: ignore
+    def get_document_from_page(self, page: "Image") -> Document:  # type: ignore
         """
-        Extract text from a given `pdf2image` page.
+        Get Document object corresponding to a given `pdf2image` page.
 
         Args:
             page (Image): The PIL Image object.
 
         Returns:
-            str: Extracted text from the page.
+            Document: Document object, with content and possible metadata.
         """
         try:
             import pytesseract
@@ -532,7 +616,10 @@ class ImagePdfParser(DocumentParser):
             raise LangroidImportError("pytesseract", "pdf-parsers")
 
         text = pytesseract.image_to_string(page)
-        return self.fix_text(text)
+        return Document(
+            content=self.fix_text(text),
+            metadata=DocMetaData(source=self.source),
+        )
 
 
 class UnstructuredPDFParser(DocumentParser):
@@ -564,8 +651,8 @@ class UnstructuredPDFParser(DocumentParser):
                 The `unstructured` library failed to parse the pdf.
                 Please try a different library by setting the `library` field
                 in the `pdf` section of the `parsing` field in the config file.
-                Supported libraries are:
-                fitz, pypdf, pdfplumber
+                Other supported libraries are:
+                fitz, pymupdf4llm, pypdf
                 """
             )
 
@@ -584,18 +671,21 @@ class UnstructuredPDFParser(DocumentParser):
             if page_elements:
                 yield page_number, page_elements
 
-    def extract_text_from_page(self, page: Any) -> str:
+    def get_document_from_page(self, page: Any) -> Document:
         """
-        Extract text from a given `unstructured` element.
+        Get Document object from a given `unstructured` element.
 
         Args:
             page (unstructured element): The `unstructured` element object.
 
         Returns:
-            str: Extracted text from the element.
+            Document: Document object, with content and possible metadata.
         """
         text = " ".join(el.text for el in page)
-        return self.fix_text(text)
+        return Document(
+            content=self.fix_text(text),
+            metadata=DocMetaData(source=self.source),
+        )
 
 
 class UnstructuredDocxParser(DocumentParser):
@@ -632,9 +722,9 @@ class UnstructuredDocxParser(DocumentParser):
         if page_elements:
             yield page_number, page_elements
 
-    def extract_text_from_page(self, page: Any) -> str:
+    def get_document_from_page(self, page: Any) -> Document:
         """
-        Extract text from a given `unstructured` element.
+        Get Document object from a given `unstructured` element.
 
         Note:
             The concept of "pages" doesn't actually exist in the .docx file format in
@@ -647,10 +737,13 @@ class UnstructuredDocxParser(DocumentParser):
             page (unstructured element): The `unstructured` element object.
 
         Returns:
-            str: Extracted text from the element.
+            Document object, with content and possible metadata.
         """
         text = " ".join(el.text for el in page)
-        return self.fix_text(text)
+        return Document(
+            content=self.fix_text(text),
+            metadata=DocMetaData(source=self.source),
+        )
 
 
 class UnstructuredDocParser(UnstructuredDocxParser):
@@ -704,15 +797,19 @@ class PythonDocxParser(DocumentParser):
         for i, para in enumerate(doc.paragraphs, start=1):
             yield i, [para]
 
-    def extract_text_from_page(self, page: Any) -> str:
+    def get_document_from_page(self, page: Any) -> Document:
         """
-        Extract text from a given 'page', which in this case is a single paragraph.
+        Get Document object from a given 'page', which in this case is a single
+        paragraph.
 
         Args:
             page (list): A list containing a single Paragraph object.
 
         Returns:
-            str: Extracted text from the paragraph.
+            Document: Document object, with content and possible metadata.
         """
         paragraph = page[0]
-        return self.fix_text(paragraph.text)
+        return Document(
+            content=self.fix_text(paragraph.text),
+            metadata=DocMetaData(source=self.source),
+        )
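All of the renamed get_document_from_page implementations above flow into the factory from the @@ -139 hunk, keyed on config.pdf.library. A sketch of selecting one of the new parsers, assuming the DocumentParser.create factory and get_doc method shown in this file, with a hypothetical local sample.pdf:

from langroid.parsing.document_parser import DocumentParser
from langroid.parsing.parser import ParsingConfig, PdfParsingConfig

parser = DocumentParser.create(
    "sample.pdf",  # hypothetical input file
    ParsingConfig(pdf=PdfParsingConfig(library="pymupdf4llm")),
)
doc = parser.get_doc()  # one Document holding the concatenated page text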
langroid/parsing/parser.py
CHANGED
@@ -23,11 +23,12 @@ class Splitter(str, Enum):
 class PdfParsingConfig(BaseSettings):
     library: Literal[
         "fitz",
-        "pdfplumber",
+        "pymupdf4llm",
+        "docling",
         "pypdf",
         "unstructured",
         "pdf2image",
-    ] = "fitz"
+    ] = "pymupdf4llm"
 
 
 class DocxParsingConfig(BaseSettings):
@@ -40,6 +41,7 @@ class DocParsingConfig(BaseSettings):
 
 class ParsingConfig(BaseSettings):
     splitter: str = Splitter.TOKENS
+    chunk_by_page: bool = False  # split by page?
     chunk_size: int = 200  # aim for this many tokens per chunk
     overlap: int = 50  # overlap between chunks
     max_chunks: int = 10_000
@@ -49,7 +51,7 @@ class ParsingConfig(BaseSettings):
     n_similar_docs: int = 4
     n_neighbor_ids: int = 5  # window size to store around each chunk
     separators: List[str] = ["\n\n", "\n", " ", ""]
-    token_encoding_model: str = "text-embedding-ada-002"
+    token_encoding_model: str = "text-embedding-3-large"
     pdf: PdfParsingConfig = PdfParsingConfig()
     docx: DocxParsingConfig = DocxParsingConfig()
     doc: DocParsingConfig = DocParsingConfig()
@@ -61,7 +63,7 @@ class Parser:
         try:
             self.tokenizer = tiktoken.encoding_for_model(config.token_encoding_model)
         except Exception:
-            self.tokenizer = tiktoken.encoding_for_model("text-embedding-ada-002")
+            self.tokenizer = tiktoken.encoding_for_model("text-embedding-3-small")
 
     def num_tokens(self, text: str) -> int:
         tokens = self.tokenizer.encode(text)
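For reference, a config sketch exercising each parser.py change above (class and field names from the hunks; values illustrative):

from langroid.parsing.parser import ParsingConfig, PdfParsingConfig

cfg = ParsingConfig(
    chunk_by_page=True,  # new flag: treat each page as its own chunk
    token_encoding_model="text-embedding-3-large",  # new default, made explicit
    pdf=PdfParsingConfig(library="docling"),  # or the new default "pymupdf4llm"
)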
langroid/parsing/pdf_utils.py
ADDED
@@ -0,0 +1,55 @@
+import tempfile
+from io import BytesIO
+from pathlib import Path
+from tempfile import TemporaryDirectory
+from typing import TYPE_CHECKING, Any, BinaryIO, List, Tuple, Union
+
+try:
+    import pypdf
+except ImportError:
+    if not TYPE_CHECKING:
+        pypdf = None
+
+from langroid.exceptions import LangroidImportError
+
+if pypdf is None:
+    raise LangroidImportError(
+        "pypdf", ["pypdf", "docling", "all", "pdf-parsers", "doc-chat"]
+    )
+from pypdf import PdfReader, PdfWriter
+
+
+def pdf_split_pages(
+    input_pdf: Union[str, Path, BytesIO, BinaryIO],
+) -> Tuple[List[Path], TemporaryDirectory[Any]]:
+    """Splits a PDF into individual pages in a temporary directory.
+
+    Args:
+        input_pdf: Input PDF file path or file-like object
+        max_workers: Maximum number of concurrent workers for parallel processing
+
+    Returns:
+        Tuple containing:
+        - List of paths to individual PDF pages
+        - Temporary directory object (caller must call cleanup())
+
+    Example:
+        paths, tmp_dir = split_pdf_temp("input.pdf")
+        # Use paths...
+        tmp_dir.cleanup()  # Clean up temp files when done
+    """
+    tmp_dir = tempfile.TemporaryDirectory()
+    reader = PdfReader(input_pdf)
+    paths = []
+
+    for i in range(len(reader.pages)):
+        writer = PdfWriter()
+        writer.add_page(reader.pages[i])
+        writer.add_metadata(reader.metadata or {})
+
+        output = Path(tmp_dir.name) / f"page_{i+1}.pdf"
+        with open(output, "wb") as f:
+            writer.write(f)
+        paths.append(output)
+
+    return paths, tmp_dir  # Return dir object so caller can control cleanup
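As the docstring notes, the caller owns the temporary directory; DoclingParser.iterate_pages above calls cleanup() after yielding the last page. A small cleanup-safe usage sketch (hypothetical file name):

from langroid.parsing.pdf_utils import pdf_split_pages

paths, tmp_dir = pdf_split_pages("report.pdf")  # hypothetical input
try:
    for p in paths:
        print(p.name)  # page_1.pdf, page_2.pdf, ...
finally:
    tmp_dir.cleanup()  # remove the per-page files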
langroid/vector_store/chromadb.py
CHANGED
@@ -1,6 +1,6 @@
 import json
 import logging
-from typing import Any, Dict, List, Optional, Sequence, Tuple
+from typing import Any, Dict, List, Literal, Optional, Sequence, Tuple
 
 from langroid.embedding_models.base import (
     EmbeddingModelsConfig,
@@ -18,6 +18,10 @@ logger = logging.getLogger(__name__)
 class ChromaDBConfig(VectorStoreConfig):
     collection_name: str = "temp"
     storage_path: str = ".chroma/data"
+    distance: Literal["cosine", "l2", "ip"] = "cosine"
+    construction_ef: int = 100
+    search_ef: int = 100
+    max_neighbors: int = 16
     embedding: EmbeddingModelsConfig = OpenAIEmbeddingsConfig()
     host: str = "127.0.0.1"
     port: int = 6333
@@ -109,6 +113,13 @@ class ChromaDB(VectorStore):
             name=self.config.collection_name,
             embedding_function=self.embedding_fn,
             get_or_create=not replace,
+            metadata={
+                "hnsw:space": self.config.distance,
+                "hnsw:construction_ef": self.config.construction_ef,
+                "hnsw:search_ef": self.config.search_ef,
+                # we could expose other configs, see:
+                # https://docs.trychroma.com/docs/collections/configure
+            },
         )
 
     def add_documents(self, documents: Sequence[Document]) -> None:
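The three new fields map onto Chroma's HNSW collection metadata at creation time (max_neighbors is declared on the config but not passed through in this hunk). A sketch, assuming the usual ChromaDB(config) construction:

from langroid.vector_store.chromadb import ChromaDB, ChromaDBConfig

store = ChromaDB(
    ChromaDBConfig(
        collection_name="docs",
        distance="cosine",    # -> "hnsw:space"
        construction_ef=200,  # -> "hnsw:construction_ef" (build-time beam width)
        search_ef=200,        # -> "hnsw:search_ef" (query-time beam width)
    )
)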
langroid/vector_store/qdrantdb.py
CHANGED
@@ -78,7 +78,7 @@ class QdrantDB(VectorStore):
         super().__init__(config)
         self.config: QdrantDBConfig = config
         self.embedding_fn: EmbeddingFunction = self.embedding_model.embedding_fn()
-        self.embedding_dim = self.embedding_model.embedding_dims
+        self.embedding_dim = len(self.embedding_fn(["test"])[0])
         if self.config.use_sparse_embeddings:
             try:
                 from transformers import AutoModelForMaskedLM, AutoTokenizer
langroid/vector_store/weaviatedb.py
CHANGED
@@ -43,8 +43,8 @@ class WeaviateDB(VectorStore):
         load_dotenv()
         key = os.getenv("WEAVIATE_API_KEY")
         url = os.getenv("WEAVIATE_API_URL")
-        if None in [key, url]:
-            raise ValueError(
+        if url is None or key is None:
+            raise ValueError(
                 """WEAVIATE_API_KEY, WEAVIATE_API_URL env variable must be set to use
                 WeaviateDB in cloud mode. Please set these values
                 in your .env file.
@@ -130,9 +130,9 @@ class WeaviateDB(VectorStore):
         vector_index_config = Configure.VectorIndex.hnsw(
             distance_metric=VectorDistances.COSINE,
         )
-        if self.config.embedding.model_type == "openai":
+        if isinstance(self.config.embedding, OpenAIEmbeddingsConfig):
             vectorizer_config = Configure.Vectorizer.text2vec_openai(
-                model=self.embedding_model.config.model_name,
+                model=self.config.embedding.model_name,
             )
         else:
             vectorizer_config = None
@@ -212,7 +212,7 @@ class WeaviateDB(VectorStore):
             return_metadata=MetadataQuery(distance=True),
         )
         return [
-            (self.weaviate_obj_to_doc(item), 1 - item.metadata.distance)
+            (self.weaviate_obj_to_doc(item), 1 - (item.metadata.distance or 1))
             for item in response.objects
         ]
 
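The scoring fix in the last hunk is worth spelling out: previously 1 - item.metadata.distance raised a TypeError whenever Weaviate returned no distance; now such items simply score 0. A hypothetical helper mirroring the fixed expression:

def score(distance):  # mirrors 1 - (item.metadata.distance or 1)
    return 1 - (distance or 1)

score(0.25)  # 0.75
score(None)  # 0 (no longer a TypeError)

Note that `or` also treats an exact-match distance of 0.0 as missing, mapping it to a score of 0 rather than 1.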
{langroid-0.36.1.dist-info → langroid-0.37.1.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: langroid
-Version: 0.36.1
+Version: 0.37.1
 Summary: Harness LLMs with Multi-Agent Programming
 Author-email: Prasad Chalasani <pchalasani@gmail.com>
 License: MIT
@@ -12,6 +12,7 @@ Requires-Dist: async-generator<2.0,>=1.10
 Requires-Dist: bs4<1.0.0,>=0.0.1
 Requires-Dist: cerebras-cloud-sdk<2.0.0,>=1.1.0
 Requires-Dist: colorlog<7.0.0,>=6.7.0
+Requires-Dist: docling<3.0.0,>=2.16.0
 Requires-Dist: docstring-parser<1.0,>=0.16
 Requires-Dist: duckduckgo-search<7.0.0,>=6.0.0
 Requires-Dist: faker<19.0.0,>=18.9.0
@@ -32,9 +33,10 @@ Requires-Dist: onnxruntime<2.0.0,>=1.16.1
 Requires-Dist: openai<2.0.0,>=1.45.0
 Requires-Dist: pandas<3.0.0,>=2.0.3
 Requires-Dist: prettytable<4.0.0,>=3.8.0
-Requires-Dist: pydantic<…
+Requires-Dist: pydantic<3.0.0,>=1
 Requires-Dist: pygithub<2.0.0,>=1.58.1
 Requires-Dist: pygments<3.0.0,>=2.15.1
+Requires-Dist: pymupdf4llm<0.1.0,>=0.0.17
 Requires-Dist: pyparsing<4.0.0,>=3.0.9
 Requires-Dist: pytest-rerunfailures<16.0,>=15.0
 Requires-Dist: python-dotenv<2.0.0,>=1.0.0
@@ -55,14 +57,15 @@ Provides-Extra: all
 Requires-Dist: arango-datasets<2.0.0,>=1.2.2; extra == 'all'
 Requires-Dist: chainlit<3.0.0,>=2.0.1; extra == 'all'
 Requires-Dist: chromadb<=0.4.23,>=0.4.21; extra == 'all'
+Requires-Dist: docling<3.0.0,>=2.16.0; extra == 'all'
 Requires-Dist: fastembed<0.4.0,>=0.3.1; extra == 'all'
-Requires-Dist: huggingface-hub<0.22.0,>=0.21.2; extra == 'all'
+Requires-Dist: huggingface-hub<1.0.0,>=0.21.2; extra == 'all'
 Requires-Dist: litellm<2.0.0,>=1.30.1; extra == 'all'
 Requires-Dist: metaphor-python<0.2.0,>=0.1.23; extra == 'all'
 Requires-Dist: neo4j<6.0.0,>=5.14.1; extra == 'all'
 Requires-Dist: pdf2image<2.0.0,>=1.17.0; extra == 'all'
-Requires-Dist: pdfplumber<0.11.0,>=0.10.2; extra == 'all'
 Requires-Dist: psycopg2<3.0.0,>=2.9.7; extra == 'all'
+Requires-Dist: pymupdf4llm<0.1.0,>=0.0.17; extra == 'all'
 Requires-Dist: pymupdf<2.0.0,>=1.23.3; extra == 'all'
 Requires-Dist: pymysql<2.0.0,>=1.1.0; extra == 'all'
 Requires-Dist: pypdf>=5.1.0; extra == 'all'
@@ -74,7 +77,7 @@ Requires-Dist: sentence-transformers<3.0.0,>=2.2.2; extra == 'all'
 Requires-Dist: sqlalchemy<3.0.0,>=2.0.19; extra == 'all'
 Requires-Dist: torch<3.0.0,>=2.0.0; extra == 'all'
 Requires-Dist: transformers<5.0.0,>=4.40.1; extra == 'all'
-Requires-Dist: unstructured[docx,pdf,pptx]<0.…; extra == 'all'
+Requires-Dist: unstructured[docx,pdf,pptx]<1.0.0,>=0.16.15; extra == 'all'
 Requires-Dist: weaviate-client>=4.9.6; extra == 'all'
 Provides-Extra: arango
 Requires-Dist: arango-datasets<2.0.0,>=1.2.2; extra == 'arango'
@@ -89,13 +92,17 @@ Requires-Dist: psycopg2<3.0.0,>=2.9.7; extra == 'db'
 Requires-Dist: pymysql<2.0.0,>=1.1.0; extra == 'db'
 Requires-Dist: sqlalchemy<3.0.0,>=2.0.19; extra == 'db'
 Provides-Extra: doc-chat
+Requires-Dist: docling<3.0.0,>=2.16.0; extra == 'doc-chat'
 Requires-Dist: pdf2image<2.0.0,>=1.17.0; extra == 'doc-chat'
-Requires-Dist: pdfplumber<0.11.0,>=0.10.2; extra == 'doc-chat'
+Requires-Dist: pymupdf4llm<0.1.0,>=0.0.17; extra == 'doc-chat'
 Requires-Dist: pymupdf<2.0.0,>=1.23.3; extra == 'doc-chat'
 Requires-Dist: pypdf>=5.1.0; extra == 'doc-chat'
 Requires-Dist: pytesseract<0.4.0,>=0.3.10; extra == 'doc-chat'
 Requires-Dist: python-docx<2.0.0,>=1.1.0; extra == 'doc-chat'
-Requires-Dist: unstructured[docx,pdf,pptx]<0.…; extra == 'doc-chat'
+Requires-Dist: unstructured[docx,pdf,pptx]<1.0.0,>=0.16.15; extra == 'doc-chat'
+Provides-Extra: docling
+Requires-Dist: docling<3.0.0,>=2.16.0; extra == 'docling'
+Requires-Dist: pypdf>=5.1.0; extra == 'docling'
 Provides-Extra: docx
 Requires-Dist: python-docx<2.0.0,>=1.1.0; extra == 'docx'
 Provides-Extra: fastembed
@@ -104,7 +111,7 @@ Provides-Extra: hf-embeddings
 Requires-Dist: sentence-transformers<3.0.0,>=2.2.2; extra == 'hf-embeddings'
 Requires-Dist: torch<3.0.0,>=2.0.0; extra == 'hf-embeddings'
 Provides-Extra: hf-transformers
-Requires-Dist: huggingface-hub<0.22.0,>=0.21.2; extra == 'hf-transformers'
+Requires-Dist: huggingface-hub<1.0.0,>=0.21.2; extra == 'hf-transformers'
 Requires-Dist: sentence-transformers<3.0.0,>=2.2.2; extra == 'hf-transformers'
 Requires-Dist: torch<3.0.0,>=2.0.0; extra == 'hf-transformers'
 Requires-Dist: transformers<5.0.0,>=4.40.1; extra == 'hf-transformers'
@@ -125,13 +132,16 @@ Requires-Dist: pymysql<2.0.0,>=1.1.0; extra == 'mysql'
 Provides-Extra: neo4j
 Requires-Dist: neo4j<6.0.0,>=5.14.1; extra == 'neo4j'
 Provides-Extra: pdf-parsers
+Requires-Dist: docling<3.0.0,>=2.16.0; extra == 'pdf-parsers'
 Requires-Dist: pdf2image<2.0.0,>=1.17.0; extra == 'pdf-parsers'
-Requires-Dist: pdfplumber<0.11.0,>=0.10.2; extra == 'pdf-parsers'
+Requires-Dist: pymupdf4llm<0.1.0,>=0.0.17; extra == 'pdf-parsers'
 Requires-Dist: pymupdf<2.0.0,>=1.23.3; extra == 'pdf-parsers'
 Requires-Dist: pypdf>=5.1.0; extra == 'pdf-parsers'
 Requires-Dist: pytesseract<0.4.0,>=0.3.10; extra == 'pdf-parsers'
 Provides-Extra: postgres
 Requires-Dist: psycopg2<3.0.0,>=2.9.7; extra == 'postgres'
+Provides-Extra: pymupdf4llm
+Requires-Dist: pymupdf4llm<0.1.0,>=0.0.17; extra == 'pymupdf4llm'
 Provides-Extra: scrapy
 Requires-Dist: scrapy<3.0.0,>=2.11.0; extra == 'scrapy'
 Provides-Extra: sql
@@ -139,11 +149,11 @@ Requires-Dist: psycopg2<3.0.0,>=2.9.7; extra == 'sql'
 Requires-Dist: pymysql<2.0.0,>=1.1.0; extra == 'sql'
 Requires-Dist: sqlalchemy<3.0.0,>=2.0.19; extra == 'sql'
 Provides-Extra: transformers
-Requires-Dist: huggingface-hub<0.22.0,>=0.21.2; extra == 'transformers'
+Requires-Dist: huggingface-hub<1.0.0,>=0.21.2; extra == 'transformers'
 Requires-Dist: torch<3.0.0,>=2.0.0; extra == 'transformers'
 Requires-Dist: transformers<5.0.0,>=4.40.1; extra == 'transformers'
 Provides-Extra: unstructured
-Requires-Dist: unstructured[docx,pdf,pptx]<0.…; extra == 'unstructured'
+Requires-Dist: unstructured[docx,pdf,pptx]<1.0.0,>=0.16.15; extra == 'unstructured'
 Provides-Extra: vecdbs
 Requires-Dist: chromadb<=0.4.23,>=0.4.21; extra == 'vecdbs'
 Requires-Dist: lancedb<0.9.0,>=0.8.2; extra == 'vecdbs'
{langroid-0.36.1.dist-info → langroid-0.37.1.dist-info}/RECORD
CHANGED
@@ -1,5 +1,5 @@
 langroid/__init__.py,sha256=z_fCOLQJPOw3LLRPBlFB5-2HyCjpPgQa4m4iY5Fvb8Y,1800
-langroid/exceptions.py,sha256=…
+langroid/exceptions.py,sha256=OPjece_8cwg94DLPcOGA1ddzy5bGh65pxzcHMnssTz8,2995
 langroid/mytypes.py,sha256=h1eMq1ZwTLVezObPfCseWNWbEOzP7mAKu2XoS63W1cM,2647
 langroid/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 langroid/agent/__init__.py,sha256=ll0Cubd2DZ-fsCMl7e10hf9ZjFGKzphfBco396IKITY,786
@@ -14,7 +14,7 @@ langroid/agent/xml_tool_message.py,sha256=6SshYZJKIfi4mkE-gIoSwjkEYekQ8GwcSiCv7a
 langroid/agent/callbacks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 langroid/agent/callbacks/chainlit.py,sha256=RH8qUXaZE5o2WQz3WJQ1SdFtASGlxWCA6_HYz_3meDQ,20822
 langroid/agent/special/__init__.py,sha256=gik_Xtm_zV7U9s30Mn8UX3Gyuy4jTjQe9zjiE3HWmEo,1273
-langroid/agent/special/doc_chat_agent.py,sha256=…
+langroid/agent/special/doc_chat_agent.py,sha256=qoXp6PKI7oAQs8rgj934NzZaEEKsPICcgYl_iQY0bac,64818
 langroid/agent/special/lance_doc_chat_agent.py,sha256=s8xoRs0gGaFtDYFUSIRchsgDVbS5Q3C2b2mr3V1Fd-Q,10419
 langroid/agent/special/lance_tools.py,sha256=qS8x4wi8mrqfbYV2ztFzrcxyhHQ0ZWOc-zkYiH7awj0,2105
 langroid/agent/special/relevance_extractor_agent.py,sha256=zIx8GUdVo1aGW6ASla0NPQjYYIpmriK_TYMijqAx3F8,4796
@@ -57,7 +57,7 @@ langroid/cachedb/momento_cachedb.py,sha256=YEOJ62hEcV6iIeMr5aGgRYgWQqFYaej9gEDEc
 langroid/cachedb/redis_cachedb.py,sha256=7kgnbf4b5CKsCrlL97mHWKvdvlLt8zgn7lc528jEpiE,5141
 langroid/embedding_models/__init__.py,sha256=XhVIMQJbQRpImcnhA9sJR7h6r7QgPo1SKDCvwEUD9j4,851
 langroid/embedding_models/base.py,sha256=DUhvzALoW2UMbtmLxP4eJTfPii99WjUNX7bwFpj_K-0,2395
-langroid/embedding_models/models.py,sha256=…
+langroid/embedding_models/models.py,sha256=YppD52U1lbeygt8_SuPNi6piOV_FgBltZWH5e3l7iso,16776
 langroid/embedding_models/remote_embeds.py,sha256=6_kjXByVbqhY9cGwl9R83ZcYC2km-nGieNNAo1McHaY,5151
 langroid/embedding_models/protoc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 langroid/embedding_models/protoc/embeddings.proto,sha256=_O-SgFpTaylQeOTgSpxhEJ7CUw7PeCQQJLaPqpPYKJg,321
@@ -77,11 +77,12 @@ langroid/language_models/prompt_formatter/hf_formatter.py,sha256=PVJppmjRvD-2DF-
 langroid/language_models/prompt_formatter/llama2_formatter.py,sha256=YdcO88qyBeuMENVIVvVqSYuEpvYSTndUe_jd6hVTko4,2899
 langroid/parsing/__init__.py,sha256=ZgSAfgTC6VsTLFlRSWT-TwYco7SQeRMeZG-49MnKYGY,936
 langroid/parsing/agent_chats.py,sha256=sbZRV9ujdM5QXvvuHVjIi2ysYSYlap-uqfMMUKulrW0,1068
-langroid/parsing/code_parser.py,sha256=…
-langroid/parsing/document_parser.py,sha256=…
+langroid/parsing/code_parser.py,sha256=5ze0MBytrGGkU69pA_bJDjRm6QZz_QYfPcIwkagUa7U,3796
+langroid/parsing/document_parser.py,sha256=Xcf_yA4admhx75N123_ouWcgnYXHztxX0S3TxqlWKNU,28334
 langroid/parsing/para_sentence_split.py,sha256=AJBzZojP3zpB-_IMiiHismhqcvkrVBQ3ZINoQyx_bE4,2000
 langroid/parsing/parse_json.py,sha256=aADo38bAHQhC8on4aWZZzVzSDy-dK35vRLZsFI2ewh8,4756
-langroid/parsing/parser.py,sha256=…
+langroid/parsing/parser.py,sha256=WDv4QnNtAcLSiPe6cPhHOa-aMhrt3OV-kKnVXdgwtmI,12276
+langroid/parsing/pdf_utils.py,sha256=IFs2GH9_ZOYJ159YF5MomQ8RKRj1YPBIxkv0gx4Xz7o,1629
 langroid/parsing/repo_loader.py,sha256=3GjvPJS6Vf5L6gV2zOU8s-Tf1oq_fZm-IB_RL_7CTsY,29373
 langroid/parsing/routing.py,sha256=-FcnlqldzL4ZoxuDwXjQPNHgBe9F9-F4R6q7b_z9CvI,1232
 langroid/parsing/search.py,sha256=0i_r0ESb5HEQfagA2g7_uMQyxYPADWVbdcN9ixZhS4E,8992
@@ -116,13 +117,13 @@ langroid/utils/output/printing.py,sha256=yzPJZN-8_jyOJmI9N_oLwEDfjMwVgk3IDiwnZ4e
 langroid/utils/output/status.py,sha256=rzbE7mDJcgNNvdtylCseQcPGCGghtJvVq3lB-OPJ49E,1049
 langroid/vector_store/__init__.py,sha256=BcoOm1tG3y0EqjkIGmMOHkY9iTUhDHgyruknWDKgqIg,1214
 langroid/vector_store/base.py,sha256=suBanIt0iKEgnMnGdQOyWS58guG20Jyy-GK4DMMuYL0,14208
-langroid/vector_store/chromadb.py,sha256=…
+langroid/vector_store/chromadb.py,sha256=XkpW7pnSf6Lk7Nf1BEIw-zjYGYchoWHgrhnJX7YmxD8,8725
 langroid/vector_store/lancedb.py,sha256=b3_vWkTjG8mweZ7ZNlUD-NjmQP_rLBZfyKWcxt2vosA,14855
 langroid/vector_store/meilisearch.py,sha256=6frB7GFWeWmeKzRfLZIvzRjllniZ1cYj3HmhHQICXLs,11663
 langroid/vector_store/momento.py,sha256=UNHGT6jXuQtqY9f6MdqGU14bVnS0zHgIJUa30ULpUJo,10474
-langroid/vector_store/qdrantdb.py,sha256=…
-langroid/vector_store/weaviatedb.py,sha256=…
-langroid-0.36.1.dist-info/METADATA,sha256=…
-langroid-0.36.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-langroid-0.36.1.dist-info/licenses/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
-langroid-0.36.1.dist-info/RECORD,,
+langroid/vector_store/qdrantdb.py,sha256=Cen6f-y6witiR53UQ-5a605Reo0gTj3ygXpE_ehYoZo,18116
+langroid/vector_store/weaviatedb.py,sha256=C6jd1Twl5_jux3JYyrcTfQb63Lk9HuiUzVF4NahXuGo,10642
+langroid-0.37.1.dist-info/METADATA,sha256=XL8VnB7r3uUJ6-BkwZkUPeSQO4pfvo8YfH3GvbX_gFg,60572
+langroid-0.37.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+langroid-0.37.1.dist-info/licenses/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
+langroid-0.37.1.dist-info/RECORD,,
{langroid-0.36.1.dist-info → langroid-0.37.1.dist-info}/WHEEL
File without changes
{langroid-0.36.1.dist-info → langroid-0.37.1.dist-info}/licenses/LICENSE
File without changes