langroid 0.1.252__py3-none-any.whl → 0.1.254__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langroid/__init__.py +16 -15
- langroid/agent/__init__.py +1 -0
- langroid/agent/callbacks/chainlit.py +5 -12
- langroid/agent/special/__init__.py +13 -4
- langroid/agent/special/doc_chat_agent.py +39 -55
- langroid/agent/special/neo4j/csv_kg_chat.py +2 -2
- langroid/agent/special/sql/__init__.py +12 -6
- langroid/agent/special/sql/sql_chat_agent.py +10 -4
- langroid/agent/special/sql/utils/__init__.py +4 -5
- langroid/agent/special/sql/utils/description_extractors.py +7 -2
- langroid/agent/special/sql/utils/populate_metadata.py +6 -1
- langroid/agent/special/table_chat_agent.py +2 -2
- langroid/agent/tool_message.py +14 -3
- langroid/agent/tools/__init__.py +2 -3
- langroid/agent/tools/duckduckgo_search_tool.py +2 -2
- langroid/agent/tools/google_search_tool.py +2 -2
- langroid/agent/tools/metaphor_search_tool.py +2 -2
- langroid/agent/tools/retrieval_tool.py +2 -2
- langroid/agent/tools/run_python_code.py +2 -2
- langroid/agent/tools/segment_extract_tool.py +2 -2
- langroid/cachedb/__init__.py +10 -2
- langroid/cachedb/base.py +10 -2
- langroid/cachedb/momento_cachedb.py +10 -4
- langroid/cachedb/redis_cachedb.py +2 -3
- langroid/embedding_models/__init__.py +1 -0
- langroid/exceptions.py +57 -0
- langroid/language_models/__init__.py +1 -0
- langroid/language_models/base.py +2 -3
- langroid/language_models/openai_gpt.py +15 -14
- langroid/language_models/prompt_formatter/__init__.py +4 -3
- langroid/parsing/__init__.py +8 -2
- langroid/parsing/document_parser.py +46 -10
- langroid/parsing/parser.pyi +56 -0
- langroid/parsing/spider.py +12 -7
- langroid/utils/logging.py +7 -3
- langroid/utils/output/__init__.py +1 -2
- langroid/utils/output/citations.py +41 -0
- langroid/utils/output/printing.py +7 -2
- langroid/vector_store/__init__.py +33 -23
- langroid/vector_store/chromadb.py +2 -8
- langroid/vector_store/lancedb.py +36 -5
- langroid/vector_store/meilisearch.py +21 -11
- langroid/vector_store/momento.py +31 -14
- {langroid-0.1.252.dist-info → langroid-0.1.254.dist-info}/METADATA +59 -47
- {langroid-0.1.252.dist-info → langroid-0.1.254.dist-info}/RECORD +47 -45
- {langroid-0.1.252.dist-info → langroid-0.1.254.dist-info}/LICENSE +0 -0
- {langroid-0.1.252.dist-info → langroid-0.1.254.dist-info}/WHEEL +0 -0
langroid/exceptions.py
CHANGED
@@ -1,3 +1,60 @@
|
|
1
|
+
from typing import Optional
|
2
|
+
|
3
|
+
|
1
4
|
class InfiniteLoopException(Exception):
    """Exception whose message defaults to "Infinite loop detected"."""

    def __init__(self, message: str = "Infinite loop detected", *args: object) -> None:
        # The message is always the first positional arg of the base
        # Exception; any additional args are forwarded unchanged.
        super().__init__(message, *args)
|
7
|
+
|
8
|
+
|
9
|
+
class LangroidImportError(ImportError):
    """ImportError carrying install instructions for an optional dependency.

    The message names the missing package and, when the package is provided
    through a Langroid extra, lists the pip / Poetry commands that install
    that extra.
    """

    def __init__(
        self,
        package: Optional[str] = None,
        extra: Optional[str] = None,
        error: str = "",
        *args: object,
    ) -> None:
        """
        Generate helpful warning when attempting to import package or module.

        Args:
            package (str): The name of the package to import.
            extra (str): The name of the extras package required for this import.
            error (str): The error message to display. Depending on context, we
                can set this by capturing the ImportError message. When empty
                and `package` is given, a default "not installed" line is used.
            *args: Additional positional arguments forwarded to ImportError.
        """
        # Local import keeps this block self-contained.
        import textwrap

        if error == "" and package is not None:
            error = f"{package} is not installed by default with Langroid.\n"

        if extra:
            # Plain template + str.format, so dedent runs on the literal text
            # only and the interpolated extra name cannot affect it.
            install_help = textwrap.dedent(
                """
                If you want to use it, please install langroid
                with the `{extra}` extra, for example:

                If you are using pip:
                pip install "langroid[{extra}]"

                For multiple extras, you can separate them with commas:
                pip install "langroid[{extra},another-extra]"

                If you are using Poetry:
                poetry add langroid --extras "{extra}"

                For multiple extras with Poetry, list them with spaces:
                poetry add langroid --extras "{extra} another-extra"

                If you are working within the langroid dev env (which uses Poetry),
                you can do:
                poetry install -E "{extra}"
                or if you want to include multiple extras:
                poetry install -E "{extra} another-extra"
                """
            ).format(extra=extra)
        else:
            install_help = textwrap.dedent(
                """
                If you want to use it, please install it in the same
                virtual environment as langroid.
                """
            )

        msg = error + install_help

        # Populate the standard ImportError.name attribute so handlers that
        # inspect `exc.name` can tell which dependency was missing.
        # NOTE(review): `package` may be a distribution name (e.g.
        # "python-docx") rather than the importable module name.
        super().__init__(msg, *args, name=package)
|
langroid/language_models/base.py
CHANGED
@@ -10,8 +10,7 @@ from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union
|
|
10
10
|
import aiohttp
|
11
11
|
from pydantic import BaseModel, BaseSettings, Field
|
12
12
|
|
13
|
-
from langroid.cachedb.
|
14
|
-
from langroid.cachedb.redis_cachedb import RedisCacheConfig
|
13
|
+
from langroid.cachedb.base import CacheDBConfig
|
15
14
|
from langroid.mytypes import Document
|
16
15
|
from langroid.parsing.agent_chats import parse_message
|
17
16
|
from langroid.parsing.parse_json import top_level_json_field
|
@@ -49,7 +48,7 @@ class LLMConfig(BaseSettings):
|
|
49
48
|
# use chat model for completion? For OpenAI models, this MUST be set to True!
|
50
49
|
use_chat_for_completion: bool = True
|
51
50
|
stream: bool = True # stream output from API?
|
52
|
-
cache_config: None |
|
51
|
+
cache_config: None | CacheDBConfig = None
|
53
52
|
|
54
53
|
# Dict of model -> (input/prompt cost, output/completion cost)
|
55
54
|
chat_cost_per_1k_tokens: Tuple[float, float] = (0.0, 0.0)
|
@@ -28,8 +28,9 @@ from pydantic import BaseModel
|
|
28
28
|
from rich import print
|
29
29
|
from rich.markup import escape
|
30
30
|
|
31
|
-
from langroid.cachedb.
|
31
|
+
from langroid.cachedb.base import CacheDB
|
32
32
|
from langroid.cachedb.redis_cachedb import RedisCache, RedisCacheConfig
|
33
|
+
from langroid.exceptions import LangroidImportError
|
33
34
|
from langroid.language_models.base import (
|
34
35
|
LanguageModel,
|
35
36
|
LLMConfig,
|
@@ -280,14 +281,7 @@ class OpenAIGPTConfig(LLMConfig):
|
|
280
281
|
try:
|
281
282
|
import litellm
|
282
283
|
except ImportError:
|
283
|
-
raise
|
284
|
-
"""
|
285
|
-
litellm not installed. Please install it via:
|
286
|
-
pip install litellm.
|
287
|
-
Or when installing langroid, install it with the `litellm` extra:
|
288
|
-
pip install langroid[litellm]
|
289
|
-
"""
|
290
|
-
)
|
284
|
+
raise LangroidImportError("litellm", "litellm")
|
291
285
|
litellm.telemetry = False
|
292
286
|
litellm.drop_params = True # drop un-supported params without crashing
|
293
287
|
self.seed = None # some local mdls don't support seed
|
@@ -482,17 +476,24 @@ class OpenAIGPT(LanguageModel):
|
|
482
476
|
timeout=Timeout(self.config.timeout),
|
483
477
|
)
|
484
478
|
|
485
|
-
self.cache:
|
479
|
+
self.cache: CacheDB
|
486
480
|
if settings.cache_type == "momento":
|
487
|
-
|
488
|
-
|
481
|
+
from langroid.cachedb.momento_cachedb import (
|
482
|
+
MomentoCache,
|
483
|
+
MomentoCacheConfig,
|
484
|
+
)
|
485
|
+
|
486
|
+
if config.cache_config is None or not isinstance(
|
487
|
+
config.cache_config,
|
488
|
+
MomentoCacheConfig,
|
489
489
|
):
|
490
490
|
# switch to fresh momento config if needed
|
491
491
|
config.cache_config = MomentoCacheConfig()
|
492
492
|
self.cache = MomentoCache(config.cache_config)
|
493
493
|
elif "redis" in settings.cache_type:
|
494
|
-
if config.cache_config is None or isinstance(
|
495
|
-
config.cache_config,
|
494
|
+
if config.cache_config is None or not isinstance(
|
495
|
+
config.cache_config,
|
496
|
+
RedisCacheConfig,
|
496
497
|
):
|
497
498
|
# switch to fresh redis config if needed
|
498
499
|
config.cache_config = RedisCacheConfig(
|
@@ -1,9 +1,10 @@
|
|
1
|
+
from . import base
|
2
|
+
from . import llama2_formatter
|
1
3
|
from .base import PromptFormatter
|
2
4
|
from .llama2_formatter import Llama2Formatter
|
3
|
-
from ..config import PromptFormatterConfig
|
5
|
+
from ..config import PromptFormatterConfig
|
6
|
+
from ..config import Llama2FormatterConfig
|
4
7
|
|
5
|
-
from . import base
|
6
|
-
from . import llama2_formatter
|
7
8
|
|
8
9
|
__all__ = [
|
9
10
|
"PromptFormatter",
|
langroid/parsing/__init__.py
CHANGED
@@ -11,7 +11,6 @@ from . import urls
|
|
11
11
|
from . import utils
|
12
12
|
from . import search
|
13
13
|
from . import web_search
|
14
|
-
from . import spider
|
15
14
|
|
16
15
|
from .parser import (
|
17
16
|
Splitter,
|
@@ -36,7 +35,6 @@ __all__ = [
|
|
36
35
|
"utils",
|
37
36
|
"search",
|
38
37
|
"web_search",
|
39
|
-
"spider",
|
40
38
|
"Splitter",
|
41
39
|
"PdfParsingConfig",
|
42
40
|
"DocxParsingConfig",
|
@@ -44,3 +42,11 @@ __all__ = [
|
|
44
42
|
"ParsingConfig",
|
45
43
|
"Parser",
|
46
44
|
]
|
45
|
+
|
46
|
+
try:
|
47
|
+
from . import spider
|
48
|
+
|
49
|
+
spider
|
50
|
+
__all__.append("spider")
|
51
|
+
except ImportError:
|
52
|
+
pass
|
@@ -1,16 +1,37 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
1
3
|
import itertools
|
2
4
|
import logging
|
3
5
|
import re
|
4
6
|
from enum import Enum
|
5
7
|
from io import BytesIO
|
6
|
-
from typing import Any, Generator, List, Tuple
|
8
|
+
from typing import TYPE_CHECKING, Any, Generator, List, Tuple
|
9
|
+
|
10
|
+
from langroid.exceptions import LangroidImportError
|
11
|
+
|
12
|
+
try:
|
13
|
+
import fitz
|
14
|
+
except ImportError:
|
15
|
+
if not TYPE_CHECKING:
|
16
|
+
fitz = None
|
17
|
+
|
18
|
+
try:
|
19
|
+
import pypdf
|
20
|
+
except ImportError:
|
21
|
+
if not TYPE_CHECKING:
|
22
|
+
pypdf = None
|
23
|
+
|
24
|
+
try:
|
25
|
+
import pdfplumber
|
26
|
+
except ImportError:
|
27
|
+
if not TYPE_CHECKING:
|
28
|
+
pdfplumber = None
|
7
29
|
|
8
|
-
import fitz
|
9
|
-
import pdfplumber
|
10
|
-
import pypdf
|
11
30
|
import requests
|
12
31
|
from bs4 import BeautifulSoup
|
13
|
-
|
32
|
+
|
33
|
+
if TYPE_CHECKING:
|
34
|
+
from PIL import Image
|
14
35
|
|
15
36
|
from langroid.mytypes import DocMetaData, Document
|
16
37
|
from langroid.parsing.parser import Parser, ParsingConfig
|
@@ -363,19 +384,21 @@ class FitzPDFParser(DocumentParser):
|
|
363
384
|
Parser for processing PDFs using the `fitz` library.
|
364
385
|
"""
|
365
386
|
|
366
|
-
def iterate_pages(self) -> Generator[Tuple[int, fitz.Page], None, None]:
|
387
|
+
def iterate_pages(self) -> Generator[Tuple[int, "fitz.Page"], None, None]:
|
367
388
|
"""
|
368
389
|
Yield each page in the PDF using `fitz`.
|
369
390
|
|
370
391
|
Returns:
|
371
392
|
Generator[fitz.Page]: Generator yielding each page.
|
372
393
|
"""
|
394
|
+
if fitz is None:
|
395
|
+
raise LangroidImportError("fitz", "pdf-parsers")
|
373
396
|
doc = fitz.open(stream=self.doc_bytes, filetype="pdf")
|
374
397
|
for i, page in enumerate(doc):
|
375
398
|
yield i, page
|
376
399
|
doc.close()
|
377
400
|
|
378
|
-
def extract_text_from_page(self, page: fitz.Page) -> str:
|
401
|
+
def extract_text_from_page(self, page: "fitz.Page") -> str:
|
379
402
|
"""
|
380
403
|
Extract text from a given `fitz` page.
|
381
404
|
|
@@ -400,6 +423,8 @@ class PyPDFParser(DocumentParser):
|
|
400
423
|
Returns:
|
401
424
|
Generator[pypdf.pdf.PageObject]: Generator yielding each page.
|
402
425
|
"""
|
426
|
+
if pypdf is None:
|
427
|
+
raise LangroidImportError("pypdf", "pdf-parsers")
|
403
428
|
reader = pypdf.PdfReader(self.doc_bytes)
|
404
429
|
for i, page in enumerate(reader.pages):
|
405
430
|
yield i, page
|
@@ -431,6 +456,8 @@ class PDFPlumberParser(DocumentParser):
|
|
431
456
|
Returns:
|
432
457
|
Generator[pdfplumber.Page]: Generator yielding each page.
|
433
458
|
"""
|
459
|
+
if pdfplumber is None:
|
460
|
+
raise LangroidImportError("pdfplumber", "pdf-parsers")
|
434
461
|
with pdfplumber.open(self.doc_bytes) as pdf:
|
435
462
|
for i, page in enumerate(pdf.pages):
|
436
463
|
yield i, page
|
@@ -456,7 +483,10 @@ class ImagePdfParser(DocumentParser):
|
|
456
483
|
def iterate_pages(
|
457
484
|
self,
|
458
485
|
) -> Generator[Tuple[int, "Image"], None, None]: # type: ignore
|
459
|
-
|
486
|
+
try:
|
487
|
+
from pdf2image import convert_from_bytes
|
488
|
+
except ImportError:
|
489
|
+
raise LangroidImportError("pdf2image", "pdf-parsers")
|
460
490
|
|
461
491
|
images = convert_from_bytes(self.doc_bytes.getvalue())
|
462
492
|
for i, image in enumerate(images):
|
@@ -472,7 +502,10 @@ class ImagePdfParser(DocumentParser):
|
|
472
502
|
Returns:
|
473
503
|
str: Extracted text from the image.
|
474
504
|
"""
|
475
|
-
|
505
|
+
try:
|
506
|
+
import pytesseract
|
507
|
+
except ImportError:
|
508
|
+
raise LangroidImportError("pytesseract", "pdf-parsers")
|
476
509
|
|
477
510
|
text = pytesseract.image_to_string(page)
|
478
511
|
return self.fix_text(text)
|
@@ -638,7 +671,10 @@ class PythonDocxParser(DocumentParser):
|
|
638
671
|
In a DOCX file, pages are not explicitly defined,
|
639
672
|
so we consider each paragraph as a separate 'page' for simplicity.
|
640
673
|
"""
|
641
|
-
|
674
|
+
try:
|
675
|
+
import docx
|
676
|
+
except ImportError:
|
677
|
+
raise LangroidImportError("python-docx", "docx")
|
642
678
|
|
643
679
|
doc = docx.Document(self.doc_bytes)
|
644
680
|
for i, para in enumerate(doc.paragraphs, start=1):
|
@@ -0,0 +1,56 @@
|
|
1
|
+
from enum import Enum
|
2
|
+
from typing import Literal
|
3
|
+
|
4
|
+
from _typeshed import Incomplete
|
5
|
+
from pydantic import BaseSettings
|
6
|
+
|
7
|
+
from langroid.mytypes import Document as Document
|
8
|
+
from langroid.parsing.para_sentence_split import (
|
9
|
+
create_chunks as create_chunks,
|
10
|
+
)
|
11
|
+
from langroid.parsing.para_sentence_split import (
|
12
|
+
remove_extra_whitespace as remove_extra_whitespace,
|
13
|
+
)
|
14
|
+
|
15
|
+
logger: Incomplete
|
16
|
+
|
17
|
+
class Splitter(str, Enum):
|
18
|
+
TOKENS: str
|
19
|
+
PARA_SENTENCE: str
|
20
|
+
SIMPLE: str
|
21
|
+
|
22
|
+
class PdfParsingConfig(BaseSettings):
|
23
|
+
library: Literal["fitz", "pdfplumber", "pypdf", "unstructured", "pdf2image"]
|
24
|
+
|
25
|
+
class DocxParsingConfig(BaseSettings):
|
26
|
+
library: Literal["python-docx", "unstructured"]
|
27
|
+
|
28
|
+
class DocParsingConfig(BaseSettings):
|
29
|
+
library: Literal["unstructured"]
|
30
|
+
|
31
|
+
class ParsingConfig(BaseSettings):
|
32
|
+
splitter: str
|
33
|
+
chunk_size: int
|
34
|
+
overlap: int
|
35
|
+
max_chunks: int
|
36
|
+
min_chunk_chars: int
|
37
|
+
discard_chunk_chars: int
|
38
|
+
n_similar_docs: int
|
39
|
+
n_neighbor_ids: int
|
40
|
+
separators: list[str]
|
41
|
+
token_encoding_model: str
|
42
|
+
pdf: PdfParsingConfig
|
43
|
+
docx: DocxParsingConfig
|
44
|
+
doc: DocParsingConfig
|
45
|
+
|
46
|
+
class Parser:
|
47
|
+
config: Incomplete
|
48
|
+
tokenizer: Incomplete
|
49
|
+
def __init__(self, config: ParsingConfig) -> None: ...
|
50
|
+
def num_tokens(self, text: str) -> int: ...
|
51
|
+
def add_window_ids(self, chunks: list[Document]) -> None: ...
|
52
|
+
def split_simple(self, docs: list[Document]) -> list[Document]: ...
|
53
|
+
def split_para_sentence(self, docs: list[Document]) -> list[Document]: ...
|
54
|
+
def split_chunk_tokens(self, docs: list[Document]) -> list[Document]: ...
|
55
|
+
def chunk_tokens(self, text: str) -> list[str]: ...
|
56
|
+
def split(self, docs: list[Document]) -> list[Document]: ...
|
langroid/parsing/spider.py
CHANGED
@@ -1,13 +1,18 @@
|
|
1
1
|
from typing import List, Set, no_type_check
|
2
2
|
from urllib.parse import urlparse
|
3
3
|
|
4
|
-
from
|
5
|
-
|
6
|
-
|
7
|
-
from
|
8
|
-
from scrapy
|
9
|
-
from scrapy.
|
10
|
-
from
|
4
|
+
from langroid.exceptions import LangroidImportError
|
5
|
+
|
6
|
+
try:
|
7
|
+
from pydispatch import dispatcher
|
8
|
+
from scrapy import signals
|
9
|
+
from scrapy.crawler import CrawlerRunner
|
10
|
+
from scrapy.http import Response
|
11
|
+
from scrapy.linkextractors import LinkExtractor
|
12
|
+
from scrapy.spiders import CrawlSpider, Rule
|
13
|
+
from twisted.internet import defer, reactor
|
14
|
+
except ImportError:
|
15
|
+
raise LangroidImportError("scrapy", "scrapy")
|
11
16
|
|
12
17
|
|
13
18
|
@no_type_check
|
langroid/utils/logging.py
CHANGED
@@ -31,7 +31,11 @@ def setup_colored_logging() -> None:
|
|
31
31
|
# logger.setLevel(logging.DEBUG)
|
32
32
|
|
33
33
|
|
34
|
-
def setup_logger(
|
34
|
+
def setup_logger(
|
35
|
+
name: str,
|
36
|
+
level: int = logging.INFO,
|
37
|
+
terminal: bool = False,
|
38
|
+
) -> logging.Logger:
|
35
39
|
"""
|
36
40
|
Set up a logger of module `name` at a desired level.
|
37
41
|
Args:
|
@@ -42,7 +46,7 @@ def setup_logger(name: str, level: int = logging.INFO) -> logging.Logger:
|
|
42
46
|
"""
|
43
47
|
logger = logging.getLogger(name)
|
44
48
|
logger.setLevel(level)
|
45
|
-
if not logger.hasHandlers():
|
49
|
+
if not logger.hasHandlers() and terminal:
|
46
50
|
handler = logging.StreamHandler()
|
47
51
|
formatter = logging.Formatter(
|
48
52
|
"%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
@@ -73,7 +77,7 @@ def setup_file_logger(
|
|
73
77
|
) -> logging.Logger:
|
74
78
|
os.makedirs(os.path.dirname(filename), exist_ok=True)
|
75
79
|
file_mode = "a" if append else "w"
|
76
|
-
logger = setup_logger(name)
|
80
|
+
logger = setup_logger(name, terminal=False)
|
77
81
|
handler = logging.FileHandler(filename, mode=file_mode)
|
78
82
|
handler.setLevel(logging.INFO)
|
79
83
|
if log_format:
|
@@ -1,5 +1,4 @@
|
|
1
1
|
from . import printing
|
2
|
-
|
3
2
|
from .printing import (
|
4
3
|
shorten_text,
|
5
4
|
print_long_text,
|
@@ -7,9 +6,9 @@ from .printing import (
|
|
7
6
|
SuppressLoggerWarnings,
|
8
7
|
PrintColored,
|
9
8
|
)
|
10
|
-
|
11
9
|
from .status import status
|
12
10
|
|
11
|
+
|
13
12
|
__all__ = [
|
14
13
|
"printing",
|
15
14
|
"shorten_text",
|
@@ -0,0 +1,41 @@
|
|
1
|
+
def extract_markdown_references(md_string: str) -> list[int]:
    """
    Extracts markdown references (e.g., [^1], [^2]) from a string and returns
    them as a sorted list of integers.

    Args:
        md_string (str): The markdown string containing references.

    Returns:
        list[int]: A sorted list of unique integers from the markdown references;
            empty when the string contains no `[^<number>]` markers.
    """
    import re

    # Each match is the digit run captured inside a `[^<number>]` marker.
    ref_numbers = re.findall(r"\[\^(\d+)\]", md_string)
    # The set comprehension drops repeated references; sorted() yields the list.
    return sorted({int(num) for num in ref_numbers})
|
18
|
+
|
19
|
+
|
20
|
+
def format_footnote_text(content: str, width: int = 80) -> str:
    """
    Formats the content part of a footnote (i.e. not the first line that
    appears right after the reference [^4]).
    It wraps the text so that no line is longer than the specified width
    (indentation included) and indents every line as required for markdown
    footnote continuation text.

    Args:
        content (str): The text of the footnote to be formatted.
        width (int): Maximum width of the text lines, including the indent.

    Returns:
        str: Properly formatted markdown footnote text, or "" when `content`
            is empty or contains only whitespace.
    """
    import textwrap

    # Markdown footnote continuation lines are indented by four spaces.
    indent = "    "
    # Letting textwrap apply the indent makes it count toward `width`;
    # indenting after wrapping would let lines exceed `width`.
    return textwrap.fill(
        content,
        width,
        initial_indent=indent,
        subsequent_indent=indent,
    )
|
@@ -1,7 +1,7 @@
|
|
1
1
|
import logging
|
2
2
|
import sys
|
3
3
|
from contextlib import contextmanager
|
4
|
-
from typing import Any, Iterator, Optional
|
4
|
+
from typing import Any, Iterator, Optional, Type
|
5
5
|
|
6
6
|
from rich import print as rprint
|
7
7
|
from rich.text import Text
|
@@ -89,6 +89,11 @@ class SuppressLoggerWarnings:
|
|
89
89
|
# Set the logging level to 'ERROR' to suppress warnings
|
90
90
|
self.logger.setLevel(logging.ERROR)
|
91
91
|
|
92
|
-
def __exit__(
|
92
|
+
def __exit__(
|
93
|
+
self,
|
94
|
+
exc_type: Optional[Type[BaseException]],
|
95
|
+
exc_value: Optional[BaseException],
|
96
|
+
traceback: Any,
|
97
|
+
) -> None:
|
93
98
|
# Reset the logging level to its original value
|
94
99
|
self.logger.setLevel(self.original_level)
|
@@ -1,40 +1,50 @@
|
|
1
1
|
from . import base
|
2
2
|
|
3
3
|
from . import qdrantdb
|
4
|
-
from . import meilisearch
|
5
|
-
from . import lancedb
|
6
4
|
|
7
5
|
from .base import VectorStoreConfig, VectorStore
|
8
6
|
from .qdrantdb import QdrantDBConfig, QdrantDB
|
9
|
-
from .meilisearch import MeiliSearch, MeiliSearchConfig
|
10
|
-
from .lancedb import LanceDB, LanceDBConfig
|
11
|
-
|
12
|
-
has_chromadb = False
|
13
|
-
try:
|
14
|
-
from . import chromadb
|
15
|
-
from .chromadb import ChromaDBConfig, ChromaDB
|
16
|
-
|
17
|
-
chromadb # silence linters
|
18
|
-
ChromaDB
|
19
|
-
ChromaDBConfig
|
20
|
-
has_chromadb = True
|
21
|
-
except ImportError:
|
22
|
-
pass
|
23
7
|
|
24
8
|
__all__ = [
|
25
9
|
"base",
|
26
10
|
"VectorStore",
|
27
11
|
"VectorStoreConfig",
|
28
12
|
"qdrantdb",
|
29
|
-
"meilisearch",
|
30
|
-
"lancedb",
|
31
13
|
"QdrantDBConfig",
|
32
14
|
"QdrantDB",
|
33
|
-
"MeiliSearch",
|
34
|
-
"MeiliSearchConfig",
|
35
|
-
"LanceDB",
|
36
|
-
"LanceDBConfig",
|
37
15
|
]
|
38
16
|
|
39
|
-
|
17
|
+
|
18
|
+
try:
|
19
|
+
from . import meilisearch
|
20
|
+
from .meilisearch import MeiliSearch, MeiliSearchConfig
|
21
|
+
|
22
|
+
meilisearch
|
23
|
+
MeiliSearch
|
24
|
+
MeiliSearchConfig
|
25
|
+
__all__.extend(["meilisearch", "MeiliSearch", "MeiliSearchConfig"])
|
26
|
+
except ImportError:
|
27
|
+
pass
|
28
|
+
|
29
|
+
|
30
|
+
try:
|
31
|
+
from . import lancedb
|
32
|
+
from .lancedb import LanceDB, LanceDBConfig
|
33
|
+
|
34
|
+
lancedb
|
35
|
+
LanceDB
|
36
|
+
LanceDBConfig
|
37
|
+
__all__.extend(["lancedb", "LanceDB", "LanceDBConfig"])
|
38
|
+
except ImportError:
|
39
|
+
pass
|
40
|
+
|
41
|
+
try:
|
42
|
+
from . import chromadb
|
43
|
+
from .chromadb import ChromaDBConfig, ChromaDB
|
44
|
+
|
45
|
+
chromadb # silence linters
|
46
|
+
ChromaDB
|
47
|
+
ChromaDBConfig
|
40
48
|
__all__.extend(["chromadb", "ChromaDBConfig", "ChromaDB"])
|
49
|
+
except ImportError:
|
50
|
+
pass
|
@@ -7,6 +7,7 @@ from langroid.embedding_models.base import (
|
|
7
7
|
EmbeddingModelsConfig,
|
8
8
|
)
|
9
9
|
from langroid.embedding_models.models import OpenAIEmbeddingsConfig
|
10
|
+
from langroid.exceptions import LangroidImportError
|
10
11
|
from langroid.mytypes import DocMetaData, Document
|
11
12
|
from langroid.utils.configuration import settings
|
12
13
|
from langroid.utils.output.printing import print_long_text
|
@@ -29,14 +30,7 @@ class ChromaDB(VectorStore):
|
|
29
30
|
try:
|
30
31
|
import chromadb
|
31
32
|
except ImportError:
|
32
|
-
raise
|
33
|
-
"""
|
34
|
-
ChromaDB is not installed by default with Langroid.
|
35
|
-
If you want to use it, please install it with the `chromadb` extra, e.g.
|
36
|
-
pip install "langroid[chromadb]"
|
37
|
-
or an equivalent command.
|
38
|
-
"""
|
39
|
-
)
|
33
|
+
raise LangroidImportError("chromadb", "chromadb")
|
40
34
|
self.config = config
|
41
35
|
emb_model = EmbeddingModel.create(config.embedding)
|
42
36
|
self.embedding_fn = emb_model.embedding_fn()
|