langroid 0.40.0__py3-none-any.whl → 0.41.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langroid/agent/tools/exa_search_tool.py +68 -0
- langroid/agent/tools/tavily_search_tool.py +50 -0
- langroid/parsing/search.py +1 -1
- langroid/parsing/utils.py +4 -3
- langroid/parsing/web_search.py +91 -7
- langroid/vector_store/__init__.py +9 -11
- langroid/vector_store/base.py +3 -0
- langroid/vector_store/pineconedb.py +427 -0
- langroid/vector_store/postgres.py +23 -15
- langroid/vector_store/weaviatedb.py +16 -2
- {langroid-0.40.0.dist-info → langroid-0.41.0.dist-info}/METADATA +10 -1
- {langroid-0.40.0.dist-info → langroid-0.41.0.dist-info}/RECORD +14 -11
- {langroid-0.40.0.dist-info → langroid-0.41.0.dist-info}/WHEEL +0 -0
- {langroid-0.40.0.dist-info → langroid-0.41.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,68 @@
|
|
1
|
+
"""
|
2
|
+
A tool to trigger an Exa search for a given query,
|
3
|
+
(https://docs.exa.ai/reference/getting-started)
|
4
|
+
and return the top results with their titles, links, summaries.
|
5
|
+
Since the tool is stateless (i.e. does not need
|
6
|
+
access to agent state), it can be enabled for any agent, without having to define a
|
7
|
+
special method inside the agent: `agent.enable_message(ExaSearchTool)`
|
8
|
+
|
9
|
+
NOTE: To use this tool, you need to:
|
10
|
+
|
11
|
+
* set the EXA_API_KEY environment variable in
|
12
|
+
your `.env` file, e.g. `EXA_API_KEY=your_api_key_here`
|
13
|
+
(Note as of 28 Jan 2023, Metaphor renamed to Exa, so you can also use
|
14
|
+
`EXA_API_KEY=your_api_key_here`)
|
15
|
+
|
16
|
+
* install langroid with the `exa-py` extra, e.g.
|
17
|
+
`pip install langroid[exa]` or `uv pip install langroid[exa]`
|
18
|
+
or `poetry add langroid[exa]` or `uv add langroid[exa]`
|
19
|
+
(it installs the `exa_py` package from pypi).
|
20
|
+
|
21
|
+
For more information, please refer to the official docs:
|
22
|
+
https://exa.ai/
|
23
|
+
"""
|
24
|
+
|
25
|
+
from typing import List, Tuple
|
26
|
+
|
27
|
+
from langroid.agent.tool_message import ToolMessage
|
28
|
+
from langroid.parsing.web_search import exa_search
|
29
|
+
|
30
|
+
|
31
|
+
class ExaSearchTool(ToolMessage):
    """
    Stateless tool that lets an LLM request a web search through Exa.

    The LLM supplies a `query` and the desired `num_results`; `handle` runs
    the search via `exa_search` and returns the results as a block of text
    for the LLM to use when composing its answer.
    """

    request: str = "exa_search"
    purpose: str = """
            To search the web and return up to <num_results>
            links relevant to the given <query>. When using this tool,
            ONLY show the required JSON, DO NOT SAY ANYTHING ELSE.
            Wait for the results of the web search, and then use them to
            compose your response.
            """
    # Text of the web search to run.
    query: str
    # Upper bound on the number of results to fetch.
    num_results: int

    def handle(self) -> str:
        """
        Run an Exa search for `self.query`, asking for `self.num_results` hits.

        Returns:
            str: A header line followed by the string form of every search
                result, with consecutive results separated by a blank line.
        """
        hits = exa_search(self.query, self.num_results)
        # Each result's own str() supplies its textual form; results are
        # separated by two newlines.
        results_str = "\n\n".join(map(str, hits))
        return f"""
        BELOW ARE THE RESULTS FROM THE WEB SEARCH. USE THESE TO COMPOSE YOUR RESPONSE:
        {results_str}
        """

    @classmethod
    def examples(cls) -> List["ToolMessage" | Tuple[str, "ToolMessage"]]:
        """Return a single sample invocation of this tool."""
        sample = cls(
            query="When was the Llama2 Large Language Model (LLM) released?",
            num_results=3,
        )
        return [sample]
|
@@ -0,0 +1,50 @@
|
|
1
|
+
"""
|
2
|
+
A tool to trigger a Tavily search for a given query, and return the top results with
|
3
|
+
their titles, links, summaries. Since the tool is stateless (i.e. does not need
|
4
|
+
access to agent state), it can be enabled for any agent, without having to define a
|
5
|
+
special method inside the agent: `agent.enable_message(TavilySearchTool)`
|
6
|
+
"""
|
7
|
+
|
8
|
+
from typing import List, Tuple
|
9
|
+
|
10
|
+
from langroid.agent.tool_message import ToolMessage
|
11
|
+
from langroid.parsing.web_search import tavily_search
|
12
|
+
|
13
|
+
|
14
|
+
class TavilySearchTool(ToolMessage):
    """
    Stateless tool that lets an LLM request a web search through Tavily.

    The LLM supplies a `query` and the desired `num_results`; `handle` runs
    the search via `tavily_search` and returns the results as a block of text
    for the LLM to use when composing its answer.
    """

    request: str = "tavily_search"
    purpose: str = """
            To search the web and return up to <num_results>
            links relevant to the given <query>. When using this tool,
            ONLY show the required JSON, DO NOT SAY ANYTHING ELSE.
            Wait for the results of the web search, and then use them to
            compose your response.
            """
    # Text of the web search to run.
    query: str
    # Upper bound on the number of results to fetch.
    num_results: int

    def handle(self) -> str:
        """
        Run a Tavily search for `self.query`, asking for `self.num_results` hits.

        Returns:
            str: A header line followed by the string form of every search
                result, with consecutive results separated by a blank line.
        """
        hits = tavily_search(self.query, self.num_results)
        # Each result's own str() supplies its textual form; results are
        # separated by two newlines.
        results_str = "\n\n".join(map(str, hits))
        return f"""
        BELOW ARE THE RESULTS FROM THE WEB SEARCH. USE THESE TO COMPOSE YOUR RESPONSE:
        {results_str}
        """

    @classmethod
    def examples(cls) -> List["ToolMessage" | Tuple[str, "ToolMessage"]]:
        """Return a single sample invocation of this tool."""
        sample = cls(
            query="When was the Llama2 Large Language Model (LLM) released?",
            num_results=3,
        )
        return [sample]
|
langroid/parsing/search.py
CHANGED
@@ -118,7 +118,7 @@ def preprocess_text(text: str) -> str:
|
|
118
118
|
str: The preprocessed text.
|
119
119
|
"""
|
120
120
|
# Ensure the NLTK resources are available
|
121
|
-
for resource in ["punkt", "wordnet", "stopwords"]:
|
121
|
+
for resource in ["tokenizers/punkt", "corpora/wordnet", "corpora/stopwords"]:
|
122
122
|
download_nltk_resource(resource)
|
123
123
|
|
124
124
|
# Lowercase the text
|
langroid/parsing/utils.py
CHANGED
@@ -28,12 +28,13 @@ def download_nltk_resource(resource: str) -> None:
|
|
28
28
|
try:
|
29
29
|
nltk.data.find(resource)
|
30
30
|
except LookupError:
|
31
|
-
|
31
|
+
model = resource.split("/")[-1]
|
32
|
+
nltk.download(model, quiet=True)
|
32
33
|
|
33
34
|
|
34
35
|
# Download the punkt_tab and gutenberg resources at module import
|
35
|
-
download_nltk_resource("punkt_tab")
|
36
|
-
download_nltk_resource("gutenberg")
|
36
|
+
download_nltk_resource("tokenizers/punkt_tab")
|
37
|
+
download_nltk_resource("corpora/gutenberg")
|
37
38
|
|
38
39
|
T = TypeVar("T")
|
39
40
|
|
langroid/parsing/web_search.py
CHANGED
@@ -16,6 +16,8 @@ from duckduckgo_search import DDGS
|
|
16
16
|
from googleapiclient.discovery import Resource, build
|
17
17
|
from requests.models import Response
|
18
18
|
|
19
|
+
from langroid.exceptions import LangroidImportError
|
20
|
+
|
19
21
|
|
20
22
|
class WebSearchResult:
|
21
23
|
"""
|
@@ -109,13 +111,7 @@ def metaphor_search(query: str, num_results: int = 5) -> List[WebSearchResult]:
|
|
109
111
|
try:
|
110
112
|
from metaphor_python import Metaphor
|
111
113
|
except ImportError:
|
112
|
-
raise
|
113
|
-
"You are attempting to use the `metaphor_python` library;"
|
114
|
-
"To use it, please install langroid with the `metaphor` extra, e.g. "
|
115
|
-
"`pip install langroid[metaphor]` or `poetry add langroid[metaphor]` "
|
116
|
-
"or `uv add langroid[metaphor]`"
|
117
|
-
"(it installs the `metaphor_python` package from pypi)."
|
118
|
-
)
|
114
|
+
raise LangroidImportError("metaphor-python", "metaphor")
|
119
115
|
|
120
116
|
client = Metaphor(api_key=api_key)
|
121
117
|
|
@@ -130,6 +126,53 @@ def metaphor_search(query: str, num_results: int = 5) -> List[WebSearchResult]:
|
|
130
126
|
]
|
131
127
|
|
132
128
|
|
129
|
+
def exa_search(query: str, num_results: int = 5) -> List[WebSearchResult]:
    """
    Query the Exa search API and wrap the returned hits as WebSearchResult
    objects.

    Args:
        query (str): The search query to send to Exa.
        num_results (int): Number of top matching results to request.
            Defaults to 5.

    Returns:
        List[WebSearchResult]: One entry per result in the Exa response, built
            from the result's title (empty string when the title is falsy)
            and its URL.

    Raises:
        ValueError: If the EXA_API_KEY environment variable is not set
            (checked after loading any `.env` file).
        LangroidImportError: If the `exa_py` package cannot be imported.
    """
    load_dotenv()

    api_key = os.getenv("EXA_API_KEY")
    if not api_key:
        # Only this one variable is consulted, so the message names it in the
        # singular instead of suggesting there are alternatives to choose from.
        raise ValueError(
            "EXA_API_KEY environment variable is not set. "
            "Please set it to your API key and try again."
        )

    # Imported lazily so the module stays importable without exa_py installed.
    try:
        from exa_py import Exa
    except ImportError:
        raise LangroidImportError("exa-py", "exa")

    client = Exa(api_key=api_key)
    response = client.search(query=query, num_results=num_results)

    return [
        WebSearchResult(
            title=hit.title or "",
            link=hit.url,
            max_content_length=3500,
            max_summary_length=300,
        )
        for hit in response.results
    ]
|
174
|
+
|
175
|
+
|
133
176
|
def duckduckgo_search(query: str, num_results: int = 5) -> List[WebSearchResult]:
|
134
177
|
"""
|
135
178
|
Method that makes an API call by DuckDuckGo client that queries
|
@@ -154,3 +197,44 @@ def duckduckgo_search(query: str, num_results: int = 5) -> List[WebSearchResult]
|
|
154
197
|
)
|
155
198
|
for result in search_results
|
156
199
|
]
|
200
|
+
|
201
|
+
|
202
|
+
def tavily_search(query: str, num_results: int = 5) -> List[WebSearchResult]:
    """
    Query the Tavily search API and wrap the returned hits as WebSearchResult
    objects.

    Args:
        query (str): The search query to send to Tavily.
        num_results (int): Number of top matching results to request.
            Defaults to 5.

    Returns:
        List[WebSearchResult]: One entry per item in the response's "results"
            list, built from the item's title (empty string when missing or
            falsy) and its URL.

    Raises:
        ValueError: If the TAVILY_API_KEY environment variable is not set
            (checked after loading any `.env` file).
        LangroidImportError: If the `tavily` package cannot be imported.
    """
    load_dotenv()

    api_key = os.getenv("TAVILY_API_KEY")
    if not api_key:
        raise ValueError(
            "TAVILY_API_KEY environment variable is not set. "
            "Please set it to your API key and try again."
        )

    # Imported lazily so the module stays importable without tavily installed.
    try:
        from tavily import TavilyClient
    except ImportError:
        raise LangroidImportError("tavily-python", "tavily")

    client = TavilyClient(api_key=api_key)
    response = client.search(query=query, max_results=num_results)

    return [
        WebSearchResult(
            # Fall back to an empty title, mirroring exa_search, rather than
            # failing on a hit that has no usable title.
            title=hit.get("title") or "",
            link=hit["url"],
            max_content_length=3500,
            max_summary_length=300,
        )
        for hit in response["results"]
    ]
|
@@ -23,11 +23,7 @@ try:
|
|
23
23
|
MeiliSearch
|
24
24
|
MeiliSearchConfig
|
25
25
|
__all__.extend(["meilisearch", "MeiliSearch", "MeiliSearchConfig"])
|
26
|
-
except ImportError:
|
27
|
-
pass
|
28
26
|
|
29
|
-
|
30
|
-
try:
|
31
27
|
from . import lancedb
|
32
28
|
from .lancedb import LanceDB, LanceDBConfig
|
33
29
|
|
@@ -35,10 +31,6 @@ try:
|
|
35
31
|
LanceDB
|
36
32
|
LanceDBConfig
|
37
33
|
__all__.extend(["lancedb", "LanceDB", "LanceDBConfig"])
|
38
|
-
except ImportError:
|
39
|
-
pass
|
40
|
-
|
41
|
-
try:
|
42
34
|
from . import chromadb
|
43
35
|
from .chromadb import ChromaDBConfig, ChromaDB
|
44
36
|
|
@@ -46,10 +38,7 @@ try:
|
|
46
38
|
ChromaDB
|
47
39
|
ChromaDBConfig
|
48
40
|
__all__.extend(["chromadb", "ChromaDBConfig", "ChromaDB"])
|
49
|
-
except ImportError:
|
50
|
-
pass
|
51
41
|
|
52
|
-
try:
|
53
42
|
from . import postgres
|
54
43
|
from .postgres import PostgresDB, PostgresDBConfig
|
55
44
|
|
@@ -57,6 +46,7 @@ try:
|
|
57
46
|
PostgresDB
|
58
47
|
PostgresDBConfig
|
59
48
|
__all__.extend(["postgres", "PostgresDB", "PostgresDBConfig"])
|
49
|
+
|
60
50
|
from . import weaviatedb
|
61
51
|
from .weaviatedb import WeaviateDBConfig, WeaviateDB
|
62
52
|
|
@@ -64,5 +54,13 @@ try:
|
|
64
54
|
WeaviateDB
|
65
55
|
WeaviateDBConfig
|
66
56
|
__all__.extend(["weaviatedb", "WeaviateDB", "WeaviateDBConfig"])
|
57
|
+
|
58
|
+
from . import pineconedb
|
59
|
+
from .pineconedb import PineconeDB, PineconeDBConfig
|
60
|
+
|
61
|
+
pineconedb
|
62
|
+
PineconeDB
|
63
|
+
PineconeDBConfig
|
64
|
+
__all__.extend(["pineconedb", "PineconeDB", "PineconeDBConfig"])
|
67
65
|
except ImportError:
|
68
66
|
pass
|
langroid/vector_store/base.py
CHANGED
@@ -59,6 +59,7 @@ class VectorStore(ABC):
|
|
59
59
|
from langroid.vector_store.lancedb import LanceDB, LanceDBConfig
|
60
60
|
from langroid.vector_store.meilisearch import MeiliSearch, MeiliSearchConfig
|
61
61
|
from langroid.vector_store.momento import MomentoVI, MomentoVIConfig
|
62
|
+
from langroid.vector_store.pineconedb import PineconeDB, PineconeDBConfig
|
62
63
|
from langroid.vector_store.postgres import PostgresDB, PostgresDBConfig
|
63
64
|
from langroid.vector_store.qdrantdb import QdrantDB, QdrantDBConfig
|
64
65
|
from langroid.vector_store.weaviatedb import WeaviateDB, WeaviateDBConfig
|
@@ -77,6 +78,8 @@ class VectorStore(ABC):
|
|
77
78
|
return PostgresDB(config)
|
78
79
|
elif isinstance(config, WeaviateDBConfig):
|
79
80
|
return WeaviateDB(config)
|
81
|
+
elif isinstance(config, PineconeDBConfig):
|
82
|
+
return PineconeDB(config)
|
80
83
|
|
81
84
|
else:
|
82
85
|
logger.warning(
|
@@ -0,0 +1,427 @@
|
|
1
|
+
import json
|
2
|
+
import logging
|
3
|
+
import os
|
4
|
+
import re
|
5
|
+
from dataclasses import dataclass
|
6
|
+
from typing import (
|
7
|
+
TYPE_CHECKING,
|
8
|
+
Any,
|
9
|
+
Dict,
|
10
|
+
List,
|
11
|
+
Literal,
|
12
|
+
Optional,
|
13
|
+
Sequence,
|
14
|
+
Tuple,
|
15
|
+
Union,
|
16
|
+
)
|
17
|
+
|
18
|
+
from dotenv import load_dotenv
|
19
|
+
|
20
|
+
from langroid import LangroidImportError
|
21
|
+
from langroid.mytypes import Document
|
22
|
+
|
23
|
+
# BaseModel backs the fallback ServerlessSpec defined below
|
24
|
+
from langroid.pydantic_v1 import BaseModel
|
25
|
+
from langroid.utils.configuration import settings
|
26
|
+
from langroid.vector_store.base import VectorStore, VectorStoreConfig
|
27
|
+
|
28
|
+
logger = logging.getLogger(__name__)
|
29
|
+
|
30
|
+
|
31
|
+
has_pinecone: bool = True
|
32
|
+
try:
|
33
|
+
from pinecone import Pinecone, PineconeApiException, ServerlessSpec
|
34
|
+
except ImportError:
|
35
|
+
|
36
|
+
if not TYPE_CHECKING:
|
37
|
+
|
38
|
+
class ServerlessSpec(BaseModel):
|
39
|
+
"""
|
40
|
+
Fallback Serverless specification configuration to avoid import errors.
|
41
|
+
"""
|
42
|
+
|
43
|
+
cloud: str
|
44
|
+
region: str
|
45
|
+
|
46
|
+
PineconeApiException = Any # type: ignore
|
47
|
+
Pinecone = Any # type: ignore
|
48
|
+
has_pinecone = False
|
49
|
+
|
50
|
+
|
51
|
+
@dataclass(frozen=True)
class IndexMeta:
    """Immutable summary of a Pinecone index: its name and its vector count."""

    # Name of the index.
    name: str
    # Number of vectors stored in the index.
    total_vector_count: int
|
55
|
+
|
56
|
+
|
57
|
+
class PineconeDBConfig(VectorStoreConfig):
    """Settings for the Pinecone-backed vector store."""

    cloud: bool = True
    # Name of the Pinecone index used as the collection.
    collection_name: str | None = "temp"
    # Passed as `spec` when an index is created.
    spec: ServerlessSpec = ServerlessSpec(cloud="aws", region="us-east-1")
    # Forwarded to index creation only when set to a non-empty value.
    deletion_protection: Literal["enabled", "disabled"] | None = None
    # Passed as `metric` when an index is created.
    metric: str = "cosine"
    # Page size ("limit") used when listing vector ids.
    pagination_size: int = 100
|
64
|
+
|
65
|
+
|
66
|
+
class PineconeDB(VectorStore):
    """
    Vector store backed by Pinecone, where a "collection" is a Pinecone index.

    Document fields and metadata are stored together in each vector's
    Pinecone metadata, and documents are rebuilt from that metadata on read.
    """

    def __init__(self, config: PineconeDBConfig = PineconeDBConfig()):
        """
        Create the Pinecone client and, when a collection name is configured,
        create (or reuse) the corresponding index.

        Args:
            config: Store configuration.

        Raises:
            LangroidImportError: If the `pinecone` package could not be imported.
            ValueError: If PINECONE_API_KEY is not set (checked after loading
                any `.env` file).
        """
        super().__init__(config)
        if not has_pinecone:
            raise LangroidImportError("pinecone", "pinecone")
        self.config: PineconeDBConfig = config
        load_dotenv()
        key = os.getenv("PINECONE_API_KEY")

        if not key:
            raise ValueError("PINECONE_API_KEY not set, could not instantiate client")
        self.client = Pinecone(api_key=key)

        if config.collection_name:
            self.create_collection(
                collection_name=config.collection_name,
                replace=config.replace_collection,
            )

    def clear_empty_collections(self) -> int:
        """
        Delete every index that holds no vectors.

        Indexes whose stats could not be fetched (vector count of -1) are
        skipped with a warning instead of being deleted, because they may
        still contain data. Non-empty indexes are left untouched.

        Returns:
            Number of empty indexes for which deletion was requested.
        """
        n_deletes = 0
        for index in self._list_index_metas(empty=True):
            if index.total_vector_count == -1:
                logger.warning(
                    f"Error fetching details for {index.name} when scanning indexes"
                )
                continue
            if index.total_vector_count > 0:
                continue
            self.delete_collection(collection_name=index.name)
            n_deletes += 1
        return n_deletes

    def clear_all_collections(self, really: bool = False, prefix: str = "") -> int:
        """
        Delete all indexes whose name starts with `prefix`.

        Args:
            really: Safety switch; nothing is deleted unless this is True.
            prefix: Only indexes whose name starts with this string are
                deleted. The default empty string matches every index.

        Returns:
            Number of deleted indexes that were known to be empty or
            non-empty. Indexes whose vector count could not be determined
            (-1) are still deleted but are not included in this number.
        """
        if not really:
            logger.warning("Not deleting all collections, set really=True to confirm")
            return 0
        indexes = [
            c for c in self._list_index_metas(empty=True) if c.name.startswith(prefix)
        ]
        if not indexes:
            logger.warning(f"No collections found with prefix {prefix}")
            return 0
        n_empty_deletes, n_non_empty_deletes = 0, 0
        for index_desc in indexes:
            self.delete_collection(collection_name=index_desc.name)
            # bools add as 0/1, so each index lands in at most one counter
            n_empty_deletes += index_desc.total_vector_count == 0
            n_non_empty_deletes += index_desc.total_vector_count > 0
        logger.warning(
            f"""
            Deleted {n_empty_deletes} empty indexes and
            {n_non_empty_deletes} non-empty indexes
            """
        )
        return n_empty_deletes + n_non_empty_deletes

    def list_collections(self, empty: bool = False) -> List[str]:
        """
        List index names.

        Args:
            empty: Whether to include empty indexes. When False, only indexes
                with at least one vector are returned, which costs one stats
                call per index.

        Returns:
            Names of the matching Pinecone indexes.
        """
        names = self.client.list_indexes().names()
        if empty:
            return list(names)

        res: List[str] = []
        for name in names:
            stats = self.client.Index(name=name).describe_index_stats()
            if stats.get("total_vector_count", 0) > 0:
                res.append(name)
        return res

    def _list_index_metas(self, empty: bool = False) -> List[IndexMeta]:
        """
        Describe each index by name and vector count.

        Args:
            empty: Whether to include empty indexes. When False, indexes whose
                count is 0, or -1 (stats fetch failed), are left out.

        Returns:
            List of IndexMeta objects describing the matching indexes.
        """
        metas = [
            self._fetch_index_meta(name) for name in self.client.list_indexes().names()
        ]
        return [meta for meta in metas if empty or meta.total_vector_count > 0]

    def _fetch_index_meta(self, index_name: str) -> IndexMeta:
        """
        Fetch the vector count of one index.

        Args:
            index_name: Name of the index in Pinecone.

        Returns:
            IndexMeta with the index name and its total vector count, or a
            count of -1 if the Pinecone API call failed.
        """
        try:
            index = self.client.Index(name=index_name)
            stats = index.describe_index_stats()
            return IndexMeta(
                name=index_name, total_vector_count=stats.get("total_vector_count", 0)
            )
        except PineconeApiException as e:
            logger.warning(f"Error fetching details for index {index_name}")
            logger.warning(e)
            return IndexMeta(name=index_name, total_vector_count=-1)

    def create_collection(self, collection_name: str, replace: bool = False) -> None:
        """
        Create an index with the given name and make it the current collection.

        If a ready, non-empty index of that name already exists it is kept
        unless `replace` is True, in which case it is deleted and re-created.

        Args:
            collection_name: Name of the index to create; must consist only of
                lowercase letters, digits and '-'.
            replace: Whether to replace an existing non-empty index with the
                same name. Defaults to False.

        Raises:
            ValueError: If `collection_name` contains disallowed characters.
        """
        # fullmatch, so that a trailing newline is rejected too
        if not re.fullmatch(r"[a-z0-9-]+", collection_name):
            raise ValueError(
                "Pinecone index names must be lowercase alphanumeric characters or '-'"
            )
        self.config.collection_name = collection_name
        if collection_name in self.list_collections(empty=True):
            index = self.client.Index(name=collection_name)
            stats = index.describe_index_stats()
            status = self.client.describe_index(name=collection_name)
            if status["status"]["ready"] and stats["total_vector_count"] > 0:
                logger.warning(f"Non-empty collection {collection_name} already exists")
                if not replace:
                    logger.warning("Not replacing collection")
                    return
                logger.warning("Recreating fresh collection")
            # NOTE(review): an existing index that is empty or not ready is
            # also dropped and re-created here -- confirm this is intended.
            self.delete_collection(collection_name=collection_name)

        payload: Dict[str, Any] = {
            "name": collection_name,
            "dimension": self.embedding_dim,
            "spec": self.config.spec,
            "metric": self.config.metric,
            "timeout": self.config.timeout,
        }

        if self.config.deletion_protection:
            payload["deletion_protection"] = self.config.deletion_protection

        try:
            self.client.create_index(**payload)
        except PineconeApiException as e:
            # creation failures are logged, not raised
            logger.error(e)

    def delete_collection(self, collection_name: str) -> None:
        """
        Delete the index with the given name.

        Pinecone API errors are logged and swallowed.

        Args:
            collection_name: Name of the index to delete.
        """
        logger.info(f"Attempting to delete {collection_name}")
        try:
            self.client.delete_index(name=collection_name)
        except PineconeApiException as e:
            logger.error(f"Failed to delete {collection_name}")
            logger.error(e)

    def add_documents(self, documents: Sequence[Document], namespace: str = "") -> None:
        """
        Embed the documents and upsert them into the current index in batches.

        Each vector's metadata holds the document's own metadata merged with
        its remaining top-level fields, so the document can be rebuilt later.

        Args:
            documents: Documents to ingest; an empty sequence is a no-op.
            namespace: Optional Pinecone namespace to upsert into.

        Raises:
            ValueError: If no collection name is configured.
        """
        if self.config.collection_name is None:
            raise ValueError("No collection name set, cannot ingest docs")

        if len(documents) == 0:
            logger.warning("Empty list of documents passed into add_documents")
            return

        super().maybe_add_ids(documents)
        document_dicts = [doc.dict() for doc in documents]
        document_ids = [doc.id() for doc in documents]
        embedding_vectors = self.embedding_fn([doc.content for doc in documents])
        vectors = [
            {
                "id": document_id,
                "values": embedding_vector,
                "metadata": {
                    **document_dict["metadata"],
                    # top-level fields override same-named metadata keys
                    **{
                        key: value
                        for key, value in document_dict.items()
                        if key != "metadata"
                    },
                },
            }
            for document_dict, document_id, embedding_vector in zip(
                document_dicts, document_ids, embedding_vectors
            )
        ]

        if self.config.collection_name not in self.list_collections(empty=True):
            self.create_collection(
                collection_name=self.config.collection_name, replace=True
            )

        index = self.client.Index(name=self.config.collection_name)
        batch_size = self.config.batch_size

        for i in range(0, len(documents), batch_size):
            batch = vectors[i : i + batch_size]
            try:
                if namespace:
                    index.upsert(vectors=batch, namespace=namespace)
                else:
                    index.upsert(vectors=batch)
            except PineconeApiException as e:
                # report the real end position of the failed batch, not the
                # batch size; remaining batches are still attempted
                logger.error(
                    f"Unable to add docs between indices {i} and {i + len(batch)}"
                )
                logger.error(e)

    def get_all_documents(
        self, prefix: str = "", namespace: str = ""
    ) -> List[Document]:
        """
        Retrieve all documents of the current collection by paging through the
        index's vector ids and fetching each page's records.

        Args:
            prefix: Document id prefix to restrict the listing to.
            namespace: Partition of vectors to search within the index.

        Returns:
            The documents rebuilt from the stored vector metadata.

        Raises:
            ValueError: If no collection name is configured.
        """
        if self.config.collection_name is None:
            raise ValueError("No collection name set, cannot retrieve docs")
        docs: List[Document] = []

        request_filters: Dict[str, Union[str, int]] = {
            "limit": self.config.pagination_size
        }
        if prefix:
            request_filters["prefix"] = prefix
        if namespace:
            request_filters["namespace"] = namespace

        index = self.client.Index(name=self.config.collection_name)

        while True:
            response = index.list_paginated(**request_filters)
            vectors = response.get("vectors", [])

            if not vectors:
                logger.warning("Received empty list while requesting for vector ids")
                logger.warning("Halting fetch requests")
                if settings.debug:
                    logger.debug(f"Request for failed fetch was: {request_filters}")
                break

            docs.extend(
                self.get_documents_by_ids(
                    ids=[vector.get("id") for vector in vectors],
                    namespace=namespace or "",
                )
            )

            pagination_token = response.get("pagination", {}).get("next", None)

            # no continuation token means this was the last page
            if not pagination_token:
                break

            request_filters["pagination_token"] = pagination_token

        return docs

    def get_documents_by_ids(
        self, ids: List[str], namespace: str = ""
    ) -> List[Document]:
        """
        Fetch documents by vector id from the current collection.

        Args:
            ids: Vector ids to retrieve.
            namespace: Partition of vectors to search within the index.

        Returns:
            Documents rebuilt from the stored metadata, in the order of `ids`;
            ids missing from the fetch response are skipped.

        Raises:
            ValueError: If no collection name is configured.
        """
        if self.config.collection_name is None:
            raise ValueError("No collection name set, cannot retrieve docs")
        index = self.client.Index(name=self.config.collection_name)

        if namespace:
            records = index.fetch(ids=ids, namespace=namespace)
        else:
            records = index.fetch(ids=ids)

        id_mapping = dict(records["vectors"])
        # re-impose the caller's id order on the fetched records
        ordered_payloads = [id_mapping[_id] for _id in ids if _id in id_mapping]
        return [
            self.transform_pinecone_vector(payload.get("metadata", {}))
            for payload in ordered_payloads
        ]

    def similar_texts_with_scores(
        self,
        text: str,
        k: int = 1,
        where: Optional[str] = None,
        namespace: Optional[str] = None,
    ) -> List[Tuple[Document, float]]:
        """
        Find the documents most similar to `text` in the current collection.

        Args:
            text: Query text; it is embedded with the store's embedding function.
            k: Number of matches to request; must satisfy 1 <= k <= 9999.
            where: Optional JSON string, parsed and passed as the query filter.
            namespace: Optional partition of vectors to search within the index.

        Returns:
            (document, score) pairs in the order given by the query response;
            empty if there are no matches.

        Raises:
            ValueError: If no collection name is configured or `k` is out of
                range.
        """
        if self.config.collection_name is None:
            raise ValueError("No collection name set, cannot search")

        if k < 1 or k > 9999:
            raise ValueError(
                f"TopK for Pinecone vector search must be 1 <= k < 10000, k was {k}"
            )

        vector_search_request: Dict[str, Any] = {
            "top_k": k,
            "include_metadata": True,
            "vector": self.embedding_fn([text])[0],
        }
        if where:
            vector_search_request["filter"] = json.loads(where)
        if namespace:
            vector_search_request["namespace"] = namespace

        index = self.client.Index(name=self.config.collection_name)
        response = index.query(**vector_search_request)
        doc_score_pairs = [
            (
                self.transform_pinecone_vector(match.get("metadata", {})),
                match.get("score", 0),
            )
            for match in response.get("matches", [])
        ]
        if settings.debug:
            # max() of an empty sequence raises, so report zero matches separately
            if doc_score_pairs:
                max_score = max(score for _, score in doc_score_pairs)
                logger.info(
                    f"Found {len(doc_score_pairs)} matches, max score: {max_score}"
                )
            else:
                logger.info("Found 0 matches")
        self.show_if_debug(doc_score_pairs)
        return doc_score_pairs

    def transform_pinecone_vector(self, metadata_dict: Dict[str, Any]) -> Document:
        """
        Build a document from the metadata stored with a Pinecone vector.

        The metadata is passed both as top-level keyword arguments and as the
        `metadata` argument of the configured document class.

        Args:
            metadata_dict: The metadata dictionary of a fetched vector or
                query match.

        Returns:
            An instance of `self.config.document_class`.
        """
        return self.config.document_class(
            **{**metadata_dict, "metadata": {**metadata_dict}}
        )
|
@@ -5,21 +5,6 @@ import os
|
|
5
5
|
import uuid
|
6
6
|
from typing import Any, Dict, List, Optional, Sequence, Tuple
|
7
7
|
|
8
|
-
from sqlalchemy import (
|
9
|
-
Column,
|
10
|
-
MetaData,
|
11
|
-
String,
|
12
|
-
Table,
|
13
|
-
case,
|
14
|
-
create_engine,
|
15
|
-
inspect,
|
16
|
-
text,
|
17
|
-
)
|
18
|
-
from sqlalchemy.dialects.postgresql import JSONB
|
19
|
-
from sqlalchemy.engine import Connection, Engine
|
20
|
-
from sqlalchemy.orm import sessionmaker
|
21
|
-
from sqlalchemy.sql.expression import insert
|
22
|
-
|
23
8
|
from langroid.embedding_models.base import (
|
24
9
|
EmbeddingModelsConfig,
|
25
10
|
)
|
@@ -28,6 +13,27 @@ from langroid.exceptions import LangroidImportError
|
|
28
13
|
from langroid.mytypes import DocMetaData, Document
|
29
14
|
from langroid.vector_store.base import VectorStore, VectorStoreConfig
|
30
15
|
|
16
|
+
has_postgres: bool = True
|
17
|
+
try:
|
18
|
+
from sqlalchemy import (
|
19
|
+
Column,
|
20
|
+
MetaData,
|
21
|
+
String,
|
22
|
+
Table,
|
23
|
+
case,
|
24
|
+
create_engine,
|
25
|
+
inspect,
|
26
|
+
text,
|
27
|
+
)
|
28
|
+
from sqlalchemy.dialects.postgresql import JSONB
|
29
|
+
from sqlalchemy.engine import Connection, Engine
|
30
|
+
from sqlalchemy.orm import sessionmaker
|
31
|
+
from sqlalchemy.sql.expression import insert
|
32
|
+
except ImportError:
|
33
|
+
Engine = Any # type: ignore
|
34
|
+
Connection = Any # type: ignore
|
35
|
+
has_postgres = False
|
36
|
+
|
31
37
|
logger = logging.getLogger(__name__)
|
32
38
|
|
33
39
|
|
@@ -48,6 +54,8 @@ class PostgresDBConfig(VectorStoreConfig):
|
|
48
54
|
class PostgresDB(VectorStore):
|
49
55
|
def __init__(self, config: PostgresDBConfig = PostgresDBConfig()):
|
50
56
|
super().__init__(config)
|
57
|
+
if not has_postgres:
|
58
|
+
raise LangroidImportError("pgvector", "postgres")
|
51
59
|
self.config: PostgresDBConfig = config
|
52
60
|
self.engine = self._create_engine()
|
53
61
|
PostgresDB._create_vector_extension(self.engine)
|
@@ -1,7 +1,7 @@
|
|
1
1
|
import logging
|
2
2
|
import os
|
3
3
|
import re
|
4
|
-
from typing import Any, List, Optional, Sequence, Tuple
|
4
|
+
from typing import TYPE_CHECKING, Any, List, Optional, Sequence, Tuple
|
5
5
|
|
6
6
|
from dotenv import load_dotenv
|
7
7
|
|
@@ -15,6 +15,7 @@ from langroid.utils.configuration import settings
|
|
15
15
|
from langroid.vector_store.base import VectorStore, VectorStoreConfig
|
16
16
|
|
17
17
|
logger = logging.getLogger(__name__)
|
18
|
+
has_weaviate: bool = True
|
18
19
|
try:
|
19
20
|
import weaviate
|
20
21
|
from weaviate.classes.config import (
|
@@ -25,7 +26,18 @@ try:
|
|
25
26
|
from weaviate.classes.query import Filter, MetadataQuery
|
26
27
|
from weaviate.util import generate_uuid5, get_valid_uuid
|
27
28
|
except ImportError:
|
28
|
-
|
29
|
+
has_weaviate = False
|
30
|
+
|
31
|
+
if not TYPE_CHECKING:
|
32
|
+
|
33
|
+
class VectorDistances:
|
34
|
+
"""
|
35
|
+
Fallback class when weaviate is not installed, to avoid import errors.
|
36
|
+
"""
|
37
|
+
|
38
|
+
COSINE: str = "cosine"
|
39
|
+
DOTPRODUCT: str = "dot"
|
40
|
+
L2: str = "l2"
|
29
41
|
|
30
42
|
|
31
43
|
class WeaviateDBConfig(VectorStoreConfig):
|
@@ -39,6 +51,8 @@ class WeaviateDBConfig(VectorStoreConfig):
|
|
39
51
|
class WeaviateDB(VectorStore):
|
40
52
|
def __init__(self, config: WeaviateDBConfig = WeaviateDBConfig()):
|
41
53
|
super().__init__(config)
|
54
|
+
if not has_weaviate:
|
55
|
+
raise LangroidImportError("weaviate", "weaviate")
|
42
56
|
self.config: WeaviateDBConfig = config
|
43
57
|
load_dotenv()
|
44
58
|
if not self.config.cloud:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: langroid
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.41.0
|
4
4
|
Summary: Harness LLMs with Multi-Agent Programming
|
5
5
|
Author-email: Prasad Chalasani <pchalasani@gmail.com>
|
6
6
|
License: MIT
|
@@ -15,6 +15,7 @@ Requires-Dist: colorlog<7.0.0,>=6.7.0
|
|
15
15
|
Requires-Dist: docling<3.0.0,>=2.16.0
|
16
16
|
Requires-Dist: docstring-parser<1.0,>=0.16
|
17
17
|
Requires-Dist: duckduckgo-search<7.0.0,>=6.0.0
|
18
|
+
Requires-Dist: exa-py>=1.8.7
|
18
19
|
Requires-Dist: faker<19.0.0,>=18.9.0
|
19
20
|
Requires-Dist: fakeredis<3.0.0,>=2.12.1
|
20
21
|
Requires-Dist: fire<1.0.0,>=0.5.0
|
@@ -48,6 +49,7 @@ Requires-Dist: redis<6.0.0,>=5.0.1
|
|
48
49
|
Requires-Dist: requests-oauthlib<2.0.0,>=1.3.1
|
49
50
|
Requires-Dist: requests<3.0.0,>=2.31.0
|
50
51
|
Requires-Dist: rich<14.0.0,>=13.3.4
|
52
|
+
Requires-Dist: tavily-python>=0.5.0
|
51
53
|
Requires-Dist: thefuzz<1.0.0,>=0.20.0
|
52
54
|
Requires-Dist: tiktoken<1.0.0,>=0.7.0
|
53
55
|
Requires-Dist: trafilatura<2.0.0,>=1.5.0
|
@@ -106,6 +108,8 @@ Provides-Extra: docling
|
|
106
108
|
Requires-Dist: docling<3.0.0,>=2.16.0; extra == 'docling'
|
107
109
|
Provides-Extra: docx
|
108
110
|
Requires-Dist: python-docx<2.0.0,>=1.1.0; extra == 'docx'
|
111
|
+
Provides-Extra: exa
|
112
|
+
Requires-Dist: exa-py>=1.8.7; extra == 'exa'
|
109
113
|
Provides-Extra: fastembed
|
110
114
|
Requires-Dist: fastembed<0.4.0,>=0.3.1; extra == 'fastembed'
|
111
115
|
Provides-Extra: google-generativeai
|
@@ -141,6 +145,8 @@ Requires-Dist: pymupdf4llm<0.1.0,>=0.0.17; extra == 'pdf-parsers'
|
|
141
145
|
Requires-Dist: pymupdf<2.0.0,>=1.23.3; extra == 'pdf-parsers'
|
142
146
|
Requires-Dist: pypdf>=5.1.0; extra == 'pdf-parsers'
|
143
147
|
Requires-Dist: pytesseract<0.4.0,>=0.3.10; extra == 'pdf-parsers'
|
148
|
+
Provides-Extra: pinecone
|
149
|
+
Requires-Dist: pinecone-client>=5.0.1; extra == 'pinecone'
|
144
150
|
Provides-Extra: postgres
|
145
151
|
Requires-Dist: pgvector>=0.3.6; extra == 'postgres'
|
146
152
|
Requires-Dist: psycopg2-binary>=2.9.10; extra == 'postgres'
|
@@ -154,6 +160,8 @@ Provides-Extra: sql
|
|
154
160
|
Requires-Dist: psycopg2<3.0.0,>=2.9.7; extra == 'sql'
|
155
161
|
Requires-Dist: pymysql<2.0.0,>=1.1.0; extra == 'sql'
|
156
162
|
Requires-Dist: sqlalchemy<3.0.0,>=2.0.19; extra == 'sql'
|
163
|
+
Provides-Extra: tavily
|
164
|
+
Requires-Dist: tavily-python>=0.5.0; extra == 'tavily'
|
157
165
|
Provides-Extra: transformers
|
158
166
|
Requires-Dist: huggingface-hub<1.0.0,>=0.21.2; extra == 'transformers'
|
159
167
|
Requires-Dist: torch<3.0.0,>=2.0.0; extra == 'transformers'
|
@@ -163,6 +171,7 @@ Requires-Dist: unstructured[docx,pdf,pptx]<1.0.0,>=0.16.15; extra == 'unstructur
|
|
163
171
|
Provides-Extra: vecdbs
|
164
172
|
Requires-Dist: chromadb<=0.4.23,>=0.4.21; extra == 'vecdbs'
|
165
173
|
Requires-Dist: lancedb<0.9.0,>=0.8.2; extra == 'vecdbs'
|
174
|
+
Requires-Dist: pinecone-client>=5.0.1; extra == 'vecdbs'
|
166
175
|
Requires-Dist: pyarrow<16.0.0,>=15.0.0; extra == 'vecdbs'
|
167
176
|
Requires-Dist: tantivy<0.22.0,>=0.21.0; extra == 'vecdbs'
|
168
177
|
Requires-Dist: weaviate-client>=4.9.6; extra == 'vecdbs'
|
@@ -43,6 +43,7 @@ langroid/agent/special/sql/utils/system_message.py,sha256=qKLHkvQWRQodTtPLPxr1GS
|
|
43
43
|
langroid/agent/special/sql/utils/tools.py,sha256=ovCePzq5cmbqw0vsVPBzxdZpUcSUIfTiDSMGXustZW8,1749
|
44
44
|
langroid/agent/tools/__init__.py,sha256=IMgCte-_ZIvCkozGQmvMqxIw7_nKLKzD78ccJL1bnQU,804
|
45
45
|
langroid/agent/tools/duckduckgo_search_tool.py,sha256=NhsCaGZkdv28nja7yveAhSK_w6l_Ftym8agbrdzqgfo,1935
|
46
|
+
langroid/agent/tools/exa_search_tool.py,sha256=qxDs6vIiUtFyfX6gmS-PxoCXes-55in3ef5AkUQhiM0,2469
|
46
47
|
langroid/agent/tools/file_tools.py,sha256=GjPB5YDILucYapElnvvoYpGJuZQ25ecLs2REv7edPEo,7292
|
47
48
|
langroid/agent/tools/google_search_tool.py,sha256=y7b-3FtgXf0lfF4AYxrZ3K5pH2dhidvibUOAGBE--WI,1456
|
48
49
|
langroid/agent/tools/metaphor_search_tool.py,sha256=ccyEhkShH5MxW6-sx1n0BLpD_GForQddS_nNvBZ67Ik,2561
|
@@ -51,6 +52,7 @@ langroid/agent/tools/recipient_tool.py,sha256=dr0yTxgNEIoxUYxH6TtaExC4G_8WdJ0xGo
|
|
51
52
|
langroid/agent/tools/retrieval_tool.py,sha256=zcAV20PP_6VzSd-UE-IJcabaBseFL_QNz59Bnig8-lE,946
|
52
53
|
langroid/agent/tools/rewind_tool.py,sha256=XAXL3BpNhCmBGYq_qi_sZfHJuIw7NY2jp4wnojJ7WRs,5606
|
53
54
|
langroid/agent/tools/segment_extract_tool.py,sha256=__srZ_VGYLVOdPrITUM8S0HpmX4q7r5FHWMDdHdEv8w,1440
|
55
|
+
langroid/agent/tools/tavily_search_tool.py,sha256=soI-j0HdgVQLf09wRQScaEK4b5RpAX9C4cwOivRFWWI,1903
|
54
56
|
langroid/cachedb/__init__.py,sha256=icAT2s7Vhf-ZGUeqpDQGNU6ob6o0aFEyjwcxxUGRFjg,225
|
55
57
|
langroid/cachedb/base.py,sha256=ztVjB1DtN6pLCujCWnR6xruHxwVj3XkYniRTYAKKqk0,1354
|
56
58
|
langroid/cachedb/momento_cachedb.py,sha256=YEOJ62hEcV6iIeMr5aGgRYgWQqFYaej9gEDEcY0sm7M,3172
|
@@ -86,13 +88,13 @@ langroid/parsing/parser.py,sha256=pPzM3zXQvFtwTyQPtDha15oZhu1O3OKDLECnkB8waxg,12
|
|
86
88
|
langroid/parsing/pdf_utils.py,sha256=rmNJ9UzuBgXTAYwj1TtRJcD8h53x7cizhgyYHKO88I4,1513
|
87
89
|
langroid/parsing/repo_loader.py,sha256=3GjvPJS6Vf5L6gV2zOU8s-Tf1oq_fZm-IB_RL_7CTsY,29373
|
88
90
|
langroid/parsing/routing.py,sha256=-FcnlqldzL4ZoxuDwXjQPNHgBe9F9-F4R6q7b_z9CvI,1232
|
89
|
-
langroid/parsing/search.py,sha256=
|
91
|
+
langroid/parsing/search.py,sha256=YPCwezM0c4PWbNUMEmQ5RrJBtvX4aWZ1CMCJFs4sqFo,9806
|
90
92
|
langroid/parsing/spider.py,sha256=hAVM6wxh1pQ0EN4tI5wMBtAjIk0T-xnpi-ZUzWybhos,3258
|
91
93
|
langroid/parsing/table_loader.py,sha256=qNM4obT_0Y4tjrxNBCNUYjKQ9oETCZ7FbolKBTcz-GM,3410
|
92
94
|
langroid/parsing/url_loader.py,sha256=JK48KktLRDBfjrt4nsUfy92M6yGdEeicAqOum2MdULM,4656
|
93
95
|
langroid/parsing/urls.py,sha256=86omykgxo4hg2jyF10Ef-FJa9n6MgXdSXy2mImqgo5c,8076
|
94
|
-
langroid/parsing/utils.py,sha256=
|
95
|
-
langroid/parsing/web_search.py,sha256=
|
96
|
+
langroid/parsing/utils.py,sha256=ZWMS7oG04GUY9EAIwnFN6KKo_ePCKhqk_H8jW6TDT0s,12805
|
97
|
+
langroid/parsing/web_search.py,sha256=wWSmV0METFTGPhHJIs-M4tog2Aur_75Pxr4a49cKDkU,7042
|
96
98
|
langroid/prompts/__init__.py,sha256=RW11vK6jiLPuaUh4GpeFvstti73gkm8_rDMtrbo2YsU,142
|
97
99
|
langroid/prompts/dialog.py,sha256=SpfiSyofSgy2pwD1YboHR_yHO3LEEMbv6j2sm874jKo,331
|
98
100
|
langroid/prompts/prompts_config.py,sha256=p_lp9nbMuQwhhMwAZsOxveRw9C0ZFZvql7pdIfgVZYo,143
|
@@ -116,16 +118,17 @@ langroid/utils/output/__init__.py,sha256=7P0f--4IZneNsTxXY5fd6d6iW-CeVe-KSsl-87s
|
|
116
118
|
langroid/utils/output/citations.py,sha256=9T69O_N6mxPQjQ-qC1vKS8_kyg1z5hDQXMhBsA45xkk,3147
|
117
119
|
langroid/utils/output/printing.py,sha256=yzPJZN-8_jyOJmI9N_oLwEDfjMwVgk3IDiwnZ4eK_AE,2962
|
118
120
|
langroid/utils/output/status.py,sha256=rzbE7mDJcgNNvdtylCseQcPGCGghtJvVq3lB-OPJ49E,1049
|
119
|
-
langroid/vector_store/__init__.py,sha256=
|
120
|
-
langroid/vector_store/base.py,sha256=
|
121
|
+
langroid/vector_store/__init__.py,sha256=8ktJUVsVUoc7FMmkUFpFBZu7VMWUqQY9zpm4kEJ8yTs,1537
|
122
|
+
langroid/vector_store/base.py,sha256=BgQzTScKNzKr3F3o9jrQNG-b3Dv16wKEGSM9jg-W03Y,14752
|
121
123
|
langroid/vector_store/chromadb.py,sha256=p9mEqJwO2BrL2jSSXfa23kCPlPOwWpF3xJYd5zoWw_c,8661
|
122
124
|
langroid/vector_store/lancedb.py,sha256=Qd20gKjWozPWfW5-D66J6U8dSrJo1yl-maj6s1lbf1c,14688
|
123
125
|
langroid/vector_store/meilisearch.py,sha256=6frB7GFWeWmeKzRfLZIvzRjllniZ1cYj3HmhHQICXLs,11663
|
124
126
|
langroid/vector_store/momento.py,sha256=xOaU7Hlyyn_5ihb0ARS5JHtmrKrTCt2IdRA-ioMM5ek,10307
|
125
|
-
langroid/vector_store/
|
127
|
+
langroid/vector_store/pineconedb.py,sha256=otxXZNaBKb9f_H75HTaU3lMHiaR2NUp5MqwLZXpEY9M,14994
|
128
|
+
langroid/vector_store/postgres.py,sha256=DQHd6dt-OcV_QVNm-ymn28rlTfhI6hqgcpLTPCsm0jI,15990
|
126
129
|
langroid/vector_store/qdrantdb.py,sha256=v7TAsIoj_vxeKDYS9tpwJLBZA8fuTweTYxHo0X_uawM,17949
|
127
|
-
langroid/vector_store/weaviatedb.py,sha256=
|
128
|
-
langroid-0.
|
129
|
-
langroid-0.
|
130
|
-
langroid-0.
|
131
|
-
langroid-0.
|
130
|
+
langroid/vector_store/weaviatedb.py,sha256=ONEr2iGS0Ii73oMe7tRk6bB-BEXQUa70fYSrdI8d3yo,11481
|
131
|
+
langroid-0.41.0.dist-info/METADATA,sha256=jCbP1nZgmhcN4XJE7eh8SBMbFEwNVV8yVoZqrmV8pCQ,61259
|
132
|
+
langroid-0.41.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
133
|
+
langroid-0.41.0.dist-info/licenses/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
|
134
|
+
langroid-0.41.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|