langroid 0.35.0__py3-none-any.whl → 0.36.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langroid/agent/chat_agent.py +8 -11
- langroid/vector_store/__init__.py +11 -0
- langroid/vector_store/base.py +3 -0
- langroid/vector_store/weaviatedb.py +271 -0
- {langroid-0.35.0.dist-info → langroid-0.36.0.dist-info}/METADATA +5 -1
- {langroid-0.35.0.dist-info → langroid-0.36.0.dist-info}/RECORD +8 -7
- {langroid-0.35.0.dist-info → langroid-0.36.0.dist-info}/WHEEL +0 -0
- {langroid-0.35.0.dist-info → langroid-0.36.0.dist-info}/licenses/LICENSE +0 -0
langroid/agent/chat_agent.py
CHANGED
@@ -416,7 +416,7 @@ class ChatAgent(Agent):
|
|
416
416
|
]
|
417
417
|
|
418
418
|
if len(usable_tool_classes) == 0:
|
419
|
-
return "
|
419
|
+
return ""
|
420
420
|
format_instructions = "\n\n".join(
|
421
421
|
[
|
422
422
|
msg_cls.format_instructions(tool=self.config.use_tools)
|
@@ -568,17 +568,14 @@ class ChatAgent(Agent):
|
|
568
568
|
Returns:
|
569
569
|
LLMMessage object
|
570
570
|
"""
|
571
|
-
content =
|
572
|
-
|
573
|
-
|
574
|
-
|
575
|
-
|
576
|
-
|
577
|
-
|
571
|
+
content = self.system_message
|
572
|
+
if self.system_tool_instructions != "":
|
573
|
+
content += "\n\n" + self.system_tool_instructions
|
574
|
+
if self.system_tool_format_instructions != "":
|
575
|
+
content += "\n\n" + self.system_tool_format_instructions
|
576
|
+
if self.output_format_instructions != "":
|
577
|
+
content += "\n\n" + self.output_format_instructions
|
578
578
|
|
579
|
-
{self.output_format_instructions}
|
580
|
-
"""
|
581
|
-
)
|
582
579
|
# remove leading and trailing newlines and other whitespace
|
583
580
|
return LLMMessage(role=Role.SYSTEM, content=content.strip())
|
584
581
|
|
@@ -48,3 +48,14 @@ try:
|
|
48
48
|
__all__.extend(["chromadb", "ChromaDBConfig", "ChromaDB"])
|
49
49
|
except ImportError:
|
50
50
|
pass
|
51
|
+
|
52
|
+
try:
|
53
|
+
from . import weaviatedb
|
54
|
+
from .weaviatedb import WeaviateDBConfig, WeaviateDB
|
55
|
+
|
56
|
+
weaviatedb
|
57
|
+
WeaviateDB
|
58
|
+
WeaviateDBConfig
|
59
|
+
__all__.extend(["weaviatedb", "WeaviateDB", "WeaviateDBConfig"])
|
60
|
+
except ImportError:
|
61
|
+
pass
|
langroid/vector_store/base.py
CHANGED
@@ -59,6 +59,7 @@ class VectorStore(ABC):
|
|
59
59
|
from langroid.vector_store.meilisearch import MeiliSearch, MeiliSearchConfig
|
60
60
|
from langroid.vector_store.momento import MomentoVI, MomentoVIConfig
|
61
61
|
from langroid.vector_store.qdrantdb import QdrantDB, QdrantDBConfig
|
62
|
+
from langroid.vector_store.weaviatedb import WeaviateDB, WeaviateDBConfig
|
62
63
|
|
63
64
|
if isinstance(config, QdrantDBConfig):
|
64
65
|
return QdrantDB(config)
|
@@ -70,6 +71,8 @@ class VectorStore(ABC):
|
|
70
71
|
return LanceDB(config)
|
71
72
|
elif isinstance(config, MeiliSearchConfig):
|
72
73
|
return MeiliSearch(config)
|
74
|
+
elif isinstance(config, WeaviateDBConfig):
|
75
|
+
return WeaviateDB(config)
|
73
76
|
|
74
77
|
else:
|
75
78
|
logger.warning(
|
@@ -0,0 +1,271 @@
|
|
1
|
+
import logging
|
2
|
+
import os
|
3
|
+
import re
|
4
|
+
from typing import Any, List, Optional, Sequence, Tuple
|
5
|
+
|
6
|
+
from dotenv import load_dotenv
|
7
|
+
|
8
|
+
from langroid.embedding_models.base import (
|
9
|
+
EmbeddingModelsConfig,
|
10
|
+
)
|
11
|
+
from langroid.embedding_models.models import OpenAIEmbeddingsConfig
|
12
|
+
from langroid.exceptions import LangroidImportError
|
13
|
+
from langroid.mytypes import DocMetaData, Document, EmbeddingFunction
|
14
|
+
from langroid.utils.configuration import settings
|
15
|
+
from langroid.vector_store.base import VectorStore, VectorStoreConfig
|
16
|
+
|
17
|
+
logger = logging.getLogger(__name__)
|
18
|
+
try:
|
19
|
+
import weaviate
|
20
|
+
from weaviate.classes.config import (
|
21
|
+
Configure,
|
22
|
+
VectorDistances,
|
23
|
+
)
|
24
|
+
from weaviate.classes.init import Auth
|
25
|
+
from weaviate.classes.query import Filter, MetadataQuery
|
26
|
+
from weaviate.util import generate_uuid5, get_valid_uuid
|
27
|
+
except ImportError:
|
28
|
+
raise LangroidImportError("weaviate", "weaviate")
|
29
|
+
|
30
|
+
|
31
|
+
class WeaviateDBConfig(VectorStoreConfig):
    """Settings for the Weaviate-backed vector store.

    Extends ``VectorStoreConfig`` with the defaults declared below.
    """

    # Collection to operate on; the store's methods raise ValueError when
    # this is None and a collection is required.
    collection_name: Optional[str] = "temp"
    # Embedding-model configuration; defaults to the OpenAI embeddings config.
    embedding: EmbeddingModelsConfig = OpenAIEmbeddingsConfig()
    # Distance metric; defaults to cosine distance.
    distance: str = VectorDistances.COSINE
|
35
|
+
|
36
|
+
|
37
|
+
class WeaviateDB(VectorStore):
|
38
|
+
def __init__(self, config: WeaviateDBConfig = WeaviateDBConfig()):
    """Connect to a Weaviate Cloud cluster using credentials from the environment.

    ``WEAVIATE_API_KEY`` and ``WEAVIATE_API_URL`` are read from the process
    environment after ``load_dotenv()`` has been called.

    Args:
        config: store settings; when ``config.collection_name`` is set it is
            normalised to a valid Weaviate collection name and stored back
            on the config.

    Raises:
        ValueError: if either environment variable is unset, or if the
            configured collection name cannot be made valid.
    """
    super().__init__(config)
    self.config: WeaviateDBConfig = config
    # NOTE(review): self.embedding_model is presumably set up by the base
    # class constructor -- confirm against VectorStore.__init__.
    self.embedding_fn: EmbeddingFunction = self.embedding_model.embedding_fn()
    self.embedding_dim = self.embedding_model.embedding_dims
    load_dotenv()
    key = os.getenv("WEAVIATE_API_KEY")
    url = os.getenv("WEAVIATE_API_URL")
    if key is None or url is None:
        # Fail with a clear message instead of handing None to the client,
        # which cannot connect without a URL and an API key.
        raise ValueError(
            "WEAVIATE_API_KEY and WEAVIATE_API_URL env variables must be set "
            "to use WeaviateDB in cloud mode. Please set these values in "
            "your .env file."
        )
    self.client = weaviate.connect_to_weaviate_cloud(
        cluster_url=url,
        auth_credentials=Auth.api_key(key),
    )
    if config.collection_name is not None:
        # Keep the normalised name: later lookups compare it with the names
        # the server reports, so the raw spelling must not be retained.
        self.config.collection_name = (
            WeaviateDB.validate_and_format_collection_name(config.collection_name)
        )
|
59
|
+
|
60
|
+
def clear_empty_collections(self) -> int:
    """Delete every collection that currently holds no objects.

    Returns:
        The number of collections that were deleted.
    """
    collections = self.client.collections
    deleted = 0
    for name in collections.list_all():
        if len(collections.get(name)) != 0:
            # Non-empty collections are left alone.
            continue
        deleted += 1
        collections.delete(name)
    return deleted
|
69
|
+
|
70
|
+
def list_collections(self, empty: bool = False) -> List[str]:
    """List the names of the collections known to the client.

    Args:
        empty: when True, return every collection name; when False
            (default), return only collections holding at least one object.

    Returns:
        Collection names, in the order reported by ``list_all()``.
    """
    collections = self.client.collections
    all_names = list(collections.list_all().keys())
    if empty:
        return all_names

    populated = []
    for name in all_names:
        if len(collections.get(name)) > 0:
            populated.append(name)
    return populated
|
81
|
+
|
82
|
+
def clear_all_collections(self, really: bool = False, prefix: str = "") -> int:
    """Delete all collections whose name starts with ``prefix``.

    Args:
        really: safety switch; nothing is deleted unless this is True.
        prefix: only collections whose name starts with this string are
            deleted. The default empty string matches every collection.

    Returns:
        The number of collections deleted (0 when ``really`` is False or
        when no collection matches the prefix).
    """
    if not really:
        logger.warning(
            "Not really deleting all collections ,set really=True to confirm"
        )
        return 0

    targets = []
    for candidate in self.list_collections(empty=True):
        if candidate.startswith(prefix):
            targets.append(candidate)
    if len(targets) == 0:
        logger.warning(f"No collections found with prefix {prefix}")
        return 0

    n_empty_deletes = 0
    n_non_empty_deletes = 0
    for target in targets:
        size = len(self.client.collections.get(target))
        # Tally empty and non-empty collections separately for the log line.
        if size == 0:
            n_empty_deletes += 1
        elif size > 0:
            n_non_empty_deletes += 1
        self.client.collections.delete(target)

    logger.warning(
        f"""
        Deleted {n_empty_deletes} empty collections and
        {n_non_empty_deletes} non-empty collections.
        """
    )
    return n_empty_deletes + n_non_empty_deletes
|
110
|
+
|
111
|
+
def delete_collection(self, collection_name: str) -> None:
    """Delete the collection called ``collection_name``.

    Args:
        collection_name: name handed to the client's delete call as-is.
    """
    collections = self.client.collections
    collections.delete(name=collection_name)
|
113
|
+
|
114
|
+
def create_collection(self, collection_name: str, replace: bool = False) -> None:
    """Create an empty collection and make it the store's active collection.

    The name is first normalised with
    ``validate_and_format_collection_name`` and written to
    ``self.config.collection_name``.

    If a collection with that name already exists:
      * non-empty and ``replace`` is False: it is kept and nothing is created;
      * otherwise it is deleted and recreated.

    Args:
        collection_name: requested name (normalised before use).
        replace: whether an existing non-empty collection may be discarded.

    Raises:
        ValueError: if the name cannot be made valid.
    """
    collection_name = WeaviateDB.validate_and_format_collection_name(
        collection_name
    )
    self.config.collection_name = collection_name
    if self.client.collections.exists(name=collection_name):
        coll = self.client.collections.get(name=collection_name)
        if len(coll) > 0:
            logger.warning(f"Non-empty Collection {collection_name} already exists")
            if not replace:
                logger.warning("Not replacing collection")
                return
            else:
                logger.warning("Recreating fresh collection")
        self.client.collections.delete(name=collection_name)

    # NOTE(review): the metric is fixed to cosine here; config.distance is
    # not consulted.
    vector_index_config = Configure.VectorIndex.hnsw(
        distance_metric=VectorDistances.COSINE,
    )
    # No server-side vectorizer: this class computes embeddings itself and
    # sends explicit vectors on insert and on search. (A former branch meant
    # to enable the OpenAI vectorizer compared the embedding config instance
    # with the config class, which is never equal, so it never ran.)
    self.client.collections.create(
        name=collection_name,
        vector_index_config=vector_index_config,
        vectorizer_config=None,
    )
    collection_info = self.client.collections.get(name=collection_name)
    # Sanity check: a collection that was just created holds no objects.
    assert len(collection_info) == 0
    if settings.debug:
        # Temporarily raise verbosity so the collection info is emitted,
        # then restore the previous level.
        level = logger.getEffectiveLevel()
        logger.setLevel(logging.INFO)
        logger.info(collection_info)
        logger.setLevel(level)
|
152
|
+
|
153
|
+
def add_documents(self, documents: Sequence[Document]) -> None:
    """Embed ``documents`` and insert them into the configured collection.

    Each document's ``metadata.id`` is rewritten in place to a UUID string
    (see ``_create_valid_uuid_id``), and that UUID is used as the object id.
    The collection is created first if it does not exist yet.

    Args:
        documents: documents to ingest; an empty sequence is a no-op.

    Raises:
        ValueError: if no collection name is configured, or the configured
            name cannot be made valid.
    """
    # Cheap checks first, before any network or embedding work.
    if len(documents) == 0:
        return
    if self.config.collection_name is None:
        raise ValueError("No collection name set, cannot ingest docs")

    super().maybe_add_ids(documents)
    for doc in documents:
        doc.metadata.id = str(self._create_valid_uuid_id(doc.metadata.id))

    document_dicts = [doc.dict() for doc in documents]
    embedding_vecs = self.embedding_fn([doc.content for doc in documents])

    # Compare using the normalised name. Testing the raw configured name
    # (e.g. "temp") against the names the server reports (e.g. "Temp")
    # would wrongly conclude the collection is missing and recreate it
    # with replace=True, discarding the data already stored in it.
    collection_name = WeaviateDB.validate_and_format_collection_name(
        self.config.collection_name
    )
    self.config.collection_name = collection_name
    if not self.client.collections.exists(name=collection_name):
        self.create_collection(collection_name, replace=True)

    collection = self.client.collections.get(collection_name)
    with collection.batch.dynamic() as batch:
        for i, doc_dict in enumerate(document_dicts):
            # The id travels as the object's uuid, not as a stored property.
            doc_id = doc_dict["metadata"].pop("id", None)
            batch.add_object(
                properties=doc_dict, uuid=doc_id, vector=embedding_vecs[i]
            )
|
172
|
+
|
173
|
+
def get_all_documents(self, where: str = "") -> List[Document]:
    """Return every object in the configured collection as a Document.

    Args:
        where: accepted but not used by this implementation; no filtering
            is applied.

    Returns:
        One Document per object yielded by the collection's iterator.

    Raises:
        ValueError: if no collection name is configured.
    """
    name = self.config.collection_name
    if name is None:
        raise ValueError("No collection name set, cannot retrieve docs")
    # cannot use filter as client does not support json type queries
    collection = self.client.collections.get(name)
    documents = []
    for obj in collection.iterator():
        documents.append(self.weaviate_obj_to_doc(obj))
    return documents
|
179
|
+
|
180
|
+
def get_documents_by_ids(self, ids: List[str]) -> List[Document]:
    """Fetch the documents whose ids appear in ``ids``.

    Args:
        ids: object ids to look up. Ids with no matching object are skipped.

    Returns:
        The matching documents, in the order of ``ids``. An empty ``ids``
        list yields an empty result without querying the server.

    Raises:
        ValueError: if no collection name is configured.
    """
    if self.config.collection_name is None:
        raise ValueError("No collection name set, cannot retrieve docs")
    if not ids:
        # Nothing to look up; avoid sending an empty filter to the server.
        return []

    collection = self.client.collections.get(self.config.collection_name)
    result = collection.query.fetch_objects(
        filters=Filter.by_property("_id").contains_any(ids),
        limit=len(collection),
    )

    id_to_doc = {}
    for item in result.objects:
        doc = self.weaviate_obj_to_doc(item)
        id_to_doc[doc.metadata.id] = doc

    # Reconstruct the list of documents in the original order of input ids
    return [id_to_doc[doc_id] for doc_id in ids if doc_id in id_to_doc]
|
200
|
+
|
201
|
+
def similar_texts_with_scores(
    self, text: str, k: int = 1, where: Optional[str] = None
) -> List[Tuple[Document, float]]:
    """Return the stored documents nearest to ``text`` with a score each.

    Args:
        text: query text; it is embedded with ``self.embedding_fn``.
        k: maximum number of results requested from the server.
        where: accepted but not used by this implementation; no filtering
            is applied.

    Returns:
        ``(document, score)`` pairs where ``score`` is ``1 - distance`` as
        reported by the server for each hit.

    Raises:
        ValueError: if no collection name is configured.
    """
    # Validate before embedding so a misconfigured store does not spend an
    # embedding call on a request that cannot succeed.
    if self.config.collection_name is None:
        raise ValueError("No collections name set,cannot search")
    embedding = self.embedding_fn([text])[0]
    coll = self.client.collections.get(self.config.collection_name)
    response = coll.query.near_vector(
        near_vector=embedding,
        limit=k,
        return_properties=True,
        return_metadata=MetadataQuery(distance=True),
    )
    return [
        (self.weaviate_obj_to_doc(item), 1 - item.metadata.distance)
        for item in response.objects
    ]
|
218
|
+
|
219
|
+
def _create_valid_uuid_id(self, id: str) -> Any:
    """Turn ``id`` into a UUID value usable as a Weaviate object id.

    Args:
        id: candidate identifier.

    Returns:
        The result of ``get_valid_uuid(id)`` when that call succeeds;
        otherwise the result of ``generate_uuid5(id)``.
    """
    try:
        valid = get_valid_uuid(id)
    except Exception:
        # Not accepted as a UUID: derive one from the value instead.
        valid = generate_uuid5(id)
    return valid
|
225
|
+
|
226
|
+
def weaviate_obj_to_doc(self, input_object: Any) -> Document:
    """Convert an object returned by the Weaviate client into a Document.

    Args:
        input_object: object exposing ``properties`` (a mapping with
            optional ``content`` and ``metadata`` entries) and ``uuid``.

    Returns:
        A Document whose content is the ``content`` property (empty string
        if absent) and whose metadata id is the object's uuid.
    """
    properties = input_object.properties
    content = properties.get("content", "")
    # Copy the metadata so the caller's object is not modified; converting
    # the same object twice must give the same result. A null metadata
    # property is treated like a missing one.
    metadata_dict = dict(properties.get("metadata") or {})

    window_ids = [str(w) for w in (metadata_dict.pop("window_ids", None) or [])]
    # The id always comes from the object's uuid; drop any stored copy so it
    # cannot collide with the explicit ``id=`` keyword below.
    metadata_dict.pop("id", None)

    # Ensure the id is a valid UUID string
    id_value = get_valid_uuid(input_object.uuid)

    metadata = DocMetaData(id=id_value, window_ids=window_ids, **metadata_dict)

    return Document(content=content, metadata=metadata)
|
239
|
+
|
240
|
+
@staticmethod
def validate_and_format_collection_name(name: str) -> str:
    """
    Formats the collection name to comply with Weaviate's naming rules:
    - Name must start with a capital letter.
    - Name can only contain letters, numbers, and underscores.
    - Replaces invalid characters with underscores.

    Only the first character's case is changed; the rest of the name keeps
    its original casing.

    Args:
        name: requested collection name.

    Returns:
        The formatted name. A warning is logged when it differs from ``name``.

    Raises:
        ValueError: if ``name`` is empty, or if the formatted name still does
            not start with a capital letter (e.g. it starts with a digit or
            an underscore).
    """
    if not name:
        raise ValueError("Collection name cannot be empty.")

    formatted_name = re.sub(r"[^a-zA-Z0-9_]", "_", name)

    # Upper-case the first character only. str.capitalize() would also
    # lower-case every following character ("myDocs" -> "Mydocs").
    formatted_name = formatted_name[0].upper() + formatted_name[1:]

    # Check if the name now meets the criteria
    if not re.match(r"^[A-Z][A-Za-z0-9_]*$", formatted_name):
        raise ValueError(
            f"Invalid collection name '{name}'."
            " Names must start with a capital letter "
            "and contain only letters, numbers, and underscores."
        )

    if formatted_name != name:
        logger.warning(
            f"Collection name '{name}' was reformatted to '{formatted_name}' "
            "to comply with Weaviate's rules."
        )

    return formatted_name
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: langroid
|
3
|
-
Version: 0.
|
3
|
+
Version: 0.36.0
|
4
4
|
Summary: Harness LLMs with Multi-Agent Programming
|
5
5
|
Author-email: Prasad Chalasani <pchalasani@gmail.com>
|
6
6
|
License: MIT
|
@@ -75,6 +75,7 @@ Requires-Dist: sqlalchemy<3.0.0,>=2.0.19; extra == 'all'
|
|
75
75
|
Requires-Dist: torch<3.0.0,>=2.0.0; extra == 'all'
|
76
76
|
Requires-Dist: transformers<5.0.0,>=4.40.1; extra == 'all'
|
77
77
|
Requires-Dist: unstructured[docx,pdf,pptx]<0.10.18,>=0.10.16; extra == 'all'
|
78
|
+
Requires-Dist: weaviate-client>=4.9.6; extra == 'all'
|
78
79
|
Provides-Extra: arango
|
79
80
|
Requires-Dist: arango-datasets<2.0.0,>=1.2.2; extra == 'arango'
|
80
81
|
Requires-Dist: python-arango<9.0.0,>=8.1.2; extra == 'arango'
|
@@ -148,6 +149,9 @@ Requires-Dist: chromadb<=0.4.23,>=0.4.21; extra == 'vecdbs'
|
|
148
149
|
Requires-Dist: lancedb<0.9.0,>=0.8.2; extra == 'vecdbs'
|
149
150
|
Requires-Dist: pyarrow<16.0.0,>=15.0.0; extra == 'vecdbs'
|
150
151
|
Requires-Dist: tantivy<0.22.0,>=0.21.0; extra == 'vecdbs'
|
152
|
+
Requires-Dist: weaviate-client>=4.9.6; extra == 'vecdbs'
|
153
|
+
Provides-Extra: weaviate
|
154
|
+
Requires-Dist: weaviate-client>=4.9.6; extra == 'weaviate'
|
151
155
|
Description-Content-Type: text/markdown
|
152
156
|
|
153
157
|
<div align="center">
|
@@ -5,7 +5,7 @@ langroid/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
5
|
langroid/agent/__init__.py,sha256=ll0Cubd2DZ-fsCMl7e10hf9ZjFGKzphfBco396IKITY,786
|
6
6
|
langroid/agent/base.py,sha256=oThlrYygKDu1-bKjAfygldJ511gMKT8Z0qCrD52DdDM,77834
|
7
7
|
langroid/agent/batch.py,sha256=vi1r5i1-vN80WfqHDSwjEym_KfGsqPGUtwktmiK1nuk,20635
|
8
|
-
langroid/agent/chat_agent.py,sha256=
|
8
|
+
langroid/agent/chat_agent.py,sha256=UvcZRoQ5jIYvlei8rku0T2Ul8tMpEhCJ2FGvr5_yc5Q,82275
|
9
9
|
langroid/agent/chat_document.py,sha256=xzMtrPbaW-Y-BnF7kuhr2dorsD-D5rMWzfOqJ8HAoo8,17885
|
10
10
|
langroid/agent/openai_assistant.py,sha256=JkAcs02bIrgPNVvUWVR06VCthc5-ulla2QMBzux_q6o,34340
|
11
11
|
langroid/agent/task.py,sha256=XrXUbSoiFasvpIsZPn_cBpdWaTCKljJPRimtLMrSZrs,90347
|
@@ -114,14 +114,15 @@ langroid/utils/output/__init__.py,sha256=7P0f--4IZneNsTxXY5fd6d6iW-CeVe-KSsl-87s
|
|
114
114
|
langroid/utils/output/citations.py,sha256=PSY2cpti8W-ZGFMAgj1lYoEIZy0lsniLpCliMsVkXtc,1425
|
115
115
|
langroid/utils/output/printing.py,sha256=yzPJZN-8_jyOJmI9N_oLwEDfjMwVgk3IDiwnZ4eK_AE,2962
|
116
116
|
langroid/utils/output/status.py,sha256=rzbE7mDJcgNNvdtylCseQcPGCGghtJvVq3lB-OPJ49E,1049
|
117
|
-
langroid/vector_store/__init__.py,sha256=
|
118
|
-
langroid/vector_store/base.py,sha256=
|
117
|
+
langroid/vector_store/__init__.py,sha256=BcoOm1tG3y0EqjkIGmMOHkY9iTUhDHgyruknWDKgqIg,1214
|
118
|
+
langroid/vector_store/base.py,sha256=c9slwOcSWCG0SFGDuPLAQF9vBLDb4Eg8uaUol27Jf9c,14209
|
119
119
|
langroid/vector_store/chromadb.py,sha256=9WXW9IoSnhOmGEtMruVhEtVWL_VO6NXnPIz-nzh0gIQ,8235
|
120
120
|
langroid/vector_store/lancedb.py,sha256=b3_vWkTjG8mweZ7ZNlUD-NjmQP_rLBZfyKWcxt2vosA,14855
|
121
121
|
langroid/vector_store/meilisearch.py,sha256=6frB7GFWeWmeKzRfLZIvzRjllniZ1cYj3HmhHQICXLs,11663
|
122
122
|
langroid/vector_store/momento.py,sha256=UNHGT6jXuQtqY9f6MdqGU14bVnS0zHgIJUa30ULpUJo,10474
|
123
123
|
langroid/vector_store/qdrantdb.py,sha256=HRLCt-FG8y4718omwpFaQZnWeYxPj0XCwS4tjokI1sU,18116
|
124
|
-
langroid
|
125
|
-
langroid-0.
|
126
|
-
langroid-0.
|
127
|
-
langroid-0.
|
124
|
+
langroid/vector_store/weaviatedb.py,sha256=Jxe-cp2PyZdQ9NQVNZJ-CnsYsNxgUBdfAOoLZQEN650,10602
|
125
|
+
langroid-0.36.0.dist-info/METADATA,sha256=aDLzYdeo80UbUZB0oEv9Rc1WMgWvG_jQtrBZnI2y5Bg,59508
|
126
|
+
langroid-0.36.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
127
|
+
langroid-0.36.0.dist-info/licenses/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
|
128
|
+
langroid-0.36.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|