langroid 0.35.0__py3-none-any.whl → 0.36.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -416,7 +416,7 @@ class ChatAgent(Agent):
416
416
  ]
417
417
 
418
418
  if len(usable_tool_classes) == 0:
419
- return "You can ask questions in natural language."
419
+ return ""
420
420
  format_instructions = "\n\n".join(
421
421
  [
422
422
  msg_cls.format_instructions(tool=self.config.use_tools)
@@ -568,17 +568,14 @@ class ChatAgent(Agent):
568
568
  Returns:
569
569
  LLMMessage object
570
570
  """
571
- content = textwrap.dedent(
572
- f"""
573
- {self.system_message}
574
-
575
- {self.system_tool_instructions}
576
-
577
- {self.system_tool_format_instructions}
571
+ content = self.system_message
572
+ if self.system_tool_instructions != "":
573
+ content += "\n\n" + self.system_tool_instructions
574
+ if self.system_tool_format_instructions != "":
575
+ content += "\n\n" + self.system_tool_format_instructions
576
+ if self.output_format_instructions != "":
577
+ content += "\n\n" + self.output_format_instructions
578
578
 
579
- {self.output_format_instructions}
580
- """
581
- )
582
579
  # remove leading and trailing newlines and other whitespace
583
580
  return LLMMessage(role=Role.SYSTEM, content=content.strip())
584
581
 
@@ -48,3 +48,14 @@ try:
48
48
  __all__.extend(["chromadb", "ChromaDBConfig", "ChromaDB"])
49
49
  except ImportError:
50
50
  pass
51
+
52
+ try:
53
+ from . import weaviatedb
54
+ from .weaviatedb import WeaviateDBConfig, WeaviateDB
55
+
56
+ weaviatedb
57
+ WeaviateDB
58
+ WeaviateDBConfig
59
+ __all__.extend(["weaviatedb", "WeaviateDB", "WeaviateDBConfig"])
60
+ except ImportError:
61
+ pass
@@ -59,6 +59,7 @@ class VectorStore(ABC):
59
59
  from langroid.vector_store.meilisearch import MeiliSearch, MeiliSearchConfig
60
60
  from langroid.vector_store.momento import MomentoVI, MomentoVIConfig
61
61
  from langroid.vector_store.qdrantdb import QdrantDB, QdrantDBConfig
62
+ from langroid.vector_store.weaviatedb import WeaviateDB, WeaviateDBConfig
62
63
 
63
64
  if isinstance(config, QdrantDBConfig):
64
65
  return QdrantDB(config)
@@ -70,6 +71,8 @@ class VectorStore(ABC):
70
71
  return LanceDB(config)
71
72
  elif isinstance(config, MeiliSearchConfig):
72
73
  return MeiliSearch(config)
74
+ elif isinstance(config, WeaviateDBConfig):
75
+ return WeaviateDB(config)
73
76
 
74
77
  else:
75
78
  logger.warning(
@@ -0,0 +1,271 @@
1
+ import logging
2
+ import os
3
+ import re
4
+ from typing import Any, List, Optional, Sequence, Tuple
5
+
6
+ from dotenv import load_dotenv
7
+
8
+ from langroid.embedding_models.base import (
9
+ EmbeddingModelsConfig,
10
+ )
11
+ from langroid.embedding_models.models import OpenAIEmbeddingsConfig
12
+ from langroid.exceptions import LangroidImportError
13
+ from langroid.mytypes import DocMetaData, Document, EmbeddingFunction
14
+ from langroid.utils.configuration import settings
15
+ from langroid.vector_store.base import VectorStore, VectorStoreConfig
16
+
17
+ logger = logging.getLogger(__name__)
18
+ try:
19
+ import weaviate
20
+ from weaviate.classes.config import (
21
+ Configure,
22
+ VectorDistances,
23
+ )
24
+ from weaviate.classes.init import Auth
25
+ from weaviate.classes.query import Filter, MetadataQuery
26
+ from weaviate.util import generate_uuid5, get_valid_uuid
27
+ except ImportError:
28
+ raise LangroidImportError("weaviate", "weaviate")
29
+
30
+
31
+ class WeaviateDBConfig(VectorStoreConfig):
32
+ collection_name: str | None = "temp"
33
+ embedding: EmbeddingModelsConfig = OpenAIEmbeddingsConfig()
34
+ distance: str = VectorDistances.COSINE
35
+
36
+
37
+ class WeaviateDB(VectorStore):
38
+ def __init__(self, config: WeaviateDBConfig = WeaviateDBConfig()):
39
+ super().__init__(config)
40
+ self.config: WeaviateDBConfig = config
41
+ self.embedding_fn: EmbeddingFunction = self.embedding_model.embedding_fn()
42
+ self.embedding_dim = self.embedding_model.embedding_dims
43
+ load_dotenv()
44
+ key = os.getenv("WEAVIATE_API_KEY")
45
+ url = os.getenv("WEAVIATE_API_URL")
46
+ if None in [key, url]:
47
+ logger.warning(
48
+ """WEAVIATE_API_KEY, WEAVIATE_API_URL env variable must be set to use
49
+ WeaviateDB in cloud mode. Please set these values
50
+ in your .env file.
51
+ """
52
+ )
53
+ self.client = weaviate.connect_to_weaviate_cloud(
54
+ cluster_url=url,
55
+ auth_credentials=Auth.api_key(key),
56
+ )
57
+ if config.collection_name is not None:
58
+ WeaviateDB.validate_and_format_collection_name(config.collection_name)
59
+
60
+ def clear_empty_collections(self) -> int:
61
+ colls = self.client.collections.list_all()
62
+ n_deletes = 0
63
+ for coll_name, _ in colls.items():
64
+ val = self.client.collections.get(coll_name)
65
+ if len(val) == 0:
66
+ n_deletes += 1
67
+ self.client.collections.delete(coll_name)
68
+ return n_deletes
69
+
70
+ def list_collections(self, empty: bool = False) -> List[str]:
71
+ colls = self.client.collections.list_all()
72
+ if empty:
73
+ return list(colls.keys())
74
+ non_empty_colls = [
75
+ coll_name
76
+ for coll_name in colls.keys()
77
+ if len(self.client.collections.get(coll_name)) > 0
78
+ ]
79
+
80
+ return non_empty_colls
81
+
82
+ def clear_all_collections(self, really: bool = False, prefix: str = "") -> int:
83
+ if not really:
84
+ logger.warning(
85
+ "Not really deleting all collections ,set really=True to confirm"
86
+ )
87
+ return 0
88
+ coll_names = [
89
+ c for c in self.list_collections(empty=True) if c.startswith(prefix)
90
+ ]
91
+ if len(coll_names) == 0:
92
+ logger.warning(f"No collections found with prefix {prefix}")
93
+ return 0
94
+ n_empty_deletes = 0
95
+ n_non_empty_deletes = 0
96
+ for name in coll_names:
97
+ info = self.client.collections.get(name)
98
+ points_count = len(info)
99
+
100
+ n_empty_deletes += points_count == 0
101
+ n_non_empty_deletes += points_count > 0
102
+ self.client.collections.delete(name)
103
+ logger.warning(
104
+ f"""
105
+ Deleted {n_empty_deletes} empty collections and
106
+ {n_non_empty_deletes} non-empty collections.
107
+ """
108
+ )
109
+ return n_empty_deletes + n_non_empty_deletes
110
+
111
+ def delete_collection(self, collection_name: str) -> None:
112
+ self.client.collections.delete(name=collection_name)
113
+
114
+ def create_collection(self, collection_name: str, replace: bool = False) -> None:
115
+ collection_name = WeaviateDB.validate_and_format_collection_name(
116
+ collection_name
117
+ )
118
+ self.config.collection_name = collection_name
119
+ if self.client.collections.exists(name=collection_name):
120
+ coll = self.client.collections.get(name=collection_name)
121
+ if len(coll) > 0:
122
+ logger.warning(f"Non-empty Collection {collection_name} already exists")
123
+ if not replace:
124
+ logger.warning("Not replacing collection")
125
+ return
126
+ else:
127
+ logger.warning("Recreating fresh collection")
128
+ self.client.collections.delete(name=collection_name)
129
+
130
+ vector_index_config = Configure.VectorIndex.hnsw(
131
+ distance_metric=VectorDistances.COSINE,
132
+ )
133
+ if self.config.embedding == OpenAIEmbeddingsConfig:
134
+ vectorizer_config = Configure.Vectorizer.text2vec_openai(
135
+ model=self.embedding_model
136
+ )
137
+ else:
138
+ vectorizer_config = None
139
+
140
+ collection_info = self.client.collections.create(
141
+ name=collection_name,
142
+ vector_index_config=vector_index_config,
143
+ vectorizer_config=vectorizer_config,
144
+ )
145
+ collection_info = self.client.collections.get(name=collection_name)
146
+ assert len(collection_info) in [0, None]
147
+ if settings.debug:
148
+ level = logger.getEffectiveLevel()
149
+ logger.setLevel(logging.INFO)
150
+ logger.info(collection_info)
151
+ logger.setLevel(level)
152
+
153
+ def add_documents(self, documents: Sequence[Document]) -> None:
154
+ super().maybe_add_ids(documents)
155
+ colls = self.list_collections(empty=True)
156
+ for doc in documents:
157
+ doc.metadata.id = str(self._create_valid_uuid_id(doc.metadata.id))
158
+ if len(documents) == 0:
159
+ return
160
+
161
+ document_dicts = [doc.dict() for doc in documents]
162
+ embedding_vecs = self.embedding_fn([doc.content for doc in documents])
163
+ if self.config.collection_name is None:
164
+ raise ValueError("No collection name set, cannot ingest docs")
165
+ if self.config.collection_name not in colls:
166
+ self.create_collection(self.config.collection_name, replace=True)
167
+ coll_name = self.client.collections.get(self.config.collection_name)
168
+ with coll_name.batch.dynamic() as batch:
169
+ for i, doc_dict in enumerate(document_dicts):
170
+ id = doc_dict["metadata"].pop("id", None)
171
+ batch.add_object(properties=doc_dict, uuid=id, vector=embedding_vecs[i])
172
+
173
+ def get_all_documents(self, where: str = "") -> List[Document]:
174
+ if self.config.collection_name is None:
175
+ raise ValueError("No collection name set, cannot retrieve docs")
176
+ # cannot use filter as client does not support json type queries
177
+ coll = self.client.collections.get(self.config.collection_name)
178
+ return [self.weaviate_obj_to_doc(item) for item in coll.iterator()]
179
+
180
+ def get_documents_by_ids(self, ids: List[str]) -> List[Document]:
181
+ if self.config.collection_name is None:
182
+ raise ValueError("No collection name set, cannot retrieve docs")
183
+
184
+ docs = []
185
+ coll_name = self.client.collections.get(self.config.collection_name)
186
+
187
+ result = coll_name.query.fetch_objects(
188
+ filters=Filter.by_property("_id").contains_any(ids), limit=len(coll_name)
189
+ )
190
+
191
+ id_to_doc = {}
192
+ for item in result.objects:
193
+ doc = self.weaviate_obj_to_doc(item)
194
+ id_to_doc[doc.metadata.id] = doc
195
+
196
+ # Reconstruct the list of documents in the original order of input ids
197
+ docs = [id_to_doc[id] for id in ids if id in id_to_doc]
198
+
199
+ return docs
200
+
201
+ def similar_texts_with_scores(
202
+ self, text: str, k: int = 1, where: Optional[str] = None
203
+ ) -> List[Tuple[Document, float]]:
204
+ embedding = self.embedding_fn([text])[0]
205
+ if self.config.collection_name is None:
206
+ raise ValueError("No collections name set,cannot search")
207
+ coll = self.client.collections.get(self.config.collection_name)
208
+ response = coll.query.near_vector(
209
+ near_vector=embedding,
210
+ limit=k,
211
+ return_properties=True,
212
+ return_metadata=MetadataQuery(distance=True),
213
+ )
214
+ return [
215
+ (self.weaviate_obj_to_doc(item), 1 - item.metadata.distance)
216
+ for item in response.objects
217
+ ]
218
+
219
+ def _create_valid_uuid_id(self, id: str) -> Any:
220
+ try:
221
+ id = get_valid_uuid(id)
222
+ return id
223
+ except Exception:
224
+ return generate_uuid5(id)
225
+
226
+ def weaviate_obj_to_doc(self, input_object: Any) -> Document:
227
+ content = input_object.properties.get("content", "")
228
+ metadata_dict = input_object.properties.get("metadata", {})
229
+
230
+ window_ids = metadata_dict.pop("window_ids", [])
231
+ window_ids = [str(uuid) for uuid in window_ids]
232
+
233
+ # Ensure the id is a valid UUID string
234
+ id_value = get_valid_uuid(input_object.uuid)
235
+
236
+ metadata = DocMetaData(id=id_value, window_ids=window_ids, **metadata_dict)
237
+
238
+ return Document(content=content, metadata=metadata)
239
+
240
+ @staticmethod
241
+ def validate_and_format_collection_name(name: str) -> str:
242
+ """
243
+ Formats the collection name to comply with Weaviate's naming rules:
244
+ - Name must start with a capital letter.
245
+ - Name can only contain letters, numbers, and underscores.
246
+ - Replaces invalid characters with underscores.
247
+ """
248
+ if not name:
249
+ raise ValueError("Collection name cannot be empty.")
250
+
251
+ formatted_name = re.sub(r"[^a-zA-Z0-9_]", "_", name)
252
+
253
+ # Ensure the first letter is capitalized
254
+ if not formatted_name[0].isupper():
255
+ formatted_name = formatted_name.capitalize()
256
+
257
+ # Check if the name now meets the criteria
258
+ if not re.match(r"^[A-Z][A-Za-z0-9_]*$", formatted_name):
259
+ raise ValueError(
260
+ f"Invalid collection name '{name}'."
261
+ " Names must start with a capital letter "
262
+ "and contain only letters, numbers, and underscores."
263
+ )
264
+
265
+ if formatted_name != name:
266
+ logger.warning(
267
+ f"Collection name '{name}' was reformatted to '{formatted_name}' "
268
+ "to comply with Weaviate's rules."
269
+ )
270
+
271
+ return formatted_name
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: langroid
3
- Version: 0.35.0
3
+ Version: 0.36.0
4
4
  Summary: Harness LLMs with Multi-Agent Programming
5
5
  Author-email: Prasad Chalasani <pchalasani@gmail.com>
6
6
  License: MIT
@@ -75,6 +75,7 @@ Requires-Dist: sqlalchemy<3.0.0,>=2.0.19; extra == 'all'
75
75
  Requires-Dist: torch<3.0.0,>=2.0.0; extra == 'all'
76
76
  Requires-Dist: transformers<5.0.0,>=4.40.1; extra == 'all'
77
77
  Requires-Dist: unstructured[docx,pdf,pptx]<0.10.18,>=0.10.16; extra == 'all'
78
+ Requires-Dist: weaviate-client>=4.9.6; extra == 'all'
78
79
  Provides-Extra: arango
79
80
  Requires-Dist: arango-datasets<2.0.0,>=1.2.2; extra == 'arango'
80
81
  Requires-Dist: python-arango<9.0.0,>=8.1.2; extra == 'arango'
@@ -148,6 +149,9 @@ Requires-Dist: chromadb<=0.4.23,>=0.4.21; extra == 'vecdbs'
148
149
  Requires-Dist: lancedb<0.9.0,>=0.8.2; extra == 'vecdbs'
149
150
  Requires-Dist: pyarrow<16.0.0,>=15.0.0; extra == 'vecdbs'
150
151
  Requires-Dist: tantivy<0.22.0,>=0.21.0; extra == 'vecdbs'
152
+ Requires-Dist: weaviate-client>=4.9.6; extra == 'vecdbs'
153
+ Provides-Extra: weaviate
154
+ Requires-Dist: weaviate-client>=4.9.6; extra == 'weaviate'
151
155
  Description-Content-Type: text/markdown
152
156
 
153
157
  <div align="center">
@@ -5,7 +5,7 @@ langroid/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  langroid/agent/__init__.py,sha256=ll0Cubd2DZ-fsCMl7e10hf9ZjFGKzphfBco396IKITY,786
6
6
  langroid/agent/base.py,sha256=oThlrYygKDu1-bKjAfygldJ511gMKT8Z0qCrD52DdDM,77834
7
7
  langroid/agent/batch.py,sha256=vi1r5i1-vN80WfqHDSwjEym_KfGsqPGUtwktmiK1nuk,20635
8
- langroid/agent/chat_agent.py,sha256=A-7Iiiw7jsoJNlWerljM29BidkiIbjPOQIkGZpZHmt0,82210
8
+ langroid/agent/chat_agent.py,sha256=UvcZRoQ5jIYvlei8rku0T2Ul8tMpEhCJ2FGvr5_yc5Q,82275
9
9
  langroid/agent/chat_document.py,sha256=xzMtrPbaW-Y-BnF7kuhr2dorsD-D5rMWzfOqJ8HAoo8,17885
10
10
  langroid/agent/openai_assistant.py,sha256=JkAcs02bIrgPNVvUWVR06VCthc5-ulla2QMBzux_q6o,34340
11
11
  langroid/agent/task.py,sha256=XrXUbSoiFasvpIsZPn_cBpdWaTCKljJPRimtLMrSZrs,90347
@@ -114,14 +114,15 @@ langroid/utils/output/__init__.py,sha256=7P0f--4IZneNsTxXY5fd6d6iW-CeVe-KSsl-87s
114
114
  langroid/utils/output/citations.py,sha256=PSY2cpti8W-ZGFMAgj1lYoEIZy0lsniLpCliMsVkXtc,1425
115
115
  langroid/utils/output/printing.py,sha256=yzPJZN-8_jyOJmI9N_oLwEDfjMwVgk3IDiwnZ4eK_AE,2962
116
116
  langroid/utils/output/status.py,sha256=rzbE7mDJcgNNvdtylCseQcPGCGghtJvVq3lB-OPJ49E,1049
117
- langroid/vector_store/__init__.py,sha256=6xBjb_z4QtUy4vz4RuFbcbSwmHrggHL8-q0DwCf3PMM,972
118
- langroid/vector_store/base.py,sha256=L9_tIr8tghV09sbLZof6MhSp-wMxtdWxkAJsiU6A4-g,14038
117
+ langroid/vector_store/__init__.py,sha256=BcoOm1tG3y0EqjkIGmMOHkY9iTUhDHgyruknWDKgqIg,1214
118
+ langroid/vector_store/base.py,sha256=c9slwOcSWCG0SFGDuPLAQF9vBLDb4Eg8uaUol27Jf9c,14209
119
119
  langroid/vector_store/chromadb.py,sha256=9WXW9IoSnhOmGEtMruVhEtVWL_VO6NXnPIz-nzh0gIQ,8235
120
120
  langroid/vector_store/lancedb.py,sha256=b3_vWkTjG8mweZ7ZNlUD-NjmQP_rLBZfyKWcxt2vosA,14855
121
121
  langroid/vector_store/meilisearch.py,sha256=6frB7GFWeWmeKzRfLZIvzRjllniZ1cYj3HmhHQICXLs,11663
122
122
  langroid/vector_store/momento.py,sha256=UNHGT6jXuQtqY9f6MdqGU14bVnS0zHgIJUa30ULpUJo,10474
123
123
  langroid/vector_store/qdrantdb.py,sha256=HRLCt-FG8y4718omwpFaQZnWeYxPj0XCwS4tjokI1sU,18116
124
- langroid-0.35.0.dist-info/METADATA,sha256=57TZcMnBYqUH2CiqMsjLqJA2oclSLKY4igiXFlZwx_Y,59313
125
- langroid-0.35.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
126
- langroid-0.35.0.dist-info/licenses/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
127
- langroid-0.35.0.dist-info/RECORD,,
124
+ langroid/vector_store/weaviatedb.py,sha256=Jxe-cp2PyZdQ9NQVNZJ-CnsYsNxgUBdfAOoLZQEN650,10602
125
+ langroid-0.36.0.dist-info/METADATA,sha256=aDLzYdeo80UbUZB0oEv9Rc1WMgWvG_jQtrBZnI2y5Bg,59508
126
+ langroid-0.36.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
127
+ langroid-0.36.0.dist-info/licenses/LICENSE,sha256=EgVbvA6VSYgUlvC3RvPKehSg7MFaxWDsFuzLOsPPfJg,1065
128
+ langroid-0.36.0.dist-info/RECORD,,