langroid 0.1.139__py3-none-any.whl → 0.1.219__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
Files changed (97)
  1. langroid/__init__.py +70 -0
  2. langroid/agent/__init__.py +22 -0
  3. langroid/agent/base.py +120 -33
  4. langroid/agent/batch.py +134 -35
  5. langroid/agent/callbacks/__init__.py +0 -0
  6. langroid/agent/callbacks/chainlit.py +608 -0
  7. langroid/agent/chat_agent.py +164 -100
  8. langroid/agent/chat_document.py +19 -2
  9. langroid/agent/openai_assistant.py +20 -10
  10. langroid/agent/special/__init__.py +33 -10
  11. langroid/agent/special/doc_chat_agent.py +521 -108
  12. langroid/agent/special/lance_doc_chat_agent.py +258 -0
  13. langroid/agent/special/lance_rag/__init__.py +9 -0
  14. langroid/agent/special/lance_rag/critic_agent.py +136 -0
  15. langroid/agent/special/lance_rag/lance_rag_task.py +80 -0
  16. langroid/agent/special/lance_rag/query_planner_agent.py +180 -0
  17. langroid/agent/special/lance_tools.py +44 -0
  18. langroid/agent/special/neo4j/__init__.py +0 -0
  19. langroid/agent/special/neo4j/csv_kg_chat.py +174 -0
  20. langroid/agent/special/neo4j/neo4j_chat_agent.py +370 -0
  21. langroid/agent/special/neo4j/utils/__init__.py +0 -0
  22. langroid/agent/special/neo4j/utils/system_message.py +46 -0
  23. langroid/agent/special/relevance_extractor_agent.py +23 -7
  24. langroid/agent/special/retriever_agent.py +29 -174
  25. langroid/agent/special/sql/__init__.py +7 -0
  26. langroid/agent/special/sql/sql_chat_agent.py +47 -23
  27. langroid/agent/special/sql/utils/__init__.py +11 -0
  28. langroid/agent/special/sql/utils/description_extractors.py +95 -46
  29. langroid/agent/special/sql/utils/populate_metadata.py +28 -21
  30. langroid/agent/special/table_chat_agent.py +43 -9
  31. langroid/agent/task.py +423 -114
  32. langroid/agent/tool_message.py +67 -10
  33. langroid/agent/tools/__init__.py +8 -0
  34. langroid/agent/tools/duckduckgo_search_tool.py +66 -0
  35. langroid/agent/tools/google_search_tool.py +11 -0
  36. langroid/agent/tools/metaphor_search_tool.py +67 -0
  37. langroid/agent/tools/recipient_tool.py +6 -24
  38. langroid/agent/tools/sciphi_search_rag_tool.py +79 -0
  39. langroid/cachedb/__init__.py +6 -0
  40. langroid/embedding_models/__init__.py +24 -0
  41. langroid/embedding_models/base.py +9 -1
  42. langroid/embedding_models/models.py +117 -17
  43. langroid/embedding_models/protoc/embeddings.proto +19 -0
  44. langroid/embedding_models/protoc/embeddings_pb2.py +33 -0
  45. langroid/embedding_models/protoc/embeddings_pb2.pyi +50 -0
  46. langroid/embedding_models/protoc/embeddings_pb2_grpc.py +79 -0
  47. langroid/embedding_models/remote_embeds.py +153 -0
  48. langroid/language_models/__init__.py +22 -0
  49. langroid/language_models/azure_openai.py +47 -4
  50. langroid/language_models/base.py +26 -10
  51. langroid/language_models/config.py +5 -0
  52. langroid/language_models/openai_gpt.py +407 -121
  53. langroid/language_models/prompt_formatter/__init__.py +9 -0
  54. langroid/language_models/prompt_formatter/base.py +4 -6
  55. langroid/language_models/prompt_formatter/hf_formatter.py +135 -0
  56. langroid/language_models/utils.py +10 -9
  57. langroid/mytypes.py +10 -4
  58. langroid/parsing/__init__.py +33 -1
  59. langroid/parsing/document_parser.py +259 -63
  60. langroid/parsing/image_text.py +32 -0
  61. langroid/parsing/parse_json.py +143 -0
  62. langroid/parsing/parser.py +20 -7
  63. langroid/parsing/repo_loader.py +108 -46
  64. langroid/parsing/search.py +8 -0
  65. langroid/parsing/table_loader.py +44 -0
  66. langroid/parsing/url_loader.py +59 -13
  67. langroid/parsing/urls.py +18 -9
  68. langroid/parsing/utils.py +130 -9
  69. langroid/parsing/web_search.py +73 -0
  70. langroid/prompts/__init__.py +7 -0
  71. langroid/prompts/chat-gpt4-system-prompt.md +68 -0
  72. langroid/prompts/prompts_config.py +1 -1
  73. langroid/utils/__init__.py +10 -0
  74. langroid/utils/algorithms/__init__.py +3 -0
  75. langroid/utils/configuration.py +0 -1
  76. langroid/utils/constants.py +4 -0
  77. langroid/utils/logging.py +2 -5
  78. langroid/utils/output/__init__.py +15 -2
  79. langroid/utils/output/status.py +33 -0
  80. langroid/utils/pandas_utils.py +30 -0
  81. langroid/utils/pydantic_utils.py +446 -4
  82. langroid/utils/system.py +36 -1
  83. langroid/vector_store/__init__.py +34 -2
  84. langroid/vector_store/base.py +33 -2
  85. langroid/vector_store/chromadb.py +42 -13
  86. langroid/vector_store/lancedb.py +226 -60
  87. langroid/vector_store/meilisearch.py +7 -6
  88. langroid/vector_store/momento.py +3 -2
  89. langroid/vector_store/qdrantdb.py +82 -11
  90. {langroid-0.1.139.dist-info → langroid-0.1.219.dist-info}/METADATA +190 -129
  91. langroid-0.1.219.dist-info/RECORD +127 -0
  92. langroid/agent/special/recipient_validator_agent.py +0 -157
  93. langroid/parsing/json.py +0 -64
  94. langroid/utils/web/selenium_login.py +0 -36
  95. langroid-0.1.139.dist-info/RECORD +0 -103
  96. {langroid-0.1.139.dist-info → langroid-0.1.219.dist-info}/LICENSE +0 -0
  97. {langroid-0.1.139.dist-info → langroid-0.1.219.dist-info}/WHEEL +0 -0
@@ -1,6 +1,9 @@
1
+ import hashlib
2
+ import json
1
3
  import logging
2
4
  import os
3
- from typing import List, Optional, Sequence, Tuple
5
+ import uuid
6
+ from typing import List, Optional, Sequence, Tuple, TypeVar
4
7
 
5
8
  from dotenv import load_dotenv
6
9
  from qdrant_client import QdrantClient
@@ -26,6 +29,33 @@ from langroid.vector_store.base import VectorStore, VectorStoreConfig
26
29
  logger = logging.getLogger(__name__)
27
30
 
28
31
 
32
+ T = TypeVar("T")
33
+
34
+
35
+ def from_optional(x: Optional[T], default: T) -> T:
36
+ if x is None:
37
+ return default
38
+
39
+ return x
40
+
41
+
42
+ def is_valid_uuid(uuid_to_test: str) -> bool:
43
+ """
44
+ Check if a given string is a valid UUID.
45
+ """
46
+ try:
47
+ uuid_obj = uuid.UUID(uuid_to_test)
48
+ return str(uuid_obj) == uuid_to_test
49
+ except Exception:
50
+ pass
51
+ # Check for valid unsigned 64-bit integer
52
+ try:
53
+ int_value = int(uuid_to_test)
54
+ return 0 <= int_value <= 18446744073709551615
55
+ except ValueError:
56
+ return False
57
+
58
+
29
59
  class QdrantDBConfig(VectorStoreConfig):
30
60
  cloud: bool = True
31
61
  collection_name: str | None = "temp"
@@ -35,7 +65,7 @@ class QdrantDBConfig(VectorStoreConfig):
35
65
 
36
66
 
37
67
  class QdrantDB(VectorStore):
38
- def __init__(self, config: QdrantDBConfig):
68
+ def __init__(self, config: QdrantDBConfig = QdrantDBConfig()):
39
69
  super().__init__(config)
40
70
  self.config = config
41
71
  emb_model = EmbeddingModel.create(config.embedding)
@@ -112,8 +142,10 @@ class QdrantDB(VectorStore):
112
142
  n_non_empty_deletes = 0
113
143
  for name in coll_names:
114
144
  info = self.client.get_collection(collection_name=name)
115
- n_empty_deletes += (info.points_count or 0) == 0
116
- n_non_empty_deletes += (info.points_count or 0) > 0
145
+ points_count = from_optional(info.points_count, 0)
146
+
147
+ n_empty_deletes += points_count == 0
148
+ n_non_empty_deletes += points_count > 0
117
149
  self.client.delete_collection(collection_name=name)
118
150
  logger.warning(
119
151
  f"""
@@ -138,7 +170,12 @@ class QdrantDB(VectorStore):
138
170
  for coll in colls:
139
171
  try:
140
172
  counts.append(
141
- self.client.get_collection(collection_name=coll.name).points_count
173
+ from_optional(
174
+ self.client.get_collection(
175
+ collection_name=coll.name
176
+ ).points_count,
177
+ 0,
178
+ )
142
179
  )
143
180
  except Exception:
144
181
  logger.warning(f"Error getting collection {coll.name}")
@@ -158,7 +195,10 @@ class QdrantDB(VectorStore):
158
195
  collections = self.list_collections()
159
196
  if collection_name in collections:
160
197
  coll = self.client.get_collection(collection_name=collection_name)
161
- if coll.status == CollectionStatus.GREEN and (coll.points_count or 0) > 0:
198
+ if (
199
+ coll.status == CollectionStatus.GREEN
200
+ and from_optional(coll.points_count, 0) > 0
201
+ ):
162
202
  logger.warning(f"Non-empty Collection {collection_name} already exists")
163
203
  if not replace:
164
204
  logger.warning("Not replacing collection")
@@ -182,10 +222,15 @@ class QdrantDB(VectorStore):
182
222
  logger.setLevel(level)
183
223
 
184
224
  def add_documents(self, documents: Sequence[Document]) -> None:
225
+ # Add id to metadata if not already present
185
226
  super().maybe_add_ids(documents)
227
+ # Fix the ids due to qdrant finickiness
228
+ for doc in documents:
229
+ doc.metadata.id = str(self._to_int_or_uuid(doc.metadata.id))
186
230
  colls = self.list_collections(empty=True)
187
231
  if len(documents) == 0:
188
232
  return
233
+ document_dicts = [doc.dict() for doc in documents]
189
234
  embedding_vecs = self.embedding_fn([doc.content for doc in documents])
190
235
  if self.config.collection_name is None:
191
236
  raise ValueError("No collection name set, cannot ingest docs")
@@ -201,7 +246,7 @@ class QdrantDB(VectorStore):
201
246
  points=Batch(
202
247
  ids=ids[i : i + b],
203
248
  vectors=embedding_vecs[i : i + b],
204
- payloads=documents[i : i + b],
249
+ payloads=document_dicts[i : i + b],
205
250
  ),
206
251
  )
207
252
 
@@ -210,19 +255,42 @@ class QdrantDB(VectorStore):
210
255
 
211
256
  def _to_int_or_uuid(self, id: str) -> int | str:
212
257
  try:
213
- return int(id)
258
+ int_val = int(id)
259
+ if is_valid_uuid(id):
260
+ return int_val
214
261
  except ValueError:
262
+ pass
263
+
264
+ # If doc_id is already a valid UUID, return it as is
265
+ if isinstance(id, str) and is_valid_uuid(id):
215
266
  return id
216
267
 
217
- def get_all_documents(self) -> List[Document]:
268
+ # Otherwise, generate a UUID from the doc_id
269
+ # Convert doc_id to string if it's not already
270
+ id_str = str(id)
271
+
272
+ # Hash the document ID using SHA-1
273
+ hash_object = hashlib.sha1(id_str.encode())
274
+ hash_digest = hash_object.hexdigest()
275
+
276
+ # Truncate or manipulate the hash to fit into a UUID (128 bits)
277
+ uuid_str = hash_digest[:32]
278
+
279
+ # Format this string into a UUID format
280
+ formatted_uuid = uuid.UUID(uuid_str)
281
+
282
+ return str(formatted_uuid)
283
+
284
+ def get_all_documents(self, where: str = "") -> List[Document]:
218
285
  if self.config.collection_name is None:
219
286
  raise ValueError("No collection name set, cannot retrieve docs")
220
287
  docs = []
221
288
  offset = 0
289
+ filter = Filter() if where == "" else Filter.parse_obj(json.loads(where))
222
290
  while True:
223
291
  results, next_page_offset = self.client.scroll(
224
292
  collection_name=self.config.collection_name,
225
- scroll_filter=None,
293
+ scroll_filter=filter,
226
294
  offset=offset,
227
295
  limit=10_000, # try getting all at once, if not we keep paging
228
296
  with_payload=True,
@@ -260,7 +328,10 @@ class QdrantDB(VectorStore):
260
328
  ) -> List[Tuple[Document, float]]:
261
329
  embedding = self.embedding_fn([text])[0]
262
330
  # TODO filter may not work yet
263
- filter = Filter() if where is None else Filter.from_json(where) # type: ignore
331
+ if where is None or where == "":
332
+ filter = Filter()
333
+ else:
334
+ filter = Filter.parse_obj(json.loads(where))
264
335
  if self.config.collection_name is None:
265
336
  raise ValueError("No collection name set, cannot search")
266
337
  search_result: List[ScoredPoint] = self.client.search(