langroid 0.33.6__py3-none-any.whl → 0.33.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129) hide show
  1. langroid/__init__.py +106 -0
  2. langroid/agent/__init__.py +41 -0
  3. langroid/agent/base.py +1983 -0
  4. langroid/agent/batch.py +398 -0
  5. langroid/agent/callbacks/__init__.py +0 -0
  6. langroid/agent/callbacks/chainlit.py +598 -0
  7. langroid/agent/chat_agent.py +1899 -0
  8. langroid/agent/chat_document.py +454 -0
  9. langroid/agent/openai_assistant.py +882 -0
  10. langroid/agent/special/__init__.py +59 -0
  11. langroid/agent/special/arangodb/__init__.py +0 -0
  12. langroid/agent/special/arangodb/arangodb_agent.py +656 -0
  13. langroid/agent/special/arangodb/system_messages.py +186 -0
  14. langroid/agent/special/arangodb/tools.py +107 -0
  15. langroid/agent/special/arangodb/utils.py +36 -0
  16. langroid/agent/special/doc_chat_agent.py +1466 -0
  17. langroid/agent/special/lance_doc_chat_agent.py +262 -0
  18. langroid/agent/special/lance_rag/__init__.py +9 -0
  19. langroid/agent/special/lance_rag/critic_agent.py +198 -0
  20. langroid/agent/special/lance_rag/lance_rag_task.py +82 -0
  21. langroid/agent/special/lance_rag/query_planner_agent.py +260 -0
  22. langroid/agent/special/lance_tools.py +61 -0
  23. langroid/agent/special/neo4j/__init__.py +0 -0
  24. langroid/agent/special/neo4j/csv_kg_chat.py +174 -0
  25. langroid/agent/special/neo4j/neo4j_chat_agent.py +433 -0
  26. langroid/agent/special/neo4j/system_messages.py +120 -0
  27. langroid/agent/special/neo4j/tools.py +32 -0
  28. langroid/agent/special/relevance_extractor_agent.py +127 -0
  29. langroid/agent/special/retriever_agent.py +56 -0
  30. langroid/agent/special/sql/__init__.py +17 -0
  31. langroid/agent/special/sql/sql_chat_agent.py +654 -0
  32. langroid/agent/special/sql/utils/__init__.py +21 -0
  33. langroid/agent/special/sql/utils/description_extractors.py +190 -0
  34. langroid/agent/special/sql/utils/populate_metadata.py +85 -0
  35. langroid/agent/special/sql/utils/system_message.py +35 -0
  36. langroid/agent/special/sql/utils/tools.py +64 -0
  37. langroid/agent/special/table_chat_agent.py +263 -0
  38. langroid/agent/task.py +2095 -0
  39. langroid/agent/tool_message.py +393 -0
  40. langroid/agent/tools/__init__.py +38 -0
  41. langroid/agent/tools/duckduckgo_search_tool.py +50 -0
  42. langroid/agent/tools/file_tools.py +234 -0
  43. langroid/agent/tools/google_search_tool.py +39 -0
  44. langroid/agent/tools/metaphor_search_tool.py +68 -0
  45. langroid/agent/tools/orchestration.py +303 -0
  46. langroid/agent/tools/recipient_tool.py +235 -0
  47. langroid/agent/tools/retrieval_tool.py +32 -0
  48. langroid/agent/tools/rewind_tool.py +137 -0
  49. langroid/agent/tools/segment_extract_tool.py +41 -0
  50. langroid/agent/xml_tool_message.py +382 -0
  51. langroid/cachedb/__init__.py +17 -0
  52. langroid/cachedb/base.py +58 -0
  53. langroid/cachedb/momento_cachedb.py +108 -0
  54. langroid/cachedb/redis_cachedb.py +153 -0
  55. langroid/embedding_models/__init__.py +39 -0
  56. langroid/embedding_models/base.py +74 -0
  57. langroid/embedding_models/models.py +461 -0
  58. langroid/embedding_models/protoc/__init__.py +0 -0
  59. langroid/embedding_models/protoc/embeddings.proto +19 -0
  60. langroid/embedding_models/protoc/embeddings_pb2.py +33 -0
  61. langroid/embedding_models/protoc/embeddings_pb2.pyi +50 -0
  62. langroid/embedding_models/protoc/embeddings_pb2_grpc.py +79 -0
  63. langroid/embedding_models/remote_embeds.py +153 -0
  64. langroid/exceptions.py +71 -0
  65. langroid/language_models/__init__.py +53 -0
  66. langroid/language_models/azure_openai.py +153 -0
  67. langroid/language_models/base.py +678 -0
  68. langroid/language_models/config.py +18 -0
  69. langroid/language_models/mock_lm.py +124 -0
  70. langroid/language_models/openai_gpt.py +1964 -0
  71. langroid/language_models/prompt_formatter/__init__.py +16 -0
  72. langroid/language_models/prompt_formatter/base.py +40 -0
  73. langroid/language_models/prompt_formatter/hf_formatter.py +132 -0
  74. langroid/language_models/prompt_formatter/llama2_formatter.py +75 -0
  75. langroid/language_models/utils.py +151 -0
  76. langroid/mytypes.py +84 -0
  77. langroid/parsing/__init__.py +52 -0
  78. langroid/parsing/agent_chats.py +38 -0
  79. langroid/parsing/code_parser.py +121 -0
  80. langroid/parsing/document_parser.py +718 -0
  81. langroid/parsing/para_sentence_split.py +62 -0
  82. langroid/parsing/parse_json.py +155 -0
  83. langroid/parsing/parser.py +313 -0
  84. langroid/parsing/repo_loader.py +790 -0
  85. langroid/parsing/routing.py +36 -0
  86. langroid/parsing/search.py +275 -0
  87. langroid/parsing/spider.py +102 -0
  88. langroid/parsing/table_loader.py +94 -0
  89. langroid/parsing/url_loader.py +111 -0
  90. langroid/parsing/urls.py +273 -0
  91. langroid/parsing/utils.py +373 -0
  92. langroid/parsing/web_search.py +156 -0
  93. langroid/prompts/__init__.py +9 -0
  94. langroid/prompts/dialog.py +17 -0
  95. langroid/prompts/prompts_config.py +5 -0
  96. langroid/prompts/templates.py +141 -0
  97. langroid/pydantic_v1/__init__.py +10 -0
  98. langroid/pydantic_v1/main.py +4 -0
  99. langroid/utils/__init__.py +19 -0
  100. langroid/utils/algorithms/__init__.py +3 -0
  101. langroid/utils/algorithms/graph.py +103 -0
  102. langroid/utils/configuration.py +98 -0
  103. langroid/utils/constants.py +30 -0
  104. langroid/utils/git_utils.py +252 -0
  105. langroid/utils/globals.py +49 -0
  106. langroid/utils/logging.py +135 -0
  107. langroid/utils/object_registry.py +66 -0
  108. langroid/utils/output/__init__.py +20 -0
  109. langroid/utils/output/citations.py +41 -0
  110. langroid/utils/output/printing.py +99 -0
  111. langroid/utils/output/status.py +40 -0
  112. langroid/utils/pandas_utils.py +30 -0
  113. langroid/utils/pydantic_utils.py +602 -0
  114. langroid/utils/system.py +286 -0
  115. langroid/utils/types.py +93 -0
  116. langroid/vector_store/__init__.py +50 -0
  117. langroid/vector_store/base.py +359 -0
  118. langroid/vector_store/chromadb.py +214 -0
  119. langroid/vector_store/lancedb.py +406 -0
  120. langroid/vector_store/meilisearch.py +299 -0
  121. langroid/vector_store/momento.py +278 -0
  122. langroid/vector_store/qdrantdb.py +468 -0
  123. {langroid-0.33.6.dist-info → langroid-0.33.7.dist-info}/METADATA +95 -94
  124. langroid-0.33.7.dist-info/RECORD +127 -0
  125. {langroid-0.33.6.dist-info → langroid-0.33.7.dist-info}/WHEEL +1 -1
  126. langroid-0.33.6.dist-info/RECORD +0 -7
  127. langroid-0.33.6.dist-info/entry_points.txt +0 -4
  128. pyproject.toml +0 -356
  129. {langroid-0.33.6.dist-info → langroid-0.33.7.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,468 @@
1
+ import hashlib
2
+ import json
3
+ import logging
4
+ import os
5
+ import uuid
6
+ from typing import Dict, List, Optional, Sequence, Tuple, TypeVar
7
+
8
+ from dotenv import load_dotenv
9
+ from qdrant_client import QdrantClient
10
+ from qdrant_client.conversions.common_types import ScoredPoint
11
+ from qdrant_client.http.models import (
12
+ Batch,
13
+ CollectionStatus,
14
+ Distance,
15
+ Filter,
16
+ NamedSparseVector,
17
+ NamedVector,
18
+ SearchRequest,
19
+ SparseIndexParams,
20
+ SparseVector,
21
+ SparseVectorParams,
22
+ VectorParams,
23
+ )
24
+
25
+ from langroid.embedding_models.base import (
26
+ EmbeddingModelsConfig,
27
+ )
28
+ from langroid.embedding_models.models import OpenAIEmbeddingsConfig
29
+ from langroid.mytypes import Document, EmbeddingFunction, Embeddings
30
+ from langroid.utils.configuration import settings
31
+ from langroid.vector_store.base import VectorStore, VectorStoreConfig
32
+
33
+ logger = logging.getLogger(__name__)
34
+
35
+
36
+ T = TypeVar("T")
37
+
38
+
39
def from_optional(x: Optional[T], default: T) -> T:
    """Return `x`, or `default` when `x` is None.

    Only `None` triggers the fallback; other falsy values (0, "", [])
    are returned unchanged.

    Args:
        x: The possibly-missing value.
        default: Value to use when `x` is None.

    Returns:
        `x` if it is not None, else `default`.
    """
    return default if x is None else x
44
+
45
+
46
def is_valid_uuid(uuid_to_test: str) -> bool:
    """
    Check if a given string is a valid point id: either a UUID written in
    canonical form, or the decimal form of an unsigned 64-bit integer.

    Args:
        uuid_to_test (str): Candidate id.

    Returns:
        bool: True if the string round-trips through `uuid.UUID` unchanged,
            or (when it does not parse as a UUID at all) if it parses as an
            integer in the range [0, 2**64 - 1]. A string that parses as a
            UUID but is not in canonical form yields False.
    """
    try:
        canonical = str(uuid.UUID(uuid_to_test))
    except Exception:
        canonical = None

    if canonical is not None:
        # Parsed as a UUID: accept only the exact canonical spelling.
        return canonical == uuid_to_test

    # Not a UUID: fall back to the unsigned 64-bit integer check.
    try:
        as_int = int(uuid_to_test)
    except ValueError:
        return False
    max_u64 = 2**64 - 1
    return 0 <= as_int <= max_u64
61
+
62
+
63
class QdrantDBConfig(VectorStoreConfig):
    """Configuration for the `QdrantDB` vector store."""

    # Connect to a remote Qdrant instance using the QDRANT_API_URL and
    # QDRANT_API_KEY env variables. QdrantDB.__init__ resets this to False
    # (local on-disk storage) when either variable is missing.
    cloud: bool = True
    # Local docker mode: QdrantDB.__init__ then only requires QDRANT_API_URL,
    # and overrides `cloud` depending on whether that variable is set.
    docker: bool = False
    # Collection to create/use at construction time; None delays creation.
    collection_name: str | None = "temp"
    # On-disk location used when not connecting to a remote instance.
    storage_path: str = ".qdrant/data"
    # Embedding model config for the dense vectors.
    embedding: EmbeddingModelsConfig = OpenAIEmbeddingsConfig()
    # Intended distance metric for the dense vectors.
    distance: str = Distance.COSINE
    # When True, QdrantDB also stores and searches a "text-sparse" vector
    # computed with `sparse_embedding_model` (needs `transformers`).
    use_sparse_embeddings: bool = False
    # Model name passed to AutoTokenizer/AutoModelForMaskedLM.from_pretrained.
    sparse_embedding_model: str = "naver/splade-v3-distilbert"
    # Result limit of the sparse-vector search request.
    sparse_limit: int = 3
73
+
74
+
75
class QdrantDB(VectorStore):
    """Vector store backed by Qdrant.

    Connects either to a remote Qdrant instance (url and api key taken from
    the QDRANT_API_URL / QDRANT_API_KEY env variables) or to local on-disk
    storage at `config.storage_path`. Documents are stored with a dense
    vector under the unnamed vector `""` and, when
    `config.use_sparse_embeddings` is set, a sparse vector under
    `"text-sparse"`.
    """

    def __init__(self, config: Optional[QdrantDBConfig] = None):
        """
        Set up embedding functions, connect to Qdrant, and create the
        collection if `config.collection_name` is not None.

        Args:
            config (QdrantDBConfig, optional): Store configuration. Defaults
                to a fresh `QdrantDBConfig()`. Note that `config.cloud` may
                be modified here depending on which env variables are set.

        Raises:
            ImportError: if sparse embeddings are requested but
                `transformers` is not installed.
        """
        # Build the default per call: this constructor (and create_collection)
        # mutate the config, so a shared default instance would leak state
        # from one QdrantDB to the next.
        if config is None:
            config = QdrantDBConfig()
        super().__init__(config)
        self.config: QdrantDBConfig = config
        self.embedding_fn: EmbeddingFunction = self.embedding_model.embedding_fn()
        self.embedding_dim = self.embedding_model.embedding_dims
        if self.config.use_sparse_embeddings:
            try:
                from transformers import AutoModelForMaskedLM, AutoTokenizer
            except ImportError:
                raise ImportError(
                    """
                    To use sparse embeddings,
                    you must install langroid with the [transformers] extra, e.g.:
                    pip install "langroid[transformers]"
                    """
                )

            self.sparse_tokenizer = AutoTokenizer.from_pretrained(
                self.config.sparse_embedding_model
            )
            self.sparse_model = AutoModelForMaskedLM.from_pretrained(
                self.config.sparse_embedding_model
            )
        self.host = config.host
        self.port = config.port
        load_dotenv()
        key = os.getenv("QDRANT_API_KEY")
        url = os.getenv("QDRANT_API_URL")
        if config.docker:
            # Docker mode only needs the url; without it use local storage.
            if url is None:
                logger.warning(
                    f"""The QDRANT_API_URL env variable must be set to use
                    QdrantDB in local docker mode. Please set this
                    value in your .env file.
                    Switching to local storage at {config.storage_path}
                    """
                )
                config.cloud = False
            else:
                config.cloud = True
        elif config.cloud and None in [key, url]:
            logger.warning(
                f"""QDRANT_API_KEY, QDRANT_API_URL env variable must be set to use
                QdrantDB in cloud mode. Please set these values
                in your .env file.
                Switching to local storage at {config.storage_path}
                """
            )
            config.cloud = False

        if config.cloud:
            self.client = QdrantClient(
                url=url,
                api_key=key,
                timeout=config.timeout,
            )
        else:
            try:
                self.client = QdrantClient(
                    path=config.storage_path,
                )
            except Exception as e:
                # Best-effort fallback: retry with a sibling storage path.
                new_storage_path = config.storage_path + ".new"
                logger.warning(
                    f"""
                    Error connecting to local QdrantDB at {config.storage_path}:
                    {e}
                    Switching to {new_storage_path}
                    """
                )
                self.client = QdrantClient(
                    path=new_storage_path,
                )

        # Note: Only create collection if a non-null collection name is provided.
        # This is useful to delay creation of vecdb until we have a suitable
        # collection name (e.g. we could get it from the url or folder path).
        if config.collection_name is not None:
            self.create_collection(
                config.collection_name, replace=config.replace_collection
            )

    def clear_empty_collections(self) -> int:
        """
        Delete every collection whose point count is exactly 0.

        Returns:
            int: Number of collections deleted.
        """
        # empty=True is essential: the default listing contains only
        # NON-empty collections, so nothing would ever be deleted.
        coll_names = self.list_collections(empty=True)
        n_deletes = 0
        for name in coll_names:
            info = self.client.get_collection(collection_name=name)
            # An unknown (None) count is deliberately NOT treated as empty.
            if info.points_count == 0:
                n_deletes += 1
                self.client.delete_collection(collection_name=name)
        return n_deletes

    def clear_all_collections(self, really: bool = False, prefix: str = "") -> int:
        """
        Clear all collections with the given prefix.

        Args:
            really (bool): Safety switch; nothing is deleted unless True.
            prefix (str): Only collections whose name starts with this
                prefix are deleted. Defaults to "" (all collections).

        Returns:
            int: Number of collections deleted (empty and non-empty).
        """
        if not really:
            logger.warning("Not deleting all collections, set really=True to confirm")
            return 0
        coll_names = [
            c for c in self.list_collections(empty=True) if c.startswith(prefix)
        ]
        if len(coll_names) == 0:
            logger.warning(f"No collections found with prefix {prefix}")
            return 0
        n_empty_deletes = 0
        n_non_empty_deletes = 0
        for name in coll_names:
            info = self.client.get_collection(collection_name=name)
            points_count = from_optional(info.points_count, 0)

            if points_count == 0:
                n_empty_deletes += 1
            elif points_count > 0:
                n_non_empty_deletes += 1
            self.client.delete_collection(collection_name=name)
        logger.warning(
            f"""
            Deleted {n_empty_deletes} empty collections and
            {n_non_empty_deletes} non-empty collections.
            """
        )
        return n_empty_deletes + n_non_empty_deletes

    def list_collections(self, empty: bool = False) -> List[str]:
        """
        List collection names.

        Args:
            empty (bool, optional): Whether to include empty collections.
                Defaults to False.

        Returns:
            List of all collection names if `empty` is True; otherwise only
            the names of collections that have at least one point.
            A collection whose info cannot be fetched counts as empty.
        """
        colls = self.client.get_collections().collections
        if empty:
            return [coll.name for coll in colls]
        non_empty = []
        for coll in colls:
            try:
                count = from_optional(
                    self.client.get_collection(
                        collection_name=coll.name
                    ).points_count,
                    0,
                )
            except Exception:
                logger.warning(f"Error getting collection {coll.name}")
                count = 0
            if count > 0:
                non_empty.append(coll.name)
        return non_empty

    def create_collection(self, collection_name: str, replace: bool = False) -> None:
        """
        Create a collection with the given name, optionally replacing an existing
        collection if `replace` is True.

        An existing collection that is empty (or not in GREEN status) is
        always dropped and re-created. `self.config.collection_name` is set
        to `collection_name` in all cases.

        Args:
            collection_name (str): Name of the collection to create.
            replace (bool): Whether to replace an existing non-empty
                collection with the same name. Defaults to False.
        """
        self.config.collection_name = collection_name
        if self.client.collection_exists(collection_name=collection_name):
            coll = self.client.get_collection(collection_name=collection_name)
            if (
                coll.status == CollectionStatus.GREEN
                and from_optional(coll.points_count, 0) > 0
            ):
                logger.warning(f"Non-empty Collection {collection_name} already exists")
                if not replace:
                    logger.warning("Not replacing collection")
                    return
                else:
                    logger.warning("Recreating fresh collection")
            self.client.delete_collection(collection_name=collection_name)

        vectors_config = {
            "": VectorParams(
                size=self.embedding_dim,
                # Honor the configured metric (default is still cosine).
                distance=self.config.distance,
            )
        }
        sparse_vectors_config = None
        if self.config.use_sparse_embeddings:
            sparse_vectors_config = {
                "text-sparse": SparseVectorParams(index=SparseIndexParams())
            }
        self.client.create_collection(
            collection_name=collection_name,
            vectors_config=vectors_config,
            sparse_vectors_config=sparse_vectors_config,
        )
        collection_info = self.client.get_collection(collection_name=collection_name)
        assert collection_info.status == CollectionStatus.GREEN
        assert collection_info.vectors_count in [0, None]
        if settings.debug:
            # Temporarily raise verbosity so the collection info is shown.
            level = logger.getEffectiveLevel()
            logger.setLevel(logging.INFO)
            logger.info(collection_info)
            logger.setLevel(level)

    def get_sparse_embeddings(self, inputs: List[str]) -> List[SparseVector]:
        """
        Compute one sparse vector per input text using the sparse model.

        Args:
            inputs (List[str]): Texts to embed.

        Returns:
            List[SparseVector]: One sparse vector per input, or an empty
            list when `config.use_sparse_embeddings` is False.
        """
        if not self.config.use_sparse_embeddings:
            return []
        import torch

        tokens = self.sparse_tokenizer(
            inputs, return_tensors="pt", truncation=True, padding=True
        )
        # Inference only: no need to track gradients.
        with torch.no_grad():
            output = self.sparse_model(**tokens)
            # log(1 + relu(logits)), masked by attention, max over positions.
            vectors = torch.max(
                torch.log(torch.relu(output.logits) + torch.tensor(1.0))
                * tokens.attention_mask.unsqueeze(-1),
                dim=1,
            )[0].squeeze(dim=1)
        sparse_embeddings = []
        for vec in vectors:
            # flatten() (not squeeze()) so that a single non-zero entry still
            # yields a 1-element list rather than a bare scalar.
            cols = vec.nonzero().flatten().cpu().tolist()
            weights = vec[cols].cpu().tolist()
            sparse_embeddings.append(
                SparseVector(
                    indices=cols,
                    values=weights,
                )
            )
        return sparse_embeddings

    def add_documents(self, documents: Sequence[Document]) -> None:
        """
        Embed and upsert documents into the current collection, creating
        the collection first if it does not exist.

        Args:
            documents (Sequence[Document]): Documents to ingest. Their
                `metadata.id` is rewritten to a Qdrant-compatible id.

        Raises:
            ValueError: if no collection name is set (and `documents` is
                non-empty).
        """
        # Cheap checks first, before any network or embedding calls.
        if len(documents) == 0:
            return
        if self.config.collection_name is None:
            raise ValueError("No collection name set, cannot ingest docs")
        # Add id to metadata if not already present
        super().maybe_add_ids(documents)
        # Fix the ids due to qdrant finickiness
        for doc in documents:
            doc.metadata.id = str(self._to_int_or_uuid(doc.metadata.id))
        colls = self.list_collections(empty=True)
        document_dicts = [doc.dict() for doc in documents]
        contents = [doc.content for doc in documents]
        embedding_vecs = self.embedding_fn(contents)
        sparse_embedding_vecs = self.get_sparse_embeddings(contents)
        if self.config.collection_name not in colls:
            self.create_collection(self.config.collection_name, replace=True)
        ids = [self._to_int_or_uuid(d.id()) for d in documents]
        # don't insert all at once, batch in chunks of b,
        # else we get an API error
        b = self.config.batch_size
        for i in range(0, len(ids), b):
            vectors: Dict[str, Embeddings | List[SparseVector]] = {
                "": embedding_vecs[i : i + b]
            }
            if self.config.use_sparse_embeddings:
                vectors["text-sparse"] = sparse_embedding_vecs[i : i + b]
            self.client.upsert(
                collection_name=self.config.collection_name,
                points=Batch(
                    ids=ids[i : i + b],
                    vectors=vectors,
                    payloads=document_dicts[i : i + b],
                ),
            )

    def delete_collection(self, collection_name: str) -> None:
        """Delete the collection with the given name."""
        self.client.delete_collection(collection_name=collection_name)

    def _to_int_or_uuid(self, id: str) -> int | str:
        """
        Map an arbitrary document id to an id acceptable as a point id.

        Args:
            id (str): Original document id.

        Returns:
            int | str: The integer value if `id` is an integer that passes
            `is_valid_uuid`; `id` itself if it is a string that passes
            `is_valid_uuid`; otherwise a UUID string derived
            deterministically from the SHA-1 hash of `str(id)`.
        """
        try:
            int_val = int(id)
            if is_valid_uuid(id):
                return int_val
        except ValueError:
            pass

        # If doc_id is already a valid UUID, return it as is
        if isinstance(id, str) and is_valid_uuid(id):
            return id

        # Otherwise derive a UUID from the id: SHA-1 gives 40 hex chars,
        # of which the first 32 (128 bits) are formatted as a UUID.
        hash_digest = hashlib.sha1(str(id).encode()).hexdigest()
        return str(uuid.UUID(hash_digest[:32]))

    def get_all_documents(self, where: str = "") -> List[Document]:
        """
        Retrieve all documents in the current collection, paging through
        the scroll API.

        Args:
            where (str): Optional JSON string parsed into a Qdrant `Filter`.
                Defaults to "" (no filtering).

        Returns:
            List[Document]: Documents built from the stored payloads.

        Raises:
            ValueError: if no collection name is set.
        """
        if self.config.collection_name is None:
            raise ValueError("No collection name set, cannot retrieve docs")
        docs = []
        offset = 0
        scroll_filter = (
            Filter() if where == "" else Filter.parse_obj(json.loads(where))
        )
        while True:
            results, next_page_offset = self.client.scroll(
                collection_name=self.config.collection_name,
                scroll_filter=scroll_filter,
                offset=offset,
                limit=10_000,  # try getting all at once, if not we keep paging
                with_payload=True,
                with_vectors=False,
            )
            docs += [
                self.config.document_class(**record.payload)  # type: ignore
                for record in results
            ]
            # No further page: we are done.
            if next_page_offset is None:
                break
            offset = next_page_offset  # type: ignore
        return docs

    def get_documents_by_ids(self, ids: List[str]) -> List[Document]:
        """
        Retrieve documents by id, in the order of `ids`.

        Args:
            ids (List[str]): Document ids; each is normalized with
                `_to_int_or_uuid` before lookup.

        Returns:
            List[Document]: Documents found, ordered like `ids`; ids with
            no matching record are skipped.

        Raises:
            ValueError: if no collection name is set.
        """
        if self.config.collection_name is None:
            raise ValueError("No collection name set, cannot retrieve docs")
        _ids = [self._to_int_or_uuid(id) for id in ids]
        records = self.client.retrieve(
            collection_name=self.config.collection_name,
            ids=_ids,
            with_vectors=False,
            with_payload=True,
        )
        # Note the records may NOT be in the order of the ids,
        # so we re-order them here.
        id2payload = {record.id: record.payload for record in records}
        ordered_payloads = [id2payload[id] for id in _ids if id in id2payload]
        # Use the configured document class, like the other retrieval methods.
        docs = [
            self.config.document_class(**payload)  # type: ignore
            for payload in ordered_payloads
        ]
        return docs

    def similar_texts_with_scores(
        self,
        text: str,
        k: int = 1,
        where: Optional[str] = None,
        neighbors: int = 0,
    ) -> List[Tuple[Document, float]]:
        """
        Find stored documents similar to `text`.

        Runs a dense-vector search and, when sparse embeddings are enabled,
        an additional sparse-vector search; the results of both are
        concatenated (dense matches first), without de-duplication.

        Args:
            text (str): Query text.
            k (int): Max number of dense-search matches. Defaults to 1.
            where (str, optional): JSON string parsed into a Qdrant `Filter`.
            neighbors (int): Not used by this implementation.

        Returns:
            List[Tuple[Document, float]]: (document, score) pairs, or an
            empty list if nothing matched.

        Raises:
            ValueError: if no collection name is set.
        """
        # Fail fast, before spending an embedding call.
        if self.config.collection_name is None:
            raise ValueError("No collection name set, cannot search")
        embedding = self.embedding_fn([text])[0]
        # TODO filter may not work yet
        if where is None or where == "":
            query_filter = Filter()
        else:
            query_filter = Filter.parse_obj(json.loads(where))
        requests = [
            SearchRequest(
                vector=NamedVector(
                    name="",
                    vector=embedding,
                ),
                limit=k,
                with_payload=True,
                filter=query_filter,
            )
        ]
        if self.config.use_sparse_embeddings:
            sparse_embedding = self.get_sparse_embeddings([text])[0]
            requests.append(
                SearchRequest(
                    vector=NamedSparseVector(
                        name="text-sparse",
                        vector=sparse_embedding,
                    ),
                    limit=self.config.sparse_limit,
                    with_payload=True,
                    filter=query_filter,
                )
            )
        search_result_lists: List[List[ScoredPoint]] = self.client.search_batch(
            collection_name=self.config.collection_name, requests=requests
        )

        # 2D list -> 1D list, dropping any null matches
        matches = [
            match
            for result in search_result_lists
            for match in result
            if match is not None
        ]
        if len(matches) == 0:
            logger.warning(f"No matches found for {text}")
            return []
        doc_score_pairs = [
            (self.config.document_class(**(match.payload)), match.score)  # type: ignore
            for match in matches
        ]
        max_score = max(ds[1] for ds in doc_score_pairs)
        if settings.debug:
            logger.info(f"Found {len(doc_score_pairs)} matches, max score: {max_score}")
        self.show_if_debug(doc_score_pairs)
        return doc_score_pairs
@@ -1,9 +1,10 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: langroid
3
- Version: 0.33.6
3
+ Version: 0.33.7
4
4
  Summary: Harness LLMs with Multi-Agent Programming
5
- Author-Email: Prasad Chalasani <pchalasani@gmail.com>
5
+ Author-email: Prasad Chalasani <pchalasani@gmail.com>
6
6
  License: MIT
7
+ License-File: LICENSE
7
8
  Requires-Python: <3.13,>=3.10
8
9
  Requires-Dist: adb-cloud-connector<2.0.0,>=1.0.2
9
10
  Requires-Dist: aiohttp<4.0.0,>=3.9.1
@@ -42,111 +43,111 @@ Requires-Dist: pyyaml<7.0.0,>=6.0.1
42
43
  Requires-Dist: qdrant-client<2.0.0,>=1.8.0
43
44
  Requires-Dist: rank-bm25<1.0.0,>=0.2.2
44
45
  Requires-Dist: redis<6.0.0,>=5.0.1
45
- Requires-Dist: requests<3.0.0,>=2.31.0
46
46
  Requires-Dist: requests-oauthlib<2.0.0,>=1.3.1
47
+ Requires-Dist: requests<3.0.0,>=2.31.0
47
48
  Requires-Dist: rich<14.0.0,>=13.3.4
48
49
  Requires-Dist: thefuzz<1.0.0,>=0.20.0
49
50
  Requires-Dist: tiktoken<1.0.0,>=0.7.0
50
51
  Requires-Dist: trafilatura<2.0.0,>=1.5.0
51
52
  Requires-Dist: typer<1.0.0,>=0.9.0
52
53
  Requires-Dist: wget<4.0,>=3.2
54
+ Provides-Extra: all
55
+ Requires-Dist: arango-datasets<2.0.0,>=1.2.2; extra == 'all'
56
+ Requires-Dist: chainlit<3.0.0,>=2.0.1; extra == 'all'
57
+ Requires-Dist: chromadb<=0.4.23,>=0.4.21; extra == 'all'
58
+ Requires-Dist: fastembed<0.4.0,>=0.3.1; extra == 'all'
59
+ Requires-Dist: huggingface-hub<0.22.0,>=0.21.2; extra == 'all'
60
+ Requires-Dist: litellm<2.0.0,>=1.30.1; extra == 'all'
61
+ Requires-Dist: metaphor-python<0.2.0,>=0.1.23; extra == 'all'
62
+ Requires-Dist: neo4j<6.0.0,>=5.14.1; extra == 'all'
63
+ Requires-Dist: pdf2image<2.0.0,>=1.17.0; extra == 'all'
64
+ Requires-Dist: pdfplumber<0.11.0,>=0.10.2; extra == 'all'
65
+ Requires-Dist: psycopg2<3.0.0,>=2.9.7; extra == 'all'
66
+ Requires-Dist: pymupdf<2.0.0,>=1.23.3; extra == 'all'
67
+ Requires-Dist: pymysql<2.0.0,>=1.1.0; extra == 'all'
68
+ Requires-Dist: pypdf>=5.1.0; extra == 'all'
69
+ Requires-Dist: pytesseract<0.4.0,>=0.3.10; extra == 'all'
70
+ Requires-Dist: python-arango<9.0.0,>=8.1.2; extra == 'all'
71
+ Requires-Dist: python-docx<2.0.0,>=1.1.0; extra == 'all'
72
+ Requires-Dist: python-socketio<6.0.0,>=5.11.0; extra == 'all'
73
+ Requires-Dist: sentence-transformers<3.0.0,>=2.2.2; extra == 'all'
74
+ Requires-Dist: sqlalchemy<3.0.0,>=2.0.19; extra == 'all'
75
+ Requires-Dist: torch<3.0.0,>=2.0.0; extra == 'all'
76
+ Requires-Dist: transformers<5.0.0,>=4.40.1; extra == 'all'
77
+ Requires-Dist: unstructured[docx,pdf,pptx]<0.10.18,>=0.10.16; extra == 'all'
78
+ Provides-Extra: arango
79
+ Requires-Dist: arango-datasets<2.0.0,>=1.2.2; extra == 'arango'
80
+ Requires-Dist: python-arango<9.0.0,>=8.1.2; extra == 'arango'
81
+ Provides-Extra: chainlit
82
+ Requires-Dist: chainlit<3.0.0,>=2.0.1; extra == 'chainlit'
83
+ Requires-Dist: python-socketio<6.0.0,>=5.11.0; extra == 'chainlit'
84
+ Provides-Extra: chromadb
85
+ Requires-Dist: chromadb<=0.4.23,>=0.4.21; extra == 'chromadb'
86
+ Provides-Extra: db
87
+ Requires-Dist: psycopg2<3.0.0,>=2.9.7; extra == 'db'
88
+ Requires-Dist: pymysql<2.0.0,>=1.1.0; extra == 'db'
89
+ Requires-Dist: sqlalchemy<3.0.0,>=2.0.19; extra == 'db'
53
90
  Provides-Extra: doc-chat
54
- Requires-Dist: pdf2image<2.0.0,>=1.17.0; extra == "doc-chat"
55
- Requires-Dist: pdfplumber<0.11.0,>=0.10.2; extra == "doc-chat"
56
- Requires-Dist: pymupdf<2.0.0,>=1.23.3; extra == "doc-chat"
57
- Requires-Dist: pypdf>=5.1.0; extra == "doc-chat"
58
- Requires-Dist: pytesseract<0.4.0,>=0.3.10; extra == "doc-chat"
59
- Requires-Dist: python-docx<2.0.0,>=1.1.0; extra == "doc-chat"
60
- Requires-Dist: unstructured[docx,pdf,pptx]<0.10.18,>=0.10.16; extra == "doc-chat"
91
+ Requires-Dist: pdf2image<2.0.0,>=1.17.0; extra == 'doc-chat'
92
+ Requires-Dist: pdfplumber<0.11.0,>=0.10.2; extra == 'doc-chat'
93
+ Requires-Dist: pymupdf<2.0.0,>=1.23.3; extra == 'doc-chat'
94
+ Requires-Dist: pypdf>=5.1.0; extra == 'doc-chat'
95
+ Requires-Dist: pytesseract<0.4.0,>=0.3.10; extra == 'doc-chat'
96
+ Requires-Dist: python-docx<2.0.0,>=1.1.0; extra == 'doc-chat'
97
+ Requires-Dist: unstructured[docx,pdf,pptx]<0.10.18,>=0.10.16; extra == 'doc-chat'
98
+ Provides-Extra: docx
99
+ Requires-Dist: python-docx<2.0.0,>=1.1.0; extra == 'docx'
100
+ Provides-Extra: fastembed
101
+ Requires-Dist: fastembed<0.4.0,>=0.3.1; extra == 'fastembed'
102
+ Provides-Extra: hf-embeddings
103
+ Requires-Dist: sentence-transformers<3.0.0,>=2.2.2; extra == 'hf-embeddings'
104
+ Requires-Dist: torch<3.0.0,>=2.0.0; extra == 'hf-embeddings'
61
105
  Provides-Extra: hf-transformers
62
- Requires-Dist: sentence-transformers<3.0.0,>=2.2.2; extra == "hf-transformers"
63
- Requires-Dist: torch<3.0.0,>=2.0.0; extra == "hf-transformers"
64
- Requires-Dist: transformers<5.0.0,>=4.40.1; extra == "hf-transformers"
65
- Requires-Dist: huggingface-hub<0.22.0,>=0.21.2; extra == "hf-transformers"
66
- Provides-Extra: vecdbs
67
- Requires-Dist: lancedb<0.9.0,>=0.8.2; extra == "vecdbs"
68
- Requires-Dist: tantivy<0.22.0,>=0.21.0; extra == "vecdbs"
69
- Requires-Dist: pyarrow<16.0.0,>=15.0.0; extra == "vecdbs"
70
- Requires-Dist: chromadb<=0.4.23,>=0.4.21; extra == "vecdbs"
71
- Provides-Extra: db
72
- Requires-Dist: sqlalchemy<3.0.0,>=2.0.19; extra == "db"
73
- Requires-Dist: psycopg2<3.0.0,>=2.9.7; extra == "db"
74
- Requires-Dist: pymysql<2.0.0,>=1.1.0; extra == "db"
75
- Provides-Extra: all
76
- Requires-Dist: pdf2image<2.0.0,>=1.17.0; extra == "all"
77
- Requires-Dist: pdfplumber<0.11.0,>=0.10.2; extra == "all"
78
- Requires-Dist: pymupdf<2.0.0,>=1.23.3; extra == "all"
79
- Requires-Dist: pypdf>=5.1.0; extra == "all"
80
- Requires-Dist: pytesseract<0.4.0,>=0.3.10; extra == "all"
81
- Requires-Dist: python-docx<2.0.0,>=1.1.0; extra == "all"
82
- Requires-Dist: unstructured[docx,pdf,pptx]<0.10.18,>=0.10.16; extra == "all"
83
- Requires-Dist: sqlalchemy<3.0.0,>=2.0.19; extra == "all"
84
- Requires-Dist: psycopg2<3.0.0,>=2.9.7; extra == "all"
85
- Requires-Dist: pymysql<2.0.0,>=1.1.0; extra == "all"
86
- Requires-Dist: sentence-transformers<3.0.0,>=2.2.2; extra == "all"
87
- Requires-Dist: torch<3.0.0,>=2.0.0; extra == "all"
88
- Requires-Dist: transformers<5.0.0,>=4.40.1; extra == "all"
89
- Requires-Dist: huggingface-hub<0.22.0,>=0.21.2; extra == "all"
90
- Requires-Dist: chromadb<=0.4.23,>=0.4.21; extra == "all"
91
- Requires-Dist: metaphor-python<0.2.0,>=0.1.23; extra == "all"
92
- Requires-Dist: neo4j<6.0.0,>=5.14.1; extra == "all"
93
- Requires-Dist: python-arango<9.0.0,>=8.1.2; extra == "all"
94
- Requires-Dist: arango-datasets<2.0.0,>=1.2.2; extra == "all"
95
- Requires-Dist: litellm<2.0.0,>=1.30.1; extra == "all"
96
- Requires-Dist: chainlit<3.0.0,>=2.0.1; extra == "all"
97
- Requires-Dist: python-socketio<6.0.0,>=5.11.0; extra == "all"
98
- Requires-Dist: fastembed<0.4.0,>=0.3.1; extra == "all"
106
+ Requires-Dist: huggingface-hub<0.22.0,>=0.21.2; extra == 'hf-transformers'
107
+ Requires-Dist: sentence-transformers<3.0.0,>=2.2.2; extra == 'hf-transformers'
108
+ Requires-Dist: torch<3.0.0,>=2.0.0; extra == 'hf-transformers'
109
+ Requires-Dist: transformers<5.0.0,>=4.40.1; extra == 'hf-transformers'
99
110
  Provides-Extra: lancedb
100
- Requires-Dist: lancedb<0.9.0,>=0.8.2; extra == "lancedb"
101
- Requires-Dist: tantivy<0.22.0,>=0.21.0; extra == "lancedb"
102
- Requires-Dist: pyarrow<16.0.0,>=15.0.0; extra == "lancedb"
111
+ Requires-Dist: lancedb<0.9.0,>=0.8.2; extra == 'lancedb'
112
+ Requires-Dist: pyarrow<16.0.0,>=15.0.0; extra == 'lancedb'
113
+ Requires-Dist: tantivy<0.22.0,>=0.21.0; extra == 'lancedb'
114
+ Provides-Extra: litellm
115
+ Requires-Dist: litellm<2.0.0,>=1.30.1; extra == 'litellm'
116
+ Provides-Extra: meilisearch
117
+ Requires-Dist: meilisearch-python-sdk<3.0.0,>=2.2.3; extra == 'meilisearch'
118
+ Provides-Extra: metaphor
119
+ Requires-Dist: metaphor-python<0.2.0,>=0.1.23; extra == 'metaphor'
120
+ Provides-Extra: momento
121
+ Requires-Dist: momento<1.21.0,>=1.10.2; extra == 'momento'
122
+ Provides-Extra: mysql
123
+ Requires-Dist: pymysql<2.0.0,>=1.1.0; extra == 'mysql'
124
+ Provides-Extra: neo4j
125
+ Requires-Dist: neo4j<6.0.0,>=5.14.1; extra == 'neo4j'
103
126
  Provides-Extra: pdf-parsers
104
- Requires-Dist: pdfplumber<0.11.0,>=0.10.2; extra == "pdf-parsers"
105
- Requires-Dist: pypdf>=5.1.0; extra == "pdf-parsers"
106
- Requires-Dist: pymupdf<2.0.0,>=1.23.3; extra == "pdf-parsers"
107
- Requires-Dist: pdf2image<2.0.0,>=1.17.0; extra == "pdf-parsers"
108
- Requires-Dist: pytesseract<0.4.0,>=0.3.10; extra == "pdf-parsers"
109
- Provides-Extra: docx
110
- Requires-Dist: python-docx<2.0.0,>=1.1.0; extra == "docx"
127
+ Requires-Dist: pdf2image<2.0.0,>=1.17.0; extra == 'pdf-parsers'
128
+ Requires-Dist: pdfplumber<0.11.0,>=0.10.2; extra == 'pdf-parsers'
129
+ Requires-Dist: pymupdf<2.0.0,>=1.23.3; extra == 'pdf-parsers'
130
+ Requires-Dist: pypdf>=5.1.0; extra == 'pdf-parsers'
131
+ Requires-Dist: pytesseract<0.4.0,>=0.3.10; extra == 'pdf-parsers'
132
+ Provides-Extra: postgres
133
+ Requires-Dist: psycopg2<3.0.0,>=2.9.7; extra == 'postgres'
111
134
  Provides-Extra: scrapy
112
- Requires-Dist: scrapy<3.0.0,>=2.11.0; extra == "scrapy"
113
- Provides-Extra: hf-embeddings
114
- Requires-Dist: sentence-transformers<3.0.0,>=2.2.2; extra == "hf-embeddings"
115
- Requires-Dist: torch<3.0.0,>=2.0.0; extra == "hf-embeddings"
135
+ Requires-Dist: scrapy<3.0.0,>=2.11.0; extra == 'scrapy'
136
+ Provides-Extra: sql
137
+ Requires-Dist: psycopg2<3.0.0,>=2.9.7; extra == 'sql'
138
+ Requires-Dist: pymysql<2.0.0,>=1.1.0; extra == 'sql'
139
+ Requires-Dist: sqlalchemy<3.0.0,>=2.0.19; extra == 'sql'
116
140
  Provides-Extra: transformers
117
- Requires-Dist: transformers<5.0.0,>=4.40.1; extra == "transformers"
118
- Requires-Dist: huggingface-hub<0.22.0,>=0.21.2; extra == "transformers"
119
- Requires-Dist: torch<3.0.0,>=2.0.0; extra == "transformers"
141
+ Requires-Dist: huggingface-hub<0.22.0,>=0.21.2; extra == 'transformers'
142
+ Requires-Dist: torch<3.0.0,>=2.0.0; extra == 'transformers'
143
+ Requires-Dist: transformers<5.0.0,>=4.40.1; extra == 'transformers'
120
144
  Provides-Extra: unstructured
121
- Requires-Dist: unstructured[docx,pdf,pptx]<0.10.18,>=0.10.16; extra == "unstructured"
122
- Provides-Extra: postgres
123
- Requires-Dist: psycopg2<3.0.0,>=2.9.7; extra == "postgres"
124
- Provides-Extra: mysql
125
- Requires-Dist: pymysql<2.0.0,>=1.1.0; extra == "mysql"
126
- Provides-Extra: sql
127
- Requires-Dist: sqlalchemy<3.0.0,>=2.0.19; extra == "sql"
128
- Requires-Dist: pymysql<2.0.0,>=1.1.0; extra == "sql"
129
- Requires-Dist: psycopg2<3.0.0,>=2.9.7; extra == "sql"
130
- Provides-Extra: litellm
131
- Requires-Dist: litellm<2.0.0,>=1.30.1; extra == "litellm"
132
- Provides-Extra: neo4j
133
- Requires-Dist: neo4j<6.0.0,>=5.14.1; extra == "neo4j"
134
- Provides-Extra: arango
135
- Requires-Dist: python-arango<9.0.0,>=8.1.2; extra == "arango"
136
- Requires-Dist: arango-datasets<2.0.0,>=1.2.2; extra == "arango"
137
- Provides-Extra: metaphor
138
- Requires-Dist: metaphor-python<0.2.0,>=0.1.23; extra == "metaphor"
139
- Provides-Extra: chainlit
140
- Requires-Dist: chainlit<3.0.0,>=2.0.1; extra == "chainlit"
141
- Requires-Dist: python-socketio<6.0.0,>=5.11.0; extra == "chainlit"
142
- Provides-Extra: chromadb
143
- Requires-Dist: chromadb<=0.4.23,>=0.4.21; extra == "chromadb"
144
- Provides-Extra: meilisearch
145
- Requires-Dist: meilisearch-python-sdk<3.0.0,>=2.2.3; extra == "meilisearch"
146
- Provides-Extra: momento
147
- Requires-Dist: momento<1.21.0,>=1.10.2; extra == "momento"
148
- Provides-Extra: fastembed
149
- Requires-Dist: fastembed<0.4.0,>=0.3.1; extra == "fastembed"
145
+ Requires-Dist: unstructured[docx,pdf,pptx]<0.10.18,>=0.10.16; extra == 'unstructured'
146
+ Provides-Extra: vecdbs
147
+ Requires-Dist: chromadb<=0.4.23,>=0.4.21; extra == 'vecdbs'
148
+ Requires-Dist: lancedb<0.9.0,>=0.8.2; extra == 'vecdbs'
149
+ Requires-Dist: pyarrow<16.0.0,>=15.0.0; extra == 'vecdbs'
150
+ Requires-Dist: tantivy<0.22.0,>=0.21.0; extra == 'vecdbs'
150
151
  Description-Content-Type: text/markdown
151
152
 
152
153
  <div align="center">