langroid 0.1.139__py3-none-any.whl → 0.1.219__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langroid/__init__.py +70 -0
- langroid/agent/__init__.py +22 -0
- langroid/agent/base.py +120 -33
- langroid/agent/batch.py +134 -35
- langroid/agent/callbacks/__init__.py +0 -0
- langroid/agent/callbacks/chainlit.py +608 -0
- langroid/agent/chat_agent.py +164 -100
- langroid/agent/chat_document.py +19 -2
- langroid/agent/openai_assistant.py +20 -10
- langroid/agent/special/__init__.py +33 -10
- langroid/agent/special/doc_chat_agent.py +521 -108
- langroid/agent/special/lance_doc_chat_agent.py +258 -0
- langroid/agent/special/lance_rag/__init__.py +9 -0
- langroid/agent/special/lance_rag/critic_agent.py +136 -0
- langroid/agent/special/lance_rag/lance_rag_task.py +80 -0
- langroid/agent/special/lance_rag/query_planner_agent.py +180 -0
- langroid/agent/special/lance_tools.py +44 -0
- langroid/agent/special/neo4j/__init__.py +0 -0
- langroid/agent/special/neo4j/csv_kg_chat.py +174 -0
- langroid/agent/special/neo4j/neo4j_chat_agent.py +370 -0
- langroid/agent/special/neo4j/utils/__init__.py +0 -0
- langroid/agent/special/neo4j/utils/system_message.py +46 -0
- langroid/agent/special/relevance_extractor_agent.py +23 -7
- langroid/agent/special/retriever_agent.py +29 -174
- langroid/agent/special/sql/__init__.py +7 -0
- langroid/agent/special/sql/sql_chat_agent.py +47 -23
- langroid/agent/special/sql/utils/__init__.py +11 -0
- langroid/agent/special/sql/utils/description_extractors.py +95 -46
- langroid/agent/special/sql/utils/populate_metadata.py +28 -21
- langroid/agent/special/table_chat_agent.py +43 -9
- langroid/agent/task.py +423 -114
- langroid/agent/tool_message.py +67 -10
- langroid/agent/tools/__init__.py +8 -0
- langroid/agent/tools/duckduckgo_search_tool.py +66 -0
- langroid/agent/tools/google_search_tool.py +11 -0
- langroid/agent/tools/metaphor_search_tool.py +67 -0
- langroid/agent/tools/recipient_tool.py +6 -24
- langroid/agent/tools/sciphi_search_rag_tool.py +79 -0
- langroid/cachedb/__init__.py +6 -0
- langroid/embedding_models/__init__.py +24 -0
- langroid/embedding_models/base.py +9 -1
- langroid/embedding_models/models.py +117 -17
- langroid/embedding_models/protoc/embeddings.proto +19 -0
- langroid/embedding_models/protoc/embeddings_pb2.py +33 -0
- langroid/embedding_models/protoc/embeddings_pb2.pyi +50 -0
- langroid/embedding_models/protoc/embeddings_pb2_grpc.py +79 -0
- langroid/embedding_models/remote_embeds.py +153 -0
- langroid/language_models/__init__.py +22 -0
- langroid/language_models/azure_openai.py +47 -4
- langroid/language_models/base.py +26 -10
- langroid/language_models/config.py +5 -0
- langroid/language_models/openai_gpt.py +407 -121
- langroid/language_models/prompt_formatter/__init__.py +9 -0
- langroid/language_models/prompt_formatter/base.py +4 -6
- langroid/language_models/prompt_formatter/hf_formatter.py +135 -0
- langroid/language_models/utils.py +10 -9
- langroid/mytypes.py +10 -4
- langroid/parsing/__init__.py +33 -1
- langroid/parsing/document_parser.py +259 -63
- langroid/parsing/image_text.py +32 -0
- langroid/parsing/parse_json.py +143 -0
- langroid/parsing/parser.py +20 -7
- langroid/parsing/repo_loader.py +108 -46
- langroid/parsing/search.py +8 -0
- langroid/parsing/table_loader.py +44 -0
- langroid/parsing/url_loader.py +59 -13
- langroid/parsing/urls.py +18 -9
- langroid/parsing/utils.py +130 -9
- langroid/parsing/web_search.py +73 -0
- langroid/prompts/__init__.py +7 -0
- langroid/prompts/chat-gpt4-system-prompt.md +68 -0
- langroid/prompts/prompts_config.py +1 -1
- langroid/utils/__init__.py +10 -0
- langroid/utils/algorithms/__init__.py +3 -0
- langroid/utils/configuration.py +0 -1
- langroid/utils/constants.py +4 -0
- langroid/utils/logging.py +2 -5
- langroid/utils/output/__init__.py +15 -2
- langroid/utils/output/status.py +33 -0
- langroid/utils/pandas_utils.py +30 -0
- langroid/utils/pydantic_utils.py +446 -4
- langroid/utils/system.py +36 -1
- langroid/vector_store/__init__.py +34 -2
- langroid/vector_store/base.py +33 -2
- langroid/vector_store/chromadb.py +42 -13
- langroid/vector_store/lancedb.py +226 -60
- langroid/vector_store/meilisearch.py +7 -6
- langroid/vector_store/momento.py +3 -2
- langroid/vector_store/qdrantdb.py +82 -11
- {langroid-0.1.139.dist-info → langroid-0.1.219.dist-info}/METADATA +190 -129
- langroid-0.1.219.dist-info/RECORD +127 -0
- langroid/agent/special/recipient_validator_agent.py +0 -157
- langroid/parsing/json.py +0 -64
- langroid/utils/web/selenium_login.py +0 -36
- langroid-0.1.139.dist-info/RECORD +0 -103
- {langroid-0.1.139.dist-info → langroid-0.1.219.dist-info}/LICENSE +0 -0
- {langroid-0.1.139.dist-info → langroid-0.1.219.dist-info}/WHEEL +0 -0
@@ -1,6 +1,9 @@
|
|
1
|
+
import hashlib
|
2
|
+
import json
|
1
3
|
import logging
|
2
4
|
import os
|
3
|
-
|
5
|
+
import uuid
|
6
|
+
from typing import List, Optional, Sequence, Tuple, TypeVar
|
4
7
|
|
5
8
|
from dotenv import load_dotenv
|
6
9
|
from qdrant_client import QdrantClient
|
@@ -26,6 +29,33 @@ from langroid.vector_store.base import VectorStore, VectorStoreConfig
|
|
26
29
|
logger = logging.getLogger(__name__)
|
27
30
|
|
28
31
|
|
32
|
+
T = TypeVar("T")


def from_optional(x: Optional[T], default: T) -> T:
    """
    Return `x` unless it is None, in which case return `default`.

    Only None triggers the fallback; falsy values such as 0 or an empty
    string are returned unchanged.

    Args:
        x: the possibly-missing value.
        default: the value to use when `x` is None.

    Returns:
        `x` if it is not None, otherwise `default`.
    """
    return default if x is None else x
|
40
|
+
|
41
|
+
|
42
|
+
def is_valid_uuid(uuid_to_test: str) -> bool:
    """
    Check if a given string is a canonical UUID or an unsigned 64-bit integer.

    Despite the name, two kinds of input are accepted:

    - a UUID written in the exact form that `str(uuid.UUID(...))` produces
      (lowercase, hyphenated); other spellings that `uuid.UUID` can parse
      (uppercase, no hyphens, braces, `urn:uuid:` prefix) are rejected
      because they do not round-trip to the same string;
    - anything `int()` can convert to a value in the range [0, 2**64 - 1].

    Args:
        uuid_to_test: the candidate id.

    Returns:
        True if the input matches one of the two forms above, else False.
        Inputs that neither `uuid.UUID` nor `int` can handle (e.g. None)
        yield False rather than raising.
    """
    max_uint64 = (1 << 64) - 1

    try:
        uuid_obj = uuid.UUID(uuid_to_test)
        # Parsed as a UUID: the verdict is final, no integer fallback.
        return str(uuid_obj) == uuid_to_test
    except (ValueError, TypeError, AttributeError):
        # ValueError: malformed string; TypeError/AttributeError: non-str
        # input. Fall through to the integer check.
        pass

    # Check for valid unsigned 64-bit integer
    try:
        int_value = int(uuid_to_test)
    except (ValueError, TypeError):
        # TypeError covers inputs such as None that int() cannot convert.
        return False
    return 0 <= int_value <= max_uint64
|
57
|
+
|
58
|
+
|
29
59
|
class QdrantDBConfig(VectorStoreConfig):
|
30
60
|
cloud: bool = True
|
31
61
|
collection_name: str | None = "temp"
|
@@ -35,7 +65,7 @@ class QdrantDBConfig(VectorStoreConfig):
|
|
35
65
|
|
36
66
|
|
37
67
|
class QdrantDB(VectorStore):
|
38
|
-
def __init__(self, config: QdrantDBConfig):
|
68
|
+
def __init__(self, config: QdrantDBConfig = QdrantDBConfig()):
|
39
69
|
super().__init__(config)
|
40
70
|
self.config = config
|
41
71
|
emb_model = EmbeddingModel.create(config.embedding)
|
@@ -112,8 +142,10 @@ class QdrantDB(VectorStore):
|
|
112
142
|
n_non_empty_deletes = 0
|
113
143
|
for name in coll_names:
|
114
144
|
info = self.client.get_collection(collection_name=name)
|
115
|
-
|
116
|
-
|
145
|
+
points_count = from_optional(info.points_count, 0)
|
146
|
+
|
147
|
+
n_empty_deletes += points_count == 0
|
148
|
+
n_non_empty_deletes += points_count > 0
|
117
149
|
self.client.delete_collection(collection_name=name)
|
118
150
|
logger.warning(
|
119
151
|
f"""
|
@@ -138,7 +170,12 @@ class QdrantDB(VectorStore):
|
|
138
170
|
for coll in colls:
|
139
171
|
try:
|
140
172
|
counts.append(
|
141
|
-
|
173
|
+
from_optional(
|
174
|
+
self.client.get_collection(
|
175
|
+
collection_name=coll.name
|
176
|
+
).points_count,
|
177
|
+
0,
|
178
|
+
)
|
142
179
|
)
|
143
180
|
except Exception:
|
144
181
|
logger.warning(f"Error getting collection {coll.name}")
|
@@ -158,7 +195,10 @@ class QdrantDB(VectorStore):
|
|
158
195
|
collections = self.list_collections()
|
159
196
|
if collection_name in collections:
|
160
197
|
coll = self.client.get_collection(collection_name=collection_name)
|
161
|
-
if
|
198
|
+
if (
|
199
|
+
coll.status == CollectionStatus.GREEN
|
200
|
+
and from_optional(coll.points_count, 0) > 0
|
201
|
+
):
|
162
202
|
logger.warning(f"Non-empty Collection {collection_name} already exists")
|
163
203
|
if not replace:
|
164
204
|
logger.warning("Not replacing collection")
|
@@ -182,10 +222,15 @@ class QdrantDB(VectorStore):
|
|
182
222
|
logger.setLevel(level)
|
183
223
|
|
184
224
|
def add_documents(self, documents: Sequence[Document]) -> None:
|
225
|
+
# Add id to metadata if not already present
|
185
226
|
super().maybe_add_ids(documents)
|
227
|
+
# Fix the ids due to qdrant finickiness
|
228
|
+
for doc in documents:
|
229
|
+
doc.metadata.id = str(self._to_int_or_uuid(doc.metadata.id))
|
186
230
|
colls = self.list_collections(empty=True)
|
187
231
|
if len(documents) == 0:
|
188
232
|
return
|
233
|
+
document_dicts = [doc.dict() for doc in documents]
|
189
234
|
embedding_vecs = self.embedding_fn([doc.content for doc in documents])
|
190
235
|
if self.config.collection_name is None:
|
191
236
|
raise ValueError("No collection name set, cannot ingest docs")
|
@@ -201,7 +246,7 @@ class QdrantDB(VectorStore):
|
|
201
246
|
points=Batch(
|
202
247
|
ids=ids[i : i + b],
|
203
248
|
vectors=embedding_vecs[i : i + b],
|
204
|
-
payloads=
|
249
|
+
payloads=document_dicts[i : i + b],
|
205
250
|
),
|
206
251
|
)
|
207
252
|
|
@@ -210,19 +255,42 @@ class QdrantDB(VectorStore):
|
|
210
255
|
|
211
256
|
def _to_int_or_uuid(self, id: str) -> int | str:
    """
    Map an arbitrary document id to an int or a UUID string.

    Decision order:
    1. If `id` converts with `int()` and `is_valid_uuid(id)` accepts it,
       return the integer value.
    2. If `id` is a string that `is_valid_uuid` accepts, return it unchanged.
    3. Otherwise return a UUID string built from the first 32 hex digits of
       the SHA-1 digest of `str(id)`, so the same id always maps to the
       same UUID.

    Args:
        id: the document id to convert.

    Returns:
        An int (case 1) or a string (cases 2 and 3).
    """
    try:
        numeric_id = int(id)
        if is_valid_uuid(id):
            return numeric_id
    except ValueError:
        # Not an integer literal; try the UUID route below.
        pass

    already_usable = isinstance(id, str) and is_valid_uuid(id)
    if already_usable:
        return id

    # Derive a stable UUID: SHA-1 yields 40 hex digits, a UUID needs 32.
    sha1_hex = hashlib.sha1(str(id).encode()).hexdigest()
    return str(uuid.UUID(sha1_hex[:32]))
|
283
|
+
|
284
|
+
def get_all_documents(self, where: str = "") -> List[Document]:
|
218
285
|
if self.config.collection_name is None:
|
219
286
|
raise ValueError("No collection name set, cannot retrieve docs")
|
220
287
|
docs = []
|
221
288
|
offset = 0
|
289
|
+
filter = Filter() if where == "" else Filter.parse_obj(json.loads(where))
|
222
290
|
while True:
|
223
291
|
results, next_page_offset = self.client.scroll(
|
224
292
|
collection_name=self.config.collection_name,
|
225
|
-
scroll_filter=
|
293
|
+
scroll_filter=filter,
|
226
294
|
offset=offset,
|
227
295
|
limit=10_000, # try getting all at once, if not we keep paging
|
228
296
|
with_payload=True,
|
@@ -260,7 +328,10 @@ class QdrantDB(VectorStore):
|
|
260
328
|
) -> List[Tuple[Document, float]]:
|
261
329
|
embedding = self.embedding_fn([text])[0]
|
262
330
|
# TODO filter may not work yet
|
263
|
-
|
331
|
+
if where is None or where == "":
|
332
|
+
filter = Filter()
|
333
|
+
else:
|
334
|
+
filter = Filter.parse_obj(json.loads(where))
|
264
335
|
if self.config.collection_name is None:
|
265
336
|
raise ValueError("No collection name set, cannot search")
|
266
337
|
search_result: List[ScoredPoint] = self.client.search(
|