agno-2.3.11-py3-none-any.whl → agno-2.3.12-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/compression/manager.py +87 -16
- agno/db/mongo/async_mongo.py +1 -1
- agno/db/mongo/mongo.py +1 -1
- agno/exceptions.py +1 -0
- agno/knowledge/knowledge.py +83 -20
- agno/knowledge/reader/csv_reader.py +2 -2
- agno/knowledge/reader/text_reader.py +15 -3
- agno/knowledge/reader/wikipedia_reader.py +33 -1
- agno/memory/strategies/base.py +3 -4
- agno/models/anthropic/claude.py +44 -0
- agno/models/aws/bedrock.py +60 -0
- agno/models/base.py +124 -30
- agno/models/google/gemini.py +141 -23
- agno/models/litellm/chat.py +25 -0
- agno/models/openai/responses.py +44 -0
- agno/os/routers/knowledge/knowledge.py +0 -1
- agno/run/agent.py +17 -0
- agno/run/requirement.py +89 -6
- agno/utils/print_response/agent.py +4 -4
- agno/utils/print_response/team.py +12 -12
- agno/utils/tokens.py +643 -27
- agno/vectordb/chroma/chromadb.py +6 -2
- agno/vectordb/lancedb/lance_db.py +3 -37
- agno/vectordb/milvus/milvus.py +6 -32
- agno/vectordb/mongodb/mongodb.py +0 -27
- agno/vectordb/pgvector/pgvector.py +15 -5
- agno/vectordb/pineconedb/pineconedb.py +0 -17
- agno/vectordb/qdrant/qdrant.py +6 -29
- agno/vectordb/redis/redisdb.py +0 -26
- agno/vectordb/singlestore/singlestore.py +16 -8
- agno/vectordb/surrealdb/surrealdb.py +0 -36
- agno/vectordb/weaviate/weaviate.py +6 -2
- {agno-2.3.11.dist-info → agno-2.3.12.dist-info}/METADATA +4 -1
- {agno-2.3.11.dist-info → agno-2.3.12.dist-info}/RECORD +37 -37
- {agno-2.3.11.dist-info → agno-2.3.12.dist-info}/WHEEL +0 -0
- {agno-2.3.11.dist-info → agno-2.3.12.dist-info}/licenses/LICENSE +0 -0
- {agno-2.3.11.dist-info → agno-2.3.12.dist-info}/top_level.txt +0 -0
agno/vectordb/chroma/chromadb.py
CHANGED
@@ -276,7 +276,9 @@ class ChromaDb(VectorDb):
 
         for document in documents:
             cleaned_content = document.content.replace("\x00", "\ufffd")
-
+            # Include content_hash in ID to ensure uniqueness across different content hashes
+            base_id = document.id or md5(cleaned_content.encode()).hexdigest()
+            doc_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
 
             # Handle metadata and filters
             metadata = document.meta_data or {}
@@ -435,7 +437,9 @@ class ChromaDb(VectorDb):
 
         for document in documents:
             cleaned_content = document.content.replace("\x00", "\ufffd")
-
+            # Include content_hash in ID to ensure uniqueness across different content hashes
+            base_id = document.id or md5(cleaned_content.encode()).hexdigest()
+            doc_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
 
             # Handle metadata and filters
             metadata = document.meta_data or {}
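Every backend below repeats the ID change shown above: the record ID is now derived from the document's base ID plus the batch's content_hash, rather than from the content alone. A minimal sketch of the shared scheme, assuming a hypothetical helper name (scoped_doc_id is not a function in the library):

from hashlib import md5
from typing import Optional

def scoped_doc_id(content: str, content_hash: str, doc_id: Optional[str] = None) -> str:
    # NUL bytes are replaced with U+FFFD before hashing, mirroring the diffs
    cleaned = content.replace("\x00", "\ufffd")
    # Fall back to a content-derived ID when the document has no explicit ID
    base_id = doc_id or md5(cleaned.encode()).hexdigest()
    # Scoping by content_hash means identical content inserted under
    # different content hashes produces distinct records
    return md5(f"{base_id}_{content_hash}".encode()).hexdigest()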
agno/vectordb/lancedb/lance_db.py
CHANGED
@@ -260,39 +260,6 @@ class LanceDb(VectorDb):
         tbl = self.connection.create_table(name=self.table_name, schema=schema, mode="overwrite", exist_ok=True)  # type: ignore
         return tbl  # type: ignore
 
-    def doc_exists(self, document: Document) -> bool:
-        """
-        Validating if the document exists or not
-
-        Args:
-            document (Document): Document to validate
-        """
-        try:
-            if self.table is not None:
-                cleaned_content = document.content.replace("\x00", "\ufffd")
-                doc_id = md5(cleaned_content.encode()).hexdigest()
-                result = self.table.search().where(f"{self._id}='{doc_id}'").to_arrow()
-                return len(result) > 0
-        except Exception:
-            # Search sometimes fails with stale cache data, it means the doc doesn't exist
-            return False
-
-        return False
-
-    async def async_doc_exists(self, document: Document) -> bool:
-        """
-        Asynchronously validate if the document exists
-
-        Args:
-            document (Document): Document to validate
-
-        Returns:
-            bool: True if document exists, False otherwise
-        """
-        if self.connection:
-            self.table = self.connection.open_table(name=self.table_name)
-        return self.doc_exists(document)
-
     def insert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
         """
         Insert documents into the database.
@@ -309,9 +276,6 @@ class LanceDb(VectorDb):
         data = []
 
         for document in documents:
-            if self.doc_exists(document):
-                continue
-
             # Add filters to document metadata if provided
             if filters:
                 meta_data = document.meta_data.copy() if document.meta_data else {}
@@ -320,7 +284,9 @@ class LanceDb(VectorDb):
 
             document.embed(embedder=self.embedder)
             cleaned_content = document.content.replace("\x00", "\ufffd")
-
+            # Include content_hash in ID to ensure uniqueness across different content hashes
+            base_id = document.id or md5(cleaned_content.encode()).hexdigest()
+            doc_id = str(md5(f"{base_id}_{content_hash}".encode()).hexdigest())
             payload = {
                 "name": document.name,
                 "meta_data": document.meta_data,
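Dropping doc_exists/async_doc_exists and the skip-if-exists loop is consistent with the new ID scheme: an existence check keyed on content alone would wrongly skip identical content arriving under a new content_hash. A standalone illustration (the content-hash strings here are made up):

from hashlib import md5

content = "same chunk text"
base = md5(content.replace("\x00", "\ufffd").encode()).hexdigest()

id_v1 = md5(f"{base}_hash-of-source-v1".encode()).hexdigest()
id_v2 = md5(f"{base}_hash-of-source-v2".encode()).hexdigest()

# Identical content, different content_hash -> distinct record IDs,
# so the second insert lands as a new record instead of being skipped.
assert id_v1 != id_v2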
agno/vectordb/milvus/milvus.py
CHANGED
@@ -229,7 +229,9 @@ class Milvus(VectorDb):
         """
 
         cleaned_content = document.content.replace("\x00", "\ufffd")
-
+        # Include content_hash in ID to ensure uniqueness across different content hashes
+        base_id = document.id or md5(cleaned_content.encode()).hexdigest()
+        doc_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
 
         # Convert dictionary fields to JSON strings
         meta_data_str = json.dumps(document.meta_data) if document.meta_data else "{}"
@@ -317,36 +319,6 @@ class Milvus(VectorDb):
             max_length=65_535,
         )
 
-    def doc_exists(self, document: Document) -> bool:
-        """
-        Validating if the document exists or not
-
-        Args:
-            document (Document): Document to validate
-        """
-        if self.client:
-            cleaned_content = document.content.replace("\x00", "\ufffd")
-            doc_id = md5(cleaned_content.encode()).hexdigest()
-            collection_points = self.client.get(
-                collection_name=self.collection,
-                ids=[doc_id],
-            )
-            return len(collection_points) > 0
-        return False
-
-    async def async_doc_exists(self, document: Document) -> bool:
-        """
-        Check if document exists asynchronously.
-        AsyncMilvusClient supports get().
-        """
-        cleaned_content = document.content.replace("\x00", "\ufffd")
-        doc_id = md5(cleaned_content.encode()).hexdigest()
-        collection_points = await self.async_client.get(
-            collection_name=self.collection,
-            ids=[doc_id],
-        )
-        return len(collection_points) > 0
-
     def name_exists(self, name: str) -> bool:
         """
         Validates if a document with the given name exists in the collection.
@@ -528,7 +500,9 @@ class Milvus(VectorDb):
             log_debug(f"Skipping document without embedding: {document.name} ({document.meta_data})")
             return None
         cleaned_content = document.content.replace("\x00", "\ufffd")
-
+        # Include content_hash in ID to ensure uniqueness across different content hashes
+        base_id = document.id or md5(cleaned_content.encode()).hexdigest()
+        doc_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
 
         meta_data = document.meta_data or {}
         if filters:
agno/vectordb/mongodb/mongodb.py
CHANGED
@@ -471,20 +471,6 @@ class MongoDb(VectorDb):
         if self.wait_until_index_ready_in_seconds:
             await self._wait_for_index_ready_async()
 
-    def doc_exists(self, document: Document) -> bool:
-        """Check if a document exists in the MongoDB collection based on its content."""
-        try:
-            collection = self._get_collection()
-            # Use content hash as document ID
-            doc_id = md5(document.content.encode("utf-8")).hexdigest()
-            result = collection.find_one({"_id": doc_id})
-            exists = result is not None
-            log_debug(f"Document {'exists' if exists else 'does not exist'}: {doc_id}")
-            return exists
-        except Exception as e:
-            logger.error(f"Error checking document existence: {e}")
-            return False
-
     def name_exists(self, name: str) -> bool:
         """Check if a document with a given name exists in the collection."""
         try:
@@ -1024,19 +1010,6 @@
             logger.error(f"Error getting document count: {e}")
             return 0
 
-    async def async_doc_exists(self, document: Document) -> bool:
-        """Check if a document exists asynchronously."""
-        try:
-            collection = await self._get_async_collection()
-            doc_id = md5(document.content.encode("utf-8")).hexdigest()
-            result = await collection.find_one({"_id": doc_id})
-            exists = result is not None
-            log_debug(f"Document {'exists' if exists else 'does not exist'}: {doc_id}")
-            return exists
-        except Exception as e:
-            logger.error(f"Error checking document existence asynchronously: {e}")
-            return False
-
     async def async_insert(
         self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
     ) -> None:
agno/vectordb/pgvector/pgvector.py
CHANGED
@@ -367,7 +367,10 @@ class PgVector(VectorDb):
             for doc in batch_docs:
                 try:
                     cleaned_content = self._clean_content(doc.content)
-
+                    # Include content_hash in ID to ensure uniqueness across different content hashes
+                    # This allows the same URL/content to be inserted with different descriptions
+                    base_id = doc.id or md5(cleaned_content.encode()).hexdigest()
+                    record_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
 
                     meta_data = doc.meta_data or {}
                     if filters:
@@ -456,7 +459,9 @@ class PgVector(VectorDb):
             batch_records_dict: Dict[str, Dict[str, Any]] = {}  # Use dict to deduplicate by ID
             for doc in batch_docs:
                 try:
-
+                    record = self._get_document_record(doc, filters, content_hash)
+                    # Use the generated record ID (which includes content_hash) for deduplication
+                    batch_records_dict[record["id"]] = record
                 except Exception as e:
                     log_error(f"Error processing document '{doc.name}': {e}")
 
@@ -497,7 +502,10 @@ class PgVector(VectorDb):
     ) -> Dict[str, Any]:
         doc.embed(embedder=self.embedder)
         cleaned_content = self._clean_content(doc.content)
-
+        # Include content_hash in ID to ensure uniqueness across different content hashes
+        # This allows the same URL/content to be inserted with different descriptions
+        base_id = doc.id or md5(cleaned_content.encode()).hexdigest()
+        record_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
 
         meta_data = doc.meta_data or {}
         if filters:
@@ -630,7 +638,10 @@ class PgVector(VectorDb):
             for idx, doc in enumerate(batch_docs):
                 try:
                     cleaned_content = self._clean_content(doc.content)
-
+                    # Include content_hash in ID to ensure uniqueness across different content hashes
+                    # This allows the same URL/content to be inserted with different descriptions
+                    base_id = doc.id or md5(cleaned_content.encode()).hexdigest()
+                    record_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
 
                     if (
                         doc.embedding is not None
@@ -698,7 +709,6 @@ class PgVector(VectorDb):
             content_id (str): The ID of the document.
             metadata (Dict[str, Any]): The metadata to update.
         """
-        print("metadata is: ", metadata)
         try:
             with self.Session() as sess:
                 # Merge JSONB for metadata, but replace filters entirely (absolute value)
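The second PgVector hunk deduplicates each batch by keying a dict on the generated record ID, so within one batch the last occurrence of a scoped ID wins. A self-contained sketch of the pattern (build_record is a made-up stand-in for the library's _get_document_record):

from hashlib import md5
from typing import Any, Dict, List

def build_record(content: str, content_hash: str) -> Dict[str, Any]:
    base_id = md5(content.encode()).hexdigest()
    record_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
    return {"id": record_id, "content": content}

def dedupe_batch(contents: List[str], content_hash: str) -> List[Dict[str, Any]]:
    batch_records: Dict[str, Dict[str, Any]] = {}
    for content in contents:
        record = build_record(content, content_hash)
        # A repeated scoped ID overwrites its earlier entry, so each ID
        # appears at most once in the flushed batch
        batch_records[record["id"]] = record
    return list(batch_records.values())

# Two identical chunks in one batch collapse into a single record:
assert len(dedupe_batch(["a", "a", "b"], "hash-1")) == 2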
agno/vectordb/pineconedb/pineconedb.py
CHANGED
@@ -217,23 +217,6 @@ class PineconeDb(VectorDb):
         log_debug(f"Deleting index: {self.name}")
         self.client.delete_index(name=self.name, timeout=self.timeout)
 
-    def doc_exists(self, document: Document) -> bool:
-        """Check if a document exists in the index.
-
-        Args:
-            document (Document): The document to check.
-
-        Returns:
-            bool: True if the document exists, False otherwise.
-
-        """
-        response = self.index.fetch(ids=[document.id], namespace=self.namespace)
-        return len(response.vectors) > 0
-
-    async def async_doc_exists(self, document: Document) -> bool:
-        """Check if a document exists in the index asynchronously."""
-        return await asyncio.to_thread(self.doc_exists, document)
-
     def name_exists(self, name: str) -> bool:
         """Check if an index with the given name exists.
 
agno/vectordb/qdrant/qdrant.py
CHANGED
@@ -259,33 +259,6 @@ class Qdrant(VectorDb):
             else None,
         )
 
-    def doc_exists(self, document: Document) -> bool:
-        """
-        Validating if the document exists or not
-
-        Args:
-            document (Document): Document to validate
-        """
-        if self.client:
-            cleaned_content = document.content.replace("\x00", "\ufffd")
-            doc_id = md5(cleaned_content.encode()).hexdigest()
-            collection_points = self.client.retrieve(
-                collection_name=self.collection,
-                ids=[doc_id],
-            )
-            return len(collection_points) > 0
-        return False
-
-    async def async_doc_exists(self, document: Document) -> bool:
-        """Check if a document exists asynchronously."""
-        cleaned_content = document.content.replace("\x00", "\ufffd")
-        doc_id = md5(cleaned_content.encode()).hexdigest()
-        collection_points = await self.async_client.retrieve(
-            collection_name=self.collection,
-            ids=[doc_id],
-        )
-        return len(collection_points) > 0
-
     def name_exists(self, name: str) -> bool:
         """
         Validates if a document with the given name exists in the collection.
@@ -347,7 +320,9 @@ class Qdrant(VectorDb):
         points = []
         for document in documents:
             cleaned_content = document.content.replace("\x00", "\ufffd")
-
+            # Include content_hash in ID to ensure uniqueness across different content hashes
+            base_id = document.id or md5(cleaned_content.encode()).hexdigest()
+            doc_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
 
             # TODO(v2.0.0): Remove conditional vector naming logic
             if self.use_named_vectors:
@@ -457,7 +432,9 @@ class Qdrant(VectorDb):
 
         async def process_document(document):
             cleaned_content = document.content.replace("\x00", "\ufffd")
-
+            # Include content_hash in ID to ensure uniqueness across different content hashes
+            base_id = document.id or md5(cleaned_content.encode()).hexdigest()
+            doc_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
 
             if self.search_type == SearchType.vector:
                 # For vector search, maintain backward compatibility with unnamed vectors
agno/vectordb/redis/redisdb.py
CHANGED
@@ -184,32 +184,6 @@ class RedisDB(VectorDb):
             log_error(f"Error creating Redis index: {e}")
             raise
 
-    def doc_exists(self, document: Document) -> bool:
-        """Check if a document exists in the index."""
-        try:
-            doc_id = document.id or hash_string_sha256(document.content)
-            return self.id_exists(doc_id)
-        except Exception as e:
-            log_error(f"Error checking if document exists: {e}")
-            return False
-
-    async def async_doc_exists(self, document: Document) -> bool:
-        """Async version of doc_exists method."""
-        try:
-            doc_id = document.id or hash_string_sha256(document.content)
-            async_index = await self._get_async_index()
-            id_filter = Tag("id") == doc_id
-            query = FilterQuery(
-                filter_expression=id_filter,
-                return_fields=["id"],
-                num_results=1,
-            )
-            results = await async_index.query(query)
-            return len(results) > 0
-        except Exception as e:
-            log_error(f"Error checking if document exists: {e}")
-            return False
-
     def name_exists(self, name: str) -> bool:
         """Check if a document with the given name exists."""
         try:
agno/vectordb/singlestore/singlestore.py
CHANGED
@@ -185,8 +185,10 @@ class SingleStore(VectorDb):
         for document in documents:
             document.embed(embedder=self.embedder)
             cleaned_content = document.content.replace("\x00", "\ufffd")
-
-
+            # Include content_hash in ID to ensure uniqueness across different content hashes
+            base_id = document.id or md5(cleaned_content.encode()).hexdigest()
+            record_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
+            _id = record_id
 
             meta_data_json = json.dumps(document.meta_data)
             usage_json = json.dumps(document.usage)
@@ -246,8 +248,10 @@ class SingleStore(VectorDb):
         for document in documents:
             document.embed(embedder=self.embedder)
             cleaned_content = document.content.replace("\x00", "\ufffd")
-
-
+            # Include content_hash in ID to ensure uniqueness across different content hashes
+            base_id = document.id or md5(cleaned_content.encode()).hexdigest()
+            record_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
+            _id = record_id
 
             meta_data_json = json.dumps(document.meta_data)
             usage_json = json.dumps(document.usage)
@@ -548,8 +552,10 @@ class SingleStore(VectorDb):
         counter = 0
         for document in documents:
             cleaned_content = document.content.replace("\x00", "\ufffd")
-
-
+            # Include content_hash in ID to ensure uniqueness across different content hashes
+            base_id = document.id or md5(cleaned_content.encode()).hexdigest()
+            record_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
+            _id = record_id
 
             meta_data_json = json.dumps(document.meta_data)
             usage_json = json.dumps(document.usage)
@@ -632,8 +638,10 @@ class SingleStore(VectorDb):
         counter = 0
         for document in documents:
             cleaned_content = document.content.replace("\x00", "\ufffd")
-
-
+            # Include content_hash in ID to ensure uniqueness across different content hashes
+            base_id = document.id or md5(cleaned_content.encode()).hexdigest()
+            record_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
+            _id = record_id
 
             meta_data_json = json.dumps(document.meta_data)
             usage_json = json.dumps(document.usage)
agno/vectordb/surrealdb/surrealdb.py
CHANGED
@@ -31,12 +31,6 @@ class SurrealDb(VectorDb):
         DEFINE INDEX IF NOT EXISTS vector_idx ON {collection} FIELDS embedding HNSW DIMENSION {dimensions} DIST {distance};
     """
 
-    DOC_EXISTS_QUERY: Final[str] = """
-        SELECT * FROM {collection}
-        WHERE content = $content
-        LIMIT 1
-    """
-
     NAME_EXISTS_QUERY: Final[str] = """
         SELECT * FROM {collection}
         WHERE meta_data.name = $name
@@ -221,23 +215,6 @@ class SurrealDb(VectorDb):
         )
         self.client.query(query)
 
-    def doc_exists(self, document: Document) -> bool:
-        """Check if a document exists by its content.
-
-        Args:
-            document: The document to check.
-
-        Returns:
-            True if the document exists, False otherwise.
-
-        """
-        log_debug(f"Checking if document exists: {document.content}")
-        result = self.client.query(
-            self.DOC_EXISTS_QUERY.format(collection=self.collection),
-            {"content": document.content},
-        )
-        return bool(self._extract_result(result))
-
     def name_exists(self, name: str) -> bool:
         """Check if a document exists by its name.
 
@@ -493,19 +470,6 @@ class SurrealDb(VectorDb):
             ),
         )
 
-    async def async_doc_exists(self, document: Document) -> bool:
-        """Check if a document exists by its content asynchronously.
-
-        Returns:
-            True if the document exists, False otherwise.
-
-        """
-        response = await self.async_client.query(
-            self.DOC_EXISTS_QUERY.format(collection=self.collection),
-            {"content": document.content},
-        )
-        return bool(self._extract_result(response))
-
     async def async_name_exists(self, name: str) -> bool:
         """Check if a document exists by its name asynchronously.
 
agno/vectordb/weaviate/weaviate.py
CHANGED
@@ -247,7 +247,9 @@ class Weaviate(VectorDb):
                 continue
 
             cleaned_content = document.content.replace("\x00", "\ufffd")
-
+            # Include content_hash in ID to ensure uniqueness across different content hashes
+            base_id = document.id or md5(cleaned_content.encode()).hexdigest()
+            record_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
             doc_uuid = uuid.UUID(hex=record_id[:32])
 
             # Merge filters with metadata
@@ -338,7 +340,9 @@ class Weaviate(VectorDb):
 
         # Clean content and generate UUID
         cleaned_content = document.content.replace("\x00", "\ufffd")
-
+        # Include content_hash in ID to ensure uniqueness across different content hashes
+        base_id = document.id or md5(cleaned_content.encode()).hexdigest()
+        record_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
        doc_uuid = uuid.UUID(hex=record_id[:32])
 
         # Serialize meta_data to JSON string
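Weaviate object IDs must be UUIDs, so the scoped MD5 digest is folded into one via uuid.UUID(hex=record_id[:32]). A quick standalone check of that conversion (the hashed bytes below are arbitrary example input):

import uuid
from hashlib import md5

record_id = md5(b"some-base-id_some-content-hash").hexdigest()
# An MD5 hex digest is exactly 32 hex characters, so record_id[:32]
# is the full digest reinterpreted as a UUID
doc_uuid = uuid.UUID(hex=record_id[:32])
print(doc_uuid)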
{agno-2.3.11.dist-info → agno-2.3.12.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: agno
-Version: 2.3.11
+Version: 2.3.12
 Summary: Agno: a lightweight library for building Multi-Agent Systems
 Author-email: Ashpreet Bedi <ashpreet@agno.com>
 Project-URL: homepage, https://agno.com
@@ -102,6 +102,9 @@ Provides-Extra: openai
 Requires-Dist: openai; extra == "openai"
 Provides-Extra: portkey
 Requires-Dist: portkey-ai; extra == "portkey"
+Provides-Extra: tokenizers
+Requires-Dist: tiktoken; extra == "tokenizers"
+Requires-Dist: tokenizers; extra == "tokenizers"
 Provides-Extra: agentql
 Requires-Dist: agentql; extra == "agentql"
 Provides-Extra: apify