agno-2.3.10-py3-none-any.whl → agno-2.3.12-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. agno/compression/manager.py +87 -16
  2. agno/db/base.py +5 -5
  3. agno/db/dynamo/dynamo.py +2 -2
  4. agno/db/firestore/firestore.py +2 -2
  5. agno/db/gcs_json/gcs_json_db.py +2 -2
  6. agno/db/in_memory/in_memory_db.py +2 -2
  7. agno/db/json/json_db.py +2 -2
  8. agno/db/mongo/async_mongo.py +170 -68
  9. agno/db/mongo/mongo.py +170 -76
  10. agno/db/mysql/async_mysql.py +93 -69
  11. agno/db/mysql/mysql.py +93 -68
  12. agno/db/postgres/async_postgres.py +104 -78
  13. agno/db/postgres/postgres.py +97 -69
  14. agno/db/redis/redis.py +2 -2
  15. agno/db/singlestore/singlestore.py +91 -66
  16. agno/db/sqlite/async_sqlite.py +101 -78
  17. agno/db/sqlite/sqlite.py +97 -69
  18. agno/db/surrealdb/surrealdb.py +2 -2
  19. agno/exceptions.py +1 -0
  20. agno/knowledge/chunking/fixed.py +4 -1
  21. agno/knowledge/knowledge.py +105 -24
  22. agno/knowledge/reader/csv_reader.py +2 -2
  23. agno/knowledge/reader/text_reader.py +15 -3
  24. agno/knowledge/reader/wikipedia_reader.py +33 -1
  25. agno/knowledge/utils.py +52 -7
  26. agno/memory/strategies/base.py +3 -4
  27. agno/models/anthropic/claude.py +44 -0
  28. agno/models/aws/bedrock.py +60 -0
  29. agno/models/base.py +124 -30
  30. agno/models/google/gemini.py +141 -23
  31. agno/models/litellm/chat.py +25 -0
  32. agno/models/openai/chat.py +21 -0
  33. agno/models/openai/responses.py +44 -0
  34. agno/os/routers/knowledge/knowledge.py +20 -9
  35. agno/run/agent.py +17 -0
  36. agno/run/requirement.py +89 -6
  37. agno/tracing/exporter.py +2 -2
  38. agno/utils/print_response/agent.py +4 -4
  39. agno/utils/print_response/team.py +12 -12
  40. agno/utils/tokens.py +643 -27
  41. agno/vectordb/base.py +15 -2
  42. agno/vectordb/chroma/chromadb.py +6 -2
  43. agno/vectordb/lancedb/lance_db.py +3 -37
  44. agno/vectordb/milvus/milvus.py +6 -32
  45. agno/vectordb/mongodb/mongodb.py +0 -27
  46. agno/vectordb/pgvector/pgvector.py +21 -11
  47. agno/vectordb/pineconedb/pineconedb.py +0 -17
  48. agno/vectordb/qdrant/qdrant.py +6 -29
  49. agno/vectordb/redis/redisdb.py +0 -26
  50. agno/vectordb/singlestore/singlestore.py +16 -8
  51. agno/vectordb/surrealdb/surrealdb.py +0 -36
  52. agno/vectordb/weaviate/weaviate.py +6 -2
  53. {agno-2.3.10.dist-info → agno-2.3.12.dist-info}/METADATA +4 -1
  54. {agno-2.3.10.dist-info → agno-2.3.12.dist-info}/RECORD +57 -57
  55. {agno-2.3.10.dist-info → agno-2.3.12.dist-info}/WHEEL +0 -0
  56. {agno-2.3.10.dist-info → agno-2.3.12.dist-info}/licenses/LICENSE +0 -0
  57. {agno-2.3.10.dist-info → agno-2.3.12.dist-info}/top_level.txt +0 -0
agno/vectordb/base.py CHANGED
@@ -2,6 +2,7 @@ from abc import ABC, abstractmethod
 from typing import Any, Dict, List, Optional
 
 from agno.knowledge.document import Document
+from agno.utils.log import log_warning
 from agno.utils.string import generate_id
 
 
@@ -114,9 +115,21 @@ class VectorDb(ABC):
     def delete_by_metadata(self, metadata: Dict[str, Any]) -> bool:
         raise NotImplementedError
 
-    @abstractmethod
     def update_metadata(self, content_id: str, metadata: Dict[str, Any]) -> None:
-        raise NotImplementedError
+        """
+        Update the metadata for documents with the given content_id.
+
+        Default implementation logs a warning. Subclasses should override this method
+        to provide their specific implementation.
+
+        Args:
+            content_id (str): The content ID to update
+            metadata (Dict[str, Any]): The metadata to update
+        """
+        log_warning(
+            f"{self.__class__.__name__}.update_metadata() is not implemented. "
+            f"Metadata update for content_id '{content_id}' was skipped."
+        )
 
     @abstractmethod
     def delete_by_content_id(self, content_id: str) -> bool:
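Net effect of the base-class change: update_metadata drops out of the abstract surface, so backends that do not override it now log a warning instead of raising NotImplementedError. A minimal sketch of the new fallback, assuming agno 2.3.12 is installed; _Stub is a hypothetical stand-in for a backend without an override:

    from agno.vectordb.base import VectorDb

    class _Stub:  # hypothetical: a backend class with no update_metadata override
        pass

    # Invoking the base implementation directly shows the 2.3.12 behavior:
    # logs "_Stub.update_metadata() is not implemented. Metadata update for
    # content_id 'content-123' was skipped." and returns None (no exception).
    VectorDb.update_metadata(_Stub(), "content-123", {"source": "docs"})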
agno/vectordb/chroma/chromadb.py CHANGED
@@ -276,7 +276,9 @@ class ChromaDb(VectorDb):
 
         for document in documents:
             cleaned_content = document.content.replace("\x00", "\ufffd")
-            doc_id = md5(cleaned_content.encode()).hexdigest()
+            # Include content_hash in ID to ensure uniqueness across different content hashes
+            base_id = document.id or md5(cleaned_content.encode()).hexdigest()
+            doc_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
 
             # Handle metadata and filters
             metadata = document.meta_data or {}
@@ -435,7 +437,9 @@ class ChromaDb(VectorDb):
 
         for document in documents:
             cleaned_content = document.content.replace("\x00", "\ufffd")
-            doc_id = md5(cleaned_content.encode()).hexdigest()
+            # Include content_hash in ID to ensure uniqueness across different content hashes
+            base_id = document.id or md5(cleaned_content.encode()).hexdigest()
+            doc_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
 
             # Handle metadata and filters
             metadata = document.meta_data or {}
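The same ID scheme recurs across the vector DB backends below: the stored document ID is now derived from both the document itself (its explicit id, or an md5 of its cleaned content) and the content_hash of the ingested source. A standalone sketch of the derivation, mirroring the pattern in the hunks above; the helper name is illustrative:

    from hashlib import md5
    from typing import Optional

    def derive_doc_id(content: str, content_hash: str, explicit_id: Optional[str] = None) -> str:
        # NUL bytes are replaced, then the base ID is combined with the content hash.
        cleaned = content.replace("\x00", "\ufffd")
        base_id = explicit_id or md5(cleaned.encode()).hexdigest()
        return md5(f"{base_id}_{content_hash}".encode()).hexdigest()

    # The same chunk ingested under two different content hashes (e.g. the same
    # URL added with two different descriptions) now yields two distinct rows:
    a = derive_doc_id("same text", content_hash="hash-a")
    b = derive_doc_id("same text", content_hash="hash-b")
    assert a != b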
agno/vectordb/lancedb/lance_db.py CHANGED
@@ -260,39 +260,6 @@ class LanceDb(VectorDb):
         tbl = self.connection.create_table(name=self.table_name, schema=schema, mode="overwrite", exist_ok=True)  # type: ignore
         return tbl  # type: ignore
 
-    def doc_exists(self, document: Document) -> bool:
-        """
-        Validating if the document exists or not
-
-        Args:
-            document (Document): Document to validate
-        """
-        try:
-            if self.table is not None:
-                cleaned_content = document.content.replace("\x00", "\ufffd")
-                doc_id = md5(cleaned_content.encode()).hexdigest()
-                result = self.table.search().where(f"{self._id}='{doc_id}'").to_arrow()
-                return len(result) > 0
-        except Exception:
-            # Search sometimes fails with stale cache data, it means the doc doesn't exist
-            return False
-
-        return False
-
-    async def async_doc_exists(self, document: Document) -> bool:
-        """
-        Asynchronously validate if the document exists
-
-        Args:
-            document (Document): Document to validate
-
-        Returns:
-            bool: True if document exists, False otherwise
-        """
-        if self.connection:
-            self.table = self.connection.open_table(name=self.table_name)
-            return self.doc_exists(document)
-
     def insert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
         """
         Insert documents into the database.
@@ -309,9 +276,6 @@ class LanceDb(VectorDb):
         data = []
 
         for document in documents:
-            if self.doc_exists(document):
-                continue
-
             # Add filters to document metadata if provided
             if filters:
                 meta_data = document.meta_data.copy() if document.meta_data else {}
@@ -320,7 +284,9 @@ class LanceDb(VectorDb):
 
             document.embed(embedder=self.embedder)
             cleaned_content = document.content.replace("\x00", "\ufffd")
-            doc_id = str(md5(cleaned_content.encode()).hexdigest())
+            # Include content_hash in ID to ensure uniqueness across different content hashes
+            base_id = document.id or md5(cleaned_content.encode()).hexdigest()
+            doc_id = str(md5(f"{base_id}_{content_hash}".encode()).hexdigest())
             payload = {
                 "name": document.name,
                 "meta_data": document.meta_data,
agno/vectordb/milvus/milvus.py CHANGED
@@ -229,7 +229,9 @@ class Milvus(VectorDb):
         """
 
         cleaned_content = document.content.replace("\x00", "\ufffd")
-        doc_id = md5(cleaned_content.encode()).hexdigest()
+        # Include content_hash in ID to ensure uniqueness across different content hashes
+        base_id = document.id or md5(cleaned_content.encode()).hexdigest()
+        doc_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
 
         # Convert dictionary fields to JSON strings
         meta_data_str = json.dumps(document.meta_data) if document.meta_data else "{}"
@@ -317,36 +319,6 @@ class Milvus(VectorDb):
             max_length=65_535,
         )
 
-    def doc_exists(self, document: Document) -> bool:
-        """
-        Validating if the document exists or not
-
-        Args:
-            document (Document): Document to validate
-        """
-        if self.client:
-            cleaned_content = document.content.replace("\x00", "\ufffd")
-            doc_id = md5(cleaned_content.encode()).hexdigest()
-            collection_points = self.client.get(
-                collection_name=self.collection,
-                ids=[doc_id],
-            )
-            return len(collection_points) > 0
-        return False
-
-    async def async_doc_exists(self, document: Document) -> bool:
-        """
-        Check if document exists asynchronously.
-        AsyncMilvusClient supports get().
-        """
-        cleaned_content = document.content.replace("\x00", "\ufffd")
-        doc_id = md5(cleaned_content.encode()).hexdigest()
-        collection_points = await self.async_client.get(
-            collection_name=self.collection,
-            ids=[doc_id],
-        )
-        return len(collection_points) > 0
-
     def name_exists(self, name: str) -> bool:
         """
         Validates if a document with the given name exists in the collection.
@@ -528,7 +500,9 @@ class Milvus(VectorDb):
             log_debug(f"Skipping document without embedding: {document.name} ({document.meta_data})")
             return None
         cleaned_content = document.content.replace("\x00", "\ufffd")
-        doc_id = md5(cleaned_content.encode()).hexdigest()
+        # Include content_hash in ID to ensure uniqueness across different content hashes
+        base_id = document.id or md5(cleaned_content.encode()).hexdigest()
+        doc_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
 
         meta_data = document.meta_data or {}
         if filters:
agno/vectordb/mongodb/mongodb.py CHANGED
@@ -471,20 +471,6 @@ class MongoDb(VectorDb):
         if self.wait_until_index_ready_in_seconds:
             await self._wait_for_index_ready_async()
 
-    def doc_exists(self, document: Document) -> bool:
-        """Check if a document exists in the MongoDB collection based on its content."""
-        try:
-            collection = self._get_collection()
-            # Use content hash as document ID
-            doc_id = md5(document.content.encode("utf-8")).hexdigest()
-            result = collection.find_one({"_id": doc_id})
-            exists = result is not None
-            log_debug(f"Document {'exists' if exists else 'does not exist'}: {doc_id}")
-            return exists
-        except Exception as e:
-            logger.error(f"Error checking document existence: {e}")
-            return False
-
     def name_exists(self, name: str) -> bool:
         """Check if a document with a given name exists in the collection."""
         try:
@@ -1024,19 +1010,6 @@ class MongoDb(VectorDb):
             logger.error(f"Error getting document count: {e}")
             return 0
 
-    async def async_doc_exists(self, document: Document) -> bool:
-        """Check if a document exists asynchronously."""
-        try:
-            collection = await self._get_async_collection()
-            doc_id = md5(document.content.encode("utf-8")).hexdigest()
-            result = await collection.find_one({"_id": doc_id})
-            exists = result is not None
-            log_debug(f"Document {'exists' if exists else 'does not exist'}: {doc_id}")
-            return exists
-        except Exception as e:
-            logger.error(f"Error checking document existence asynchronously: {e}")
-            return False
-
     async def async_insert(
         self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
     ) -> None:
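A pattern worth noting: the doc_exists / async_doc_exists pre-insert checks are removed here and in the LanceDb, Milvus, PineconeDb, Qdrant, RedisDB and SurrealDb sections. With deterministic, content_hash-qualified IDs the pre-check becomes redundant for backends that write by ID: re-ingesting an unchanged document computes the same ID and overwrites in place rather than duplicating. A toy sketch of why, under that assumption, using a dict as a stand-in for an ID-keyed upsert (Qdrant points, Mongo _id, ...):

    from hashlib import md5

    def doc_id(content: str, content_hash: str) -> str:
        cleaned = content.replace("\x00", "\ufffd")
        base = md5(cleaned.encode()).hexdigest()
        return md5(f"{base}_{content_hash}".encode()).hexdigest()

    store = {}  # stand-in for an ID-keyed store with upsert semantics

    for _ in range(2):  # re-running an unchanged ingest is now an overwrite, not a duplicate
        store[doc_id("same chunk", "hash-a")] = "payload"

    assert len(store) == 1  # no duplicate row, and no doc_exists() round-trip needed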
agno/vectordb/pgvector/pgvector.py CHANGED
@@ -367,7 +367,10 @@ class PgVector(VectorDb):
         for doc in batch_docs:
             try:
                 cleaned_content = self._clean_content(doc.content)
-                record_id = doc.id or content_hash
+                # Include content_hash in ID to ensure uniqueness across different content hashes
+                # This allows the same URL/content to be inserted with different descriptions
+                base_id = doc.id or md5(cleaned_content.encode()).hexdigest()
+                record_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
 
                 meta_data = doc.meta_data or {}
                 if filters:
@@ -456,7 +459,9 @@ class PgVector(VectorDb):
             batch_records_dict: Dict[str, Dict[str, Any]] = {}  # Use dict to deduplicate by ID
             for doc in batch_docs:
                 try:
-                    batch_records_dict[doc.id] = self._get_document_record(doc, filters, content_hash)  # type: ignore
+                    record = self._get_document_record(doc, filters, content_hash)
+                    # Use the generated record ID (which includes content_hash) for deduplication
+                    batch_records_dict[record["id"]] = record
                 except Exception as e:
                     log_error(f"Error processing document '{doc.name}': {e}")
 
@@ -497,7 +502,10 @@ class PgVector(VectorDb):
     ) -> Dict[str, Any]:
         doc.embed(embedder=self.embedder)
         cleaned_content = self._clean_content(doc.content)
-        record_id = doc.id or content_hash
+        # Include content_hash in ID to ensure uniqueness across different content hashes
+        # This allows the same URL/content to be inserted with different descriptions
+        base_id = doc.id or md5(cleaned_content.encode()).hexdigest()
+        record_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
 
         meta_data = doc.meta_data or {}
         if filters:
@@ -630,7 +638,10 @@ class PgVector(VectorDb):
             for idx, doc in enumerate(batch_docs):
                 try:
                     cleaned_content = self._clean_content(doc.content)
-                    record_id = md5(cleaned_content.encode()).hexdigest()
+                    # Include content_hash in ID to ensure uniqueness across different content hashes
+                    # This allows the same URL/content to be inserted with different descriptions
+                    base_id = doc.id or md5(cleaned_content.encode()).hexdigest()
+                    record_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
 
                     if (
                         doc.embedding is not None
@@ -695,25 +706,23 @@ class PgVector(VectorDb):
         Update the metadata for a document.
 
         Args:
-            id (str): The ID of the document.
+            content_id (str): The ID of the document.
             metadata (Dict[str, Any]): The metadata to update.
         """
         try:
             with self.Session() as sess:
-                # Merge JSONB instead of overwriting: coalesce(existing, '{}') || :new
+                # Merge JSONB for metadata, but replace filters entirely (absolute value)
                 stmt = (
                     update(self.table)
                     .where(self.table.c.content_id == content_id)
                     .values(
                         meta_data=func.coalesce(self.table.c.meta_data, text("'{}'::jsonb")).op("||")(
-                            bindparam("md", metadata, type_=postgresql.JSONB)
-                        ),
-                        filters=func.coalesce(self.table.c.filters, text("'{}'::jsonb")).op("||")(
-                            bindparam("ft", metadata, type_=postgresql.JSONB)
+                            bindparam("md", type_=postgresql.JSONB)
                         ),
+                        filters=bindparam("ft", type_=postgresql.JSONB),
                     )
                 )
-                sess.execute(stmt)
+                sess.execute(stmt, {"md": metadata, "ft": metadata})
                 sess.commit()
         except Exception as e:
             log_error(f"Error updating metadata for document {content_id}: {e}")
@@ -1111,6 +1120,7 @@ class PgVector(VectorDb):
                 search_results = self.reranker.rerank(query=query, documents=search_results)
 
             log_info(f"Found {len(search_results)} documents")
+
             return search_results
         except Exception as e:
             log_error(f"Error during hybrid search: {e}")
agno/vectordb/pineconedb/pineconedb.py CHANGED
@@ -217,23 +217,6 @@ class PineconeDb(VectorDb):
         log_debug(f"Deleting index: {self.name}")
         self.client.delete_index(name=self.name, timeout=self.timeout)
 
-    def doc_exists(self, document: Document) -> bool:
-        """Check if a document exists in the index.
-
-        Args:
-            document (Document): The document to check.
-
-        Returns:
-            bool: True if the document exists, False otherwise.
-
-        """
-        response = self.index.fetch(ids=[document.id], namespace=self.namespace)
-        return len(response.vectors) > 0
-
-    async def async_doc_exists(self, document: Document) -> bool:
-        """Check if a document exists in the index asynchronously."""
-        return await asyncio.to_thread(self.doc_exists, document)
-
     def name_exists(self, name: str) -> bool:
         """Check if an index with the given name exists.
 
agno/vectordb/qdrant/qdrant.py CHANGED
@@ -259,33 +259,6 @@ class Qdrant(VectorDb):
             else None,
         )
 
-    def doc_exists(self, document: Document) -> bool:
-        """
-        Validating if the document exists or not
-
-        Args:
-            document (Document): Document to validate
-        """
-        if self.client:
-            cleaned_content = document.content.replace("\x00", "\ufffd")
-            doc_id = md5(cleaned_content.encode()).hexdigest()
-            collection_points = self.client.retrieve(
-                collection_name=self.collection,
-                ids=[doc_id],
-            )
-            return len(collection_points) > 0
-        return False
-
-    async def async_doc_exists(self, document: Document) -> bool:
-        """Check if a document exists asynchronously."""
-        cleaned_content = document.content.replace("\x00", "\ufffd")
-        doc_id = md5(cleaned_content.encode()).hexdigest()
-        collection_points = await self.async_client.retrieve(
-            collection_name=self.collection,
-            ids=[doc_id],
-        )
-        return len(collection_points) > 0
-
     def name_exists(self, name: str) -> bool:
         """
         Validates if a document with the given name exists in the collection.
@@ -347,7 +320,9 @@ class Qdrant(VectorDb):
         points = []
         for document in documents:
             cleaned_content = document.content.replace("\x00", "\ufffd")
-            doc_id = md5(cleaned_content.encode()).hexdigest()
+            # Include content_hash in ID to ensure uniqueness across different content hashes
+            base_id = document.id or md5(cleaned_content.encode()).hexdigest()
+            doc_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
 
             # TODO(v2.0.0): Remove conditional vector naming logic
             if self.use_named_vectors:
@@ -457,7 +432,9 @@ class Qdrant(VectorDb):
 
         async def process_document(document):
             cleaned_content = document.content.replace("\x00", "\ufffd")
-            doc_id = md5(cleaned_content.encode()).hexdigest()
+            # Include content_hash in ID to ensure uniqueness across different content hashes
+            base_id = document.id or md5(cleaned_content.encode()).hexdigest()
+            doc_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
 
             if self.search_type == SearchType.vector:
                 # For vector search, maintain backward compatibility with unnamed vectors
agno/vectordb/redis/redisdb.py CHANGED
@@ -184,32 +184,6 @@ class RedisDB(VectorDb):
             log_error(f"Error creating Redis index: {e}")
             raise
 
-    def doc_exists(self, document: Document) -> bool:
-        """Check if a document exists in the index."""
-        try:
-            doc_id = document.id or hash_string_sha256(document.content)
-            return self.id_exists(doc_id)
-        except Exception as e:
-            log_error(f"Error checking if document exists: {e}")
-            return False
-
-    async def async_doc_exists(self, document: Document) -> bool:
-        """Async version of doc_exists method."""
-        try:
-            doc_id = document.id or hash_string_sha256(document.content)
-            async_index = await self._get_async_index()
-            id_filter = Tag("id") == doc_id
-            query = FilterQuery(
-                filter_expression=id_filter,
-                return_fields=["id"],
-                num_results=1,
-            )
-            results = await async_index.query(query)
-            return len(results) > 0
-        except Exception as e:
-            log_error(f"Error checking if document exists: {e}")
-            return False
-
     def name_exists(self, name: str) -> bool:
         """Check if a document with the given name exists."""
         try:
agno/vectordb/singlestore/singlestore.py CHANGED
@@ -185,8 +185,10 @@ class SingleStore(VectorDb):
         for document in documents:
             document.embed(embedder=self.embedder)
             cleaned_content = document.content.replace("\x00", "\ufffd")
-            record_id = md5(cleaned_content.encode()).hexdigest()
-            _id = document.id or record_id
+            # Include content_hash in ID to ensure uniqueness across different content hashes
+            base_id = document.id or md5(cleaned_content.encode()).hexdigest()
+            record_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
+            _id = record_id
 
             meta_data_json = json.dumps(document.meta_data)
             usage_json = json.dumps(document.usage)
@@ -246,8 +248,10 @@ class SingleStore(VectorDb):
         for document in documents:
             document.embed(embedder=self.embedder)
             cleaned_content = document.content.replace("\x00", "\ufffd")
-            record_id = md5(cleaned_content.encode()).hexdigest()
-            _id = document.id or record_id
+            # Include content_hash in ID to ensure uniqueness across different content hashes
+            base_id = document.id or md5(cleaned_content.encode()).hexdigest()
+            record_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
+            _id = record_id
 
             meta_data_json = json.dumps(document.meta_data)
             usage_json = json.dumps(document.usage)
@@ -548,8 +552,10 @@ class SingleStore(VectorDb):
         counter = 0
         for document in documents:
             cleaned_content = document.content.replace("\x00", "\ufffd")
-            record_id = md5(cleaned_content.encode()).hexdigest()
-            _id = document.id or record_id
+            # Include content_hash in ID to ensure uniqueness across different content hashes
+            base_id = document.id or md5(cleaned_content.encode()).hexdigest()
+            record_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
+            _id = record_id
 
             meta_data_json = json.dumps(document.meta_data)
             usage_json = json.dumps(document.usage)
@@ -632,8 +638,10 @@ class SingleStore(VectorDb):
         counter = 0
         for document in documents:
             cleaned_content = document.content.replace("\x00", "\ufffd")
-            record_id = md5(cleaned_content.encode()).hexdigest()
-            _id = document.id or record_id
+            # Include content_hash in ID to ensure uniqueness across different content hashes
+            base_id = document.id or md5(cleaned_content.encode()).hexdigest()
+            record_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
+            _id = record_id
 
             meta_data_json = json.dumps(document.meta_data)
             usage_json = json.dumps(document.usage)
agno/vectordb/surrealdb/surrealdb.py CHANGED
@@ -31,12 +31,6 @@ class SurrealDb(VectorDb):
        DEFINE INDEX IF NOT EXISTS vector_idx ON {collection} FIELDS embedding HNSW DIMENSION {dimensions} DIST {distance};
     """
 
-    DOC_EXISTS_QUERY: Final[str] = """
-        SELECT * FROM {collection}
-        WHERE content = $content
-        LIMIT 1
-    """
-
     NAME_EXISTS_QUERY: Final[str] = """
         SELECT * FROM {collection}
         WHERE meta_data.name = $name
@@ -221,23 +215,6 @@ class SurrealDb(VectorDb):
         )
         self.client.query(query)
 
-    def doc_exists(self, document: Document) -> bool:
-        """Check if a document exists by its content.
-
-        Args:
-            document: The document to check.
-
-        Returns:
-            True if the document exists, False otherwise.
-
-        """
-        log_debug(f"Checking if document exists: {document.content}")
-        result = self.client.query(
-            self.DOC_EXISTS_QUERY.format(collection=self.collection),
-            {"content": document.content},
-        )
-        return bool(self._extract_result(result))
-
     def name_exists(self, name: str) -> bool:
         """Check if a document exists by its name.
 
@@ -493,19 +470,6 @@ class SurrealDb(VectorDb):
             ),
         )
 
-    async def async_doc_exists(self, document: Document) -> bool:
-        """Check if a document exists by its content asynchronously.
-
-        Returns:
-            True if the document exists, False otherwise.
-
-        """
-        response = await self.async_client.query(
-            self.DOC_EXISTS_QUERY.format(collection=self.collection),
-            {"content": document.content},
-        )
-        return bool(self._extract_result(response))
-
     async def async_name_exists(self, name: str) -> bool:
         """Check if a document exists by its name asynchronously.
 
agno/vectordb/weaviate/weaviate.py CHANGED
@@ -247,7 +247,9 @@ class Weaviate(VectorDb):
                 continue
 
             cleaned_content = document.content.replace("\x00", "\ufffd")
-            record_id = md5(cleaned_content.encode()).hexdigest()
+            # Include content_hash in ID to ensure uniqueness across different content hashes
+            base_id = document.id or md5(cleaned_content.encode()).hexdigest()
+            record_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
             doc_uuid = uuid.UUID(hex=record_id[:32])
 
             # Merge filters with metadata
@@ -338,7 +340,9 @@ class Weaviate(VectorDb):
 
             # Clean content and generate UUID
             cleaned_content = document.content.replace("\x00", "\ufffd")
-            record_id = md5(cleaned_content.encode()).hexdigest()
+            # Include content_hash in ID to ensure uniqueness across different content hashes
+            base_id = document.id or md5(cleaned_content.encode()).hexdigest()
+            record_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
             doc_uuid = uuid.UUID(hex=record_id[:32])
 
             # Serialize meta_data to JSON string
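Weaviate keys objects by UUID, so the hash-derived record_id is folded into a deterministic UUID. Since an md5 hexdigest is exactly 32 hex characters, record_id[:32] is the full digest; a quick stdlib-only sketch of the round trip:

    import uuid
    from hashlib import md5

    record_id = md5(b"base_hash-a").hexdigest()
    assert len(record_id) == 32  # an md5 hexdigest is always 32 hex chars

    # Deterministic: the same record_id always maps to the same object UUID,
    # so re-ingesting under the same content_hash upserts the same Weaviate object.
    doc_uuid = uuid.UUID(hex=record_id[:32])
    print(doc_uuid)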
{agno-2.3.10.dist-info → agno-2.3.12.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: agno
-Version: 2.3.10
+Version: 2.3.12
 Summary: Agno: a lightweight library for building Multi-Agent Systems
 Author-email: Ashpreet Bedi <ashpreet@agno.com>
 Project-URL: homepage, https://agno.com
@@ -102,6 +102,9 @@ Provides-Extra: openai
 Requires-Dist: openai; extra == "openai"
 Provides-Extra: portkey
 Requires-Dist: portkey-ai; extra == "portkey"
+Provides-Extra: tokenizers
+Requires-Dist: tiktoken; extra == "tokenizers"
+Requires-Dist: tokenizers; extra == "tokenizers"
 Provides-Extra: agentql
 Requires-Dist: agentql; extra == "agentql"
 Provides-Extra: apify
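The new tokenizers extra accompanies the heavily expanded agno/utils/tokens.py (+643/-27 in the file list) and declares tiktoken plus Hugging Face tokenizers as optional dependencies. Assuming standard extras behavior, it would be pulled in with:

    pip install "agno[tokenizers]"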