agno-2.3.11-py3-none-any.whl → agno-2.3.12-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/compression/manager.py +87 -16
- agno/db/mongo/async_mongo.py +1 -1
- agno/db/mongo/mongo.py +1 -1
- agno/exceptions.py +1 -0
- agno/knowledge/knowledge.py +83 -20
- agno/knowledge/reader/csv_reader.py +2 -2
- agno/knowledge/reader/text_reader.py +15 -3
- agno/knowledge/reader/wikipedia_reader.py +33 -1
- agno/memory/strategies/base.py +3 -4
- agno/models/anthropic/claude.py +44 -0
- agno/models/aws/bedrock.py +60 -0
- agno/models/base.py +124 -30
- agno/models/google/gemini.py +141 -23
- agno/models/litellm/chat.py +25 -0
- agno/models/openai/responses.py +44 -0
- agno/os/routers/knowledge/knowledge.py +0 -1
- agno/run/agent.py +17 -0
- agno/run/requirement.py +89 -6
- agno/utils/print_response/agent.py +4 -4
- agno/utils/print_response/team.py +12 -12
- agno/utils/tokens.py +643 -27
- agno/vectordb/chroma/chromadb.py +6 -2
- agno/vectordb/lancedb/lance_db.py +3 -37
- agno/vectordb/milvus/milvus.py +6 -32
- agno/vectordb/mongodb/mongodb.py +0 -27
- agno/vectordb/pgvector/pgvector.py +15 -5
- agno/vectordb/pineconedb/pineconedb.py +0 -17
- agno/vectordb/qdrant/qdrant.py +6 -29
- agno/vectordb/redis/redisdb.py +0 -26
- agno/vectordb/singlestore/singlestore.py +16 -8
- agno/vectordb/surrealdb/surrealdb.py +0 -36
- agno/vectordb/weaviate/weaviate.py +6 -2
- {agno-2.3.11.dist-info → agno-2.3.12.dist-info}/METADATA +4 -1
- {agno-2.3.11.dist-info → agno-2.3.12.dist-info}/RECORD +37 -37
- {agno-2.3.11.dist-info → agno-2.3.12.dist-info}/WHEEL +0 -0
- {agno-2.3.11.dist-info → agno-2.3.12.dist-info}/licenses/LICENSE +0 -0
- {agno-2.3.11.dist-info → agno-2.3.12.dist-info}/top_level.txt +0 -0
agno/vectordb/chroma/chromadb.py
CHANGED
@@ -276,7 +276,9 @@ class ChromaDb(VectorDb):
 
         for document in documents:
             cleaned_content = document.content.replace("\x00", "\ufffd")
-
+            # Include content_hash in ID to ensure uniqueness across different content hashes
+            base_id = document.id or md5(cleaned_content.encode()).hexdigest()
+            doc_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
 
             # Handle metadata and filters
             metadata = document.meta_data or {}
@@ -435,7 +437,9 @@ class ChromaDb(VectorDb):
 
         for document in documents:
             cleaned_content = document.content.replace("\x00", "\ufffd")
-
+            # Include content_hash in ID to ensure uniqueness across different content hashes
+            base_id = document.id or md5(cleaned_content.encode()).hexdigest()
+            doc_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
 
             # Handle metadata and filters
             metadata = document.meta_data or {}
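Every backend below repeats the ID change shown above: the record ID is now derived from the document's base ID plus the batch's content_hash, rather than from the content alone. A minimal sketch of the shared scheme, assuming a hypothetical helper name (scoped_doc_id is not a function in the library):

from hashlib import md5
from typing import Optional

def scoped_doc_id(content: str, content_hash: str, doc_id: Optional[str] = None) -> str:
    # NUL bytes are replaced with U+FFFD before hashing, mirroring the diffs
    cleaned = content.replace("\x00", "\ufffd")
    # Fall back to a content-derived ID when the document has no explicit ID
    base_id = doc_id or md5(cleaned.encode()).hexdigest()
    # Scoping by content_hash means identical content inserted under
    # different content hashes produces distinct records
    return md5(f"{base_id}_{content_hash}".encode()).hexdigest()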
agno/vectordb/lancedb/lance_db.py
CHANGED
@@ -260,39 +260,6 @@ class LanceDb(VectorDb):
         tbl = self.connection.create_table(name=self.table_name, schema=schema, mode="overwrite", exist_ok=True)  # type: ignore
         return tbl  # type: ignore
 
-    def doc_exists(self, document: Document) -> bool:
-        """
-        Validating if the document exists or not
-
-        Args:
-            document (Document): Document to validate
-        """
-        try:
-            if self.table is not None:
-                cleaned_content = document.content.replace("\x00", "\ufffd")
-                doc_id = md5(cleaned_content.encode()).hexdigest()
-                result = self.table.search().where(f"{self._id}='{doc_id}'").to_arrow()
-                return len(result) > 0
-        except Exception:
-            # Search sometimes fails with stale cache data, it means the doc doesn't exist
-            return False
-
-        return False
-
-    async def async_doc_exists(self, document: Document) -> bool:
-        """
-        Asynchronously validate if the document exists
-
-        Args:
-            document (Document): Document to validate
-
-        Returns:
-            bool: True if document exists, False otherwise
-        """
-        if self.connection:
-            self.table = self.connection.open_table(name=self.table_name)
-        return self.doc_exists(document)
-
     def insert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
         """
         Insert documents into the database.
@@ -309,9 +276,6 @@ class LanceDb(VectorDb):
         data = []
 
         for document in documents:
-            if self.doc_exists(document):
-                continue
-
             # Add filters to document metadata if provided
             if filters:
                 meta_data = document.meta_data.copy() if document.meta_data else {}
@@ -320,7 +284,9 @@ class LanceDb(VectorDb):
 
             document.embed(embedder=self.embedder)
             cleaned_content = document.content.replace("\x00", "\ufffd")
-
+            # Include content_hash in ID to ensure uniqueness across different content hashes
+            base_id = document.id or md5(cleaned_content.encode()).hexdigest()
+            doc_id = str(md5(f"{base_id}_{content_hash}".encode()).hexdigest())
             payload = {
                 "name": document.name,
                 "meta_data": document.meta_data,
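Dropping doc_exists/async_doc_exists and the skip-if-exists loop is consistent with the new ID scheme: an existence check keyed on content alone would wrongly skip identical content arriving under a new content_hash. A standalone illustration (the content-hash strings here are made up):

from hashlib import md5

content = "same chunk text"
base = md5(content.replace("\x00", "\ufffd").encode()).hexdigest()

id_v1 = md5(f"{base}_hash-of-source-v1".encode()).hexdigest()
id_v2 = md5(f"{base}_hash-of-source-v2".encode()).hexdigest()

# Identical content, different content_hash -> distinct record IDs,
# so the second insert lands as a new record instead of being skipped.
assert id_v1 != id_v2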
agno/vectordb/milvus/milvus.py
CHANGED
@@ -229,7 +229,9 @@ class Milvus(VectorDb):
         """
 
         cleaned_content = document.content.replace("\x00", "\ufffd")
-
+        # Include content_hash in ID to ensure uniqueness across different content hashes
+        base_id = document.id or md5(cleaned_content.encode()).hexdigest()
+        doc_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
 
         # Convert dictionary fields to JSON strings
         meta_data_str = json.dumps(document.meta_data) if document.meta_data else "{}"
@@ -317,36 +319,6 @@ class Milvus(VectorDb):
             max_length=65_535,
         )
 
-    def doc_exists(self, document: Document) -> bool:
-        """
-        Validating if the document exists or not
-
-        Args:
-            document (Document): Document to validate
-        """
-        if self.client:
-            cleaned_content = document.content.replace("\x00", "\ufffd")
-            doc_id = md5(cleaned_content.encode()).hexdigest()
-            collection_points = self.client.get(
-                collection_name=self.collection,
-                ids=[doc_id],
-            )
-            return len(collection_points) > 0
-        return False
-
-    async def async_doc_exists(self, document: Document) -> bool:
-        """
-        Check if document exists asynchronously.
-        AsyncMilvusClient supports get().
-        """
-        cleaned_content = document.content.replace("\x00", "\ufffd")
-        doc_id = md5(cleaned_content.encode()).hexdigest()
-        collection_points = await self.async_client.get(
-            collection_name=self.collection,
-            ids=[doc_id],
-        )
-        return len(collection_points) > 0
-
     def name_exists(self, name: str) -> bool:
         """
         Validates if a document with the given name exists in the collection.
@@ -528,7 +500,9 @@ class Milvus(VectorDb):
             log_debug(f"Skipping document without embedding: {document.name} ({document.meta_data})")
             return None
         cleaned_content = document.content.replace("\x00", "\ufffd")
-
+        # Include content_hash in ID to ensure uniqueness across different content hashes
+        base_id = document.id or md5(cleaned_content.encode()).hexdigest()
+        doc_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
 
         meta_data = document.meta_data or {}
         if filters:
agno/vectordb/mongodb/mongodb.py
CHANGED
@@ -471,20 +471,6 @@ class MongoDb(VectorDb):
         if self.wait_until_index_ready_in_seconds:
             await self._wait_for_index_ready_async()
 
-    def doc_exists(self, document: Document) -> bool:
-        """Check if a document exists in the MongoDB collection based on its content."""
-        try:
-            collection = self._get_collection()
-            # Use content hash as document ID
-            doc_id = md5(document.content.encode("utf-8")).hexdigest()
-            result = collection.find_one({"_id": doc_id})
-            exists = result is not None
-            log_debug(f"Document {'exists' if exists else 'does not exist'}: {doc_id}")
-            return exists
-        except Exception as e:
-            logger.error(f"Error checking document existence: {e}")
-            return False
-
     def name_exists(self, name: str) -> bool:
         """Check if a document with a given name exists in the collection."""
         try:
@@ -1024,19 +1010,6 @@
             logger.error(f"Error getting document count: {e}")
             return 0
 
-    async def async_doc_exists(self, document: Document) -> bool:
-        """Check if a document exists asynchronously."""
-        try:
-            collection = await self._get_async_collection()
-            doc_id = md5(document.content.encode("utf-8")).hexdigest()
-            result = await collection.find_one({"_id": doc_id})
-            exists = result is not None
-            log_debug(f"Document {'exists' if exists else 'does not exist'}: {doc_id}")
-            return exists
-        except Exception as e:
-            logger.error(f"Error checking document existence asynchronously: {e}")
-            return False
-
     async def async_insert(
         self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
     ) -> None:
agno/vectordb/pgvector/pgvector.py
CHANGED
@@ -367,7 +367,10 @@ class PgVector(VectorDb):
             for doc in batch_docs:
                 try:
                     cleaned_content = self._clean_content(doc.content)
-
+                    # Include content_hash in ID to ensure uniqueness across different content hashes
+                    # This allows the same URL/content to be inserted with different descriptions
+                    base_id = doc.id or md5(cleaned_content.encode()).hexdigest()
+                    record_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
 
                     meta_data = doc.meta_data or {}
                     if filters:
@@ -456,7 +459,9 @@ class PgVector(VectorDb):
             batch_records_dict: Dict[str, Dict[str, Any]] = {}  # Use dict to deduplicate by ID
             for doc in batch_docs:
                 try:
-
+                    record = self._get_document_record(doc, filters, content_hash)
+                    # Use the generated record ID (which includes content_hash) for deduplication
+                    batch_records_dict[record["id"]] = record
                 except Exception as e:
                     log_error(f"Error processing document '{doc.name}': {e}")
 
@@ -497,7 +502,10 @@ class PgVector(VectorDb):
     ) -> Dict[str, Any]:
         doc.embed(embedder=self.embedder)
         cleaned_content = self._clean_content(doc.content)
-
+        # Include content_hash in ID to ensure uniqueness across different content hashes
+        # This allows the same URL/content to be inserted with different descriptions
+        base_id = doc.id or md5(cleaned_content.encode()).hexdigest()
+        record_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
 
         meta_data = doc.meta_data or {}
         if filters:
@@ -630,7 +638,10 @@ class PgVector(VectorDb):
             for idx, doc in enumerate(batch_docs):
                 try:
                     cleaned_content = self._clean_content(doc.content)
-
+                    # Include content_hash in ID to ensure uniqueness across different content hashes
+                    # This allows the same URL/content to be inserted with different descriptions
+                    base_id = doc.id or md5(cleaned_content.encode()).hexdigest()
+                    record_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
 
                     if (
                         doc.embedding is not None
@@ -698,7 +709,6 @@ class PgVector(VectorDb):
             content_id (str): The ID of the document.
             metadata (Dict[str, Any]): The metadata to update.
         """
-        print("metadata is: ", metadata)
         try:
             with self.Session() as sess:
                 # Merge JSONB for metadata, but replace filters entirely (absolute value)
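The second PgVector hunk deduplicates each batch by keying a dict on the generated record ID, so within one batch the last occurrence of a scoped ID wins. A self-contained sketch of the pattern (build_record is a made-up stand-in for the library's _get_document_record):

from hashlib import md5
from typing import Any, Dict, List

def build_record(content: str, content_hash: str) -> Dict[str, Any]:
    base_id = md5(content.encode()).hexdigest()
    record_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
    return {"id": record_id, "content": content}

def dedupe_batch(contents: List[str], content_hash: str) -> List[Dict[str, Any]]:
    batch_records: Dict[str, Dict[str, Any]] = {}
    for content in contents:
        record = build_record(content, content_hash)
        # A repeated scoped ID overwrites its earlier entry, so each ID
        # appears at most once in the flushed batch
        batch_records[record["id"]] = record
    return list(batch_records.values())

# Two identical chunks in one batch collapse into a single record:
assert len(dedupe_batch(["a", "a", "b"], "hash-1")) == 2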
agno/vectordb/pineconedb/pineconedb.py
CHANGED
@@ -217,23 +217,6 @@ class PineconeDb(VectorDb):
         log_debug(f"Deleting index: {self.name}")
         self.client.delete_index(name=self.name, timeout=self.timeout)
 
-    def doc_exists(self, document: Document) -> bool:
-        """Check if a document exists in the index.
-
-        Args:
-            document (Document): The document to check.
-
-        Returns:
-            bool: True if the document exists, False otherwise.
-
-        """
-        response = self.index.fetch(ids=[document.id], namespace=self.namespace)
-        return len(response.vectors) > 0
-
-    async def async_doc_exists(self, document: Document) -> bool:
-        """Check if a document exists in the index asynchronously."""
-        return await asyncio.to_thread(self.doc_exists, document)
-
     def name_exists(self, name: str) -> bool:
         """Check if an index with the given name exists.
 
agno/vectordb/qdrant/qdrant.py
CHANGED
@@ -259,33 +259,6 @@ class Qdrant(VectorDb):
             else None,
         )
 
-    def doc_exists(self, document: Document) -> bool:
-        """
-        Validating if the document exists or not
-
-        Args:
-            document (Document): Document to validate
-        """
-        if self.client:
-            cleaned_content = document.content.replace("\x00", "\ufffd")
-            doc_id = md5(cleaned_content.encode()).hexdigest()
-            collection_points = self.client.retrieve(
-                collection_name=self.collection,
-                ids=[doc_id],
-            )
-            return len(collection_points) > 0
-        return False
-
-    async def async_doc_exists(self, document: Document) -> bool:
-        """Check if a document exists asynchronously."""
-        cleaned_content = document.content.replace("\x00", "\ufffd")
-        doc_id = md5(cleaned_content.encode()).hexdigest()
-        collection_points = await self.async_client.retrieve(
-            collection_name=self.collection,
-            ids=[doc_id],
-        )
-        return len(collection_points) > 0
-
     def name_exists(self, name: str) -> bool:
         """
         Validates if a document with the given name exists in the collection.
@@ -347,7 +320,9 @@ class Qdrant(VectorDb):
         points = []
         for document in documents:
             cleaned_content = document.content.replace("\x00", "\ufffd")
-
+            # Include content_hash in ID to ensure uniqueness across different content hashes
+            base_id = document.id or md5(cleaned_content.encode()).hexdigest()
+            doc_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
 
             # TODO(v2.0.0): Remove conditional vector naming logic
             if self.use_named_vectors:
@@ -457,7 +432,9 @@ class Qdrant(VectorDb):
 
         async def process_document(document):
             cleaned_content = document.content.replace("\x00", "\ufffd")
-
+            # Include content_hash in ID to ensure uniqueness across different content hashes
+            base_id = document.id or md5(cleaned_content.encode()).hexdigest()
+            doc_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
 
             if self.search_type == SearchType.vector:
                 # For vector search, maintain backward compatibility with unnamed vectors
agno/vectordb/redis/redisdb.py
CHANGED
@@ -184,32 +184,6 @@ class RedisDB(VectorDb):
             log_error(f"Error creating Redis index: {e}")
             raise
 
-    def doc_exists(self, document: Document) -> bool:
-        """Check if a document exists in the index."""
-        try:
-            doc_id = document.id or hash_string_sha256(document.content)
-            return self.id_exists(doc_id)
-        except Exception as e:
-            log_error(f"Error checking if document exists: {e}")
-            return False
-
-    async def async_doc_exists(self, document: Document) -> bool:
-        """Async version of doc_exists method."""
-        try:
-            doc_id = document.id or hash_string_sha256(document.content)
-            async_index = await self._get_async_index()
-            id_filter = Tag("id") == doc_id
-            query = FilterQuery(
-                filter_expression=id_filter,
-                return_fields=["id"],
-                num_results=1,
-            )
-            results = await async_index.query(query)
-            return len(results) > 0
-        except Exception as e:
-            log_error(f"Error checking if document exists: {e}")
-            return False
-
     def name_exists(self, name: str) -> bool:
         """Check if a document with the given name exists."""
         try:
agno/vectordb/singlestore/singlestore.py
CHANGED
@@ -185,8 +185,10 @@ class SingleStore(VectorDb):
         for document in documents:
             document.embed(embedder=self.embedder)
             cleaned_content = document.content.replace("\x00", "\ufffd")
-
-
+            # Include content_hash in ID to ensure uniqueness across different content hashes
+            base_id = document.id or md5(cleaned_content.encode()).hexdigest()
+            record_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
+            _id = record_id
 
             meta_data_json = json.dumps(document.meta_data)
             usage_json = json.dumps(document.usage)
@@ -246,8 +248,10 @@ class SingleStore(VectorDb):
         for document in documents:
             document.embed(embedder=self.embedder)
             cleaned_content = document.content.replace("\x00", "\ufffd")
-
-
+            # Include content_hash in ID to ensure uniqueness across different content hashes
+            base_id = document.id or md5(cleaned_content.encode()).hexdigest()
+            record_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
+            _id = record_id
 
             meta_data_json = json.dumps(document.meta_data)
             usage_json = json.dumps(document.usage)
@@ -548,8 +552,10 @@ class SingleStore(VectorDb):
         counter = 0
         for document in documents:
             cleaned_content = document.content.replace("\x00", "\ufffd")
-
-
+            # Include content_hash in ID to ensure uniqueness across different content hashes
+            base_id = document.id or md5(cleaned_content.encode()).hexdigest()
+            record_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
+            _id = record_id
 
             meta_data_json = json.dumps(document.meta_data)
             usage_json = json.dumps(document.usage)
@@ -632,8 +638,10 @@ class SingleStore(VectorDb):
         counter = 0
         for document in documents:
             cleaned_content = document.content.replace("\x00", "\ufffd")
-
-
+            # Include content_hash in ID to ensure uniqueness across different content hashes
+            base_id = document.id or md5(cleaned_content.encode()).hexdigest()
+            record_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
+            _id = record_id
 
             meta_data_json = json.dumps(document.meta_data)
             usage_json = json.dumps(document.usage)
agno/vectordb/surrealdb/surrealdb.py
CHANGED
@@ -31,12 +31,6 @@ class SurrealDb(VectorDb):
         DEFINE INDEX IF NOT EXISTS vector_idx ON {collection} FIELDS embedding HNSW DIMENSION {dimensions} DIST {distance};
     """
 
-    DOC_EXISTS_QUERY: Final[str] = """
-        SELECT * FROM {collection}
-        WHERE content = $content
-        LIMIT 1
-    """
-
     NAME_EXISTS_QUERY: Final[str] = """
         SELECT * FROM {collection}
         WHERE meta_data.name = $name
@@ -221,23 +215,6 @@ class SurrealDb(VectorDb):
         )
         self.client.query(query)
 
-    def doc_exists(self, document: Document) -> bool:
-        """Check if a document exists by its content.
-
-        Args:
-            document: The document to check.
-
-        Returns:
-            True if the document exists, False otherwise.
-
-        """
-        log_debug(f"Checking if document exists: {document.content}")
-        result = self.client.query(
-            self.DOC_EXISTS_QUERY.format(collection=self.collection),
-            {"content": document.content},
-        )
-        return bool(self._extract_result(result))
-
     def name_exists(self, name: str) -> bool:
         """Check if a document exists by its name.
 
@@ -493,19 +470,6 @@ class SurrealDb(VectorDb):
             ),
         )
 
-    async def async_doc_exists(self, document: Document) -> bool:
-        """Check if a document exists by its content asynchronously.
-
-        Returns:
-            True if the document exists, False otherwise.
-
-        """
-        response = await self.async_client.query(
-            self.DOC_EXISTS_QUERY.format(collection=self.collection),
-            {"content": document.content},
-        )
-        return bool(self._extract_result(response))
-
     async def async_name_exists(self, name: str) -> bool:
         """Check if a document exists by its name asynchronously.
 
agno/vectordb/weaviate/weaviate.py
CHANGED
@@ -247,7 +247,9 @@ class Weaviate(VectorDb):
                 continue
 
             cleaned_content = document.content.replace("\x00", "\ufffd")
-
+            # Include content_hash in ID to ensure uniqueness across different content hashes
+            base_id = document.id or md5(cleaned_content.encode()).hexdigest()
+            record_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
             doc_uuid = uuid.UUID(hex=record_id[:32])
 
             # Merge filters with metadata
@@ -338,7 +340,9 @@ class Weaviate(VectorDb):
 
         # Clean content and generate UUID
         cleaned_content = document.content.replace("\x00", "\ufffd")
-
+        # Include content_hash in ID to ensure uniqueness across different content hashes
+        base_id = document.id or md5(cleaned_content.encode()).hexdigest()
+        record_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
        doc_uuid = uuid.UUID(hex=record_id[:32])
 
         # Serialize meta_data to JSON string
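Weaviate object IDs must be UUIDs, so the scoped MD5 digest is folded into one via uuid.UUID(hex=record_id[:32]). A quick standalone check of that conversion (the hashed bytes below are arbitrary example input):

import uuid
from hashlib import md5

record_id = md5(b"some-base-id_some-content-hash").hexdigest()
# An MD5 hex digest is exactly 32 hex characters, so record_id[:32]
# is the full digest reinterpreted as a UUID
doc_uuid = uuid.UUID(hex=record_id[:32])
print(doc_uuid)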
{agno-2.3.11.dist-info → agno-2.3.12.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: agno
-Version: 2.3.11
+Version: 2.3.12
 Summary: Agno: a lightweight library for building Multi-Agent Systems
 Author-email: Ashpreet Bedi <ashpreet@agno.com>
 Project-URL: homepage, https://agno.com
@@ -102,6 +102,9 @@ Provides-Extra: openai
 Requires-Dist: openai; extra == "openai"
 Provides-Extra: portkey
 Requires-Dist: portkey-ai; extra == "portkey"
+Provides-Extra: tokenizers
+Requires-Dist: tiktoken; extra == "tokenizers"
+Requires-Dist: tokenizers; extra == "tokenizers"
 Provides-Extra: agentql
 Requires-Dist: agentql; extra == "agentql"
 Provides-Extra: apify