PyPI - agno - Versions diffs - 2.1.4__py3-none-any.whl → 2.1.5__py3-none-any.whl - Mend

agno 2.1.4py3-none-any.whl → 2.1.5py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (88) hide show

agno/agent/agent.py +1767 -535
agno/db/async_postgres/__init__.py +3 -0
agno/db/async_postgres/async_postgres.py +1668 -0
agno/db/async_postgres/schemas.py +124 -0
agno/db/async_postgres/utils.py +289 -0
agno/db/base.py +237 -2
agno/db/dynamo/dynamo.py +2 -2
agno/db/firestore/firestore.py +2 -2
agno/db/firestore/utils.py +4 -2
agno/db/gcs_json/gcs_json_db.py +2 -2
agno/db/in_memory/in_memory_db.py +2 -2
agno/db/json/json_db.py +2 -2
agno/db/migrations/v1_to_v2.py +30 -13
agno/db/mongo/mongo.py +18 -6
agno/db/mysql/mysql.py +35 -13
agno/db/postgres/postgres.py +29 -6
agno/db/redis/redis.py +2 -2
agno/db/singlestore/singlestore.py +2 -2
agno/db/sqlite/sqlite.py +34 -12
agno/db/sqlite/utils.py +8 -3
agno/eval/accuracy.py +50 -43
agno/eval/performance.py +6 -3
agno/eval/reliability.py +6 -3
agno/eval/utils.py +33 -16
agno/exceptions.py +8 -2
agno/knowledge/knowledge.py +260 -46
agno/knowledge/reader/pdf_reader.py +4 -6
agno/knowledge/reader/reader_factory.py +2 -3
agno/memory/manager.py +241 -33
agno/models/anthropic/claude.py +37 -0
agno/os/app.py +8 -7
agno/os/interfaces/a2a/router.py +3 -5
agno/os/interfaces/agui/router.py +4 -1
agno/os/interfaces/agui/utils.py +27 -6
agno/os/interfaces/slack/router.py +2 -4
agno/os/mcp.py +98 -41
agno/os/router.py +23 -0
agno/os/routers/evals/evals.py +52 -20
agno/os/routers/evals/utils.py +14 -14
agno/os/routers/knowledge/knowledge.py +130 -9
agno/os/routers/knowledge/schemas.py +57 -0
agno/os/routers/memory/memory.py +116 -44
agno/os/routers/metrics/metrics.py +16 -6
agno/os/routers/session/session.py +65 -22
agno/os/schema.py +36 -0
agno/os/utils.py +67 -12
agno/reasoning/anthropic.py +80 -0
agno/reasoning/gemini.py +73 -0
agno/reasoning/openai.py +5 -0
agno/reasoning/vertexai.py +76 -0
agno/session/workflow.py +3 -3
agno/team/team.py +918 -175
agno/tools/googlesheets.py +20 -5
agno/tools/mcp_toolbox.py +3 -3
agno/tools/scrapegraph.py +1 -1
agno/utils/models/claude.py +3 -1
agno/utils/streamlit.py +1 -1
agno/vectordb/base.py +22 -1
agno/vectordb/cassandra/cassandra.py +9 -0
agno/vectordb/chroma/chromadb.py +26 -6
agno/vectordb/clickhouse/clickhousedb.py +9 -1
agno/vectordb/couchbase/couchbase.py +11 -0
agno/vectordb/lancedb/lance_db.py +20 -0
agno/vectordb/langchaindb/langchaindb.py +11 -0
agno/vectordb/lightrag/lightrag.py +9 -0
agno/vectordb/llamaindex/llamaindexdb.py +15 -1
agno/vectordb/milvus/milvus.py +23 -0
agno/vectordb/mongodb/mongodb.py +22 -0
agno/vectordb/pgvector/pgvector.py +19 -0
agno/vectordb/pineconedb/pineconedb.py +35 -4
agno/vectordb/qdrant/qdrant.py +24 -0
agno/vectordb/singlestore/singlestore.py +25 -17
agno/vectordb/surrealdb/surrealdb.py +18 -1
agno/vectordb/upstashdb/upstashdb.py +26 -1
agno/vectordb/weaviate/weaviate.py +18 -0
agno/workflow/condition.py +4 -0
agno/workflow/loop.py +4 -0
agno/workflow/parallel.py +4 -0
agno/workflow/router.py +4 -0
agno/workflow/step.py +22 -14
agno/workflow/steps.py +4 -0
agno/workflow/types.py +2 -2
agno/workflow/workflow.py +328 -61
{agno-2.1.4.dist-info → agno-2.1.5.dist-info}/METADATA +100 -41
{agno-2.1.4.dist-info → agno-2.1.5.dist-info}/RECORD +88 -81
{agno-2.1.4.dist-info → agno-2.1.5.dist-info}/WHEEL +0 -0
{agno-2.1.4.dist-info → agno-2.1.5.dist-info}/licenses/LICENSE +0 -0
{agno-2.1.4.dist-info → agno-2.1.5.dist-info}/top_level.txt +0 -0

agno/knowledge/knowledge.py CHANGED Viewed

@@ -12,7 +12,7 @@ from typing import Any, Dict, List, Optional, Set, Tuple, Union, cast, overload
 from httpx import AsyncClient
-from agno.db.base import BaseDb
+from agno.db.base import AsyncBaseDb, BaseDb
 from agno.db.schemas.knowledge import KnowledgeRow
 from agno.knowledge.content import Content, ContentAuth, ContentStatus, FileData
 from agno.knowledge.document import Document
@@ -39,7 +39,7 @@ class Knowledge:
     name: Optional[str] = None
     description: Optional[str] = None
     vector_db: Optional[Any] = None
-    contents_db: Optional[BaseDb] = None
+    contents_db: Optional[Union[BaseDb, AsyncBaseDb]] = None
     max_results: int = 10
     readers: Optional[Dict[str, Reader]] = None
@@ -394,10 +394,10 @@ class Knowledge:
             if self._should_include_file(str(path), include, exclude):
                 log_info(f"Adding file {path} due to include/exclude filters")
-                self._add_to_contents_db(content)
+                await self._add_to_contents_db(content)
                 if self._should_skip(content.content_hash, skip_if_exists):  # type: ignore[arg-type]
                     content.status = ContentStatus.COMPLETED
-                    self._update_content(content)
+                    await self._aupdate_content(content)
                     return
                 # Handle LightRAG special case - read file and upload directly
@@ -483,7 +483,6 @@ class Knowledge:
         3. Read the content
         4. Prepare and insert the content in the vector database
         """
         from agno.vectordb import VectorDb
         self.vector_db = cast(VectorDb, self.vector_db)
@@ -495,7 +494,7 @@ class Knowledge:
             raise ValueError("No url provided")
         # 1. Add content to contents database
-        self._add_to_contents_db(content)
+        await self._add_to_contents_db(content)
         if self._should_skip(content.content_hash, skip_if_exists):  # type: ignore[arg-type]
             content.status = ContentStatus.COMPLETED
             self._update_content(content)
@@ -513,12 +512,12 @@ class Knowledge:
             if not all([parsed_url.scheme, parsed_url.netloc]):
                 content.status = ContentStatus.FAILED
                 content.status_message = f"Invalid URL format: {content.url}"
-                self._update_content(content)
+                await self._aupdate_content(content)
                 log_warning(f"Invalid URL format: {content.url}")
         except Exception as e:
             content.status = ContentStatus.FAILED
             content.status_message = f"Invalid URL: {content.url} - {str(e)}"
-            self._update_content(content)
+            await self._aupdate_content(content)
             log_warning(f"Invalid URL: {content.url} - {str(e)}")
         # 3. Fetch and load content if file has an extension
@@ -576,7 +575,7 @@ class Knowledge:
             log_error(f"Error reading URL: {content.url} - {str(e)}")
             content.status = ContentStatus.FAILED
             content.status_message = f"Error reading URL: {content.url} - {str(e)}"
-            self._update_content(content)
+            await self._aupdate_content(content)
             return
         # 6. Chunk documents if needed
@@ -622,10 +621,10 @@ class Knowledge:
         log_info(f"Adding content from {content.name}")
-        self._add_to_contents_db(content)
+        await self._add_to_contents_db(content)
         if self._should_skip(content.content_hash, skip_if_exists):  # type: ignore[arg-type]
             content.status = ContentStatus.COMPLETED
-            self._update_content(content)
+            await self._aupdate_content(content)
             return
         if content.file_data and self.vector_db.__class__.__name__ == "LightRag":
@@ -648,7 +647,7 @@ class Knowledge:
                 else:
                     content.status = ContentStatus.FAILED
                     content.status_message = "Text reader not available"
-                    self._update_content(content)
+                    await self._aupdate_content(content)
                     return
         elif isinstance(content.file_data, FileData):
@@ -677,12 +676,13 @@ class Knowledge:
                 if len(read_documents) == 0:
                     content.status = ContentStatus.FAILED
                     content.status_message = "Content could not be read"
-                    self._update_content(content)
+                    await self._aupdate_content(content)
+                    return
         else:
             content.status = ContentStatus.FAILED
             content.status_message = "No content provided"
-            self._update_content(content)
+            await self._aupdate_content(content)
             return
         await self._handle_vector_db_insert(content, read_documents, upsert)
@@ -716,7 +716,7 @@ class Knowledge:
             content.content_hash = self._build_content_hash(content)
             content.id = generate_id(content.content_hash)
-            self._add_to_contents_db(content)
+            await self._add_to_contents_db(content)
             if self._should_skip(content.content_hash, skip_if_exists):
                 content.status = ContentStatus.COMPLETED
                 self._update_content(content)
@@ -726,6 +726,11 @@ class Knowledge:
                 await self._process_lightrag_content(content, KnowledgeContentOrigin.TOPIC)
                 return
+            if self.vector_db and self.vector_db.content_hash_exists(content.content_hash) and skip_if_exists:
+                log_info(f"Content {content.content_hash} already exists, skipping")
+                continue
+            await self._add_to_contents_db(content)
             if content.reader is None:
                 log_error(f"No reader available for topic: {topic}")
                 content.status = ContentStatus.FAILED
@@ -742,8 +747,7 @@ class Knowledge:
             else:
                 content.status = ContentStatus.FAILED
                 content.status_message = "No content found for topic"
-                self._update_content(content)
-                continue
+                await self._aupdate_content(content)
             await self._handle_vector_db_insert(content, read_documents, upsert)
@@ -812,10 +816,10 @@ class Knowledge:
             # 3. Hash content and add it to the contents database
             content_entry.content_hash = self._build_content_hash(content_entry)
             content_entry.id = generate_id(content_entry.content_hash)
-            self._add_to_contents_db(content_entry)
+            await self._add_to_contents_db(content_entry)
             if self._should_skip(content_entry.content_hash, skip_if_exists):
                 content_entry.status = ContentStatus.COMPLETED
-                self._update_content(content_entry)
+                await self._aupdate_content(content_entry)
                 return
             # 4. Select reader
@@ -894,10 +898,10 @@ class Knowledge:
             # 3. Hash content and add it to the contents database
             content_entry.content_hash = self._build_content_hash(content_entry)
             content_entry.id = generate_id(content_entry.content_hash)
-            self._add_to_contents_db(content_entry)
+            await self._add_to_contents_db(content_entry)
             if self._should_skip(content_entry.content_hash, skip_if_exists):
                 content_entry.status = ContentStatus.COMPLETED
-                self._update_content(content_entry)
+                await self._aupdate_content(content_entry)
                 return
             # 4. Select reader
@@ -937,7 +941,7 @@ class Knowledge:
             log_error("No vector database configured")
             content.status = ContentStatus.FAILED
             content.status_message = "No vector database configured"
-            self._update_content(content)
+            await self._aupdate_content(content)
             return
         if self.vector_db.upsert_available() and upsert:
@@ -947,7 +951,7 @@ class Knowledge:
                 log_error(f"Error upserting document: {e}")
                 content.status = ContentStatus.FAILED
                 content.status_message = "Could not upsert embedding"
-                self._update_content(content)
+                await self._aupdate_content(content)
                 return
         else:
             try:
@@ -960,11 +964,11 @@ class Knowledge:
                 log_error(f"Error inserting document: {e}")
                 content.status = ContentStatus.FAILED
                 content.status_message = "Could not insert embedding"
-                self._update_content(content)
+                await self._aupdate_content(content)
                 return
         content.status = ContentStatus.COMPLETED
-        self._update_content(content)
+        await self._aupdate_content(content)
     async def _load_content(
         self,
@@ -1064,7 +1068,7 @@ class Knowledge:
         # Already a string, return as-is
         return value
-    def _add_to_contents_db(self, content: Content):
+    async def _add_to_contents_db(self, content: Content):
         if self.contents_db:
             created_at = content.created_at if content.created_at else int(time.time())
             updated_at = content.updated_at if content.updated_at else int(time.time())
@@ -1102,13 +1106,21 @@ class Knowledge:
                 created_at=created_at,
                 updated_at=updated_at,
             )
-            self.contents_db.upsert_knowledge_content(knowledge_row=content_row)
+            if isinstance(self.contents_db, AsyncBaseDb):
+                await self.contents_db.upsert_knowledge_content(knowledge_row=content_row)
+            else:
+                self.contents_db.upsert_knowledge_content(knowledge_row=content_row)
     def _update_content(self, content: Content) -> Optional[Dict[str, Any]]:
         from agno.vectordb import VectorDb
         self.vector_db = cast(VectorDb, self.vector_db)
         if self.contents_db:
+            if isinstance(self.contents_db, AsyncBaseDb):
+                raise ValueError(
+                    "update_content() is not supported with an async DB. Please use aupdate_content() instead."
+                )
             if not content.id:
                 log_warning("Content id is required to update Knowledge content")
                 return None
@@ -1156,12 +1168,58 @@ class Knowledge:
                 log_warning("Contents DB not found for knowledge base")
             return None
+    async def _aupdate_content(self, content: Content) -> Optional[Dict[str, Any]]:
+        if self.contents_db:
+            if not content.id:
+                log_warning("Content id is required to update Knowledge content")
+                return None
+            # TODO: we shouldn't check for content here, we should trust the upsert method to handle conflicts
+            if isinstance(self.contents_db, AsyncBaseDb):
+                content_row = await self.contents_db.get_knowledge_content(content.id)
+            else:
+                content_row = self.contents_db.get_knowledge_content(content.id)
+            if content_row is None:
+                log_warning(f"Content row not found for id: {content.id}, cannot update status")
+                return None
+            if content.name is not None:
+                content_row.name = content.name
+            if content.description is not None:
+                content_row.description = content.description
+            if content.metadata is not None:
+                content_row.metadata = content.metadata
+            if content.status is not None:
+                content_row.status = content.status
+            if content.status_message is not None:
+                content_row.status_message = content.status_message if content.status_message else ""
+            if content.external_id is not None:
+                content_row.external_id = content.external_id
+            content_row.updated_at = int(time.time())
+            if isinstance(self.contents_db, AsyncBaseDb):
+                await self.contents_db.upsert_knowledge_content(knowledge_row=content_row)
+            else:
+                self.contents_db.upsert_knowledge_content(knowledge_row=content_row)
+            if self.vector_db and content.metadata:
+                self.vector_db.update_metadata(content_id=content.id, metadata=content.metadata)
+            if content.metadata:
+                self.add_filters(content.metadata)
+            return content_row.to_dict()
+        else:
+            log_warning(f"Contents DB not found for knowledge base: {self.name}")
+            return None
     async def _process_lightrag_content(self, content: Content, content_type: KnowledgeContentOrigin) -> None:
         from agno.vectordb import VectorDb
         self.vector_db = cast(VectorDb, self.vector_db)
-        self._add_to_contents_db(content)
+        await self._add_to_contents_db(content)
         if content_type == KnowledgeContentOrigin.PATH:
             if content.file_data is None:
                 log_warning("No file data provided")
@@ -1192,18 +1250,18 @@ class Knowledge:
                 else:
                     log_error("Vector database does not support file insertion")
                     content.status = ContentStatus.FAILED
-                    self._update_content(content)
+                    await self._aupdate_content(content)
                     return
                 content.external_id = result
                 content.status = ContentStatus.COMPLETED
-                self._update_content(content)
+                await self._aupdate_content(content)
                 return
             except Exception as e:
                 log_error(f"Error uploading file to LightRAG: {e}")
                 content.status = ContentStatus.FAILED
                 content.status_message = f"Could not upload to LightRAG: {str(e)}"
-                self._update_content(content)
+                await self._aupdate_content(content)
                 return
         elif content_type == KnowledgeContentOrigin.URL:
@@ -1213,7 +1271,7 @@ class Knowledge:
                 if reader is None:
                     log_error("No URL reader available")
                     content.status = ContentStatus.FAILED
-                    self._update_content(content)
+                    await self._aupdate_content(content)
                     return
                 reader.chunk = False
@@ -1225,7 +1283,7 @@ class Knowledge:
                 if not read_documents:
                     log_error("No documents read from URL")
                     content.status = ContentStatus.FAILED
-                    self._update_content(content)
+                    await self._aupdate_content(content)
                     return
                 if self.vector_db and hasattr(self.vector_db, "insert_text"):
@@ -1236,19 +1294,19 @@ class Knowledge:
                 else:
                     log_error("Vector database does not support text insertion")
                     content.status = ContentStatus.FAILED
-                    self._update_content(content)
+                    await self._aupdate_content(content)
                     return
                 content.external_id = result
                 content.status = ContentStatus.COMPLETED
-                self._update_content(content)
+                await self._aupdate_content(content)
                 return
             except Exception as e:
                 log_error(f"Error uploading file to LightRAG: {e}")
                 content.status = ContentStatus.FAILED
                 content.status_message = f"Could not upload to LightRAG: {str(e)}"
-                self._update_content(content)
+                await self._aupdate_content(content)
                 return
         elif content_type == KnowledgeContentOrigin.CONTENT:
@@ -1269,11 +1327,11 @@ class Knowledge:
                 else:
                     log_error("Vector database does not support file insertion")
                     content.status = ContentStatus.FAILED
-                    self._update_content(content)
+                    await self._aupdate_content(content)
                     return
                 content.external_id = result
                 content.status = ContentStatus.COMPLETED
-                self._update_content(content)
+                await self._aupdate_content(content)
             else:
                 log_warning(f"No file data available for LightRAG upload: {content.name}")
             return
@@ -1284,13 +1342,13 @@ class Knowledge:
             if content.reader is None:
                 log_error("No reader available for topic content")
                 content.status = ContentStatus.FAILED
-                self._update_content(content)
+                await self._aupdate_content(content)
                 return
             if not content.topics:
                 log_error("No topics available for content")
                 content.status = ContentStatus.FAILED
-                self._update_content(content)
+                await self._aupdate_content(content)
                 return
             read_documents = content.reader.read(content.topics)
@@ -1303,24 +1361,35 @@ class Knowledge:
                 else:
                     log_error("Vector database does not support text insertion")
                     content.status = ContentStatus.FAILED
-                    self._update_content(content)
+                    await self._aupdate_content(content)
                     return
                 content.external_id = result
                 content.status = ContentStatus.COMPLETED
-                self._update_content(content)
+                await self._aupdate_content(content)
                 return
             else:
                 log_warning(f"No documents found for LightRAG upload: {content.name}")
                 return
     def search(
-        self, query: str, max_results: Optional[int] = None, filters: Optional[Dict[str, Any]] = None
+        self,
+        query: str,
+        max_results: Optional[int] = None,
+        filters: Optional[Dict[str, Any]] = None,
+        search_type: Optional[str] = None,
     ) -> List[Document]:
         """Returns relevant documents matching a query"""
         from agno.vectordb import VectorDb
+        from agno.vectordb.search import SearchType
         self.vector_db = cast(VectorDb, self.vector_db)
+        if (
+            hasattr(self.vector_db, "search_type")
+            and isinstance(self.vector_db.search_type, SearchType)
+            and search_type
+        ):
+            self.vector_db.search_type = SearchType(search_type)
         try:
             if self.vector_db is None:
                 log_warning("No vector db provided")
@@ -1334,13 +1403,23 @@ class Knowledge:
             return []
     async def async_search(
-        self, query: str, max_results: Optional[int] = None, filters: Optional[Dict[str, Any]] = None
+        self,
+        query: str,
+        max_results: Optional[int] = None,
+        filters: Optional[Dict[str, Any]] = None,
+        search_type: Optional[str] = None,
     ) -> List[Document]:
         """Returns relevant documents matching a query"""
         from agno.vectordb import VectorDb
+        from agno.vectordb.search import SearchType
         self.vector_db = cast(VectorDb, self.vector_db)
+        if (
+            hasattr(self.vector_db, "search_type")
+            and isinstance(self.vector_db.search_type, SearchType)
+            and search_type
+        ):
+            self.vector_db.search_type = SearchType(search_type)
         try:
             if self.vector_db is None:
                 log_warning("No vector db provided")
@@ -1442,10 +1521,46 @@ class Knowledge:
     def patch_content(self, content: Content) -> Optional[Dict[str, Any]]:
         return self._update_content(content)
+    async def apatch_content(self, content: Content) -> Optional[Dict[str, Any]]:
+        return await self._aupdate_content(content)
     def get_content_by_id(self, content_id: str) -> Optional[Content]:
         if self.contents_db is None:
             raise ValueError("No contents db provided")
+        if isinstance(self.contents_db, AsyncBaseDb):
+            raise ValueError(
+                "get_content_by_id() is not supported for async databases. Please use aget_content_by_id() instead."
+            )
         content_row = self.contents_db.get_knowledge_content(content_id)
+        if content_row is None:
+            return None
+        content = Content(
+            id=content_row.id,
+            name=content_row.name,
+            description=content_row.description,
+            metadata=content_row.metadata,
+            file_type=content_row.type,
+            size=content_row.size,
+            status=ContentStatus(content_row.status) if content_row.status else None,
+            status_message=content_row.status_message,
+            created_at=content_row.created_at,
+            updated_at=content_row.updated_at if content_row.updated_at else content_row.created_at,
+            external_id=content_row.external_id,
+        )
+        return content
+    async def aget_content_by_id(self, content_id: str) -> Optional[Content]:
+        if self.contents_db is None:
+            raise ValueError("No contents db provided")
+        if isinstance(self.contents_db, AsyncBaseDb):
+            content_row = await self.contents_db.get_knowledge_content(content_id)
+        else:
+            content_row = self.contents_db.get_knowledge_content(content_id)
         if content_row is None:
             return None
         content = Content(
@@ -1472,6 +1587,10 @@ class Knowledge:
     ) -> Tuple[List[Content], int]:
         if self.contents_db is None:
             raise ValueError("No contents db provided")
+        if isinstance(self.contents_db, AsyncBaseDb):
+            raise ValueError("get_content() is not supported for async databases. Please use aget_content() instead.")
         contents, count = self.contents_db.get_knowledge_contents(
             limit=limit, page=page, sort_by=sort_by, sort_order=sort_order
         )
@@ -1495,9 +1614,53 @@ class Knowledge:
             result.append(content)
         return result, count
+    async def aget_content(
+        self,
+        limit: Optional[int] = None,
+        page: Optional[int] = None,
+        sort_by: Optional[str] = None,
+        sort_order: Optional[str] = None,
+    ) -> Tuple[List[Content], int]:
+        if self.contents_db is None:
+            raise ValueError("No contents db provided")
+        if isinstance(self.contents_db, AsyncBaseDb):
+            contents, count = await self.contents_db.get_knowledge_contents(
+                limit=limit, page=page, sort_by=sort_by, sort_order=sort_order
+            )
+        else:
+            contents, count = self.contents_db.get_knowledge_contents(
+                limit=limit, page=page, sort_by=sort_by, sort_order=sort_order
+            )
+        result = []
+        for content_row in contents:
+            # Create Content from database row
+            content = Content(
+                id=content_row.id,
+                name=content_row.name,
+                description=content_row.description,
+                metadata=content_row.metadata,
+                size=content_row.size,
+                file_type=content_row.type,
+                status=ContentStatus(content_row.status) if content_row.status else None,
+                status_message=content_row.status_message,
+                created_at=content_row.created_at,
+                updated_at=content_row.updated_at if content_row.updated_at else content_row.created_at,
+                external_id=content_row.external_id,
+            )
+            result.append(content)
+        return result, count
     def get_content_status(self, content_id: str) -> Tuple[Optional[ContentStatus], Optional[str]]:
         if self.contents_db is None:
             raise ValueError("No contents db provided")
+        if isinstance(self.contents_db, AsyncBaseDb):
+            raise ValueError(
+                "get_content_status() is not supported for async databases. Please use aget_content_status() instead."
+            )
         content_row = self.contents_db.get_knowledge_content(content_id)
         if content_row is None:
             return None, "Content not found"
@@ -1517,6 +1680,33 @@ class Knowledge:
         return status, content_row.status_message
+    async def aget_content_status(self, content_id: str) -> Tuple[Optional[ContentStatus], Optional[str]]:
+        if self.contents_db is None:
+            raise ValueError("No contents db provided")
+        if isinstance(self.contents_db, AsyncBaseDb):
+            content_row = await self.contents_db.get_knowledge_content(content_id)
+        else:
+            content_row = self.contents_db.get_knowledge_content(content_id)
+        if content_row is None:
+            return None, "Content not found"
+        # Convert string status to enum, defaulting to PROCESSING if unknown
+        status_str = content_row.status
+        try:
+            status = ContentStatus(status_str.lower()) if status_str else ContentStatus.PROCESSING
+        except ValueError:
+            # Handle legacy or unknown statuses
+            if status_str and "failed" in status_str.lower():
+                status = ContentStatus.FAILED
+            elif status_str and "completed" in status_str.lower():
+                status = ContentStatus.COMPLETED
+            else:
+                status = ContentStatus.PROCESSING
+        return status, content_row.status_message
     def remove_content_by_id(self, content_id: str):
         from agno.vectordb import VectorDb
@@ -1535,12 +1725,36 @@ class Knowledge:
         if self.contents_db is not None:
             self.contents_db.delete_knowledge_content(content_id)
+    async def aremove_content_by_id(self, content_id: str):
+        if self.vector_db is not None:
+            if self.vector_db.__class__.__name__ == "LightRag":
+                # For LightRAG, get the content first to find the external_id
+                content = await self.aget_content_by_id(content_id)
+                if content and content.external_id:
+                    self.vector_db.delete_by_external_id(content.external_id)  # type: ignore
+                else:
+                    log_warning(f"No external_id found for content {content_id}, cannot delete from LightRAG")
+            else:
+                self.vector_db.delete_by_content_id(content_id)
+        if self.contents_db is not None:
+            if isinstance(self.contents_db, AsyncBaseDb):
+                await self.contents_db.delete_knowledge_content(content_id)
+            else:
+                self.contents_db.delete_knowledge_content(content_id)
     def remove_all_content(self):
         contents, _ = self.get_content()
         for content in contents:
             if content.id is not None:
                 self.remove_content_by_id(content.id)
+    async def aremove_all_content(self):
+        contents, _ = await self.aget_content()
+        for content in contents:
+            if content.id is not None:
+                await self.aremove_content_by_id(content.id)
     # --- Reader Factory Integration ---
     def construct_readers(self):

agno/knowledge/reader/pdf_reader.py CHANGED Viewed

@@ -4,7 +4,8 @@ from pathlib import Path
 from typing import IO, Any, List, Optional, Tuple, Union
 from uuid import uuid4
-from agno.knowledge.chunking.strategy import ChunkingStrategyType
+from agno.knowledge.chunking.document import DocumentChunking
+from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
 from agno.knowledge.document.base import Document
 from agno.knowledge.reader.base import Reader
 from agno.knowledge.types import ContentType
@@ -183,6 +184,7 @@ class BasePDFReader(Reader):
         page_start_numbering_format: Optional[str] = None,
         page_end_numbering_format: Optional[str] = None,
         password: Optional[str] = None,
+        chunking_strategy: Optional[ChunkingStrategy] = DocumentChunking(chunk_size=5000),
         **kwargs,
     ):
         if page_start_numbering_format is None:
@@ -195,11 +197,7 @@ class BasePDFReader(Reader):
         self.page_end_numbering_format = page_end_numbering_format
         self.password = password
-        if self.chunking_strategy is None:
-            from agno.knowledge.chunking.document import DocumentChunking
-            self.chunking_strategy = DocumentChunking(chunk_size=5000)
-        super().__init__(**kwargs)
+        super().__init__(chunking_strategy=chunking_strategy, **kwargs)
     @classmethod
     def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:

agno/knowledge/reader/reader_factory.py CHANGED Viewed

@@ -16,8 +16,7 @@ class ReaderFactory:
         from agno.knowledge.reader.pdf_reader import PDFReader
         config: Dict[str, Any] = {
-            "chunk": True,
-            "chunk_size": 100,
+            "name": "PDF Reader",
             "description": "Processes PDF documents with OCR support for images and text extraction",
         }
         config.update(kwargs)
@@ -201,7 +200,7 @@ class ReaderFactory:
             return cls.create_reader("pdf")
         elif extension in [".csv", "text/csv"]:
             return cls.create_reader("csv")
-        elif extension in [".docx", ".doc"]:
+        elif extension in [".docx", ".doc", "application/vnd.openxmlformats-officedocument.wordprocessingml.document"]:
             return cls.create_reader("docx")
         elif extension == ".json":
             return cls.create_reader("json")

agno 2.1.4__py3-none-any.whl → 2.1.5__py3-none-any.whl

agno 2.1.4py3-none-any.whl → 2.1.5py3-none-any.whl