agno 1.7.9__py3-none-any.whl → 1.7.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. agno/agent/agent.py +1 -1
  2. agno/app/fastapi/app.py +3 -1
  3. agno/app/fastapi/async_router.py +1 -1
  4. agno/app/playground/app.py +1 -0
  5. agno/document/chunking/semantic.py +1 -3
  6. agno/document/reader/markdown_reader.py +2 -7
  7. agno/document/reader/pdf_reader.py +69 -13
  8. agno/document/reader/text_reader.py +2 -2
  9. agno/knowledge/agent.py +70 -75
  10. agno/knowledge/markdown.py +15 -2
  11. agno/knowledge/pdf.py +32 -8
  12. agno/knowledge/pdf_url.py +13 -5
  13. agno/knowledge/website.py +4 -1
  14. agno/media.py +2 -0
  15. agno/models/aws/bedrock.py +51 -21
  16. agno/models/dashscope/__init__.py +5 -0
  17. agno/models/dashscope/dashscope.py +81 -0
  18. agno/models/openai/chat.py +3 -0
  19. agno/models/openai/responses.py +53 -7
  20. agno/models/qwen/__init__.py +5 -0
  21. agno/run/response.py +4 -0
  22. agno/run/team.py +4 -0
  23. agno/storage/in_memory.py +234 -0
  24. agno/team/team.py +25 -9
  25. agno/tools/brandfetch.py +210 -0
  26. agno/tools/github.py +46 -18
  27. agno/tools/trafilatura.py +372 -0
  28. agno/vectordb/clickhouse/clickhousedb.py +1 -1
  29. agno/vectordb/milvus/milvus.py +89 -1
  30. agno/vectordb/weaviate/weaviate.py +84 -18
  31. agno/workflow/workflow.py +3 -0
  32. {agno-1.7.9.dist-info → agno-1.7.11.dist-info}/METADATA +5 -1
  33. {agno-1.7.9.dist-info → agno-1.7.11.dist-info}/RECORD +37 -31
  34. {agno-1.7.9.dist-info → agno-1.7.11.dist-info}/WHEEL +0 -0
  35. {agno-1.7.9.dist-info → agno-1.7.11.dist-info}/entry_points.txt +0 -0
  36. {agno-1.7.9.dist-info → agno-1.7.11.dist-info}/licenses/LICENSE +0 -0
  37. {agno-1.7.9.dist-info → agno-1.7.11.dist-info}/top_level.txt +0 -0
agno/agent/agent.py CHANGED
@@ -6872,7 +6872,7 @@ class Agent:
6872
6872
  document_name = query.replace(" ", "_").replace("?", "").replace("!", "").replace(".", "")
6873
6873
  document_content = json.dumps({"query": query, "result": result})
6874
6874
  log_info(f"Adding document to knowledge base: {document_name}: {document_content}")
6875
- self.knowledge.add_document_to_knowledge_base(
6875
+ self.knowledge.load_document(
6876
6876
  document=Document(
6877
6877
  name=document_name,
6878
6878
  content=document_content,
agno/app/fastapi/app.py CHANGED
@@ -81,6 +81,7 @@ class FastAPIApp(BaseAPIApp):
81
81
  workflow.app_id = self.app_id
82
82
  if not workflow.workflow_id:
83
83
  workflow.workflow_id = generate_id(workflow.name)
84
+ workflow.initialize_workflow()
84
85
 
85
86
  def get_router(self) -> APIRouter:
86
87
  return get_sync_router(agents=self.agents, teams=self.teams, workflows=self.workflows)
@@ -95,6 +96,7 @@ class FastAPIApp(BaseAPIApp):
95
96
  host: str = "localhost",
96
97
  port: int = 7777,
97
98
  reload: bool = False,
99
+ workers: Optional[int] = None,
98
100
  **kwargs,
99
101
  ):
100
102
  self.set_app_id()
@@ -102,4 +104,4 @@ class FastAPIApp(BaseAPIApp):
102
104
 
103
105
  log_info(f"Starting API on {host}:{port}")
104
106
 
105
- uvicorn.run(app=app, host=host, port=port, reload=reload, **kwargs)
107
+ uvicorn.run(app=app, host=host, port=port, reload=reload, workers=workers, **kwargs)
@@ -231,7 +231,7 @@ def get_async_router(
231
231
 
232
232
  return base64_images, base64_audios, base64_videos
233
233
 
234
- def team_process_file(
234
+ async def team_process_file(
235
235
  files: List[UploadFile],
236
236
  ):
237
237
  base64_images: List[Image] = []
@@ -87,6 +87,7 @@ class Playground:
87
87
  workflow.app_id = self.app_id
88
88
  if not workflow.workflow_id:
89
89
  workflow.workflow_id = generate_id(workflow.name)
90
+ workflow.initialize_workflow()
90
91
 
91
92
  def set_app_id(self) -> str:
92
93
  # If app_id is already set, keep it instead of overriding with UUID
@@ -14,9 +14,7 @@ except ImportError:
14
14
  class SemanticChunking(ChunkingStrategy):
15
15
  """Chunking strategy that splits text into semantic chunks using chonkie"""
16
16
 
17
- def __init__(
18
- self, embedder: Optional[Embedder] = None, chunk_size: int = 5000, similarity_threshold: Optional[float] = 0.5
19
- ):
17
+ def __init__(self, embedder: Optional[Embedder] = None, chunk_size: int = 5000, similarity_threshold: float = 0.5):
20
18
  self.embedder = embedder or OpenAIEmbedder(id="text-embedding-3-small") # type: ignore
21
19
  self.chunk_size = chunk_size
22
20
  self.similarity_threshold = similarity_threshold
@@ -1,11 +1,9 @@
1
1
  import asyncio
2
2
  import uuid
3
3
  from pathlib import Path
4
- from typing import IO, Any, List, Optional, Union
4
+ from typing import IO, Any, List, Union
5
5
 
6
6
  from agno.document.base import Document
7
- from agno.document.chunking.markdown import MarkdownChunking
8
- from agno.document.chunking.strategy import ChunkingStrategy
9
7
  from agno.document.reader.base import Reader
10
8
  from agno.utils.log import log_info, logger
11
9
 
@@ -13,9 +11,6 @@ from agno.utils.log import log_info, logger
13
11
  class MarkdownReader(Reader):
14
12
  """Reader for Markdown files"""
15
13
 
16
- def __init__(self, chunking_strategy: Optional[ChunkingStrategy] = MarkdownChunking()) -> None:
17
- super().__init__(chunking_strategy=chunking_strategy)
18
-
19
14
  def read(self, file: Union[Path, IO[Any]]) -> List[Document]:
20
15
  try:
21
16
  if isinstance(file, Path):
@@ -30,7 +25,7 @@ class MarkdownReader(Reader):
30
25
  file.seek(0)
31
26
  file_contents = file.read().decode("utf-8")
32
27
 
33
- documents = [Document(name=file_name, id=str({uuid.uuid4()}), content=file_contents)]
28
+ documents = [Document(name=file_name, id=str(uuid.uuid4()), content=file_contents)]
34
29
  if self.chunk:
35
30
  chunked_documents = []
36
31
  for document in documents:
@@ -7,7 +7,7 @@ from uuid import uuid4
7
7
  from agno.document.base import Document
8
8
  from agno.document.reader.base import Reader
9
9
  from agno.utils.http import async_fetch_with_retry, fetch_with_retry
10
- from agno.utils.log import log_info, logger
10
+ from agno.utils.log import log_error, log_info, logger
11
11
 
12
12
  try:
13
13
  from pypdf import PdfReader as DocumentReader # noqa: F401
@@ -177,6 +177,7 @@ class BasePDFReader(Reader):
177
177
  split_on_pages: bool = True,
178
178
  page_start_numbering_format: Optional[str] = None,
179
179
  page_end_numbering_format: Optional[str] = None,
180
+ password: Optional[str] = None,
180
181
  **kwargs,
181
182
  ):
182
183
  if page_start_numbering_format is None:
@@ -187,6 +188,7 @@ class BasePDFReader(Reader):
187
188
  self.split_on_pages = split_on_pages
188
189
  self.page_start_numbering_format = page_start_numbering_format
189
190
  self.page_end_numbering_format = page_end_numbering_format
191
+ self.password = password
190
192
 
191
193
  super().__init__(**kwargs)
192
194
 
@@ -196,6 +198,28 @@ class BasePDFReader(Reader):
196
198
  chunked_documents.extend(self.chunk_document(document))
197
199
  return chunked_documents
198
200
 
201
+ def _decrypt_pdf(self, doc_reader: DocumentReader, doc_name: str, password: Optional[str] = None) -> bool:
202
+ if not doc_reader.is_encrypted:
203
+ return True
204
+
205
+ # Use provided password or fall back to instance password
206
+ pdf_password = password or self.password
207
+ if not pdf_password:
208
+ logger.error(f"PDF {doc_name} is password protected but no password provided")
209
+ return False
210
+
211
+ try:
212
+ decrypted_pdf = doc_reader.decrypt(pdf_password)
213
+ if decrypted_pdf:
214
+ log_info(f"Successfully decrypted PDF {doc_name} with user password")
215
+ return True
216
+ else:
217
+ log_error(f"Failed to decrypt PDF {doc_name}: incorrect password")
218
+ return False
219
+ except Exception as e:
220
+ log_error(f"Error decrypting PDF {doc_name}: {e}")
221
+ return False
222
+
199
223
  def _create_documents(self, pdf_content: List[str], doc_name: str, use_uuid_for_id: bool, page_number_shift):
200
224
  if self.split_on_pages:
201
225
  shift = page_number_shift if page_number_shift is not None else 1
@@ -282,7 +306,7 @@ class BasePDFReader(Reader):
282
306
  class PDFReader(BasePDFReader):
283
307
  """Reader for PDF files"""
284
308
 
285
- def read(self, pdf: Union[str, Path, IO[Any]]) -> List[Document]:
309
+ def read(self, pdf: Union[str, Path, IO[Any]], password: Optional[str] = None) -> List[Document]:
286
310
  try:
287
311
  if isinstance(pdf, str):
288
312
  doc_name = pdf.split("/")[-1].split(".")[0].replace(" ", "_")
@@ -299,10 +323,14 @@ class PDFReader(BasePDFReader):
299
323
  logger.error(f"Error reading PDF: {e}")
300
324
  return []
301
325
 
326
+ # Handle PDF decryption
327
+ if not self._decrypt_pdf(pdf_reader, doc_name, password):
328
+ return []
329
+
302
330
  # Read and chunk.
303
331
  return self._pdf_reader_to_documents(pdf_reader, doc_name, use_uuid_for_id=True)
304
332
 
305
- async def async_read(self, pdf: Union[str, Path, IO[Any]]) -> List[Document]:
333
+ async def async_read(self, pdf: Union[str, Path, IO[Any]], password: Optional[str] = None) -> List[Document]:
306
334
  try:
307
335
  if isinstance(pdf, str):
308
336
  doc_name = pdf.split("/")[-1].split(".")[0].replace(" ", "_")
@@ -319,6 +347,10 @@ class PDFReader(BasePDFReader):
319
347
  logger.error(f"Error reading PDF: {e}")
320
348
  return []
321
349
 
350
+ # Handle PDF decryption
351
+ if not self._decrypt_pdf(pdf_reader, doc_name, password):
352
+ return []
353
+
322
354
  # Read and chunk.
323
355
  return await self._async_pdf_reader_to_documents(pdf_reader, doc_name, use_uuid_for_id=True)
324
356
 
@@ -326,11 +358,11 @@ class PDFReader(BasePDFReader):
326
358
  class PDFUrlReader(BasePDFReader):
327
359
  """Reader for PDF files from URL"""
328
360
 
329
- def __init__(self, proxy: Optional[str] = None, **kwargs):
330
- super().__init__(**kwargs)
361
+ def __init__(self, proxy: Optional[str] = None, password: Optional[str] = None, **kwargs):
362
+ super().__init__(password=password, **kwargs)
331
363
  self.proxy = proxy
332
364
 
333
- def read(self, url: str) -> List[Document]:
365
+ def read(self, url: str, password: Optional[str] = None) -> List[Document]:
334
366
  if not url:
335
367
  raise ValueError("No url provided")
336
368
 
@@ -344,10 +376,14 @@ class PDFUrlReader(BasePDFReader):
344
376
  doc_name = url.split("/")[-1].split(".")[0].replace("/", "_").replace(" ", "_")
345
377
  pdf_reader = DocumentReader(BytesIO(response.content))
346
378
 
379
+ # Handle PDF decryption
380
+ if not self._decrypt_pdf(pdf_reader, doc_name, password):
381
+ return []
382
+
347
383
  # Read and chunk.
348
384
  return self._pdf_reader_to_documents(pdf_reader, doc_name, use_uuid_for_id=False)
349
385
 
350
- async def async_read(self, url: str) -> List[Document]:
386
+ async def async_read(self, url: str, password: Optional[str] = None) -> List[Document]:
351
387
  if not url:
352
388
  raise ValueError("No url provided")
353
389
 
@@ -364,6 +400,10 @@ class PDFUrlReader(BasePDFReader):
364
400
  doc_name = url.split("/")[-1].split(".")[0].replace("/", "_").replace(" ", "_")
365
401
  pdf_reader = DocumentReader(BytesIO(response.content))
366
402
 
403
+ # Handle PDF decryption
404
+ if not self._decrypt_pdf(pdf_reader, doc_name, password):
405
+ return []
406
+
367
407
  # Read and chunk.
368
408
  return await self._async_pdf_reader_to_documents(pdf_reader, doc_name, use_uuid_for_id=False)
369
409
 
@@ -371,7 +411,7 @@ class PDFUrlReader(BasePDFReader):
371
411
  class PDFImageReader(BasePDFReader):
372
412
  """Reader for PDF files with text and images extraction"""
373
413
 
374
- def read(self, pdf: Union[str, Path, IO[Any]]) -> List[Document]:
414
+ def read(self, pdf: Union[str, Path, IO[Any]], password: Optional[str] = None) -> List[Document]:
375
415
  if not pdf:
376
416
  raise ValueError("No pdf provided")
377
417
 
@@ -386,10 +426,14 @@ class PDFImageReader(BasePDFReader):
386
426
  log_info(f"Reading: {doc_name}")
387
427
  pdf_reader = DocumentReader(pdf)
388
428
 
429
+ # Handle PDF decryption
430
+ if not self._decrypt_pdf(pdf_reader, doc_name, password):
431
+ return []
432
+
389
433
  # Read and chunk.
390
434
  return self._pdf_reader_to_documents(pdf_reader, doc_name, read_images=True, use_uuid_for_id=False)
391
435
 
392
- async def async_read(self, pdf: Union[str, Path, IO[Any]]) -> List[Document]:
436
+ async def async_read(self, pdf: Union[str, Path, IO[Any]], password: Optional[str] = None) -> List[Document]:
393
437
  if not pdf:
394
438
  raise ValueError("No pdf provided")
395
439
 
@@ -404,6 +448,10 @@ class PDFImageReader(BasePDFReader):
404
448
  log_info(f"Reading: {doc_name}")
405
449
  pdf_reader = DocumentReader(pdf)
406
450
 
451
+ # Handle PDF decryption
452
+ if not self._decrypt_pdf(pdf_reader, doc_name, password):
453
+ return []
454
+
407
455
  # Read and chunk.
408
456
  return await self._async_pdf_reader_to_documents(pdf_reader, doc_name, read_images=True, use_uuid_for_id=False)
409
457
 
@@ -411,11 +459,11 @@ class PDFImageReader(BasePDFReader):
411
459
  class PDFUrlImageReader(BasePDFReader):
412
460
  """Reader for PDF files from URL with text and images extraction"""
413
461
 
414
- def __init__(self, proxy: Optional[str] = None, **kwargs):
415
- super().__init__(**kwargs)
462
+ def __init__(self, proxy: Optional[str] = None, password: Optional[str] = None, **kwargs):
463
+ super().__init__(password=password, **kwargs)
416
464
  self.proxy = proxy
417
465
 
418
- def read(self, url: str) -> List[Document]:
466
+ def read(self, url: str, password: Optional[str] = None) -> List[Document]:
419
467
  if not url:
420
468
  raise ValueError("No url provided")
421
469
 
@@ -430,10 +478,14 @@ class PDFUrlImageReader(BasePDFReader):
430
478
  doc_name = url.split("/")[-1].split(".")[0].replace(" ", "_")
431
479
  pdf_reader = DocumentReader(BytesIO(response.content))
432
480
 
481
+ # Handle PDF decryption
482
+ if not self._decrypt_pdf(pdf_reader, doc_name, password):
483
+ return []
484
+
433
485
  # Read and chunk.
434
486
  return self._pdf_reader_to_documents(pdf_reader, doc_name, read_images=True, use_uuid_for_id=False)
435
487
 
436
- async def async_read(self, url: str) -> List[Document]:
488
+ async def async_read(self, url: str, password: Optional[str] = None) -> List[Document]:
437
489
  if not url:
438
490
  raise ValueError("No url provided")
439
491
 
@@ -451,5 +503,9 @@ class PDFUrlImageReader(BasePDFReader):
451
503
  doc_name = url.split("/")[-1].split(".")[0].replace(" ", "_")
452
504
  pdf_reader = DocumentReader(BytesIO(response.content))
453
505
 
506
+ # Handle PDF decryption
507
+ if not self._decrypt_pdf(pdf_reader, doc_name, password):
508
+ return []
509
+
454
510
  # Read and chunk.
455
511
  return await self._async_pdf_reader_to_documents(pdf_reader, doc_name, read_images=True, use_uuid_for_id=False)
@@ -28,7 +28,7 @@ class TextReader(Reader):
28
28
  documents = [
29
29
  Document(
30
30
  name=file_name,
31
- id=str({uuid.uuid4()}),
31
+ id=str(uuid.uuid4()),
32
32
  content=file_contents,
33
33
  )
34
34
  ]
@@ -67,7 +67,7 @@ class TextReader(Reader):
67
67
 
68
68
  document = Document(
69
69
  name=file_name,
70
- id=str({uuid.uuid4()}),
70
+ id=str(uuid.uuid4()),
71
71
  content=file_contents,
72
72
  )
73
73
 
agno/knowledge/agent.py CHANGED
@@ -2,7 +2,7 @@ import asyncio
2
2
  from pathlib import Path
3
3
  from typing import Any, AsyncIterator, Dict, Iterator, List, Optional, Set, Tuple
4
4
 
5
- from pydantic import BaseModel, ConfigDict, Field, model_validator
5
+ from pydantic import BaseModel, ConfigDict, model_validator
6
6
 
7
7
  from agno.document import Document
8
8
  from agno.document.chunking.fixed import FixedSizeChunking
@@ -24,8 +24,7 @@ class AgentKnowledge(BaseModel):
24
24
  # Number of documents to optimize the vector db on
25
25
  optimize_on: Optional[int] = 1000
26
26
 
27
- chunking_strategy: ChunkingStrategy = Field(default_factory=FixedSizeChunking)
28
-
27
+ chunking_strategy: Optional[ChunkingStrategy] = None
29
28
  model_config = ConfigDict(arbitrary_types_allowed=True)
30
29
 
31
30
  valid_metadata_filters: Set[str] = None # type: ignore
@@ -33,7 +32,7 @@ class AgentKnowledge(BaseModel):
33
32
  @model_validator(mode="after")
34
33
  def update_reader(self) -> "AgentKnowledge":
35
34
  if self.reader is not None and self.reader.chunking_strategy is None:
36
- self.reader.chunking_strategy = self.chunking_strategy
35
+ self.reader.chunking_strategy = self.chunking_strategy or FixedSizeChunking()
37
36
  return self
38
37
 
39
38
  @property
@@ -50,6 +49,53 @@ class AgentKnowledge(BaseModel):
50
49
  """
51
50
  raise NotImplementedError
52
51
 
52
+ def _upsert_warning(self, upsert) -> None:
53
+ """Log a warning if upsert is not available"""
54
+ if upsert and self.vector_db is not None and not self.vector_db.upsert_available():
55
+ log_info(
56
+ f"Vector db '{self.vector_db.__class__.__module__}' does not support upsert. Falling back to insert."
57
+ )
58
+
59
+ def _load_init(self, recreate: bool, upsert: bool) -> None:
60
+ """Initial setup for loading knowledge base"""
61
+ if self.vector_db is None:
62
+ logger.warning("No vector db provided")
63
+ return
64
+
65
+ if recreate:
66
+ log_info("Dropping collection")
67
+ self.vector_db.drop()
68
+
69
+ if not self.vector_db.exists():
70
+ log_info("Creating collection")
71
+ self.vector_db.create()
72
+
73
+ self._upsert_warning(upsert)
74
+
75
+ async def _aload_init(self, recreate: bool, upsert: bool) -> None:
76
+ """Initial async setup for loading knowledge base"""
77
+ if self.vector_db is None:
78
+ logger.warning("No vector db provided")
79
+ return
80
+
81
+ if recreate:
82
+ log_info("Dropping collection")
83
+ try:
84
+ await self.vector_db.async_drop()
85
+ except NotImplementedError:
86
+ logger.warning("Vector db does not support async drop, falling back to sync drop")
87
+ self.vector_db.drop()
88
+
89
+ if not self.vector_db.exists():
90
+ log_info("Creating collection")
91
+ try:
92
+ await self.vector_db.async_create()
93
+ except NotImplementedError:
94
+ logger.warning("Vector db does not support async create, falling back to sync create")
95
+ self.vector_db.create()
96
+
97
+ self._upsert_warning(upsert)
98
+
53
99
  def search(
54
100
  self, query: str, num_documents: Optional[int] = None, filters: Optional[Dict[str, Any]] = None
55
101
  ) -> List[Document]:
@@ -80,7 +126,7 @@ class AgentKnowledge(BaseModel):
80
126
  try:
81
127
  return await self.vector_db.async_search(query=query, limit=_num_documents, filters=filters)
82
128
  except NotImplementedError:
83
- logger.info("Vector db does not support async search")
129
+ log_info("Vector db does not support async search")
84
130
  return self.search(query=query, num_documents=_num_documents, filters=filters)
85
131
  except Exception as e:
86
132
  logger.error(f"Error searching for documents: {e}")
@@ -99,18 +145,10 @@ class AgentKnowledge(BaseModel):
99
145
  upsert (bool): If True, upserts documents to the vector db. Defaults to False.
100
146
  skip_existing (bool): If True, skips documents which already exist in the vector db when inserting. Defaults to True.
101
147
  """
148
+ self._load_init(recreate, upsert)
102
149
  if self.vector_db is None:
103
- logger.warning("No vector db provided")
104
150
  return
105
151
 
106
- if recreate:
107
- log_info("Dropping collection")
108
- self.vector_db.drop()
109
-
110
- if not self.vector_db.exists():
111
- log_info("Creating collection")
112
- self.vector_db.create()
113
-
114
152
  log_info("Loading knowledge base")
115
153
  num_documents = 0
116
154
  for document_list in self.document_lists:
@@ -123,8 +161,7 @@ class AgentKnowledge(BaseModel):
123
161
 
124
162
  # Upsert documents if upsert is True and vector db supports upsert
125
163
  if upsert and self.vector_db.upsert_available():
126
- for doc in document_list:
127
- self.vector_db.upsert(documents=[doc], filters=doc.meta_data)
164
+ self.vector_db.upsert(documents=documents_to_load, filters=doc.meta_data)
128
165
  # Insert documents
129
166
  else:
130
167
  # Filter out documents which already exist in the vector db
@@ -133,11 +170,10 @@ class AgentKnowledge(BaseModel):
133
170
  documents_to_load = self.filter_existing_documents(document_list)
134
171
 
135
172
  if documents_to_load:
136
- for doc in documents_to_load:
137
- self.vector_db.insert(documents=[doc], filters=doc.meta_data)
173
+ self.vector_db.insert(documents=documents_to_load, filters=doc.meta_data)
138
174
 
139
175
  num_documents += len(documents_to_load)
140
- log_info(f"Added {len(documents_to_load)} documents to knowledge base")
176
+ log_info(f"Added {num_documents} documents to knowledge base")
141
177
 
142
178
  async def aload(
143
179
  self,
@@ -152,19 +188,10 @@ class AgentKnowledge(BaseModel):
152
188
  upsert (bool): If True, upserts documents to the vector db. Defaults to False.
153
189
  skip_existing (bool): If True, skips documents which already exist in the vector db when inserting. Defaults to True.
154
190
  """
155
-
191
+ await self._aload_init(recreate, upsert)
156
192
  if self.vector_db is None:
157
- logger.warning("No vector db provided")
158
193
  return
159
194
 
160
- if recreate:
161
- log_info("Dropping collection")
162
- await self.vector_db.async_drop()
163
-
164
- if not await self.vector_db.async_exists():
165
- log_info("Creating collection")
166
- await self.vector_db.async_create()
167
-
168
195
  log_info("Loading knowledge base")
169
196
  num_documents = 0
170
197
  document_iterator = self.async_document_lists
@@ -177,8 +204,7 @@ class AgentKnowledge(BaseModel):
177
204
 
178
205
  # Upsert documents if upsert is True and vector db supports upsert
179
206
  if upsert and self.vector_db.upsert_available():
180
- for doc in document_list:
181
- await self.vector_db.async_upsert(documents=[doc], filters=doc.meta_data)
207
+ await self.vector_db.async_upsert(documents=documents_to_load, filters=doc.meta_data)
182
208
  # Insert documents
183
209
  else:
184
210
  # Filter out documents which already exist in the vector db
@@ -187,11 +213,10 @@ class AgentKnowledge(BaseModel):
187
213
  documents_to_load = await self.async_filter_existing_documents(document_list)
188
214
 
189
215
  if documents_to_load:
190
- for doc in documents_to_load:
191
- await self.vector_db.async_insert(documents=[doc], filters=doc.meta_data)
216
+ await self.vector_db.async_insert(documents=documents_to_load, filters=doc.meta_data)
192
217
 
193
218
  num_documents += len(documents_to_load)
194
- log_info(f"Added {len(documents_to_load)} documents to knowledge base")
219
+ log_info(f"Added {num_documents} documents to knowledge base")
195
220
 
196
221
  def load_documents(
197
222
  self,
@@ -208,15 +233,11 @@ class AgentKnowledge(BaseModel):
208
233
  skip_existing (bool): If True, skips documents which already exist in the vector db when inserting. Defaults to True.
209
234
  filters (Optional[Dict[str, Any]]): Filters to add to each row that can be used to limit results during querying. Defaults to None.
210
235
  """
211
-
212
- log_info("Loading knowledge base")
236
+ self._load_init(recreate=False, upsert=upsert)
213
237
  if self.vector_db is None:
214
- logger.warning("No vector db provided")
215
238
  return
216
239
 
217
- log_debug("Creating collection")
218
- self.vector_db.create()
219
-
240
+ log_info("Loading knowledge base")
220
241
  # Upsert documents if upsert is True
221
242
  if upsert and self.vector_db.upsert_available():
222
243
  self.vector_db.upsert(documents=documents, filters=filters)
@@ -251,17 +272,11 @@ class AgentKnowledge(BaseModel):
251
272
  skip_existing (bool): If True, skips documents which already exist in the vector db when inserting. Defaults to True.
252
273
  filters (Optional[Dict[str, Any]]): Filters to add to each row that can be used to limit results during querying. Defaults to None.
253
274
  """
254
- log_info("Loading knowledge base")
275
+ await self._aload_init(recreate=False, upsert=upsert)
255
276
  if self.vector_db is None:
256
- logger.warning("No vector db provided")
257
277
  return
258
278
 
259
- log_debug("Creating collection")
260
- try:
261
- await self.vector_db.async_create()
262
- except NotImplementedError:
263
- logger.warning("Vector db does not support async create")
264
- self.vector_db.create()
279
+ log_info("Loading knowledge base")
265
280
 
266
281
  # Upsert documents if upsert is True
267
282
  if upsert and self.vector_db.upsert_available():
@@ -302,7 +317,7 @@ class AgentKnowledge(BaseModel):
302
317
  else:
303
318
  log_info("No new documents to load")
304
319
 
305
- def add_document_to_knowledge_base(
320
+ def load_document(
306
321
  self,
307
322
  document: Document,
308
323
  upsert: bool = False,
@@ -414,8 +429,6 @@ class AgentKnowledge(BaseModel):
414
429
  Returns:
415
430
  List[Document]: Filtered list of documents that don't exist in the database
416
431
  """
417
- from agno.utils.log import log_debug, log_info
418
-
419
432
  if not self.vector_db:
420
433
  log_debug("No vector database configured, skipping document filtering")
421
434
  return documents
@@ -556,20 +569,9 @@ class AgentKnowledge(BaseModel):
556
569
  self._track_metadata_structure(metadata)
557
570
 
558
571
  # 3. Prepare vector DB
572
+ self._load_init(recreate, upsert=False)
559
573
  if self.vector_db is None:
560
- logger.warning("Cannot load file: No vector db provided.")
561
574
  return False
562
-
563
- # Recreate collection if requested
564
- if recreate:
565
- # log_info(f"Recreating collection.")
566
- self.vector_db.drop()
567
-
568
- # Create collection if it doesn't exist
569
- if not self.vector_db.exists():
570
- # log_info(f"Collection does not exist. Creating.")
571
- self.vector_db.create()
572
-
573
575
  return True
574
576
 
575
577
  async def aprepare_load(
@@ -604,20 +606,9 @@ class AgentKnowledge(BaseModel):
604
606
  self._track_metadata_structure(metadata)
605
607
 
606
608
  # 3. Prepare vector DB
609
+ await self._aload_init(recreate, upsert=False)
607
610
  if self.vector_db is None:
608
- logger.warning("Cannot load file: No vector db provided.")
609
611
  return False
610
-
611
- # Recreate collection if requested
612
- if recreate:
613
- log_info("Recreating collection.")
614
- await self.vector_db.async_drop()
615
-
616
- # Create collection if it doesn't exist
617
- if not await self.vector_db.async_exists():
618
- log_info("Collection does not exist. Creating.")
619
- await self.vector_db.async_create()
620
-
621
612
  return True
622
613
 
623
614
  def process_documents(
@@ -642,6 +633,8 @@ class AgentKnowledge(BaseModel):
642
633
 
643
634
  log_info(f"Loading {len(documents)} documents from {source_info} with metadata: {metadata}")
644
635
 
636
+ self._upsert_warning(upsert)
637
+
645
638
  # Decide loading strategy: upsert or insert (with optional skip)
646
639
  if upsert and self.vector_db.upsert_available(): # type: ignore
647
640
  log_debug(f"Upserting {len(documents)} documents.") # type: ignore
@@ -681,6 +674,8 @@ class AgentKnowledge(BaseModel):
681
674
  logger.warning(f"No documents were read from {source_info}")
682
675
  return
683
676
 
677
+ self._upsert_warning(upsert)
678
+
684
679
  log_info(f"Loading {len(documents)} documents from {source_info} with metadata: {metadata}")
685
680
 
686
681
  # Decide loading strategy: upsert or insert (with optional skip)
@@ -1,7 +1,10 @@
1
1
  from pathlib import Path
2
- from typing import Any, AsyncIterator, Dict, Iterator, List, Optional, Union
2
+ from typing import Any, AsyncIterator, Dict, Iterator, List, Optional, Union, cast
3
+
4
+ from pydantic import model_validator
3
5
 
4
6
  from agno.document import Document
7
+ from agno.document.chunking.markdown import MarkdownChunking
5
8
  from agno.document.reader.markdown_reader import MarkdownReader
6
9
  from agno.knowledge.agent import AgentKnowledge
7
10
  from agno.utils.log import log_info, logger
@@ -10,11 +13,18 @@ from agno.utils.log import log_info, logger
10
13
  class MarkdownKnowledgeBase(AgentKnowledge):
11
14
  path: Optional[Union[str, Path, List[Dict[str, Union[str, Dict[str, Any]]]]]] = None
12
15
  formats: List[str] = [".md"]
13
- reader: MarkdownReader = MarkdownReader()
16
+ reader: Optional[MarkdownReader] = None
17
+
18
+ @model_validator(mode="after")
19
+ def set_reader(self) -> "MarkdownKnowledgeBase":
20
+ if self.reader is None:
21
+ self.reader = MarkdownReader(chunking_strategy=self.chunking_strategy or MarkdownChunking())
22
+ return self
14
23
 
15
24
  @property
16
25
  def document_lists(self) -> Iterator[List[Document]]:
17
26
  """Iterate over text files and yield lists of documents."""
27
+ self.reader = cast(MarkdownReader, self.reader)
18
28
  if self.path is None:
19
29
  raise ValueError("Path is not set")
20
30
 
@@ -49,6 +59,7 @@ class MarkdownKnowledgeBase(AgentKnowledge):
49
59
  @property
50
60
  async def async_document_lists(self) -> AsyncIterator[List[Document]]:
51
61
  """Iterate over text files and yield lists of documents asynchronously."""
62
+ self.reader = cast(MarkdownReader, self.reader)
52
63
  if self.path is None:
53
64
  raise ValueError("Path is not set")
54
65
 
@@ -85,6 +96,7 @@ class MarkdownKnowledgeBase(AgentKnowledge):
85
96
  skip_existing: bool = True,
86
97
  ) -> None:
87
98
  """Load documents from a single text file with specific metadata into the vector DB."""
99
+ self.reader = cast(MarkdownReader, self.reader)
88
100
 
89
101
  _file_path = Path(path) if isinstance(path, str) else path
90
102
 
@@ -117,6 +129,7 @@ class MarkdownKnowledgeBase(AgentKnowledge):
117
129
  skip_existing: bool = True,
118
130
  ) -> None:
119
131
  """Load documents from a single text file with specific metadata into the vector DB."""
132
+ self.reader = cast(MarkdownReader, self.reader)
120
133
 
121
134
  _file_path = Path(path) if isinstance(path, str) else path
122
135