agno 2.0.1__py3-none-any.whl → 2.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/agent.py +6015 -2823
- agno/api/api.py +2 -0
- agno/api/os.py +1 -1
- agno/culture/__init__.py +3 -0
- agno/culture/manager.py +956 -0
- agno/db/async_postgres/__init__.py +3 -0
- agno/db/base.py +385 -6
- agno/db/dynamo/dynamo.py +388 -81
- agno/db/dynamo/schemas.py +47 -10
- agno/db/dynamo/utils.py +63 -4
- agno/db/firestore/firestore.py +435 -64
- agno/db/firestore/schemas.py +11 -0
- agno/db/firestore/utils.py +102 -4
- agno/db/gcs_json/gcs_json_db.py +384 -42
- agno/db/gcs_json/utils.py +60 -26
- agno/db/in_memory/in_memory_db.py +351 -66
- agno/db/in_memory/utils.py +60 -2
- agno/db/json/json_db.py +339 -48
- agno/db/json/utils.py +60 -26
- agno/db/migrations/manager.py +199 -0
- agno/db/migrations/v1_to_v2.py +510 -37
- agno/db/migrations/versions/__init__.py +0 -0
- agno/db/migrations/versions/v2_3_0.py +938 -0
- agno/db/mongo/__init__.py +15 -1
- agno/db/mongo/async_mongo.py +2036 -0
- agno/db/mongo/mongo.py +653 -76
- agno/db/mongo/schemas.py +13 -0
- agno/db/mongo/utils.py +80 -8
- agno/db/mysql/mysql.py +687 -25
- agno/db/mysql/schemas.py +61 -37
- agno/db/mysql/utils.py +60 -2
- agno/db/postgres/__init__.py +2 -1
- agno/db/postgres/async_postgres.py +2001 -0
- agno/db/postgres/postgres.py +676 -57
- agno/db/postgres/schemas.py +43 -18
- agno/db/postgres/utils.py +164 -2
- agno/db/redis/redis.py +344 -38
- agno/db/redis/schemas.py +18 -0
- agno/db/redis/utils.py +60 -2
- agno/db/schemas/__init__.py +2 -1
- agno/db/schemas/culture.py +120 -0
- agno/db/schemas/memory.py +13 -0
- agno/db/singlestore/schemas.py +26 -1
- agno/db/singlestore/singlestore.py +687 -53
- agno/db/singlestore/utils.py +60 -2
- agno/db/sqlite/__init__.py +2 -1
- agno/db/sqlite/async_sqlite.py +2371 -0
- agno/db/sqlite/schemas.py +24 -0
- agno/db/sqlite/sqlite.py +774 -85
- agno/db/sqlite/utils.py +168 -5
- agno/db/surrealdb/__init__.py +3 -0
- agno/db/surrealdb/metrics.py +292 -0
- agno/db/surrealdb/models.py +309 -0
- agno/db/surrealdb/queries.py +71 -0
- agno/db/surrealdb/surrealdb.py +1361 -0
- agno/db/surrealdb/utils.py +147 -0
- agno/db/utils.py +50 -22
- agno/eval/accuracy.py +50 -43
- agno/eval/performance.py +6 -3
- agno/eval/reliability.py +6 -3
- agno/eval/utils.py +33 -16
- agno/exceptions.py +68 -1
- agno/filters.py +354 -0
- agno/guardrails/__init__.py +6 -0
- agno/guardrails/base.py +19 -0
- agno/guardrails/openai.py +144 -0
- agno/guardrails/pii.py +94 -0
- agno/guardrails/prompt_injection.py +52 -0
- agno/integrations/discord/client.py +1 -0
- agno/knowledge/chunking/agentic.py +13 -10
- agno/knowledge/chunking/fixed.py +1 -1
- agno/knowledge/chunking/semantic.py +40 -8
- agno/knowledge/chunking/strategy.py +59 -15
- agno/knowledge/embedder/aws_bedrock.py +9 -4
- agno/knowledge/embedder/azure_openai.py +54 -0
- agno/knowledge/embedder/base.py +2 -0
- agno/knowledge/embedder/cohere.py +184 -5
- agno/knowledge/embedder/fastembed.py +1 -1
- agno/knowledge/embedder/google.py +79 -1
- agno/knowledge/embedder/huggingface.py +9 -4
- agno/knowledge/embedder/jina.py +63 -0
- agno/knowledge/embedder/mistral.py +78 -11
- agno/knowledge/embedder/nebius.py +1 -1
- agno/knowledge/embedder/ollama.py +13 -0
- agno/knowledge/embedder/openai.py +37 -65
- agno/knowledge/embedder/sentence_transformer.py +8 -4
- agno/knowledge/embedder/vllm.py +262 -0
- agno/knowledge/embedder/voyageai.py +69 -16
- agno/knowledge/knowledge.py +594 -186
- agno/knowledge/reader/base.py +9 -2
- agno/knowledge/reader/csv_reader.py +8 -10
- agno/knowledge/reader/docx_reader.py +5 -6
- agno/knowledge/reader/field_labeled_csv_reader.py +290 -0
- agno/knowledge/reader/json_reader.py +6 -5
- agno/knowledge/reader/markdown_reader.py +13 -13
- agno/knowledge/reader/pdf_reader.py +43 -68
- agno/knowledge/reader/pptx_reader.py +101 -0
- agno/knowledge/reader/reader_factory.py +51 -6
- agno/knowledge/reader/s3_reader.py +3 -15
- agno/knowledge/reader/tavily_reader.py +194 -0
- agno/knowledge/reader/text_reader.py +13 -13
- agno/knowledge/reader/web_search_reader.py +2 -43
- agno/knowledge/reader/website_reader.py +43 -25
- agno/knowledge/reranker/__init__.py +2 -8
- agno/knowledge/types.py +9 -0
- agno/knowledge/utils.py +20 -0
- agno/media.py +72 -0
- agno/memory/manager.py +336 -82
- agno/models/aimlapi/aimlapi.py +2 -2
- agno/models/anthropic/claude.py +183 -37
- agno/models/aws/bedrock.py +52 -112
- agno/models/aws/claude.py +33 -1
- agno/models/azure/ai_foundry.py +33 -15
- agno/models/azure/openai_chat.py +25 -8
- agno/models/base.py +999 -519
- agno/models/cerebras/cerebras.py +19 -13
- agno/models/cerebras/cerebras_openai.py +8 -5
- agno/models/cohere/chat.py +27 -1
- agno/models/cometapi/__init__.py +5 -0
- agno/models/cometapi/cometapi.py +57 -0
- agno/models/dashscope/dashscope.py +1 -0
- agno/models/deepinfra/deepinfra.py +2 -2
- agno/models/deepseek/deepseek.py +2 -2
- agno/models/fireworks/fireworks.py +2 -2
- agno/models/google/gemini.py +103 -31
- agno/models/groq/groq.py +28 -11
- agno/models/huggingface/huggingface.py +2 -1
- agno/models/internlm/internlm.py +2 -2
- agno/models/langdb/langdb.py +4 -4
- agno/models/litellm/chat.py +18 -1
- agno/models/litellm/litellm_openai.py +2 -2
- agno/models/llama_cpp/__init__.py +5 -0
- agno/models/llama_cpp/llama_cpp.py +22 -0
- agno/models/message.py +139 -0
- agno/models/meta/llama.py +27 -10
- agno/models/meta/llama_openai.py +5 -17
- agno/models/nebius/nebius.py +6 -6
- agno/models/nexus/__init__.py +3 -0
- agno/models/nexus/nexus.py +22 -0
- agno/models/nvidia/nvidia.py +2 -2
- agno/models/ollama/chat.py +59 -5
- agno/models/openai/chat.py +69 -29
- agno/models/openai/responses.py +103 -106
- agno/models/openrouter/openrouter.py +41 -3
- agno/models/perplexity/perplexity.py +4 -5
- agno/models/portkey/portkey.py +3 -3
- agno/models/requesty/__init__.py +5 -0
- agno/models/requesty/requesty.py +52 -0
- agno/models/response.py +77 -1
- agno/models/sambanova/sambanova.py +2 -2
- agno/models/siliconflow/__init__.py +5 -0
- agno/models/siliconflow/siliconflow.py +25 -0
- agno/models/together/together.py +2 -2
- agno/models/utils.py +254 -8
- agno/models/vercel/v0.py +2 -2
- agno/models/vertexai/__init__.py +0 -0
- agno/models/vertexai/claude.py +96 -0
- agno/models/vllm/vllm.py +1 -0
- agno/models/xai/xai.py +3 -2
- agno/os/app.py +543 -178
- agno/os/auth.py +24 -14
- agno/os/config.py +1 -0
- agno/os/interfaces/__init__.py +1 -0
- agno/os/interfaces/a2a/__init__.py +3 -0
- agno/os/interfaces/a2a/a2a.py +42 -0
- agno/os/interfaces/a2a/router.py +250 -0
- agno/os/interfaces/a2a/utils.py +924 -0
- agno/os/interfaces/agui/agui.py +23 -7
- agno/os/interfaces/agui/router.py +27 -3
- agno/os/interfaces/agui/utils.py +242 -142
- agno/os/interfaces/base.py +6 -2
- agno/os/interfaces/slack/router.py +81 -23
- agno/os/interfaces/slack/slack.py +29 -14
- agno/os/interfaces/whatsapp/router.py +11 -4
- agno/os/interfaces/whatsapp/whatsapp.py +14 -7
- agno/os/mcp.py +111 -54
- agno/os/middleware/__init__.py +7 -0
- agno/os/middleware/jwt.py +233 -0
- agno/os/router.py +556 -139
- agno/os/routers/evals/evals.py +71 -34
- agno/os/routers/evals/schemas.py +31 -31
- agno/os/routers/evals/utils.py +6 -5
- agno/os/routers/health.py +31 -0
- agno/os/routers/home.py +52 -0
- agno/os/routers/knowledge/knowledge.py +185 -38
- agno/os/routers/knowledge/schemas.py +82 -22
- agno/os/routers/memory/memory.py +158 -53
- agno/os/routers/memory/schemas.py +20 -16
- agno/os/routers/metrics/metrics.py +20 -8
- agno/os/routers/metrics/schemas.py +16 -16
- agno/os/routers/session/session.py +499 -38
- agno/os/schema.py +308 -198
- agno/os/utils.py +401 -41
- agno/reasoning/anthropic.py +80 -0
- agno/reasoning/azure_ai_foundry.py +2 -2
- agno/reasoning/deepseek.py +2 -2
- agno/reasoning/default.py +3 -1
- agno/reasoning/gemini.py +73 -0
- agno/reasoning/groq.py +2 -2
- agno/reasoning/ollama.py +2 -2
- agno/reasoning/openai.py +7 -2
- agno/reasoning/vertexai.py +76 -0
- agno/run/__init__.py +6 -0
- agno/run/agent.py +248 -94
- agno/run/base.py +44 -5
- agno/run/team.py +238 -97
- agno/run/workflow.py +144 -33
- agno/session/agent.py +105 -89
- agno/session/summary.py +65 -25
- agno/session/team.py +176 -96
- agno/session/workflow.py +406 -40
- agno/team/team.py +3854 -1610
- agno/tools/dalle.py +2 -4
- agno/tools/decorator.py +4 -2
- agno/tools/duckduckgo.py +15 -11
- agno/tools/e2b.py +14 -7
- agno/tools/eleven_labs.py +23 -25
- agno/tools/exa.py +21 -16
- agno/tools/file.py +153 -23
- agno/tools/file_generation.py +350 -0
- agno/tools/firecrawl.py +4 -4
- agno/tools/function.py +250 -30
- agno/tools/gmail.py +238 -14
- agno/tools/google_drive.py +270 -0
- agno/tools/googlecalendar.py +36 -8
- agno/tools/googlesheets.py +20 -5
- agno/tools/jira.py +20 -0
- agno/tools/knowledge.py +3 -3
- agno/tools/mcp/__init__.py +10 -0
- agno/tools/mcp/mcp.py +331 -0
- agno/tools/mcp/multi_mcp.py +347 -0
- agno/tools/mcp/params.py +24 -0
- agno/tools/mcp_toolbox.py +284 -0
- agno/tools/mem0.py +11 -17
- agno/tools/memori.py +1 -53
- agno/tools/memory.py +419 -0
- agno/tools/models/nebius.py +5 -5
- agno/tools/models_labs.py +20 -10
- agno/tools/notion.py +204 -0
- agno/tools/parallel.py +314 -0
- agno/tools/scrapegraph.py +58 -31
- agno/tools/searxng.py +2 -2
- agno/tools/serper.py +2 -2
- agno/tools/slack.py +18 -3
- agno/tools/spider.py +2 -2
- agno/tools/tavily.py +146 -0
- agno/tools/whatsapp.py +1 -1
- agno/tools/workflow.py +278 -0
- agno/tools/yfinance.py +12 -11
- agno/utils/agent.py +820 -0
- agno/utils/audio.py +27 -0
- agno/utils/common.py +90 -1
- agno/utils/events.py +217 -2
- agno/utils/gemini.py +180 -22
- agno/utils/hooks.py +57 -0
- agno/utils/http.py +111 -0
- agno/utils/knowledge.py +12 -5
- agno/utils/log.py +1 -0
- agno/utils/mcp.py +92 -2
- agno/utils/media.py +188 -10
- agno/utils/merge_dict.py +22 -1
- agno/utils/message.py +60 -0
- agno/utils/models/claude.py +40 -11
- agno/utils/print_response/agent.py +105 -21
- agno/utils/print_response/team.py +103 -38
- agno/utils/print_response/workflow.py +251 -34
- agno/utils/reasoning.py +22 -1
- agno/utils/serialize.py +32 -0
- agno/utils/streamlit.py +16 -10
- agno/utils/string.py +41 -0
- agno/utils/team.py +98 -9
- agno/utils/tools.py +1 -1
- agno/vectordb/base.py +23 -4
- agno/vectordb/cassandra/cassandra.py +65 -9
- agno/vectordb/chroma/chromadb.py +182 -38
- agno/vectordb/clickhouse/clickhousedb.py +64 -11
- agno/vectordb/couchbase/couchbase.py +105 -10
- agno/vectordb/lancedb/lance_db.py +124 -133
- agno/vectordb/langchaindb/langchaindb.py +25 -7
- agno/vectordb/lightrag/lightrag.py +17 -3
- agno/vectordb/llamaindex/__init__.py +3 -0
- agno/vectordb/llamaindex/llamaindexdb.py +46 -7
- agno/vectordb/milvus/milvus.py +126 -9
- agno/vectordb/mongodb/__init__.py +7 -1
- agno/vectordb/mongodb/mongodb.py +112 -7
- agno/vectordb/pgvector/pgvector.py +142 -21
- agno/vectordb/pineconedb/pineconedb.py +80 -8
- agno/vectordb/qdrant/qdrant.py +125 -39
- agno/vectordb/redis/__init__.py +9 -0
- agno/vectordb/redis/redisdb.py +694 -0
- agno/vectordb/singlestore/singlestore.py +111 -25
- agno/vectordb/surrealdb/surrealdb.py +31 -5
- agno/vectordb/upstashdb/upstashdb.py +76 -8
- agno/vectordb/weaviate/weaviate.py +86 -15
- agno/workflow/__init__.py +2 -0
- agno/workflow/agent.py +299 -0
- agno/workflow/condition.py +112 -18
- agno/workflow/loop.py +69 -10
- agno/workflow/parallel.py +266 -118
- agno/workflow/router.py +110 -17
- agno/workflow/step.py +638 -129
- agno/workflow/steps.py +65 -6
- agno/workflow/types.py +61 -23
- agno/workflow/workflow.py +2085 -272
- {agno-2.0.1.dist-info → agno-2.3.0.dist-info}/METADATA +182 -58
- agno-2.3.0.dist-info/RECORD +577 -0
- agno/knowledge/reader/url_reader.py +0 -128
- agno/tools/googlesearch.py +0 -98
- agno/tools/mcp.py +0 -610
- agno/utils/models/aws_claude.py +0 -170
- agno-2.0.1.dist-info/RECORD +0 -515
- {agno-2.0.1.dist-info → agno-2.3.0.dist-info}/WHEEL +0 -0
- {agno-2.0.1.dist-info → agno-2.3.0.dist-info}/licenses/LICENSE +0 -0
- {agno-2.0.1.dist-info → agno-2.3.0.dist-info}/top_level.txt +0 -0
agno/knowledge/knowledge.py
CHANGED
|
@@ -4,24 +4,23 @@ import io
|
|
|
4
4
|
import time
|
|
5
5
|
from dataclasses import dataclass
|
|
6
6
|
from enum import Enum
|
|
7
|
-
from functools import cached_property
|
|
8
7
|
from io import BytesIO
|
|
9
8
|
from os.path import basename
|
|
10
9
|
from pathlib import Path
|
|
11
10
|
from typing import Any, Dict, List, Optional, Set, Tuple, Union, cast, overload
|
|
12
|
-
from uuid import uuid4
|
|
13
11
|
|
|
14
12
|
from httpx import AsyncClient
|
|
15
13
|
|
|
16
|
-
from agno.db.base import BaseDb
|
|
14
|
+
from agno.db.base import AsyncBaseDb, BaseDb
|
|
17
15
|
from agno.db.schemas.knowledge import KnowledgeRow
|
|
16
|
+
from agno.filters import FilterExpr
|
|
18
17
|
from agno.knowledge.content import Content, ContentAuth, ContentStatus, FileData
|
|
19
18
|
from agno.knowledge.document import Document
|
|
20
19
|
from agno.knowledge.reader import Reader, ReaderFactory
|
|
21
20
|
from agno.knowledge.remote_content.remote_content import GCSContent, RemoteContent, S3Content
|
|
22
21
|
from agno.utils.http import async_fetch_with_retry
|
|
23
22
|
from agno.utils.log import log_debug, log_error, log_info, log_warning
|
|
24
|
-
from agno.
|
|
23
|
+
from agno.utils.string import generate_id
|
|
25
24
|
|
|
26
25
|
ContentDict = Dict[str, Union[str, Dict[str, str]]]
|
|
27
26
|
|
|
@@ -39,19 +38,19 @@ class Knowledge:
|
|
|
39
38
|
|
|
40
39
|
name: Optional[str] = None
|
|
41
40
|
description: Optional[str] = None
|
|
42
|
-
vector_db: Optional[
|
|
43
|
-
contents_db: Optional[BaseDb] = None
|
|
41
|
+
vector_db: Optional[Any] = None
|
|
42
|
+
contents_db: Optional[Union[BaseDb, AsyncBaseDb]] = None
|
|
44
43
|
max_results: int = 10
|
|
45
44
|
readers: Optional[Dict[str, Reader]] = None
|
|
46
45
|
|
|
47
46
|
def __post_init__(self):
|
|
47
|
+
from agno.vectordb import VectorDb
|
|
48
|
+
|
|
49
|
+
self.vector_db = cast(VectorDb, self.vector_db)
|
|
48
50
|
if self.vector_db and not self.vector_db.exists():
|
|
49
51
|
self.vector_db.create()
|
|
50
52
|
|
|
51
53
|
self.construct_readers()
|
|
52
|
-
self.valid_metadata_filters = set()
|
|
53
|
-
|
|
54
|
-
# --- SDK Specific Methods ---
|
|
55
54
|
|
|
56
55
|
# --- Add Contents ---
|
|
57
56
|
@overload
|
|
@@ -64,9 +63,12 @@ class Knowledge:
|
|
|
64
63
|
paths: Optional[List[str]] = None,
|
|
65
64
|
urls: Optional[List[str]] = None,
|
|
66
65
|
metadata: Optional[Dict[str, str]] = None,
|
|
66
|
+
topics: Optional[List[str]] = None,
|
|
67
|
+
text_contents: Optional[List[str]] = None,
|
|
68
|
+
reader: Optional[Reader] = None,
|
|
67
69
|
include: Optional[List[str]] = None,
|
|
68
70
|
exclude: Optional[List[str]] = None,
|
|
69
|
-
upsert: bool =
|
|
71
|
+
upsert: bool = True,
|
|
70
72
|
skip_if_exists: bool = False,
|
|
71
73
|
remote_content: Optional[RemoteContent] = None,
|
|
72
74
|
) -> None: ...
|
|
@@ -74,6 +76,8 @@ class Knowledge:
|
|
|
74
76
|
async def add_contents_async(self, *args, **kwargs) -> None:
|
|
75
77
|
if args and isinstance(args[0], list):
|
|
76
78
|
arguments = args[0]
|
|
79
|
+
upsert = kwargs.get("upsert", True)
|
|
80
|
+
skip_if_exists = kwargs.get("skip_if_exists", False)
|
|
77
81
|
for argument in arguments:
|
|
78
82
|
await self.add_content_async(
|
|
79
83
|
name=argument.get("name"),
|
|
@@ -82,11 +86,12 @@ class Knowledge:
|
|
|
82
86
|
url=argument.get("url"),
|
|
83
87
|
metadata=argument.get("metadata"),
|
|
84
88
|
topics=argument.get("topics"),
|
|
89
|
+
text_content=argument.get("text_content"),
|
|
85
90
|
reader=argument.get("reader"),
|
|
86
91
|
include=argument.get("include"),
|
|
87
92
|
exclude=argument.get("exclude"),
|
|
88
|
-
upsert=argument.get("upsert",
|
|
89
|
-
skip_if_exists=argument.get("skip_if_exists",
|
|
93
|
+
upsert=argument.get("upsert", upsert),
|
|
94
|
+
skip_if_exists=argument.get("skip_if_exists", skip_if_exists),
|
|
90
95
|
remote_content=argument.get("remote_content", None),
|
|
91
96
|
)
|
|
92
97
|
|
|
@@ -95,14 +100,15 @@ class Knowledge:
|
|
|
95
100
|
metadata = kwargs.get("metadata", {})
|
|
96
101
|
description = kwargs.get("description", [])
|
|
97
102
|
topics = kwargs.get("topics", [])
|
|
103
|
+
reader = kwargs.get("reader", None)
|
|
98
104
|
paths = kwargs.get("paths", [])
|
|
99
105
|
urls = kwargs.get("urls", [])
|
|
106
|
+
text_contents = kwargs.get("text_contents", [])
|
|
100
107
|
include = kwargs.get("include")
|
|
101
108
|
exclude = kwargs.get("exclude")
|
|
102
|
-
upsert = kwargs.get("upsert",
|
|
109
|
+
upsert = kwargs.get("upsert", True)
|
|
103
110
|
skip_if_exists = kwargs.get("skip_if_exists", False)
|
|
104
111
|
remote_content = kwargs.get("remote_content", None)
|
|
105
|
-
|
|
106
112
|
for path in paths:
|
|
107
113
|
await self.add_content_async(
|
|
108
114
|
name=name,
|
|
@@ -113,6 +119,7 @@ class Knowledge:
|
|
|
113
119
|
exclude=exclude,
|
|
114
120
|
upsert=upsert,
|
|
115
121
|
skip_if_exists=skip_if_exists,
|
|
122
|
+
reader=reader,
|
|
116
123
|
)
|
|
117
124
|
for url in urls:
|
|
118
125
|
await self.add_content_async(
|
|
@@ -124,6 +131,21 @@ class Knowledge:
|
|
|
124
131
|
exclude=exclude,
|
|
125
132
|
upsert=upsert,
|
|
126
133
|
skip_if_exists=skip_if_exists,
|
|
134
|
+
reader=reader,
|
|
135
|
+
)
|
|
136
|
+
for i, text_content in enumerate(text_contents):
|
|
137
|
+
content_name = f"{name}_{i}" if name else f"text_content_{i}"
|
|
138
|
+
log_debug(f"Adding text content: {content_name}")
|
|
139
|
+
await self.add_content_async(
|
|
140
|
+
name=content_name,
|
|
141
|
+
description=description,
|
|
142
|
+
text_content=text_content,
|
|
143
|
+
metadata=metadata,
|
|
144
|
+
include=include,
|
|
145
|
+
exclude=exclude,
|
|
146
|
+
upsert=upsert,
|
|
147
|
+
skip_if_exists=skip_if_exists,
|
|
148
|
+
reader=reader,
|
|
127
149
|
)
|
|
128
150
|
if topics:
|
|
129
151
|
await self.add_content_async(
|
|
@@ -135,6 +157,7 @@ class Knowledge:
|
|
|
135
157
|
exclude=exclude,
|
|
136
158
|
upsert=upsert,
|
|
137
159
|
skip_if_exists=skip_if_exists,
|
|
160
|
+
reader=reader,
|
|
138
161
|
)
|
|
139
162
|
|
|
140
163
|
if remote_content:
|
|
@@ -145,6 +168,7 @@ class Knowledge:
|
|
|
145
168
|
remote_content=remote_content,
|
|
146
169
|
upsert=upsert,
|
|
147
170
|
skip_if_exists=skip_if_exists,
|
|
171
|
+
reader=reader,
|
|
148
172
|
)
|
|
149
173
|
|
|
150
174
|
else:
|
|
@@ -160,10 +184,14 @@ class Knowledge:
|
|
|
160
184
|
paths: Optional[List[str]] = None,
|
|
161
185
|
urls: Optional[List[str]] = None,
|
|
162
186
|
metadata: Optional[Dict[str, str]] = None,
|
|
187
|
+
topics: Optional[List[str]] = None,
|
|
188
|
+
text_contents: Optional[List[str]] = None,
|
|
189
|
+
reader: Optional[Reader] = None,
|
|
163
190
|
include: Optional[List[str]] = None,
|
|
164
191
|
exclude: Optional[List[str]] = None,
|
|
165
|
-
upsert: bool =
|
|
192
|
+
upsert: bool = True,
|
|
166
193
|
skip_if_exists: bool = False,
|
|
194
|
+
remote_content: Optional[RemoteContent] = None,
|
|
167
195
|
) -> None: ...
|
|
168
196
|
|
|
169
197
|
def add_contents(self, *args, **kwargs) -> None:
|
|
@@ -181,10 +209,14 @@ class Knowledge:
|
|
|
181
209
|
paths: Optional list of file paths to load content from
|
|
182
210
|
urls: Optional list of URLs to load content from
|
|
183
211
|
metadata: Optional metadata dictionary to apply to all content
|
|
212
|
+
topics: Optional list of topics to add
|
|
213
|
+
text_contents: Optional list of text content strings to add
|
|
214
|
+
reader: Optional reader to use for processing content
|
|
184
215
|
include: Optional list of file patterns to include
|
|
185
216
|
exclude: Optional list of file patterns to exclude
|
|
186
217
|
upsert: Whether to update existing content if it already exists
|
|
187
218
|
skip_if_exists: Whether to skip adding content if it already exists
|
|
219
|
+
remote_content: Optional remote content (S3, GCS, etc.) to add
|
|
188
220
|
"""
|
|
189
221
|
asyncio.run(self.add_contents_async(*args, **kwargs))
|
|
190
222
|
|
|
@@ -200,7 +232,7 @@ class Knowledge:
|
|
|
200
232
|
metadata: Optional[Dict[str, str]] = None,
|
|
201
233
|
include: Optional[List[str]] = None,
|
|
202
234
|
exclude: Optional[List[str]] = None,
|
|
203
|
-
upsert: bool =
|
|
235
|
+
upsert: bool = True,
|
|
204
236
|
skip_if_exists: bool = False,
|
|
205
237
|
reader: Optional[Reader] = None,
|
|
206
238
|
auth: Optional[ContentAuth] = None,
|
|
@@ -228,11 +260,13 @@ class Knowledge:
|
|
|
228
260
|
) -> None:
|
|
229
261
|
# Validation: At least one of the parameters must be provided
|
|
230
262
|
if all(argument is None for argument in [path, url, text_content, topics, remote_content]):
|
|
231
|
-
|
|
263
|
+
log_warning(
|
|
264
|
+
"At least one of 'path', 'url', 'text_content', 'topics', or 'remote_content' must be provided."
|
|
265
|
+
)
|
|
232
266
|
return
|
|
233
267
|
|
|
234
268
|
if not skip_if_exists:
|
|
235
|
-
|
|
269
|
+
log_debug("skip_if_exists is disabled, disabling upsert")
|
|
236
270
|
upsert = False
|
|
237
271
|
|
|
238
272
|
content = None
|
|
@@ -241,7 +275,6 @@ class Knowledge:
|
|
|
241
275
|
file_data = FileData(content=text_content, type="Text")
|
|
242
276
|
|
|
243
277
|
content = Content(
|
|
244
|
-
id=str(uuid4()),
|
|
245
278
|
name=name,
|
|
246
279
|
description=description,
|
|
247
280
|
path=path,
|
|
@@ -253,6 +286,8 @@ class Knowledge:
|
|
|
253
286
|
reader=reader,
|
|
254
287
|
auth=auth,
|
|
255
288
|
)
|
|
289
|
+
content.content_hash = self._build_content_hash(content)
|
|
290
|
+
content.id = generate_id(content.content_hash)
|
|
256
291
|
|
|
257
292
|
await self._load_content(content, upsert, skip_if_exists, include, exclude)
|
|
258
293
|
|
|
@@ -266,7 +301,7 @@ class Knowledge:
|
|
|
266
301
|
metadata: Optional[Dict[str, str]] = None,
|
|
267
302
|
include: Optional[List[str]] = None,
|
|
268
303
|
exclude: Optional[List[str]] = None,
|
|
269
|
-
upsert: bool =
|
|
304
|
+
upsert: bool = True,
|
|
270
305
|
skip_if_exists: bool = False,
|
|
271
306
|
reader: Optional[Reader] = None,
|
|
272
307
|
auth: Optional[ContentAuth] = None,
|
|
@@ -289,7 +324,7 @@ class Knowledge:
|
|
|
289
324
|
include: Optional[List[str]] = None,
|
|
290
325
|
exclude: Optional[List[str]] = None,
|
|
291
326
|
upsert: bool = True,
|
|
292
|
-
skip_if_exists: bool =
|
|
327
|
+
skip_if_exists: bool = False,
|
|
293
328
|
auth: Optional[ContentAuth] = None,
|
|
294
329
|
) -> None:
|
|
295
330
|
"""
|
|
@@ -303,7 +338,7 @@ class Knowledge:
|
|
|
303
338
|
text_content: Optional text content to add directly
|
|
304
339
|
metadata: Optional metadata dictionary
|
|
305
340
|
topics: Optional list of topics
|
|
306
|
-
|
|
341
|
+
remote_content: Optional cloud storage configuration
|
|
307
342
|
reader: Optional custom reader for processing the content
|
|
308
343
|
include: Optional list of file patterns to include
|
|
309
344
|
exclude: Optional list of file patterns to exclude
|
|
@@ -329,6 +364,26 @@ class Knowledge:
|
|
|
329
364
|
)
|
|
330
365
|
)
|
|
331
366
|
|
|
367
|
+
def _should_skip(self, content_hash: str, skip_if_exists: bool) -> bool:
|
|
368
|
+
"""
|
|
369
|
+
Handle the skip_if_exists logic for content that already exists in the vector database.
|
|
370
|
+
|
|
371
|
+
Args:
|
|
372
|
+
content_hash: The content hash string to check for existence
|
|
373
|
+
skip_if_exists: Whether to skip if content already exists
|
|
374
|
+
|
|
375
|
+
Returns:
|
|
376
|
+
bool: True if should skip processing, False if should continue
|
|
377
|
+
"""
|
|
378
|
+
from agno.vectordb import VectorDb
|
|
379
|
+
|
|
380
|
+
self.vector_db = cast(VectorDb, self.vector_db)
|
|
381
|
+
if self.vector_db and self.vector_db.content_hash_exists(content_hash) and skip_if_exists:
|
|
382
|
+
log_debug(f"Content already exists: {content_hash}, skipping...")
|
|
383
|
+
return True
|
|
384
|
+
|
|
385
|
+
return False
|
|
386
|
+
|
|
332
387
|
async def _load_from_path(
|
|
333
388
|
self,
|
|
334
389
|
content: Content,
|
|
@@ -337,25 +392,28 @@ class Knowledge:
|
|
|
337
392
|
include: Optional[List[str]] = None,
|
|
338
393
|
exclude: Optional[List[str]] = None,
|
|
339
394
|
):
|
|
395
|
+
from agno.vectordb import VectorDb
|
|
396
|
+
|
|
397
|
+
self.vector_db = cast(VectorDb, self.vector_db)
|
|
398
|
+
|
|
340
399
|
log_info(f"Adding content from path, {content.id}, {content.name}, {content.path}, {content.description}")
|
|
341
400
|
path = Path(content.path) # type: ignore
|
|
342
401
|
|
|
343
402
|
if path.is_file():
|
|
344
403
|
if self._should_include_file(str(path), include, exclude):
|
|
345
|
-
|
|
404
|
+
log_debug(f"Adding file {path} due to include/exclude filters")
|
|
405
|
+
|
|
406
|
+
await self._add_to_contents_db(content)
|
|
407
|
+
if self._should_skip(content.content_hash, skip_if_exists): # type: ignore[arg-type]
|
|
408
|
+
content.status = ContentStatus.COMPLETED
|
|
409
|
+
await self._aupdate_content(content)
|
|
410
|
+
return
|
|
346
411
|
|
|
347
412
|
# Handle LightRAG special case - read file and upload directly
|
|
348
413
|
if self.vector_db.__class__.__name__ == "LightRag":
|
|
349
414
|
await self._process_lightrag_content(content, KnowledgeContentOrigin.PATH)
|
|
350
415
|
return
|
|
351
416
|
|
|
352
|
-
content.content_hash = self._build_content_hash(content)
|
|
353
|
-
if self.vector_db and self.vector_db.content_hash_exists(content.content_hash) and skip_if_exists:
|
|
354
|
-
log_info(f"Content {content.content_hash} already exists, skipping")
|
|
355
|
-
return
|
|
356
|
-
|
|
357
|
-
self._add_to_contents_db(content)
|
|
358
|
-
|
|
359
417
|
if content.reader:
|
|
360
418
|
# TODO: We will refactor this to eventually pass authorization to all readers
|
|
361
419
|
import inspect
|
|
@@ -370,7 +428,7 @@ class Knowledge:
|
|
|
370
428
|
|
|
371
429
|
else:
|
|
372
430
|
reader = ReaderFactory.get_reader_for_extension(path.suffix)
|
|
373
|
-
|
|
431
|
+
log_debug(f"Using Reader: {reader.__class__.__name__}")
|
|
374
432
|
if reader:
|
|
375
433
|
# TODO: We will refactor this to eventually pass authorization to all readers
|
|
376
434
|
import inspect
|
|
@@ -407,15 +465,16 @@ class Knowledge:
|
|
|
407
465
|
log_debug(f"Skipping file {file_path} due to include/exclude filters")
|
|
408
466
|
continue
|
|
409
467
|
|
|
410
|
-
id = str(uuid4())
|
|
411
468
|
file_content = Content(
|
|
412
|
-
id=id,
|
|
413
469
|
name=content.name,
|
|
414
470
|
path=str(file_path),
|
|
415
471
|
metadata=content.metadata,
|
|
416
472
|
description=content.description,
|
|
417
473
|
reader=content.reader,
|
|
418
474
|
)
|
|
475
|
+
file_content.content_hash = self._build_content_hash(file_content)
|
|
476
|
+
file_content.id = generate_id(file_content.content_hash)
|
|
477
|
+
|
|
419
478
|
await self._load_from_path(file_content, upsert, skip_if_exists, include, exclude)
|
|
420
479
|
else:
|
|
421
480
|
log_warning(f"Invalid path: {path}")
|
|
@@ -433,22 +492,26 @@ class Knowledge:
|
|
|
433
492
|
3. Read the content
|
|
434
493
|
4. Prepare and insert the content in the vector database
|
|
435
494
|
"""
|
|
495
|
+
from agno.vectordb import VectorDb
|
|
496
|
+
|
|
497
|
+
self.vector_db = cast(VectorDb, self.vector_db)
|
|
498
|
+
|
|
436
499
|
log_info(f"Adding content from URL {content.url}")
|
|
437
500
|
content.file_type = "url"
|
|
438
501
|
|
|
439
502
|
if not content.url:
|
|
440
503
|
raise ValueError("No url provided")
|
|
441
504
|
|
|
442
|
-
|
|
443
|
-
|
|
505
|
+
# 1. Add content to contents database
|
|
506
|
+
await self._add_to_contents_db(content)
|
|
507
|
+
if self._should_skip(content.content_hash, skip_if_exists): # type: ignore[arg-type]
|
|
508
|
+
content.status = ContentStatus.COMPLETED
|
|
509
|
+
await self._aupdate_content(content)
|
|
444
510
|
return
|
|
445
511
|
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
if self.vector_db and self.vector_db.content_hash_exists(content.content_hash) and skip_if_exists:
|
|
449
|
-
log_info(f"Content {content.content_hash} already exists, skipping")
|
|
512
|
+
if self.vector_db.__class__.__name__ == "LightRag":
|
|
513
|
+
await self._process_lightrag_content(content, KnowledgeContentOrigin.URL)
|
|
450
514
|
return
|
|
451
|
-
self._add_to_contents_db(content)
|
|
452
515
|
|
|
453
516
|
# 2. Validate URL
|
|
454
517
|
try:
|
|
@@ -458,18 +521,23 @@ class Knowledge:
|
|
|
458
521
|
if not all([parsed_url.scheme, parsed_url.netloc]):
|
|
459
522
|
content.status = ContentStatus.FAILED
|
|
460
523
|
content.status_message = f"Invalid URL format: {content.url}"
|
|
461
|
-
self.
|
|
524
|
+
await self._aupdate_content(content)
|
|
462
525
|
log_warning(f"Invalid URL format: {content.url}")
|
|
463
526
|
except Exception as e:
|
|
464
527
|
content.status = ContentStatus.FAILED
|
|
465
528
|
content.status_message = f"Invalid URL: {content.url} - {str(e)}"
|
|
466
|
-
self.
|
|
529
|
+
await self._aupdate_content(content)
|
|
467
530
|
log_warning(f"Invalid URL: {content.url} - {str(e)}")
|
|
468
531
|
|
|
469
|
-
# 3. Fetch and load content
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
532
|
+
# 3. Fetch and load content if file has an extension
|
|
533
|
+
url_path = Path(parsed_url.path)
|
|
534
|
+
file_extension = url_path.suffix.lower()
|
|
535
|
+
|
|
536
|
+
bytes_content = None
|
|
537
|
+
if file_extension:
|
|
538
|
+
async with AsyncClient() as client:
|
|
539
|
+
response = await async_fetch_with_retry(content.url, client=client)
|
|
540
|
+
bytes_content = BytesIO(response.content)
|
|
473
541
|
|
|
474
542
|
# 4. Select reader
|
|
475
543
|
# If a reader was provided by the user, use it
|
|
@@ -477,8 +545,6 @@ class Knowledge:
|
|
|
477
545
|
name = content.name if content.name else content.url
|
|
478
546
|
# Else select based on file extension
|
|
479
547
|
if reader is None:
|
|
480
|
-
url_path = Path(parsed_url.path)
|
|
481
|
-
file_extension = url_path.suffix.lower()
|
|
482
548
|
if file_extension == ".csv":
|
|
483
549
|
name = basename(parsed_url.path) or "data.csv"
|
|
484
550
|
reader = self.csv_reader
|
|
@@ -486,6 +552,8 @@ class Knowledge:
|
|
|
486
552
|
reader = self.pdf_reader
|
|
487
553
|
elif file_extension == ".docx":
|
|
488
554
|
reader = self.docx_reader
|
|
555
|
+
elif file_extension == ".pptx":
|
|
556
|
+
reader = self.pptx_reader
|
|
489
557
|
elif file_extension == ".json":
|
|
490
558
|
reader = self.json_reader
|
|
491
559
|
elif file_extension == ".markdown":
|
|
@@ -504,20 +572,26 @@ class Knowledge:
|
|
|
504
572
|
if reader.__class__.__name__ == "YouTubeReader":
|
|
505
573
|
read_documents = reader.read(content.url, name=name)
|
|
506
574
|
elif "password" in read_signature.parameters and content.auth and content.auth.password:
|
|
507
|
-
|
|
575
|
+
if bytes_content:
|
|
576
|
+
read_documents = reader.read(bytes_content, name=name, password=content.auth.password)
|
|
577
|
+
else:
|
|
578
|
+
read_documents = reader.read(content.url, name=name, password=content.auth.password)
|
|
508
579
|
else:
|
|
509
|
-
|
|
580
|
+
if bytes_content:
|
|
581
|
+
read_documents = reader.read(bytes_content, name=name)
|
|
582
|
+
else:
|
|
583
|
+
read_documents = reader.read(content.url, name=name)
|
|
584
|
+
|
|
510
585
|
except Exception as e:
|
|
511
586
|
log_error(f"Error reading URL: {content.url} - {str(e)}")
|
|
512
587
|
content.status = ContentStatus.FAILED
|
|
513
588
|
content.status_message = f"Error reading URL: {content.url} - {str(e)}"
|
|
514
|
-
self.
|
|
589
|
+
await self._aupdate_content(content)
|
|
515
590
|
return
|
|
516
591
|
|
|
517
592
|
# 6. Chunk documents if needed
|
|
518
593
|
if reader and not reader.chunk:
|
|
519
594
|
read_documents = await reader.chunk_documents_async(read_documents)
|
|
520
|
-
|
|
521
595
|
# 7. Prepare and insert the content in the vector database
|
|
522
596
|
file_size = 0
|
|
523
597
|
if read_documents:
|
|
@@ -531,8 +605,12 @@ class Knowledge:
|
|
|
531
605
|
self,
|
|
532
606
|
content: Content,
|
|
533
607
|
upsert: bool = True,
|
|
534
|
-
skip_if_exists: bool =
|
|
608
|
+
skip_if_exists: bool = False,
|
|
535
609
|
):
|
|
610
|
+
from agno.vectordb import VectorDb
|
|
611
|
+
|
|
612
|
+
self.vector_db = cast(VectorDb, self.vector_db)
|
|
613
|
+
|
|
536
614
|
if content.name:
|
|
537
615
|
name = content.name
|
|
538
616
|
elif content.file_data and content.file_data.content:
|
|
@@ -554,28 +632,24 @@ class Knowledge:
|
|
|
554
632
|
|
|
555
633
|
log_info(f"Adding content from {content.name}")
|
|
556
634
|
|
|
557
|
-
|
|
558
|
-
|
|
635
|
+
await self._add_to_contents_db(content)
|
|
636
|
+
if self._should_skip(content.content_hash, skip_if_exists): # type: ignore[arg-type]
|
|
637
|
+
content.status = ContentStatus.COMPLETED
|
|
638
|
+
await self._aupdate_content(content)
|
|
559
639
|
return
|
|
560
640
|
|
|
561
|
-
content.
|
|
562
|
-
|
|
563
|
-
log_info(f"Content {content.content_hash} already exists, skipping")
|
|
564
|
-
|
|
641
|
+
if content.file_data and self.vector_db.__class__.__name__ == "LightRag":
|
|
642
|
+
await self._process_lightrag_content(content, KnowledgeContentOrigin.CONTENT)
|
|
565
643
|
return
|
|
566
|
-
self._add_to_contents_db(content)
|
|
567
644
|
|
|
568
645
|
read_documents = []
|
|
569
646
|
|
|
570
647
|
if isinstance(content.file_data, str):
|
|
571
|
-
|
|
572
|
-
content_bytes = content.file_data.encode("utf-8")
|
|
573
|
-
except UnicodeEncodeError:
|
|
574
|
-
content_bytes = content.file_data.encode("latin-1")
|
|
648
|
+
content_bytes = content.file_data.encode("utf-8", errors="replace")
|
|
575
649
|
content_io = io.BytesIO(content_bytes)
|
|
576
650
|
|
|
577
651
|
if content.reader:
|
|
578
|
-
|
|
652
|
+
log_debug(f"Using reader: {content.reader.__class__.__name__} to read content")
|
|
579
653
|
read_documents = content.reader.read(content_io, name=name)
|
|
580
654
|
else:
|
|
581
655
|
text_reader = self.text_reader
|
|
@@ -584,7 +658,7 @@ class Knowledge:
|
|
|
584
658
|
else:
|
|
585
659
|
content.status = ContentStatus.FAILED
|
|
586
660
|
content.status_message = "Text reader not available"
|
|
587
|
-
self.
|
|
661
|
+
await self._aupdate_content(content)
|
|
588
662
|
return
|
|
589
663
|
|
|
590
664
|
elif isinstance(content.file_data, FileData):
|
|
@@ -592,27 +666,19 @@ class Knowledge:
|
|
|
592
666
|
if isinstance(content.file_data.content, bytes):
|
|
593
667
|
content_io = io.BytesIO(content.file_data.content)
|
|
594
668
|
elif isinstance(content.file_data.content, str):
|
|
595
|
-
|
|
596
|
-
try:
|
|
597
|
-
content_bytes = content.file_data.content.encode("utf-8")
|
|
598
|
-
except UnicodeEncodeError:
|
|
599
|
-
log_debug(f"UTF-8 encoding failed for {content.file_data.type}, using latin-1")
|
|
600
|
-
content_bytes = content.file_data.content.encode("latin-1")
|
|
601
|
-
else:
|
|
602
|
-
content_bytes = content.file_data.content.encode("latin-1")
|
|
669
|
+
content_bytes = content.file_data.content.encode("utf-8", errors="replace")
|
|
603
670
|
content_io = io.BytesIO(content_bytes)
|
|
604
671
|
else:
|
|
605
672
|
content_io = content.file_data.content # type: ignore
|
|
606
673
|
|
|
607
674
|
# Respect an explicitly provided reader; otherwise select based on file type
|
|
608
675
|
if content.reader:
|
|
609
|
-
|
|
676
|
+
log_debug(f"Using reader: {content.reader.__class__.__name__} to read content")
|
|
610
677
|
reader = content.reader
|
|
611
678
|
else:
|
|
612
679
|
reader = self._select_reader(content.file_data.type)
|
|
613
680
|
name = content.name if content.name else f"content_{content.file_data.type}"
|
|
614
681
|
read_documents = reader.read(content_io, name=name)
|
|
615
|
-
|
|
616
682
|
for read_document in read_documents:
|
|
617
683
|
if content.metadata:
|
|
618
684
|
read_document.meta_data.update(content.metadata)
|
|
@@ -621,12 +687,13 @@ class Knowledge:
|
|
|
621
687
|
if len(read_documents) == 0:
|
|
622
688
|
content.status = ContentStatus.FAILED
|
|
623
689
|
content.status_message = "Content could not be read"
|
|
624
|
-
self.
|
|
690
|
+
await self._aupdate_content(content)
|
|
691
|
+
return
|
|
625
692
|
|
|
626
693
|
else:
|
|
627
694
|
content.status = ContentStatus.FAILED
|
|
628
695
|
content.status_message = "No content provided"
|
|
629
|
-
self.
|
|
696
|
+
await self._aupdate_content(content)
|
|
630
697
|
return
|
|
631
698
|
|
|
632
699
|
await self._handle_vector_db_insert(content, read_documents, upsert)
|
|
@@ -637,6 +704,9 @@ class Knowledge:
|
|
|
637
704
|
upsert: bool,
|
|
638
705
|
skip_if_exists: bool,
|
|
639
706
|
):
|
|
707
|
+
from agno.vectordb import VectorDb
|
|
708
|
+
|
|
709
|
+
self.vector_db = cast(VectorDb, self.vector_db)
|
|
640
710
|
log_info(f"Adding content from topics: {content.topics}")
|
|
641
711
|
|
|
642
712
|
if content.topics is None:
|
|
@@ -644,9 +714,7 @@ class Knowledge:
|
|
|
644
714
|
return
|
|
645
715
|
|
|
646
716
|
for topic in content.topics:
|
|
647
|
-
id = str(uuid4())
|
|
648
717
|
content = Content(
|
|
649
|
-
id=id,
|
|
650
718
|
name=topic,
|
|
651
719
|
metadata=content.metadata,
|
|
652
720
|
reader=content.reader,
|
|
@@ -656,30 +724,41 @@ class Knowledge:
|
|
|
656
724
|
),
|
|
657
725
|
topics=[topic],
|
|
658
726
|
)
|
|
727
|
+
content.content_hash = self._build_content_hash(content)
|
|
728
|
+
content.id = generate_id(content.content_hash)
|
|
729
|
+
|
|
730
|
+
await self._add_to_contents_db(content)
|
|
731
|
+
if self._should_skip(content.content_hash, skip_if_exists):
|
|
732
|
+
content.status = ContentStatus.COMPLETED
|
|
733
|
+
await self._aupdate_content(content)
|
|
734
|
+
return
|
|
659
735
|
|
|
660
736
|
if self.vector_db.__class__.__name__ == "LightRag":
|
|
661
737
|
await self._process_lightrag_content(content, KnowledgeContentOrigin.TOPIC)
|
|
662
738
|
return
|
|
663
739
|
|
|
664
|
-
content.content_hash = self._build_content_hash(content)
|
|
665
740
|
if self.vector_db and self.vector_db.content_hash_exists(content.content_hash) and skip_if_exists:
|
|
666
741
|
log_info(f"Content {content.content_hash} already exists, skipping")
|
|
667
742
|
continue
|
|
668
743
|
|
|
669
|
-
self._add_to_contents_db(content)
|
|
744
|
+
await self._add_to_contents_db(content)
|
|
670
745
|
if content.reader is None:
|
|
671
746
|
log_error(f"No reader available for topic: {topic}")
|
|
747
|
+
content.status = ContentStatus.FAILED
|
|
748
|
+
content.status_message = "No reader available for topic"
|
|
749
|
+
await self._aupdate_content(content)
|
|
672
750
|
continue
|
|
751
|
+
|
|
673
752
|
read_documents = content.reader.read(topic)
|
|
674
753
|
if len(read_documents) > 0:
|
|
675
754
|
for read_document in read_documents:
|
|
676
|
-
read_document.content_id = id
|
|
755
|
+
read_document.content_id = content.id
|
|
677
756
|
if read_document.content:
|
|
678
757
|
read_document.size = len(read_document.content.encode("utf-8"))
|
|
679
758
|
else:
|
|
680
759
|
content.status = ContentStatus.FAILED
|
|
681
760
|
content.status_message = "No content found for topic"
|
|
682
|
-
self.
|
|
761
|
+
await self._aupdate_content(content)
|
|
683
762
|
|
|
684
763
|
await self._handle_vector_db_insert(content, read_documents, upsert)
|
|
685
764
|
|
|
@@ -735,11 +814,9 @@ class Knowledge:
|
|
|
735
814
|
|
|
736
815
|
for s3_object in objects_to_read:
|
|
737
816
|
# 2. Setup Content object
|
|
738
|
-
id = str(uuid4())
|
|
739
817
|
content_name = content.name or ""
|
|
740
818
|
content_name += "_" + (s3_object.name or "")
|
|
741
819
|
content_entry = Content(
|
|
742
|
-
id=id,
|
|
743
820
|
name=content_name,
|
|
744
821
|
description=content.description,
|
|
745
822
|
status=ContentStatus.PROCESSING,
|
|
@@ -748,11 +825,13 @@ class Knowledge:
|
|
|
748
825
|
)
|
|
749
826
|
|
|
750
827
|
# 3. Hash content and add it to the contents database
|
|
751
|
-
content_hash = self._build_content_hash(content_entry)
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
828
|
+
content_entry.content_hash = self._build_content_hash(content_entry)
|
|
829
|
+
content_entry.id = generate_id(content_entry.content_hash)
|
|
830
|
+
await self._add_to_contents_db(content_entry)
|
|
831
|
+
if self._should_skip(content_entry.content_hash, skip_if_exists):
|
|
832
|
+
content_entry.status = ContentStatus.COMPLETED
|
|
833
|
+
await self._aupdate_content(content_entry)
|
|
834
|
+
return
|
|
756
835
|
|
|
757
836
|
# 4. Select reader
|
|
758
837
|
reader = content.reader
|
|
@@ -763,6 +842,8 @@ class Knowledge:
|
|
|
763
842
|
reader = self.csv_reader
|
|
764
843
|
elif s3_object.uri.endswith(".docx"):
|
|
765
844
|
reader = self.docx_reader
|
|
845
|
+
elif s3_object.uri.endswith(".pptx"):
|
|
846
|
+
reader = self.pptx_reader
|
|
766
847
|
elif s3_object.uri.endswith(".json"):
|
|
767
848
|
reader = self.json_reader
|
|
768
849
|
elif s3_object.uri.endswith(".markdown"):
|
|
@@ -818,10 +899,8 @@ class Knowledge:
|
|
|
818
899
|
|
|
819
900
|
for gcs_object in objects_to_read:
|
|
820
901
|
# 2. Setup Content object
|
|
821
|
-
id = str(uuid4())
|
|
822
902
|
name = (content.name or "content") + "_" + gcs_object.name
|
|
823
903
|
content_entry = Content(
|
|
824
|
-
id=id,
|
|
825
904
|
name=name,
|
|
826
905
|
description=content.description,
|
|
827
906
|
status=ContentStatus.PROCESSING,
|
|
@@ -830,15 +909,15 @@ class Knowledge:
|
|
|
830
909
|
)
|
|
831
910
|
|
|
832
911
|
# 3. Hash content and add it to the contents database
|
|
833
|
-
content_hash = self._build_content_hash(content_entry)
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
912
|
+
content_entry.content_hash = self._build_content_hash(content_entry)
|
|
913
|
+
content_entry.id = generate_id(content_entry.content_hash)
|
|
914
|
+
await self._add_to_contents_db(content_entry)
|
|
915
|
+
if self._should_skip(content_entry.content_hash, skip_if_exists):
|
|
916
|
+
content_entry.status = ContentStatus.COMPLETED
|
|
917
|
+
await self._aupdate_content(content_entry)
|
|
918
|
+
return
|
|
840
919
|
|
|
841
|
-
#
|
|
920
|
+
# 4. Select reader
|
|
842
921
|
reader = content.reader
|
|
843
922
|
if reader is None:
|
|
844
923
|
if gcs_object.name.endswith(".pdf"):
|
|
@@ -847,6 +926,8 @@ class Knowledge:
|
|
|
847
926
|
reader = self.csv_reader
|
|
848
927
|
elif gcs_object.name.endswith(".docx"):
|
|
849
928
|
reader = self.docx_reader
|
|
929
|
+
elif gcs_object.name.endswith(".pptx"):
|
|
930
|
+
reader = self.pptx_reader
|
|
850
931
|
elif gcs_object.name.endswith(".json"):
|
|
851
932
|
reader = self.json_reader
|
|
852
933
|
elif gcs_object.name.endswith(".markdown"):
|
|
@@ -866,37 +947,43 @@ class Knowledge:
|
|
|
866
947
|
read_document.content_id = content.id
|
|
867
948
|
await self._handle_vector_db_insert(content_entry, read_documents, upsert)
|
|
868
949
|
|
|
869
|
-
async def _handle_vector_db_insert(self, content, read_documents, upsert):
|
|
950
|
+
async def _handle_vector_db_insert(self, content: Content, read_documents, upsert):
|
|
951
|
+
from agno.vectordb import VectorDb
|
|
952
|
+
|
|
953
|
+
self.vector_db = cast(VectorDb, self.vector_db)
|
|
954
|
+
|
|
870
955
|
if not self.vector_db:
|
|
871
956
|
log_error("No vector database configured")
|
|
872
957
|
content.status = ContentStatus.FAILED
|
|
873
958
|
content.status_message = "No vector database configured"
|
|
874
|
-
self.
|
|
959
|
+
await self._aupdate_content(content)
|
|
875
960
|
return
|
|
876
961
|
|
|
877
962
|
if self.vector_db.upsert_available() and upsert:
|
|
878
963
|
try:
|
|
879
|
-
await self.vector_db.async_upsert(content.content_hash, read_documents, content.metadata)
|
|
964
|
+
await self.vector_db.async_upsert(content.content_hash, read_documents, content.metadata) # type: ignore[arg-type]
|
|
880
965
|
except Exception as e:
|
|
881
966
|
log_error(f"Error upserting document: {e}")
|
|
882
967
|
content.status = ContentStatus.FAILED
|
|
883
968
|
content.status_message = "Could not upsert embedding"
|
|
884
|
-
self.
|
|
969
|
+
await self._aupdate_content(content)
|
|
885
970
|
return
|
|
886
971
|
else:
|
|
887
972
|
try:
|
|
888
973
|
await self.vector_db.async_insert(
|
|
889
|
-
content.content_hash,
|
|
974
|
+
content.content_hash, # type: ignore[arg-type]
|
|
975
|
+
documents=read_documents,
|
|
976
|
+
filters=content.metadata, # type: ignore[arg-type]
|
|
890
977
|
)
|
|
891
978
|
except Exception as e:
|
|
892
979
|
log_error(f"Error inserting document: {e}")
|
|
893
980
|
content.status = ContentStatus.FAILED
|
|
894
981
|
content.status_message = "Could not insert embedding"
|
|
895
|
-
self.
|
|
982
|
+
await self._aupdate_content(content)
|
|
896
983
|
return
|
|
897
984
|
|
|
898
985
|
content.status = ContentStatus.COMPLETED
|
|
899
|
-
self.
|
|
986
|
+
await self._aupdate_content(content)
|
|
900
987
|
|
|
901
988
|
async def _load_content(
|
|
902
989
|
self,
|
|
@@ -906,11 +993,6 @@ class Knowledge:
|
|
|
906
993
|
include: Optional[List[str]] = None,
|
|
907
994
|
exclude: Optional[List[str]] = None,
|
|
908
995
|
) -> None:
|
|
909
|
-
log_info(f"Loading content: {content.id}")
|
|
910
|
-
|
|
911
|
-
if content.metadata:
|
|
912
|
-
self.add_filters(content.metadata)
|
|
913
|
-
|
|
914
996
|
if content.path:
|
|
915
997
|
await self._load_from_path(content, upsert, skip_if_exists, include, exclude)
|
|
916
998
|
|
|
@@ -954,7 +1036,49 @@ class Knowledge:
|
|
|
954
1036
|
)
|
|
955
1037
|
return hashlib.sha256(fallback.encode()).hexdigest()
|
|
956
1038
|
|
|
957
|
-
def
|
|
1039
|
+
def _ensure_string_field(self, value: Any, field_name: str, default: str = "") -> str:
|
|
1040
|
+
"""
|
|
1041
|
+
Safely ensure a field is a string, handling various edge cases.
|
|
1042
|
+
|
|
1043
|
+
Args:
|
|
1044
|
+
value: The value to convert to string
|
|
1045
|
+
field_name: Name of the field for logging purposes
|
|
1046
|
+
default: Default string value if conversion fails
|
|
1047
|
+
|
|
1048
|
+
Returns:
|
|
1049
|
+
str: A safe string value
|
|
1050
|
+
"""
|
|
1051
|
+
# Handle None/falsy values
|
|
1052
|
+
if value is None or value == "":
|
|
1053
|
+
return default
|
|
1054
|
+
|
|
1055
|
+
# Handle unexpected list types (the root cause of our Pydantic warning)
|
|
1056
|
+
if isinstance(value, list):
|
|
1057
|
+
if len(value) == 0:
|
|
1058
|
+
log_debug(f"Empty list found for {field_name}, using default: '{default}'")
|
|
1059
|
+
return default
|
|
1060
|
+
elif len(value) == 1:
|
|
1061
|
+
# Single item list, extract the item
|
|
1062
|
+
log_debug(f"Single-item list found for {field_name}, extracting: '{value[0]}'")
|
|
1063
|
+
return str(value[0]) if value[0] is not None else default
|
|
1064
|
+
else:
|
|
1065
|
+
# Multiple items, join them
|
|
1066
|
+
log_debug(f"Multi-item list found for {field_name}, joining: {value}")
|
|
1067
|
+
return " | ".join(str(item) for item in value if item is not None)
|
|
1068
|
+
|
|
1069
|
+
# Handle other unexpected types
|
|
1070
|
+
if not isinstance(value, str):
|
|
1071
|
+
log_debug(f"Non-string type {type(value)} found for {field_name}, converting: '{value}'")
|
|
1072
|
+
try:
|
|
1073
|
+
return str(value)
|
|
1074
|
+
except Exception as e:
|
|
1075
|
+
log_warning(f"Failed to convert {field_name} to string: {e}, using default")
|
|
1076
|
+
return default
|
|
1077
|
+
|
|
1078
|
+
# Already a string, return as-is
|
|
1079
|
+
return value
|
|
1080
|
+
|
|
1081
|
+
async def _add_to_contents_db(self, content: Content):
|
|
958
1082
|
if self.contents_db:
|
|
959
1083
|
created_at = content.created_at if content.created_at else int(time.time())
|
|
960
1084
|
updated_at = content.updated_at if content.updated_at else int(time.time())
|
|
@@ -966,10 +1090,18 @@ class Knowledge:
|
|
|
966
1090
|
if content.file_data and content.file_data.type
|
|
967
1091
|
else None
|
|
968
1092
|
)
|
|
1093
|
+
# Safely handle string fields with proper type checking
|
|
1094
|
+
safe_name = self._ensure_string_field(content.name, "content.name", default="")
|
|
1095
|
+
safe_description = self._ensure_string_field(content.description, "content.description", default="")
|
|
1096
|
+
safe_linked_to = self._ensure_string_field(self.name, "knowledge.name", default="")
|
|
1097
|
+
safe_status_message = self._ensure_string_field(
|
|
1098
|
+
content.status_message, "content.status_message", default=""
|
|
1099
|
+
)
|
|
1100
|
+
|
|
969
1101
|
content_row = KnowledgeRow(
|
|
970
1102
|
id=content.id,
|
|
971
|
-
name=
|
|
972
|
-
description=
|
|
1103
|
+
name=safe_name,
|
|
1104
|
+
description=safe_description,
|
|
973
1105
|
metadata=content.metadata,
|
|
974
1106
|
type=file_type,
|
|
975
1107
|
size=content.size
|
|
@@ -977,17 +1109,28 @@ class Knowledge:
|
|
|
977
1109
|
else len(content.file_data.content)
|
|
978
1110
|
if content.file_data and content.file_data.content
|
|
979
1111
|
else None,
|
|
980
|
-
linked_to=
|
|
1112
|
+
linked_to=safe_linked_to,
|
|
981
1113
|
access_count=0,
|
|
982
1114
|
status=content.status if content.status else ContentStatus.PROCESSING,
|
|
983
|
-
status_message=
|
|
1115
|
+
status_message=safe_status_message,
|
|
984
1116
|
created_at=created_at,
|
|
985
1117
|
updated_at=updated_at,
|
|
986
1118
|
)
|
|
987
|
-
self.contents_db
|
|
1119
|
+
if isinstance(self.contents_db, AsyncBaseDb):
|
|
1120
|
+
await self.contents_db.upsert_knowledge_content(knowledge_row=content_row)
|
|
1121
|
+
else:
|
|
1122
|
+
self.contents_db.upsert_knowledge_content(knowledge_row=content_row)
|
|
988
1123
|
|
|
989
1124
|
def _update_content(self, content: Content) -> Optional[Dict[str, Any]]:
|
|
1125
|
+
from agno.vectordb import VectorDb
|
|
1126
|
+
|
|
1127
|
+
self.vector_db = cast(VectorDb, self.vector_db)
|
|
990
1128
|
if self.contents_db:
|
|
1129
|
+
if isinstance(self.contents_db, AsyncBaseDb):
|
|
1130
|
+
raise ValueError(
|
|
1131
|
+
"update_content() is not supported with an async DB. Please use aupdate_content() instead."
|
|
1132
|
+
)
|
|
1133
|
+
|
|
991
1134
|
if not content.id:
|
|
992
1135
|
log_warning("Content id is required to update Knowledge content")
|
|
993
1136
|
return None
|
|
@@ -998,6 +1141,55 @@ class Knowledge:
|
|
|
998
1141
|
log_warning(f"Content row not found for id: {content.id}, cannot update status")
|
|
999
1142
|
return None
|
|
1000
1143
|
|
|
1144
|
+
# Apply safe string handling for updates as well
|
|
1145
|
+
if content.name is not None:
|
|
1146
|
+
content_row.name = self._ensure_string_field(content.name, "content.name", default="")
|
|
1147
|
+
if content.description is not None:
|
|
1148
|
+
content_row.description = self._ensure_string_field(
|
|
1149
|
+
content.description, "content.description", default=""
|
|
1150
|
+
)
|
|
1151
|
+
if content.metadata is not None:
|
|
1152
|
+
content_row.metadata = content.metadata
|
|
1153
|
+
if content.status is not None:
|
|
1154
|
+
content_row.status = content.status
|
|
1155
|
+
if content.status_message is not None:
|
|
1156
|
+
content_row.status_message = self._ensure_string_field(
|
|
1157
|
+
content.status_message, "content.status_message", default=""
|
|
1158
|
+
)
|
|
1159
|
+
if content.external_id is not None:
|
|
1160
|
+
content_row.external_id = self._ensure_string_field(
|
|
1161
|
+
content.external_id, "content.external_id", default=""
|
|
1162
|
+
)
|
|
1163
|
+
content_row.updated_at = int(time.time())
|
|
1164
|
+
self.contents_db.upsert_knowledge_content(knowledge_row=content_row)
|
|
1165
|
+
|
|
1166
|
+
if self.vector_db and content.metadata:
|
|
1167
|
+
self.vector_db.update_metadata(content_id=content.id, metadata=content.metadata)
|
|
1168
|
+
|
|
1169
|
+
return content_row.to_dict()
|
|
1170
|
+
|
|
1171
|
+
else:
|
|
1172
|
+
if self.name:
|
|
1173
|
+
log_warning(f"Contents DB not found for knowledge base: {self.name}")
|
|
1174
|
+
else:
|
|
1175
|
+
log_warning("Contents DB not found for knowledge base")
|
|
1176
|
+
return None
|
|
1177
|
+
|
|
1178
|
+
async def _aupdate_content(self, content: Content) -> Optional[Dict[str, Any]]:
|
|
1179
|
+
if self.contents_db:
|
|
1180
|
+
if not content.id:
|
|
1181
|
+
log_warning("Content id is required to update Knowledge content")
|
|
1182
|
+
return None
|
|
1183
|
+
|
|
1184
|
+
# TODO: we shouldn't check for content here, we should trust the upsert method to handle conflicts
|
|
1185
|
+
if isinstance(self.contents_db, AsyncBaseDb):
|
|
1186
|
+
content_row = await self.contents_db.get_knowledge_content(content.id)
|
|
1187
|
+
else:
|
|
1188
|
+
content_row = self.contents_db.get_knowledge_content(content.id)
|
|
1189
|
+
if content_row is None:
|
|
1190
|
+
log_warning(f"Content row not found for id: {content.id}, cannot update status")
|
|
1191
|
+
return None
|
|
1192
|
+
|
|
1001
1193
|
if content.name is not None:
|
|
1002
1194
|
content_row.name = content.name
|
|
1003
1195
|
if content.description is not None:
|
|
@@ -1012,22 +1204,29 @@ class Knowledge:
|
|
|
1012
1204
|
content_row.external_id = content.external_id
|
|
1013
1205
|
|
|
1014
1206
|
content_row.updated_at = int(time.time())
|
|
1015
|
-
self.contents_db
|
|
1207
|
+
if isinstance(self.contents_db, AsyncBaseDb):
|
|
1208
|
+
await self.contents_db.upsert_knowledge_content(knowledge_row=content_row)
|
|
1209
|
+
else:
|
|
1210
|
+
self.contents_db.upsert_knowledge_content(knowledge_row=content_row)
|
|
1016
1211
|
|
|
1017
1212
|
if self.vector_db and content.metadata:
|
|
1018
1213
|
self.vector_db.update_metadata(content_id=content.id, metadata=content.metadata)
|
|
1019
1214
|
|
|
1020
|
-
if content.metadata:
|
|
1021
|
-
self.add_filters(content.metadata)
|
|
1022
|
-
|
|
1023
1215
|
return content_row.to_dict()
|
|
1024
1216
|
|
|
1025
1217
|
else:
|
|
1026
|
-
|
|
1218
|
+
if self.name:
|
|
1219
|
+
log_warning(f"Contents DB not found for knowledge base: {self.name}")
|
|
1220
|
+
else:
|
|
1221
|
+
log_warning("Contents DB not found for knowledge base")
|
|
1027
1222
|
return None
|
|
1028
1223
|
|
|
1029
1224
|
async def _process_lightrag_content(self, content: Content, content_type: KnowledgeContentOrigin) -> None:
|
|
1030
|
-
|
|
1225
|
+
from agno.vectordb import VectorDb
|
|
1226
|
+
|
|
1227
|
+
self.vector_db = cast(VectorDb, self.vector_db)
|
|
1228
|
+
|
|
1229
|
+
await self._add_to_contents_db(content)
|
|
1031
1230
|
if content_type == KnowledgeContentOrigin.PATH:
|
|
1032
1231
|
if content.file_data is None:
|
|
1033
1232
|
log_warning("No file data provided")
|
|
@@ -1058,18 +1257,18 @@ class Knowledge:
|
|
|
1058
1257
|
else:
|
|
1059
1258
|
log_error("Vector database does not support file insertion")
|
|
1060
1259
|
content.status = ContentStatus.FAILED
|
|
1061
|
-
self.
|
|
1260
|
+
await self._aupdate_content(content)
|
|
1062
1261
|
return
|
|
1063
1262
|
content.external_id = result
|
|
1064
1263
|
content.status = ContentStatus.COMPLETED
|
|
1065
|
-
self.
|
|
1264
|
+
await self._aupdate_content(content)
|
|
1066
1265
|
return
|
|
1067
1266
|
|
|
1068
1267
|
except Exception as e:
|
|
1069
1268
|
log_error(f"Error uploading file to LightRAG: {e}")
|
|
1070
1269
|
content.status = ContentStatus.FAILED
|
|
1071
1270
|
content.status_message = f"Could not upload to LightRAG: {str(e)}"
|
|
1072
|
-
self.
|
|
1271
|
+
await self._aupdate_content(content)
|
|
1073
1272
|
return
|
|
1074
1273
|
|
|
1075
1274
|
elif content_type == KnowledgeContentOrigin.URL:
|
|
@@ -1079,7 +1278,7 @@ class Knowledge:
|
|
|
1079
1278
|
if reader is None:
|
|
1080
1279
|
log_error("No URL reader available")
|
|
1081
1280
|
content.status = ContentStatus.FAILED
|
|
1082
|
-
self.
|
|
1281
|
+
await self._aupdate_content(content)
|
|
1083
1282
|
return
|
|
1084
1283
|
|
|
1085
1284
|
reader.chunk = False
|
|
@@ -1091,7 +1290,7 @@ class Knowledge:
|
|
|
1091
1290
|
if not read_documents:
|
|
1092
1291
|
log_error("No documents read from URL")
|
|
1093
1292
|
content.status = ContentStatus.FAILED
|
|
1094
|
-
self.
|
|
1293
|
+
await self._aupdate_content(content)
|
|
1095
1294
|
return
|
|
1096
1295
|
|
|
1097
1296
|
if self.vector_db and hasattr(self.vector_db, "insert_text"):
|
|
@@ -1102,19 +1301,19 @@ class Knowledge:
|
|
|
1102
1301
|
else:
|
|
1103
1302
|
log_error("Vector database does not support text insertion")
|
|
1104
1303
|
content.status = ContentStatus.FAILED
|
|
1105
|
-
self.
|
|
1304
|
+
await self._aupdate_content(content)
|
|
1106
1305
|
return
|
|
1107
1306
|
|
|
1108
1307
|
content.external_id = result
|
|
1109
1308
|
content.status = ContentStatus.COMPLETED
|
|
1110
|
-
self.
|
|
1309
|
+
await self._aupdate_content(content)
|
|
1111
1310
|
return
|
|
1112
1311
|
|
|
1113
1312
|
except Exception as e:
|
|
1114
1313
|
log_error(f"Error uploading file to LightRAG: {e}")
|
|
1115
1314
|
content.status = ContentStatus.FAILED
|
|
1116
1315
|
content.status_message = f"Could not upload to LightRAG: {str(e)}"
|
|
1117
|
-
self.
|
|
1316
|
+
await self._aupdate_content(content)
|
|
1118
1317
|
return
|
|
1119
1318
|
|
|
1120
1319
|
elif content_type == KnowledgeContentOrigin.CONTENT:
|
|
@@ -1135,11 +1334,11 @@ class Knowledge:
|
|
|
1135
1334
|
else:
|
|
1136
1335
|
log_error("Vector database does not support file insertion")
|
|
1137
1336
|
content.status = ContentStatus.FAILED
|
|
1138
|
-
self.
|
|
1337
|
+
await self._aupdate_content(content)
|
|
1139
1338
|
return
|
|
1140
1339
|
content.external_id = result
|
|
1141
1340
|
content.status = ContentStatus.COMPLETED
|
|
1142
|
-
self.
|
|
1341
|
+
await self._aupdate_content(content)
|
|
1143
1342
|
else:
|
|
1144
1343
|
log_warning(f"No file data available for LightRAG upload: {content.name}")
|
|
1145
1344
|
return
|
|
@@ -1150,20 +1349,17 @@ class Knowledge:
|
|
|
1150
1349
|
if content.reader is None:
|
|
1151
1350
|
log_error("No reader available for topic content")
|
|
1152
1351
|
content.status = ContentStatus.FAILED
|
|
1153
|
-
self.
|
|
1352
|
+
await self._aupdate_content(content)
|
|
1154
1353
|
return
|
|
1155
1354
|
|
|
1156
1355
|
if not content.topics:
|
|
1157
1356
|
log_error("No topics available for content")
|
|
1158
1357
|
content.status = ContentStatus.FAILED
|
|
1159
|
-
self.
|
|
1358
|
+
await self._aupdate_content(content)
|
|
1160
1359
|
return
|
|
1161
1360
|
|
|
1162
1361
|
read_documents = content.reader.read(content.topics)
|
|
1163
1362
|
if len(read_documents) > 0:
|
|
1164
|
-
print("READ DOCUMENTS: ", len(read_documents))
|
|
1165
|
-
print("READ DOCUMENTS: ", read_documents[0])
|
|
1166
|
-
|
|
1167
1363
|
if self.vector_db and hasattr(self.vector_db, "insert_text"):
|
|
1168
1364
|
result = await self.vector_db.insert_text(
|
|
1169
1365
|
file_source=content.topics[0],
|
|
@@ -1172,21 +1368,35 @@ class Knowledge:
|
|
|
1172
1368
|
else:
|
|
1173
1369
|
log_error("Vector database does not support text insertion")
|
|
1174
1370
|
content.status = ContentStatus.FAILED
|
|
1175
|
-
self.
|
|
1371
|
+
await self._aupdate_content(content)
|
|
1176
1372
|
return
|
|
1177
1373
|
content.external_id = result
|
|
1178
1374
|
content.status = ContentStatus.COMPLETED
|
|
1179
|
-
self.
|
|
1375
|
+
await self._aupdate_content(content)
|
|
1180
1376
|
return
|
|
1181
1377
|
else:
|
|
1182
1378
|
log_warning(f"No documents found for LightRAG upload: {content.name}")
|
|
1183
1379
|
return
|
|
1184
1380
|
|
|
1185
1381
|
def search(
|
|
1186
|
-
self,
|
|
1382
|
+
self,
|
|
1383
|
+
query: str,
|
|
1384
|
+
max_results: Optional[int] = None,
|
|
1385
|
+
filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None,
|
|
1386
|
+
search_type: Optional[str] = None,
|
|
1187
1387
|
) -> List[Document]:
|
|
1188
1388
|
"""Returns relevant documents matching a query"""
|
|
1389
|
+
from agno.vectordb import VectorDb
|
|
1390
|
+
from agno.vectordb.search import SearchType
|
|
1391
|
+
|
|
1392
|
+
self.vector_db = cast(VectorDb, self.vector_db)
|
|
1189
1393
|
|
|
1394
|
+
if (
|
|
1395
|
+
hasattr(self.vector_db, "search_type")
|
|
1396
|
+
and isinstance(self.vector_db.search_type, SearchType)
|
|
1397
|
+
and search_type
|
|
1398
|
+
):
|
|
1399
|
+
self.vector_db.search_type = SearchType(search_type)
|
|
1190
1400
|
try:
|
|
1191
1401
|
if self.vector_db is None:
|
|
1192
1402
|
log_warning("No vector db provided")
|
|
@@ -1200,10 +1410,23 @@ class Knowledge:
|
|
|
1200
1410
|
return []
|
|
1201
1411
|
|
|
1202
1412
|
async def async_search(
|
|
1203
|
-
self,
|
|
1413
|
+
self,
|
|
1414
|
+
query: str,
|
|
1415
|
+
max_results: Optional[int] = None,
|
|
1416
|
+
filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None,
|
|
1417
|
+
search_type: Optional[str] = None,
|
|
1204
1418
|
) -> List[Document]:
|
|
1205
1419
|
"""Returns relevant documents matching a query"""
|
|
1206
|
-
|
|
1420
|
+
from agno.vectordb import VectorDb
|
|
1421
|
+
from agno.vectordb.search import SearchType
|
|
1422
|
+
|
|
1423
|
+
self.vector_db = cast(VectorDb, self.vector_db)
|
|
1424
|
+
if (
|
|
1425
|
+
hasattr(self.vector_db, "search_type")
|
|
1426
|
+
and isinstance(self.vector_db.search_type, SearchType)
|
|
1427
|
+
and search_type
|
|
1428
|
+
):
|
|
1429
|
+
self.vector_db.search_type = SearchType(search_type)
|
|
1207
1430
|
try:
|
|
1208
1431
|
if self.vector_db is None:
|
|
1209
1432
|
log_warning("No vector db provided")
|
|
@@ -1220,66 +1443,114 @@ class Knowledge:
|
|
|
1220
1443
|
log_error(f"Error searching for documents: {e}")
|
|
1221
1444
|
return []
|
|
1222
1445
|
|
|
1223
|
-
def
|
|
1224
|
-
if self.
|
|
1225
|
-
|
|
1226
|
-
|
|
1446
|
+
def get_valid_filters(self) -> Set[str]:
|
|
1447
|
+
if self.contents_db is None:
|
|
1448
|
+
log_warning("No contents db provided. This is required for filtering.")
|
|
1449
|
+
return set()
|
|
1450
|
+
contents, _ = self.get_content()
|
|
1451
|
+
valid_filters: Set[str] = set()
|
|
1452
|
+
for content in contents:
|
|
1453
|
+
if content.metadata:
|
|
1454
|
+
valid_filters.update(content.metadata.keys())
|
|
1455
|
+
|
|
1456
|
+
return valid_filters
|
|
1457
|
+
|
|
1458
|
+
async def async_get_valid_filters(self) -> Set[str]:
|
|
1459
|
+
if self.contents_db is None:
|
|
1460
|
+
log_warning("No contents db provided. This is required for filtering.")
|
|
1461
|
+
return set()
|
|
1462
|
+
contents, _ = await self.aget_content()
|
|
1463
|
+
valid_filters: Set[str] = set()
|
|
1464
|
+
for content in contents:
|
|
1465
|
+
if content.metadata:
|
|
1466
|
+
valid_filters.update(content.metadata.keys())
|
|
1467
|
+
|
|
1468
|
+
return valid_filters
|
|
1227
1469
|
|
|
1470
|
+
def _validate_filters(
|
|
1471
|
+
self, filters: Union[Dict[str, Any], List[FilterExpr]], valid_metadata_filters: Set[str]
|
|
1472
|
+
) -> Tuple[Union[Dict[str, Any], List[FilterExpr]], List[str]]:
|
|
1228
1473
|
if not filters:
|
|
1229
1474
|
return {}, []
|
|
1230
1475
|
|
|
1231
|
-
valid_filters: Dict[str, Any] = {}
|
|
1476
|
+
valid_filters: Union[Dict[str, Any], List[FilterExpr]] = {}
|
|
1232
1477
|
invalid_keys = []
|
|
1233
1478
|
|
|
1234
|
-
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
|
|
1239
|
-
|
|
1240
|
-
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
|
|
1244
|
-
|
|
1245
|
-
|
|
1246
|
-
|
|
1247
|
-
|
|
1479
|
+
if isinstance(filters, dict):
|
|
1480
|
+
# If no metadata filters tracked yet, all keys are considered invalid
|
|
1481
|
+
if valid_metadata_filters is None or not valid_metadata_filters:
|
|
1482
|
+
invalid_keys = list(filters.keys())
|
|
1483
|
+
log_warning(
|
|
1484
|
+
f"No valid metadata filters tracked yet. All filter keys considered invalid: {invalid_keys}"
|
|
1485
|
+
)
|
|
1486
|
+
return {}, invalid_keys
|
|
1487
|
+
|
|
1488
|
+
for key, value in filters.items():
|
|
1489
|
+
# Handle both normal keys and prefixed keys like meta_data.key
|
|
1490
|
+
base_key = key.split(".")[-1] if "." in key else key
|
|
1491
|
+
if base_key in valid_metadata_filters or key in valid_metadata_filters:
|
|
1492
|
+
valid_filters[key] = value # type: ignore
|
|
1493
|
+
else:
|
|
1494
|
+
invalid_keys.append(key)
|
|
1495
|
+
log_warning(f"Invalid filter key: {key} - not present in knowledge base")
|
|
1496
|
+
|
|
1497
|
+
elif isinstance(filters, List):
|
|
1498
|
+
# Validate that list contains FilterExpr instances
|
|
1499
|
+
for i, filter_item in enumerate(filters):
|
|
1500
|
+
if not isinstance(filter_item, FilterExpr):
|
|
1501
|
+
log_warning(
|
|
1502
|
+
f"Invalid filter at index {i}: expected FilterExpr instance, "
|
|
1503
|
+
f"got {type(filter_item).__name__}. "
|
|
1504
|
+
f"Use filter expressions like EQ('key', 'value'), IN('key', [values]), "
|
|
1505
|
+
f"AND(...), OR(...), NOT(...) from agno.filters"
|
|
1506
|
+
)
|
|
1507
|
+
# Filter expressions are already validated, return empty dict/list
|
|
1508
|
+
# The actual filtering happens in the vector_db layer
|
|
1509
|
+
return filters, []
|
|
1248
1510
|
|
|
1249
1511
|
return valid_filters, invalid_keys
|
|
1250
1512
|
|
|
1251
|
-
def
|
|
1252
|
-
|
|
1253
|
-
|
|
1513
|
+
def validate_filters(
    self, filters: Union[Dict[str, Any], List[FilterExpr]]
) -> Tuple[Union[Dict[str, Any], List[FilterExpr]], List[str]]:
    """Return a tuple containing a dict with all valid filters and a list of invalid filter keys."""
    known_filters = self.get_valid_filters()
    return self._validate_filters(filters, known_filters)
|
|
1521
|
+
|
|
1522
|
+
async def async_validate_filters(
    self, filters: Union[Dict[str, Any], List[FilterExpr]]
) -> Tuple[Union[Dict[str, Any], List[FilterExpr]], List[str]]:
    """Return a tuple containing a dict with all valid filters and a list of invalid filter keys."""
    known_filters = await self.async_get_valid_filters()
    return self._validate_filters(filters, known_filters)
|
|
1269
1531
|
|
|
1270
1532
|
def remove_vector_by_id(self, id: str) -> bool:
    """Delete a single vector by id; returns False when no vector db is configured."""
    from agno.vectordb import VectorDb

    self.vector_db = cast(VectorDb, self.vector_db)
    if self.vector_db is not None:
        return self.vector_db.delete_by_id(id)
    log_warning("No vector DB provided")
    return False
|
|
1275
1540
|
|
|
1276
1541
|
def remove_vectors_by_name(self, name: str) -> bool:
    """Delete all vectors matching a name; returns False when no vector db is configured."""
    from agno.vectordb import VectorDb

    self.vector_db = cast(VectorDb, self.vector_db)
    if self.vector_db is not None:
        return self.vector_db.delete_by_name(name)
    log_warning("No vector DB provided")
    return False
|
|
1281
1549
|
|
|
1282
1550
|
def remove_vectors_by_metadata(self, metadata: Dict[str, Any]) -> bool:
|
|
1551
|
+
from agno.vectordb import VectorDb
|
|
1552
|
+
|
|
1553
|
+
self.vector_db = cast(VectorDb, self.vector_db)
|
|
1283
1554
|
if self.vector_db is None:
|
|
1284
1555
|
log_warning("No vector DB provided")
|
|
1285
1556
|
return False
|
|
@@ -1290,10 +1561,46 @@ class Knowledge:
|
|
|
1290
1561
|
def patch_content(self, content: Content) -> Optional[Dict[str, Any]]:
    """Apply a partial update to an existing content entry and return the update result."""
    updated = self._update_content(content)
    return updated
|
|
1292
1563
|
|
|
1564
|
+
async def apatch_content(self, content: Content) -> Optional[Dict[str, Any]]:
    """Async variant of patch_content: apply a partial update to an existing content entry."""
    updated = await self._aupdate_content(content)
    return updated
|
|
1566
|
+
|
|
1293
1567
|
def get_content_by_id(self, content_id: str) -> Optional[Content]:
    """Fetch one content row from the (sync) contents db and map it to a Content object.

    Raises ValueError when no contents db is configured or when the configured
    contents db is async (use aget_content_by_id in that case).
    """
    if self.contents_db is None:
        raise ValueError("No contents db provided")

    if isinstance(self.contents_db, AsyncBaseDb):
        raise ValueError(
            "get_content_by_id() is not supported for async databases. Please use aget_content_by_id() instead."
        )

    row = self.contents_db.get_knowledge_content(content_id)
    if row is None:
        return None

    # Map the raw database row onto the public Content model
    return Content(
        id=row.id,
        name=row.name,
        description=row.description,
        metadata=row.metadata,
        file_type=row.type,
        size=row.size,
        status=ContentStatus(row.status) if row.status else None,
        status_message=row.status_message,
        created_at=row.created_at,
        updated_at=row.updated_at if row.updated_at else row.created_at,
        external_id=row.external_id,
    )
|
|
1594
|
+
|
|
1595
|
+
async def aget_content_by_id(self, content_id: str) -> Optional[Content]:
|
|
1596
|
+
if self.contents_db is None:
|
|
1597
|
+
raise ValueError("No contents db provided")
|
|
1598
|
+
|
|
1599
|
+
if isinstance(self.contents_db, AsyncBaseDb):
|
|
1600
|
+
content_row = await self.contents_db.get_knowledge_content(content_id)
|
|
1601
|
+
else:
|
|
1602
|
+
content_row = self.contents_db.get_knowledge_content(content_id)
|
|
1603
|
+
|
|
1297
1604
|
if content_row is None:
|
|
1298
1605
|
return None
|
|
1299
1606
|
content = Content(
|
|
@@ -1320,6 +1627,10 @@ class Knowledge:
|
|
|
1320
1627
|
) -> Tuple[List[Content], int]:
|
|
1321
1628
|
if self.contents_db is None:
|
|
1322
1629
|
raise ValueError("No contents db provided")
|
|
1630
|
+
|
|
1631
|
+
if isinstance(self.contents_db, AsyncBaseDb):
|
|
1632
|
+
raise ValueError("get_content() is not supported for async databases. Please use aget_content() instead.")
|
|
1633
|
+
|
|
1323
1634
|
contents, count = self.contents_db.get_knowledge_contents(
|
|
1324
1635
|
limit=limit, page=page, sort_by=sort_by, sort_order=sort_order
|
|
1325
1636
|
)
|
|
@@ -1343,9 +1654,53 @@ class Knowledge:
|
|
|
1343
1654
|
result.append(content)
|
|
1344
1655
|
return result, count
|
|
1345
1656
|
|
|
1657
|
+
async def aget_content(
    self,
    limit: Optional[int] = None,
    page: Optional[int] = None,
    sort_by: Optional[str] = None,
    sort_order: Optional[str] = None,
) -> Tuple[List[Content], int]:
    """Async listing of content entries, supporting both sync and async contents dbs.

    Returns the mapped Content objects for the requested page together with the
    total row count reported by the db.
    """
    if self.contents_db is None:
        raise ValueError("No contents db provided")

    # Async dbs must be awaited; sync dbs are called directly
    if isinstance(self.contents_db, AsyncBaseDb):
        rows, count = await self.contents_db.get_knowledge_contents(
            limit=limit, page=page, sort_by=sort_by, sort_order=sort_order
        )
    else:
        rows, count = self.contents_db.get_knowledge_contents(
            limit=limit, page=page, sort_by=sort_by, sort_order=sort_order
        )

    # Map each database row onto the public Content model
    result = [
        Content(
            id=row.id,
            name=row.name,
            description=row.description,
            metadata=row.metadata,
            size=row.size,
            file_type=row.type,
            status=ContentStatus(row.status) if row.status else None,
            status_message=row.status_message,
            created_at=row.created_at,
            updated_at=row.updated_at if row.updated_at else row.created_at,
            external_id=row.external_id,
        )
        for row in rows
    ]
    return result, count
|
|
1694
|
+
|
|
1346
1695
|
def get_content_status(self, content_id: str) -> Tuple[Optional[ContentStatus], Optional[str]]:
|
|
1347
1696
|
if self.contents_db is None:
|
|
1348
1697
|
raise ValueError("No contents db provided")
|
|
1698
|
+
|
|
1699
|
+
if isinstance(self.contents_db, AsyncBaseDb):
|
|
1700
|
+
raise ValueError(
|
|
1701
|
+
"get_content_status() is not supported for async databases. Please use aget_content_status() instead."
|
|
1702
|
+
)
|
|
1703
|
+
|
|
1349
1704
|
content_row = self.contents_db.get_knowledge_content(content_id)
|
|
1350
1705
|
if content_row is None:
|
|
1351
1706
|
return None, "Content not found"
|
|
@@ -1365,7 +1720,37 @@ class Knowledge:
|
|
|
1365
1720
|
|
|
1366
1721
|
return status, content_row.status_message
|
|
1367
1722
|
|
|
1723
|
+
async def aget_content_status(self, content_id: str) -> Tuple[Optional[ContentStatus], Optional[str]]:
    """Resolve the processing status (and status message) of a content entry.

    Supports both sync and async contents dbs. Unknown or legacy status strings
    are mapped on a best-effort basis, defaulting to PROCESSING.
    """
    if self.contents_db is None:
        raise ValueError("No contents db provided")

    if isinstance(self.contents_db, AsyncBaseDb):
        row = await self.contents_db.get_knowledge_content(content_id)
    else:
        row = self.contents_db.get_knowledge_content(content_id)

    if row is None:
        return None, "Content not found"

    raw_status = row.status
    if not raw_status:
        # No status recorded yet: treat as still processing
        return ContentStatus.PROCESSING, row.status_message

    try:
        status = ContentStatus(raw_status.lower())
    except ValueError:
        # Legacy or unknown status strings: best-effort keyword mapping
        lowered = raw_status.lower()
        if "failed" in lowered:
            status = ContentStatus.FAILED
        elif "completed" in lowered:
            status = ContentStatus.COMPLETED
        else:
            status = ContentStatus.PROCESSING

    return status, row.status_message
|
|
1749
|
+
|
|
1368
1750
|
def remove_content_by_id(self, content_id: str):
|
|
1751
|
+
from agno.vectordb import VectorDb
|
|
1752
|
+
|
|
1753
|
+
self.vector_db = cast(VectorDb, self.vector_db)
|
|
1369
1754
|
if self.vector_db is not None:
|
|
1370
1755
|
if self.vector_db.__class__.__name__ == "LightRag":
|
|
1371
1756
|
# For LightRAG, get the content first to find the external_id
|
|
@@ -1380,12 +1765,36 @@ class Knowledge:
|
|
|
1380
1765
|
if self.contents_db is not None:
|
|
1381
1766
|
self.contents_db.delete_knowledge_content(content_id)
|
|
1382
1767
|
|
|
1768
|
+
async def aremove_content_by_id(self, content_id: str):
    """Delete a content entry from both the vector db and the contents db (async)."""
    if self.vector_db is not None:
        is_light_rag = self.vector_db.__class__.__name__ == "LightRag"
        if is_light_rag:
            # LightRAG deletes by external_id, so look the content up first
            content = await self.aget_content_by_id(content_id)
            if content and content.external_id:
                self.vector_db.delete_by_external_id(content.external_id)  # type: ignore
            else:
                log_warning(f"No external_id found for content {content_id}, cannot delete from LightRAG")
        else:
            self.vector_db.delete_by_content_id(content_id)

    if self.contents_db is not None:
        # Async contents dbs must be awaited; sync ones are called directly
        if isinstance(self.contents_db, AsyncBaseDb):
            await self.contents_db.delete_knowledge_content(content_id)
        else:
            self.contents_db.delete_knowledge_content(content_id)
|
|
1785
|
+
|
|
1383
1786
|
def remove_all_content(self):
    """Remove every content entry currently tracked by this knowledge base."""
    contents, _count = self.get_content()
    for item in contents:
        if item.id is not None:
            self.remove_content_by_id(item.id)
|
|
1388
1791
|
|
|
1792
|
+
async def aremove_all_content(self):
    """Async counterpart of remove_all_content: delete every tracked content entry."""
    contents, _count = await self.aget_content()
    for item in contents:
        if item.id is not None:
            await self.aremove_content_by_id(item.id)
|
|
1797
|
+
|
|
1389
1798
|
# --- Reader Factory Integration ---
|
|
1390
1799
|
|
|
1391
1800
|
def construct_readers(self):
|
|
@@ -1423,12 +1832,6 @@ class Knowledge:
|
|
|
1423
1832
|
log_info(f"Selecting reader for extension: {extension}")
|
|
1424
1833
|
return ReaderFactory.get_reader_for_extension(extension)
|
|
1425
1834
|
|
|
1426
|
-
def get_filters(self) -> List[str]:
|
|
1427
|
-
return [
|
|
1428
|
-
"filter_tag_1",
|
|
1429
|
-
"filter_tag2",
|
|
1430
|
-
]
|
|
1431
|
-
|
|
1432
1835
|
# --- Convenience Properties for Backward Compatibility ---
|
|
1433
1836
|
|
|
1434
1837
|
def _is_text_mime_type(self, mime_type: str) -> bool:
|
|
@@ -1520,6 +1923,11 @@ class Knowledge:
|
|
|
1520
1923
|
"""Docx reader - lazy loaded via factory."""
|
|
1521
1924
|
return self._get_reader("docx")
|
|
1522
1925
|
|
|
1926
|
+
@property
def pptx_reader(self) -> Optional[Reader]:
    """Reader for PowerPoint (.pptx) files, lazily resolved through the reader factory."""
    reader = self._get_reader("pptx")
    return reader
|
|
1930
|
+
|
|
1523
1931
|
@property
|
|
1524
1932
|
def json_reader(self) -> Optional[Reader]:
|
|
1525
1933
|
"""JSON reader - lazy loaded via factory."""
|