agno 2.3.8__py3-none-any.whl → 2.3.10__py3-none-any.whl
This diff covers publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between versions as they appear in their public registries.
- agno/agent/agent.py +134 -94
- agno/db/mysql/__init__.py +2 -1
- agno/db/mysql/async_mysql.py +2888 -0
- agno/db/mysql/mysql.py +17 -8
- agno/db/mysql/utils.py +139 -6
- agno/db/postgres/async_postgres.py +10 -5
- agno/db/postgres/postgres.py +7 -2
- agno/db/schemas/evals.py +1 -0
- agno/db/singlestore/singlestore.py +5 -1
- agno/db/sqlite/async_sqlite.py +3 -3
- agno/eval/__init__.py +10 -0
- agno/eval/accuracy.py +11 -8
- agno/eval/agent_as_judge.py +861 -0
- agno/eval/base.py +29 -0
- agno/eval/utils.py +2 -1
- agno/exceptions.py +7 -0
- agno/knowledge/embedder/openai.py +8 -8
- agno/knowledge/knowledge.py +1142 -176
- agno/media.py +22 -6
- agno/models/aws/claude.py +8 -7
- agno/models/base.py +61 -2
- agno/models/deepseek/deepseek.py +67 -0
- agno/models/google/gemini.py +134 -51
- agno/models/google/utils.py +22 -0
- agno/models/message.py +5 -0
- agno/models/openai/chat.py +4 -0
- agno/os/app.py +64 -74
- agno/os/interfaces/a2a/router.py +3 -4
- agno/os/interfaces/agui/router.py +2 -0
- agno/os/router.py +3 -1607
- agno/os/routers/agents/__init__.py +3 -0
- agno/os/routers/agents/router.py +581 -0
- agno/os/routers/agents/schema.py +261 -0
- agno/os/routers/evals/evals.py +26 -6
- agno/os/routers/evals/schemas.py +34 -2
- agno/os/routers/evals/utils.py +77 -18
- agno/os/routers/knowledge/knowledge.py +1 -1
- agno/os/routers/teams/__init__.py +3 -0
- agno/os/routers/teams/router.py +496 -0
- agno/os/routers/teams/schema.py +257 -0
- agno/os/routers/workflows/__init__.py +3 -0
- agno/os/routers/workflows/router.py +545 -0
- agno/os/routers/workflows/schema.py +75 -0
- agno/os/schema.py +1 -559
- agno/os/utils.py +139 -2
- agno/team/team.py +87 -24
- agno/tools/file_generation.py +12 -6
- agno/tools/firecrawl.py +15 -7
- agno/tools/function.py +37 -23
- agno/tools/shopify.py +1519 -0
- agno/tools/spotify.py +2 -5
- agno/utils/hooks.py +64 -5
- agno/utils/http.py +2 -2
- agno/utils/media.py +11 -1
- agno/utils/print_response/agent.py +8 -0
- agno/utils/print_response/team.py +8 -0
- agno/vectordb/pgvector/pgvector.py +88 -51
- agno/workflow/parallel.py +5 -3
- agno/workflow/step.py +14 -2
- agno/workflow/types.py +38 -2
- agno/workflow/workflow.py +12 -4
- {agno-2.3.8.dist-info → agno-2.3.10.dist-info}/METADATA +7 -2
- {agno-2.3.8.dist-info → agno-2.3.10.dist-info}/RECORD +66 -52
- {agno-2.3.8.dist-info → agno-2.3.10.dist-info}/WHEEL +0 -0
- {agno-2.3.8.dist-info → agno-2.3.10.dist-info}/licenses/LICENSE +0 -0
- {agno-2.3.8.dist-info → agno-2.3.10.dist-info}/top_level.txt +0 -0
agno/knowledge/knowledge.py
CHANGED
@@ -198,8 +198,6 @@ class Knowledge:
         """
         Synchronously add multiple content items to the knowledge base.
 
-        This method wraps the asynchronous add_contents method
-
         Supports two usage patterns:
         1. Pass a list of content dictionaries as first argument
         2. Pass keyword arguments with paths, urls, metadata, etc.
@@ -214,11 +212,109 @@ class Knowledge:
             reader: Optional reader to use for processing content
             include: Optional list of file patterns to include
             exclude: Optional list of file patterns to exclude
-            upsert: Whether to update existing content if it already exists
-            skip_if_exists: Whether to skip adding content if it already exists
+            upsert: Whether to update existing content if it already exists (only used when skip_if_exists=False)
+            skip_if_exists: Whether to skip adding content if it already exists (default: True)
             remote_content: Optional remote content (S3, GCS, etc.) to add
         """
-
+        if args and isinstance(args[0], list):
+            arguments = args[0]
+            upsert = kwargs.get("upsert", True)
+            skip_if_exists = kwargs.get("skip_if_exists", False)
+            for argument in arguments:
+                self.add_content(
+                    name=argument.get("name"),
+                    description=argument.get("description"),
+                    path=argument.get("path"),
+                    url=argument.get("url"),
+                    metadata=argument.get("metadata"),
+                    topics=argument.get("topics"),
+                    text_content=argument.get("text_content"),
+                    reader=argument.get("reader"),
+                    include=argument.get("include"),
+                    exclude=argument.get("exclude"),
+                    upsert=argument.get("upsert", upsert),
+                    skip_if_exists=argument.get("skip_if_exists", skip_if_exists),
+                    remote_content=argument.get("remote_content", None),
+                )
+
+        elif kwargs:
+            name = kwargs.get("name", [])
+            metadata = kwargs.get("metadata", {})
+            description = kwargs.get("description", [])
+            topics = kwargs.get("topics", [])
+            reader = kwargs.get("reader", None)
+            paths = kwargs.get("paths", [])
+            urls = kwargs.get("urls", [])
+            text_contents = kwargs.get("text_contents", [])
+            include = kwargs.get("include")
+            exclude = kwargs.get("exclude")
+            upsert = kwargs.get("upsert", True)
+            skip_if_exists = kwargs.get("skip_if_exists", False)
+            remote_content = kwargs.get("remote_content", None)
+            for path in paths:
+                self.add_content(
+                    name=name,
+                    description=description,
+                    path=path,
+                    metadata=metadata,
+                    include=include,
+                    exclude=exclude,
+                    upsert=upsert,
+                    skip_if_exists=skip_if_exists,
+                    reader=reader,
+                )
+            for url in urls:
+                self.add_content(
+                    name=name,
+                    description=description,
+                    url=url,
+                    metadata=metadata,
+                    include=include,
+                    exclude=exclude,
+                    upsert=upsert,
+                    skip_if_exists=skip_if_exists,
+                    reader=reader,
+                )
+            for i, text_content in enumerate(text_contents):
+                content_name = f"{name}_{i}" if name else f"text_content_{i}"
+                log_debug(f"Adding text content: {content_name}")
+                self.add_content(
+                    name=content_name,
+                    description=description,
+                    text_content=text_content,
+                    metadata=metadata,
+                    include=include,
+                    exclude=exclude,
+                    upsert=upsert,
+                    skip_if_exists=skip_if_exists,
+                    reader=reader,
+                )
+            if topics:
+                self.add_content(
+                    name=name,
+                    description=description,
+                    topics=topics,
+                    metadata=metadata,
+                    include=include,
+                    exclude=exclude,
+                    upsert=upsert,
+                    skip_if_exists=skip_if_exists,
+                    reader=reader,
+                )
+
+            if remote_content:
+                self.add_content(
+                    name=name,
+                    metadata=metadata,
+                    description=description,
+                    remote_content=remote_content,
+                    upsert=upsert,
+                    skip_if_exists=skip_if_exists,
+                    reader=reader,
+                )
+
+        else:
+            raise ValueError("Invalid usage of add_contents.")
 
     # --- Add Content ---
 
@@ -255,7 +351,7 @@ class Knowledge:
         include: Optional[List[str]] = None,
         exclude: Optional[List[str]] = None,
         upsert: bool = True,
-        skip_if_exists: bool = True,
+        skip_if_exists: bool = False,
         auth: Optional[ContentAuth] = None,
     ) -> None:
         # Validation: At least one of the parameters must be provided
@@ -265,10 +361,6 @@ class Knowledge:
             )
             return
 
-        if not skip_if_exists:
-            log_debug("skip_if_exists is disabled, disabling upsert")
-            upsert = False
-
         content = None
         file_data = None
         if text_content:
@@ -289,7 +381,7 @@ class Knowledge:
         content.content_hash = self._build_content_hash(content)
         content.id = generate_id(content.content_hash)
 
-        await self._load_content(content, upsert, skip_if_exists, include, exclude)
+        await self._load_content_async(content, upsert, skip_if_exists, include, exclude)
 
     @overload
     def add_content(
@@ -342,27 +434,37 @@ class Knowledge:
             reader: Optional custom reader for processing the content
             include: Optional list of file patterns to include
             exclude: Optional list of file patterns to exclude
-            upsert: Whether to update existing content if it already exists
-            skip_if_exists: Whether to skip adding content if it already exists
+            upsert: Whether to update existing content if it already exists (only used when skip_if_exists=False)
+            skip_if_exists: Whether to skip adding content if it already exists (default: False)
         """
-
-
-
-
-                path=path,
-                url=url,
-                text_content=text_content,
-                metadata=metadata,
-                topics=topics,
-                remote_content=remote_content,
-                reader=reader,
-                include=include,
-                exclude=exclude,
-                upsert=upsert,
-                skip_if_exists=skip_if_exists,
-                auth=auth,
+        # Validation: At least one of the parameters must be provided
+        if all(argument is None for argument in [path, url, text_content, topics, remote_content]):
+            log_warning(
+                "At least one of 'path', 'url', 'text_content', 'topics', or 'remote_content' must be provided."
             )
+            return
+
+        content = None
+        file_data = None
+        if text_content:
+            file_data = FileData(content=text_content, type="Text")
+
+        content = Content(
+            name=name,
+            description=description,
+            path=path,
+            url=url,
+            file_data=file_data if file_data else None,
+            metadata=metadata,
+            topics=topics,
+            remote_content=remote_content,
+            reader=reader,
+            auth=auth,
         )
+        content.content_hash = self._build_content_hash(content)
+        content.id = generate_id(content.content_hash)
+
+        self._load_content(content, upsert, skip_if_exists, include, exclude)
 
     def _should_skip(self, content_hash: str, skip_if_exists: bool) -> bool:
         """
@@ -384,7 +486,148 @@ class Knowledge:
 
         return False
 
-
+    def _select_reader_by_extension(
+        self, file_extension: str, provided_reader: Optional[Reader] = None
+    ) -> Tuple[Optional[Reader], str]:
+        """
+        Select a reader based on file extension.
+
+        Args:
+            file_extension: File extension (e.g., '.pdf', '.csv')
+            provided_reader: Optional reader already provided
+
+        Returns:
+            Tuple of (reader, name) where name may be adjusted based on extension
+        """
+        if provided_reader:
+            return provided_reader, ""
+
+        file_extension = file_extension.lower()
+        if file_extension == ".csv":
+            return self.csv_reader, "data.csv"
+        elif file_extension == ".pdf":
+            return self.pdf_reader, ""
+        elif file_extension == ".docx":
+            return self.docx_reader, ""
+        elif file_extension == ".pptx":
+            return self.pptx_reader, ""
+        elif file_extension == ".json":
+            return self.json_reader, ""
+        elif file_extension == ".markdown":
+            return self.markdown_reader, ""
+        else:
+            return self.text_reader, ""
+
+    def _select_reader_by_uri(self, uri: str, provided_reader: Optional[Reader] = None) -> Optional[Reader]:
+        """
+        Select a reader based on URI/file path extension.
+
+        Args:
+            uri: URI or file path
+            provided_reader: Optional reader already provided
+
+        Returns:
+            Selected reader or None
+        """
+        if provided_reader:
+            return provided_reader
+
+        uri_lower = uri.lower()
+        if uri_lower.endswith(".pdf"):
+            return self.pdf_reader
+        elif uri_lower.endswith(".csv"):
+            return self.csv_reader
+        elif uri_lower.endswith(".docx"):
+            return self.docx_reader
+        elif uri_lower.endswith(".pptx"):
+            return self.pptx_reader
+        elif uri_lower.endswith(".json"):
+            return self.json_reader
+        elif uri_lower.endswith(".markdown"):
+            return self.markdown_reader
+        else:
+            return self.text_reader
+
+    def _read_with_reader(
+        self,
+        reader: Reader,
+        source: Union[Path, str, BytesIO],
+        name: Optional[str] = None,
+        password: Optional[str] = None,
+    ) -> List[Document]:
+        """
+        Read content using a reader with optional password handling.
+
+        Args:
+            reader: Reader to use
+            source: Source to read from (Path, URL string, or BytesIO)
+            name: Optional name for the document
+            password: Optional password for protected files
+
+        Returns:
+            List of documents read
+        """
+        import inspect
+
+        read_signature = inspect.signature(reader.read)
+        if password and "password" in read_signature.parameters:
+            if isinstance(source, BytesIO):
+                return reader.read(source, name=name, password=password)
+            else:
+                return reader.read(source, name=name, password=password)
+        else:
+            if isinstance(source, BytesIO):
+                return reader.read(source, name=name)
+            else:
+                return reader.read(source, name=name)
+
+    def _prepare_documents_for_insert(
+        self,
+        documents: List[Document],
+        content_id: str,
+        calculate_sizes: bool = False,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> List[Document]:
+        """
+        Prepare documents for insertion by assigning content_id and optionally calculating sizes and updating metadata.
+
+        Args:
+            documents: List of documents to prepare
+            content_id: Content ID to assign to documents
+            calculate_sizes: Whether to calculate document sizes
+            metadata: Optional metadata to merge into document metadata
+
+        Returns:
+            List of prepared documents
+        """
+        for document in documents:
+            document.content_id = content_id
+            if calculate_sizes and document.content and not document.size:
+                document.size = len(document.content.encode("utf-8"))
+            if metadata:
+                document.meta_data.update(metadata)
+        return documents
+
+    def _chunk_documents_sync(self, reader: Reader, documents: List[Document]) -> List[Document]:
+        """
+        Chunk documents synchronously.
+
+        Args:
+            reader: Reader with chunking strategy
+            documents: Documents to chunk
+
+        Returns:
+            List of chunked documents
+        """
+        if not reader or reader.chunk:
+            return documents
+
+        chunked_documents = []
+        for doc in documents:
+            chunked_documents.extend(reader.chunk_document(doc))
+        return chunked_documents
+
+    async def _load_from_path_async(
         self,
         content: Content,
         upsert: bool,
@@ -403,7 +646,7 @@ class Knowledge:
             if self._should_include_file(str(path), include, exclude):
                 log_debug(f"Adding file {path} due to include/exclude filters")
 
-                await self._add_to_contents_db(content)
+                await self._add_to_contents_db_async(content)
                 if self._should_skip(content.content_hash, skip_if_exists):  # type: ignore[arg-type]
                     content.status = ContentStatus.COMPLETED
                     await self._aupdate_content(content)
@@ -411,7 +654,90 @@ class Knowledge:
 
                 # Handle LightRAG special case - read file and upload directly
                 if self.vector_db.__class__.__name__ == "LightRag":
-                    await self._process_lightrag_content(content, KnowledgeContentOrigin.PATH)
+                    await self._process_lightrag_content_async(content, KnowledgeContentOrigin.PATH)
+                    return
+
+                if content.reader:
+                    reader = content.reader
+                else:
+                    reader = ReaderFactory.get_reader_for_extension(path.suffix)
+                    log_debug(f"Using Reader: {reader.__class__.__name__}")
+
+                if reader:
+                    password = content.auth.password if content.auth and content.auth.password else None
+                    read_documents = self._read_with_reader(
+                        reader, path, name=content.name or path.name, password=password
+                    )
+                else:
+                    read_documents = []
+
+                if not content.file_type:
+                    content.file_type = path.suffix
+
+                if not content.size and content.file_data:
+                    content.size = len(content.file_data.content)  # type: ignore
+                if not content.size:
+                    try:
+                        content.size = path.stat().st_size
+                    except (OSError, IOError) as e:
+                        log_warning(f"Could not get file size for {path}: {e}")
+                        content.size = 0
+
+                if not content.id:
+                    content.id = generate_id(content.content_hash or "")
+                self._prepare_documents_for_insert(read_documents, content.id)
+
+                await self._handle_vector_db_insert_async(content, read_documents, upsert)
+
+        elif path.is_dir():
+            for file_path in path.iterdir():
+                # Apply include/exclude filtering
+                if not self._should_include_file(str(file_path), include, exclude):
+                    log_debug(f"Skipping file {file_path} due to include/exclude filters")
+                    continue
+
+                file_content = Content(
+                    name=content.name,
+                    path=str(file_path),
+                    metadata=content.metadata,
+                    description=content.description,
+                    reader=content.reader,
+                )
+                file_content.content_hash = self._build_content_hash(file_content)
+                file_content.id = generate_id(file_content.content_hash)
+
+                await self._load_from_path_async(file_content, upsert, skip_if_exists, include, exclude)
+        else:
+            log_warning(f"Invalid path: {path}")
+
+    def _load_from_path(
+        self,
+        content: Content,
+        upsert: bool,
+        skip_if_exists: bool,
+        include: Optional[List[str]] = None,
+        exclude: Optional[List[str]] = None,
+    ):
+        from agno.vectordb import VectorDb
+
+        self.vector_db = cast(VectorDb, self.vector_db)
+
+        log_info(f"Adding content from path, {content.id}, {content.name}, {content.path}, {content.description}")
+        path = Path(content.path)  # type: ignore
+
+        if path.is_file():
+            if self._should_include_file(str(path), include, exclude):
+                log_debug(f"Adding file {path} due to include/exclude filters")
+
+                self._add_to_contents_db(content)
+                if self._should_skip(content.content_hash, skip_if_exists):  # type: ignore[arg-type]
+                    content.status = ContentStatus.COMPLETED
+                    self._update_content(content)
+                    return
+
+                # Handle LightRAG special case - read file and upload directly
+                if self.vector_db.__class__.__name__ == "LightRag":
+                    self._process_lightrag_content(content, KnowledgeContentOrigin.PATH)
                     return
 
                 if content.reader:
@@ -453,10 +779,11 @@ class Knowledge:
                         log_warning(f"Could not get file size for {path}: {e}")
                         content.size = 0
 
-
-
+                if not content.id:
+                    content.id = generate_id(content.content_hash or "")
+                self._prepare_documents_for_insert(read_documents, content.id)
 
-                await self._handle_vector_db_insert(content, read_documents, upsert)
+                self._handle_vector_db_insert(content, read_documents, upsert)
 
         elif path.is_dir():
             for file_path in path.iterdir():
@@ -475,11 +802,11 @@ class Knowledge:
                 file_content.content_hash = self._build_content_hash(file_content)
                 file_content.id = generate_id(file_content.content_hash)
 
-                await self._load_from_path(file_content, upsert, skip_if_exists, include, exclude)
+                self._load_from_path(file_content, upsert, skip_if_exists, include, exclude)
         else:
             log_warning(f"Invalid path: {path}")
 
-    async def _load_from_url(
+    async def _load_from_url_async(
         self,
         content: Content,
         upsert: bool,
@@ -503,14 +830,14 @@ class Knowledge:
             raise ValueError("No url provided")
 
         # 1. Add content to contents database
-        await self._add_to_contents_db(content)
+        await self._add_to_contents_db_async(content)
         if self._should_skip(content.content_hash, skip_if_exists):  # type: ignore[arg-type]
             content.status = ContentStatus.COMPLETED
             await self._aupdate_content(content)
             return
 
         if self.vector_db.__class__.__name__ == "LightRag":
-            await self._process_lightrag_content(content, KnowledgeContentOrigin.URL)
+            await self._process_lightrag_content_async(content, KnowledgeContentOrigin.URL)
             return
 
         # 2. Validate URL
@@ -540,47 +867,25 @@ class Knowledge:
             bytes_content = BytesIO(response.content)
 
         # 4. Select reader
-        # If a reader was provided by the user, use it
-        reader = content.reader
         name = content.name if content.name else content.url
-
-        if reader is None:
-            if file_extension == ".csv":
-                name = basename(parsed_url.path) or "data.csv"
-                reader = self.csv_reader
-            elif file_extension == ".pdf":
-                reader = self.pdf_reader
-            elif file_extension == ".docx":
-                reader = self.docx_reader
-            elif file_extension == ".pptx":
-                reader = self.pptx_reader
-            elif file_extension == ".json":
-                reader = self.json_reader
-            elif file_extension == ".markdown":
-                reader = self.markdown_reader
-            else:
-                reader = self.text_reader
+        if file_extension:
+            reader, default_name = self._select_reader_by_extension(file_extension, content.reader)
+            if default_name and file_extension == ".csv":
+                name = basename(parsed_url.path) or default_name
+        else:
+            reader = content.reader or self.website_reader
 
         # 5. Read content
         try:
            read_documents = []
             if reader is not None:
-                #
-                import inspect
-
-                read_signature = inspect.signature(reader.read)
+                # Special handling for YouTubeReader
                 if reader.__class__.__name__ == "YouTubeReader":
                     read_documents = reader.read(content.url, name=name)
-                elif "password" in read_signature.parameters and content.auth and content.auth.password:
-                    if bytes_content:
-                        read_documents = reader.read(bytes_content, name=name, password=content.auth.password)
-                    else:
-                        read_documents = reader.read(content.url, name=name, password=content.auth.password)
                 else:
-                    if bytes_content:
-
-
-                        read_documents = reader.read(content.url, name=name)
+                    password = content.auth.password if content.auth and content.auth.password else None
+                    source = bytes_content if bytes_content else content.url
+                    read_documents = self._read_with_reader(reader, source, name=name, password=password)
 
         except Exception as e:
             log_error(f"Error reading URL: {content.url} - {str(e)}")
@@ -593,19 +898,115 @@ class Knowledge:
         if reader and not reader.chunk:
             read_documents = await reader.chunk_documents_async(read_documents)
         # 7. Prepare and insert the content in the vector database
-
-
-
-
-
-
-        await self._handle_vector_db_insert(content, read_documents, upsert)
-
-    async def _load_from_content(
+        if not content.id:
+            content.id = generate_id(content.content_hash or "")
+        self._prepare_documents_for_insert(read_documents, content.id, calculate_sizes=True)
+        await self._handle_vector_db_insert_async(content, read_documents, upsert)
+
+    def _load_from_url(
         self,
         content: Content,
-        upsert: bool
-        skip_if_exists: bool
+        upsert: bool,
+        skip_if_exists: bool,
+    ):
+        """Synchronous version of _load_from_url.
+
+        Load the content from a URL:
+        1. Set content hash
+        2. Validate the URL
+        3. Read the content
+        4. Prepare and insert the content in the vector database
+        """
+        from agno.utils.http import fetch_with_retry
+        from agno.vectordb import VectorDb
+
+        self.vector_db = cast(VectorDb, self.vector_db)
+
+        log_info(f"Adding content from URL {content.url}")
+        content.file_type = "url"
+
+        if not content.url:
+            raise ValueError("No url provided")
+
+        # 1. Add content to contents database
+        self._add_to_contents_db(content)
+        if self._should_skip(content.content_hash, skip_if_exists):  # type: ignore[arg-type]
+            content.status = ContentStatus.COMPLETED
+            self._update_content(content)
+            return
+
+        if self.vector_db.__class__.__name__ == "LightRag":
+            self._process_lightrag_content(content, KnowledgeContentOrigin.URL)
+            return
+
+        # 2. Validate URL
+        try:
+            from urllib.parse import urlparse
+
+            parsed_url = urlparse(content.url)
+            if not all([parsed_url.scheme, parsed_url.netloc]):
+                content.status = ContentStatus.FAILED
+                content.status_message = f"Invalid URL format: {content.url}"
+                self._update_content(content)
+                log_warning(f"Invalid URL format: {content.url}")
+        except Exception as e:
+            content.status = ContentStatus.FAILED
+            content.status_message = f"Invalid URL: {content.url} - {str(e)}"
+            self._update_content(content)
+            log_warning(f"Invalid URL: {content.url} - {str(e)}")
+
+        # 3. Fetch and load content if file has an extension
+        url_path = Path(parsed_url.path)
+        file_extension = url_path.suffix.lower()
+
+        bytes_content = None
+        if file_extension:
+            response = fetch_with_retry(content.url)
+            bytes_content = BytesIO(response.content)
+
+        # 4. Select reader
+        name = content.name if content.name else content.url
+        if file_extension:
+            reader, default_name = self._select_reader_by_extension(file_extension, content.reader)
+            if default_name and file_extension == ".csv":
+                name = basename(parsed_url.path) or default_name
+        else:
+            reader = content.reader or self.website_reader
+
+        # 5. Read content
+        try:
+            read_documents = []
+            if reader is not None:
+                # Special handling for YouTubeReader
+                if reader.__class__.__name__ == "YouTubeReader":
+                    read_documents = reader.read(content.url, name=name)
+                else:
+                    password = content.auth.password if content.auth and content.auth.password else None
+                    source = bytes_content if bytes_content else content.url
+                    read_documents = self._read_with_reader(reader, source, name=name, password=password)
+
+        except Exception as e:
+            log_error(f"Error reading URL: {content.url} - {str(e)}")
+            content.status = ContentStatus.FAILED
+            content.status_message = f"Error reading URL: {content.url} - {str(e)}"
+            self._update_content(content)
+            return
+
+        # 6. Chunk documents if needed (sync version)
+        if reader:
+            read_documents = self._chunk_documents_sync(reader, read_documents)
+
+        # 7. Prepare and insert the content in the vector database
+        if not content.id:
+            content.id = generate_id(content.content_hash or "")
+        self._prepare_documents_for_insert(read_documents, content.id, calculate_sizes=True)
+        self._handle_vector_db_insert(content, read_documents, upsert)
+
+    async def _load_from_content_async(
+        self,
+        content: Content,
+        upsert: bool = True,
+        skip_if_exists: bool = False,
     ):
         from agno.vectordb import VectorDb
 
@@ -632,14 +1033,14 @@ class Knowledge:
 
         log_info(f"Adding content from {content.name}")
 
-        await self._add_to_contents_db(content)
+        await self._add_to_contents_db_async(content)
         if self._should_skip(content.content_hash, skip_if_exists):  # type: ignore[arg-type]
             content.status = ContentStatus.COMPLETED
             await self._aupdate_content(content)
             return
 
         if content.file_data and self.vector_db.__class__.__name__ == "LightRag":
-            await self._process_lightrag_content(content, KnowledgeContentOrigin.CONTENT)
+            await self._process_lightrag_content_async(content, KnowledgeContentOrigin.CONTENT)
             return
 
         read_documents = []
@@ -679,10 +1080,9 @@ class Knowledge:
                     reader = self._select_reader(content.file_data.type)
                 name = content.name if content.name else f"content_{content.file_data.type}"
                 read_documents = reader.read(content_io, name=name)
-
-
-
-                    read_document.content_id = content.id
+                if not content.id:
+                    content.id = generate_id(content.content_hash or "")
+                self._prepare_documents_for_insert(read_documents, content.id, metadata=content.metadata)
 
                 if len(read_documents) == 0:
                     content.status = ContentStatus.FAILED
@@ -696,9 +1096,106 @@ class Knowledge:
         await self._aupdate_content(content)
         return
 
-        await self._handle_vector_db_insert(content, read_documents, upsert)
+        await self._handle_vector_db_insert_async(content, read_documents, upsert)
+
+    def _load_from_content(
+        self,
+        content: Content,
+        upsert: bool = True,
+        skip_if_exists: bool = False,
+    ):
+        """Synchronous version of _load_from_content."""
+        from agno.vectordb import VectorDb
+
+        self.vector_db = cast(VectorDb, self.vector_db)
+
+        if content.name:
+            name = content.name
+        elif content.file_data and content.file_data.content:
+            if isinstance(content.file_data.content, bytes):
+                name = content.file_data.content[:10].decode("utf-8", errors="ignore")
+            elif isinstance(content.file_data.content, str):
+                name = (
+                    content.file_data.content[:10]
+                    if len(content.file_data.content) >= 10
+                    else content.file_data.content
+                )
+            else:
+                name = str(content.file_data.content)[:10]
+        else:
+            name = None
+
+        if name is not None:
+            content.name = name
+
+        log_info(f"Adding content from {content.name}")
+
+        self._add_to_contents_db(content)
+        if self._should_skip(content.content_hash, skip_if_exists):  # type: ignore[arg-type]
+            content.status = ContentStatus.COMPLETED
+            self._update_content(content)
+            return
+
+        if content.file_data and self.vector_db.__class__.__name__ == "LightRag":
+            self._process_lightrag_content(content, KnowledgeContentOrigin.CONTENT)
+            return
+
+        read_documents = []
+
+        if isinstance(content.file_data, str):
+            content_bytes = content.file_data.encode("utf-8", errors="replace")
+            content_io = io.BytesIO(content_bytes)
+
+            if content.reader:
+                log_debug(f"Using reader: {content.reader.__class__.__name__} to read content")
+                read_documents = content.reader.read(content_io, name=name)
+            else:
+                text_reader = self.text_reader
+                if text_reader:
+                    read_documents = text_reader.read(content_io, name=name)
+                else:
+                    content.status = ContentStatus.FAILED
+                    content.status_message = "Text reader not available"
+                    self._update_content(content)
+                    return
 
-
+        elif isinstance(content.file_data, FileData):
+            if content.file_data.type:
+                if isinstance(content.file_data.content, bytes):
+                    content_io = io.BytesIO(content.file_data.content)
+                elif isinstance(content.file_data.content, str):
+                    content_bytes = content.file_data.content.encode("utf-8", errors="replace")
+                    content_io = io.BytesIO(content_bytes)
+                else:
+                    content_io = content.file_data.content  # type: ignore
+
+                # Respect an explicitly provided reader; otherwise select based on file type
+                if content.reader:
+                    log_debug(f"Using reader: {content.reader.__class__.__name__} to read content")
+                    reader = content.reader
+                else:
+                    reader = self._select_reader(content.file_data.type)
+                name = content.name if content.name else f"content_{content.file_data.type}"
+                read_documents = reader.read(content_io, name=name)
+                if not content.id:
+                    content.id = generate_id(content.content_hash or "")
+                self._prepare_documents_for_insert(read_documents, content.id, metadata=content.metadata)
+
+                if len(read_documents) == 0:
+                    content.status = ContentStatus.FAILED
+                    content.status_message = "Content could not be read"
+                    self._update_content(content)
+                    return
+
+            else:
+                content.status = ContentStatus.FAILED
+                content.status_message = "No content provided"
+                self._update_content(content)
+                return
+
+        self._handle_vector_db_insert(content, read_documents, upsert)
+
-    async def _load_from_topics(
+    async def _load_from_topics_async(
         self,
         content: Content,
         upsert: bool,
@@ -727,47 +1224,264 @@ class Knowledge:
             content.content_hash = self._build_content_hash(content)
             content.id = generate_id(content.content_hash)
 
-            await self._add_to_contents_db(content)
+            await self._add_to_contents_db_async(content)
             if self._should_skip(content.content_hash, skip_if_exists):
                 content.status = ContentStatus.COMPLETED
                 await self._aupdate_content(content)
                 return
 
-            if self.vector_db.__class__.__name__ == "LightRag":
-                await self._process_lightrag_content(content, KnowledgeContentOrigin.TOPIC)
+            if self.vector_db.__class__.__name__ == "LightRag":
+                await self._process_lightrag_content_async(content, KnowledgeContentOrigin.TOPIC)
+                return
+
+            if self.vector_db and self.vector_db.content_hash_exists(content.content_hash) and skip_if_exists:
+                log_info(f"Content {content.content_hash} already exists, skipping")
+                continue
+
+            await self._add_to_contents_db_async(content)
+            if content.reader is None:
+                log_error(f"No reader available for topic: {topic}")
+                content.status = ContentStatus.FAILED
+                content.status_message = "No reader available for topic"
+                await self._aupdate_content(content)
+                continue
+
+            read_documents = content.reader.read(topic)
+            if len(read_documents) > 0:
+                self._prepare_documents_for_insert(read_documents, content.id, calculate_sizes=True)
+            else:
+                content.status = ContentStatus.FAILED
+                content.status_message = "No content found for topic"
+                await self._aupdate_content(content)
+
+            await self._handle_vector_db_insert_async(content, read_documents, upsert)
+
+    def _load_from_topics(
+        self,
+        content: Content,
+        upsert: bool,
+        skip_if_exists: bool,
+    ):
+        """Synchronous version of _load_from_topics."""
+        from agno.vectordb import VectorDb
+
+        self.vector_db = cast(VectorDb, self.vector_db)
+        log_info(f"Adding content from topics: {content.topics}")
+
+        if content.topics is None:
+            log_warning("No topics provided for content")
+            return
+
+        for topic in content.topics:
+            content = Content(
+                name=topic,
+                metadata=content.metadata,
+                reader=content.reader,
+                status=ContentStatus.PROCESSING if content.reader else ContentStatus.FAILED,
+                file_data=FileData(
+                    type="Topic",
+                ),
+                topics=[topic],
+            )
+            content.content_hash = self._build_content_hash(content)
+            content.id = generate_id(content.content_hash)
+
+            self._add_to_contents_db(content)
+            if self._should_skip(content.content_hash, skip_if_exists):
+                content.status = ContentStatus.COMPLETED
+                self._update_content(content)
+                return
+
+            if self.vector_db.__class__.__name__ == "LightRag":
+                self._process_lightrag_content(content, KnowledgeContentOrigin.TOPIC)
+                return
+
+            if self.vector_db and self.vector_db.content_hash_exists(content.content_hash) and skip_if_exists:
+                log_info(f"Content {content.content_hash} already exists, skipping")
+                continue
+
+            self._add_to_contents_db(content)
+            if content.reader is None:
+                log_error(f"No reader available for topic: {topic}")
+                content.status = ContentStatus.FAILED
+                content.status_message = "No reader available for topic"
+                self._update_content(content)
+                continue
+
+            read_documents = content.reader.read(topic)
+            if len(read_documents) > 0:
+                self._prepare_documents_for_insert(read_documents, content.id, calculate_sizes=True)
+            else:
+                content.status = ContentStatus.FAILED
+                content.status_message = "No content found for topic"
+                self._update_content(content)
+
+            self._handle_vector_db_insert(content, read_documents, upsert)
+
+    async def _load_from_remote_content_async(
+        self,
+        content: Content,
+        upsert: bool,
+        skip_if_exists: bool,
+    ):
+        if content.remote_content is None:
+            log_warning("No remote content provided for content")
+            return
+
+        remote_content = content.remote_content
+
+        if isinstance(remote_content, S3Content):
+            await self._load_from_s3_async(content, upsert, skip_if_exists)
+
+        elif isinstance(remote_content, GCSContent):
+            await self._load_from_gcs_async(content, upsert, skip_if_exists)
+
+        else:
+            log_warning(f"Unsupported remote content type: {type(remote_content)}")
+
+    async def _load_from_s3_async(self, content: Content, upsert: bool, skip_if_exists: bool):
+        """Load the contextual S3 content.
+
+        1. Identify objects to read
+        2. Setup Content object
+        3. Hash content and add it to the contents database
+        4. Select reader
+        5. Fetch and load the content
+        6. Read the content
+        7. Prepare and insert the content in the vector database
+        8. Remove temporary file if needed
+        """
+        from agno.cloud.aws.s3.object import S3Object
+
+        remote_content: S3Content = cast(S3Content, content.remote_content)
+
+        # 1. Identify objects to read
+        objects_to_read: List[S3Object] = []
+        if remote_content.bucket is not None:
+            if remote_content.key is not None:
+                _object = S3Object(bucket_name=remote_content.bucket.name, name=remote_content.key)
+                objects_to_read.append(_object)
+            elif remote_content.object is not None:
+                objects_to_read.append(remote_content.object)
+            elif remote_content.prefix is not None:
+                objects_to_read.extend(remote_content.bucket.get_objects(prefix=remote_content.prefix))
+            else:
+                objects_to_read.extend(remote_content.bucket.get_objects())
+
+        for s3_object in objects_to_read:
+            # 2. Setup Content object
+            content_name = content.name or ""
+            content_name += "_" + (s3_object.name or "")
+            content_entry = Content(
+                name=content_name,
+                description=content.description,
+                status=ContentStatus.PROCESSING,
+                metadata=content.metadata,
+                file_type="s3",
+            )
+
+            # 3. Hash content and add it to the contents database
+            content_entry.content_hash = self._build_content_hash(content_entry)
+            content_entry.id = generate_id(content_entry.content_hash)
+            await self._add_to_contents_db_async(content_entry)
+            if self._should_skip(content_entry.content_hash, skip_if_exists):
+                content_entry.status = ContentStatus.COMPLETED
+                await self._aupdate_content(content_entry)
+                return
+
+            # 4. Select reader
+            reader = self._select_reader_by_uri(s3_object.uri, content.reader)
+            reader = cast(Reader, reader)
+
+            # 5. Fetch and load the content
+            temporary_file = None
+            obj_name = content_name or s3_object.name.split("/")[-1]
+            readable_content: Optional[Union[BytesIO, Path]] = None
+            if s3_object.uri.endswith(".pdf"):
+                readable_content = BytesIO(s3_object.get_resource().get()["Body"].read())
+            else:
+                temporary_file = Path("storage").joinpath(obj_name)
+                readable_content = temporary_file
+                s3_object.download(readable_content)  # type: ignore
+
+            # 6. Read the content
+            read_documents = reader.read(readable_content, name=obj_name)
+
+            # 7. Prepare and insert the content in the vector database
+            if not content.id:
+                content.id = generate_id(content.content_hash or "")
+            self._prepare_documents_for_insert(read_documents, content.id)
+            await self._handle_vector_db_insert_async(content_entry, read_documents, upsert)
+
+            # 8. Remove temporary file if needed
+            if temporary_file:
+                temporary_file.unlink()
+
+    async def _load_from_gcs_async(self, content: Content, upsert: bool, skip_if_exists: bool):
+        """Load the contextual GCS content.
+
+        1. Identify objects to read
+        2. Setup Content object
+        3. Hash content and add it to the contents database
+        4. Select reader
+        5. Fetch and load the content
+        6. Read the content
+        7. Prepare and insert the content in the vector database
+        """
+        remote_content: GCSContent = cast(GCSContent, content.remote_content)
+
+        # 1. Identify objects to read
+        objects_to_read = []
+        if remote_content.blob_name is not None:
+            objects_to_read.append(remote_content.bucket.blob(remote_content.blob_name))  # type: ignore
+        elif remote_content.prefix is not None:
+            objects_to_read.extend(remote_content.bucket.list_blobs(prefix=remote_content.prefix))  # type: ignore
+        else:
+            objects_to_read.extend(remote_content.bucket.list_blobs())  # type: ignore
+
+        for gcs_object in objects_to_read:
+            # 2. Setup Content object
+            name = (content.name or "content") + "_" + gcs_object.name
+            content_entry = Content(
+                name=name,
+                description=content.description,
+                status=ContentStatus.PROCESSING,
+                metadata=content.metadata,
+                file_type="gcs",
+            )
+
+            # 3. Hash content and add it to the contents database
+            content_entry.content_hash = self._build_content_hash(content_entry)
+            content_entry.id = generate_id(content_entry.content_hash)
+            await self._add_to_contents_db_async(content_entry)
+            if self._should_skip(content_entry.content_hash, skip_if_exists):
+                content_entry.status = ContentStatus.COMPLETED
+                await self._aupdate_content(content_entry)
                 return
 
-
-
-
+            # 4. Select reader
+            reader = self._select_reader_by_uri(gcs_object.name, content.reader)
+            reader = cast(Reader, reader)
 
-
-
-                log_error(f"No reader available for topic: {topic}")
-                content.status = ContentStatus.FAILED
-                content.status_message = "No reader available for topic"
-                await self._aupdate_content(content)
-                continue
+            # 5. Fetch and load the content
+            readable_content = BytesIO(gcs_object.download_as_bytes())
 
-
-
-            for read_document in read_documents:
-                read_document.content_id = content.id
-                if read_document.content:
-                    read_document.size = len(read_document.content.encode("utf-8"))
-            else:
-                content.status = ContentStatus.FAILED
-                content.status_message = "No content found for topic"
-                await self._aupdate_content(content)
+            # 6. Read the content
+            read_documents = reader.read(readable_content, name=name)
 
-
+            # 7. Prepare and insert the content in the vector database
+            if not content.id:
+                content.id = generate_id(content.content_hash or "")
+            self._prepare_documents_for_insert(read_documents, content.id)
+            await self._handle_vector_db_insert_async(content_entry, read_documents, upsert)
 
-    async def _load_from_remote_content(
+    def _load_from_remote_content(
         self,
         content: Content,
         upsert: bool,
         skip_if_exists: bool,
     ):
+        """Synchronous version of _load_from_remote_content."""
         if content.remote_content is None:
             log_warning("No remote content provided for content")
             return
@@ -775,17 +1489,18 @@ class Knowledge:
         remote_content = content.remote_content
 
         if isinstance(remote_content, S3Content):
-            await self._load_from_s3(content, upsert, skip_if_exists)
+            self._load_from_s3(content, upsert, skip_if_exists)
 
         elif isinstance(remote_content, GCSContent):
-            await self._load_from_gcs(content, upsert, skip_if_exists)
+            self._load_from_gcs(content, upsert, skip_if_exists)
 
         else:
             log_warning(f"Unsupported remote content type: {type(remote_content)}")
 
-    async def _load_from_s3(self, content: Content, upsert: bool, skip_if_exists: bool):
-        """
+    def _load_from_s3(self, content: Content, upsert: bool, skip_if_exists: bool):
+        """Synchronous version of _load_from_s3.
 
+        Load the contextual S3 content:
         1. Identify objects to read
         2. Setup Content object
         3. Hash content and add it to the contents database
@@ -827,29 +1542,14 @@ class Knowledge:
             # 3. Hash content and add it to the contents database
             content_entry.content_hash = self._build_content_hash(content_entry)
             content_entry.id = generate_id(content_entry.content_hash)
-            await self._add_to_contents_db(content_entry)
+            self._add_to_contents_db(content_entry)
             if self._should_skip(content_entry.content_hash, skip_if_exists):
                 content_entry.status = ContentStatus.COMPLETED
-                await self._aupdate_content(content_entry)
+                self._update_content(content_entry)
                 return
 
             # 4. Select reader
-            reader = content.reader
-            if reader is None:
-                if s3_object.uri.endswith(".pdf"):
-                    reader = self.pdf_reader
-                elif s3_object.uri.endswith(".csv"):
-                    reader = self.csv_reader
-                elif s3_object.uri.endswith(".docx"):
-                    reader = self.docx_reader
-                elif s3_object.uri.endswith(".pptx"):
-                    reader = self.pptx_reader
-                elif s3_object.uri.endswith(".json"):
-                    reader = self.json_reader
-                elif s3_object.uri.endswith(".markdown"):
-                    reader = self.markdown_reader
-                else:
-                    reader = self.text_reader
+            reader = self._select_reader_by_uri(s3_object.uri, content.reader)
             reader = cast(Reader, reader)
 
             # 5. Fetch and load the content
@@ -867,17 +1567,19 @@ class Knowledge:
             read_documents = reader.read(readable_content, name=obj_name)
 
             # 7. Prepare and insert the content in the vector database
-
-
-
+            if not content.id:
+                content.id = generate_id(content.content_hash or "")
+            self._prepare_documents_for_insert(read_documents, content.id)
+            self._handle_vector_db_insert(content_entry, read_documents, upsert)
 
             # 8. Remove temporary file if needed
             if temporary_file:
                 temporary_file.unlink()
 
-    async def _load_from_gcs(self, content: Content, upsert: bool, skip_if_exists: bool):
-        """
+    def _load_from_gcs(self, content: Content, upsert: bool, skip_if_exists: bool):
+        """Synchronous version of _load_from_gcs.
 
+        Load the contextual GCS content:
         1. Identify objects to read
         2. Setup Content object
         3. Hash content and add it to the contents database
@@ -911,29 +1613,14 @@ class Knowledge:
             # 3. Hash content and add it to the contents database
             content_entry.content_hash = self._build_content_hash(content_entry)
             content_entry.id = generate_id(content_entry.content_hash)
-            await self._add_to_contents_db(content_entry)
+            self._add_to_contents_db(content_entry)
             if self._should_skip(content_entry.content_hash, skip_if_exists):
                 content_entry.status = ContentStatus.COMPLETED
-                await self._aupdate_content(content_entry)
+                self._update_content(content_entry)
                 return
 
             # 4. Select reader
-            reader = content.reader
-            if reader is None:
-                if gcs_object.name.endswith(".pdf"):
-                    reader = self.pdf_reader
-                elif gcs_object.name.endswith(".csv"):
-                    reader = self.csv_reader
-                elif gcs_object.name.endswith(".docx"):
-                    reader = self.docx_reader
-                elif gcs_object.name.endswith(".pptx"):
-                    reader = self.pptx_reader
-                elif gcs_object.name.endswith(".json"):
-                    reader = self.json_reader
-                elif gcs_object.name.endswith(".markdown"):
-                    reader = self.markdown_reader
-                else:
-                    reader = self.text_reader
+            reader = self._select_reader_by_uri(gcs_object.name, content.reader)
             reader = cast(Reader, reader)
 
             # 5. Fetch and load the content
@@ -943,11 +1630,12 @@ class Knowledge:
             read_documents = reader.read(readable_content, name=name)
 
             # 7. Prepare and insert the content in the vector database
-
-
-
+            if not content.id:
+                content.id = generate_id(content.content_hash or "")
+            self._prepare_documents_for_insert(read_documents, content.id)
+            self._handle_vector_db_insert(content_entry, read_documents, upsert)
 
-    async def _handle_vector_db_insert(self, content: Content, read_documents, upsert):
+    async def _handle_vector_db_insert_async(self, content: Content, read_documents, upsert):
         from agno.vectordb import VectorDb
 
         self.vector_db = cast(VectorDb, self.vector_db)
@@ -985,7 +1673,70 @@ class Knowledge:
         content.status = ContentStatus.COMPLETED
         await self._aupdate_content(content)
 
-    async def _load_content(
+    def _handle_vector_db_insert(self, content: Content, read_documents, upsert):
+        """Synchronously handle vector database insertion."""
+        from agno.vectordb import VectorDb
+
+        self.vector_db = cast(VectorDb, self.vector_db)
+
+        if not self.vector_db:
+            log_error("No vector database configured")
+            content.status = ContentStatus.FAILED
+            content.status_message = "No vector database configured"
+            self._update_content(content)
+            return
+
+        if self.vector_db.upsert_available() and upsert:
+            try:
+                self.vector_db.upsert(content.content_hash, read_documents, content.metadata)  # type: ignore[arg-type]
+            except Exception as e:
+                log_error(f"Error upserting document: {e}")
+                content.status = ContentStatus.FAILED
+                content.status_message = "Could not upsert embedding"
+                self._update_content(content)
+                return
+        else:
+            try:
+                self.vector_db.insert(
+                    content.content_hash,  # type: ignore[arg-type]
+                    documents=read_documents,
+                    filters=content.metadata,  # type: ignore[arg-type]
+                )
+            except Exception as e:
+                log_error(f"Error inserting document: {e}")
+                content.status = ContentStatus.FAILED
+                content.status_message = "Could not insert embedding"
+                self._update_content(content)
+                return
+
+        content.status = ContentStatus.COMPLETED
+        self._update_content(content)
+
+    def _load_content(
+        self,
+        content: Content,
+        upsert: bool,
+        skip_if_exists: bool,
+        include: Optional[List[str]] = None,
+        exclude: Optional[List[str]] = None,
+    ) -> None:
+        """Synchronously load content."""
+        if content.path:
+            self._load_from_path(content, upsert, skip_if_exists, include, exclude)
+
+        if content.url:
+            self._load_from_url(content, upsert, skip_if_exists)
+
+        if content.file_data:
+            self._load_from_content(content, upsert, skip_if_exists)
+
+        if content.topics:
+            self._load_from_topics(content, upsert, skip_if_exists)
+
+        if content.remote_content:
+            self._load_from_remote_content(content, upsert, skip_if_exists)
+
+    async def _load_content_async(
         self,
         content: Content,
         upsert: bool,
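The new synchronous `_handle_vector_db_insert` mirrors its async sibling: prefer `upsert` when the backend advertises it and the caller requested it, otherwise fall back to `insert`, and record a terminal status either way. A condensed, self-contained sketch of that control flow (the stub class below is illustrative, not the agno `VectorDb` interface):

```python
# Condensed control-flow sketch of the insert path added above.
from typing import Any, List


class FakeVectorDb:  # illustrative stand-in, not agno's VectorDb
    def upsert_available(self) -> bool:
        return True

    def upsert(self, content_hash: str, documents: List[Any], filters: Any = None) -> None:
        print(f"upserted {len(documents)} documents for {content_hash}")

    def insert(self, content_hash: str, documents: List[Any], filters: Any = None) -> None:
        print(f"inserted {len(documents)} documents for {content_hash}")


def handle_vector_db_insert(db: FakeVectorDb, content_hash: str, documents: List[Any], upsert: bool) -> str:
    try:
        if db.upsert_available() and upsert:
            db.upsert(content_hash, documents)
        else:
            db.insert(content_hash, documents=documents)
    except Exception:
        return "FAILED"  # the real method also records a status message
    return "COMPLETED"


print(handle_vector_db_insert(FakeVectorDb(), "hash123", ["doc"], upsert=True))
```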
@@ -994,19 +1745,19 @@ class Knowledge:
         exclude: Optional[List[str]] = None,
     ) -> None:
         if content.path:
-            await self._load_from_path(content, upsert, skip_if_exists, include, exclude)
+            await self._load_from_path_async(content, upsert, skip_if_exists, include, exclude)
 
         if content.url:
-            await self._load_from_url(content, upsert, skip_if_exists)
+            await self._load_from_url_async(content, upsert, skip_if_exists)
 
         if content.file_data:
-            await self._load_from_content(content, upsert, skip_if_exists)
+            await self._load_from_content_async(content, upsert, skip_if_exists)
 
         if content.topics:
-            await self._load_from_topics(content, upsert, skip_if_exists)
+            await self._load_from_topics_async(content, upsert, skip_if_exists)
 
         if content.remote_content:
-            await self._load_from_remote_content(content, upsert, skip_if_exists)
+            await self._load_from_remote_content_async(content, upsert, skip_if_exists)
 
     def _build_content_hash(self, content: Content) -> str:
         """
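With `_load_content` added and the awaited helpers renamed to `*_async`, each ingestion source on a `Content` object (`path`, `url`, `file_data`, `topics`, `remote_content`) dispatches to a dedicated sync or async loader, with no event-loop juggling between the two paths. A hedged usage sketch of the public entry points that sit above these private methods (constructor and keyword arguments here are illustrative and may differ from the actual API):

```python
# Illustrative usage; exact Knowledge constructor arguments may differ.
import asyncio

from agno.knowledge.knowledge import Knowledge

knowledge = Knowledge(name="docs")

# Sync path: add_content -> _load_content -> _load_from_url(...)
knowledge.add_content(url="https://example.com/guide.pdf")

# Async path: add_content_async -> _load_content_async -> _load_from_url_async(...)
asyncio.run(knowledge.add_content_async(url="https://example.com/guide.pdf"))
```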
@@ -1078,7 +1829,7 @@ class Knowledge:
             # Already a string, return as-is
             return value
 
-    async def _add_to_contents_db(self, content: Content):
+    async def _add_to_contents_db_async(self, content: Content):
         if self.contents_db:
             created_at = content.created_at if content.created_at else int(time.time())
             updated_at = content.updated_at if content.updated_at else int(time.time())
@@ -1121,6 +1872,52 @@ class Knowledge:
             else:
                 self.contents_db.upsert_knowledge_content(knowledge_row=content_row)
 
+    def _add_to_contents_db(self, content: Content):
+        """Synchronously add content to contents database."""
+        if self.contents_db:
+            if isinstance(self.contents_db, AsyncBaseDb):
+                raise ValueError(
+                    "_add_to_contents_db() is not supported with an async DB. Please use add_content_async with AsyncDb."
+                )
+
+            created_at = content.created_at if content.created_at else int(time.time())
+            updated_at = content.updated_at if content.updated_at else int(time.time())
+
+            file_type = (
+                content.file_type
+                if content.file_type
+                else content.file_data.type
+                if content.file_data and content.file_data.type
+                else None
+            )
+            # Safely handle string fields with proper type checking
+            safe_name = self._ensure_string_field(content.name, "content.name", default="")
+            safe_description = self._ensure_string_field(content.description, "content.description", default="")
+            safe_linked_to = self._ensure_string_field(self.name, "knowledge.name", default="")
+            safe_status_message = self._ensure_string_field(
+                content.status_message, "content.status_message", default=""
+            )
+
+            content_row = KnowledgeRow(
+                id=content.id,
+                name=safe_name,
+                description=safe_description,
+                metadata=content.metadata,
+                type=file_type,
+                size=content.size
+                if content.size
+                else len(content.file_data.content)
+                if content.file_data and content.file_data.content
+                else None,
+                linked_to=safe_linked_to,
+                access_count=0,
+                status=content.status if content.status else ContentStatus.PROCESSING,
+                status_message=safe_status_message,
+                created_at=created_at,
+                updated_at=updated_at,
+            )
+            self.contents_db.upsert_knowledge_content(knowledge_row=content_row)
+
     def _update_content(self, content: Content) -> Optional[Dict[str, Any]]:
         from agno.vectordb import VectorDb
 
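The synchronous `_add_to_contents_db` added here refuses to operate on an `AsyncBaseDb`, raising immediately rather than blocking on (or silently skipping) an async driver; the error message steers callers to `add_content_async`. In practice the pairing rule looks like this (class names follow this package; the connection string and keyword arguments are placeholders):

```python
# Pairing rule: sync DBs go with the sync path, async DBs with the async
# path. The db_url below is a placeholder, not a working connection.
from agno.db.postgres import PostgresDb
from agno.knowledge.knowledge import Knowledge

knowledge = Knowledge(
    name="docs",
    contents_db=PostgresDb(db_url="postgresql+psycopg://user:pass@localhost:5432/agno"),
)
knowledge.add_content(path="README.md")  # OK: sync DB, sync entry point

# With an async DB (e.g. the new agno.db.mysql.async_mysql or
# agno.db.postgres.async_postgres drivers), the sync path raises
# ValueError; use `await knowledge.add_content_async(...)` instead.
```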
@@ -1221,12 +2018,12 @@ class Knowledge:
             log_warning("Contents DB not found for knowledge base")
             return None
 
-    async def _process_lightrag_content(self, content: Content, content_type: KnowledgeContentOrigin) -> None:
+    async def _process_lightrag_content_async(self, content: Content, content_type: KnowledgeContentOrigin) -> None:
         from agno.vectordb import VectorDb
 
         self.vector_db = cast(VectorDb, self.vector_db)
 
-        await self._add_to_contents_db(content)
+        await self._add_to_contents_db_async(content)
         if content_type == KnowledgeContentOrigin.PATH:
             if content.file_data is None:
                 log_warning("No file data provided")
@@ -1283,9 +2080,9 @@ class Knowledge:
 
             reader.chunk = False
             read_documents = reader.read(content.url, name=content.name)
-
-
-
+            if not content.id:
+                content.id = generate_id(content.content_hash or "")
+            self._prepare_documents_for_insert(read_documents, content.id)
 
             if not read_documents:
                 log_error("No documents read from URL")
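This async LightRAG URL branch now mirrors the sync hunk above: ensure a deterministic `content.id`, then run `_prepare_documents_for_insert` before the empty-result check. That helper is not shown in this diff; a hypothetical sketch of what "preparing" plausibly means (stamping each read document with its parent content ID so later updates and deletes can target it):

```python
# Hypothetical: the real _prepare_documents_for_insert is defined
# elsewhere in agno/knowledge/knowledge.py and may do more than this.
from dataclasses import dataclass, field
from typing import Dict, List


@dataclass
class Doc:  # minimal stand-in for agno's Document
    content: str
    meta_data: Dict[str, str] = field(default_factory=dict)


def prepare_documents_for_insert(documents: List[Doc], content_id: str) -> None:
    for doc in documents:
        # Tag every chunk with the owning content ID.
        doc.meta_data["content_id"] = content_id


docs = [Doc("chunk one"), Doc("chunk two")]
prepare_documents_for_insert(docs, "abc-123")
assert all(d.meta_data["content_id"] == "abc-123" for d in docs)
```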
@@ -1378,6 +2175,175 @@ class Knowledge:
                 log_warning(f"No documents found for LightRAG upload: {content.name}")
                 return
 
+    def _process_lightrag_content(self, content: Content, content_type: KnowledgeContentOrigin) -> None:
+        """Synchronously process LightRAG content. Uses asyncio.run() only for LightRAG-specific async methods."""
+        from agno.vectordb import VectorDb
+
+        self.vector_db = cast(VectorDb, self.vector_db)
+
+        self._add_to_contents_db(content)
+        if content_type == KnowledgeContentOrigin.PATH:
+            if content.file_data is None:
+                log_warning("No file data provided")
+
+            if content.path is None:
+                log_error("No path provided for content")
+                return
+
+            path = Path(content.path)
+
+            log_info(f"Uploading file to LightRAG from path: {path}")
+            try:
+                # Read the file content from path
+                with open(path, "rb") as f:
+                    file_content = f.read()
+
+                # Get file type from extension or content.file_type
+                file_type = content.file_type or path.suffix
+
+                if self.vector_db and hasattr(self.vector_db, "insert_file_bytes"):
+                    # LightRAG only has async methods, use asyncio.run() here
+                    result = asyncio.run(
+                        self.vector_db.insert_file_bytes(
+                            file_content=file_content,
+                            filename=path.name,
+                            content_type=file_type,
+                            send_metadata=True,
+                        )
+                    )
+                else:
+                    log_error("Vector database does not support file insertion")
+                    content.status = ContentStatus.FAILED
+                    self._update_content(content)
+                    return
+                content.external_id = result
+                content.status = ContentStatus.COMPLETED
+                self._update_content(content)
+                return
+
+            except Exception as e:
+                log_error(f"Error uploading file to LightRAG: {e}")
+                content.status = ContentStatus.FAILED
+                content.status_message = f"Could not upload to LightRAG: {str(e)}"
+                self._update_content(content)
+                return
+
+        elif content_type == KnowledgeContentOrigin.URL:
+            log_info(f"Uploading file to LightRAG from URL: {content.url}")
+            try:
+                reader = content.reader or self.website_reader
+                if reader is None:
+                    log_error("No URL reader available")
+                    content.status = ContentStatus.FAILED
+                    self._update_content(content)
+                    return
+
+                reader.chunk = False
+                read_documents = reader.read(content.url, name=content.name)
+                if not content.id:
+                    content.id = generate_id(content.content_hash or "")
+                self._prepare_documents_for_insert(read_documents, content.id)
+
+                if not read_documents:
+                    log_error("No documents read from URL")
+                    content.status = ContentStatus.FAILED
+                    self._update_content(content)
+                    return
+
+                if self.vector_db and hasattr(self.vector_db, "insert_text"):
+                    # LightRAG only has async methods, use asyncio.run() here
+                    result = asyncio.run(
+                        self.vector_db.insert_text(
+                            file_source=content.url,
+                            text=read_documents[0].content,
+                        )
+                    )
+                else:
+                    log_error("Vector database does not support text insertion")
+                    content.status = ContentStatus.FAILED
+                    self._update_content(content)
+                    return
+
+                content.external_id = result
+                content.status = ContentStatus.COMPLETED
+                self._update_content(content)
+                return
+
+            except Exception as e:
+                log_error(f"Error uploading file to LightRAG: {e}")
+                content.status = ContentStatus.FAILED
+                content.status_message = f"Could not upload to LightRAG: {str(e)}"
+                self._update_content(content)
+                return
+
+        elif content_type == KnowledgeContentOrigin.CONTENT:
+            filename = (
+                content.file_data.filename if content.file_data and content.file_data.filename else "uploaded_file"
+            )
+            log_info(f"Uploading file to LightRAG: {filename}")
+
+            # Use the content from file_data
+            if content.file_data and content.file_data.content:
+                if self.vector_db and hasattr(self.vector_db, "insert_file_bytes"):
+                    # LightRAG only has async methods, use asyncio.run() here
+                    result = asyncio.run(
+                        self.vector_db.insert_file_bytes(
+                            file_content=content.file_data.content,
+                            filename=filename,
+                            content_type=content.file_data.type,
+                            send_metadata=True,
+                        )
+                    )
+                else:
+                    log_error("Vector database does not support file insertion")
+                    content.status = ContentStatus.FAILED
+                    self._update_content(content)
+                    return
+                content.external_id = result
+                content.status = ContentStatus.COMPLETED
+                self._update_content(content)
+            else:
+                log_warning(f"No file data available for LightRAG upload: {content.name}")
+                return
+
+        elif content_type == KnowledgeContentOrigin.TOPIC:
+            log_info(f"Uploading file to LightRAG: {content.name}")
+
+            if content.reader is None:
+                log_error("No reader available for topic content")
+                content.status = ContentStatus.FAILED
+                self._update_content(content)
+                return
+
+            if not content.topics:
+                log_error("No topics available for content")
+                content.status = ContentStatus.FAILED
+                self._update_content(content)
+                return
+
+            read_documents = content.reader.read(content.topics)
+            if len(read_documents) > 0:
+                if self.vector_db and hasattr(self.vector_db, "insert_text"):
+                    # LightRAG only has async methods, use asyncio.run() here
+                    result = asyncio.run(
+                        self.vector_db.insert_text(
+                            file_source=content.topics[0],
+                            text=read_documents[0].content,
+                        )
+                    )
+                else:
+                    log_error("Vector database does not support text insertion")
+                    content.status = ContentStatus.FAILED
+                    self._update_content(content)
+                    return
+                content.external_id = result
+                content.status = ContentStatus.COMPLETED
+                self._update_content(content)
+                return
+            else:
+                log_warning(f"No documents found for LightRAG upload: {content.name}")
+                return
+
     def search(
         self,
         query: str,
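Note on the new `_process_lightrag_content`: as its docstring and inline comments state, the LightRAG client exposes only coroutines, so the sync path bridges with `asyncio.run()`. That call spins up a fresh event loop and therefore raises `RuntimeError` if invoked from inside an already-running loop, which is exactly why this wrapper exists alongside, rather than instead of, `_process_lightrag_content_async`. A minimal sketch of the bridge pattern (the client class is illustrative, not the LightRAG API):

```python
# Minimal sketch of the sync-over-async bridge used above; the client
# below is illustrative, not the LightRAG API.
import asyncio


class AsyncOnlyClient:
    async def insert_text(self, file_source: str, text: str) -> str:
        await asyncio.sleep(0)  # stand-in for network I/O
        return "external-id-123"


def insert_text_sync(client: AsyncOnlyClient, source: str, text: str) -> str:
    # asyncio.run() creates and closes its own event loop, so this must
    # only be called from plain synchronous code (never inside a loop).
    return asyncio.run(client.insert_text(source, text))


print(insert_text_sync(AsyncOnlyClient(), "notes.txt", "hello"))
```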