agno-2.0.0rc1-py3-none-any.whl → agno-2.0.0rc2-py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- agno/agent/agent.py +32 -14
- agno/db/mongo/mongo.py +8 -3
- agno/eval/accuracy.py +12 -5
- agno/knowledge/chunking/strategy.py +14 -14
- agno/knowledge/knowledge.py +156 -120
- agno/knowledge/reader/arxiv_reader.py +5 -5
- agno/knowledge/reader/csv_reader.py +6 -77
- agno/knowledge/reader/docx_reader.py +5 -5
- agno/knowledge/reader/firecrawl_reader.py +5 -5
- agno/knowledge/reader/json_reader.py +5 -5
- agno/knowledge/reader/markdown_reader.py +31 -9
- agno/knowledge/reader/pdf_reader.py +10 -123
- agno/knowledge/reader/reader_factory.py +65 -72
- agno/knowledge/reader/s3_reader.py +44 -114
- agno/knowledge/reader/text_reader.py +5 -5
- agno/knowledge/reader/url_reader.py +75 -31
- agno/knowledge/reader/web_search_reader.py +6 -29
- agno/knowledge/reader/website_reader.py +5 -5
- agno/knowledge/reader/wikipedia_reader.py +5 -5
- agno/knowledge/reader/youtube_reader.py +6 -6
- agno/knowledge/utils.py +10 -10
- agno/models/aws/bedrock.py +3 -7
- agno/models/base.py +37 -6
- agno/os/app.py +32 -24
- agno/os/mcp.py +39 -59
- agno/os/router.py +547 -16
- agno/os/routers/evals/evals.py +197 -12
- agno/os/routers/knowledge/knowledge.py +428 -14
- agno/os/routers/memory/memory.py +250 -28
- agno/os/routers/metrics/metrics.py +125 -7
- agno/os/routers/session/session.py +393 -25
- agno/os/schema.py +55 -2
- agno/run/agent.py +9 -0
- agno/run/team.py +93 -2
- agno/run/workflow.py +25 -12
- agno/team/team.py +861 -1051
- agno/tools/mcp.py +1 -2
- agno/utils/log.py +52 -2
- agno/utils/mcp.py +55 -3
- agno/utils/models/claude.py +0 -8
- agno/utils/print_response/team.py +177 -73
- agno/utils/streamlit.py +27 -0
- agno/workflow/workflow.py +9 -0
- {agno-2.0.0rc1.dist-info → agno-2.0.0rc2.dist-info}/METADATA +1 -1
- {agno-2.0.0rc1.dist-info → agno-2.0.0rc2.dist-info}/RECORD +48 -49
- agno/knowledge/reader/gcs_reader.py +0 -67
- {agno-2.0.0rc1.dist-info → agno-2.0.0rc2.dist-info}/WHEEL +0 -0
- {agno-2.0.0rc1.dist-info → agno-2.0.0rc2.dist-info}/licenses/LICENSE +0 -0
- {agno-2.0.0rc1.dist-info → agno-2.0.0rc2.dist-info}/top_level.txt +0 -0
agno/agent/agent.py
CHANGED
@@ -129,10 +129,10 @@ class Agent:
     session_id: Optional[str] = None
     # Default session state (stored in the database to persist across runs)
     session_state: Optional[Dict[str, Any]] = None
-    #
-    enable_agentic_state: bool = False
-    # If True, add the session state to the user prompt
+    # Set to True to add the session_state to the context
     add_session_state_to_context: bool = False
+    # Set to True to give the agent tools to update the session_state dynamically
+    enable_agentic_state: bool = False
     # If True, cache the current Agent session in memory for faster access
     cache_session: bool = False

@@ -321,8 +321,6 @@ class Agent:
     # --- If this Agent is part of a workflow ---
     # Optional workflow ID. Indicates this agent is part of a workflow.
     workflow_id: Optional[str] = None
-    # Set when this agent is part of a workflow.
-    workflow_session_id: Optional[str] = None

     # Metadata stored with this agent
     metadata: Optional[Dict[str, Any]] = None
@@ -345,7 +343,6 @@ class Agent:
         id: Optional[str] = None,
         introduction: Optional[str] = None,
         user_id: Optional[str] = None,
-        app_id: Optional[str] = None,
         session_id: Optional[str] = None,
         session_state: Optional[Dict[str, Any]] = None,
         add_session_state_to_context: bool = False,
@@ -429,7 +426,6 @@ class Agent:
         self.id = id
         self.introduction = introduction
         self.user_id = user_id
-        self.app_id = app_id

         self.session_id = session_id
         self.session_state = session_state
@@ -593,6 +589,15 @@ class Agent:
         if isinstance(input, Message):
            input = input.content  # type: ignore

+        # If input is a string, convert it to a dict
+        if isinstance(input, str):
+            import json
+
+            try:
+                input = json.loads(input)
+            except Exception as e:
+                raise ValueError(f"Failed to parse input. Is it a valid JSON string?: {e}")
+
         # Case 1: Message is already a BaseModel instance
         if isinstance(input, BaseModel):
             if isinstance(input, self.input_schema):
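Taken together, this means a raw string passed to an Agent that has an input_schema must now be valid JSON. A minimal sketch of the new parse-then-validate flow, using a hypothetical ResearchTopic schema (the class name is illustrative, not from the package):

import json

from pydantic import BaseModel


class ResearchTopic(BaseModel):  # hypothetical input schema for illustration
    topic: str
    depth: int


def parse_input(raw, schema):
    # rc2 behavior: a plain string must parse as JSON before schema validation
    if isinstance(raw, str):
        try:
            raw = json.loads(raw)
        except Exception as e:
            raise ValueError(f"Failed to parse input. Is it a valid JSON string?: {e}")
    if isinstance(raw, schema):
        return raw
    return schema(**raw)


print(parse_input('{"topic": "ai agents", "depth": 2}', ResearchTopic))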
@@ -3231,6 +3236,12 @@ class Agent:
         if isinstance(model_response_event, tuple(get_args(RunOutputEvent))) or isinstance(
             model_response_event, tuple(get_args(TeamRunOutputEvent))
         ):
+            if model_response_event.event == RunEvent.custom_event:  # type: ignore
+                model_response_event.agent_id = self.id  # type: ignore
+                model_response_event.agent_name = self.name  # type: ignore
+                model_response_event.session_id = session.session_id  # type: ignore
+                model_response_event.run_id = run_response.run_id  # type: ignore
+
             # We just bubble the event up
             yield self._handle_event(model_response_event, run_response)  # type: ignore
         else:
@@ -4365,7 +4376,7 @@ class Agent:

         return agent_session

-
+        log_debug(f"AgentSession {session_id_to_load} not found in db")
         return None

     def save_session(self, session: AgentSession) -> None:
@@ -6841,6 +6852,7 @@ class Agent:

         if self.output_schema is not None:
             markdown = False
+            markdown = False

         if stream is None:
             stream = self.stream or False
@@ -7172,10 +7184,12 @@ class Agent:
         image_artifacts = []
         for img in images:
             try:
+                artifact_id = img.id if hasattr(img, "id") and img.id else str(uuid4())
+
                 if img.url:
-                    image_artifacts.append(ImageArtifact(id=
+                    image_artifacts.append(ImageArtifact(id=artifact_id, url=img.url))
                 elif img.content:
-                    image_artifacts.append(ImageArtifact(id=
+                    image_artifacts.append(ImageArtifact(id=artifact_id, content=img.content))
             except Exception as e:
                 log_warning(f"Error creating ImageArtifact: {e}")
                 continue
@@ -7185,10 +7199,12 @@ class Agent:
         video_artifacts = []
         for vid in videos:
             try:
+                artifact_id = vid.id if hasattr(vid, "id") and vid.id else str(uuid4())
+
                 if vid.url:
-                    video_artifacts.append(VideoArtifact(id=
+                    video_artifacts.append(VideoArtifact(id=artifact_id, url=vid.url))
                 elif vid.content:
-                    video_artifacts.append(VideoArtifact(id=
+                    video_artifacts.append(VideoArtifact(id=artifact_id, content=vid.content))
             except Exception as e:
                 log_warning(f"Error creating VideoArtifact: {e}")
                 continue
@@ -7198,10 +7214,12 @@ class Agent:
         audio_artifacts = []
         for aud in audios:
             try:
+                artifact_id = aud.id if hasattr(aud, "id") and aud.id else str(uuid4())
+
                 if aud.url:
-                    audio_artifacts.append(AudioArtifact(id=
+                    audio_artifacts.append(AudioArtifact(id=artifact_id, url=aud.url))
                 elif aud.content:
-                    audio_artifacts.append(AudioArtifact(id=
+                    audio_artifacts.append(AudioArtifact(id=artifact_id, content=aud.content))
             except Exception as e:
                 log_warning(f"Error creating AudioArtifact: {e}")
                 continue
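All three artifact hunks apply the same fix: reuse the media object's own id when it has one, and only mint a fresh uuid4 as a fallback, so artifacts keep stable ids across runs. A small self-contained sketch of the pattern, with a stand-in Image type (hypothetical, not the package's class):

from dataclasses import dataclass
from typing import Optional
from uuid import uuid4


@dataclass
class Image:  # stand-in for the package's media input type
    id: Optional[str] = None
    url: Optional[str] = None


def artifact_id_for(media) -> str:
    # Keep an existing id so artifacts stay stable; otherwise mint a uuid4.
    return media.id if hasattr(media, "id") and media.id else str(uuid4())


assert artifact_id_for(Image(id="img-1")) == "img-1"
assert len(artifact_id_for(Image(url="https://example.com/a.png"))) == 36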
agno/db/mongo/mongo.py
CHANGED
@@ -672,7 +672,9 @@ class MongoDb(BaseDb):
             if result is None or not deserialize:
                 return result

-
+            # Remove MongoDB's _id field before creating UserMemory object
+            result_filtered = {k: v for k, v in result.items() if k != "_id"}
+            return UserMemory.from_dict(result_filtered)

         except Exception as e:
             log_error(f"Exception reading from collection: {e}")
@@ -750,7 +752,8 @@ class MongoDb(BaseDb):
             if not deserialize:
                 return records, total_count

-
+            # Remove MongoDB's _id field before creating UserMemory objects
+            return [UserMemory.from_dict({k: v for k, v in record.items() if k != "_id"}) for record in records]

         except Exception as e:
             log_error(f"Exception reading from collection: {e}")
@@ -861,7 +864,9 @@ class MongoDb(BaseDb):
             if not deserialize:
                 return update_doc

-
+            # Remove MongoDB's _id field before creating UserMemory object
+            update_doc_filtered = {k: v for k, v in update_doc.items() if k != "_id"}
+            return UserMemory.from_dict(update_doc_filtered)

         except Exception as e:
             log_error(f"Exception upserting user memory: {e}")
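The recurring pattern in these three hunks: MongoDB stores an ObjectId under the reserved "_id" key on every document, and UserMemory.from_dict presumably does not expect that extra field, so it is stripped before deserialization. A self-contained sketch of the filter:

from typing import Any, Dict


def strip_mongo_id(doc: Dict[str, Any]) -> Dict[str, Any]:
    # MongoDB adds an ObjectId under "_id" to every stored document;
    # drop it so deserializers that only accept known fields don't break.
    return {k: v for k, v in doc.items() if k != "_id"}


record = {"_id": "68a1f0c2...", "memory": "prefers dark mode", "user_id": "u1"}
print(strip_mongo_id(record))  # {'memory': 'prefers dark mode', 'user_id': 'u1'}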
agno/eval/accuracy.py
CHANGED
@@ -97,11 +97,18 @@ class AccuracyResult:
             title_justify="center",
         )
         summary_table.add_row("Number of Runs", f"{len(self.results)}")
-
-
-
-
-
+
+        if self.avg_score is not None:
+            summary_table.add_row("Average Score", f"{self.avg_score:.2f}")
+        if self.mean_score is not None:
+            summary_table.add_row("Mean Score", f"{self.mean_score:.2f}")
+        if self.min_score is not None:
+            summary_table.add_row("Minimum Score", f"{self.min_score:.2f}")
+        if self.max_score is not None:
+            summary_table.add_row("Maximum Score", f"{self.max_score:.2f}")
+        if self.std_dev_score is not None:
+            summary_table.add_row("Standard Deviation", f"{self.std_dev_score:.2f}")
+
        console.print(summary_table)

     def print_results(self, console: Optional["Console"] = None):
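The summary table now only renders score rows whose statistic was actually computed, instead of printing unconditional rows. A minimal standalone sketch of the guarded-row pattern using rich (the metric names and values below are illustrative):

from rich.console import Console
from rich.table import Table

# Illustrative values; None marks a statistic that was never computed.
scores = {"Average Score": 0.87, "Mean Score": None, "Minimum Score": 0.60}

table = Table(title="Accuracy Summary")
table.add_column("Metric")
table.add_column("Value")
for label, value in scores.items():
    if value is not None:  # guard each row, as the rc2 code now does
        table.add_row(label, f"{value:.2f}")

Console().print(table)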
agno/knowledge/chunking/strategy.py
CHANGED
@@ -35,13 +35,13 @@ class ChunkingStrategy(ABC):
 class ChunkingStrategyType(str, Enum):
     """Enumeration of available chunking strategies."""

-
-
-
-
-
-
-
+    AGENTIC_CHUNKER = "AgenticChunker"
+    DOCUMENT_CHUNKER = "DocumentChunker"
+    RECURSIVE_CHUNKER = "RecursiveChunker"
+    SEMANTIC_CHUNKER = "SemanticChunker"
+    FIXED_SIZE_CHUNKER = "FixedSizeChunker"
+    ROW_CHUNKER = "RowChunker"
+    MARKDOWN_CHUNKER = "MarkdownChunker"

     @classmethod
     def from_string(cls, strategy_name: str) -> "ChunkingStrategyType":
@@ -63,13 +63,13 @@ class ChunkingStrategyFactory:
     def create_strategy(cls, strategy_type: ChunkingStrategyType, **kwargs) -> ChunkingStrategy:
         """Create an instance of the chunking strategy with the given parameters."""
         strategy_map = {
-            ChunkingStrategyType.
-            ChunkingStrategyType.
-            ChunkingStrategyType.
-            ChunkingStrategyType.
-            ChunkingStrategyType.
-            ChunkingStrategyType.
-            ChunkingStrategyType.
+            ChunkingStrategyType.AGENTIC_CHUNKER: cls._create_agentic_chunking,
+            ChunkingStrategyType.DOCUMENT_CHUNKER: cls._create_document_chunking,
+            ChunkingStrategyType.RECURSIVE_CHUNKER: cls._create_recursive_chunking,
+            ChunkingStrategyType.SEMANTIC_CHUNKER: cls._create_semantic_chunking,
+            ChunkingStrategyType.FIXED_SIZE_CHUNKER: cls._create_fixed_chunking,
+            ChunkingStrategyType.ROW_CHUNKER: cls._create_row_chunking,
+            ChunkingStrategyType.MARKDOWN_CHUNKER: cls._create_markdown_chunking,
         }
         return strategy_map[strategy_type](**kwargs)
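Since ChunkingStrategyType subclasses str, each member can be constructed from its string value, and the factory dispatches through a dict keyed by the enum. A simplified sketch of that dispatch, with stand-in constructors in place of agno's private _create_* helpers:

from enum import Enum
from typing import Callable, Dict


class StrategyType(str, Enum):  # simplified stand-in for ChunkingStrategyType
    FIXED_SIZE_CHUNKER = "FixedSizeChunker"
    ROW_CHUNKER = "RowChunker"


def make_fixed(**kwargs):
    return ("FixedSizeChunking", kwargs)


def make_row(**kwargs):
    return ("RowChunking", kwargs)


# Dispatch table keyed by enum member, mirroring ChunkingStrategyFactory.
strategy_map: Dict[StrategyType, Callable] = {
    StrategyType.FIXED_SIZE_CHUNKER: make_fixed,
    StrategyType.ROW_CHUNKER: make_row,
}

# str-backed enums can be looked up by their string value.
print(strategy_map[StrategyType("FixedSizeChunker")](chunk_size=512))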
agno/knowledge/knowledge.py
CHANGED
@@ -5,16 +5,21 @@ import time
 from dataclasses import dataclass
 from enum import Enum
 from functools import cached_property
+from io import BytesIO
+from os.path import basename
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Set, Tuple, Union, cast, overload
 from uuid import uuid4

+from httpx import AsyncClient
+
 from agno.db.base import BaseDb
 from agno.db.schemas.knowledge import KnowledgeRow
 from agno.knowledge.content import Content, ContentAuth, ContentStatus, FileData
 from agno.knowledge.document import Document
 from agno.knowledge.reader import Reader, ReaderFactory
 from agno.knowledge.remote_content.remote_content import GCSContent, RemoteContent, S3Content
+from agno.utils.http import async_fetch_with_retry
 from agno.utils.log import log_debug, log_error, log_info, log_warning
 from agno.vectordb import VectorDb
@@ -421,20 +426,31 @@ class Knowledge:
         upsert: bool,
         skip_if_exists: bool,
     ):
+        """Load the content in the contextual URL
+
+        1. Set content hash
+        2. Validate the URL
+        3. Read the content
+        4. Prepare and insert the content in the vector database
+        """
         log_info(f"Adding content from URL {content.url}")
         content.file_type = "url"

+        if not content.url:
+            raise ValueError("No url provided")
+
         if self.vector_db.__class__.__name__ == "LightRag":
             await self._process_lightrag_content(content, KnowledgeContentOrigin.URL)
             return

+        # 1. Set content hash
         content.content_hash = self._build_content_hash(content)
         if self.vector_db and self.vector_db.content_hash_exists(content.content_hash) and skip_if_exists:
             log_info(f"Content {content.content_hash} already exists, skipping")
             return
         self._add_to_contents_db(content)

-        # Validate URL
+        # 2. Validate URL
         try:
             from urllib.parse import urlparse

@@ -450,61 +466,47 @@ class Knowledge:
             self._update_content(content)
             log_warning(f"Invalid URL: {content.url} - {str(e)}")

-        #
-
-
-
-        try:
-            if content.url.endswith("llms-full.txt") or content.url.endswith("llms.txt"):  # type: ignore
-                log_info("Detected llms, using url reader")
-                reader = content.reader or self.url_reader
-                if reader is not None:
-                    # TODO: We will refactor this to eventually pass authorization to all readers
-                    import inspect
+        # 3. Fetch and load content
+        async with AsyncClient() as client:
+            response = await async_fetch_with_retry(content.url, client=client)
+            bytes_content = BytesIO(response.content)

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                    read_documents = reader.read(content.url, name=content.name, password=content.auth.password)
-                else:
-                    read_documents = reader.read(content.url, name=content.name)
-            else:
-                log_info(f"No reader found for file extension: {file_extension}")
+        # 4. Select reader
+        # If a reader was provided by the user, use it
+        reader = content.reader
+        name = content.name
+        # Else select based on file extension
+        if reader is None:
+            url_path = Path(parsed_url.path)
+            file_extension = url_path.suffix.lower()
+            if file_extension == ".csv":
+                name = basename(parsed_url.path) or "data.csv"
+                reader = self.csv_reader
+            elif file_extension == ".pdf":
+                reader = self.pdf_reader
+            elif file_extension == ".docx":
+                reader = self.docx_reader
+            elif file_extension == ".json":
+                reader = self.json_reader
+            elif file_extension == ".markdown":
+                reader = self.markdown_reader
             else:
-
-            if content.reader:
-                reader = content.reader
-            else:
-                reader = self._select_url_reader(content.url)  # type: ignore
-            if reader is not None:
-                log_info(f"Selected reader: {reader.__class__.__name__}")
-                # TODO: We will refactor this to eventually pass authorization to all readers
-                import inspect
+                reader = self.text_reader

-
-
-
-
-
+        # 5. Read content
+        try:
+            read_documents = []
+            if reader is not None:
+                # TODO: We will refactor this to eventually pass authorization to all readers
+                import inspect
+
+                read_signature = inspect.signature(reader.read)
+                if reader.__class__.__name__ == "YouTubeReader":
+                    read_documents = reader.read(content.url, name=name)
+                elif "password" in read_signature.parameters and content.auth and content.auth.password:
+                    read_documents = reader.read(bytes_content, name=name, password=content.auth.password)
             else:
-
-
+                read_documents = reader.read(bytes_content, name=name)
         except Exception as e:
             log_error(f"Error reading URL: {content.url} - {str(e)}")
             content.status = ContentStatus.FAILED
@@ -512,13 +514,17 @@ class Knowledge:
         self._update_content(content)
             return

+        # 6. Chunk documents if needed
+        if reader and not reader.chunk:
+            read_documents = await reader.chunk_documents_async(read_documents)
+
+        # 7. Prepare and insert the content in the vector database
         file_size = 0
         if read_documents:
             for read_document in read_documents:
                 if read_document.size:
                     file_size += read_document.size
                 read_document.content_id = content.id
-
         await self._handle_vector_db_insert(content, read_documents, upsert)

     async def _load_from_content(
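The rewritten URL loader selects a reader from the suffix of the URL's path and falls back to the text reader for anything unrecognized; note that as written only the literal ".markdown" suffix is matched, so a ".md" URL falls through to the text reader. A minimal sketch of the selection rule, using string keys in place of the actual reader properties:

from pathlib import Path
from urllib.parse import urlparse


def pick_reader_key(url: str) -> str:
    # Route by the extension of the URL's path; unknown suffixes get "text".
    suffix = Path(urlparse(url).path).suffix.lower()
    readers = {".csv": "csv", ".pdf": "pdf", ".docx": "docx", ".json": "json", ".markdown": "markdown"}
    return readers.get(suffix, "text")


print(pick_reader_key("https://example.com/reports/q3.pdf"))  # pdf
print(pick_reader_key("https://example.com/notes.md"))        # text (.md is not matched)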
@@ -699,21 +705,23 @@ class Knowledge:
             log_warning(f"Unsupported remote content type: {type(remote_content)}")

     async def _load_from_s3(self, content: Content, upsert: bool, skip_if_exists: bool):
-
-
-
-
-
-
-
-
-
-
+        """Load the contextual S3 content.
+
+        1. Identify objects to read
+        2. Setup Content object
+        3. Hash content and add it to the contents database
+        4. Select reader
+        5. Fetch and load the content
+        6. Read the content
+        7. Prepare and insert the content in the vector database
+        8. Remove temporary file if needed
+        """
+        from agno.cloud.aws.s3.object import S3Object

         remote_content: S3Content = cast(S3Content, content.remote_content)

+        # 1. Identify objects to read
         objects_to_read: List[S3Object] = []
-
         if remote_content.bucket is not None:
             if remote_content.key is not None:
                 _object = S3Object(bucket_name=remote_content.bucket.name, name=remote_content.key)
@@ -725,10 +733,11 @@ class Knowledge:
         else:
             objects_to_read.extend(remote_content.bucket.get_objects())

-        for
+        for s3_object in objects_to_read:
+            # 2. Setup Content object
             id = str(uuid4())
             content_name = content.name or ""
-            content_name += "_" + (
+            content_name += "_" + (s3_object.name or "")
             content_entry = Content(
                 id=id,
                 name=content_name,
@@ -738,63 +747,123 @@ class Knowledge:
                 file_type="s3",
             )

+            # 3. Hash content and add it to the contents database
             content_hash = self._build_content_hash(content_entry)
             if self.vector_db and self.vector_db.content_hash_exists(content_hash) and skip_if_exists:
                 log_info(f"Content {content_hash} already exists, skipping")
                 continue
-
             self._add_to_contents_db(content_entry)

-
+            # 4. Select reader
+            reader = content.reader
+            if reader is None:
+                if s3_object.uri.endswith(".pdf"):
+                    reader = self.pdf_reader
+                elif s3_object.uri.endswith(".csv"):
+                    reader = self.csv_reader
+                elif s3_object.uri.endswith(".docx"):
+                    reader = self.docx_reader
+                elif s3_object.uri.endswith(".json"):
+                    reader = self.json_reader
+                elif s3_object.uri.endswith(".markdown"):
+                    reader = self.markdown_reader
+                else:
+                    reader = self.text_reader
+            reader = cast(Reader, reader)
+
+            # 5. Fetch and load the content
+            temporary_file = None
+            obj_name = content_name or s3_object.name.split("/")[-1]
+            readable_content: Optional[Union[BytesIO, Path]] = None
+            if s3_object.uri.endswith(".pdf"):
+                readable_content = BytesIO(s3_object.get_resource().get()["Body"].read())
+            else:
+                temporary_file = Path("storage").joinpath(obj_name)
+                readable_content = temporary_file
+                s3_object.download(readable_content)  # type: ignore
+
+            # 6. Read the content
+            read_documents = reader.read(readable_content, name=obj_name)

+            # 7. Prepare and insert the content in the vector database
             for read_document in read_documents:
                 read_document.content_id = content.id
-
             await self._handle_vector_db_insert(content_entry, read_documents, upsert)

-
-
-
-            else:
-                reader = content.reader
-
-                if reader is None:
-                    log_warning("No reader provided for content")
-                    return
+            # 8. Remove temporary file if needed
+            if temporary_file:
+                temporary_file.unlink()

+    async def _load_from_gcs(self, content: Content, upsert: bool, skip_if_exists: bool):
+        """Load the contextual GCS content.
+
+        1. Identify objects to read
+        2. Setup Content object
+        3. Hash content and add it to the contents database
+        4. Select reader
+        5. Fetch and load the content
+        6. Read the content
+        7. Prepare and insert the content in the vector database
+        """
         remote_content: GCSContent = cast(GCSContent, content.remote_content)
-        objects_to_read = []

+        # 1. Identify objects to read
+        objects_to_read = []
         if remote_content.blob_name is not None:
-            objects_to_read.append(remote_content.bucket.blob(remote_content.blob_name))
+            objects_to_read.append(remote_content.bucket.blob(remote_content.blob_name))  # type: ignore
         elif remote_content.prefix is not None:
-            objects_to_read.extend(remote_content.bucket.list_blobs(prefix=remote_content.prefix))
+            objects_to_read.extend(remote_content.bucket.list_blobs(prefix=remote_content.prefix))  # type: ignore
         else:
-            objects_to_read.extend(remote_content.bucket.list_blobs())
+            objects_to_read.extend(remote_content.bucket.list_blobs())  # type: ignore

-        for
+        for gcs_object in objects_to_read:
+            # 2. Setup Content object
             id = str(uuid4())
+            name = (content.name or "content") + "_" + gcs_object.name
             content_entry = Content(
                 id=id,
-                name=
+                name=name,
                 description=content.description,
                 status=ContentStatus.PROCESSING,
                 metadata=content.metadata,
                 file_type="gcs",
             )

+            # 3. Hash content and add it to the contents database
             content_hash = self._build_content_hash(content_entry)
             if self.vector_db and self.vector_db.content_hash_exists(content_hash) and skip_if_exists:
                 log_info(f"Content {content_hash} already exists, skipping")
                 continue

+            # 4. Add it to the contents database
             self._add_to_contents_db(content_entry)

-
+            # 5. Select reader
+            reader = content.reader
+            if reader is None:
+                if gcs_object.name.endswith(".pdf"):
+                    reader = self.pdf_reader
+                elif gcs_object.name.endswith(".csv"):
+                    reader = self.csv_reader
+                elif gcs_object.name.endswith(".docx"):
+                    reader = self.docx_reader
+                elif gcs_object.name.endswith(".json"):
+                    reader = self.json_reader
+                elif gcs_object.name.endswith(".markdown"):
+                    reader = self.markdown_reader
+                else:
+                    reader = self.text_reader
+            reader = cast(Reader, reader)
+
+            # 5. Fetch and load the content
+            readable_content = BytesIO(gcs_object.download_as_bytes())
+
+            # 6. Read the content
+            read_documents = reader.read(readable_content, name=name)

+            # 7. Prepare and insert the content in the vector database
             for read_document in read_documents:
                 read_document.content_id = content.id
-
             await self._handle_vector_db_insert(content_entry, read_documents, upsert)

     async def _handle_vector_db_insert(self, content, read_documents, upsert):
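In the reworked S3 path, PDFs are read straight into memory while other objects are first downloaded to a temporary file under storage/ and deleted afterwards. A rough sketch of that branch using boto3 directly (agno goes through its own agno.cloud.aws.s3 wrapper, so treat the calls below as an approximation):

from io import BytesIO
from pathlib import Path

import boto3  # direct use here is an approximation of agno's S3Object wrapper


def fetch_s3_payload(bucket: str, key: str):
    obj = boto3.resource("s3").Object(bucket, key)
    if key.endswith(".pdf"):
        # PDFs are streamed fully into memory
        return BytesIO(obj.get()["Body"].read())
    # Everything else goes through a temporary file, removed by the caller
    tmp = Path("storage") / Path(key).name
    obj.download_file(str(tmp))
    return tmp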
@@ -1006,7 +1075,7 @@ class Knowledge:
         elif content_type == KnowledgeContentOrigin.URL:
             log_info(f"Uploading file to LightRAG from URL: {content.url}")
             try:
-                reader = self.
+                reader = content.reader or self.website_reader
                 if reader is None:
                     log_error("No URL reader available")
                     content.status = ContentStatus.FAILED
@@ -1354,14 +1423,6 @@ class Knowledge:
         log_info(f"Selecting reader for extension: {extension}")
         return ReaderFactory.get_reader_for_extension(extension)

-    def _select_url_reader(self, url: str) -> Reader:
-        """Select the appropriate reader for a URL."""
-        return ReaderFactory.get_reader_for_url(url)
-
-    def _select_url_file_reader(self, extension: str) -> Reader:
-        """Select the appropriate reader for a URL file extension."""
-        return ReaderFactory.get_reader_for_url_file(extension)
-
     def get_filters(self) -> List[str]:
         return [
             "filter_tag_1",
@@ -1484,32 +1545,7 @@ class Knowledge:
         """Firecrawl reader - lazy loaded via factory."""
         return self._get_reader("firecrawl")

-    @property
-    def url_reader(self) -> Optional[Reader]:
-        """URL reader - lazy loaded via factory."""
-        return self._get_reader("url")
-
-    @property
-    def pdf_url_reader(self) -> Optional[Reader]:
-        """PDF URL reader - lazy loaded via factory."""
-        return self._get_reader("pdf_url")
-
     @property
     def youtube_reader(self) -> Optional[Reader]:
         """YouTube reader - lazy loaded via factory."""
         return self._get_reader("youtube")
-
-    @property
-    def csv_url_reader(self) -> Optional[Reader]:
-        """CSV URL reader - lazy loaded via factory."""
-        return self._get_reader("csv_url")
-
-    @property
-    def s3_reader(self) -> Optional[Reader]:
-        """S3 reader - lazy loaded via factory."""
-        return self._get_reader("s3")
-
-    @property
-    def gcs_reader(self) -> Optional[Reader]:
-        """GCS reader - lazy loaded via factory."""
-        return self._get_reader("gcs")
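URL content is now fetched once with httpx and handed to readers as bytes, rather than each URL-specific reader fetching for itself (hence the removed url_reader, pdf_url_reader, and csv_url_reader properties). The exact behavior of agno.utils.http.async_fetch_with_retry is not visible in this diff, so the helper below is a rough stand-in with simple exponential backoff:

import asyncio
from io import BytesIO

import httpx


async def fetch_with_retry(url: str, client: httpx.AsyncClient, attempts: int = 3) -> httpx.Response:
    # Rough stand-in for agno.utils.http.async_fetch_with_retry.
    for attempt in range(attempts):
        try:
            response = await client.get(url)
            response.raise_for_status()
            return response
        except httpx.HTTPError:
            if attempt == attempts - 1:
                raise
            await asyncio.sleep(2**attempt)  # simple backoff between retries
    raise RuntimeError("unreachable")


async def main():
    async with httpx.AsyncClient() as client:
        response = await fetch_with_retry("https://example.com/llms.txt", client)
        payload = BytesIO(response.content)  # readers now receive bytes, not the URL
        print(f"fetched {payload.getbuffer().nbytes} bytes")


asyncio.run(main())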
agno/knowledge/reader/arxiv_reader.py
CHANGED
@@ -20,11 +20,11 @@ class ArxivReader(Reader):
     def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
         """Get the list of supported chunking strategies for Arxiv readers."""
         return [
-            ChunkingStrategyType.
-            ChunkingStrategyType.
-            ChunkingStrategyType.
-            ChunkingStrategyType.
-            ChunkingStrategyType.
+            ChunkingStrategyType.FIXED_SIZE_CHUNKER,
+            ChunkingStrategyType.AGENTIC_CHUNKER,
+            ChunkingStrategyType.DOCUMENT_CHUNKER,
+            ChunkingStrategyType.RECURSIVE_CHUNKER,
+            ChunkingStrategyType.SEMANTIC_CHUNKER,
         ]

     @classmethod