agno 2.4.5__py3-none-any.whl → 2.4.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/agent.py +2 -1
- agno/db/singlestore/singlestore.py +4 -5
- agno/db/surrealdb/models.py +1 -1
- agno/knowledge/chunking/agentic.py +1 -5
- agno/knowledge/chunking/code.py +1 -1
- agno/knowledge/chunking/document.py +22 -42
- agno/knowledge/chunking/fixed.py +1 -5
- agno/knowledge/chunking/markdown.py +9 -25
- agno/knowledge/chunking/recursive.py +1 -3
- agno/knowledge/chunking/row.py +3 -2
- agno/knowledge/chunking/semantic.py +1 -1
- agno/knowledge/chunking/strategy.py +19 -0
- agno/knowledge/embedder/aws_bedrock.py +325 -106
- agno/knowledge/knowledge.py +173 -14
- agno/knowledge/reader/text_reader.py +1 -1
- agno/knowledge/reranker/aws_bedrock.py +299 -0
- agno/learn/machine.py +5 -6
- agno/learn/stores/learned_knowledge.py +108 -131
- agno/run/workflow.py +3 -0
- agno/tools/mcp/mcp.py +26 -1
- agno/utils/print_response/agent.py +8 -8
- agno/utils/print_response/team.py +8 -8
- agno/vectordb/lancedb/lance_db.py +9 -9
- agno/workflow/condition.py +135 -56
- {agno-2.4.5.dist-info → agno-2.4.7.dist-info}/METADATA +34 -59
- {agno-2.4.5.dist-info → agno-2.4.7.dist-info}/RECORD +29 -28
- {agno-2.4.5.dist-info → agno-2.4.7.dist-info}/WHEEL +0 -0
- {agno-2.4.5.dist-info → agno-2.4.7.dist-info}/licenses/LICENSE +0 -0
- {agno-2.4.5.dist-info → agno-2.4.7.dist-info}/top_level.txt +0 -0
agno/knowledge/knowledge.py
CHANGED
```diff
@@ -823,7 +823,13 @@ class Knowledge:
                     log_warning(f"Invalid filter key: {key} - not present in knowledge base")
 
         elif isinstance(filters, List):
-            # Validate
+            # Validate list filters against known metadata keys
+            if valid_metadata_filters is None or not valid_metadata_filters:
+                # Can't validate keys without metadata - return original list
+                log_warning("No valid metadata filters tracked yet. Cannot validate list filter keys.")
+                return filters, []
+
+            valid_list_filters: List[FilterExpr] = []
             for i, filter_item in enumerate(filters):
                 if not isinstance(filter_item, FilterExpr):
                     log_warning(
@@ -832,9 +838,23 @@ class Knowledge:
                         f"Use filter expressions like EQ('key', 'value'), IN('key', [values]), "
                         f"AND(...), OR(...), NOT(...) from agno.filters"
                     )
-
-
-
+                    continue
+
+                # Check if filter has a key attribute and validate it
+                if hasattr(filter_item, "key"):
+                    key = filter_item.key
+                    base_key = key.split(".")[-1] if "." in key else key
+                    if base_key in valid_metadata_filters or key in valid_metadata_filters:
+                        valid_list_filters.append(filter_item)
+                    else:
+                        invalid_keys.append(key)
+                        log_warning(f"Invalid filter key: {key} - not present in knowledge base")
+                else:
+                    # Complex filters (AND, OR, NOT) - keep them as-is
+                    # They contain nested filters that will be validated by the vector DB
+                    valid_list_filters.append(filter_item)
+
+            return valid_list_filters, invalid_keys
 
         return valid_filters, invalid_keys
 
```
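To pin down the rule these two hunks implement, here is a runnable, self-contained sketch. `EQ` and `AND` below are minimal stand-ins for the expressions in `agno.filters` (which the warning text references), not the real classes, and `validate_list_filters` is a hypothetical free function mirroring the private logic above.

```python
from dataclasses import dataclass
from typing import List, Set, Tuple


@dataclass
class EQ:
    key: str
    value: object


@dataclass
class AND:
    filters: tuple  # complex filter: exposes no single .key


def validate_list_filters(filters: List[object], valid_metadata_filters: Set[str]) -> Tuple[list, list]:
    valid, invalid = [], []
    for f in filters:
        if hasattr(f, "key"):
            # dotted keys like "meta.author" are also checked by their last segment
            base_key = f.key.split(".")[-1] if "." in f.key else f.key
            if base_key in valid_metadata_filters or f.key in valid_metadata_filters:
                valid.append(f)
            else:
                invalid.append(f.key)
        else:
            valid.append(f)  # AND/OR/NOT: kept as-is, validated by the vector DB
    return valid, invalid


valid, invalid = validate_list_filters(
    [EQ("author", "jane"), EQ("genre", "sci-fi"), AND((EQ("author", "jane"),))],
    valid_metadata_filters={"author"},
)
assert invalid == ["genre"] and len(valid) == 2
```

Note the asymmetry: only flat key filters are checked against the tracked metadata keys; composite filters pass through untouched for the vector DB to validate.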
```diff
@@ -1541,7 +1561,49 @@ class Knowledge:
         # 6. Chunk documents if needed
         if reader and not reader.chunk:
             read_documents = await reader.chunk_documents_async(read_documents)
-
+
+        # 7. Group documents by source URL for multi-page readers (like WebsiteReader)
+        docs_by_source: Dict[str, List[Document]] = {}
+        for doc in read_documents:
+            source_url = doc.meta_data.get("url", content.url) if doc.meta_data else content.url
+            source_url = source_url or "unknown"
+            if source_url not in docs_by_source:
+                docs_by_source[source_url] = []
+            docs_by_source[source_url].append(doc)
+
+        # 8. Process each source separately if multiple sources exist
+        if len(docs_by_source) > 1:
+            for source_url, source_docs in docs_by_source.items():
+                # Compute per-document hash based on actual source URL
+                doc_hash = self._build_document_content_hash(source_docs[0], content)
+
+                # Check skip_if_exists for each source individually
+                if self._should_skip(doc_hash, skip_if_exists):
+                    log_debug(f"Skipping already indexed: {source_url}")
+                    continue
+
+                doc_id = generate_id(doc_hash)
+                self._prepare_documents_for_insert(source_docs, doc_id, calculate_sizes=True)
+
+                # Insert with per-document hash
+                if self.vector_db.upsert_available() and upsert:
+                    try:
+                        await self.vector_db.async_upsert(doc_hash, source_docs, content.metadata)
+                    except Exception as e:
+                        log_error(f"Error upserting document from {source_url}: {e}")
+                        continue
+                else:
+                    try:
+                        await self.vector_db.async_insert(doc_hash, documents=source_docs, filters=content.metadata)
+                    except Exception as e:
+                        log_error(f"Error inserting document from {source_url}: {e}")
+                        continue
+
+            content.status = ContentStatus.COMPLETED
+            await self._aupdate_content(content)
+            return
+
+        # 9. Single source - use existing logic with original content hash
         if not content.id:
             content.id = generate_id(content.content_hash or "")
         self._prepare_documents_for_insert(read_documents, content.id, calculate_sizes=True)
```
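Both this async path and the sync path in the next hunk group chunks by the page URL that multi-page readers record in `meta_data`. A self-contained sketch of that grouping step, with `Doc` as a stand-in for agno's `Document`:

```python
from dataclasses import dataclass, field
from typing import Dict, List, Optional


@dataclass
class Doc:
    content: str
    meta_data: dict = field(default_factory=dict)


def group_by_source(docs: List[Doc], content_url: Optional[str]) -> Dict[str, List[Doc]]:
    grouped: Dict[str, List[Doc]] = {}
    for doc in docs:
        # pages without their own "url" fall back to the parent content URL
        url = doc.meta_data.get("url", content_url) if doc.meta_data else content_url
        grouped.setdefault(url or "unknown", []).append(doc)
    return grouped


pages = [Doc("a", {"url": "https://x.dev/1"}), Doc("b", {"url": "https://x.dev/2"}), Doc("c")]
groups = group_by_source(pages, content_url="https://x.dev")
assert set(groups) == {"https://x.dev/1", "https://x.dev/2", "https://x.dev"}
```

The patch writes the insert with an explicit membership check before appending; `dict.setdefault` above is behaviorally equivalent.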
```diff
@@ -1648,7 +1710,48 @@ class Knowledge:
         if reader:
             read_documents = self._chunk_documents_sync(reader, read_documents)
 
-        # 7.
+        # 7. Group documents by source URL for multi-page readers (like WebsiteReader)
+        docs_by_source: Dict[str, List[Document]] = {}
+        for doc in read_documents:
+            source_url = doc.meta_data.get("url", content.url) if doc.meta_data else content.url
+            source_url = source_url or "unknown"
+            if source_url not in docs_by_source:
+                docs_by_source[source_url] = []
+            docs_by_source[source_url].append(doc)
+
+        # 8. Process each source separately if multiple sources exist
+        if len(docs_by_source) > 1:
+            for source_url, source_docs in docs_by_source.items():
+                # Compute per-document hash based on actual source URL
+                doc_hash = self._build_document_content_hash(source_docs[0], content)
+
+                # Check skip_if_exists for each source individually
+                if self._should_skip(doc_hash, skip_if_exists):
+                    log_debug(f"Skipping already indexed: {source_url}")
+                    continue
+
+                doc_id = generate_id(doc_hash)
+                self._prepare_documents_for_insert(source_docs, doc_id, calculate_sizes=True)
+
+                # Insert with per-document hash
+                if self.vector_db.upsert_available() and upsert:
+                    try:
+                        self.vector_db.upsert(doc_hash, source_docs, content.metadata)
+                    except Exception as e:
+                        log_error(f"Error upserting document from {source_url}: {e}")
+                        continue
+                else:
+                    try:
+                        self.vector_db.insert(doc_hash, documents=source_docs, filters=content.metadata)
+                    except Exception as e:
+                        log_error(f"Error inserting document from {source_url}: {e}")
+                        continue
+
+            content.status = ContentStatus.COMPLETED
+            self._update_content(content)
+            return
+
+        # 9. Single source - use existing logic with original content hash
         if not content.id:
             content.id = generate_id(content.content_hash or "")
         self._prepare_documents_for_insert(read_documents, content.id, calculate_sizes=True)
```
```diff
@@ -1900,11 +2003,11 @@ class Knowledge:
             if self._should_skip(content.content_hash, skip_if_exists):
                 content.status = ContentStatus.COMPLETED
                 await self._aupdate_content(content)
-
+                continue  # Skip to next topic, don't exit loop
 
             if self.vector_db.__class__.__name__ == "LightRag":
                 await self._aprocess_lightrag_content(content, KnowledgeContentOrigin.TOPIC)
-
+                continue  # Skip to next topic, don't exit loop
 
             if self.vector_db and self.vector_db.content_hash_exists(content.content_hash) and skip_if_exists:
                 log_info(f"Content {content.content_hash} already exists, skipping")
@@ -1961,11 +2064,11 @@ class Knowledge:
             if self._should_skip(content.content_hash, skip_if_exists):
                 content.status = ContentStatus.COMPLETED
                 self._update_content(content)
-
+                continue  # Skip to next topic, don't exit loop
 
             if self.vector_db.__class__.__name__ == "LightRag":
                 self._process_lightrag_content(content, KnowledgeContentOrigin.TOPIC)
-
+                continue  # Skip to next topic, don't exit loop
 
             if self.vector_db and self.vector_db.content_hash_exists(content.content_hash) and skip_if_exists:
                 log_info(f"Content {content.content_hash} already exists, skipping")
```
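Per the inline comments, these two hunks make an already-indexed or LightRag-handled topic skip to the next topic instead of ending topic processing early. A minimal, purely illustrative rendering of that control-flow difference (names invented):

```python
# Illustrative only: `continue` skips just the already-indexed topic, where an
# early exit from the loop would have silently dropped every later topic.
def load_topics(topics, already_indexed):
    loaded = []
    for topic in topics:
        if topic in already_indexed:
            continue  # skip to next topic, don't exit loop
        loaded.append(topic)
    return loaded


assert load_topics(["a", "b", "c"], already_indexed={"b"}) == ["a", "c"]
```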
```diff
@@ -3896,6 +3999,42 @@ class Knowledge:
         hash_input = ":".join(hash_parts)
         return hashlib.sha256(hash_input.encode()).hexdigest()
 
+    def _build_document_content_hash(self, document: Document, content: Content) -> str:
+        """
+        Build content hash for a specific document.
+
+        Used for multi-page readers (like WebsiteReader) where each crawled page
+        should have its own unique content hash based on its actual URL.
+
+        Args:
+            document: The document to build the hash for
+            content: The original content object (for fallback name/description)
+
+        Returns:
+            A unique hash string for this specific document
+        """
+        hash_parts = []
+
+        if content.name:
+            hash_parts.append(content.name)
+        if content.description:
+            hash_parts.append(content.description)
+
+        # Use document's own URL if available (set by WebsiteReader)
+        doc_url = document.meta_data.get("url") if document.meta_data else None
+        if doc_url:
+            hash_parts.append(str(doc_url))
+        elif content.url:
+            hash_parts.append(content.url)
+        elif content.path:
+            hash_parts.append(str(content.path))
+        else:
+            # Fallback: use content hash for uniqueness
+            hash_parts.append(hashlib.sha256(document.content.encode()).hexdigest()[:16])
+
+        hash_input = ":".join(hash_parts)
+        return hashlib.sha256(hash_input.encode()).hexdigest()
+
     def _ensure_string_field(self, value: Any, field_name: str, default: str = "") -> str:
         """
         Safely ensure a field is a string, handling various edge cases.
```
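A quick sanity check on the hash assembly, with invented values: for content named `docs` whose crawled pages report their own URLs, the joined hash inputs differ per page, so sibling pages from one crawl get distinct hashes.

```python
import hashlib

# Invented values mirroring the method above: content name, then per-page URL.
page_a = ":".join(["docs", "https://example.com/a"])
page_b = ":".join(["docs", "https://example.com/b"])
assert hashlib.sha256(page_a.encode()).hexdigest() != hashlib.sha256(page_b.encode()).hexdigest()
```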
```diff
@@ -4625,7 +4764,12 @@ Make sure to pass the filters as [Dict[str: Any]] to the tool. FOLLOW THIS STRUC
         retrieval_timer = Timer()
         retrieval_timer.start()
 
-
+        try:
+            docs = self.search(query=query, filters=knowledge_filters)
+        except Exception as e:
+            retrieval_timer.stop()
+            log_warning(f"Knowledge search failed: {e}")
+            return f"Error searching knowledge base: {type(e).__name__}"
 
         if run_response is not None and docs:
             references = MessageReferences(
@@ -4657,7 +4801,12 @@ Make sure to pass the filters as [Dict[str: Any]] to the tool. FOLLOW THIS STRUC
         retrieval_timer = Timer()
         retrieval_timer.start()
 
-
+        try:
+            docs = await self.asearch(query=query, filters=knowledge_filters)
+        except Exception as e:
+            retrieval_timer.stop()
+            log_warning(f"Knowledge search failed: {e}")
+            return f"Error searching knowledge base: {type(e).__name__}"
 
         if run_response is not None and docs:
             references = MessageReferences(
@@ -4735,7 +4884,12 @@ Make sure to pass the filters as [Dict[str: Any]] to the tool. FOLLOW THIS STRUC
         retrieval_timer = Timer()
         retrieval_timer.start()
 
-
+        try:
+            docs = self.search(query=query, filters=search_filters)
+        except Exception as e:
+            retrieval_timer.stop()
+            log_warning(f"Knowledge search failed: {e}")
+            return f"Error searching knowledge base: {type(e).__name__}"
 
         if run_response is not None and docs:
             references = MessageReferences(
@@ -4789,7 +4943,12 @@ Make sure to pass the filters as [Dict[str: Any]] to the tool. FOLLOW THIS STRUC
         retrieval_timer = Timer()
         retrieval_timer.start()
 
-
+        try:
+            docs = await self.asearch(query=query, filters=search_filters)
+        except Exception as e:
+            retrieval_timer.stop()
+            log_warning(f"Knowledge search failed: {e}")
+            return f"Error searching knowledge base: {type(e).__name__}"
 
         if run_response is not None and docs:
             references = MessageReferences(
```
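All four retrieval hunks apply the same guard: the search call moves inside a try/except, the timer is stopped on failure, and the tool returns a short error string carrying only the exception class name (presumably to keep the agent-facing output short rather than echoing raw error messages). A stand-alone illustration of that shape, not agno code:

```python
# Illustrative sketch of the guarded-retrieval pattern added above.
def guarded_search(search, query):
    try:
        return search(query)
    except Exception as e:
        return f"Error searching knowledge base: {type(e).__name__}"


def broken_search(query):
    raise TimeoutError("vector db unreachable")


assert guarded_search(broken_search, "q") == "Error searching knowledge base: TimeoutError"
```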
agno/knowledge/reader/text_reader.py
CHANGED
```diff
@@ -110,7 +110,7 @@ class TextReader(Reader):
         chunked_documents = self.chunk_document(document)
 
         if not chunked_documents:
-            return [
+            return []
 
         tasks = [process_chunk(chunk_doc) for chunk_doc in chunked_documents]
         return await asyncio.gather(*tasks)
```
agno/knowledge/reranker/aws_bedrock.py
ADDED
````python
from os import getenv
from typing import Any, Dict, List, Literal, Optional

from pydantic import ConfigDict, Field

from agno.knowledge.document import Document
from agno.knowledge.reranker.base import Reranker
from agno.utils.log import logger

try:
    from boto3 import client as AwsClient
    from boto3.session import Session
    from botocore.exceptions import ClientError
except ImportError:
    raise ImportError("`boto3` not installed. Please install it via `pip install boto3`.")


# Model ID constants
AMAZON_RERANK_V1 = "amazon.rerank-v1:0"
COHERE_RERANK_V3_5 = "cohere.rerank-v3-5:0"

# Type alias for supported models
RerankerModel = Literal["amazon.rerank-v1:0", "cohere.rerank-v3-5:0"]


class AwsBedrockReranker(Reranker):
    """
    AWS Bedrock reranker supporting Amazon Rerank 1.0 and Cohere Rerank 3.5 models.

    This reranker uses the unified Bedrock Rerank API (bedrock-agent-runtime)
    which provides a consistent interface for both model providers.

    To use this reranker, you need to either:
    1. Set the following environment variables:
       - AWS_ACCESS_KEY_ID
       - AWS_SECRET_ACCESS_KEY
       - AWS_REGION
    2. Or provide a boto3 Session object

    Args:
        model (str): The model ID to use. Options:
            - 'amazon.rerank-v1:0' (Amazon Rerank 1.0)
            - 'cohere.rerank-v3-5:0' (Cohere Rerank 3.5)
            Default is 'cohere.rerank-v3-5:0'.
        top_n (Optional[int]): Number of top results to return after reranking.
            If None, returns all documents reranked.
        aws_region (Optional[str]): The AWS region to use.
        aws_access_key_id (Optional[str]): The AWS access key ID to use.
        aws_secret_access_key (Optional[str]): The AWS secret access key to use.
        session (Optional[Session]): A boto3 Session object for authentication.
        additional_model_request_fields (Optional[Dict]): Additional model-specific
            parameters to pass in the request (e.g., Cohere-specific options).

    Example:
        ```python
        from agno.knowledge.reranker.aws_bedrock import AwsBedrockReranker

        # Using Cohere Rerank 3.5 (default)
        reranker = AwsBedrockReranker(
            model="cohere.rerank-v3-5:0",
            top_n=5,
            aws_region="us-west-2",
        )

        # Using Amazon Rerank 1.0
        reranker = AwsBedrockReranker(
            model="amazon.rerank-v1:0",
            top_n=10,
            aws_region="us-west-2",
        )

        # Rerank documents
        reranked_docs = reranker.rerank(query="What is machine learning?", documents=docs)
        ```

    Note:
        - Amazon Rerank 1.0 is NOT available in us-east-1 (N. Virginia).
          Use Cohere Rerank 3.5 in that region.
        - Maximum 1000 documents per request.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True, populate_by_name=True)

    model: str = Field(default=COHERE_RERANK_V3_5, description="Reranker model ID")
    top_n: Optional[int] = Field(default=None, description="Number of top results to return")

    aws_region: Optional[str] = Field(default=None, description="AWS region")
    aws_access_key_id: Optional[str] = Field(default=None, description="AWS access key ID")
    aws_secret_access_key: Optional[str] = Field(default=None, description="AWS secret access key")
    session: Optional[Session] = Field(default=None, description="Boto3 session", exclude=True)

    additional_model_request_fields: Optional[Dict[str, Any]] = Field(
        default=None,
        description="Additional model-specific request parameters",
    )

    _client: Optional[AwsClient] = None

    @property
    def client(self) -> AwsClient:
        """
        Returns a bedrock-agent-runtime client for the Rerank API.

        Returns:
            AwsClient: An instance of the bedrock-agent-runtime client.
        """
        if self._client is not None:
            return self._client

        if self.session:
            self._client = self.session.client("bedrock-agent-runtime")
            return self._client

        aws_access_key_id = self.aws_access_key_id or getenv("AWS_ACCESS_KEY_ID")
        aws_secret_access_key = self.aws_secret_access_key or getenv("AWS_SECRET_ACCESS_KEY")
        aws_region = self.aws_region or getenv("AWS_REGION")

        if not aws_access_key_id or not aws_secret_access_key:
            # Fall back to default credential chain
            self._client = AwsClient(
                service_name="bedrock-agent-runtime",
                region_name=aws_region,
            )
        else:
            self._client = AwsClient(
                service_name="bedrock-agent-runtime",
                region_name=aws_region,
                aws_access_key_id=aws_access_key_id,
                aws_secret_access_key=aws_secret_access_key,
            )

        return self._client

    def _get_model_arn(self) -> str:
        """
        Constructs the full model ARN for the reranker model.

        Returns:
            str: The model ARN.
        """
        region = self.aws_region or getenv("AWS_REGION", "us-west-2")
        return f"arn:aws:bedrock:{region}::foundation-model/{self.model}"

    def _build_sources(self, documents: List[Document]) -> List[Dict[str, Any]]:
        """
        Convert Document objects to Bedrock Rerank API source format.

        Args:
            documents: List of Document objects to convert.

        Returns:
            List of RerankSource objects for the API.
        """
        sources = []
        for doc in documents:
            # Use text format for document content
            source = {
                "type": "INLINE",
                "inlineDocumentSource": {
                    "type": "TEXT",
                    "textDocument": {
                        "text": doc.content,
                    },
                },
            }
            sources.append(source)
        return sources

    def _rerank(self, query: str, documents: List[Document]) -> List[Document]:
        """
        Internal method to perform reranking via Bedrock Rerank API.

        Args:
            query: The query string to rank documents against.
            documents: List of Document objects to rerank.

        Returns:
            List of Document objects sorted by relevance score.
        """
        if not documents:
            return []

        # Validate top_n
        top_n = self.top_n
        if top_n is not None and top_n <= 0:
            logger.warning(f"top_n should be a positive integer, got {self.top_n}, setting top_n to None")
            top_n = None

        # Build the request
        rerank_request: Dict[str, Any] = {
            "queries": [
                {
                    "type": "TEXT",
                    "textQuery": {
                        "text": query,
                    },
                }
            ],
            "sources": self._build_sources(documents),
            "rerankingConfiguration": {
                "type": "BEDROCK_RERANKING_MODEL",
                "bedrockRerankingConfiguration": {
                    "modelConfiguration": {
                        "modelArn": self._get_model_arn(),
                    },
                },
            },
        }

        # Add numberOfResults if top_n is specified
        if top_n is not None:
            rerank_request["rerankingConfiguration"]["bedrockRerankingConfiguration"]["numberOfResults"] = top_n

        # Add additional model request fields if provided
        if self.additional_model_request_fields:
            rerank_request["rerankingConfiguration"]["bedrockRerankingConfiguration"]["modelConfiguration"][
                "additionalModelRequestFields"
            ] = self.additional_model_request_fields

        # Call the Rerank API
        response = self.client.rerank(**rerank_request)

        # Process results
        reranked_docs: List[Document] = []
        results = response.get("results", [])

        for result in results:
            index = result.get("index")
            relevance_score = result.get("relevanceScore")

            if index is not None and index < len(documents):
                doc = documents[index]
                doc.reranking_score = relevance_score
                reranked_docs.append(doc)

        # Results from API are already sorted by relevance, but ensure sorting
        reranked_docs.sort(
            key=lambda x: x.reranking_score if x.reranking_score is not None else float("-inf"),
            reverse=True,
        )

        return reranked_docs

    def rerank(self, query: str, documents: List[Document]) -> List[Document]:
        """
        Rerank documents based on their relevance to a query.

        Args:
            query: The query string to rank documents against.
            documents: List of Document objects to rerank.

        Returns:
            List of Document objects sorted by relevance score (highest first).
            Each document will have its `reranking_score` attribute set.
        """
        try:
            return self._rerank(query=query, documents=documents)
        except ClientError as e:
            error_code = e.response.get("Error", {}).get("Code", "Unknown")
            error_message = e.response.get("Error", {}).get("Message", str(e))
            logger.error(f"AWS Bedrock Rerank API error ({error_code}): {error_message}. Returning original documents.")
            return documents
        except Exception as e:
            logger.error(f"Error reranking documents: {e}. Returning original documents.")
            return documents


class CohereBedrockReranker(AwsBedrockReranker):
    """
    Convenience class for Cohere Rerank 3.5 on AWS Bedrock.

    This is a pre-configured AwsBedrockReranker using the Cohere Rerank 3.5 model.

    Example:
        ```python
        reranker = CohereBedrockReranker(top_n=5, aws_region="us-west-2")
        reranked_docs = reranker.rerank(query="What is AI?", documents=docs)
        ```
    """

    model: str = Field(default=COHERE_RERANK_V3_5)


class AmazonReranker(AwsBedrockReranker):
    """
    Convenience class for Amazon Rerank 1.0 on AWS Bedrock.

    This is a pre-configured AwsBedrockReranker using the Amazon Rerank 1.0 model.

    Note: Amazon Rerank 1.0 is NOT available in us-east-1 (N. Virginia).

    Example:
        ```python
        reranker = AmazonReranker(top_n=5, aws_region="us-west-2")
        reranked_docs = reranker.rerank(query="What is AI?", documents=docs)
        ```
    """

    model: str = Field(default=AMAZON_RERANK_V1)
````
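For context on how this new reranker is typically consumed: in agno, rerankers are usually attached to a vector DB. The wiring below is a hedged sketch only — it assumes `LanceDb` (also touched in this release) accepts a `reranker` argument, which should be verified against the installed version; the table name and URI are invented.

```python
from agno.knowledge.reranker.aws_bedrock import AwsBedrockReranker
from agno.vectordb.lancedb import LanceDb  # import path inferred from the file list

# Assumption: LanceDb accepts a `reranker` argument; verify for your version.
vector_db = LanceDb(
    table_name="docs",   # invented
    uri="/tmp/lancedb",  # invented
    reranker=AwsBedrockReranker(top_n=5, aws_region="us-west-2"),
)
# Searches against this vector DB would then be reranked by Bedrock before return.
```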
agno/learn/machine.py
CHANGED
```diff
@@ -645,12 +645,11 @@ class LearningMachine:
         for name, store in self.stores.items():
             try:
                 result = await store.arecall(**context)
-
-
-
-
-
-                pass
+                results[name] = result
+                try:
+                    log_debug(f"Recalled from {name}: {result}")
+                except Exception:
+                    pass
             except Exception as e:
                 log_warning(f"Error recalling from {name}: {e}")
 
```