agno 2.2.13__py3-none-any.whl → 2.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/__init__.py +6 -0
- agno/agent/agent.py +5252 -3145
- agno/agent/remote.py +525 -0
- agno/api/api.py +2 -0
- agno/client/__init__.py +3 -0
- agno/client/a2a/__init__.py +10 -0
- agno/client/a2a/client.py +554 -0
- agno/client/a2a/schemas.py +112 -0
- agno/client/a2a/utils.py +369 -0
- agno/client/os.py +2669 -0
- agno/compression/__init__.py +3 -0
- agno/compression/manager.py +247 -0
- agno/culture/manager.py +2 -2
- agno/db/base.py +927 -6
- agno/db/dynamo/dynamo.py +788 -2
- agno/db/dynamo/schemas.py +128 -0
- agno/db/dynamo/utils.py +26 -3
- agno/db/firestore/firestore.py +674 -50
- agno/db/firestore/schemas.py +41 -0
- agno/db/firestore/utils.py +25 -10
- agno/db/gcs_json/gcs_json_db.py +506 -3
- agno/db/gcs_json/utils.py +14 -2
- agno/db/in_memory/in_memory_db.py +203 -4
- agno/db/in_memory/utils.py +14 -2
- agno/db/json/json_db.py +498 -2
- agno/db/json/utils.py +14 -2
- agno/db/migrations/manager.py +199 -0
- agno/db/migrations/utils.py +19 -0
- agno/db/migrations/v1_to_v2.py +54 -16
- agno/db/migrations/versions/__init__.py +0 -0
- agno/db/migrations/versions/v2_3_0.py +977 -0
- agno/db/mongo/async_mongo.py +1013 -39
- agno/db/mongo/mongo.py +684 -4
- agno/db/mongo/schemas.py +48 -0
- agno/db/mongo/utils.py +17 -0
- agno/db/mysql/__init__.py +2 -1
- agno/db/mysql/async_mysql.py +2958 -0
- agno/db/mysql/mysql.py +722 -53
- agno/db/mysql/schemas.py +77 -11
- agno/db/mysql/utils.py +151 -8
- agno/db/postgres/async_postgres.py +1254 -137
- agno/db/postgres/postgres.py +2316 -93
- agno/db/postgres/schemas.py +153 -21
- agno/db/postgres/utils.py +22 -7
- agno/db/redis/redis.py +531 -3
- agno/db/redis/schemas.py +36 -0
- agno/db/redis/utils.py +31 -15
- agno/db/schemas/evals.py +1 -0
- agno/db/schemas/memory.py +20 -9
- agno/db/singlestore/schemas.py +70 -1
- agno/db/singlestore/singlestore.py +737 -74
- agno/db/singlestore/utils.py +13 -3
- agno/db/sqlite/async_sqlite.py +1069 -89
- agno/db/sqlite/schemas.py +133 -1
- agno/db/sqlite/sqlite.py +2203 -165
- agno/db/sqlite/utils.py +21 -11
- agno/db/surrealdb/models.py +25 -0
- agno/db/surrealdb/surrealdb.py +603 -1
- agno/db/utils.py +60 -0
- agno/eval/__init__.py +26 -3
- agno/eval/accuracy.py +25 -12
- agno/eval/agent_as_judge.py +871 -0
- agno/eval/base.py +29 -0
- agno/eval/performance.py +10 -4
- agno/eval/reliability.py +22 -13
- agno/eval/utils.py +2 -1
- agno/exceptions.py +42 -0
- agno/hooks/__init__.py +3 -0
- agno/hooks/decorator.py +164 -0
- agno/integrations/discord/client.py +13 -2
- agno/knowledge/__init__.py +4 -0
- agno/knowledge/chunking/code.py +90 -0
- agno/knowledge/chunking/document.py +65 -4
- agno/knowledge/chunking/fixed.py +4 -1
- agno/knowledge/chunking/markdown.py +102 -11
- agno/knowledge/chunking/recursive.py +2 -2
- agno/knowledge/chunking/semantic.py +130 -48
- agno/knowledge/chunking/strategy.py +18 -0
- agno/knowledge/embedder/azure_openai.py +0 -1
- agno/knowledge/embedder/google.py +1 -1
- agno/knowledge/embedder/mistral.py +1 -1
- agno/knowledge/embedder/nebius.py +1 -1
- agno/knowledge/embedder/openai.py +16 -12
- agno/knowledge/filesystem.py +412 -0
- agno/knowledge/knowledge.py +4261 -1199
- agno/knowledge/protocol.py +134 -0
- agno/knowledge/reader/arxiv_reader.py +3 -2
- agno/knowledge/reader/base.py +9 -7
- agno/knowledge/reader/csv_reader.py +91 -42
- agno/knowledge/reader/docx_reader.py +9 -10
- agno/knowledge/reader/excel_reader.py +225 -0
- agno/knowledge/reader/field_labeled_csv_reader.py +38 -48
- agno/knowledge/reader/firecrawl_reader.py +3 -2
- agno/knowledge/reader/json_reader.py +16 -22
- agno/knowledge/reader/markdown_reader.py +15 -14
- agno/knowledge/reader/pdf_reader.py +33 -28
- agno/knowledge/reader/pptx_reader.py +9 -10
- agno/knowledge/reader/reader_factory.py +135 -1
- agno/knowledge/reader/s3_reader.py +8 -16
- agno/knowledge/reader/tavily_reader.py +3 -3
- agno/knowledge/reader/text_reader.py +15 -14
- agno/knowledge/reader/utils/__init__.py +17 -0
- agno/knowledge/reader/utils/spreadsheet.py +114 -0
- agno/knowledge/reader/web_search_reader.py +8 -65
- agno/knowledge/reader/website_reader.py +16 -13
- agno/knowledge/reader/wikipedia_reader.py +36 -3
- agno/knowledge/reader/youtube_reader.py +3 -2
- agno/knowledge/remote_content/__init__.py +33 -0
- agno/knowledge/remote_content/config.py +266 -0
- agno/knowledge/remote_content/remote_content.py +105 -17
- agno/knowledge/utils.py +76 -22
- agno/learn/__init__.py +71 -0
- agno/learn/config.py +463 -0
- agno/learn/curate.py +185 -0
- agno/learn/machine.py +725 -0
- agno/learn/schemas.py +1114 -0
- agno/learn/stores/__init__.py +38 -0
- agno/learn/stores/decision_log.py +1156 -0
- agno/learn/stores/entity_memory.py +3275 -0
- agno/learn/stores/learned_knowledge.py +1583 -0
- agno/learn/stores/protocol.py +117 -0
- agno/learn/stores/session_context.py +1217 -0
- agno/learn/stores/user_memory.py +1495 -0
- agno/learn/stores/user_profile.py +1220 -0
- agno/learn/utils.py +209 -0
- agno/media.py +22 -6
- agno/memory/__init__.py +14 -1
- agno/memory/manager.py +223 -8
- agno/memory/strategies/__init__.py +15 -0
- agno/memory/strategies/base.py +66 -0
- agno/memory/strategies/summarize.py +196 -0
- agno/memory/strategies/types.py +37 -0
- agno/models/aimlapi/aimlapi.py +17 -0
- agno/models/anthropic/claude.py +434 -59
- agno/models/aws/bedrock.py +121 -20
- agno/models/aws/claude.py +131 -274
- agno/models/azure/ai_foundry.py +10 -6
- agno/models/azure/openai_chat.py +33 -10
- agno/models/base.py +1162 -561
- agno/models/cerebras/cerebras.py +120 -24
- agno/models/cerebras/cerebras_openai.py +21 -2
- agno/models/cohere/chat.py +65 -6
- agno/models/cometapi/cometapi.py +18 -1
- agno/models/dashscope/dashscope.py +2 -3
- agno/models/deepinfra/deepinfra.py +18 -1
- agno/models/deepseek/deepseek.py +69 -3
- agno/models/fireworks/fireworks.py +18 -1
- agno/models/google/gemini.py +959 -89
- agno/models/google/utils.py +22 -0
- agno/models/groq/groq.py +48 -18
- agno/models/huggingface/huggingface.py +17 -6
- agno/models/ibm/watsonx.py +16 -6
- agno/models/internlm/internlm.py +18 -1
- agno/models/langdb/langdb.py +13 -1
- agno/models/litellm/chat.py +88 -9
- agno/models/litellm/litellm_openai.py +18 -1
- agno/models/message.py +24 -5
- agno/models/meta/llama.py +40 -13
- agno/models/meta/llama_openai.py +22 -21
- agno/models/metrics.py +12 -0
- agno/models/mistral/mistral.py +8 -4
- agno/models/n1n/__init__.py +3 -0
- agno/models/n1n/n1n.py +57 -0
- agno/models/nebius/nebius.py +6 -7
- agno/models/nvidia/nvidia.py +20 -3
- agno/models/ollama/__init__.py +2 -0
- agno/models/ollama/chat.py +17 -6
- agno/models/ollama/responses.py +100 -0
- agno/models/openai/__init__.py +2 -0
- agno/models/openai/chat.py +117 -26
- agno/models/openai/open_responses.py +46 -0
- agno/models/openai/responses.py +110 -32
- agno/models/openrouter/__init__.py +2 -0
- agno/models/openrouter/openrouter.py +67 -2
- agno/models/openrouter/responses.py +146 -0
- agno/models/perplexity/perplexity.py +19 -1
- agno/models/portkey/portkey.py +7 -6
- agno/models/requesty/requesty.py +19 -2
- agno/models/response.py +20 -2
- agno/models/sambanova/sambanova.py +20 -3
- agno/models/siliconflow/siliconflow.py +19 -2
- agno/models/together/together.py +20 -3
- agno/models/vercel/v0.py +20 -3
- agno/models/vertexai/claude.py +124 -4
- agno/models/vllm/vllm.py +19 -14
- agno/models/xai/xai.py +19 -2
- agno/os/app.py +467 -137
- agno/os/auth.py +253 -5
- agno/os/config.py +22 -0
- agno/os/interfaces/a2a/a2a.py +7 -6
- agno/os/interfaces/a2a/router.py +635 -26
- agno/os/interfaces/a2a/utils.py +32 -33
- agno/os/interfaces/agui/agui.py +5 -3
- agno/os/interfaces/agui/router.py +26 -16
- agno/os/interfaces/agui/utils.py +97 -57
- agno/os/interfaces/base.py +7 -7
- agno/os/interfaces/slack/router.py +16 -7
- agno/os/interfaces/slack/slack.py +7 -7
- agno/os/interfaces/whatsapp/router.py +35 -7
- agno/os/interfaces/whatsapp/security.py +3 -1
- agno/os/interfaces/whatsapp/whatsapp.py +11 -8
- agno/os/managers.py +326 -0
- agno/os/mcp.py +652 -79
- agno/os/middleware/__init__.py +4 -0
- agno/os/middleware/jwt.py +718 -115
- agno/os/middleware/trailing_slash.py +27 -0
- agno/os/router.py +105 -1558
- agno/os/routers/agents/__init__.py +3 -0
- agno/os/routers/agents/router.py +655 -0
- agno/os/routers/agents/schema.py +288 -0
- agno/os/routers/components/__init__.py +3 -0
- agno/os/routers/components/components.py +475 -0
- agno/os/routers/database.py +155 -0
- agno/os/routers/evals/evals.py +111 -18
- agno/os/routers/evals/schemas.py +38 -5
- agno/os/routers/evals/utils.py +80 -11
- agno/os/routers/health.py +3 -3
- agno/os/routers/knowledge/knowledge.py +284 -35
- agno/os/routers/knowledge/schemas.py +14 -2
- agno/os/routers/memory/memory.py +274 -11
- agno/os/routers/memory/schemas.py +44 -3
- agno/os/routers/metrics/metrics.py +30 -15
- agno/os/routers/metrics/schemas.py +10 -6
- agno/os/routers/registry/__init__.py +3 -0
- agno/os/routers/registry/registry.py +337 -0
- agno/os/routers/session/session.py +143 -14
- agno/os/routers/teams/__init__.py +3 -0
- agno/os/routers/teams/router.py +550 -0
- agno/os/routers/teams/schema.py +280 -0
- agno/os/routers/traces/__init__.py +3 -0
- agno/os/routers/traces/schemas.py +414 -0
- agno/os/routers/traces/traces.py +549 -0
- agno/os/routers/workflows/__init__.py +3 -0
- agno/os/routers/workflows/router.py +757 -0
- agno/os/routers/workflows/schema.py +139 -0
- agno/os/schema.py +157 -584
- agno/os/scopes.py +469 -0
- agno/os/settings.py +3 -0
- agno/os/utils.py +574 -185
- agno/reasoning/anthropic.py +85 -1
- agno/reasoning/azure_ai_foundry.py +93 -1
- agno/reasoning/deepseek.py +102 -2
- agno/reasoning/default.py +6 -7
- agno/reasoning/gemini.py +87 -3
- agno/reasoning/groq.py +109 -2
- agno/reasoning/helpers.py +6 -7
- agno/reasoning/manager.py +1238 -0
- agno/reasoning/ollama.py +93 -1
- agno/reasoning/openai.py +115 -1
- agno/reasoning/vertexai.py +85 -1
- agno/registry/__init__.py +3 -0
- agno/registry/registry.py +68 -0
- agno/remote/__init__.py +3 -0
- agno/remote/base.py +581 -0
- agno/run/__init__.py +2 -4
- agno/run/agent.py +134 -19
- agno/run/base.py +49 -1
- agno/run/cancel.py +65 -52
- agno/run/cancellation_management/__init__.py +9 -0
- agno/run/cancellation_management/base.py +78 -0
- agno/run/cancellation_management/in_memory_cancellation_manager.py +100 -0
- agno/run/cancellation_management/redis_cancellation_manager.py +236 -0
- agno/run/requirement.py +181 -0
- agno/run/team.py +111 -19
- agno/run/workflow.py +2 -1
- agno/session/agent.py +57 -92
- agno/session/summary.py +1 -1
- agno/session/team.py +62 -115
- agno/session/workflow.py +353 -57
- agno/skills/__init__.py +17 -0
- agno/skills/agent_skills.py +377 -0
- agno/skills/errors.py +32 -0
- agno/skills/loaders/__init__.py +4 -0
- agno/skills/loaders/base.py +27 -0
- agno/skills/loaders/local.py +216 -0
- agno/skills/skill.py +65 -0
- agno/skills/utils.py +107 -0
- agno/skills/validator.py +277 -0
- agno/table.py +10 -0
- agno/team/__init__.py +5 -1
- agno/team/remote.py +447 -0
- agno/team/team.py +3769 -2202
- agno/tools/brandfetch.py +27 -18
- agno/tools/browserbase.py +225 -16
- agno/tools/crawl4ai.py +3 -0
- agno/tools/duckduckgo.py +25 -71
- agno/tools/exa.py +0 -21
- agno/tools/file.py +14 -13
- agno/tools/file_generation.py +12 -6
- agno/tools/firecrawl.py +15 -7
- agno/tools/function.py +94 -113
- agno/tools/google_bigquery.py +11 -2
- agno/tools/google_drive.py +4 -3
- agno/tools/knowledge.py +9 -4
- agno/tools/mcp/mcp.py +301 -18
- agno/tools/mcp/multi_mcp.py +269 -14
- agno/tools/mem0.py +11 -10
- agno/tools/memory.py +47 -46
- agno/tools/mlx_transcribe.py +10 -7
- agno/tools/models/nebius.py +5 -5
- agno/tools/models_labs.py +20 -10
- agno/tools/nano_banana.py +151 -0
- agno/tools/parallel.py +0 -7
- agno/tools/postgres.py +76 -36
- agno/tools/python.py +14 -6
- agno/tools/reasoning.py +30 -23
- agno/tools/redshift.py +406 -0
- agno/tools/shopify.py +1519 -0
- agno/tools/spotify.py +919 -0
- agno/tools/tavily.py +4 -1
- agno/tools/toolkit.py +253 -18
- agno/tools/websearch.py +93 -0
- agno/tools/website.py +1 -1
- agno/tools/wikipedia.py +1 -1
- agno/tools/workflow.py +56 -48
- agno/tools/yfinance.py +12 -11
- agno/tracing/__init__.py +12 -0
- agno/tracing/exporter.py +161 -0
- agno/tracing/schemas.py +276 -0
- agno/tracing/setup.py +112 -0
- agno/utils/agent.py +251 -10
- agno/utils/cryptography.py +22 -0
- agno/utils/dttm.py +33 -0
- agno/utils/events.py +264 -7
- agno/utils/hooks.py +111 -3
- agno/utils/http.py +161 -2
- agno/utils/mcp.py +49 -8
- agno/utils/media.py +22 -1
- agno/utils/models/ai_foundry.py +9 -2
- agno/utils/models/claude.py +20 -5
- agno/utils/models/cohere.py +9 -2
- agno/utils/models/llama.py +9 -2
- agno/utils/models/mistral.py +4 -2
- agno/utils/os.py +0 -0
- agno/utils/print_response/agent.py +99 -16
- agno/utils/print_response/team.py +223 -24
- agno/utils/print_response/workflow.py +0 -2
- agno/utils/prompts.py +8 -6
- agno/utils/remote.py +23 -0
- agno/utils/response.py +1 -13
- agno/utils/string.py +91 -2
- agno/utils/team.py +62 -12
- agno/utils/tokens.py +657 -0
- agno/vectordb/base.py +15 -2
- agno/vectordb/cassandra/cassandra.py +1 -1
- agno/vectordb/chroma/__init__.py +2 -1
- agno/vectordb/chroma/chromadb.py +468 -23
- agno/vectordb/clickhouse/clickhousedb.py +1 -1
- agno/vectordb/couchbase/couchbase.py +6 -2
- agno/vectordb/lancedb/lance_db.py +7 -38
- agno/vectordb/lightrag/lightrag.py +7 -6
- agno/vectordb/milvus/milvus.py +118 -84
- agno/vectordb/mongodb/__init__.py +2 -1
- agno/vectordb/mongodb/mongodb.py +14 -31
- agno/vectordb/pgvector/pgvector.py +120 -66
- agno/vectordb/pineconedb/pineconedb.py +2 -19
- agno/vectordb/qdrant/__init__.py +2 -1
- agno/vectordb/qdrant/qdrant.py +33 -56
- agno/vectordb/redis/__init__.py +2 -1
- agno/vectordb/redis/redisdb.py +19 -31
- agno/vectordb/singlestore/singlestore.py +17 -9
- agno/vectordb/surrealdb/surrealdb.py +2 -38
- agno/vectordb/weaviate/__init__.py +2 -1
- agno/vectordb/weaviate/weaviate.py +7 -3
- agno/workflow/__init__.py +5 -1
- agno/workflow/agent.py +2 -2
- agno/workflow/condition.py +12 -10
- agno/workflow/loop.py +28 -9
- agno/workflow/parallel.py +21 -13
- agno/workflow/remote.py +362 -0
- agno/workflow/router.py +12 -9
- agno/workflow/step.py +261 -36
- agno/workflow/steps.py +12 -8
- agno/workflow/types.py +40 -77
- agno/workflow/workflow.py +939 -213
- {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/METADATA +134 -181
- agno-2.4.3.dist-info/RECORD +677 -0
- {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/WHEEL +1 -1
- agno/tools/googlesearch.py +0 -98
- agno/tools/memori.py +0 -339
- agno-2.2.13.dist-info/RECORD +0 -575
- {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/licenses/LICENSE +0 -0
- {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/top_level.txt +0 -0
agno/vectordb/chroma/__init__.py
CHANGED
agno/vectordb/chroma/chromadb.py
CHANGED
@@ -1,7 +1,9 @@
 import asyncio
 import json
+from collections import defaultdict
+from concurrent.futures import ThreadPoolExecutor
 from hashlib import md5
-from typing import Any, Dict, List, Mapping, Optional, Union, cast
+from typing import Any, Dict, List, Mapping, Optional, Tuple, Union, cast

 try:
     from chromadb import Client as ChromaDbClient
@@ -20,12 +22,64 @@ from agno.knowledge.reranker.base import Reranker
 from agno.utils.log import log_debug, log_error, log_info, log_warning, logger
 from agno.vectordb.base import VectorDb
 from agno.vectordb.distance import Distance
+from agno.vectordb.search import SearchType
+
+
+def reciprocal_rank_fusion(
+    ranked_lists: List[List[Tuple[str, float]]],
+    k: int = 60,
+) -> List[Tuple[str, float]]:
+    """
+    Combine multiple ranked lists using Reciprocal Rank Fusion (RRF).
+
+    RRF is a simple yet effective method for combining multiple rankings.
+    The formula is: RRF(d) = sum(1 / (k + rank_i(d))) for each ranking i
+
+    Args:
+        ranked_lists: List of ranked results, each as [(doc_id, score), ...]
+        k: RRF constant (default 60, as per original paper by Cormack et al.)
+
+    Returns:
+        Fused ranking as [(doc_id, rrf_score), ...] sorted by score descending
+    """
+    rrf_scores: Dict[str, float] = defaultdict(float)
+
+    for ranked_list in ranked_lists:
+        for rank, (doc_id, _) in enumerate(ranked_list, start=1):
+            rrf_scores[doc_id] += 1.0 / (k + rank)
+
+    sorted_results = sorted(rrf_scores.items(), key=lambda x: x[1], reverse=True)
+    return sorted_results


 class ChromaDb(VectorDb):
+    """
+    ChromaDb class for managing vector operations with ChromaDB.
+
+    Args:
+        collection: The name of the ChromaDB collection. If not provided, derived from 'name'.
+        name: Name of the vector database. Also used as collection name if 'collection' is not provided.
+        description: Description of the vector database.
+        id: Unique identifier for this vector database instance.
+        embedder: The embedder to use when embedding the document contents.
+        distance: The distance metric to use when searching for documents.
+        path: The path to store the ChromaDB data (for persistent client).
+        persistent_client: Whether to use a persistent client.
+        search_type: The search type to use when searching for documents.
+            - SearchType.vector: Pure vector similarity search (default)
+            - SearchType.keyword: Keyword-based search using document content
+            - SearchType.hybrid: Combines vector + FTS with Reciprocal Rank Fusion
+        hybrid_rrf_k: RRF (Reciprocal Rank Fusion) constant for hybrid search.
+            Controls ranking smoothness - higher values give more weight to lower-ranked
+            results, lower values make top results more dominant. Default is 60
+            (per original RRF paper by Cormack et al.).
+        reranker: The reranker to use when reranking documents.
+        **kwargs: Additional arguments to pass to the ChromaDB client.
+    """
+
     def __init__(
         self,
-        collection: str,
+        collection: Optional[str] = None,
         name: Optional[str] = None,
         description: Optional[str] = None,
         id: Optional[str] = None,
@@ -33,12 +87,18 @@ class ChromaDb(VectorDb):
         distance: Distance = Distance.cosine,
         path: str = "tmp/chromadb",
         persistent_client: bool = False,
+        search_type: SearchType = SearchType.vector,
+        hybrid_rrf_k: int = 60,
         reranker: Optional[Reranker] = None,
         **kwargs,
     ):
-        #
-        if
-
+        # Derive collection from name if not provided
+        if collection is None:
+            if name is not None:
+                # Sanitize name: lowercase and replace spaces with underscores
+                collection = name.lower().replace(" ", "_")
+            else:
+                raise ValueError("Either 'collection' or 'name' must be provided.")

         # Dynamic ID generation based on unique identifiers
         if id is None:
@@ -57,7 +117,7 @@ class ChromaDb(VectorDb):
             from agno.knowledge.embedder.openai import OpenAIEmbedder

             embedder = OpenAIEmbedder()
-
+            log_debug("Embedder not provided, using OpenAIEmbedder as default.")
         self.embedder: Embedder = embedder
         # Distance metric
         self.distance: Distance = distance
@@ -72,6 +132,10 @@ class ChromaDb(VectorDb):
         self.persistent_client: bool = persistent_client
         self.path: str = path

+        # Search type configuration
+        self.search_type: SearchType = search_type
+        self.hybrid_rrf_k: int = hybrid_rrf_k
+
         # Reranker instance
         self.reranker: Optional[Reranker] = reranker

@@ -272,11 +336,13 @@ class ChromaDb(VectorDb):
             embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
             await asyncio.gather(*embed_tasks, return_exceptions=True)
         except Exception as e:
-
+            logger.error(f"Error processing document: {e}")

         for document in documents:
             cleaned_content = document.content.replace("\x00", "\ufffd")
-
+            # Include content_hash in ID to ensure uniqueness across different content hashes
+            base_id = document.id or md5(cleaned_content.encode()).hexdigest()
+            doc_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()

             # Handle metadata and filters
             metadata = document.meta_data or {}
@@ -435,7 +501,9 @@ class ChromaDb(VectorDb):

         for document in documents:
             cleaned_content = document.content.replace("\x00", "\ufffd")
-
+            # Include content_hash in ID to ensure uniqueness across different content hashes
+            base_id = document.id or md5(cleaned_content.encode()).hexdigest()
+            doc_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()

             # Handle metadata and filters
             metadata = document.meta_data or {}
@@ -498,6 +566,41 @@ class ChromaDb(VectorDb):
         if isinstance(filters, list):
             log_warning("Filter Expressions are not yet supported in ChromaDB. No filters will be applied.")
             filters = None
+
+        if not self._collection:
+            self._collection = self.client.get_collection(name=self.collection_name)
+
+        # Route to appropriate search method based on search_type
+        if self.search_type == SearchType.vector:
+            search_results = self._vector_search(query, limit, filters)
+        elif self.search_type == SearchType.keyword:
+            search_results = self._keyword_search(query, limit, filters)
+        elif self.search_type == SearchType.hybrid:
+            search_results = self._hybrid_search(query, limit, filters)
+        else:
+            logger.error(f"Invalid search type '{self.search_type}'.")
+            return []
+
+        if self.reranker and search_results:
+            try:
+                search_results = self.reranker.rerank(query=query, documents=search_results)
+            except Exception as e:
+                log_warning(f"Reranker failed, returning unranked results: {e}")
+
+        log_info(f"Found {len(search_results)} documents")
+        return search_results
+
+    def _vector_search(self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
+        """Perform pure vector similarity search.
+
+        Args:
+            query (str): Query to search for.
+            limit (int): Number of results to return.
+            filters (Optional[Dict[str, Any]]): Metadata filters to apply.
+
+        Returns:
+            List[Document]: List of search results.
+        """
         query_embedding = self.embedder.get_embedding(query)
         if query_embedding is None:
             logger.error(f"Error getting embedding for Query: {query}")
@@ -512,11 +615,248 @@ class ChromaDb(VectorDb):
         result: QueryResult = self._collection.query(
             query_embeddings=query_embedding,
             n_results=limit,
-            where=where_filter,
+            where=where_filter,
             include=["metadatas", "documents", "embeddings", "distances", "uris"],
         )

-
+        return self._build_search_results(result)
+
+    def _keyword_search(self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
+        """Perform keyword-based search using document content filtering.
+
+        This uses ChromaDB's where_document filter with $contains operator
+        for basic full-text search functionality.
+
+        Args:
+            query (str): Query to search for (keywords to match in document content).
+            limit (int): Number of results to return.
+            filters (Optional[Dict[str, Any]]): Metadata filters to apply.
+
+        Returns:
+            List[Document]: List of search results.
+        """
+        if not self._collection:
+            self._collection = self.client.get_collection(name=self.collection_name)
+
+        # Convert simple filters to ChromaDB's format if needed
+        where_filter = self._convert_filters(filters) if filters else None
+
+        # Get first significant word for $contains filter
+        query_words = query.split()
+        if not query_words:
+            return []
+
+        # Use where_document to filter by document content
+        where_document: Dict[str, Any] = {"$contains": query_words[0]}
+
+        try:
+            # Get documents matching the keyword filter
+            result = self._collection.get(
+                where=where_filter,
+                where_document=cast(Any, where_document),
+                limit=limit,
+                include=["metadatas", "documents", "embeddings"],
+            )
+
+            return self._build_get_results(cast(Dict[str, Any], result), query)
+        except Exception as e:
+            logger.error(f"Error in keyword search: {e}")
+            return []
+
+    def _hybrid_search(self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
+        """Perform hybrid search combining vector similarity with full-text search using RRF.
+
+        This method combines:
+        1. Dense vector similarity search (semantic search)
+        2. Full-text search (keyword/lexical search)
+
+        Results are fused using Reciprocal Rank Fusion (RRF) for optimal ranking.
+
+        Args:
+            query (str): Query to search for.
+            limit (int): Number of results to return.
+            filters (Optional[Dict[str, Any]]): Metadata filters to apply.
+
+        Returns:
+            List[Document]: List of search results with RRF-fused ranking.
+        """
+        query_embedding = self.embedder.get_embedding(query)
+        if query_embedding is None:
+            logger.error(f"Error getting embedding for Query: {query}")
+            return []
+
+        if not self._collection:
+            self._collection = self.client.get_collection(name=self.collection_name)
+
+        # Convert simple filters to ChromaDB's format if needed
+        where_filter = self._convert_filters(filters) if filters else None
+
+        # Fetch more candidates than needed for better fusion
+        fetch_k = min(limit * 3, 100)
+
+        def dense_vector_similarity_search() -> List[Tuple[str, float]]:
+            """Dense vector similarity search."""
+            try:
+                results = self._collection.query(  # type: ignore
+                    query_embeddings=query_embedding,
+                    n_results=fetch_k,
+                    where=where_filter,
+                    include=["documents", "metadatas", "distances"],
+                )
+
+                ranked: List[Tuple[str, float]] = []
+                if results.get("ids") and results["ids"][0]:
+                    for i, doc_id in enumerate(results["ids"][0]):
+                        distance = results["distances"][0][i] if results.get("distances") else 0  # type: ignore
+                        # Convert distance to similarity score (lower distance = higher score)
+                        score = 1.0 / (1.0 + distance)
+                        ranked.append((doc_id, score))
+                return ranked
+            except Exception as e:
+                log_error(f"Error in vector search component: {e}")
+                return []
+
+        def fts_search() -> List[Tuple[str, float]]:
+            """Full-text search using ChromaDB's where_document filter."""
+            try:
+                query_words = query.split()
+                if not query_words:
+                    return []
+
+                # Use first word for $contains filter
+                fts_where_document: Dict[str, Any] = {"$contains": query_words[0]}
+
+                results = self._collection.query(  # type: ignore
+                    query_embeddings=query_embedding,
+                    n_results=fetch_k,
+                    where=where_filter,
+                    where_document=cast(Any, fts_where_document),
+                    include=["documents", "metadatas", "distances"],
+                )
+
+                ranked: List[Tuple[str, float]] = []
+                if results.get("ids") and results["ids"][0]:
+                    for i, doc_id in enumerate(results["ids"][0]):
+                        # Score based on term overlap (simple BM25-like scoring)
+                        doc = results["documents"][0][i] if results.get("documents") else ""  # type: ignore
+                        query_terms = set(query.lower().split())
+                        doc_terms = set(doc.lower().split()) if doc else set()
+                        overlap = len(query_terms & doc_terms)
+                        score = overlap / max(len(query_terms), 1)
+                        ranked.append((doc_id, score))
+
+                # Sort by score descending
+                ranked.sort(key=lambda x: x[1], reverse=True)
+                return ranked
+            except Exception as e:
+                log_error(f"Error in FTS search component: {e}")
+                return []
+
+        # Execute searches in parallel for better performance
+        with ThreadPoolExecutor(max_workers=2) as executor:
+            vector_future = executor.submit(dense_vector_similarity_search)
+            fts_future = executor.submit(fts_search)
+
+            vector_results = vector_future.result()
+            fts_results = fts_future.result()
+
+        # Apply RRF fusion
+        fused_ranking = reciprocal_rank_fusion(
+            [vector_results, fts_results],
+            k=self.hybrid_rrf_k,
+        )
+
+        # Get top IDs from fused ranking
+        top_ids = [doc_id for doc_id, _ in fused_ranking[:limit]]
+
+        if not top_ids:
+            return []
+
+        # Fetch full document data for top results
+        try:
+            full_results = self._collection.get(
+                ids=top_ids,
+                include=["documents", "metadatas", "embeddings"],
+            )
+        except Exception as e:
+            log_error(f"Error fetching full results: {e}")
+            return []
+
+        # Build lookup dict for results
+        doc_lookup: Dict[str, Dict[str, Any]] = {}
+        result_ids = full_results.get("ids", [])
+        result_docs = full_results.get("documents")
+        result_metas = full_results.get("metadatas")
+        result_embeds = full_results.get("embeddings")
+
+        for i, doc_id in enumerate(result_ids if result_ids is not None else []):
+            doc_lookup[doc_id] = {
+                "document": result_docs[i] if result_docs is not None and i < len(result_docs) else None,
+                "metadata": result_metas[i] if result_metas is not None and i < len(result_metas) else None,
+                "embedding": result_embeds[i] if result_embeds is not None and i < len(result_embeds) else None,
+            }
+
+        # Build final results in fused ranking order
+        search_results: List[Document] = []
+        rrf_scores = dict(fused_ranking)
+
+        for doc_id in top_ids:
+            if doc_id not in doc_lookup:
+                continue
+
+            doc_data = doc_lookup[doc_id]
+            doc_metadata = dict(doc_data["metadata"]) if doc_data["metadata"] else {}
+
+            # Add RRF score to metadata
+            doc_metadata["rrf_score"] = rrf_scores.get(doc_id, 0.0)
+
+            # Extract the fields we added to metadata
+            name_val = doc_metadata.pop("name", None)
+            content_id_val = doc_metadata.pop("content_id", None)
+
+            # Convert types to match Document constructor expectations
+            name = str(name_val) if name_val is not None and not isinstance(name_val, str) else name_val
+            content_id = (
+                str(content_id_val)
+                if content_id_val is not None and not isinstance(content_id_val, str)
+                else content_id_val
+            )
+            content = str(doc_data["document"]) if doc_data["document"] is not None else ""
+
+            # Process embedding
+            embedding = None
+            if doc_data["embedding"] is not None:
+                embed_data = doc_data["embedding"]
+                if hasattr(embed_data, "tolist") and callable(getattr(embed_data, "tolist", None)):
+                    try:
+                        embedding = list(cast(Any, embed_data).tolist())
+                    except (AttributeError, TypeError):
+                        embedding = list(embed_data) if isinstance(embed_data, (list, tuple)) else None
+                elif isinstance(embed_data, (list, tuple)):
+                    embedding = [float(x) for x in embed_data if isinstance(x, (int, float))]
+
+            search_results.append(
+                Document(
+                    id=doc_id,
+                    name=name,
+                    meta_data=doc_metadata,
+                    content=content,
+                    embedding=embedding,
+                    content_id=content_id,
+                )
+            )
+
+        return search_results
+
+    def _build_search_results(self, result: QueryResult) -> List[Document]:
+        """Build Document list from ChromaDB QueryResult.
+
+        Args:
+            result: The QueryResult from ChromaDB query.
+
+        Returns:
+            List[Document]: List of Document objects.
+        """
         search_results: List[Document] = []

         ids_list = result.get("ids", [[]])  # type: ignore
@@ -525,13 +865,33 @@ class ChromaDb(VectorDb):
         embeddings_list = result.get("embeddings")  # type: ignore
         distances_list = result.get("distances", [[]])  # type: ignore

-
+        # Check if we have valid results - handle numpy arrays carefully
+        if ids_list is None or len(ids_list) == 0:
+            return search_results
+        if metadata_list is None or len(metadata_list) == 0:
+            return search_results
+        if documents_list is None or len(documents_list) == 0:
+            return search_results
+        if distances_list is None or len(distances_list) == 0:
            return search_results

         ids = ids_list[0]
         metadata = [dict(m) if m else {} for m in metadata_list[0]]  # Convert to mutable dicts
         documents = documents_list[0]
-
+
+        # Handle embeddings - may be None or numpy array
+        embeddings_raw: Any = []
+        if embeddings_list is not None:
+            try:
+                if len(embeddings_list) > 0:
+                    embeddings_raw = embeddings_list[0]
+            except (TypeError, ValueError):
+                # numpy array truth value issue - try direct access
+                try:
+                    embeddings_raw = embeddings_list[0]
+                except Exception:
+                    embeddings_raw = []
+
         embeddings = []
         for e in embeddings_raw:
             if hasattr(e, "tolist") and callable(getattr(e, "tolist", None)):
@@ -545,7 +905,8 @@ class ChromaDb(VectorDb):
                 embeddings.append([float(e)])
             else:
                 embeddings.append([])
-
+
+        distances = distances_list[0] if len(distances_list) > 0 else []

         for idx, distance in enumerate(distances):
             if idx < len(metadata):
@@ -578,12 +939,95 @@ class ChromaDb(VectorDb):
                    )
                )
            except Exception as e:
-
+                log_error(f"Error building search results: {e}")

-
-
+        return search_results
+
+    def _build_get_results(self, result: Dict[str, Any], query: str = "") -> List[Document]:
+        """Build Document list from ChromaDB GetResult.
+
+        Args:
+            result: The GetResult from ChromaDB get.
+            query: The original query for scoring.
+
+        Returns:
+            List[Document]: List of Document objects.
+        """
+        search_results: List[Document] = []
+
+        ids = result.get("ids", [])
+        metadatas = result.get("metadatas", [])
+        documents = result.get("documents", [])
+        embeddings_raw = result.get("embeddings")
+
+        # Check ids safely (may be numpy array)
+        if ids is None:
+            return search_results
+        try:
+            if len(ids) == 0:
+                return search_results
+        except (TypeError, ValueError):
+            return search_results
+
+        embeddings = []
+        # Handle embeddings - may be None or numpy array
+        if embeddings_raw is not None:
+            try:
+                for e in embeddings_raw:
+                    if hasattr(e, "tolist") and callable(getattr(e, "tolist", None)):
+                        try:
+                            embeddings.append(list(cast(Any, e).tolist()))
+                        except (AttributeError, TypeError):
+                            embeddings.append(list(e) if isinstance(e, (list, tuple)) else [])
+                    elif isinstance(e, (list, tuple)):
+                        embeddings.append([float(x) for x in e if isinstance(x, (int, float))])
+                    elif isinstance(e, (int, float)):
+                        embeddings.append([float(e)])
+                    else:
+                        embeddings.append([])
+            except (TypeError, ValueError):
+                # numpy array iteration issue
+                embeddings = []
+
+        try:
+            for idx, id_ in enumerate(ids):
+                doc_metadata = dict(metadatas[idx]) if metadatas and idx < len(metadatas) and metadatas[idx] else {}
+                document = documents[idx] if documents and idx < len(documents) else ""
+
+                # Calculate simple keyword score if query provided
+                if query and document:
+                    query_terms = set(query.lower().split())
+                    doc_terms = set(document.lower().split())
+                    overlap = len(query_terms & doc_terms)
+                    doc_metadata["keyword_score"] = overlap / max(len(query_terms), 1)
+
+                # Extract the fields we added to metadata
+                name_val = doc_metadata.pop("name", None)
+                content_id_val = doc_metadata.pop("content_id", None)
+
+                # Convert types to match Document constructor expectations
+                name = str(name_val) if name_val is not None and not isinstance(name_val, str) else name_val
+                content_id = (
+                    str(content_id_val)
+                    if content_id_val is not None and not isinstance(content_id_val, str)
+                    else content_id_val
+                )
+                content = str(document) if document is not None else ""
+                embedding = embeddings[idx] if idx < len(embeddings) else None
+
+                search_results.append(
+                    Document(
+                        id=id_,
+                        name=name,
+                        meta_data=doc_metadata,
+                        content=content,
+                        embedding=embedding,
+                        content_id=content_id,
+                    )
+                )
+        except Exception as e:
+            log_error(f"Error building get results: {e}")

-        log_info(f"Found {len(search_results)} documents")
         return search_results

     def _convert_filters(self, filters: Dict[str, Any]) -> Dict[str, Any]:
@@ -888,7 +1332,7 @@ class ChromaDb(VectorDb):
            current_metadatas = []

            if not ids:
-
+                log_debug(f"No documents found with content_id: {content_id}")
                return

            # Flatten the new metadata first
@@ -908,12 +1352,13 @@ class ChromaDb(VectorDb):

            # Convert to the expected type for ChromaDB
            chroma_metadatas = cast(List[Mapping[str, Union[str, int, float, bool]]], updated_metadatas)
+            chroma_metadatas = [{k: v for k, v in m.items() if k and v} for m in chroma_metadatas]
            collection.update(ids=ids, metadatas=chroma_metadatas)  # type: ignore
-
+            log_debug(f"Updated metadata for {len(ids)} documents with content_id: {content_id}")

        except TypeError as te:
            if "object of type 'int' has no len()" in str(te):
-
+                log_warning(
                    f"ChromaDB internal error (version 0.5.0 bug): {te}. Cannot update metadata for content_id '{content_id}'."
                )
                return
@@ -921,9 +1366,9 @@ class ChromaDb(VectorDb):
            raise te

        except Exception as e:
-
+            log_error(f"Error updating metadata for content_id '{content_id}': {e}")
            raise

     def get_supported_search_types(self) -> List[str]:
         """Get the supported search types for this vector database."""
-        return [
+        return [SearchType.vector, SearchType.keyword, SearchType.hybrid]

agno/vectordb/clickhouse/clickhousedb.py
CHANGED
@@ -71,7 +71,7 @@ class Clickhouse(VectorDb):
            from agno.knowledge.embedder.openai import OpenAIEmbedder

            _embedder = OpenAIEmbedder()
-
+            log_debug("Embedder not provided, using OpenAIEmbedder as default.")
        self.embedder: Embedder = _embedder
        self.dimensions: Optional[int] = self.embedder.dimensions

agno/vectordb/couchbase/couchbase.py
CHANGED
@@ -6,7 +6,6 @@ from typing import Any, Dict, List, Optional, Union
 from agno.filters import FilterExpr
 from agno.knowledge.document import Document
 from agno.knowledge.embedder import Embedder
-from agno.knowledge.embedder.openai import OpenAIEmbedder
 from agno.utils.log import log_debug, log_info, log_warning, logger
 from agno.vectordb.base import VectorDb

@@ -62,7 +61,7 @@ class CouchbaseSearch(VectorDb):
        couchbase_connection_string: str,
        cluster_options: ClusterOptions,
        search_index: Union[str, SearchIndex],
-        embedder: Embedder =
+        embedder: Optional[Embedder] = None,
        overwrite: bool = False,
        is_global_level_index: bool = False,
        wait_until_index_ready: float = 0,
@@ -97,6 +96,11 @@ class CouchbaseSearch(VectorDb):
        self.collection_name = collection_name
        self.connection_string = couchbase_connection_string
        self.cluster_options = cluster_options
+        if embedder is None:
+            from agno.knowledge.embedder.openai import OpenAIEmbedder
+
+            embedder = OpenAIEmbedder()
+            log_debug("Embedder not provided, using OpenAIEmbedder as default.")
        self.embedder = embedder
        self.overwrite = overwrite
        self.is_global_level_index = is_global_level_index
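For illustration only (not part of the diff above): a small sketch of how the new ChromaDb hybrid-search pieces fit together. It only uses names visible in the hunks above — reciprocal_rank_fusion and the ChromaDb constructor parameters from agno/vectordb/chroma/chromadb.py, and SearchType from agno/vectordb/search — with hypothetical document ids and scores; the `from agno.vectordb.chroma import ChromaDb` import path is assumed from the package layout in the file list, and with no embedder passed the constructor falls back to OpenAIEmbedder as shown in the hunks.

    from agno.vectordb.chroma import ChromaDb  # assumed export; the class lives in agno/vectordb/chroma/chromadb.py
    from agno.vectordb.chroma.chromadb import reciprocal_rank_fusion
    from agno.vectordb.search import SearchType

    # 'collection' may now be omitted: "Recipe Knowledge" is sanitized to "recipe_knowledge".
    vector_db = ChromaDb(
        name="Recipe Knowledge",
        search_type=SearchType.hybrid,  # hybrid searches go through _hybrid_search
        hybrid_rrf_k=60,                # RRF constant used when fusing the two rankings
        persistent_client=True,
        path="tmp/chromadb",
    )

    # What _hybrid_search does with its two component rankings, on hypothetical ids/scores:
    vector_ranked = [("doc-a", 0.91), ("doc-b", 0.72), ("doc-c", 0.55)]  # dense vector component
    keyword_ranked = [("doc-b", 0.80), ("doc-d", 0.40)]                  # $contains / term-overlap component
    fused = reciprocal_rank_fusion([vector_ranked, keyword_ranked], k=60)
    # doc-b appears in both lists: 1/(60+2) + 1/(60+1) ≈ 0.0325, so it ranks first.
    # doc-a ≈ 0.0164, doc-d ≈ 0.0161 and doc-c ≈ 0.0159 follow. The per-list scores are
    # ignored by RRF; only each document's position within its own list matters.

The keyword_score that _build_get_results writes into result metadata uses the same term-overlap ratio as fts_search: len(query_terms & doc_terms) / max(len(query_terms), 1), so a three-word query matching two words in a document scores roughly 0.67.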