agno 2.1.2__py3-none-any.whl → 2.3.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/agent.py +5540 -2273
- agno/api/api.py +2 -0
- agno/api/os.py +1 -1
- agno/compression/__init__.py +3 -0
- agno/compression/manager.py +247 -0
- agno/culture/__init__.py +3 -0
- agno/culture/manager.py +956 -0
- agno/db/async_postgres/__init__.py +3 -0
- agno/db/base.py +689 -6
- agno/db/dynamo/dynamo.py +933 -37
- agno/db/dynamo/schemas.py +174 -10
- agno/db/dynamo/utils.py +63 -4
- agno/db/firestore/firestore.py +831 -9
- agno/db/firestore/schemas.py +51 -0
- agno/db/firestore/utils.py +102 -4
- agno/db/gcs_json/gcs_json_db.py +660 -12
- agno/db/gcs_json/utils.py +60 -26
- agno/db/in_memory/in_memory_db.py +287 -14
- agno/db/in_memory/utils.py +60 -2
- agno/db/json/json_db.py +590 -14
- agno/db/json/utils.py +60 -26
- agno/db/migrations/manager.py +199 -0
- agno/db/migrations/v1_to_v2.py +43 -13
- agno/db/migrations/versions/__init__.py +0 -0
- agno/db/migrations/versions/v2_3_0.py +938 -0
- agno/db/mongo/__init__.py +15 -1
- agno/db/mongo/async_mongo.py +2760 -0
- agno/db/mongo/mongo.py +879 -11
- agno/db/mongo/schemas.py +42 -0
- agno/db/mongo/utils.py +80 -8
- agno/db/mysql/__init__.py +2 -1
- agno/db/mysql/async_mysql.py +2912 -0
- agno/db/mysql/mysql.py +946 -68
- agno/db/mysql/schemas.py +72 -10
- agno/db/mysql/utils.py +198 -7
- agno/db/postgres/__init__.py +2 -1
- agno/db/postgres/async_postgres.py +2579 -0
- agno/db/postgres/postgres.py +942 -57
- agno/db/postgres/schemas.py +81 -18
- agno/db/postgres/utils.py +164 -2
- agno/db/redis/redis.py +671 -7
- agno/db/redis/schemas.py +50 -0
- agno/db/redis/utils.py +65 -7
- agno/db/schemas/__init__.py +2 -1
- agno/db/schemas/culture.py +120 -0
- agno/db/schemas/evals.py +1 -0
- agno/db/schemas/memory.py +17 -2
- agno/db/singlestore/schemas.py +63 -0
- agno/db/singlestore/singlestore.py +949 -83
- agno/db/singlestore/utils.py +60 -2
- agno/db/sqlite/__init__.py +2 -1
- agno/db/sqlite/async_sqlite.py +2911 -0
- agno/db/sqlite/schemas.py +62 -0
- agno/db/sqlite/sqlite.py +965 -46
- agno/db/sqlite/utils.py +169 -8
- agno/db/surrealdb/__init__.py +3 -0
- agno/db/surrealdb/metrics.py +292 -0
- agno/db/surrealdb/models.py +334 -0
- agno/db/surrealdb/queries.py +71 -0
- agno/db/surrealdb/surrealdb.py +1908 -0
- agno/db/surrealdb/utils.py +147 -0
- agno/db/utils.py +2 -0
- agno/eval/__init__.py +10 -0
- agno/eval/accuracy.py +75 -55
- agno/eval/agent_as_judge.py +861 -0
- agno/eval/base.py +29 -0
- agno/eval/performance.py +16 -7
- agno/eval/reliability.py +28 -16
- agno/eval/utils.py +35 -17
- agno/exceptions.py +27 -2
- agno/filters.py +354 -0
- agno/guardrails/prompt_injection.py +1 -0
- agno/hooks/__init__.py +3 -0
- agno/hooks/decorator.py +164 -0
- agno/integrations/discord/client.py +1 -1
- agno/knowledge/chunking/agentic.py +13 -10
- agno/knowledge/chunking/fixed.py +4 -1
- agno/knowledge/chunking/semantic.py +9 -4
- agno/knowledge/chunking/strategy.py +59 -15
- agno/knowledge/embedder/fastembed.py +1 -1
- agno/knowledge/embedder/nebius.py +1 -1
- agno/knowledge/embedder/ollama.py +8 -0
- agno/knowledge/embedder/openai.py +8 -8
- agno/knowledge/embedder/sentence_transformer.py +6 -2
- agno/knowledge/embedder/vllm.py +262 -0
- agno/knowledge/knowledge.py +1618 -318
- agno/knowledge/reader/base.py +6 -2
- agno/knowledge/reader/csv_reader.py +8 -10
- agno/knowledge/reader/docx_reader.py +5 -6
- agno/knowledge/reader/field_labeled_csv_reader.py +16 -20
- agno/knowledge/reader/json_reader.py +5 -4
- agno/knowledge/reader/markdown_reader.py +8 -8
- agno/knowledge/reader/pdf_reader.py +17 -19
- agno/knowledge/reader/pptx_reader.py +101 -0
- agno/knowledge/reader/reader_factory.py +32 -3
- agno/knowledge/reader/s3_reader.py +3 -3
- agno/knowledge/reader/tavily_reader.py +193 -0
- agno/knowledge/reader/text_reader.py +22 -10
- agno/knowledge/reader/web_search_reader.py +1 -48
- agno/knowledge/reader/website_reader.py +10 -10
- agno/knowledge/reader/wikipedia_reader.py +33 -1
- agno/knowledge/types.py +1 -0
- agno/knowledge/utils.py +72 -7
- agno/media.py +22 -6
- agno/memory/__init__.py +14 -1
- agno/memory/manager.py +544 -83
- agno/memory/strategies/__init__.py +15 -0
- agno/memory/strategies/base.py +66 -0
- agno/memory/strategies/summarize.py +196 -0
- agno/memory/strategies/types.py +37 -0
- agno/models/aimlapi/aimlapi.py +17 -0
- agno/models/anthropic/claude.py +515 -40
- agno/models/aws/bedrock.py +102 -21
- agno/models/aws/claude.py +131 -274
- agno/models/azure/ai_foundry.py +41 -19
- agno/models/azure/openai_chat.py +39 -8
- agno/models/base.py +1249 -525
- agno/models/cerebras/cerebras.py +91 -21
- agno/models/cerebras/cerebras_openai.py +21 -2
- agno/models/cohere/chat.py +40 -6
- agno/models/cometapi/cometapi.py +18 -1
- agno/models/dashscope/dashscope.py +2 -3
- agno/models/deepinfra/deepinfra.py +18 -1
- agno/models/deepseek/deepseek.py +69 -3
- agno/models/fireworks/fireworks.py +18 -1
- agno/models/google/gemini.py +877 -80
- agno/models/google/utils.py +22 -0
- agno/models/groq/groq.py +51 -18
- agno/models/huggingface/huggingface.py +17 -6
- agno/models/ibm/watsonx.py +16 -6
- agno/models/internlm/internlm.py +18 -1
- agno/models/langdb/langdb.py +13 -1
- agno/models/litellm/chat.py +44 -9
- agno/models/litellm/litellm_openai.py +18 -1
- agno/models/message.py +28 -5
- agno/models/meta/llama.py +47 -14
- agno/models/meta/llama_openai.py +22 -17
- agno/models/mistral/mistral.py +8 -4
- agno/models/nebius/nebius.py +6 -7
- agno/models/nvidia/nvidia.py +20 -3
- agno/models/ollama/chat.py +24 -8
- agno/models/openai/chat.py +104 -29
- agno/models/openai/responses.py +101 -81
- agno/models/openrouter/openrouter.py +60 -3
- agno/models/perplexity/perplexity.py +17 -1
- agno/models/portkey/portkey.py +7 -6
- agno/models/requesty/requesty.py +24 -4
- agno/models/response.py +73 -2
- agno/models/sambanova/sambanova.py +20 -3
- agno/models/siliconflow/siliconflow.py +19 -2
- agno/models/together/together.py +20 -3
- agno/models/utils.py +254 -8
- agno/models/vercel/v0.py +20 -3
- agno/models/vertexai/__init__.py +0 -0
- agno/models/vertexai/claude.py +190 -0
- agno/models/vllm/vllm.py +19 -14
- agno/models/xai/xai.py +19 -2
- agno/os/app.py +549 -152
- agno/os/auth.py +190 -3
- agno/os/config.py +23 -0
- agno/os/interfaces/a2a/router.py +8 -11
- agno/os/interfaces/a2a/utils.py +1 -1
- agno/os/interfaces/agui/router.py +18 -3
- agno/os/interfaces/agui/utils.py +152 -39
- agno/os/interfaces/slack/router.py +55 -37
- agno/os/interfaces/slack/slack.py +9 -1
- agno/os/interfaces/whatsapp/router.py +0 -1
- agno/os/interfaces/whatsapp/security.py +3 -1
- agno/os/mcp.py +110 -52
- agno/os/middleware/__init__.py +2 -0
- agno/os/middleware/jwt.py +676 -112
- agno/os/router.py +40 -1478
- agno/os/routers/agents/__init__.py +3 -0
- agno/os/routers/agents/router.py +599 -0
- agno/os/routers/agents/schema.py +261 -0
- agno/os/routers/evals/evals.py +96 -39
- agno/os/routers/evals/schemas.py +65 -33
- agno/os/routers/evals/utils.py +80 -10
- agno/os/routers/health.py +10 -4
- agno/os/routers/knowledge/knowledge.py +196 -38
- agno/os/routers/knowledge/schemas.py +82 -22
- agno/os/routers/memory/memory.py +279 -52
- agno/os/routers/memory/schemas.py +46 -17
- agno/os/routers/metrics/metrics.py +20 -8
- agno/os/routers/metrics/schemas.py +16 -16
- agno/os/routers/session/session.py +462 -34
- agno/os/routers/teams/__init__.py +3 -0
- agno/os/routers/teams/router.py +512 -0
- agno/os/routers/teams/schema.py +257 -0
- agno/os/routers/traces/__init__.py +3 -0
- agno/os/routers/traces/schemas.py +414 -0
- agno/os/routers/traces/traces.py +499 -0
- agno/os/routers/workflows/__init__.py +3 -0
- agno/os/routers/workflows/router.py +624 -0
- agno/os/routers/workflows/schema.py +75 -0
- agno/os/schema.py +256 -693
- agno/os/scopes.py +469 -0
- agno/os/utils.py +514 -36
- agno/reasoning/anthropic.py +80 -0
- agno/reasoning/gemini.py +73 -0
- agno/reasoning/openai.py +5 -0
- agno/reasoning/vertexai.py +76 -0
- agno/run/__init__.py +6 -0
- agno/run/agent.py +155 -32
- agno/run/base.py +55 -3
- agno/run/requirement.py +181 -0
- agno/run/team.py +125 -38
- agno/run/workflow.py +72 -18
- agno/session/agent.py +102 -89
- agno/session/summary.py +56 -15
- agno/session/team.py +164 -90
- agno/session/workflow.py +405 -40
- agno/table.py +10 -0
- agno/team/team.py +3974 -1903
- agno/tools/dalle.py +2 -4
- agno/tools/eleven_labs.py +23 -25
- agno/tools/exa.py +21 -16
- agno/tools/file.py +153 -23
- agno/tools/file_generation.py +16 -10
- agno/tools/firecrawl.py +15 -7
- agno/tools/function.py +193 -38
- agno/tools/gmail.py +238 -14
- agno/tools/google_drive.py +271 -0
- agno/tools/googlecalendar.py +36 -8
- agno/tools/googlesheets.py +20 -5
- agno/tools/jira.py +20 -0
- agno/tools/mcp/__init__.py +10 -0
- agno/tools/mcp/mcp.py +331 -0
- agno/tools/mcp/multi_mcp.py +347 -0
- agno/tools/mcp/params.py +24 -0
- agno/tools/mcp_toolbox.py +3 -3
- agno/tools/models/nebius.py +5 -5
- agno/tools/models_labs.py +20 -10
- agno/tools/nano_banana.py +151 -0
- agno/tools/notion.py +204 -0
- agno/tools/parallel.py +314 -0
- agno/tools/postgres.py +76 -36
- agno/tools/redshift.py +406 -0
- agno/tools/scrapegraph.py +1 -1
- agno/tools/shopify.py +1519 -0
- agno/tools/slack.py +18 -3
- agno/tools/spotify.py +919 -0
- agno/tools/tavily.py +146 -0
- agno/tools/toolkit.py +25 -0
- agno/tools/workflow.py +8 -1
- agno/tools/yfinance.py +12 -11
- agno/tracing/__init__.py +12 -0
- agno/tracing/exporter.py +157 -0
- agno/tracing/schemas.py +276 -0
- agno/tracing/setup.py +111 -0
- agno/utils/agent.py +938 -0
- agno/utils/cryptography.py +22 -0
- agno/utils/dttm.py +33 -0
- agno/utils/events.py +151 -3
- agno/utils/gemini.py +15 -5
- agno/utils/hooks.py +118 -4
- agno/utils/http.py +113 -2
- agno/utils/knowledge.py +12 -5
- agno/utils/log.py +1 -0
- agno/utils/mcp.py +92 -2
- agno/utils/media.py +187 -1
- agno/utils/merge_dict.py +3 -3
- agno/utils/message.py +60 -0
- agno/utils/models/ai_foundry.py +9 -2
- agno/utils/models/claude.py +49 -14
- agno/utils/models/cohere.py +9 -2
- agno/utils/models/llama.py +9 -2
- agno/utils/models/mistral.py +4 -2
- agno/utils/print_response/agent.py +109 -16
- agno/utils/print_response/team.py +223 -30
- agno/utils/print_response/workflow.py +251 -34
- agno/utils/streamlit.py +1 -1
- agno/utils/team.py +98 -9
- agno/utils/tokens.py +657 -0
- agno/vectordb/base.py +39 -7
- agno/vectordb/cassandra/cassandra.py +21 -5
- agno/vectordb/chroma/chromadb.py +43 -12
- agno/vectordb/clickhouse/clickhousedb.py +21 -5
- agno/vectordb/couchbase/couchbase.py +29 -5
- agno/vectordb/lancedb/lance_db.py +92 -181
- agno/vectordb/langchaindb/langchaindb.py +24 -4
- agno/vectordb/lightrag/lightrag.py +17 -3
- agno/vectordb/llamaindex/llamaindexdb.py +25 -5
- agno/vectordb/milvus/milvus.py +50 -37
- agno/vectordb/mongodb/__init__.py +7 -1
- agno/vectordb/mongodb/mongodb.py +36 -30
- agno/vectordb/pgvector/pgvector.py +201 -77
- agno/vectordb/pineconedb/pineconedb.py +41 -23
- agno/vectordb/qdrant/qdrant.py +67 -54
- agno/vectordb/redis/__init__.py +9 -0
- agno/vectordb/redis/redisdb.py +682 -0
- agno/vectordb/singlestore/singlestore.py +50 -29
- agno/vectordb/surrealdb/surrealdb.py +31 -41
- agno/vectordb/upstashdb/upstashdb.py +34 -6
- agno/vectordb/weaviate/weaviate.py +53 -14
- agno/workflow/__init__.py +2 -0
- agno/workflow/agent.py +299 -0
- agno/workflow/condition.py +120 -18
- agno/workflow/loop.py +77 -10
- agno/workflow/parallel.py +231 -143
- agno/workflow/router.py +118 -17
- agno/workflow/step.py +609 -170
- agno/workflow/steps.py +73 -6
- agno/workflow/types.py +96 -21
- agno/workflow/workflow.py +2039 -262
- {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/METADATA +201 -66
- agno-2.3.13.dist-info/RECORD +613 -0
- agno/tools/googlesearch.py +0 -98
- agno/tools/mcp.py +0 -679
- agno/tools/memori.py +0 -339
- agno-2.1.2.dist-info/RECORD +0 -543
- {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/WHEEL +0 -0
- {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/licenses/LICENSE +0 -0
- {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/top_level.txt +0 -0
agno/vectordb/base.py
CHANGED
|
@@ -1,11 +1,29 @@
|
|
|
1
1
|
from abc import ABC, abstractmethod
|
|
2
2
|
from typing import Any, Dict, List, Optional
|
|
3
3
|
|
|
4
|
+
from agno.knowledge.document import Document
|
|
5
|
+
from agno.utils.log import log_warning
|
|
6
|
+
from agno.utils.string import generate_id
|
|
7
|
+
|
|
4
8
|
|
|
5
9
|
class VectorDb(ABC):
|
|
6
10
|
"""Base class for Vector Databases"""
|
|
7
11
|
|
|
8
|
-
|
|
12
|
+
def __init__(self, *, id: Optional[str] = None, name: Optional[str] = None, description: Optional[str] = None):
|
|
13
|
+
"""Initialize base VectorDb.
|
|
14
|
+
|
|
15
|
+
Args:
|
|
16
|
+
id: Optional custom ID. If not provided, an id will be generated.
|
|
17
|
+
name: Optional name for the vector database.
|
|
18
|
+
description: Optional description for the vector database.
|
|
19
|
+
"""
|
|
20
|
+
if name is None:
|
|
21
|
+
name = self.__class__.__name__
|
|
22
|
+
|
|
23
|
+
self.name = name
|
|
24
|
+
self.description = description
|
|
25
|
+
# Last resort fallback to generate id from name if ID not specified
|
|
26
|
+
self.id = id if id else generate_id(name)
|
|
9
27
|
|
|
10
28
|
@abstractmethod
|
|
11
29
|
def create(self) -> None:
|
|
@@ -55,13 +73,11 @@ class VectorDb(ABC):
|
|
|
55
73
|
raise NotImplementedError
|
|
56
74
|
|
|
57
75
|
@abstractmethod
|
|
58
|
-
def search(self, query: str, limit: int = 5, filters: Optional[
|
|
76
|
+
def search(self, query: str, limit: int = 5, filters: Optional[Any] = None) -> List[Document]:
|
|
59
77
|
raise NotImplementedError
|
|
60
78
|
|
|
61
79
|
@abstractmethod
|
|
62
|
-
async def async_search(
|
|
63
|
-
self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None
|
|
64
|
-
) -> List[Document]:
|
|
80
|
+
async def async_search(self, query: str, limit: int = 5, filters: Optional[Any] = None) -> List[Document]:
|
|
65
81
|
raise NotImplementedError
|
|
66
82
|
|
|
67
83
|
@abstractmethod
|
|
@@ -99,10 +115,26 @@ class VectorDb(ABC):
|
|
|
99
115
|
def delete_by_metadata(self, metadata: Dict[str, Any]) -> bool:
|
|
100
116
|
raise NotImplementedError
|
|
101
117
|
|
|
102
|
-
@abstractmethod
|
|
103
118
|
def update_metadata(self, content_id: str, metadata: Dict[str, Any]) -> None:
|
|
104
|
-
|
|
119
|
+
"""
|
|
120
|
+
Update the metadata for documents with the given content_id.
|
|
121
|
+
|
|
122
|
+
Default implementation logs a warning. Subclasses should override this method
|
|
123
|
+
to provide their specific implementation.
|
|
124
|
+
|
|
125
|
+
Args:
|
|
126
|
+
content_id (str): The content ID to update
|
|
127
|
+
metadata (Dict[str, Any]): The metadata to update
|
|
128
|
+
"""
|
|
129
|
+
log_warning(
|
|
130
|
+
f"{self.__class__.__name__}.update_metadata() is not implemented. "
|
|
131
|
+
f"Metadata update for content_id '{content_id}' was skipped."
|
|
132
|
+
)
|
|
105
133
|
|
|
106
134
|
@abstractmethod
|
|
107
135
|
def delete_by_content_id(self, content_id: str) -> bool:
|
|
108
136
|
raise NotImplementedError
|
|
137
|
+
|
|
138
|
+
@abstractmethod
|
|
139
|
+
def get_supported_search_types(self) -> List[str]:
|
|
140
|
+
raise NotImplementedError
|
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
-
from typing import Any, Dict, Iterable, List, Optional
|
|
2
|
+
from typing import Any, Dict, Iterable, List, Optional, Union
|
|
3
3
|
|
|
4
|
+
from agno.filters import FilterExpr
|
|
4
5
|
from agno.knowledge.document import Document
|
|
5
6
|
from agno.knowledge.embedder import Embedder
|
|
6
|
-
from agno.utils.log import log_debug, log_error, log_info
|
|
7
|
+
from agno.utils.log import log_debug, log_error, log_info, log_warning
|
|
7
8
|
from agno.vectordb.base import VectorDb
|
|
8
9
|
from agno.vectordb.cassandra.index import AgnoMetadataVectorCassandraTable
|
|
9
10
|
|
|
@@ -15,6 +16,8 @@ class Cassandra(VectorDb):
|
|
|
15
16
|
keyspace: str,
|
|
16
17
|
embedder: Optional[Embedder] = None,
|
|
17
18
|
session=None,
|
|
19
|
+
name: Optional[str] = None,
|
|
20
|
+
description: Optional[str] = None,
|
|
18
21
|
) -> None:
|
|
19
22
|
if not table_name:
|
|
20
23
|
raise ValueError("Table name must be provided.")
|
|
@@ -30,6 +33,9 @@ class Cassandra(VectorDb):
|
|
|
30
33
|
|
|
31
34
|
embedder = OpenAIEmbedder()
|
|
32
35
|
log_info("Embedder not provided, using OpenAIEmbedder as default.")
|
|
36
|
+
# Initialize base class with name and description
|
|
37
|
+
super().__init__(name=name, description=description)
|
|
38
|
+
|
|
33
39
|
self.table_name: str = table_name
|
|
34
40
|
self.embedder: Embedder = embedder
|
|
35
41
|
self.session = session
|
|
@@ -199,13 +205,17 @@ class Cassandra(VectorDb):
|
|
|
199
205
|
self.delete_by_content_hash(content_hash)
|
|
200
206
|
await self.async_insert(content_hash, documents, filters)
|
|
201
207
|
|
|
202
|
-
def search(
|
|
208
|
+
def search(
|
|
209
|
+
self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
|
|
210
|
+
) -> List[Document]:
|
|
203
211
|
"""Keyword-based search on document metadata."""
|
|
204
212
|
log_debug(f"Cassandra VectorDB : Performing Vector Search on {self.table_name} with query {query}")
|
|
213
|
+
if filters is not None:
|
|
214
|
+
log_warning("Filters are not yet supported in Cassandra. No filters will be applied.")
|
|
205
215
|
return self.vector_search(query=query, limit=limit)
|
|
206
216
|
|
|
207
217
|
async def async_search(
|
|
208
|
-
self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None
|
|
218
|
+
self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
|
|
209
219
|
) -> List[Document]:
|
|
210
220
|
"""Search asynchronously by running in a thread."""
|
|
211
221
|
return await asyncio.to_thread(self.search, query, limit, filters)
|
|
@@ -216,7 +226,9 @@ class Cassandra(VectorDb):
|
|
|
216
226
|
) -> List[Document]:
|
|
217
227
|
return [self._row_to_document(row=hit) for hit in hits]
|
|
218
228
|
|
|
219
|
-
def vector_search(
|
|
229
|
+
def vector_search(
|
|
230
|
+
self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
|
|
231
|
+
) -> List[Document]:
|
|
220
232
|
"""Vector similarity search implementation."""
|
|
221
233
|
query_embedding = self.embedder.get_embedding(query)
|
|
222
234
|
hits = list(
|
|
@@ -483,3 +495,7 @@ class Cassandra(VectorDb):
|
|
|
483
495
|
except Exception as e:
|
|
484
496
|
log_error(f"Error updating metadata for content_id {content_id}: {e}")
|
|
485
497
|
raise
|
|
498
|
+
|
|
499
|
+
def get_supported_search_types(self) -> List[str]:
|
|
500
|
+
"""Get the supported search types for this vector database."""
|
|
501
|
+
return [] # Cassandra doesn't use SearchType enum
|
agno/vectordb/chroma/chromadb.py
CHANGED
|
@@ -13,10 +13,11 @@ try:
|
|
|
13
13
|
except ImportError:
|
|
14
14
|
raise ImportError("The `chromadb` package is not installed. Please install it via `pip install chromadb`.")
|
|
15
15
|
|
|
16
|
+
from agno.filters import FilterExpr
|
|
16
17
|
from agno.knowledge.document import Document
|
|
17
18
|
from agno.knowledge.embedder import Embedder
|
|
18
19
|
from agno.knowledge.reranker.base import Reranker
|
|
19
|
-
from agno.utils.log import log_debug, log_error, log_info, logger
|
|
20
|
+
from agno.utils.log import log_debug, log_error, log_info, log_warning, logger
|
|
20
21
|
from agno.vectordb.base import VectorDb
|
|
21
22
|
from agno.vectordb.distance import Distance
|
|
22
23
|
|
|
@@ -25,6 +26,9 @@ class ChromaDb(VectorDb):
|
|
|
25
26
|
def __init__(
|
|
26
27
|
self,
|
|
27
28
|
collection: str,
|
|
29
|
+
name: Optional[str] = None,
|
|
30
|
+
description: Optional[str] = None,
|
|
31
|
+
id: Optional[str] = None,
|
|
28
32
|
embedder: Optional[Embedder] = None,
|
|
29
33
|
distance: Distance = Distance.cosine,
|
|
30
34
|
path: str = "tmp/chromadb",
|
|
@@ -32,9 +36,22 @@ class ChromaDb(VectorDb):
|
|
|
32
36
|
reranker: Optional[Reranker] = None,
|
|
33
37
|
**kwargs,
|
|
34
38
|
):
|
|
39
|
+
# Validate required parameters
|
|
40
|
+
if not collection:
|
|
41
|
+
raise ValueError("Collection name must be provided.")
|
|
42
|
+
|
|
43
|
+
# Dynamic ID generation based on unique identifiers
|
|
44
|
+
if id is None:
|
|
45
|
+
from agno.utils.string import generate_id
|
|
46
|
+
|
|
47
|
+
seed = f"{path}#{collection}"
|
|
48
|
+
id = generate_id(seed)
|
|
49
|
+
|
|
50
|
+
# Initialize base class with name, description, and generated ID
|
|
51
|
+
super().__init__(id=id, name=name, description=description)
|
|
52
|
+
|
|
35
53
|
# Collection attributes
|
|
36
54
|
self.collection_name: str = collection
|
|
37
|
-
|
|
38
55
|
# Embedder for embedding the document contents
|
|
39
56
|
if embedder is None:
|
|
40
57
|
from agno.knowledge.embedder.openai import OpenAIEmbedder
|
|
@@ -259,7 +276,9 @@ class ChromaDb(VectorDb):
|
|
|
259
276
|
|
|
260
277
|
for document in documents:
|
|
261
278
|
cleaned_content = document.content.replace("\x00", "\ufffd")
|
|
262
|
-
|
|
279
|
+
# Include content_hash in ID to ensure uniqueness across different content hashes
|
|
280
|
+
base_id = document.id or md5(cleaned_content.encode()).hexdigest()
|
|
281
|
+
doc_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
|
|
263
282
|
|
|
264
283
|
# Handle metadata and filters
|
|
265
284
|
metadata = document.meta_data or {}
|
|
@@ -418,7 +437,9 @@ class ChromaDb(VectorDb):
|
|
|
418
437
|
|
|
419
438
|
for document in documents:
|
|
420
439
|
cleaned_content = document.content.replace("\x00", "\ufffd")
|
|
421
|
-
|
|
440
|
+
# Include content_hash in ID to ensure uniqueness across different content hashes
|
|
441
|
+
base_id = document.id or md5(cleaned_content.encode()).hexdigest()
|
|
442
|
+
doc_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
|
|
422
443
|
|
|
423
444
|
# Handle metadata and filters
|
|
424
445
|
metadata = document.meta_data or {}
|
|
@@ -461,13 +482,15 @@ class ChromaDb(VectorDb):
|
|
|
461
482
|
logger.error(f"Error upserting documents by content hash: {e}")
|
|
462
483
|
raise
|
|
463
484
|
|
|
464
|
-
def search(
|
|
485
|
+
def search(
|
|
486
|
+
self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
|
|
487
|
+
) -> List[Document]:
|
|
465
488
|
"""Search the collection for a query.
|
|
466
489
|
|
|
467
490
|
Args:
|
|
468
491
|
query (str): Query to search for.
|
|
469
492
|
limit (int): Number of results to return.
|
|
470
|
-
filters (Optional[Dict[str, Any]]): Filters to apply while searching.
|
|
493
|
+
filters (Optional[Union[Dict[str, Any], List[FilterExpr]]]): Filters to apply while searching.
|
|
471
494
|
Supports ChromaDB's filtering operators:
|
|
472
495
|
- $eq, $ne: Equality/Inequality
|
|
473
496
|
- $gt, $gte, $lt, $lte: Numeric comparisons
|
|
@@ -476,6 +499,9 @@ class ChromaDb(VectorDb):
|
|
|
476
499
|
Returns:
|
|
477
500
|
List[Document]: List of search results.
|
|
478
501
|
"""
|
|
502
|
+
if isinstance(filters, list):
|
|
503
|
+
log_warning("Filter Expressions are not yet supported in ChromaDB. No filters will be applied.")
|
|
504
|
+
filters = None
|
|
479
505
|
query_embedding = self.embedder.get_embedding(query)
|
|
480
506
|
if query_embedding is None:
|
|
481
507
|
logger.error(f"Error getting embedding for Query: {query}")
|
|
@@ -497,11 +523,11 @@ class ChromaDb(VectorDb):
|
|
|
497
523
|
# Build search results
|
|
498
524
|
search_results: List[Document] = []
|
|
499
525
|
|
|
500
|
-
ids_list = result.get("ids", [[]])
|
|
501
|
-
metadata_list = result.get("metadatas", [[{}]])
|
|
502
|
-
documents_list = result.get("documents", [[]])
|
|
503
|
-
embeddings_list = result.get("embeddings")
|
|
504
|
-
distances_list = result.get("distances", [[]])
|
|
526
|
+
ids_list = result.get("ids", [[]]) # type: ignore
|
|
527
|
+
metadata_list = result.get("metadatas", [[{}]]) # type: ignore
|
|
528
|
+
documents_list = result.get("documents", [[]]) # type: ignore
|
|
529
|
+
embeddings_list = result.get("embeddings") # type: ignore
|
|
530
|
+
distances_list = result.get("distances", [[]]) # type: ignore
|
|
505
531
|
|
|
506
532
|
if not ids_list or not metadata_list or not documents_list or embeddings_list is None or not distances_list:
|
|
507
533
|
return search_results
|
|
@@ -590,7 +616,7 @@ class ChromaDb(VectorDb):
|
|
|
590
616
|
return converted
|
|
591
617
|
|
|
592
618
|
async def async_search(
|
|
593
|
-
self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None
|
|
619
|
+
self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
|
|
594
620
|
) -> List[Document]:
|
|
595
621
|
"""Search asynchronously by running in a thread."""
|
|
596
622
|
return await asyncio.to_thread(self.search, query, limit, filters)
|
|
@@ -886,6 +912,7 @@ class ChromaDb(VectorDb):
|
|
|
886
912
|
|
|
887
913
|
# Convert to the expected type for ChromaDB
|
|
888
914
|
chroma_metadatas = cast(List[Mapping[str, Union[str, int, float, bool]]], updated_metadatas)
|
|
915
|
+
chroma_metadatas = [{k: v for k, v in m.items() if k and v} for m in chroma_metadatas]
|
|
889
916
|
collection.update(ids=ids, metadatas=chroma_metadatas) # type: ignore
|
|
890
917
|
logger.debug(f"Updated metadata for {len(ids)} documents with content_id: {content_id}")
|
|
891
918
|
|
|
@@ -901,3 +928,7 @@ class ChromaDb(VectorDb):
|
|
|
901
928
|
except Exception as e:
|
|
902
929
|
logger.error(f"Error updating metadata for content_id '{content_id}': {e}")
|
|
903
930
|
raise
|
|
931
|
+
|
|
932
|
+
def get_supported_search_types(self) -> List[str]:
|
|
933
|
+
"""Get the supported search types for this vector database."""
|
|
934
|
+
return [] # ChromaDb doesn't use SearchType enum
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
from hashlib import md5
|
|
3
|
-
from typing import Any, Dict, List, Optional
|
|
3
|
+
from typing import Any, Dict, List, Optional, Union
|
|
4
4
|
|
|
5
5
|
from agno.vectordb.clickhouse.index import HNSW
|
|
6
6
|
|
|
@@ -11,9 +11,10 @@ try:
|
|
|
11
11
|
except ImportError:
|
|
12
12
|
raise ImportError("`clickhouse-connect` not installed. Use `pip install clickhouse-connect` to install it")
|
|
13
13
|
|
|
14
|
+
from agno.filters import FilterExpr
|
|
14
15
|
from agno.knowledge.document import Document
|
|
15
16
|
from agno.knowledge.embedder import Embedder
|
|
16
|
-
from agno.utils.log import log_debug, log_info, logger
|
|
17
|
+
from agno.utils.log import log_debug, log_info, log_warning, logger
|
|
17
18
|
from agno.vectordb.base import VectorDb
|
|
18
19
|
from agno.vectordb.distance import Distance
|
|
19
20
|
|
|
@@ -23,6 +24,8 @@ class Clickhouse(VectorDb):
|
|
|
23
24
|
self,
|
|
24
25
|
table_name: str,
|
|
25
26
|
host: str,
|
|
27
|
+
name: Optional[str] = None,
|
|
28
|
+
description: Optional[str] = None,
|
|
26
29
|
username: Optional[str] = None,
|
|
27
30
|
password: str = "",
|
|
28
31
|
port: int = 0,
|
|
@@ -41,9 +44,11 @@ class Clickhouse(VectorDb):
|
|
|
41
44
|
self.password = password
|
|
42
45
|
self.port = port
|
|
43
46
|
self.dsn = dsn
|
|
47
|
+
# Initialize base class with name and description
|
|
48
|
+
super().__init__(name=name, description=description)
|
|
49
|
+
|
|
44
50
|
self.compress = compress
|
|
45
51
|
self.database_name = database_name
|
|
46
|
-
|
|
47
52
|
if not client:
|
|
48
53
|
client = clickhouse_connect.get_client(
|
|
49
54
|
host=self.host,
|
|
@@ -444,7 +449,11 @@ class Clickhouse(VectorDb):
|
|
|
444
449
|
parameters=parameters,
|
|
445
450
|
)
|
|
446
451
|
|
|
447
|
-
def search(
|
|
452
|
+
def search(
|
|
453
|
+
self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
|
|
454
|
+
) -> List[Document]:
|
|
455
|
+
if filters is not None:
|
|
456
|
+
log_warning("Filters are not yet supported in Clickhouse. No filters will be applied.")
|
|
448
457
|
query_embedding = self.embedder.get_embedding(query)
|
|
449
458
|
if query_embedding is None:
|
|
450
459
|
logger.error(f"Error getting embedding for Query: {query}")
|
|
@@ -498,11 +507,14 @@ class Clickhouse(VectorDb):
|
|
|
498
507
|
return search_results
|
|
499
508
|
|
|
500
509
|
async def async_search(
|
|
501
|
-
self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None
|
|
510
|
+
self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
|
|
502
511
|
) -> List[Document]:
|
|
503
512
|
"""Search for documents asynchronously."""
|
|
504
513
|
async_client = await self._ensure_async_client()
|
|
505
514
|
|
|
515
|
+
if filters is not None:
|
|
516
|
+
log_warning("Filters are not yet supported in Clickhouse. No filters will be applied.")
|
|
517
|
+
|
|
506
518
|
query_embedding = self.embedder.get_embedding(query)
|
|
507
519
|
if query_embedding is None:
|
|
508
520
|
logger.error(f"Error getting embedding for Query: {query}")
|
|
@@ -817,3 +829,7 @@ class Clickhouse(VectorDb):
|
|
|
817
829
|
except Exception as e:
|
|
818
830
|
logger.error(f"Error updating metadata for content_id '{content_id}': {e}")
|
|
819
831
|
raise
|
|
832
|
+
|
|
833
|
+
def get_supported_search_types(self) -> List[str]:
|
|
834
|
+
"""Get the supported search types for this vector database."""
|
|
835
|
+
return [] # Clickhouse doesn't use SearchType enum
|
|
@@ -3,10 +3,10 @@ import time
|
|
|
3
3
|
from datetime import timedelta
|
|
4
4
|
from typing import Any, Dict, List, Optional, Union
|
|
5
5
|
|
|
6
|
+
from agno.filters import FilterExpr
|
|
6
7
|
from agno.knowledge.document import Document
|
|
7
8
|
from agno.knowledge.embedder import Embedder
|
|
8
|
-
from agno.
|
|
9
|
-
from agno.utils.log import log_debug, log_info, logger
|
|
9
|
+
from agno.utils.log import log_debug, log_info, log_warning, logger
|
|
10
10
|
from agno.vectordb.base import VectorDb
|
|
11
11
|
|
|
12
12
|
try:
|
|
@@ -61,11 +61,13 @@ class CouchbaseSearch(VectorDb):
|
|
|
61
61
|
couchbase_connection_string: str,
|
|
62
62
|
cluster_options: ClusterOptions,
|
|
63
63
|
search_index: Union[str, SearchIndex],
|
|
64
|
-
embedder: Embedder =
|
|
64
|
+
embedder: Optional[Embedder] = None,
|
|
65
65
|
overwrite: bool = False,
|
|
66
66
|
is_global_level_index: bool = False,
|
|
67
67
|
wait_until_index_ready: float = 0,
|
|
68
68
|
batch_limit: int = 500,
|
|
69
|
+
name: Optional[str] = None,
|
|
70
|
+
description: Optional[str] = None,
|
|
69
71
|
**kwargs,
|
|
70
72
|
):
|
|
71
73
|
"""
|
|
@@ -75,6 +77,8 @@ class CouchbaseSearch(VectorDb):
|
|
|
75
77
|
bucket_name (str): Name of the Couchbase bucket.
|
|
76
78
|
scope_name (str): Name of the scope within the bucket.
|
|
77
79
|
collection_name (str): Name of the collection within the scope.
|
|
80
|
+
name (Optional[str]): Name of the vector database.
|
|
81
|
+
description (Optional[str]): Description of the vector database.
|
|
78
82
|
couchbase_connection_string (str): Couchbase connection string.
|
|
79
83
|
cluster_options (ClusterOptions): Options for configuring the Couchbase cluster connection.
|
|
80
84
|
search_index (Union[str, SearchIndex], optional): Search index configuration, either as index name or SearchIndex definition.
|
|
@@ -92,10 +96,18 @@ class CouchbaseSearch(VectorDb):
|
|
|
92
96
|
self.collection_name = collection_name
|
|
93
97
|
self.connection_string = couchbase_connection_string
|
|
94
98
|
self.cluster_options = cluster_options
|
|
99
|
+
if embedder is None:
|
|
100
|
+
from agno.knowledge.embedder.openai import OpenAIEmbedder
|
|
101
|
+
|
|
102
|
+
embedder = OpenAIEmbedder()
|
|
103
|
+
log_info("Embedder not provided, using OpenAIEmbedder as default.")
|
|
95
104
|
self.embedder = embedder
|
|
96
105
|
self.overwrite = overwrite
|
|
97
106
|
self.is_global_level_index = is_global_level_index
|
|
98
107
|
self.wait_until_index_ready = wait_until_index_ready
|
|
108
|
+
# Initialize base class with name and description
|
|
109
|
+
super().__init__(name=name, description=description)
|
|
110
|
+
|
|
99
111
|
self.kwargs = kwargs
|
|
100
112
|
self.batch_limit = batch_limit
|
|
101
113
|
if isinstance(search_index, str):
|
|
@@ -451,7 +463,12 @@ class CouchbaseSearch(VectorDb):
|
|
|
451
463
|
if errors_occurred:
|
|
452
464
|
logger.warning("Some errors occurred during the upsert operation. Please check logs for details.")
|
|
453
465
|
|
|
454
|
-
def search(
|
|
466
|
+
def search(
|
|
467
|
+
self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
|
|
468
|
+
) -> List[Document]:
|
|
469
|
+
if isinstance(filters, List):
|
|
470
|
+
log_warning("Filter Expressions are not yet supported in Couchbase. No filters will be applied.")
|
|
471
|
+
filters = None
|
|
455
472
|
"""Search the Couchbase bucket for documents relevant to the query."""
|
|
456
473
|
query_embedding = self.embedder.get_embedding(query)
|
|
457
474
|
if query_embedding is None:
|
|
@@ -1061,8 +1078,11 @@ class CouchbaseSearch(VectorDb):
|
|
|
1061
1078
|
logger.info(f"[async] Total successfully upserted: {total_upserted_count}, Total failed: {total_failed_count}.")
|
|
1062
1079
|
|
|
1063
1080
|
async def async_search(
|
|
1064
|
-
self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None
|
|
1081
|
+
self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
|
|
1065
1082
|
) -> List[Document]:
|
|
1083
|
+
if isinstance(filters, List):
|
|
1084
|
+
log_warning("Filter Expressions are not yet supported in Couchbase. No filters will be applied.")
|
|
1085
|
+
filters = None
|
|
1066
1086
|
query_embedding = self.embedder.get_embedding(query)
|
|
1067
1087
|
if query_embedding is None:
|
|
1068
1088
|
logger.error(f"[async] Failed to generate embedding for query: {query}")
|
|
@@ -1420,3 +1440,7 @@ class CouchbaseSearch(VectorDb):
|
|
|
1420
1440
|
except Exception as e:
|
|
1421
1441
|
logger.error(f"Error updating metadata for content_id '{content_id}': {e}")
|
|
1422
1442
|
raise
|
|
1443
|
+
|
|
1444
|
+
def get_supported_search_types(self) -> List[str]:
|
|
1445
|
+
"""Get the supported search types for this vector database."""
|
|
1446
|
+
return [] # CouchbaseSearch doesn't use SearchType enum
|