agno 2.0.1__py3-none-any.whl → 2.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/agent.py +6015 -2823
- agno/api/api.py +2 -0
- agno/api/os.py +1 -1
- agno/culture/__init__.py +3 -0
- agno/culture/manager.py +956 -0
- agno/db/async_postgres/__init__.py +3 -0
- agno/db/base.py +385 -6
- agno/db/dynamo/dynamo.py +388 -81
- agno/db/dynamo/schemas.py +47 -10
- agno/db/dynamo/utils.py +63 -4
- agno/db/firestore/firestore.py +435 -64
- agno/db/firestore/schemas.py +11 -0
- agno/db/firestore/utils.py +102 -4
- agno/db/gcs_json/gcs_json_db.py +384 -42
- agno/db/gcs_json/utils.py +60 -26
- agno/db/in_memory/in_memory_db.py +351 -66
- agno/db/in_memory/utils.py +60 -2
- agno/db/json/json_db.py +339 -48
- agno/db/json/utils.py +60 -26
- agno/db/migrations/manager.py +199 -0
- agno/db/migrations/v1_to_v2.py +510 -37
- agno/db/migrations/versions/__init__.py +0 -0
- agno/db/migrations/versions/v2_3_0.py +938 -0
- agno/db/mongo/__init__.py +15 -1
- agno/db/mongo/async_mongo.py +2036 -0
- agno/db/mongo/mongo.py +653 -76
- agno/db/mongo/schemas.py +13 -0
- agno/db/mongo/utils.py +80 -8
- agno/db/mysql/mysql.py +687 -25
- agno/db/mysql/schemas.py +61 -37
- agno/db/mysql/utils.py +60 -2
- agno/db/postgres/__init__.py +2 -1
- agno/db/postgres/async_postgres.py +2001 -0
- agno/db/postgres/postgres.py +676 -57
- agno/db/postgres/schemas.py +43 -18
- agno/db/postgres/utils.py +164 -2
- agno/db/redis/redis.py +344 -38
- agno/db/redis/schemas.py +18 -0
- agno/db/redis/utils.py +60 -2
- agno/db/schemas/__init__.py +2 -1
- agno/db/schemas/culture.py +120 -0
- agno/db/schemas/memory.py +13 -0
- agno/db/singlestore/schemas.py +26 -1
- agno/db/singlestore/singlestore.py +687 -53
- agno/db/singlestore/utils.py +60 -2
- agno/db/sqlite/__init__.py +2 -1
- agno/db/sqlite/async_sqlite.py +2371 -0
- agno/db/sqlite/schemas.py +24 -0
- agno/db/sqlite/sqlite.py +774 -85
- agno/db/sqlite/utils.py +168 -5
- agno/db/surrealdb/__init__.py +3 -0
- agno/db/surrealdb/metrics.py +292 -0
- agno/db/surrealdb/models.py +309 -0
- agno/db/surrealdb/queries.py +71 -0
- agno/db/surrealdb/surrealdb.py +1361 -0
- agno/db/surrealdb/utils.py +147 -0
- agno/db/utils.py +50 -22
- agno/eval/accuracy.py +50 -43
- agno/eval/performance.py +6 -3
- agno/eval/reliability.py +6 -3
- agno/eval/utils.py +33 -16
- agno/exceptions.py +68 -1
- agno/filters.py +354 -0
- agno/guardrails/__init__.py +6 -0
- agno/guardrails/base.py +19 -0
- agno/guardrails/openai.py +144 -0
- agno/guardrails/pii.py +94 -0
- agno/guardrails/prompt_injection.py +52 -0
- agno/integrations/discord/client.py +1 -0
- agno/knowledge/chunking/agentic.py +13 -10
- agno/knowledge/chunking/fixed.py +1 -1
- agno/knowledge/chunking/semantic.py +40 -8
- agno/knowledge/chunking/strategy.py +59 -15
- agno/knowledge/embedder/aws_bedrock.py +9 -4
- agno/knowledge/embedder/azure_openai.py +54 -0
- agno/knowledge/embedder/base.py +2 -0
- agno/knowledge/embedder/cohere.py +184 -5
- agno/knowledge/embedder/fastembed.py +1 -1
- agno/knowledge/embedder/google.py +79 -1
- agno/knowledge/embedder/huggingface.py +9 -4
- agno/knowledge/embedder/jina.py +63 -0
- agno/knowledge/embedder/mistral.py +78 -11
- agno/knowledge/embedder/nebius.py +1 -1
- agno/knowledge/embedder/ollama.py +13 -0
- agno/knowledge/embedder/openai.py +37 -65
- agno/knowledge/embedder/sentence_transformer.py +8 -4
- agno/knowledge/embedder/vllm.py +262 -0
- agno/knowledge/embedder/voyageai.py +69 -16
- agno/knowledge/knowledge.py +594 -186
- agno/knowledge/reader/base.py +9 -2
- agno/knowledge/reader/csv_reader.py +8 -10
- agno/knowledge/reader/docx_reader.py +5 -6
- agno/knowledge/reader/field_labeled_csv_reader.py +290 -0
- agno/knowledge/reader/json_reader.py +6 -5
- agno/knowledge/reader/markdown_reader.py +13 -13
- agno/knowledge/reader/pdf_reader.py +43 -68
- agno/knowledge/reader/pptx_reader.py +101 -0
- agno/knowledge/reader/reader_factory.py +51 -6
- agno/knowledge/reader/s3_reader.py +3 -15
- agno/knowledge/reader/tavily_reader.py +194 -0
- agno/knowledge/reader/text_reader.py +13 -13
- agno/knowledge/reader/web_search_reader.py +2 -43
- agno/knowledge/reader/website_reader.py +43 -25
- agno/knowledge/reranker/__init__.py +2 -8
- agno/knowledge/types.py +9 -0
- agno/knowledge/utils.py +20 -0
- agno/media.py +72 -0
- agno/memory/manager.py +336 -82
- agno/models/aimlapi/aimlapi.py +2 -2
- agno/models/anthropic/claude.py +183 -37
- agno/models/aws/bedrock.py +52 -112
- agno/models/aws/claude.py +33 -1
- agno/models/azure/ai_foundry.py +33 -15
- agno/models/azure/openai_chat.py +25 -8
- agno/models/base.py +999 -519
- agno/models/cerebras/cerebras.py +19 -13
- agno/models/cerebras/cerebras_openai.py +8 -5
- agno/models/cohere/chat.py +27 -1
- agno/models/cometapi/__init__.py +5 -0
- agno/models/cometapi/cometapi.py +57 -0
- agno/models/dashscope/dashscope.py +1 -0
- agno/models/deepinfra/deepinfra.py +2 -2
- agno/models/deepseek/deepseek.py +2 -2
- agno/models/fireworks/fireworks.py +2 -2
- agno/models/google/gemini.py +103 -31
- agno/models/groq/groq.py +28 -11
- agno/models/huggingface/huggingface.py +2 -1
- agno/models/internlm/internlm.py +2 -2
- agno/models/langdb/langdb.py +4 -4
- agno/models/litellm/chat.py +18 -1
- agno/models/litellm/litellm_openai.py +2 -2
- agno/models/llama_cpp/__init__.py +5 -0
- agno/models/llama_cpp/llama_cpp.py +22 -0
- agno/models/message.py +139 -0
- agno/models/meta/llama.py +27 -10
- agno/models/meta/llama_openai.py +5 -17
- agno/models/nebius/nebius.py +6 -6
- agno/models/nexus/__init__.py +3 -0
- agno/models/nexus/nexus.py +22 -0
- agno/models/nvidia/nvidia.py +2 -2
- agno/models/ollama/chat.py +59 -5
- agno/models/openai/chat.py +69 -29
- agno/models/openai/responses.py +103 -106
- agno/models/openrouter/openrouter.py +41 -3
- agno/models/perplexity/perplexity.py +4 -5
- agno/models/portkey/portkey.py +3 -3
- agno/models/requesty/__init__.py +5 -0
- agno/models/requesty/requesty.py +52 -0
- agno/models/response.py +77 -1
- agno/models/sambanova/sambanova.py +2 -2
- agno/models/siliconflow/__init__.py +5 -0
- agno/models/siliconflow/siliconflow.py +25 -0
- agno/models/together/together.py +2 -2
- agno/models/utils.py +254 -8
- agno/models/vercel/v0.py +2 -2
- agno/models/vertexai/__init__.py +0 -0
- agno/models/vertexai/claude.py +96 -0
- agno/models/vllm/vllm.py +1 -0
- agno/models/xai/xai.py +3 -2
- agno/os/app.py +543 -178
- agno/os/auth.py +24 -14
- agno/os/config.py +1 -0
- agno/os/interfaces/__init__.py +1 -0
- agno/os/interfaces/a2a/__init__.py +3 -0
- agno/os/interfaces/a2a/a2a.py +42 -0
- agno/os/interfaces/a2a/router.py +250 -0
- agno/os/interfaces/a2a/utils.py +924 -0
- agno/os/interfaces/agui/agui.py +23 -7
- agno/os/interfaces/agui/router.py +27 -3
- agno/os/interfaces/agui/utils.py +242 -142
- agno/os/interfaces/base.py +6 -2
- agno/os/interfaces/slack/router.py +81 -23
- agno/os/interfaces/slack/slack.py +29 -14
- agno/os/interfaces/whatsapp/router.py +11 -4
- agno/os/interfaces/whatsapp/whatsapp.py +14 -7
- agno/os/mcp.py +111 -54
- agno/os/middleware/__init__.py +7 -0
- agno/os/middleware/jwt.py +233 -0
- agno/os/router.py +556 -139
- agno/os/routers/evals/evals.py +71 -34
- agno/os/routers/evals/schemas.py +31 -31
- agno/os/routers/evals/utils.py +6 -5
- agno/os/routers/health.py +31 -0
- agno/os/routers/home.py +52 -0
- agno/os/routers/knowledge/knowledge.py +185 -38
- agno/os/routers/knowledge/schemas.py +82 -22
- agno/os/routers/memory/memory.py +158 -53
- agno/os/routers/memory/schemas.py +20 -16
- agno/os/routers/metrics/metrics.py +20 -8
- agno/os/routers/metrics/schemas.py +16 -16
- agno/os/routers/session/session.py +499 -38
- agno/os/schema.py +308 -198
- agno/os/utils.py +401 -41
- agno/reasoning/anthropic.py +80 -0
- agno/reasoning/azure_ai_foundry.py +2 -2
- agno/reasoning/deepseek.py +2 -2
- agno/reasoning/default.py +3 -1
- agno/reasoning/gemini.py +73 -0
- agno/reasoning/groq.py +2 -2
- agno/reasoning/ollama.py +2 -2
- agno/reasoning/openai.py +7 -2
- agno/reasoning/vertexai.py +76 -0
- agno/run/__init__.py +6 -0
- agno/run/agent.py +248 -94
- agno/run/base.py +44 -5
- agno/run/team.py +238 -97
- agno/run/workflow.py +144 -33
- agno/session/agent.py +105 -89
- agno/session/summary.py +65 -25
- agno/session/team.py +176 -96
- agno/session/workflow.py +406 -40
- agno/team/team.py +3854 -1610
- agno/tools/dalle.py +2 -4
- agno/tools/decorator.py +4 -2
- agno/tools/duckduckgo.py +15 -11
- agno/tools/e2b.py +14 -7
- agno/tools/eleven_labs.py +23 -25
- agno/tools/exa.py +21 -16
- agno/tools/file.py +153 -23
- agno/tools/file_generation.py +350 -0
- agno/tools/firecrawl.py +4 -4
- agno/tools/function.py +250 -30
- agno/tools/gmail.py +238 -14
- agno/tools/google_drive.py +270 -0
- agno/tools/googlecalendar.py +36 -8
- agno/tools/googlesheets.py +20 -5
- agno/tools/jira.py +20 -0
- agno/tools/knowledge.py +3 -3
- agno/tools/mcp/__init__.py +10 -0
- agno/tools/mcp/mcp.py +331 -0
- agno/tools/mcp/multi_mcp.py +347 -0
- agno/tools/mcp/params.py +24 -0
- agno/tools/mcp_toolbox.py +284 -0
- agno/tools/mem0.py +11 -17
- agno/tools/memori.py +1 -53
- agno/tools/memory.py +419 -0
- agno/tools/models/nebius.py +5 -5
- agno/tools/models_labs.py +20 -10
- agno/tools/notion.py +204 -0
- agno/tools/parallel.py +314 -0
- agno/tools/scrapegraph.py +58 -31
- agno/tools/searxng.py +2 -2
- agno/tools/serper.py +2 -2
- agno/tools/slack.py +18 -3
- agno/tools/spider.py +2 -2
- agno/tools/tavily.py +146 -0
- agno/tools/whatsapp.py +1 -1
- agno/tools/workflow.py +278 -0
- agno/tools/yfinance.py +12 -11
- agno/utils/agent.py +820 -0
- agno/utils/audio.py +27 -0
- agno/utils/common.py +90 -1
- agno/utils/events.py +217 -2
- agno/utils/gemini.py +180 -22
- agno/utils/hooks.py +57 -0
- agno/utils/http.py +111 -0
- agno/utils/knowledge.py +12 -5
- agno/utils/log.py +1 -0
- agno/utils/mcp.py +92 -2
- agno/utils/media.py +188 -10
- agno/utils/merge_dict.py +22 -1
- agno/utils/message.py +60 -0
- agno/utils/models/claude.py +40 -11
- agno/utils/print_response/agent.py +105 -21
- agno/utils/print_response/team.py +103 -38
- agno/utils/print_response/workflow.py +251 -34
- agno/utils/reasoning.py +22 -1
- agno/utils/serialize.py +32 -0
- agno/utils/streamlit.py +16 -10
- agno/utils/string.py +41 -0
- agno/utils/team.py +98 -9
- agno/utils/tools.py +1 -1
- agno/vectordb/base.py +23 -4
- agno/vectordb/cassandra/cassandra.py +65 -9
- agno/vectordb/chroma/chromadb.py +182 -38
- agno/vectordb/clickhouse/clickhousedb.py +64 -11
- agno/vectordb/couchbase/couchbase.py +105 -10
- agno/vectordb/lancedb/lance_db.py +124 -133
- agno/vectordb/langchaindb/langchaindb.py +25 -7
- agno/vectordb/lightrag/lightrag.py +17 -3
- agno/vectordb/llamaindex/__init__.py +3 -0
- agno/vectordb/llamaindex/llamaindexdb.py +46 -7
- agno/vectordb/milvus/milvus.py +126 -9
- agno/vectordb/mongodb/__init__.py +7 -1
- agno/vectordb/mongodb/mongodb.py +112 -7
- agno/vectordb/pgvector/pgvector.py +142 -21
- agno/vectordb/pineconedb/pineconedb.py +80 -8
- agno/vectordb/qdrant/qdrant.py +125 -39
- agno/vectordb/redis/__init__.py +9 -0
- agno/vectordb/redis/redisdb.py +694 -0
- agno/vectordb/singlestore/singlestore.py +111 -25
- agno/vectordb/surrealdb/surrealdb.py +31 -5
- agno/vectordb/upstashdb/upstashdb.py +76 -8
- agno/vectordb/weaviate/weaviate.py +86 -15
- agno/workflow/__init__.py +2 -0
- agno/workflow/agent.py +299 -0
- agno/workflow/condition.py +112 -18
- agno/workflow/loop.py +69 -10
- agno/workflow/parallel.py +266 -118
- agno/workflow/router.py +110 -17
- agno/workflow/step.py +638 -129
- agno/workflow/steps.py +65 -6
- agno/workflow/types.py +61 -23
- agno/workflow/workflow.py +2085 -272
- {agno-2.0.1.dist-info → agno-2.3.0.dist-info}/METADATA +182 -58
- agno-2.3.0.dist-info/RECORD +577 -0
- agno/knowledge/reader/url_reader.py +0 -128
- agno/tools/googlesearch.py +0 -98
- agno/tools/mcp.py +0 -610
- agno/utils/models/aws_claude.py +0 -170
- agno-2.0.1.dist-info/RECORD +0 -515
- {agno-2.0.1.dist-info → agno-2.3.0.dist-info}/WHEEL +0 -0
- {agno-2.0.1.dist-info → agno-2.3.0.dist-info}/licenses/LICENSE +0 -0
- {agno-2.0.1.dist-info → agno-2.3.0.dist-info}/top_level.txt +0 -0
agno/vectordb/couchbase/couchbase.py +105 -10

@@ -3,10 +3,10 @@ import time
 from datetime import timedelta
 from typing import Any, Dict, List, Optional, Union

+from agno.filters import FilterExpr
 from agno.knowledge.document import Document
 from agno.knowledge.embedder import Embedder
-from agno.
-from agno.utils.log import log_debug, log_info, logger
+from agno.utils.log import log_debug, log_info, log_warning, logger
 from agno.vectordb.base import VectorDb

 try:

@@ -61,11 +61,13 @@ class CouchbaseSearch(VectorDb):
         couchbase_connection_string: str,
         cluster_options: ClusterOptions,
         search_index: Union[str, SearchIndex],
-        embedder: Embedder =
+        embedder: Optional[Embedder] = None,
         overwrite: bool = False,
         is_global_level_index: bool = False,
         wait_until_index_ready: float = 0,
         batch_limit: int = 500,
+        name: Optional[str] = None,
+        description: Optional[str] = None,
         **kwargs,
     ):
         """

@@ -75,6 +77,8 @@ class CouchbaseSearch(VectorDb):
             bucket_name (str): Name of the Couchbase bucket.
             scope_name (str): Name of the scope within the bucket.
             collection_name (str): Name of the collection within the scope.
+            name (Optional[str]): Name of the vector database.
+            description (Optional[str]): Description of the vector database.
             couchbase_connection_string (str): Couchbase connection string.
             cluster_options (ClusterOptions): Options for configuring the Couchbase cluster connection.
             search_index (Union[str, SearchIndex], optional): Search index configuration, either as index name or SearchIndex definition.

@@ -92,10 +96,18 @@ class CouchbaseSearch(VectorDb):
         self.collection_name = collection_name
         self.connection_string = couchbase_connection_string
         self.cluster_options = cluster_options
+        if embedder is None:
+            from agno.knowledge.embedder.openai import OpenAIEmbedder
+
+            embedder = OpenAIEmbedder()
+            log_info("Embedder not provided, using OpenAIEmbedder as default.")
         self.embedder = embedder
         self.overwrite = overwrite
         self.is_global_level_index = is_global_level_index
         self.wait_until_index_ready = wait_until_index_ready
+        # Initialize base class with name and description
+        super().__init__(name=name, description=description)
+
         self.kwargs = kwargs
         self.batch_limit = batch_limit
         if isinstance(search_index, str):

@@ -451,7 +463,12 @@ class CouchbaseSearch(VectorDb):
         if errors_occurred:
             logger.warning("Some errors occurred during the upsert operation. Please check logs for details.")

-    def search(
+    def search(
+        self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
+    ) -> List[Document]:
+        if isinstance(filters, List):
+            log_warning("Filter Expressions are not yet supported in Couchbase. No filters will be applied.")
+            filters = None
         """Search the Couchbase bucket for documents relevant to the query."""
         query_embedding = self.embedder.get_embedding(query)
         if query_embedding is None:

@@ -871,8 +888,44 @@ class CouchbaseSearch(VectorDb):
         async_collection_instance = await self.get_async_collection()
         all_docs_to_insert: Dict[str, Any] = {}

-
-
+        if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
+            # Use batch embedding when enabled and supported
+            try:
+                # Extract content from all documents
+                doc_contents = [doc.content for doc in documents]
+
+                # Get batch embeddings and usage
+                embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
+
+                # Process documents with pre-computed embeddings
+                for j, doc in enumerate(documents):
+                    try:
+                        if j < len(embeddings):
+                            doc.embedding = embeddings[j]
+                            doc.usage = usages[j] if j < len(usages) else None
+                    except Exception as e:
+                        logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")
+
+            except Exception as e:
+                # Check if this is a rate limit error - don't fall back as it would make things worse
+                error_str = str(e).lower()
+                is_rate_limit = any(
+                    phrase in error_str
+                    for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
+                )
+
+                if is_rate_limit:
+                    logger.error(f"Rate limit detected during batch embedding. {e}")
+                    raise e
+                else:
+                    logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
+                    # Fall back to individual embedding
+                    embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
+                    await asyncio.gather(*embed_tasks, return_exceptions=True)
+        else:
+            # Use individual embedding
+            embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
+            await asyncio.gather(*embed_tasks, return_exceptions=True)

         for document in documents:
             try:

@@ -937,8 +990,44 @@ class CouchbaseSearch(VectorDb):
         async_collection_instance = await self.get_async_collection()
         all_docs_to_upsert: Dict[str, Any] = {}

-
-
+        if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
+            # Use batch embedding when enabled and supported
+            try:
+                # Extract content from all documents
+                doc_contents = [doc.content for doc in documents]
+
+                # Get batch embeddings and usage
+                embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
+
+                # Process documents with pre-computed embeddings
+                for j, doc in enumerate(documents):
+                    try:
+                        if j < len(embeddings):
+                            doc.embedding = embeddings[j]
+                            doc.usage = usages[j] if j < len(usages) else None
+                    except Exception as e:
+                        logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")
+
+            except Exception as e:
+                # Check if this is a rate limit error - don't fall back as it would make things worse
+                error_str = str(e).lower()
+                is_rate_limit = any(
+                    phrase in error_str
+                    for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
+                )
+
+                if is_rate_limit:
+                    logger.error(f"Rate limit detected during batch embedding. {e}")
+                    raise e
+                else:
+                    logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
+                    # Fall back to individual embedding
+                    embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
+                    await asyncio.gather(*embed_tasks, return_exceptions=True)
+        else:
+            # Use individual embedding
+            embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
+            await asyncio.gather(*embed_tasks, return_exceptions=True)

         for document in documents:
             try:

@@ -989,8 +1078,11 @@ class CouchbaseSearch(VectorDb):
         logger.info(f"[async] Total successfully upserted: {total_upserted_count}, Total failed: {total_failed_count}.")

     async def async_search(
-        self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None
+        self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
     ) -> List[Document]:
+        if isinstance(filters, List):
+            log_warning("Filter Expressions are not yet supported in Couchbase. No filters will be applied.")
+            filters = None
         query_embedding = self.embedder.get_embedding(query)
         if query_embedding is None:
             logger.error(f"[async] Failed to generate embedding for query: {query}")

@@ -1225,7 +1317,6 @@ class CouchbaseSearch(VectorDb):
             rows = list(result.rows())  # Collect once

             for row in rows:
-                print(row)
                 self.collection.remove(row.get("doc_id"))
             log_info(f"Deleted {len(rows)} documents with metadata {metadata}")
             return True

@@ -1349,3 +1440,7 @@ class CouchbaseSearch(VectorDb):
         except Exception as e:
             logger.error(f"Error updating metadata for content_id '{content_id}': {e}")
             raise
+
+    def get_supported_search_types(self) -> List[str]:
+        """Get the supported search types for this vector database."""
+        return []  # CouchbaseSearch doesn't use SearchType enum
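Net effect of these hunks: `CouchbaseSearch.__init__` now defaults the embedder to `OpenAIEmbedder`, accepts `name`/`description` and forwards them to the `VectorDb` base class, `search`/`async_search` accept either a filters dict or a `List[FilterExpr]` (the latter is currently warned about and ignored), the async insert/upsert paths use batch embeddings when the embedder supports them, and a stray `print(row)` was removed. A minimal usage sketch under assumptions: the `agno.vectordb.couchbase` import path is inferred from the file listing, the connection string, credentials, bucket/scope/collection names, and index name are placeholders, and an OpenAI API key is assumed to be configured for the default embedder.

```python
from couchbase.auth import PasswordAuthenticator
from couchbase.options import ClusterOptions

from agno.vectordb.couchbase import CouchbaseSearch  # import path assumed from the file listing

vector_db = CouchbaseSearch(
    bucket_name="docs",  # placeholder names for illustration only
    scope_name="knowledge",
    collection_name="chunks",
    couchbase_connection_string="couchbases://cb.example.com",
    cluster_options=ClusterOptions(PasswordAuthenticator("username", "password")),
    search_index="vector_index",
    name="couchbase-knowledge",  # new in 2.3.0: forwarded to the VectorDb base class
    description="Product documentation chunks",  # new in 2.3.0
    # embedder omitted: 2.3.0 falls back to OpenAIEmbedder and logs an info message
)

# Dict filters behave as before; a List[FilterExpr] is accepted but ignored with a warning.
results = vector_db.search("how do I rotate credentials?", limit=5)
```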
agno/vectordb/lancedb/lance_db.py +124 -133

@@ -2,7 +2,7 @@ import asyncio
 import json
 from hashlib import md5
 from os import getenv
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Union

 try:
     import lancedb

@@ -10,10 +10,11 @@ try:
 except ImportError:
     raise ImportError("`lancedb` not installed. Please install using `pip install lancedb`")

+from agno.filters import FilterExpr
 from agno.knowledge.document import Document
 from agno.knowledge.embedder import Embedder
 from agno.knowledge.reranker.base import Reranker
-from agno.utils.log import log_debug, log_info, logger
+from agno.utils.log import log_debug, log_info, log_warning, logger
 from agno.vectordb.base import VectorDb
 from agno.vectordb.distance import Distance
 from agno.vectordb.search import SearchType

@@ -25,6 +26,8 @@ class LanceDb(VectorDb):

     Args:
         uri: The URI of the LanceDB database.
+        name: Name of the vector database.
+        description: Description of the vector database.
         connection: The LanceDB connection to use.
         table: The LanceDB table instance to use.
         async_connection: The LanceDB async connection to use.

@@ -44,6 +47,9 @@ class LanceDb(VectorDb):
     def __init__(
         self,
        uri: lancedb.URI = "/tmp/lancedb",
+        name: Optional[str] = None,
+        description: Optional[str] = None,
+        id: Optional[str] = None,
         connection: Optional[lancedb.LanceDBConnection] = None,
         table: Optional[lancedb.db.LanceTable] = None,
         async_connection: Optional[lancedb.AsyncConnection] = None,

@@ -59,6 +65,17 @@ class LanceDb(VectorDb):
         on_bad_vectors: Optional[str] = None,  # One of "error", "drop", "fill", "null".
         fill_value: Optional[float] = None,  # Only used if on_bad_vectors is "fill"
     ):
+        # Dynamic ID generation based on unique identifiers
+        if id is None:
+            from agno.utils.string import generate_id
+
+            table_identifier = table_name or "default_table"
+            seed = f"{uri}#{table_identifier}"
+            id = generate_id(seed)
+
+        # Initialize base class with name, description, and generated ID
+        super().__init__(id=id, name=name, description=description)
+
         # Embedder for embedding the document contents
         if embedder is None:
             from agno.knowledge.embedder.openai import OpenAIEmbedder

@@ -142,7 +159,7 @@ class LanceDb(VectorDb):

     def _prepare_vector(self, embedding) -> List[float]:
         """Prepare vector embedding for insertion, ensuring correct dimensions and type."""
-        if embedding is not None:
+        if embedding is not None and len(embedding) > 0:
             # Convert to list of floats
             vector = [float(x) for x in embedding]

@@ -160,7 +177,7 @@ class LanceDb(VectorDb):

             return vector
         else:
-            # Fallback if embedding is None
+            # Fallback if embedding is None or empty
             return [0.0] * (self.dimensions or 1536)

     async def _get_async_connection(self) -> lancedb.AsyncConnection:

@@ -184,7 +201,6 @@ class LanceDb(VectorDb):
             # Re-establish sync connection to see async changes
             if self.connection and self.table_name in self.connection.table_names():
                 self.table = self.connection.open_table(self.table_name)
-                log_debug(f"Refreshed sync connection for table: {self.table_name}")
         except Exception as e:
             log_debug(f"Could not refresh sync connection: {e}")
             # If refresh fails, we can still function but sync methods might not see async changes

@@ -343,6 +359,9 @@ class LanceDb(VectorDb):
         """
         Asynchronously insert documents into the database.

+        Note: Currently wraps sync insert method since LanceDB async insert has sync/async table
+        synchronization issues causing empty vectors. We still do async embedding for performance.
+
         Args:
             documents (List[Document]): List of documents to insert
             filters (Optional[Dict[str, Any]]): Filters to apply while inserting documents

@@ -352,80 +371,36 @@ class LanceDb(VectorDb):
             return

         log_debug(f"Inserting {len(documents)} documents")
-        data = []
-
-        # Prepare documents for insertion.
-        embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
-        await asyncio.gather(*embed_tasks, return_exceptions=True)
-
-        for document in documents:
-            if await self.async_doc_exists(document):
-                continue
-
-            # Add filters to document metadata if provided
-            if filters:
-                meta_data = document.meta_data.copy() if document.meta_data else {}
-                meta_data.update(filters)
-                document.meta_data = meta_data

-
-
-
-
-
-                "content": cleaned_content,
-                "usage": document.usage,
-                "content_id": document.content_id,
-                "content_hash": content_hash,
-            }
-            data.append(
-                {
-                    "id": doc_id,
-                    "vector": self._prepare_vector(document.embedding),
-                    "payload": json.dumps(payload),
-                }
-            )
-            log_debug(f"Parsed document: {document.name} ({document.meta_data})")
-
-        if not data:
-            log_debug("No new data to insert")
-            return
-
-        try:
-            await self._get_async_connection()
-
-            # Ensure the async table is created before inserting
-            if self.async_table is None:
-                try:
-                    await self.async_create()
-                except Exception as create_e:
-                    logger.error(f"Failed to create async table: {create_e}")
-                    # Continue to fallback logic below
-
-            if self.async_table is None:
-                # Fall back to sync insertion if async table creation failed
-                logger.warning("Async table not available, falling back to sync insertion")
-                return self.insert(content_hash, documents, filters)
-
-            if self.on_bad_vectors is not None:
-                await self.async_table.add(data, on_bad_vectors=self.on_bad_vectors, fill_value=self.fill_value)  # type: ignore
-            else:
-                await self.async_table.add(data)  # type: ignore
+        # Still do async embedding for performance
+        if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
+            try:
+                doc_contents = [doc.content for doc in documents]
+                embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)

-
+                for j, doc in enumerate(documents):
+                    if j < len(embeddings):
+                        doc.embedding = embeddings[j]
+                        doc.usage = usages[j] if j < len(usages) else None
+            except Exception as e:
+                error_str = str(e).lower()
+                is_rate_limit = any(
+                    phrase in error_str
+                    for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
+                )
+                if is_rate_limit:
+                    logger.error(f"Rate limit detected during batch embedding. {e}")
+                    raise e
+                else:
+                    logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
+                    embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
+                    await asyncio.gather(*embed_tasks, return_exceptions=True)
+        else:
+            embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
+            await asyncio.gather(*embed_tasks, return_exceptions=True)

-
-
-        except Exception as e:
-            logger.error(f"Error during async document insertion: {e}")
-            # Try falling back to sync insertion as a last resort
-            try:
-                logger.warning("Async insertion failed, attempting sync fallback")
-                self.insert(content_hash, documents, filters)
-                logger.info("Sync fallback successful")
-            except Exception as sync_e:
-                logger.error(f"Sync fallback also failed: {sync_e}")
-                raise e from sync_e
+        # Use sync insert to avoid sync/async table synchronization issues
+        self.insert(content_hash, documents, filters)

     def upsert_available(self) -> bool:
         """Check if upsert is available in LanceDB."""

@@ -446,11 +421,42 @@ class LanceDb(VectorDb):
     async def async_upsert(
         self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
     ) -> None:
-
-
-
+        """
+        Asynchronously upsert documents into the database.
+
+        Note: Uses async embedding for performance, then sync upsert for reliability.
+        """
+        if len(documents) > 0:
+            # Do async embedding for performance
+            if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
+                try:
+                    doc_contents = [doc.content for doc in documents]
+                    embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
+                    for j, doc in enumerate(documents):
+                        if j < len(embeddings):
+                            doc.embedding = embeddings[j]
+                            doc.usage = usages[j] if j < len(usages) else None
+                except Exception as e:
+                    error_str = str(e).lower()
+                    is_rate_limit = any(
+                        phrase in error_str
+                        for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
+                    )
+                    if is_rate_limit:
+                        raise e
+                    else:
+                        embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
+                        await asyncio.gather(*embed_tasks, return_exceptions=True)
+            else:
+                embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
+                await asyncio.gather(*embed_tasks, return_exceptions=True)
+
+        # Use sync upsert for reliability
+        self.upsert(content_hash=content_hash, documents=documents, filters=filters)

-    def search(
+    def search(
+        self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
+    ) -> List[Document]:
         """
         Search for documents matching the query.

@@ -467,6 +473,10 @@ class LanceDb(VectorDb):

         results = None

+        if isinstance(filters, list):
+            log_warning("Filter Expressions are not yet supported in LanceDB. No filters will be applied.")
+            filters = None
+
         if self.search_type == SearchType.vector:
             results = self.vector_search(query, limit)
         elif self.search_type == SearchType.keyword:

@@ -508,11 +518,14 @@ class LanceDb(VectorDb):
         return search_results

     async def async_search(
-        self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None
+        self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
     ) -> List[Document]:
         """
         Asynchronously search for documents matching the query.

+        Note: Currently wraps sync search method since LanceDB async search has sync/async table
+        synchronization issues. Performance impact is minimal for search operations.
+
         Args:
             query (str): Query string to search for
             limit (int): Maximum number of results to return

@@ -521,53 +534,12 @@ class LanceDb(VectorDb):
         Returns:
             List[Document]: List of matching documents
         """
-        #
-
-        self.table = self.connection.open_table(name=self.table_name)
-
-        results = None
-
-        if self.search_type == SearchType.vector:
-            results = self.vector_search(query, limit)
-        elif self.search_type == SearchType.keyword:
-            results = self.keyword_search(query, limit)
-        elif self.search_type == SearchType.hybrid:
-            results = self.hybrid_search(query, limit)
-        else:
-            logger.error(f"Invalid search type '{self.search_type}'.")
-            return []
-
-        if results is None:
-            return []
-
-        search_results = self._build_search_results(results)
+        # Wrap sync search method to avoid sync/async table synchronization issues
+        return self.search(query=query, limit=limit, filters=filters)

-
-
-
-            for doc in search_results:
-                if doc.meta_data is None:
-                    continue
-
-                # Check if all filter criteria match
-                match = True
-                for key, value in filters.items():
-                    if key not in doc.meta_data or doc.meta_data[key] != value:
-                        match = False
-                        break
-
-                if match:
-                    filtered_results.append(doc)
-
-            search_results = filtered_results
-
-        if self.reranker and search_results:
-            search_results = self.reranker.rerank(query=query, documents=search_results)
-
-        log_info(f"Found {len(search_results)} documents")
-        return search_results
-
-    def vector_search(self, query: str, limit: int = 5) -> List[Document]:
+    def vector_search(
+        self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
+    ) -> List[Document]:
         query_embedding = self.embedder.get_embedding(query)
         if query_embedding is None:
             logger.error(f"Error getting embedding for Query: {query}")

@@ -587,7 +559,9 @@ class LanceDb(VectorDb):

         return results.to_pandas()

-    def hybrid_search(
+    def hybrid_search(
+        self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
+    ) -> List[Document]:
         query_embedding = self.embedder.get_embedding(query)
         if query_embedding is None:
             logger.error(f"Error getting embedding for Query: {query}")

@@ -616,7 +590,9 @@ class LanceDb(VectorDb):

         return results.to_pandas()

-    def keyword_search(
+    def keyword_search(
+        self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
+    ) -> List[Document]:
         if self.table is None:
             logger.error("Table not initialized. Please create the table first")
             return []

@@ -950,17 +926,28 @@ class LanceDb(VectorDb):
             logger.error("Table not initialized")
             return

-            #
-
-            results = self.table.search().
+            # Get all documents and filter in Python (LanceDB doesn't support JSON operators)
+            total_count = self.table.count_rows()
+            results = self.table.search().select(["id", "payload"]).limit(total_count).to_pandas()

             if results.empty:
+                logger.debug("No documents found")
+                return
+
+            # Find matching documents with the given content_id
+            matching_rows = []
+            for _, row in results.iterrows():
+                payload = json.loads(row["payload"])
+                if payload.get("content_id") == content_id:
+                    matching_rows.append(row)
+
+            if not matching_rows:
                 logger.debug(f"No documents found with content_id: {content_id}")
                 return

             # Update each matching document
             updated_count = 0
-            for
+            for row in matching_rows:
                 row_id = row["id"]
                 current_payload = json.loads(row["payload"])

@@ -1002,3 +989,7 @@ class LanceDb(VectorDb):
         except Exception as e:
             logger.error(f"Error updating metadata for content_id '{content_id}': {e}")
             raise
+
+    def get_supported_search_types(self) -> List[str]:
+        """Get the supported search types for this vector database."""
+        return [SearchType.vector, SearchType.keyword, SearchType.hybrid]
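Net effect for `LanceDb`: new `name`, `description`, and `id` constructor parameters (a stable id is derived from `f"{uri}#{table_identifier}"` when none is given), `FilterExpr` lists accepted but warned about and ignored, async insert/upsert/search now delegate to the sync paths to avoid table synchronization issues, and a new `get_supported_search_types()`. A minimal sketch, assuming the import path from the file listing and the pre-existing `table_name`/`search_type` constructor parameters; the database path and table name are placeholders.

```python
import asyncio

from agno.vectordb.lancedb import LanceDb  # import path assumed from the file listing
from agno.vectordb.search import SearchType

# Hypothetical local database and table for illustration only.
vector_db = LanceDb(
    uri="/tmp/lancedb",
    table_name="recipes",  # pre-existing parameter, also used as the seed for the new id
    search_type=SearchType.vector,  # pre-existing parameter
    name="lancedb-recipes",  # new in 2.3.0
    description="Recipe knowledge base",  # new in 2.3.0
    # id omitted: 2.3.0 derives a stable id from f"{uri}#{table_name}" via generate_id()
)

print(vector_db.get_supported_search_types())
# Per the new method: [SearchType.vector, SearchType.keyword, SearchType.hybrid]


async def main() -> None:
    # async_search now wraps the sync search() to sidestep LanceDB
    # sync/async table synchronization issues.
    docs = await vector_db.async_search("pad thai", limit=3)
    print(len(docs))


asyncio.run(main())
```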
agno/vectordb/langchaindb/langchaindb.py +25 -7

@@ -1,7 +1,8 @@
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Union

+from agno.filters import FilterExpr
 from agno.knowledge.document import Document
-from agno.utils.log import log_debug, logger
+from agno.utils.log import log_debug, log_warning, logger
 from agno.vectordb.base import VectorDb


@@ -11,16 +12,23 @@ class LangChainVectorDb(VectorDb):
         vectorstore: Optional[Any] = None,
         search_kwargs: Optional[dict] = None,
         knowledge_retriever: Optional[Any] = None,
+        name: Optional[str] = None,
+        description: Optional[str] = None,
     ):
         """
         Initialize LangChainVectorDb.

         Args:
             vectorstore: The LangChain vectorstore instance
+            name (Optional[str]): Name of the vector database.
+            description (Optional[str]): Description of the vector database.
             search_kwargs: Additional search parameters for the retriever
             knowledge_retriever: An optional LangChain retriever instance
         """
         self.vectorstore = vectorstore
+        # Initialize base class with name and description
+        super().__init__(name=name, description=description)
+
         self.search_kwargs = search_kwargs
         self.knowledge_retriever = knowledge_retriever

@@ -64,10 +72,16 @@ class LangChainVectorDb(VectorDb):
         raise NotImplementedError

     def search(
-        self, query: str,
+        self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
     ) -> List[Document]:
         """Returns relevant documents matching the query"""

+        if isinstance(filters, List):
+            log_warning(
+                "Filter Expressions are not supported in LangChainDB. No filters will be applied. Use filters as a dictionary."
+            )
+            filters = None
+
         try:
             from langchain_core.documents import Document as LangChainDocument
             from langchain_core.retrievers import BaseRetriever

@@ -79,7 +93,7 @@ class LangChainVectorDb(VectorDb):
         if self.vectorstore is not None and self.knowledge_retriever is None:
             log_debug("Creating knowledge retriever")
             if self.search_kwargs is None:
-                self.search_kwargs = {"k":
+                self.search_kwargs = {"k": limit}
             if filters is not None:
                 self.search_kwargs.update(filters)
             self.knowledge_retriever = self.vectorstore.as_retriever(search_kwargs=self.search_kwargs)

@@ -91,7 +105,7 @@ class LangChainVectorDb(VectorDb):
         if not isinstance(self.knowledge_retriever, BaseRetriever):
             raise ValueError(f"Knowledge retriever is not of type BaseRetriever: {self.knowledge_retriever}")

-        log_debug(f"Getting {
+        log_debug(f"Getting {limit} relevant documents for query: {query}")
         lc_documents: List[LangChainDocument] = self.knowledge_retriever.invoke(input=query)
         documents = []
         for lc_doc in lc_documents:

@@ -104,9 +118,9 @@ class LangChainVectorDb(VectorDb):
         return documents

     async def async_search(
-        self, query: str,
+        self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
     ) -> List[Document]:
-        return self.search(query,
+        return self.search(query, limit, filters)

     def drop(self) -> None:
         raise NotImplementedError

@@ -143,3 +157,7 @@ class LangChainVectorDb(VectorDb):
             metadata (Dict[str, Any]): The metadata to update
         """
         raise NotImplementedError("update_metadata not supported for LangChain vectorstores")
+
+    def get_supported_search_types(self) -> List[str]:
+        """Get the supported search types for this vector database."""
+        return []  # LangChainVectorDb doesn't use SearchType enum
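For `LangChainVectorDb`, the constructor now accepts `name`/`description`, and `search`/`async_search` take `limit` plus dict-or-`FilterExpr` filters: `limit` becomes the retriever's `k`, dict filters are merged into `search_kwargs`, and `FilterExpr` lists are ignored with a warning. A minimal sketch, assuming the import path from the file listing; the `langchain-chroma` and `langchain-openai` packages and the collection name are purely illustrative.

```python
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings

from agno.vectordb.langchaindb import LangChainVectorDb  # import path assumed from the file listing

# Any LangChain vectorstore exposing as_retriever() should work; Chroma is illustrative.
vectorstore = Chroma(collection_name="docs", embedding_function=OpenAIEmbeddings())

vector_db = LangChainVectorDb(
    vectorstore=vectorstore,
    name="langchain-docs",  # new in 2.3.0
    description="LangChain-backed knowledge store",  # new in 2.3.0
)

# limit is forwarded as the retriever's "k"; dict filters would be merged into search_kwargs.
documents = vector_db.search("vector database migration notes", limit=3)
print(len(documents))
```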