agno 2.1.2__py3-none-any.whl → 2.3.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/agent.py +5540 -2273
- agno/api/api.py +2 -0
- agno/api/os.py +1 -1
- agno/compression/__init__.py +3 -0
- agno/compression/manager.py +247 -0
- agno/culture/__init__.py +3 -0
- agno/culture/manager.py +956 -0
- agno/db/async_postgres/__init__.py +3 -0
- agno/db/base.py +689 -6
- agno/db/dynamo/dynamo.py +933 -37
- agno/db/dynamo/schemas.py +174 -10
- agno/db/dynamo/utils.py +63 -4
- agno/db/firestore/firestore.py +831 -9
- agno/db/firestore/schemas.py +51 -0
- agno/db/firestore/utils.py +102 -4
- agno/db/gcs_json/gcs_json_db.py +660 -12
- agno/db/gcs_json/utils.py +60 -26
- agno/db/in_memory/in_memory_db.py +287 -14
- agno/db/in_memory/utils.py +60 -2
- agno/db/json/json_db.py +590 -14
- agno/db/json/utils.py +60 -26
- agno/db/migrations/manager.py +199 -0
- agno/db/migrations/v1_to_v2.py +43 -13
- agno/db/migrations/versions/__init__.py +0 -0
- agno/db/migrations/versions/v2_3_0.py +938 -0
- agno/db/mongo/__init__.py +15 -1
- agno/db/mongo/async_mongo.py +2760 -0
- agno/db/mongo/mongo.py +879 -11
- agno/db/mongo/schemas.py +42 -0
- agno/db/mongo/utils.py +80 -8
- agno/db/mysql/__init__.py +2 -1
- agno/db/mysql/async_mysql.py +2912 -0
- agno/db/mysql/mysql.py +946 -68
- agno/db/mysql/schemas.py +72 -10
- agno/db/mysql/utils.py +198 -7
- agno/db/postgres/__init__.py +2 -1
- agno/db/postgres/async_postgres.py +2579 -0
- agno/db/postgres/postgres.py +942 -57
- agno/db/postgres/schemas.py +81 -18
- agno/db/postgres/utils.py +164 -2
- agno/db/redis/redis.py +671 -7
- agno/db/redis/schemas.py +50 -0
- agno/db/redis/utils.py +65 -7
- agno/db/schemas/__init__.py +2 -1
- agno/db/schemas/culture.py +120 -0
- agno/db/schemas/evals.py +1 -0
- agno/db/schemas/memory.py +17 -2
- agno/db/singlestore/schemas.py +63 -0
- agno/db/singlestore/singlestore.py +949 -83
- agno/db/singlestore/utils.py +60 -2
- agno/db/sqlite/__init__.py +2 -1
- agno/db/sqlite/async_sqlite.py +2911 -0
- agno/db/sqlite/schemas.py +62 -0
- agno/db/sqlite/sqlite.py +965 -46
- agno/db/sqlite/utils.py +169 -8
- agno/db/surrealdb/__init__.py +3 -0
- agno/db/surrealdb/metrics.py +292 -0
- agno/db/surrealdb/models.py +334 -0
- agno/db/surrealdb/queries.py +71 -0
- agno/db/surrealdb/surrealdb.py +1908 -0
- agno/db/surrealdb/utils.py +147 -0
- agno/db/utils.py +2 -0
- agno/eval/__init__.py +10 -0
- agno/eval/accuracy.py +75 -55
- agno/eval/agent_as_judge.py +861 -0
- agno/eval/base.py +29 -0
- agno/eval/performance.py +16 -7
- agno/eval/reliability.py +28 -16
- agno/eval/utils.py +35 -17
- agno/exceptions.py +27 -2
- agno/filters.py +354 -0
- agno/guardrails/prompt_injection.py +1 -0
- agno/hooks/__init__.py +3 -0
- agno/hooks/decorator.py +164 -0
- agno/integrations/discord/client.py +1 -1
- agno/knowledge/chunking/agentic.py +13 -10
- agno/knowledge/chunking/fixed.py +4 -1
- agno/knowledge/chunking/semantic.py +9 -4
- agno/knowledge/chunking/strategy.py +59 -15
- agno/knowledge/embedder/fastembed.py +1 -1
- agno/knowledge/embedder/nebius.py +1 -1
- agno/knowledge/embedder/ollama.py +8 -0
- agno/knowledge/embedder/openai.py +8 -8
- agno/knowledge/embedder/sentence_transformer.py +6 -2
- agno/knowledge/embedder/vllm.py +262 -0
- agno/knowledge/knowledge.py +1618 -318
- agno/knowledge/reader/base.py +6 -2
- agno/knowledge/reader/csv_reader.py +8 -10
- agno/knowledge/reader/docx_reader.py +5 -6
- agno/knowledge/reader/field_labeled_csv_reader.py +16 -20
- agno/knowledge/reader/json_reader.py +5 -4
- agno/knowledge/reader/markdown_reader.py +8 -8
- agno/knowledge/reader/pdf_reader.py +17 -19
- agno/knowledge/reader/pptx_reader.py +101 -0
- agno/knowledge/reader/reader_factory.py +32 -3
- agno/knowledge/reader/s3_reader.py +3 -3
- agno/knowledge/reader/tavily_reader.py +193 -0
- agno/knowledge/reader/text_reader.py +22 -10
- agno/knowledge/reader/web_search_reader.py +1 -48
- agno/knowledge/reader/website_reader.py +10 -10
- agno/knowledge/reader/wikipedia_reader.py +33 -1
- agno/knowledge/types.py +1 -0
- agno/knowledge/utils.py +72 -7
- agno/media.py +22 -6
- agno/memory/__init__.py +14 -1
- agno/memory/manager.py +544 -83
- agno/memory/strategies/__init__.py +15 -0
- agno/memory/strategies/base.py +66 -0
- agno/memory/strategies/summarize.py +196 -0
- agno/memory/strategies/types.py +37 -0
- agno/models/aimlapi/aimlapi.py +17 -0
- agno/models/anthropic/claude.py +515 -40
- agno/models/aws/bedrock.py +102 -21
- agno/models/aws/claude.py +131 -274
- agno/models/azure/ai_foundry.py +41 -19
- agno/models/azure/openai_chat.py +39 -8
- agno/models/base.py +1249 -525
- agno/models/cerebras/cerebras.py +91 -21
- agno/models/cerebras/cerebras_openai.py +21 -2
- agno/models/cohere/chat.py +40 -6
- agno/models/cometapi/cometapi.py +18 -1
- agno/models/dashscope/dashscope.py +2 -3
- agno/models/deepinfra/deepinfra.py +18 -1
- agno/models/deepseek/deepseek.py +69 -3
- agno/models/fireworks/fireworks.py +18 -1
- agno/models/google/gemini.py +877 -80
- agno/models/google/utils.py +22 -0
- agno/models/groq/groq.py +51 -18
- agno/models/huggingface/huggingface.py +17 -6
- agno/models/ibm/watsonx.py +16 -6
- agno/models/internlm/internlm.py +18 -1
- agno/models/langdb/langdb.py +13 -1
- agno/models/litellm/chat.py +44 -9
- agno/models/litellm/litellm_openai.py +18 -1
- agno/models/message.py +28 -5
- agno/models/meta/llama.py +47 -14
- agno/models/meta/llama_openai.py +22 -17
- agno/models/mistral/mistral.py +8 -4
- agno/models/nebius/nebius.py +6 -7
- agno/models/nvidia/nvidia.py +20 -3
- agno/models/ollama/chat.py +24 -8
- agno/models/openai/chat.py +104 -29
- agno/models/openai/responses.py +101 -81
- agno/models/openrouter/openrouter.py +60 -3
- agno/models/perplexity/perplexity.py +17 -1
- agno/models/portkey/portkey.py +7 -6
- agno/models/requesty/requesty.py +24 -4
- agno/models/response.py +73 -2
- agno/models/sambanova/sambanova.py +20 -3
- agno/models/siliconflow/siliconflow.py +19 -2
- agno/models/together/together.py +20 -3
- agno/models/utils.py +254 -8
- agno/models/vercel/v0.py +20 -3
- agno/models/vertexai/__init__.py +0 -0
- agno/models/vertexai/claude.py +190 -0
- agno/models/vllm/vllm.py +19 -14
- agno/models/xai/xai.py +19 -2
- agno/os/app.py +549 -152
- agno/os/auth.py +190 -3
- agno/os/config.py +23 -0
- agno/os/interfaces/a2a/router.py +8 -11
- agno/os/interfaces/a2a/utils.py +1 -1
- agno/os/interfaces/agui/router.py +18 -3
- agno/os/interfaces/agui/utils.py +152 -39
- agno/os/interfaces/slack/router.py +55 -37
- agno/os/interfaces/slack/slack.py +9 -1
- agno/os/interfaces/whatsapp/router.py +0 -1
- agno/os/interfaces/whatsapp/security.py +3 -1
- agno/os/mcp.py +110 -52
- agno/os/middleware/__init__.py +2 -0
- agno/os/middleware/jwt.py +676 -112
- agno/os/router.py +40 -1478
- agno/os/routers/agents/__init__.py +3 -0
- agno/os/routers/agents/router.py +599 -0
- agno/os/routers/agents/schema.py +261 -0
- agno/os/routers/evals/evals.py +96 -39
- agno/os/routers/evals/schemas.py +65 -33
- agno/os/routers/evals/utils.py +80 -10
- agno/os/routers/health.py +10 -4
- agno/os/routers/knowledge/knowledge.py +196 -38
- agno/os/routers/knowledge/schemas.py +82 -22
- agno/os/routers/memory/memory.py +279 -52
- agno/os/routers/memory/schemas.py +46 -17
- agno/os/routers/metrics/metrics.py +20 -8
- agno/os/routers/metrics/schemas.py +16 -16
- agno/os/routers/session/session.py +462 -34
- agno/os/routers/teams/__init__.py +3 -0
- agno/os/routers/teams/router.py +512 -0
- agno/os/routers/teams/schema.py +257 -0
- agno/os/routers/traces/__init__.py +3 -0
- agno/os/routers/traces/schemas.py +414 -0
- agno/os/routers/traces/traces.py +499 -0
- agno/os/routers/workflows/__init__.py +3 -0
- agno/os/routers/workflows/router.py +624 -0
- agno/os/routers/workflows/schema.py +75 -0
- agno/os/schema.py +256 -693
- agno/os/scopes.py +469 -0
- agno/os/utils.py +514 -36
- agno/reasoning/anthropic.py +80 -0
- agno/reasoning/gemini.py +73 -0
- agno/reasoning/openai.py +5 -0
- agno/reasoning/vertexai.py +76 -0
- agno/run/__init__.py +6 -0
- agno/run/agent.py +155 -32
- agno/run/base.py +55 -3
- agno/run/requirement.py +181 -0
- agno/run/team.py +125 -38
- agno/run/workflow.py +72 -18
- agno/session/agent.py +102 -89
- agno/session/summary.py +56 -15
- agno/session/team.py +164 -90
- agno/session/workflow.py +405 -40
- agno/table.py +10 -0
- agno/team/team.py +3974 -1903
- agno/tools/dalle.py +2 -4
- agno/tools/eleven_labs.py +23 -25
- agno/tools/exa.py +21 -16
- agno/tools/file.py +153 -23
- agno/tools/file_generation.py +16 -10
- agno/tools/firecrawl.py +15 -7
- agno/tools/function.py +193 -38
- agno/tools/gmail.py +238 -14
- agno/tools/google_drive.py +271 -0
- agno/tools/googlecalendar.py +36 -8
- agno/tools/googlesheets.py +20 -5
- agno/tools/jira.py +20 -0
- agno/tools/mcp/__init__.py +10 -0
- agno/tools/mcp/mcp.py +331 -0
- agno/tools/mcp/multi_mcp.py +347 -0
- agno/tools/mcp/params.py +24 -0
- agno/tools/mcp_toolbox.py +3 -3
- agno/tools/models/nebius.py +5 -5
- agno/tools/models_labs.py +20 -10
- agno/tools/nano_banana.py +151 -0
- agno/tools/notion.py +204 -0
- agno/tools/parallel.py +314 -0
- agno/tools/postgres.py +76 -36
- agno/tools/redshift.py +406 -0
- agno/tools/scrapegraph.py +1 -1
- agno/tools/shopify.py +1519 -0
- agno/tools/slack.py +18 -3
- agno/tools/spotify.py +919 -0
- agno/tools/tavily.py +146 -0
- agno/tools/toolkit.py +25 -0
- agno/tools/workflow.py +8 -1
- agno/tools/yfinance.py +12 -11
- agno/tracing/__init__.py +12 -0
- agno/tracing/exporter.py +157 -0
- agno/tracing/schemas.py +276 -0
- agno/tracing/setup.py +111 -0
- agno/utils/agent.py +938 -0
- agno/utils/cryptography.py +22 -0
- agno/utils/dttm.py +33 -0
- agno/utils/events.py +151 -3
- agno/utils/gemini.py +15 -5
- agno/utils/hooks.py +118 -4
- agno/utils/http.py +113 -2
- agno/utils/knowledge.py +12 -5
- agno/utils/log.py +1 -0
- agno/utils/mcp.py +92 -2
- agno/utils/media.py +187 -1
- agno/utils/merge_dict.py +3 -3
- agno/utils/message.py +60 -0
- agno/utils/models/ai_foundry.py +9 -2
- agno/utils/models/claude.py +49 -14
- agno/utils/models/cohere.py +9 -2
- agno/utils/models/llama.py +9 -2
- agno/utils/models/mistral.py +4 -2
- agno/utils/print_response/agent.py +109 -16
- agno/utils/print_response/team.py +223 -30
- agno/utils/print_response/workflow.py +251 -34
- agno/utils/streamlit.py +1 -1
- agno/utils/team.py +98 -9
- agno/utils/tokens.py +657 -0
- agno/vectordb/base.py +39 -7
- agno/vectordb/cassandra/cassandra.py +21 -5
- agno/vectordb/chroma/chromadb.py +43 -12
- agno/vectordb/clickhouse/clickhousedb.py +21 -5
- agno/vectordb/couchbase/couchbase.py +29 -5
- agno/vectordb/lancedb/lance_db.py +92 -181
- agno/vectordb/langchaindb/langchaindb.py +24 -4
- agno/vectordb/lightrag/lightrag.py +17 -3
- agno/vectordb/llamaindex/llamaindexdb.py +25 -5
- agno/vectordb/milvus/milvus.py +50 -37
- agno/vectordb/mongodb/__init__.py +7 -1
- agno/vectordb/mongodb/mongodb.py +36 -30
- agno/vectordb/pgvector/pgvector.py +201 -77
- agno/vectordb/pineconedb/pineconedb.py +41 -23
- agno/vectordb/qdrant/qdrant.py +67 -54
- agno/vectordb/redis/__init__.py +9 -0
- agno/vectordb/redis/redisdb.py +682 -0
- agno/vectordb/singlestore/singlestore.py +50 -29
- agno/vectordb/surrealdb/surrealdb.py +31 -41
- agno/vectordb/upstashdb/upstashdb.py +34 -6
- agno/vectordb/weaviate/weaviate.py +53 -14
- agno/workflow/__init__.py +2 -0
- agno/workflow/agent.py +299 -0
- agno/workflow/condition.py +120 -18
- agno/workflow/loop.py +77 -10
- agno/workflow/parallel.py +231 -143
- agno/workflow/router.py +118 -17
- agno/workflow/step.py +609 -170
- agno/workflow/steps.py +73 -6
- agno/workflow/types.py +96 -21
- agno/workflow/workflow.py +2039 -262
- {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/METADATA +201 -66
- agno-2.3.13.dist-info/RECORD +613 -0
- agno/tools/googlesearch.py +0 -98
- agno/tools/mcp.py +0 -679
- agno/tools/memori.py +0 -339
- agno-2.1.2.dist-info/RECORD +0 -543
- {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/WHEEL +0 -0
- {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/licenses/LICENSE +0 -0
- {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/top_level.txt +0 -0
|
@@ -2,7 +2,7 @@ import asyncio
|
|
|
2
2
|
import json
|
|
3
3
|
from hashlib import md5
|
|
4
4
|
from os import getenv
|
|
5
|
-
from typing import Any, Dict, List, Optional
|
|
5
|
+
from typing import Any, Dict, List, Optional, Union
|
|
6
6
|
|
|
7
7
|
try:
|
|
8
8
|
import lancedb
|
|
@@ -10,10 +10,11 @@ try:
|
|
|
10
10
|
except ImportError:
|
|
11
11
|
raise ImportError("`lancedb` not installed. Please install using `pip install lancedb`")
|
|
12
12
|
|
|
13
|
+
from agno.filters import FilterExpr
|
|
13
14
|
from agno.knowledge.document import Document
|
|
14
15
|
from agno.knowledge.embedder import Embedder
|
|
15
16
|
from agno.knowledge.reranker.base import Reranker
|
|
16
|
-
from agno.utils.log import log_debug, log_info, logger
|
|
17
|
+
from agno.utils.log import log_debug, log_info, log_warning, logger
|
|
17
18
|
from agno.vectordb.base import VectorDb
|
|
18
19
|
from agno.vectordb.distance import Distance
|
|
19
20
|
from agno.vectordb.search import SearchType
|
|
@@ -25,6 +26,8 @@ class LanceDb(VectorDb):
|
|
|
25
26
|
|
|
26
27
|
Args:
|
|
27
28
|
uri: The URI of the LanceDB database.
|
|
29
|
+
name: Name of the vector database.
|
|
30
|
+
description: Description of the vector database.
|
|
28
31
|
connection: The LanceDB connection to use.
|
|
29
32
|
table: The LanceDB table instance to use.
|
|
30
33
|
async_connection: The LanceDB async connection to use.
|
|
@@ -44,6 +47,9 @@ class LanceDb(VectorDb):
|
|
|
44
47
|
def __init__(
|
|
45
48
|
self,
|
|
46
49
|
uri: lancedb.URI = "/tmp/lancedb",
|
|
50
|
+
name: Optional[str] = None,
|
|
51
|
+
description: Optional[str] = None,
|
|
52
|
+
id: Optional[str] = None,
|
|
47
53
|
connection: Optional[lancedb.LanceDBConnection] = None,
|
|
48
54
|
table: Optional[lancedb.db.LanceTable] = None,
|
|
49
55
|
async_connection: Optional[lancedb.AsyncConnection] = None,
|
|
@@ -59,6 +65,17 @@ class LanceDb(VectorDb):
|
|
|
59
65
|
on_bad_vectors: Optional[str] = None, # One of "error", "drop", "fill", "null".
|
|
60
66
|
fill_value: Optional[float] = None, # Only used if on_bad_vectors is "fill"
|
|
61
67
|
):
|
|
68
|
+
# Dynamic ID generation based on unique identifiers
|
|
69
|
+
if id is None:
|
|
70
|
+
from agno.utils.string import generate_id
|
|
71
|
+
|
|
72
|
+
table_identifier = table_name or "default_table"
|
|
73
|
+
seed = f"{uri}#{table_identifier}"
|
|
74
|
+
id = generate_id(seed)
|
|
75
|
+
|
|
76
|
+
# Initialize base class with name, description, and generated ID
|
|
77
|
+
super().__init__(id=id, name=name, description=description)
|
|
78
|
+
|
|
62
79
|
# Embedder for embedding the document contents
|
|
63
80
|
if embedder is None:
|
|
64
81
|
from agno.knowledge.embedder.openai import OpenAIEmbedder
|
|
@@ -142,7 +159,7 @@ class LanceDb(VectorDb):
|
|
|
142
159
|
|
|
143
160
|
def _prepare_vector(self, embedding) -> List[float]:
|
|
144
161
|
"""Prepare vector embedding for insertion, ensuring correct dimensions and type."""
|
|
145
|
-
if embedding is not None:
|
|
162
|
+
if embedding is not None and len(embedding) > 0:
|
|
146
163
|
# Convert to list of floats
|
|
147
164
|
vector = [float(x) for x in embedding]
|
|
148
165
|
|
|
@@ -160,7 +177,7 @@ class LanceDb(VectorDb):
|
|
|
160
177
|
|
|
161
178
|
return vector
|
|
162
179
|
else:
|
|
163
|
-
# Fallback if embedding is None
|
|
180
|
+
# Fallback if embedding is None or empty
|
|
164
181
|
return [0.0] * (self.dimensions or 1536)
|
|
165
182
|
|
|
166
183
|
async def _get_async_connection(self) -> lancedb.AsyncConnection:
|
|
@@ -184,7 +201,6 @@ class LanceDb(VectorDb):
|
|
|
184
201
|
# Re-establish sync connection to see async changes
|
|
185
202
|
if self.connection and self.table_name in self.connection.table_names():
|
|
186
203
|
self.table = self.connection.open_table(self.table_name)
|
|
187
|
-
log_debug(f"Refreshed sync connection for table: {self.table_name}")
|
|
188
204
|
except Exception as e:
|
|
189
205
|
log_debug(f"Could not refresh sync connection: {e}")
|
|
190
206
|
# If refresh fails, we can still function but sync methods might not see async changes
|
|
@@ -244,39 +260,6 @@ class LanceDb(VectorDb):
|
|
|
244
260
|
tbl = self.connection.create_table(name=self.table_name, schema=schema, mode="overwrite", exist_ok=True) # type: ignore
|
|
245
261
|
return tbl # type: ignore
|
|
246
262
|
|
|
247
|
-
def doc_exists(self, document: Document) -> bool:
|
|
248
|
-
"""
|
|
249
|
-
Validating if the document exists or not
|
|
250
|
-
|
|
251
|
-
Args:
|
|
252
|
-
document (Document): Document to validate
|
|
253
|
-
"""
|
|
254
|
-
try:
|
|
255
|
-
if self.table is not None:
|
|
256
|
-
cleaned_content = document.content.replace("\x00", "\ufffd")
|
|
257
|
-
doc_id = md5(cleaned_content.encode()).hexdigest()
|
|
258
|
-
result = self.table.search().where(f"{self._id}='{doc_id}'").to_arrow()
|
|
259
|
-
return len(result) > 0
|
|
260
|
-
except Exception:
|
|
261
|
-
# Search sometimes fails with stale cache data, it means the doc doesn't exist
|
|
262
|
-
return False
|
|
263
|
-
|
|
264
|
-
return False
|
|
265
|
-
|
|
266
|
-
async def async_doc_exists(self, document: Document) -> bool:
|
|
267
|
-
"""
|
|
268
|
-
Asynchronously validate if the document exists
|
|
269
|
-
|
|
270
|
-
Args:
|
|
271
|
-
document (Document): Document to validate
|
|
272
|
-
|
|
273
|
-
Returns:
|
|
274
|
-
bool: True if document exists, False otherwise
|
|
275
|
-
"""
|
|
276
|
-
if self.connection:
|
|
277
|
-
self.table = self.connection.open_table(name=self.table_name)
|
|
278
|
-
return self.doc_exists(document)
|
|
279
|
-
|
|
280
263
|
def insert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
|
|
281
264
|
"""
|
|
282
265
|
Insert documents into the database.
|
|
@@ -293,9 +276,6 @@ class LanceDb(VectorDb):
|
|
|
293
276
|
data = []
|
|
294
277
|
|
|
295
278
|
for document in documents:
|
|
296
|
-
if self.doc_exists(document):
|
|
297
|
-
continue
|
|
298
|
-
|
|
299
279
|
# Add filters to document metadata if provided
|
|
300
280
|
if filters:
|
|
301
281
|
meta_data = document.meta_data.copy() if document.meta_data else {}
|
|
@@ -304,7 +284,9 @@ class LanceDb(VectorDb):
|
|
|
304
284
|
|
|
305
285
|
document.embed(embedder=self.embedder)
|
|
306
286
|
cleaned_content = document.content.replace("\x00", "\ufffd")
|
|
307
|
-
|
|
287
|
+
# Include content_hash in ID to ensure uniqueness across different content hashes
|
|
288
|
+
base_id = document.id or md5(cleaned_content.encode()).hexdigest()
|
|
289
|
+
doc_id = str(md5(f"{base_id}_{content_hash}".encode()).hexdigest())
|
|
308
290
|
payload = {
|
|
309
291
|
"name": document.name,
|
|
310
292
|
"meta_data": document.meta_data,
|
|
@@ -343,6 +325,9 @@ class LanceDb(VectorDb):
|
|
|
343
325
|
"""
|
|
344
326
|
Asynchronously insert documents into the database.
|
|
345
327
|
|
|
328
|
+
Note: Currently wraps sync insert method since LanceDB async insert has sync/async table
|
|
329
|
+
synchronization issues causing empty vectors. We still do async embedding for performance.
|
|
330
|
+
|
|
346
331
|
Args:
|
|
347
332
|
documents (List[Document]): List of documents to insert
|
|
348
333
|
filters (Optional[Dict[str, Any]]): Filters to apply while inserting documents
|
|
@@ -352,115 +337,36 @@ class LanceDb(VectorDb):
|
|
|
352
337
|
return
|
|
353
338
|
|
|
354
339
|
log_debug(f"Inserting {len(documents)} documents")
|
|
355
|
-
data = []
|
|
356
340
|
|
|
341
|
+
# Still do async embedding for performance
|
|
357
342
|
if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
|
|
358
|
-
# Use batch embedding when enabled and supported
|
|
359
343
|
try:
|
|
360
|
-
# Extract content from all documents
|
|
361
344
|
doc_contents = [doc.content for doc in documents]
|
|
362
|
-
|
|
363
|
-
# Get batch embeddings and usage
|
|
364
345
|
embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
|
|
365
346
|
|
|
366
|
-
# Process documents with pre-computed embeddings
|
|
367
347
|
for j, doc in enumerate(documents):
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
doc.usage = usages[j] if j < len(usages) else None
|
|
372
|
-
except Exception as e:
|
|
373
|
-
logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")
|
|
374
|
-
|
|
348
|
+
if j < len(embeddings):
|
|
349
|
+
doc.embedding = embeddings[j]
|
|
350
|
+
doc.usage = usages[j] if j < len(usages) else None
|
|
375
351
|
except Exception as e:
|
|
376
|
-
# Check if this is a rate limit error - don't fall back as it would make things worse
|
|
377
352
|
error_str = str(e).lower()
|
|
378
353
|
is_rate_limit = any(
|
|
379
354
|
phrase in error_str
|
|
380
355
|
for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
|
|
381
356
|
)
|
|
382
|
-
|
|
383
357
|
if is_rate_limit:
|
|
384
358
|
logger.error(f"Rate limit detected during batch embedding. {e}")
|
|
385
359
|
raise e
|
|
386
360
|
else:
|
|
387
361
|
logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
|
|
388
|
-
# Fall back to individual embedding
|
|
389
362
|
embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
|
|
390
363
|
await asyncio.gather(*embed_tasks, return_exceptions=True)
|
|
391
364
|
else:
|
|
392
|
-
|
|
393
|
-
embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
|
|
365
|
+
embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
|
|
394
366
|
await asyncio.gather(*embed_tasks, return_exceptions=True)
|
|
395
367
|
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
continue
|
|
399
|
-
|
|
400
|
-
# Add filters to document metadata if provided
|
|
401
|
-
if filters:
|
|
402
|
-
meta_data = document.meta_data.copy() if document.meta_data else {}
|
|
403
|
-
meta_data.update(filters)
|
|
404
|
-
document.meta_data = meta_data
|
|
405
|
-
|
|
406
|
-
cleaned_content = document.content.replace("\x00", "\ufffd")
|
|
407
|
-
doc_id = str(md5(cleaned_content.encode()).hexdigest())
|
|
408
|
-
payload = {
|
|
409
|
-
"name": document.name,
|
|
410
|
-
"meta_data": document.meta_data,
|
|
411
|
-
"content": cleaned_content,
|
|
412
|
-
"usage": document.usage,
|
|
413
|
-
"content_id": document.content_id,
|
|
414
|
-
"content_hash": content_hash,
|
|
415
|
-
}
|
|
416
|
-
data.append(
|
|
417
|
-
{
|
|
418
|
-
"id": doc_id,
|
|
419
|
-
"vector": self._prepare_vector(document.embedding),
|
|
420
|
-
"payload": json.dumps(payload),
|
|
421
|
-
}
|
|
422
|
-
)
|
|
423
|
-
log_debug(f"Parsed document: {document.name} ({document.meta_data})")
|
|
424
|
-
|
|
425
|
-
if not data:
|
|
426
|
-
log_debug("No new data to insert")
|
|
427
|
-
return
|
|
428
|
-
|
|
429
|
-
try:
|
|
430
|
-
await self._get_async_connection()
|
|
431
|
-
|
|
432
|
-
# Ensure the async table is created before inserting
|
|
433
|
-
if self.async_table is None:
|
|
434
|
-
try:
|
|
435
|
-
await self.async_create()
|
|
436
|
-
except Exception as create_e:
|
|
437
|
-
logger.error(f"Failed to create async table: {create_e}")
|
|
438
|
-
# Continue to fallback logic below
|
|
439
|
-
|
|
440
|
-
if self.async_table is None:
|
|
441
|
-
# Fall back to sync insertion if async table creation failed
|
|
442
|
-
logger.warning("Async table not available, falling back to sync insertion")
|
|
443
|
-
return self.insert(content_hash, documents, filters)
|
|
444
|
-
|
|
445
|
-
if self.on_bad_vectors is not None:
|
|
446
|
-
await self.async_table.add(data, on_bad_vectors=self.on_bad_vectors, fill_value=self.fill_value) # type: ignore
|
|
447
|
-
else:
|
|
448
|
-
await self.async_table.add(data) # type: ignore
|
|
449
|
-
|
|
450
|
-
log_debug(f"Asynchronously inserted {len(data)} documents")
|
|
451
|
-
|
|
452
|
-
# Refresh sync connection to see async changes
|
|
453
|
-
self._refresh_sync_connection()
|
|
454
|
-
except Exception as e:
|
|
455
|
-
logger.error(f"Error during async document insertion: {e}")
|
|
456
|
-
# Try falling back to sync insertion as a last resort
|
|
457
|
-
try:
|
|
458
|
-
logger.warning("Async insertion failed, attempting sync fallback")
|
|
459
|
-
self.insert(content_hash, documents, filters)
|
|
460
|
-
logger.info("Sync fallback successful")
|
|
461
|
-
except Exception as sync_e:
|
|
462
|
-
logger.error(f"Sync fallback also failed: {sync_e}")
|
|
463
|
-
raise e from sync_e
|
|
368
|
+
# Use sync insert to avoid sync/async table synchronization issues
|
|
369
|
+
self.insert(content_hash, documents, filters)
|
|
464
370
|
|
|
465
371
|
def upsert_available(self) -> bool:
|
|
466
372
|
"""Check if upsert is available in LanceDB."""
|
|
@@ -481,11 +387,42 @@ class LanceDb(VectorDb):
|
|
|
481
387
|
async def async_upsert(
|
|
482
388
|
self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
|
|
483
389
|
) -> None:
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
await self.async_insert(content_hash=content_hash, documents=documents, filters=filters)
|
|
390
|
+
"""
|
|
391
|
+
Asynchronously upsert documents into the database.
|
|
487
392
|
|
|
488
|
-
|
|
393
|
+
Note: Uses async embedding for performance, then sync upsert for reliability.
|
|
394
|
+
"""
|
|
395
|
+
if len(documents) > 0:
|
|
396
|
+
# Do async embedding for performance
|
|
397
|
+
if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
|
|
398
|
+
try:
|
|
399
|
+
doc_contents = [doc.content for doc in documents]
|
|
400
|
+
embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
|
|
401
|
+
for j, doc in enumerate(documents):
|
|
402
|
+
if j < len(embeddings):
|
|
403
|
+
doc.embedding = embeddings[j]
|
|
404
|
+
doc.usage = usages[j] if j < len(usages) else None
|
|
405
|
+
except Exception as e:
|
|
406
|
+
error_str = str(e).lower()
|
|
407
|
+
is_rate_limit = any(
|
|
408
|
+
phrase in error_str
|
|
409
|
+
for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
|
|
410
|
+
)
|
|
411
|
+
if is_rate_limit:
|
|
412
|
+
raise e
|
|
413
|
+
else:
|
|
414
|
+
embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
|
|
415
|
+
await asyncio.gather(*embed_tasks, return_exceptions=True)
|
|
416
|
+
else:
|
|
417
|
+
embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
|
|
418
|
+
await asyncio.gather(*embed_tasks, return_exceptions=True)
|
|
419
|
+
|
|
420
|
+
# Use sync upsert for reliability
|
|
421
|
+
self.upsert(content_hash=content_hash, documents=documents, filters=filters)
|
|
422
|
+
|
|
423
|
+
def search(
|
|
424
|
+
self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
|
|
425
|
+
) -> List[Document]:
|
|
489
426
|
"""
|
|
490
427
|
Search for documents matching the query.
|
|
491
428
|
|
|
@@ -502,6 +439,10 @@ class LanceDb(VectorDb):
|
|
|
502
439
|
|
|
503
440
|
results = None
|
|
504
441
|
|
|
442
|
+
if isinstance(filters, list):
|
|
443
|
+
log_warning("Filter Expressions are not yet supported in LanceDB. No filters will be applied.")
|
|
444
|
+
filters = None
|
|
445
|
+
|
|
505
446
|
if self.search_type == SearchType.vector:
|
|
506
447
|
results = self.vector_search(query, limit)
|
|
507
448
|
elif self.search_type == SearchType.keyword:
|
|
@@ -543,11 +484,14 @@ class LanceDb(VectorDb):
|
|
|
543
484
|
return search_results
|
|
544
485
|
|
|
545
486
|
async def async_search(
|
|
546
|
-
self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None
|
|
487
|
+
self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
|
|
547
488
|
) -> List[Document]:
|
|
548
489
|
"""
|
|
549
490
|
Asynchronously search for documents matching the query.
|
|
550
491
|
|
|
492
|
+
Note: Currently wraps sync search method since LanceDB async search has sync/async table
|
|
493
|
+
synchronization issues. Performance impact is minimal for search operations.
|
|
494
|
+
|
|
551
495
|
Args:
|
|
552
496
|
query (str): Query string to search for
|
|
553
497
|
limit (int): Maximum number of results to return
|
|
@@ -556,53 +500,12 @@ class LanceDb(VectorDb):
|
|
|
556
500
|
Returns:
|
|
557
501
|
List[Document]: List of matching documents
|
|
558
502
|
"""
|
|
559
|
-
#
|
|
560
|
-
|
|
561
|
-
self.table = self.connection.open_table(name=self.table_name)
|
|
503
|
+
# Wrap sync search method to avoid sync/async table synchronization issues
|
|
504
|
+
return self.search(query=query, limit=limit, filters=filters)
|
|
562
505
|
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
results = self.vector_search(query, limit)
|
|
567
|
-
elif self.search_type == SearchType.keyword:
|
|
568
|
-
results = self.keyword_search(query, limit)
|
|
569
|
-
elif self.search_type == SearchType.hybrid:
|
|
570
|
-
results = self.hybrid_search(query, limit)
|
|
571
|
-
else:
|
|
572
|
-
logger.error(f"Invalid search type '{self.search_type}'.")
|
|
573
|
-
return []
|
|
574
|
-
|
|
575
|
-
if results is None:
|
|
576
|
-
return []
|
|
577
|
-
|
|
578
|
-
search_results = self._build_search_results(results)
|
|
579
|
-
|
|
580
|
-
# Filter results based on metadata if filters are provided
|
|
581
|
-
if filters and search_results:
|
|
582
|
-
filtered_results = []
|
|
583
|
-
for doc in search_results:
|
|
584
|
-
if doc.meta_data is None:
|
|
585
|
-
continue
|
|
586
|
-
|
|
587
|
-
# Check if all filter criteria match
|
|
588
|
-
match = True
|
|
589
|
-
for key, value in filters.items():
|
|
590
|
-
if key not in doc.meta_data or doc.meta_data[key] != value:
|
|
591
|
-
match = False
|
|
592
|
-
break
|
|
593
|
-
|
|
594
|
-
if match:
|
|
595
|
-
filtered_results.append(doc)
|
|
596
|
-
|
|
597
|
-
search_results = filtered_results
|
|
598
|
-
|
|
599
|
-
if self.reranker and search_results:
|
|
600
|
-
search_results = self.reranker.rerank(query=query, documents=search_results)
|
|
601
|
-
|
|
602
|
-
log_info(f"Found {len(search_results)} documents")
|
|
603
|
-
return search_results
|
|
604
|
-
|
|
605
|
-
def vector_search(self, query: str, limit: int = 5) -> List[Document]:
|
|
506
|
+
def vector_search(
|
|
507
|
+
self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
|
|
508
|
+
) -> List[Document]:
|
|
606
509
|
query_embedding = self.embedder.get_embedding(query)
|
|
607
510
|
if query_embedding is None:
|
|
608
511
|
logger.error(f"Error getting embedding for Query: {query}")
|
|
@@ -622,7 +525,9 @@ class LanceDb(VectorDb):
|
|
|
622
525
|
|
|
623
526
|
return results.to_pandas()
|
|
624
527
|
|
|
625
|
-
def hybrid_search(
|
|
528
|
+
def hybrid_search(
|
|
529
|
+
self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
|
|
530
|
+
) -> List[Document]:
|
|
626
531
|
query_embedding = self.embedder.get_embedding(query)
|
|
627
532
|
if query_embedding is None:
|
|
628
533
|
logger.error(f"Error getting embedding for Query: {query}")
|
|
@@ -651,7 +556,9 @@ class LanceDb(VectorDb):
|
|
|
651
556
|
|
|
652
557
|
return results.to_pandas()
|
|
653
558
|
|
|
654
|
-
def keyword_search(
|
|
559
|
+
def keyword_search(
|
|
560
|
+
self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
|
|
561
|
+
) -> List[Document]:
|
|
655
562
|
if self.table is None:
|
|
656
563
|
logger.error("Table not initialized. Please create the table first")
|
|
657
564
|
return []
|
|
@@ -1048,3 +955,7 @@ class LanceDb(VectorDb):
|
|
|
1048
955
|
except Exception as e:
|
|
1049
956
|
logger.error(f"Error updating metadata for content_id '{content_id}': {e}")
|
|
1050
957
|
raise
|
|
958
|
+
|
|
959
|
+
def get_supported_search_types(self) -> List[str]:
|
|
960
|
+
"""Get the supported search types for this vector database."""
|
|
961
|
+
return [SearchType.vector, SearchType.keyword, SearchType.hybrid]
|
|
@@ -1,7 +1,8 @@
|
|
|
1
|
-
from typing import Any, Dict, List, Optional
|
|
1
|
+
from typing import Any, Dict, List, Optional, Union
|
|
2
2
|
|
|
3
|
+
from agno.filters import FilterExpr
|
|
3
4
|
from agno.knowledge.document import Document
|
|
4
|
-
from agno.utils.log import log_debug, logger
|
|
5
|
+
from agno.utils.log import log_debug, log_warning, logger
|
|
5
6
|
from agno.vectordb.base import VectorDb
|
|
6
7
|
|
|
7
8
|
|
|
@@ -11,16 +12,23 @@ class LangChainVectorDb(VectorDb):
|
|
|
11
12
|
vectorstore: Optional[Any] = None,
|
|
12
13
|
search_kwargs: Optional[dict] = None,
|
|
13
14
|
knowledge_retriever: Optional[Any] = None,
|
|
15
|
+
name: Optional[str] = None,
|
|
16
|
+
description: Optional[str] = None,
|
|
14
17
|
):
|
|
15
18
|
"""
|
|
16
19
|
Initialize LangChainVectorDb.
|
|
17
20
|
|
|
18
21
|
Args:
|
|
19
22
|
vectorstore: The LangChain vectorstore instance
|
|
23
|
+
name (Optional[str]): Name of the vector database.
|
|
24
|
+
description (Optional[str]): Description of the vector database.
|
|
20
25
|
search_kwargs: Additional search parameters for the retriever
|
|
21
26
|
knowledge_retriever: An optional LangChain retriever instance
|
|
22
27
|
"""
|
|
23
28
|
self.vectorstore = vectorstore
|
|
29
|
+
# Initialize base class with name and description
|
|
30
|
+
super().__init__(name=name, description=description)
|
|
31
|
+
|
|
24
32
|
self.search_kwargs = search_kwargs
|
|
25
33
|
self.knowledge_retriever = knowledge_retriever
|
|
26
34
|
|
|
@@ -63,9 +71,17 @@ class LangChainVectorDb(VectorDb):
|
|
|
63
71
|
logger.warning("LangChainKnowledgeBase.async_upsert() not supported - please check the vectorstore manually.")
|
|
64
72
|
raise NotImplementedError
|
|
65
73
|
|
|
66
|
-
def search(
|
|
74
|
+
def search(
|
|
75
|
+
self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
|
|
76
|
+
) -> List[Document]:
|
|
67
77
|
"""Returns relevant documents matching the query"""
|
|
68
78
|
|
|
79
|
+
if isinstance(filters, List):
|
|
80
|
+
log_warning(
|
|
81
|
+
"Filter Expressions are not supported in LangChainDB. No filters will be applied. Use filters as a dictionary."
|
|
82
|
+
)
|
|
83
|
+
filters = None
|
|
84
|
+
|
|
69
85
|
try:
|
|
70
86
|
from langchain_core.documents import Document as LangChainDocument
|
|
71
87
|
from langchain_core.retrievers import BaseRetriever
|
|
@@ -102,7 +118,7 @@ class LangChainVectorDb(VectorDb):
|
|
|
102
118
|
return documents
|
|
103
119
|
|
|
104
120
|
async def async_search(
|
|
105
|
-
self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None
|
|
121
|
+
self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
|
|
106
122
|
) -> List[Document]:
|
|
107
123
|
return self.search(query, limit, filters)
|
|
108
124
|
|
|
@@ -141,3 +157,7 @@ class LangChainVectorDb(VectorDb):
|
|
|
141
157
|
metadata (Dict[str, Any]): The metadata to update
|
|
142
158
|
"""
|
|
143
159
|
raise NotImplementedError("update_metadata not supported for LangChain vectorstores")
|
|
160
|
+
|
|
161
|
+
def get_supported_search_types(self) -> List[str]:
|
|
162
|
+
"""Get the supported search types for this vector database."""
|
|
163
|
+
return [] # LangChainVectorDb doesn't use SearchType enum
|
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
-
from typing import Any, Dict, List, Optional
|
|
2
|
+
from typing import Any, Dict, List, Optional, Union
|
|
3
3
|
|
|
4
4
|
import httpx
|
|
5
5
|
|
|
6
|
+
from agno.filters import FilterExpr
|
|
6
7
|
from agno.knowledge.document import Document
|
|
7
8
|
from agno.utils.log import log_debug, log_error, log_info, log_warning
|
|
8
9
|
from agno.vectordb.base import VectorDb
|
|
@@ -21,9 +22,14 @@ class LightRag(VectorDb):
|
|
|
21
22
|
api_key: Optional[str] = None,
|
|
22
23
|
auth_header_name: str = "X-API-KEY",
|
|
23
24
|
auth_header_format: str = "{api_key}",
|
|
25
|
+
name: Optional[str] = None,
|
|
26
|
+
description: Optional[str] = None,
|
|
24
27
|
):
|
|
25
28
|
self.server_url = server_url
|
|
26
29
|
self.api_key = api_key
|
|
30
|
+
# Initialize base class with name and description
|
|
31
|
+
super().__init__(name=name, description=description)
|
|
32
|
+
|
|
27
33
|
self.auth_header_name = auth_header_name
|
|
28
34
|
self.auth_header_format = auth_header_format
|
|
29
35
|
|
|
@@ -87,14 +93,18 @@ class LightRag(VectorDb):
|
|
|
87
93
|
"""Async upsert documents into the vector database"""
|
|
88
94
|
pass
|
|
89
95
|
|
|
90
|
-
def search(
|
|
96
|
+
def search(
|
|
97
|
+
self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
|
|
98
|
+
) -> List[Document]:
|
|
91
99
|
result = asyncio.run(self.async_search(query, limit=limit, filters=filters))
|
|
92
100
|
return result if result is not None else []
|
|
93
101
|
|
|
94
102
|
async def async_search(
|
|
95
|
-
self, query: str, limit: Optional[int] = None, filters: Optional[Dict[str, Any]] = None
|
|
103
|
+
self, query: str, limit: Optional[int] = None, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
|
|
96
104
|
) -> Optional[List[Document]]:
|
|
97
105
|
mode: str = "hybrid" # Default mode, can be "local", "global", or "hybrid"
|
|
106
|
+
if filters is not None:
|
|
107
|
+
log_warning("Filters are not supported in LightRAG. No filters will be applied.")
|
|
98
108
|
try:
|
|
99
109
|
async with httpx.AsyncClient(timeout=30.0) as client:
|
|
100
110
|
response = await client.post(
|
|
@@ -372,3 +382,7 @@ class LightRag(VectorDb):
|
|
|
372
382
|
metadata (Dict[str, Any]): The metadata to update
|
|
373
383
|
"""
|
|
374
384
|
raise NotImplementedError("update_metadata not supported for LightRag - use LightRag's native methods")
|
|
385
|
+
|
|
386
|
+
def get_supported_search_types(self) -> List[str]:
|
|
387
|
+
"""Get the supported search types for this vector database."""
|
|
388
|
+
return [] # LightRag doesn't use SearchType enum
|
|
@@ -1,7 +1,8 @@
|
|
|
1
|
-
from typing import Any, Callable, Dict, List, Optional
|
|
1
|
+
from typing import Any, Callable, Dict, List, Optional, Union
|
|
2
2
|
|
|
3
|
+
from agno.filters import FilterExpr
|
|
3
4
|
from agno.knowledge.document import Document
|
|
4
|
-
from agno.utils.log import logger
|
|
5
|
+
from agno.utils.log import log_warning, logger
|
|
5
6
|
from agno.vectordb.base import VectorDb
|
|
6
7
|
|
|
7
8
|
try:
|
|
@@ -17,8 +18,18 @@ class LlamaIndexVectorDb(VectorDb):
|
|
|
17
18
|
knowledge_retriever: BaseRetriever
|
|
18
19
|
loader: Optional[Callable] = None
|
|
19
20
|
|
|
20
|
-
def __init__(
|
|
21
|
+
def __init__(
|
|
22
|
+
self,
|
|
23
|
+
knowledge_retriever: BaseRetriever,
|
|
24
|
+
loader: Optional[Callable] = None,
|
|
25
|
+
name: Optional[str] = None,
|
|
26
|
+
description: Optional[str] = None,
|
|
27
|
+
**kwargs,
|
|
28
|
+
):
|
|
21
29
|
super().__init__(**kwargs)
|
|
30
|
+
# Initialize base class with name and description
|
|
31
|
+
super().__init__(name=name, description=description)
|
|
32
|
+
|
|
22
33
|
self.knowledge_retriever = knowledge_retriever
|
|
23
34
|
self.loader = loader
|
|
24
35
|
|
|
@@ -58,7 +69,9 @@ class LlamaIndexVectorDb(VectorDb):
|
|
|
58
69
|
logger.warning("LlamaIndexVectorDb.async_upsert() not supported - please check the vectorstore manually.")
|
|
59
70
|
raise NotImplementedError
|
|
60
71
|
|
|
61
|
-
def search(
|
|
72
|
+
def search(
|
|
73
|
+
self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
|
|
74
|
+
) -> List[Document]:
|
|
62
75
|
"""
|
|
63
76
|
Returns relevant documents matching the query.
|
|
64
77
|
|
|
@@ -72,6 +85,9 @@ class LlamaIndexVectorDb(VectorDb):
|
|
|
72
85
|
Raises:
|
|
73
86
|
ValueError: If the knowledge retriever is not of type BaseRetriever.
|
|
74
87
|
"""
|
|
88
|
+
if filters is not None:
|
|
89
|
+
log_warning("Filters are not supported in LlamaIndex. No filters will be applied.")
|
|
90
|
+
|
|
75
91
|
if not isinstance(self.knowledge_retriever, BaseRetriever):
|
|
76
92
|
raise ValueError(f"Knowledge retriever is not of type BaseRetriever: {self.knowledge_retriever}")
|
|
77
93
|
|
|
@@ -89,7 +105,7 @@ class LlamaIndexVectorDb(VectorDb):
|
|
|
89
105
|
return documents
|
|
90
106
|
|
|
91
107
|
async def async_search(
|
|
92
|
-
self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None
|
|
108
|
+
self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
|
|
93
109
|
) -> List[Document]:
|
|
94
110
|
return self.search(query, limit, filters)
|
|
95
111
|
|
|
@@ -144,3 +160,7 @@ class LlamaIndexVectorDb(VectorDb):
|
|
|
144
160
|
"LlamaIndexVectorDb.delete_by_content_id() not supported - please check the vectorstore manually."
|
|
145
161
|
)
|
|
146
162
|
return False
|
|
163
|
+
|
|
164
|
+
def get_supported_search_types(self) -> List[str]:
|
|
165
|
+
"""Get the supported search types for this vector database."""
|
|
166
|
+
return [] # LlamaIndexVectorDb doesn't use SearchType enum
|