agno 2.2.10__py3-none-any.whl → 2.2.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/agent.py +75 -48
- agno/db/dynamo/utils.py +1 -1
- agno/db/firestore/utils.py +1 -1
- agno/db/gcs_json/utils.py +1 -1
- agno/db/in_memory/utils.py +1 -1
- agno/db/json/utils.py +1 -1
- agno/db/mongo/utils.py +3 -3
- agno/db/mysql/mysql.py +1 -1
- agno/db/mysql/utils.py +1 -1
- agno/db/postgres/utils.py +1 -1
- agno/db/redis/utils.py +1 -1
- agno/db/singlestore/singlestore.py +1 -1
- agno/db/singlestore/utils.py +1 -1
- agno/db/sqlite/async_sqlite.py +1 -1
- agno/db/sqlite/sqlite.py +1 -1
- agno/db/sqlite/utils.py +1 -1
- agno/filters.py +354 -0
- agno/knowledge/chunking/agentic.py +8 -9
- agno/knowledge/chunking/strategy.py +59 -15
- agno/knowledge/embedder/sentence_transformer.py +6 -2
- agno/knowledge/knowledge.py +43 -22
- agno/knowledge/reader/base.py +6 -2
- agno/knowledge/utils.py +20 -0
- agno/models/anthropic/claude.py +45 -9
- agno/models/base.py +4 -0
- agno/os/app.py +23 -7
- agno/os/interfaces/slack/router.py +53 -33
- agno/os/interfaces/slack/slack.py +9 -1
- agno/os/router.py +25 -1
- agno/os/routers/health.py +5 -3
- agno/os/routers/knowledge/knowledge.py +43 -17
- agno/os/routers/knowledge/schemas.py +4 -3
- agno/run/agent.py +11 -1
- agno/run/base.py +3 -2
- agno/session/agent.py +10 -5
- agno/team/team.py +57 -18
- agno/tools/file_generation.py +4 -4
- agno/tools/gmail.py +179 -0
- agno/tools/parallel.py +314 -0
- agno/utils/agent.py +22 -17
- agno/utils/gemini.py +15 -5
- agno/utils/knowledge.py +12 -5
- agno/utils/log.py +1 -0
- agno/utils/models/claude.py +2 -1
- agno/utils/print_response/agent.py +5 -4
- agno/utils/print_response/team.py +5 -4
- agno/vectordb/base.py +2 -4
- agno/vectordb/cassandra/cassandra.py +12 -5
- agno/vectordb/chroma/chromadb.py +10 -4
- agno/vectordb/clickhouse/clickhousedb.py +12 -4
- agno/vectordb/couchbase/couchbase.py +12 -3
- agno/vectordb/lancedb/lance_db.py +69 -144
- agno/vectordb/langchaindb/langchaindb.py +13 -4
- agno/vectordb/lightrag/lightrag.py +8 -3
- agno/vectordb/llamaindex/llamaindexdb.py +10 -4
- agno/vectordb/milvus/milvus.py +16 -5
- agno/vectordb/mongodb/mongodb.py +14 -3
- agno/vectordb/pgvector/pgvector.py +73 -15
- agno/vectordb/pineconedb/pineconedb.py +6 -2
- agno/vectordb/qdrant/qdrant.py +25 -13
- agno/vectordb/redis/redisdb.py +37 -30
- agno/vectordb/singlestore/singlestore.py +9 -4
- agno/vectordb/surrealdb/surrealdb.py +13 -3
- agno/vectordb/upstashdb/upstashdb.py +8 -5
- agno/vectordb/weaviate/weaviate.py +29 -12
- agno/workflow/step.py +3 -2
- agno/workflow/types.py +20 -1
- agno/workflow/workflow.py +103 -14
- {agno-2.2.10.dist-info → agno-2.2.12.dist-info}/METADATA +4 -1
- {agno-2.2.10.dist-info → agno-2.2.12.dist-info}/RECORD +73 -71
- {agno-2.2.10.dist-info → agno-2.2.12.dist-info}/WHEEL +0 -0
- {agno-2.2.10.dist-info → agno-2.2.12.dist-info}/licenses/LICENSE +0 -0
- {agno-2.2.10.dist-info → agno-2.2.12.dist-info}/top_level.txt +0 -0
@@ -2,6 +2,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Set, Unio
 
 from pydantic import BaseModel
 
+from agno.filters import FilterExpr
 from agno.media import Audio, File, Image, Video
 from agno.models.message import Message
 from agno.models.response import ToolExecution
@@ -33,7 +34,7 @@ def print_response(
     videos: Optional[Sequence[Video]] = None,
     files: Optional[Sequence[File]] = None,
     markdown: bool = False,
-    knowledge_filters: Optional[Dict[str, Any]] = None,
+    knowledge_filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None,
     add_history_to_context: Optional[bool] = None,
     dependencies: Optional[Dict[str, Any]] = None,
     add_dependencies_to_context: Optional[bool] = None,
@@ -333,7 +334,7 @@ def print_response_stream(
     markdown: bool = False,
     stream_events: bool = False,
     stream_intermediate_steps: bool = False,  # type: ignore
-    knowledge_filters: Optional[Dict[str, Any]] = None,
+    knowledge_filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None,
     add_history_to_context: Optional[bool] = None,
     dependencies: Optional[Dict[str, Any]] = None,
     add_dependencies_to_context: Optional[bool] = None,
@@ -865,7 +866,7 @@ async def aprint_response(
     videos: Optional[Sequence[Video]] = None,
     files: Optional[Sequence[File]] = None,
    markdown: bool = False,
-    knowledge_filters: Optional[Dict[str, Any]] = None,
+    knowledge_filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None,
     add_history_to_context: Optional[bool] = None,
     dependencies: Optional[Dict[str, Any]] = None,
     add_dependencies_to_context: Optional[bool] = None,
@@ -1163,7 +1164,7 @@ async def aprint_response_stream(
     markdown: bool = False,
     stream_events: bool = False,
     stream_intermediate_steps: bool = False,  # type: ignore
-    knowledge_filters: Optional[Dict[str, Any]] = None,
+    knowledge_filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None,
     add_history_to_context: Optional[bool] = None,
     dependencies: Optional[Dict[str, Any]] = None,
     add_dependencies_to_context: Optional[bool] = None,
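Note: the hunks above widen knowledge_filters from a plain dict to also accept a list of FilterExpr objects from the new agno/filters.py module added in this release. The sketch below only illustrates the two shapes a caller can now pass and how calling code might tell them apart; it does not show how to construct a FilterExpr, since the contents of agno/filters.py are not part of this excerpt, and the describe_filters helper is purely illustrative.

from typing import Any, Dict, List, Optional, Union

from agno.filters import FilterExpr  # new module in this release


def describe_filters(knowledge_filters: Optional[Union[Dict[str, Any], List[FilterExpr]]]) -> str:
    # Mirrors the Union type the print_response helpers now accept.
    if knowledge_filters is None:
        return "no knowledge filters"
    if isinstance(knowledge_filters, dict):
        return f"legacy dict filters: {sorted(knowledge_filters)}"
    # A list carries the new FilterExpr expressions; several vector DB backends in this
    # release log a warning and ignore them (see the vectordb diffs below).
    return f"{len(knowledge_filters)} filter expression(s)"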
agno/vectordb/base.py
CHANGED
@@ -72,13 +72,11 @@ class VectorDb(ABC):
         raise NotImplementedError
 
     @abstractmethod
-    def search(self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
+    def search(self, query: str, limit: int = 5, filters: Optional[Any] = None) -> List[Document]:
         raise NotImplementedError
 
     @abstractmethod
-    async def async_search(
-        self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None
-    ) -> List[Document]:
+    async def async_search(self, query: str, limit: int = 5, filters: Optional[Any] = None) -> List[Document]:
         raise NotImplementedError
 
     @abstractmethod
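Note: with the abstract signatures above loosened to Optional[Any], each backend narrows the filter type itself. Below is a minimal sketch of a custom backend following the pattern the bundled backends use in this release. MyVectorDb is hypothetical and only search/async_search are shown; a real subclass must implement the remaining VectorDb abstract methods.

import asyncio
from typing import Any, Dict, List, Optional, Union

from agno.filters import FilterExpr
from agno.knowledge.document import Document
from agno.utils.log import log_warning
from agno.vectordb.base import VectorDb


class MyVectorDb(VectorDb):  # hypothetical backend, for illustration only
    def search(
        self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
    ) -> List[Document]:
        if isinstance(filters, list):
            # Same guard the Chroma/Couchbase/LanceDB diffs add for FilterExpr lists.
            log_warning("Filter Expressions are not supported here. No filters will be applied.")
            filters = None
        return []  # a real backend would run its vector query here

    async def async_search(
        self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
    ) -> List[Document]:
        # Delegate to the sync path in a worker thread, as the Cassandra and Chroma backends do.
        return await asyncio.to_thread(self.search, query, limit, filters)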
agno/vectordb/cassandra/cassandra.py
CHANGED

@@ -1,9 +1,10 @@
 import asyncio
-from typing import Any, Dict, Iterable, List, Optional
+from typing import Any, Dict, Iterable, List, Optional, Union
 
+from agno.filters import FilterExpr
 from agno.knowledge.document import Document
 from agno.knowledge.embedder import Embedder
-from agno.utils.log import log_debug, log_error, log_info
+from agno.utils.log import log_debug, log_error, log_info, log_warning
 from agno.vectordb.base import VectorDb
 from agno.vectordb.cassandra.index import AgnoMetadataVectorCassandraTable
 
@@ -204,13 +205,17 @@ class Cassandra(VectorDb):
         self.delete_by_content_hash(content_hash)
         await self.async_insert(content_hash, documents, filters)
 
-    def search(self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
+    def search(
+        self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
+    ) -> List[Document]:
         """Keyword-based search on document metadata."""
         log_debug(f"Cassandra VectorDB : Performing Vector Search on {self.table_name} with query {query}")
+        if filters is not None:
+            log_warning("Filters are not yet supported in Cassandra. No filters will be applied.")
         return self.vector_search(query=query, limit=limit)
 
     async def async_search(
-        self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None
+        self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
     ) -> List[Document]:
         """Search asynchronously by running in a thread."""
         return await asyncio.to_thread(self.search, query, limit, filters)
@@ -221,7 +226,9 @@ class Cassandra(VectorDb):
     ) -> List[Document]:
         return [self._row_to_document(row=hit) for hit in hits]
 
-    def vector_search(self, query: str, limit: int = 5) -> List[Document]:
+    def vector_search(
+        self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
+    ) -> List[Document]:
         """Vector similarity search implementation."""
         query_embedding = self.embedder.get_embedding(query)
         hits = list(
agno/vectordb/chroma/chromadb.py
CHANGED
@@ -13,10 +13,11 @@ try:
 except ImportError:
     raise ImportError("The `chromadb` package is not installed. Please install it via `pip install chromadb`.")
 
+from agno.filters import FilterExpr
 from agno.knowledge.document import Document
 from agno.knowledge.embedder import Embedder
 from agno.knowledge.reranker.base import Reranker
-from agno.utils.log import log_debug, log_error, log_info, logger
+from agno.utils.log import log_debug, log_error, log_info, log_warning, logger
 from agno.vectordb.base import VectorDb
 from agno.vectordb.distance import Distance
 
@@ -477,13 +478,15 @@ class ChromaDb(VectorDb):
             logger.error(f"Error upserting documents by content hash: {e}")
             raise
 
-    def search(self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
+    def search(
+        self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
+    ) -> List[Document]:
         """Search the collection for a query.
 
         Args:
             query (str): Query to search for.
             limit (int): Number of results to return.
-            filters (Optional[Dict[str, Any]]): Filters to apply while searching.
+            filters (Optional[Union[Dict[str, Any], List[FilterExpr]]]): Filters to apply while searching.
                 Supports ChromaDB's filtering operators:
                 - $eq, $ne: Equality/Inequality
                 - $gt, $gte, $lt, $lte: Numeric comparisons
@@ -492,6 +495,9 @@ class ChromaDb(VectorDb):
         Returns:
             List[Document]: List of search results.
         """
+        if isinstance(filters, list):
+            log_warning("Filter Expressions are not yet supported in ChromaDB. No filters will be applied.")
+            filters = None
         query_embedding = self.embedder.get_embedding(query)
         if query_embedding is None:
             logger.error(f"Error getting embedding for Query: {query}")
@@ -606,7 +612,7 @@ class ChromaDb(VectorDb):
         return converted
 
     async def async_search(
-        self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None
+        self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
     ) -> List[Document]:
         """Search asynchronously by running in a thread."""
         return await asyncio.to_thread(self.search, query, limit, filters)
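Note: the dict path in ChromaDb.search is unchanged; only FilterExpr lists are warned about and dropped. As a small illustration, a dict filter using the operators named in the docstring above could look like the following. Construction of the ChromaDb instance itself is omitted because its constructor is not part of this excerpt.

from typing import Any, Dict

# Operators listed in the ChromaDb.search docstring above.
recent_only: Dict[str, Any] = {"year": {"$gte": 2023}}
exact_category: Dict[str, Any] = {"category": {"$eq": "reference"}}

# Example call against an already-configured ChromaDb instance (not shown here):
# documents = chroma_db.search("release notes", limit=5, filters=recent_only)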
agno/vectordb/clickhouse/clickhousedb.py
CHANGED

@@ -1,6 +1,6 @@
 import asyncio
 from hashlib import md5
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Union
 
 from agno.vectordb.clickhouse.index import HNSW
 
@@ -11,9 +11,10 @@ try:
 except ImportError:
     raise ImportError("`clickhouse-connect` not installed. Use `pip install clickhouse-connect` to install it")
 
+from agno.filters import FilterExpr
 from agno.knowledge.document import Document
 from agno.knowledge.embedder import Embedder
-from agno.utils.log import log_debug, log_info, logger
+from agno.utils.log import log_debug, log_info, log_warning, logger
 from agno.vectordb.base import VectorDb
 from agno.vectordb.distance import Distance
 
@@ -448,7 +449,11 @@ class Clickhouse(VectorDb):
             parameters=parameters,
         )
 
-    def search(self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
+    def search(
+        self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
+    ) -> List[Document]:
+        if filters is not None:
+            log_warning("Filters are not yet supported in Clickhouse. No filters will be applied.")
         query_embedding = self.embedder.get_embedding(query)
         if query_embedding is None:
             logger.error(f"Error getting embedding for Query: {query}")
@@ -502,11 +507,14 @@ class Clickhouse(VectorDb):
         return search_results
 
     async def async_search(
-        self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None
+        self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
     ) -> List[Document]:
         """Search for documents asynchronously."""
         async_client = await self._ensure_async_client()
 
+        if filters is not None:
+            log_warning("Filters are not yet supported in Clickhouse. No filters will be applied.")
+
         query_embedding = self.embedder.get_embedding(query)
         if query_embedding is None:
             logger.error(f"Error getting embedding for Query: {query}")
agno/vectordb/couchbase/couchbase.py
CHANGED

@@ -3,10 +3,11 @@ import time
 from datetime import timedelta
 from typing import Any, Dict, List, Optional, Union
 
+from agno.filters import FilterExpr
 from agno.knowledge.document import Document
 from agno.knowledge.embedder import Embedder
 from agno.knowledge.embedder.openai import OpenAIEmbedder
-from agno.utils.log import log_debug, log_info, logger
+from agno.utils.log import log_debug, log_info, log_warning, logger
 from agno.vectordb.base import VectorDb
 
 try:
@@ -458,7 +459,12 @@ class CouchbaseSearch(VectorDb):
         if errors_occurred:
             logger.warning("Some errors occurred during the upsert operation. Please check logs for details.")
 
-    def search(self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
+    def search(
+        self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
+    ) -> List[Document]:
+        if isinstance(filters, List):
+            log_warning("Filter Expressions are not yet supported in Couchbase. No filters will be applied.")
+            filters = None
         """Search the Couchbase bucket for documents relevant to the query."""
         query_embedding = self.embedder.get_embedding(query)
         if query_embedding is None:
@@ -1068,8 +1074,11 @@ class CouchbaseSearch(VectorDb):
         logger.info(f"[async] Total successfully upserted: {total_upserted_count}, Total failed: {total_failed_count}.")
 
     async def async_search(
-        self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None
+        self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
     ) -> List[Document]:
+        if isinstance(filters, List):
+            log_warning("Filter Expressions are not yet supported in Couchbase. No filters will be applied.")
+            filters = None
         query_embedding = self.embedder.get_embedding(query)
         if query_embedding is None:
             logger.error(f"[async] Failed to generate embedding for query: {query}")
agno/vectordb/lancedb/lance_db.py
CHANGED

@@ -2,7 +2,7 @@ import asyncio
 import json
 from hashlib import md5
 from os import getenv
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Union
 
 try:
     import lancedb
@@ -10,10 +10,11 @@ try:
 except ImportError:
     raise ImportError("`lancedb` not installed. Please install using `pip install lancedb`")
 
+from agno.filters import FilterExpr
 from agno.knowledge.document import Document
 from agno.knowledge.embedder import Embedder
 from agno.knowledge.reranker.base import Reranker
-from agno.utils.log import log_debug, log_info, logger
+from agno.utils.log import log_debug, log_info, log_warning, logger
 from agno.vectordb.base import VectorDb
 from agno.vectordb.distance import Distance
 from agno.vectordb.search import SearchType
@@ -158,7 +159,7 @@ class LanceDb(VectorDb):
 
     def _prepare_vector(self, embedding) -> List[float]:
         """Prepare vector embedding for insertion, ensuring correct dimensions and type."""
-        if embedding is not None:
+        if embedding is not None and len(embedding) > 0:
             # Convert to list of floats
             vector = [float(x) for x in embedding]
 
@@ -176,7 +177,7 @@ class LanceDb(VectorDb):
 
             return vector
         else:
-            # Fallback if embedding is None
+            # Fallback if embedding is None or empty
             return [0.0] * (self.dimensions or 1536)
 
     async def _get_async_connection(self) -> lancedb.AsyncConnection:
@@ -200,7 +201,6 @@ class LanceDb(VectorDb):
             # Re-establish sync connection to see async changes
             if self.connection and self.table_name in self.connection.table_names():
                 self.table = self.connection.open_table(self.table_name)
-                log_debug(f"Refreshed sync connection for table: {self.table_name}")
         except Exception as e:
             log_debug(f"Could not refresh sync connection: {e}")
             # If refresh fails, we can still function but sync methods might not see async changes
@@ -359,6 +359,9 @@ class LanceDb(VectorDb):
         """
         Asynchronously insert documents into the database.
 
+        Note: Currently wraps sync insert method since LanceDB async insert has sync/async table
+        synchronization issues causing empty vectors. We still do async embedding for performance.
+
         Args:
             documents (List[Document]): List of documents to insert
             filters (Optional[Dict[str, Any]]): Filters to apply while inserting documents
@@ -368,115 +371,36 @@ class LanceDb(VectorDb):
             return
 
         log_debug(f"Inserting {len(documents)} documents")
-        data = []
 
+        # Still do async embedding for performance
         if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
-            # Use batch embedding when enabled and supported
             try:
-                # Extract content from all documents
                 doc_contents = [doc.content for doc in documents]
-
-                # Get batch embeddings and usage
                 embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
 
-                # Process documents with pre-computed embeddings
                 for j, doc in enumerate(documents):
-                    if j < len(embeddings):
-                        try:
-                            doc.embedding = embeddings[j]
-                            doc.usage = usages[j] if j < len(usages) else None
-                        except Exception as e:
-                            logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")
-
+                    if j < len(embeddings):
+                        doc.embedding = embeddings[j]
+                        doc.usage = usages[j] if j < len(usages) else None
             except Exception as e:
-                # Check if this is a rate limit error - don't fall back as it would make things worse
                 error_str = str(e).lower()
                 is_rate_limit = any(
                     phrase in error_str
                     for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
                 )
-
                 if is_rate_limit:
                     logger.error(f"Rate limit detected during batch embedding. {e}")
                     raise e
                 else:
                     logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
-                    # Fall back to individual embedding
                     embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
                     await asyncio.gather(*embed_tasks, return_exceptions=True)
         else:
-
-            embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
+            embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
             await asyncio.gather(*embed_tasks, return_exceptions=True)
 
-        for document in documents:
-            if document.embedding is None:
-                continue
-
-            # Add filters to document metadata if provided
-            if filters:
-                meta_data = document.meta_data.copy() if document.meta_data else {}
-                meta_data.update(filters)
-                document.meta_data = meta_data
-
-            cleaned_content = document.content.replace("\x00", "\ufffd")
-            doc_id = str(md5(cleaned_content.encode()).hexdigest())
-            payload = {
-                "name": document.name,
-                "meta_data": document.meta_data,
-                "content": cleaned_content,
-                "usage": document.usage,
-                "content_id": document.content_id,
-                "content_hash": content_hash,
-            }
-            data.append(
-                {
-                    "id": doc_id,
-                    "vector": self._prepare_vector(document.embedding),
-                    "payload": json.dumps(payload),
-                }
-            )
-            log_debug(f"Parsed document: {document.name} ({document.meta_data})")
-
-        if not data:
-            log_debug("No new data to insert")
-            return
-
-        try:
-            await self._get_async_connection()
-
-            # Ensure the async table is created before inserting
-            if self.async_table is None:
-                try:
-                    await self.async_create()
-                except Exception as create_e:
-                    logger.error(f"Failed to create async table: {create_e}")
-                    # Continue to fallback logic below
-
-            if self.async_table is None:
-                # Fall back to sync insertion if async table creation failed
-                logger.warning("Async table not available, falling back to sync insertion")
-                return self.insert(content_hash, documents, filters)
-
-            if self.on_bad_vectors is not None:
-                await self.async_table.add(data, on_bad_vectors=self.on_bad_vectors, fill_value=self.fill_value)  # type: ignore
-            else:
-                await self.async_table.add(data)  # type: ignore
-
-            log_debug(f"Asynchronously inserted {len(data)} documents")
-
-            # Refresh sync connection to see async changes
-            self._refresh_sync_connection()
-        except Exception as e:
-            logger.error(f"Error during async document insertion: {e}")
-            # Try falling back to sync insertion as a last resort
-            try:
-                logger.warning("Async insertion failed, attempting sync fallback")
-                self.insert(content_hash, documents, filters)
-                logger.info("Sync fallback successful")
-            except Exception as sync_e:
-                logger.error(f"Sync fallback also failed: {sync_e}")
-                raise e from sync_e
+        # Use sync insert to avoid sync/async table synchronization issues
+        self.insert(content_hash, documents, filters)
 
     def upsert_available(self) -> bool:
         """Check if upsert is available in LanceDB."""
@@ -497,11 +421,42 @@ class LanceDb(VectorDb):
     async def async_upsert(
         self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
     ) -> None:
-
-
-        await self.async_insert(content_hash=content_hash, documents=documents, filters=filters)
+        """
+        Asynchronously upsert documents into the database.
 
+        Note: Uses async embedding for performance, then sync upsert for reliability.
+        """
+        if len(documents) > 0:
+            # Do async embedding for performance
+            if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
+                try:
+                    doc_contents = [doc.content for doc in documents]
+                    embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
+                    for j, doc in enumerate(documents):
+                        if j < len(embeddings):
+                            doc.embedding = embeddings[j]
+                            doc.usage = usages[j] if j < len(usages) else None
+                except Exception as e:
+                    error_str = str(e).lower()
+                    is_rate_limit = any(
+                        phrase in error_str
+                        for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
+                    )
+                    if is_rate_limit:
+                        raise e
+                    else:
+                        embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
+                        await asyncio.gather(*embed_tasks, return_exceptions=True)
+            else:
+                embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
+                await asyncio.gather(*embed_tasks, return_exceptions=True)
+
+        # Use sync upsert for reliability
+        self.upsert(content_hash=content_hash, documents=documents, filters=filters)
+
-    def search(self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
+    def search(
+        self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
+    ) -> List[Document]:
         """
         Search for documents matching the query.
 
@@ -518,6 +473,10 @@ class LanceDb(VectorDb):
 
         results = None
 
+        if isinstance(filters, list):
+            log_warning("Filter Expressions are not yet supported in LanceDB. No filters will be applied.")
+            filters = None
+
         if self.search_type == SearchType.vector:
             results = self.vector_search(query, limit)
         elif self.search_type == SearchType.keyword:
@@ -559,11 +518,14 @@ class LanceDb(VectorDb):
         return search_results
 
     async def async_search(
-        self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None
+        self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
     ) -> List[Document]:
         """
         Asynchronously search for documents matching the query.
 
+        Note: Currently wraps sync search method since LanceDB async search has sync/async table
+        synchronization issues. Performance impact is minimal for search operations.
+
         Args:
             query (str): Query string to search for
             limit (int): Maximum number of results to return
@@ -572,53 +534,12 @@ class LanceDb(VectorDb):
         Returns:
             List[Document]: List of matching documents
         """
-        #
-
-        self.table = self.connection.open_table(name=self.table_name)
-
-        results = None
-
-        if self.search_type == SearchType.vector:
-            results = self.vector_search(query, limit)
-        elif self.search_type == SearchType.keyword:
-            results = self.keyword_search(query, limit)
-        elif self.search_type == SearchType.hybrid:
-            results = self.hybrid_search(query, limit)
-        else:
-            logger.error(f"Invalid search type '{self.search_type}'.")
-            return []
-
-        if results is None:
-            return []
-
-        search_results = self._build_search_results(results)
+        # Wrap sync search method to avoid sync/async table synchronization issues
+        return self.search(query=query, limit=limit, filters=filters)
 
-        # Filter results manually if filters are provided
-        if filters:
-            filtered_results = []
-            for doc in search_results:
-                if doc.meta_data is None:
-                    continue
-
-                # Check if all filter criteria match
-                match = True
-                for key, value in filters.items():
-                    if key not in doc.meta_data or doc.meta_data[key] != value:
-                        match = False
-                        break
-
-                if match:
-                    filtered_results.append(doc)
-
-            search_results = filtered_results
-
-        if self.reranker and search_results:
-            search_results = self.reranker.rerank(query=query, documents=search_results)
-
-        log_info(f"Found {len(search_results)} documents")
-        return search_results
-
-    def vector_search(self, query: str, limit: int = 5) -> List[Document]:
+    def vector_search(
+        self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
+    ) -> List[Document]:
         query_embedding = self.embedder.get_embedding(query)
         if query_embedding is None:
             logger.error(f"Error getting embedding for Query: {query}")
@@ -638,7 +559,9 @@ class LanceDb(VectorDb):
 
         return results.to_pandas()
 
-    def hybrid_search(self, query: str, limit: int = 5) -> List[Document]:
+    def hybrid_search(
+        self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
+    ) -> List[Document]:
         query_embedding = self.embedder.get_embedding(query)
         if query_embedding is None:
             logger.error(f"Error getting embedding for Query: {query}")
@@ -667,7 +590,9 @@ class LanceDb(VectorDb):
 
         return results.to_pandas()
 
-    def keyword_search(self, query: str, limit: int = 5) -> List[Document]:
+    def keyword_search(
+        self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
+    ) -> List[Document]:
         if self.table is None:
             logger.error("Table not initialized. Please create the table first")
             return []
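Note: the LanceDb changes above keep embedding asynchronous but push the actual table writes through the sync insert/upsert path, per the new docstrings about sync/async table synchronization issues. Below is a stripped-down sketch of that "embed async, write sync" shape; it is independent of LanceDB, and the embed_batch/write_rows callables are placeholders rather than agno or LanceDB APIs. The diff calls the sync writer directly, while this sketch pushes it onto a worker thread to keep the event loop free.

import asyncio
from typing import Awaitable, Callable, List, Sequence, Tuple


async def embed_async_then_write_sync(
    texts: Sequence[str],
    embed_batch: Callable[[Sequence[str]], Awaitable[List[List[float]]]],
    write_rows: Callable[[List[Tuple[str, List[float]]]], None],
) -> None:
    # The expensive embedding step stays async (and can be batched),
    # mirroring async_insert/async_upsert above.
    embeddings = await embed_batch(texts)
    rows = list(zip(texts, embeddings))
    # The write goes through the synchronous path; a thread keeps it off the event loop.
    await asyncio.to_thread(write_rows, rows)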
agno/vectordb/langchaindb/langchaindb.py
CHANGED

@@ -1,7 +1,8 @@
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Union
 
+from agno.filters import FilterExpr
 from agno.knowledge.document import Document
-from agno.utils.log import log_debug, logger
+from agno.utils.log import log_debug, log_warning, logger
 from agno.vectordb.base import VectorDb
 
 
@@ -70,9 +71,17 @@ class LangChainVectorDb(VectorDb):
         logger.warning("LangChainKnowledgeBase.async_upsert() not supported - please check the vectorstore manually.")
         raise NotImplementedError
 
-    def search(self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
+    def search(
+        self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
+    ) -> List[Document]:
         """Returns relevant documents matching the query"""
 
+        if isinstance(filters, List):
+            log_warning(
+                "Filter Expressions are not supported in LangChainDB. No filters will be applied. Use filters as a dictionary."
+            )
+            filters = None
+
         try:
             from langchain_core.documents import Document as LangChainDocument
             from langchain_core.retrievers import BaseRetriever
@@ -109,7 +118,7 @@ class LangChainVectorDb(VectorDb):
         return documents
 
     async def async_search(
-        self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None
+        self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
     ) -> List[Document]:
         return self.search(query, limit, filters)
 
agno/vectordb/lightrag/lightrag.py
CHANGED

@@ -1,8 +1,9 @@
 import asyncio
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Union
 
 import httpx
 
+from agno.filters import FilterExpr
 from agno.knowledge.document import Document
 from agno.utils.log import log_debug, log_error, log_info, log_warning
 from agno.vectordb.base import VectorDb
@@ -92,14 +93,18 @@ class LightRag(VectorDb):
         """Async upsert documents into the vector database"""
         pass
 
-    def search(self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
+    def search(
+        self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
+    ) -> List[Document]:
         result = asyncio.run(self.async_search(query, limit=limit, filters=filters))
         return result if result is not None else []
 
     async def async_search(
-        self, query: str, limit: Optional[int] = None, filters: Optional[Dict[str, Any]] = None
+        self, query: str, limit: Optional[int] = None, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
     ) -> Optional[List[Document]]:
         mode: str = "hybrid"  # Default mode, can be "local", "global", or "hybrid"
+        if filters is not None:
+            log_warning("Filters are not supported in LightRAG. No filters will be applied.")
         try:
             async with httpx.AsyncClient(timeout=30.0) as client:
                 response = await client.post(