agno 2.0.11__py3-none-any.whl → 2.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/agent.py +607 -176
- agno/db/in_memory/in_memory_db.py +42 -29
- agno/db/mongo/mongo.py +65 -66
- agno/db/postgres/postgres.py +6 -4
- agno/db/utils.py +50 -22
- agno/exceptions.py +62 -1
- agno/guardrails/__init__.py +6 -0
- agno/guardrails/base.py +19 -0
- agno/guardrails/openai.py +144 -0
- agno/guardrails/pii.py +94 -0
- agno/guardrails/prompt_injection.py +51 -0
- agno/knowledge/embedder/aws_bedrock.py +9 -4
- agno/knowledge/embedder/azure_openai.py +54 -0
- agno/knowledge/embedder/base.py +2 -0
- agno/knowledge/embedder/cohere.py +184 -5
- agno/knowledge/embedder/google.py +79 -1
- agno/knowledge/embedder/huggingface.py +9 -4
- agno/knowledge/embedder/jina.py +63 -0
- agno/knowledge/embedder/mistral.py +78 -11
- agno/knowledge/embedder/ollama.py +5 -0
- agno/knowledge/embedder/openai.py +18 -54
- agno/knowledge/embedder/voyageai.py +69 -16
- agno/knowledge/knowledge.py +11 -4
- agno/knowledge/reader/pdf_reader.py +4 -3
- agno/knowledge/reader/website_reader.py +3 -2
- agno/models/base.py +125 -32
- agno/models/cerebras/cerebras.py +1 -0
- agno/models/cerebras/cerebras_openai.py +1 -0
- agno/models/dashscope/dashscope.py +1 -0
- agno/models/google/gemini.py +27 -5
- agno/models/openai/chat.py +13 -4
- agno/models/openai/responses.py +1 -1
- agno/models/perplexity/perplexity.py +2 -3
- agno/models/requesty/__init__.py +5 -0
- agno/models/requesty/requesty.py +49 -0
- agno/models/vllm/vllm.py +1 -0
- agno/models/xai/xai.py +1 -0
- agno/os/app.py +98 -126
- agno/os/interfaces/__init__.py +1 -0
- agno/os/interfaces/agui/agui.py +21 -5
- agno/os/interfaces/base.py +4 -2
- agno/os/interfaces/slack/slack.py +13 -8
- agno/os/interfaces/whatsapp/router.py +2 -0
- agno/os/interfaces/whatsapp/whatsapp.py +12 -5
- agno/os/mcp.py +2 -2
- agno/os/middleware/__init__.py +7 -0
- agno/os/middleware/jwt.py +233 -0
- agno/os/router.py +182 -46
- agno/os/routers/home.py +2 -2
- agno/os/routers/memory/memory.py +23 -1
- agno/os/routers/memory/schemas.py +1 -1
- agno/os/routers/session/session.py +20 -3
- agno/os/utils.py +74 -8
- agno/run/agent.py +120 -77
- agno/run/base.py +2 -13
- agno/run/team.py +115 -72
- agno/run/workflow.py +5 -15
- agno/session/summary.py +9 -10
- agno/session/team.py +2 -1
- agno/team/team.py +721 -169
- agno/tools/firecrawl.py +4 -4
- agno/tools/function.py +42 -2
- agno/tools/knowledge.py +3 -3
- agno/tools/searxng.py +2 -2
- agno/tools/serper.py +2 -2
- agno/tools/spider.py +2 -2
- agno/tools/workflow.py +4 -5
- agno/utils/events.py +66 -1
- agno/utils/hooks.py +57 -0
- agno/utils/media.py +11 -9
- agno/utils/print_response/agent.py +43 -5
- agno/utils/print_response/team.py +48 -12
- agno/utils/serialize.py +32 -0
- agno/vectordb/cassandra/cassandra.py +44 -4
- agno/vectordb/chroma/chromadb.py +79 -8
- agno/vectordb/clickhouse/clickhousedb.py +43 -6
- agno/vectordb/couchbase/couchbase.py +76 -5
- agno/vectordb/lancedb/lance_db.py +38 -3
- agno/vectordb/milvus/milvus.py +76 -4
- agno/vectordb/mongodb/mongodb.py +76 -4
- agno/vectordb/pgvector/pgvector.py +50 -6
- agno/vectordb/pineconedb/pineconedb.py +39 -2
- agno/vectordb/qdrant/qdrant.py +76 -26
- agno/vectordb/singlestore/singlestore.py +77 -4
- agno/vectordb/upstashdb/upstashdb.py +42 -2
- agno/vectordb/weaviate/weaviate.py +39 -3
- agno/workflow/types.py +5 -6
- agno/workflow/workflow.py +58 -2
- {agno-2.0.11.dist-info → agno-2.1.1.dist-info}/METADATA +4 -3
- {agno-2.0.11.dist-info → agno-2.1.1.dist-info}/RECORD +93 -82
- {agno-2.0.11.dist-info → agno-2.1.1.dist-info}/WHEEL +0 -0
- {agno-2.0.11.dist-info → agno-2.1.1.dist-info}/licenses/LICENSE +0 -0
- {agno-2.0.11.dist-info → agno-2.1.1.dist-info}/top_level.txt +0 -0
agno/vectordb/milvus/milvus.py
CHANGED
|
@@ -457,8 +457,44 @@ class Milvus(VectorDb):
|
|
|
457
457
|
"""Insert documents asynchronously based on search type."""
|
|
458
458
|
log_info(f"Inserting {len(documents)} documents asynchronously")
|
|
459
459
|
|
|
460
|
-
|
|
461
|
-
|
|
460
|
+
if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
|
|
461
|
+
# Use batch embedding when enabled and supported
|
|
462
|
+
try:
|
|
463
|
+
# Extract content from all documents
|
|
464
|
+
doc_contents = [doc.content for doc in documents]
|
|
465
|
+
|
|
466
|
+
# Get batch embeddings and usage
|
|
467
|
+
embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
|
|
468
|
+
|
|
469
|
+
# Process documents with pre-computed embeddings
|
|
470
|
+
for j, doc in enumerate(documents):
|
|
471
|
+
try:
|
|
472
|
+
if j < len(embeddings):
|
|
473
|
+
doc.embedding = embeddings[j]
|
|
474
|
+
doc.usage = usages[j] if j < len(usages) else None
|
|
475
|
+
except Exception as e:
|
|
476
|
+
log_error(f"Error assigning batch embedding to document '{doc.name}': {e}")
|
|
477
|
+
|
|
478
|
+
except Exception as e:
|
|
479
|
+
# Check if this is a rate limit error - don't fall back as it would make things worse
|
|
480
|
+
error_str = str(e).lower()
|
|
481
|
+
is_rate_limit = any(
|
|
482
|
+
phrase in error_str
|
|
483
|
+
for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
|
|
484
|
+
)
|
|
485
|
+
|
|
486
|
+
if is_rate_limit:
|
|
487
|
+
log_error(f"Rate limit detected during batch embedding. {e}")
|
|
488
|
+
raise e
|
|
489
|
+
else:
|
|
490
|
+
log_error(f"Async batch embedding failed, falling back to individual embeddings: {e}")
|
|
491
|
+
# Fall back to individual embedding
|
|
492
|
+
embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
|
|
493
|
+
await asyncio.gather(*embed_tasks, return_exceptions=True)
|
|
494
|
+
else:
|
|
495
|
+
# Use individual embedding
|
|
496
|
+
embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
|
|
497
|
+
await asyncio.gather(*embed_tasks, return_exceptions=True)
|
|
462
498
|
|
|
463
499
|
if self.search_type == SearchType.hybrid:
|
|
464
500
|
await asyncio.gather(
|
|
@@ -547,8 +583,44 @@ class Milvus(VectorDb):
|
|
|
547
583
|
) -> None:
|
|
548
584
|
log_debug(f"Upserting {len(documents)} documents asynchronously")
|
|
549
585
|
|
|
550
|
-
|
|
551
|
-
|
|
586
|
+
if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
|
|
587
|
+
# Use batch embedding when enabled and supported
|
|
588
|
+
try:
|
|
589
|
+
# Extract content from all documents
|
|
590
|
+
doc_contents = [doc.content for doc in documents]
|
|
591
|
+
|
|
592
|
+
# Get batch embeddings and usage
|
|
593
|
+
embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
|
|
594
|
+
|
|
595
|
+
# Process documents with pre-computed embeddings
|
|
596
|
+
for j, doc in enumerate(documents):
|
|
597
|
+
try:
|
|
598
|
+
if j < len(embeddings):
|
|
599
|
+
doc.embedding = embeddings[j]
|
|
600
|
+
doc.usage = usages[j] if j < len(usages) else None
|
|
601
|
+
except Exception as e:
|
|
602
|
+
log_error(f"Error assigning batch embedding to document '{doc.name}': {e}")
|
|
603
|
+
|
|
604
|
+
except Exception as e:
|
|
605
|
+
# Check if this is a rate limit error - don't fall back as it would make things worse
|
|
606
|
+
error_str = str(e).lower()
|
|
607
|
+
is_rate_limit = any(
|
|
608
|
+
phrase in error_str
|
|
609
|
+
for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
|
|
610
|
+
)
|
|
611
|
+
|
|
612
|
+
if is_rate_limit:
|
|
613
|
+
log_error(f"Rate limit detected during batch embedding. {e}")
|
|
614
|
+
raise e
|
|
615
|
+
else:
|
|
616
|
+
log_error(f"Async batch embedding failed, falling back to individual embeddings: {e}")
|
|
617
|
+
# Fall back to individual embedding
|
|
618
|
+
embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
|
|
619
|
+
await asyncio.gather(*embed_tasks, return_exceptions=True)
|
|
620
|
+
else:
|
|
621
|
+
# Use individual embedding
|
|
622
|
+
embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
|
|
623
|
+
await asyncio.gather(*embed_tasks, return_exceptions=True)
|
|
552
624
|
|
|
553
625
|
async def process_document(document):
|
|
554
626
|
cleaned_content = document.content.replace("\x00", "\ufffd")
|
agno/vectordb/mongodb/mongodb.py
CHANGED
|
@@ -1018,8 +1018,44 @@ class MongoDb(VectorDb):
|
|
|
1018
1018
|
log_debug(f"Inserting {len(documents)} documents asynchronously")
|
|
1019
1019
|
collection = await self._get_async_collection()
|
|
1020
1020
|
|
|
1021
|
-
|
|
1022
|
-
|
|
1021
|
+
if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
|
|
1022
|
+
# Use batch embedding when enabled and supported
|
|
1023
|
+
try:
|
|
1024
|
+
# Extract content from all documents
|
|
1025
|
+
doc_contents = [doc.content for doc in documents]
|
|
1026
|
+
|
|
1027
|
+
# Get batch embeddings and usage
|
|
1028
|
+
embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
|
|
1029
|
+
|
|
1030
|
+
# Process documents with pre-computed embeddings
|
|
1031
|
+
for j, doc in enumerate(documents):
|
|
1032
|
+
try:
|
|
1033
|
+
if j < len(embeddings):
|
|
1034
|
+
doc.embedding = embeddings[j]
|
|
1035
|
+
doc.usage = usages[j] if j < len(usages) else None
|
|
1036
|
+
except Exception as e:
|
|
1037
|
+
logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")
|
|
1038
|
+
|
|
1039
|
+
except Exception as e:
|
|
1040
|
+
# Check if this is a rate limit error - don't fall back as it would make things worse
|
|
1041
|
+
error_str = str(e).lower()
|
|
1042
|
+
is_rate_limit = any(
|
|
1043
|
+
phrase in error_str
|
|
1044
|
+
for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
|
|
1045
|
+
)
|
|
1046
|
+
|
|
1047
|
+
if is_rate_limit:
|
|
1048
|
+
logger.error(f"Rate limit detected during batch embedding. {e}")
|
|
1049
|
+
raise e
|
|
1050
|
+
else:
|
|
1051
|
+
logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
|
|
1052
|
+
# Fall back to individual embedding
|
|
1053
|
+
embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
|
|
1054
|
+
await asyncio.gather(*embed_tasks, return_exceptions=True)
|
|
1055
|
+
else:
|
|
1056
|
+
# Use individual embedding
|
|
1057
|
+
embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
|
|
1058
|
+
await asyncio.gather(*embed_tasks, return_exceptions=True)
|
|
1023
1059
|
|
|
1024
1060
|
prepared_docs = []
|
|
1025
1061
|
for document in documents:
|
|
@@ -1047,8 +1083,44 @@ class MongoDb(VectorDb):
|
|
|
1047
1083
|
log_info(f"Upserting {len(documents)} documents asynchronously")
|
|
1048
1084
|
collection = await self._get_async_collection()
|
|
1049
1085
|
|
|
1050
|
-
|
|
1051
|
-
|
|
1086
|
+
if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
|
|
1087
|
+
# Use batch embedding when enabled and supported
|
|
1088
|
+
try:
|
|
1089
|
+
# Extract content from all documents
|
|
1090
|
+
doc_contents = [doc.content for doc in documents]
|
|
1091
|
+
|
|
1092
|
+
# Get batch embeddings and usage
|
|
1093
|
+
embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
|
|
1094
|
+
|
|
1095
|
+
# Process documents with pre-computed embeddings
|
|
1096
|
+
for j, doc in enumerate(documents):
|
|
1097
|
+
try:
|
|
1098
|
+
if j < len(embeddings):
|
|
1099
|
+
doc.embedding = embeddings[j]
|
|
1100
|
+
doc.usage = usages[j] if j < len(usages) else None
|
|
1101
|
+
except Exception as e:
|
|
1102
|
+
logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")
|
|
1103
|
+
|
|
1104
|
+
except Exception as e:
|
|
1105
|
+
# Check if this is a rate limit error - don't fall back as it would make things worse
|
|
1106
|
+
error_str = str(e).lower()
|
|
1107
|
+
is_rate_limit = any(
|
|
1108
|
+
phrase in error_str
|
|
1109
|
+
for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
|
|
1110
|
+
)
|
|
1111
|
+
|
|
1112
|
+
if is_rate_limit:
|
|
1113
|
+
logger.error(f"Rate limit detected during batch embedding. {e}")
|
|
1114
|
+
raise e
|
|
1115
|
+
else:
|
|
1116
|
+
logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
|
|
1117
|
+
# Fall back to individual embedding
|
|
1118
|
+
embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
|
|
1119
|
+
await asyncio.gather(*embed_tasks, return_exceptions=True)
|
|
1120
|
+
else:
|
|
1121
|
+
# Use individual embedding
|
|
1122
|
+
embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
|
|
1123
|
+
await asyncio.gather(*embed_tasks, return_exceptions=True)
|
|
1052
1124
|
|
|
1053
1125
|
for document in documents:
|
|
1054
1126
|
try:
|
|
agno/vectordb/pgvector/pgvector.py
CHANGED
|
@@ -55,7 +55,6 @@ class PgVector(VectorDb):
|
|
|
55
55
|
schema_version: int = 1,
|
|
56
56
|
auto_upgrade_schema: bool = False,
|
|
57
57
|
reranker: Optional[Reranker] = None,
|
|
58
|
-
use_batch: bool = False,
|
|
59
58
|
):
|
|
60
59
|
"""
|
|
61
60
|
Initialize the PgVector instance.
|
|
@@ -96,7 +95,6 @@ class PgVector(VectorDb):
|
|
|
96
95
|
self.db_url: Optional[str] = db_url
|
|
97
96
|
self.db_engine: Engine = db_engine
|
|
98
97
|
self.metadata: MetaData = MetaData(schema=self.schema)
|
|
99
|
-
self.use_batch: bool = use_batch
|
|
100
98
|
|
|
101
99
|
# Embedder for embedding the document contents
|
|
102
100
|
if embedder is None:
|
|
@@ -337,8 +335,8 @@ class PgVector(VectorDb):
|
|
|
337
335
|
batch_docs = documents[i : i + batch_size]
|
|
338
336
|
log_debug(f"Processing batch starting at index {i}, size: {len(batch_docs)}")
|
|
339
337
|
try:
|
|
340
|
-
|
|
341
|
-
await
|
|
338
|
+
# Embed all documents in the batch
|
|
339
|
+
await self._async_embed_documents(batch_docs)
|
|
342
340
|
|
|
343
341
|
# Prepare documents for insertion
|
|
344
342
|
batch_records = []
|
|
@@ -493,6 +491,52 @@ class PgVector(VectorDb):
|
|
|
493
491
|
"content_id": doc.content_id,
|
|
494
492
|
}
|
|
495
493
|
|
|
494
|
+
async def _async_embed_documents(self, batch_docs: List[Document]) -> None:
|
|
495
|
+
"""
|
|
496
|
+
Embed a batch of documents using either batch embedding or individual embedding.
|
|
497
|
+
|
|
498
|
+
Args:
|
|
499
|
+
batch_docs: List of documents to embed
|
|
500
|
+
"""
|
|
501
|
+
if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
|
|
502
|
+
# Use batch embedding when enabled and supported
|
|
503
|
+
try:
|
|
504
|
+
# Extract content from all documents
|
|
505
|
+
doc_contents = [doc.content for doc in batch_docs]
|
|
506
|
+
|
|
507
|
+
# Get batch embeddings and usage
|
|
508
|
+
embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
|
|
509
|
+
|
|
510
|
+
# Process documents with pre-computed embeddings
|
|
511
|
+
for j, doc in enumerate(batch_docs):
|
|
512
|
+
try:
|
|
513
|
+
if j < len(embeddings):
|
|
514
|
+
doc.embedding = embeddings[j]
|
|
515
|
+
doc.usage = usages[j] if j < len(usages) else None
|
|
516
|
+
except Exception as e:
|
|
517
|
+
logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")
|
|
518
|
+
|
|
519
|
+
except Exception as e:
|
|
520
|
+
# Check if this is a rate limit error - don't fall back as it would make things worse
|
|
521
|
+
error_str = str(e).lower()
|
|
522
|
+
is_rate_limit = any(
|
|
523
|
+
phrase in error_str
|
|
524
|
+
for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
|
|
525
|
+
)
|
|
526
|
+
|
|
527
|
+
if is_rate_limit:
|
|
528
|
+
logger.error(f"Rate limit detected during batch embedding. {e}")
|
|
529
|
+
raise e
|
|
530
|
+
else:
|
|
531
|
+
logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
|
|
532
|
+
# Fall back to individual embedding
|
|
533
|
+
embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in batch_docs]
|
|
534
|
+
await asyncio.gather(*embed_tasks, return_exceptions=True)
|
|
535
|
+
else:
|
|
536
|
+
# Use individual embedding
|
|
537
|
+
embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in batch_docs]
|
|
538
|
+
await asyncio.gather(*embed_tasks, return_exceptions=True)
|
|
539
|
+
|
|
496
540
|
async def async_upsert(
|
|
497
541
|
self,
|
|
498
542
|
content_hash: str,
|
|
@@ -530,8 +574,8 @@ class PgVector(VectorDb):
|
|
|
530
574
|
batch_docs = documents[i : i + batch_size]
|
|
531
575
|
log_info(f"Processing batch starting at index {i}, size: {len(batch_docs)}")
|
|
532
576
|
try:
|
|
533
|
-
|
|
534
|
-
await
|
|
577
|
+
# Embed all documents in the batch
|
|
578
|
+
await self._async_embed_documents(batch_docs)
|
|
535
579
|
|
|
536
580
|
# Prepare documents for upserting
|
|
537
581
|
batch_records_dict = {} # Use dict to deduplicate by ID
|
|
agno/vectordb/pineconedb/pineconedb.py
CHANGED
|
@@ -338,8 +338,45 @@ class PineconeDb(VectorDb):
|
|
|
338
338
|
async def _prepare_vectors(self, documents: List[Document]) -> List[Dict[str, Any]]:
|
|
339
339
|
"""Prepare vectors for upsert."""
|
|
340
340
|
vectors = []
|
|
341
|
-
|
|
342
|
-
|
|
341
|
+
|
|
342
|
+
if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
|
|
343
|
+
# Use batch embedding when enabled and supported
|
|
344
|
+
try:
|
|
345
|
+
# Extract content from all documents
|
|
346
|
+
doc_contents = [doc.content for doc in documents]
|
|
347
|
+
|
|
348
|
+
# Get batch embeddings and usage
|
|
349
|
+
embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
|
|
350
|
+
|
|
351
|
+
# Process documents with pre-computed embeddings
|
|
352
|
+
for j, doc in enumerate(documents):
|
|
353
|
+
try:
|
|
354
|
+
if j < len(embeddings):
|
|
355
|
+
doc.embedding = embeddings[j]
|
|
356
|
+
doc.usage = usages[j] if j < len(usages) else None
|
|
357
|
+
except Exception as e:
|
|
358
|
+
logger.error(f"Error assigning batch embedding to document '{doc.name}': {e}")
|
|
359
|
+
|
|
360
|
+
except Exception as e:
|
|
361
|
+
# Check if this is a rate limit error - don't fall back as it would make things worse
|
|
362
|
+
error_str = str(e).lower()
|
|
363
|
+
is_rate_limit = any(
|
|
364
|
+
phrase in error_str
|
|
365
|
+
for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
|
|
366
|
+
)
|
|
367
|
+
|
|
368
|
+
if is_rate_limit:
|
|
369
|
+
logger.error(f"Rate limit detected during batch embedding. {e}")
|
|
370
|
+
raise e
|
|
371
|
+
else:
|
|
372
|
+
logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
|
|
373
|
+
# Fall back to individual embedding
|
|
374
|
+
embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
|
|
375
|
+
await asyncio.gather(*embed_tasks, return_exceptions=True)
|
|
376
|
+
else:
|
|
377
|
+
# Use individual embedding
|
|
378
|
+
embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
|
|
379
|
+
await asyncio.gather(*embed_tasks, return_exceptions=True)
|
|
343
380
|
|
|
344
381
|
for doc in documents:
|
|
345
382
|
doc.meta_data["text"] = doc.content
|
agno/vectordb/qdrant/qdrant.py
CHANGED
|
@@ -131,7 +131,8 @@ class Qdrant(VectorDb):
|
|
|
131
131
|
if fastembed_kwargs:
|
|
132
132
|
default_kwargs.update(fastembed_kwargs)
|
|
133
133
|
|
|
134
|
-
|
|
134
|
+
# Type ignore for mypy as SparseTextEmbedding constructor accepts flexible kwargs
|
|
135
|
+
self.sparse_encoder = SparseTextEmbedding(**default_kwargs) # type: ignore
|
|
135
136
|
|
|
136
137
|
except ImportError as e:
|
|
137
138
|
raise ImportError(
|
|
@@ -192,10 +193,12 @@ class Qdrant(VectorDb):
|
|
|
192
193
|
# Configure vectors based on search type
|
|
193
194
|
if self.search_type == SearchType.vector:
|
|
194
195
|
# Maintain backward compatibility with unnamed vectors
|
|
195
|
-
vectors_config = models.VectorParams(size=self.dimensions, distance=_distance)
|
|
196
|
+
vectors_config = models.VectorParams(size=self.dimensions or 1536, distance=_distance)
|
|
196
197
|
else:
|
|
197
198
|
# Use named vectors for hybrid search
|
|
198
|
-
vectors_config = {
|
|
199
|
+
vectors_config = {
|
|
200
|
+
self.dense_vector_name: models.VectorParams(size=self.dimensions or 1536, distance=_distance)
|
|
201
|
+
} # type: ignore
|
|
199
202
|
|
|
200
203
|
self.client.create_collection(
|
|
201
204
|
collection_name=self.collection,
|
|
@@ -220,10 +223,12 @@ class Qdrant(VectorDb):
|
|
|
220
223
|
# Configure vectors based on search type
|
|
221
224
|
if self.search_type == SearchType.vector:
|
|
222
225
|
# Maintain backward compatibility with unnamed vectors
|
|
223
|
-
vectors_config = models.VectorParams(size=self.dimensions, distance=_distance)
|
|
226
|
+
vectors_config = models.VectorParams(size=self.dimensions or 1536, distance=_distance)
|
|
224
227
|
else:
|
|
225
228
|
# Use named vectors for hybrid search
|
|
226
|
-
vectors_config = {
|
|
229
|
+
vectors_config = {
|
|
230
|
+
self.dense_vector_name: models.VectorParams(size=self.dimensions or 1536, distance=_distance)
|
|
231
|
+
} # type: ignore
|
|
227
232
|
|
|
228
233
|
await self.async_client.create_collection(
|
|
229
234
|
collection_name=self.collection,
|
|
@@ -281,7 +286,7 @@ class Qdrant(VectorDb):
|
|
|
281
286
|
return len(scroll_result[0]) > 0
|
|
282
287
|
return False
|
|
283
288
|
|
|
284
|
-
async def async_name_exists(self, name: str) -> bool:
|
|
289
|
+
async def async_name_exists(self, name: str) -> bool: # type: ignore[override]
|
|
285
290
|
"""
|
|
286
291
|
Asynchronously validates if a document with the given name exists in the collection.
|
|
287
292
|
|
|
@@ -341,7 +346,9 @@ class Qdrant(VectorDb):
|
|
|
341
346
|
vector[self.dense_vector_name] = document.embedding
|
|
342
347
|
|
|
343
348
|
if self.search_type in [SearchType.keyword, SearchType.hybrid]:
|
|
344
|
-
vector[self.sparse_vector_name] = next(
|
|
349
|
+
vector[self.sparse_vector_name] = next(
|
|
350
|
+
iter(self.sparse_encoder.embed([document.content]))
|
|
351
|
+
).as_object() # type: ignore
|
|
345
352
|
|
|
346
353
|
# Create payload with document properties
|
|
347
354
|
payload = {
|
|
@@ -363,7 +370,7 @@ class Qdrant(VectorDb):
|
|
|
363
370
|
points.append(
|
|
364
371
|
models.PointStruct(
|
|
365
372
|
id=doc_id,
|
|
366
|
-
vector=vector,
|
|
373
|
+
vector=vector, # type: ignore
|
|
367
374
|
payload=payload,
|
|
368
375
|
)
|
|
369
376
|
)
|
|
@@ -384,26 +391,69 @@ class Qdrant(VectorDb):
|
|
|
384
391
|
"""
|
|
385
392
|
log_debug(f"Inserting {len(documents)} documents asynchronously")
|
|
386
393
|
|
|
394
|
+
# Apply batch embedding when needed for vector or hybrid search
|
|
395
|
+
if self.search_type in [SearchType.vector, SearchType.hybrid]:
|
|
396
|
+
if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
|
|
397
|
+
# Use batch embedding when enabled and supported
|
|
398
|
+
try:
|
|
399
|
+
# Extract content from all documents
|
|
400
|
+
doc_contents = [doc.content for doc in documents]
|
|
401
|
+
|
|
402
|
+
# Get batch embeddings and usage
|
|
403
|
+
embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
|
|
404
|
+
|
|
405
|
+
# Process documents with pre-computed embeddings
|
|
406
|
+
for j, doc in enumerate(documents):
|
|
407
|
+
try:
|
|
408
|
+
if j < len(embeddings):
|
|
409
|
+
doc.embedding = embeddings[j]
|
|
410
|
+
doc.usage = usages[j] if j < len(usages) else None
|
|
411
|
+
except Exception as e:
|
|
412
|
+
log_error(f"Error assigning batch embedding to document '{doc.name}': {e}")
|
|
413
|
+
|
|
414
|
+
except Exception as e:
|
|
415
|
+
# Check if this is a rate limit error - don't fall back as it would make things worse
|
|
416
|
+
error_str = str(e).lower()
|
|
417
|
+
is_rate_limit = any(
|
|
418
|
+
phrase in error_str
|
|
419
|
+
for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
|
|
420
|
+
)
|
|
421
|
+
|
|
422
|
+
if is_rate_limit:
|
|
423
|
+
log_error(f"Rate limit detected during batch embedding. {e}")
|
|
424
|
+
raise e
|
|
425
|
+
else:
|
|
426
|
+
log_warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
|
|
427
|
+
# Fall back to individual embedding
|
|
428
|
+
for doc in documents:
|
|
429
|
+
if self.search_type in [SearchType.vector, SearchType.hybrid]:
|
|
430
|
+
doc.embed(embedder=self.embedder)
|
|
431
|
+
else:
|
|
432
|
+
# Use individual embedding
|
|
433
|
+
for doc in documents:
|
|
434
|
+
if self.search_type in [SearchType.vector, SearchType.hybrid]:
|
|
435
|
+
doc.embed(embedder=self.embedder)
|
|
436
|
+
|
|
387
437
|
async def process_document(document):
|
|
388
438
|
cleaned_content = document.content.replace("\x00", "\ufffd")
|
|
389
439
|
doc_id = md5(cleaned_content.encode()).hexdigest()
|
|
390
440
|
|
|
391
441
|
if self.search_type == SearchType.vector:
|
|
392
442
|
# For vector search, maintain backward compatibility with unnamed vectors
|
|
393
|
-
document.
|
|
394
|
-
vector = document.embedding
|
|
443
|
+
vector = document.embedding # Already embedded above
|
|
395
444
|
else:
|
|
396
445
|
# For other search types, use named vectors
|
|
397
446
|
vector = {}
|
|
398
447
|
if self.search_type in [SearchType.hybrid]:
|
|
399
|
-
|
|
400
|
-
vector[self.dense_vector_name] = document.embedding
|
|
448
|
+
vector[self.dense_vector_name] = document.embedding # Already embedded above
|
|
401
449
|
|
|
402
450
|
if self.search_type in [SearchType.keyword, SearchType.hybrid]:
|
|
403
|
-
vector[self.sparse_vector_name] = next(
|
|
451
|
+
vector[self.sparse_vector_name] = next(
|
|
452
|
+
iter(self.sparse_encoder.embed([document.content]))
|
|
453
|
+
).as_object() # type: ignore
|
|
404
454
|
|
|
405
455
|
if self.search_type in [SearchType.keyword, SearchType.hybrid]:
|
|
406
|
-
vector[self.sparse_vector_name] = next(self.sparse_encoder.embed([document.content])).as_object()
|
|
456
|
+
vector[self.sparse_vector_name] = next(iter(self.sparse_encoder.embed([document.content]))).as_object()
|
|
407
457
|
|
|
408
458
|
# Create payload with document properties
|
|
409
459
|
payload = {
|
|
@@ -423,9 +473,9 @@ class Qdrant(VectorDb):
|
|
|
423
473
|
payload["meta_data"].update(filters)
|
|
424
474
|
|
|
425
475
|
log_debug(f"Inserted document asynchronously: {document.name} ({document.meta_data})")
|
|
426
|
-
return models.PointStruct(
|
|
476
|
+
return models.PointStruct( # type: ignore
|
|
427
477
|
id=doc_id,
|
|
428
|
-
vector=vector,
|
|
478
|
+
vector=vector, # type: ignore
|
|
429
479
|
payload=payload,
|
|
430
480
|
)
|
|
431
481
|
|
|
@@ -501,12 +551,12 @@ class Qdrant(VectorDb):
|
|
|
501
551
|
filters: Optional[Dict[str, Any]],
|
|
502
552
|
) -> List[models.ScoredPoint]:
|
|
503
553
|
dense_embedding = self.embedder.get_embedding(query)
|
|
504
|
-
sparse_embedding = next(self.sparse_encoder.embed([query])).as_object()
|
|
554
|
+
sparse_embedding = next(iter(self.sparse_encoder.embed([query]))).as_object()
|
|
505
555
|
call = self.client.query_points(
|
|
506
556
|
collection_name=self.collection,
|
|
507
557
|
prefetch=[
|
|
508
558
|
models.Prefetch(
|
|
509
|
-
query=models.SparseVector(**sparse_embedding),
|
|
559
|
+
query=models.SparseVector(**sparse_embedding), # type: ignore # type: ignore
|
|
510
560
|
limit=limit,
|
|
511
561
|
using=self.sparse_vector_name,
|
|
512
562
|
),
|
|
@@ -557,10 +607,10 @@ class Qdrant(VectorDb):
|
|
|
557
607
|
limit: int,
|
|
558
608
|
filters: Optional[Dict[str, Any]],
|
|
559
609
|
) -> List[models.ScoredPoint]:
|
|
560
|
-
sparse_embedding = next(self.sparse_encoder.embed([query])).as_object()
|
|
610
|
+
sparse_embedding = next(iter(self.sparse_encoder.embed([query]))).as_object()
|
|
561
611
|
call = self.client.query_points(
|
|
562
612
|
collection_name=self.collection,
|
|
563
|
-
query=models.SparseVector(**sparse_embedding),
|
|
613
|
+
query=models.SparseVector(**sparse_embedding), # type: ignore
|
|
564
614
|
with_vectors=True,
|
|
565
615
|
with_payload=True,
|
|
566
616
|
limit=limit,
|
|
@@ -606,10 +656,10 @@ class Qdrant(VectorDb):
|
|
|
606
656
|
limit: int,
|
|
607
657
|
filters: Optional[Dict[str, Any]],
|
|
608
658
|
) -> List[models.ScoredPoint]:
|
|
609
|
-
sparse_embedding = next(self.sparse_encoder.embed([query])).as_object()
|
|
659
|
+
sparse_embedding = next(iter(self.sparse_encoder.embed([query]))).as_object()
|
|
610
660
|
call = await self.async_client.query_points(
|
|
611
661
|
collection_name=self.collection,
|
|
612
|
-
query=models.SparseVector(**sparse_embedding),
|
|
662
|
+
query=models.SparseVector(**sparse_embedding), # type: ignore
|
|
613
663
|
with_vectors=True,
|
|
614
664
|
with_payload=True,
|
|
615
665
|
limit=limit,
|
|
@@ -625,12 +675,12 @@ class Qdrant(VectorDb):
|
|
|
625
675
|
filters: Optional[Dict[str, Any]],
|
|
626
676
|
) -> List[models.ScoredPoint]:
|
|
627
677
|
dense_embedding = self.embedder.get_embedding(query)
|
|
628
|
-
sparse_embedding = next(self.sparse_encoder.embed([query])).as_object()
|
|
678
|
+
sparse_embedding = next(iter(self.sparse_encoder.embed([query]))).as_object()
|
|
629
679
|
call = await self.async_client.query_points(
|
|
630
680
|
collection_name=self.collection,
|
|
631
681
|
prefetch=[
|
|
632
682
|
models.Prefetch(
|
|
633
|
-
query=models.SparseVector(**sparse_embedding),
|
|
683
|
+
query=models.SparseVector(**sparse_embedding), # type: ignore # type: ignore
|
|
634
684
|
limit=limit,
|
|
635
685
|
using=self.sparse_vector_name,
|
|
636
686
|
),
|
|
@@ -689,7 +739,7 @@ class Qdrant(VectorDb):
|
|
|
689
739
|
filter_conditions.append(models.FieldCondition(key=key, match=models.MatchValue(value=value)))
|
|
690
740
|
|
|
691
741
|
if filter_conditions:
|
|
692
|
-
return models.Filter(must=filter_conditions)
|
|
742
|
+
return models.Filter(must=filter_conditions) # type: ignore
|
|
693
743
|
|
|
694
744
|
return None
|
|
695
745
|
|
|
@@ -807,7 +857,7 @@ class Qdrant(VectorDb):
|
|
|
807
857
|
)
|
|
808
858
|
|
|
809
859
|
# Create a filter that requires ALL metadata conditions to match
|
|
810
|
-
filter_condition = models.Filter(must=filter_conditions)
|
|
860
|
+
filter_condition = models.Filter(must=filter_conditions) # type: ignore
|
|
811
861
|
|
|
812
862
|
# First, count how many points will be deleted
|
|
813
863
|
count_result = self.client.count(collection_name=self.collection, count_filter=filter_condition, exact=True)
|
|
agno/vectordb/singlestore/singlestore.py
CHANGED
|
@@ -496,8 +496,44 @@ class SingleStore(VectorDb):
|
|
|
496
496
|
documents: List[Document],
|
|
497
497
|
filters: Optional[Dict[str, Any]] = None,
|
|
498
498
|
) -> None:
|
|
499
|
-
|
|
500
|
-
|
|
499
|
+
if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
|
|
500
|
+
# Use batch embedding when enabled and supported
|
|
501
|
+
try:
|
|
502
|
+
# Extract content from all documents
|
|
503
|
+
doc_contents = [doc.content for doc in documents]
|
|
504
|
+
|
|
505
|
+
# Get batch embeddings and usage
|
|
506
|
+
embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
|
|
507
|
+
|
|
508
|
+
# Process documents with pre-computed embeddings
|
|
509
|
+
for j, doc in enumerate(documents):
|
|
510
|
+
try:
|
|
511
|
+
if j < len(embeddings):
|
|
512
|
+
doc.embedding = embeddings[j]
|
|
513
|
+
doc.usage = usages[j] if j < len(usages) else None
|
|
514
|
+
except Exception as e:
|
|
515
|
+
log_error(f"Error assigning batch embedding to document '{doc.name}': {e}")
|
|
516
|
+
|
|
517
|
+
except Exception as e:
|
|
518
|
+
# Check if this is a rate limit error - don't fall back as it would make things worse
|
|
519
|
+
error_str = str(e).lower()
|
|
520
|
+
is_rate_limit = any(
|
|
521
|
+
phrase in error_str
|
|
522
|
+
for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
|
|
523
|
+
)
|
|
524
|
+
|
|
525
|
+
if is_rate_limit:
|
|
526
|
+
log_error(f"Rate limit detected during batch embedding. {e}")
|
|
527
|
+
raise e
|
|
528
|
+
else:
|
|
529
|
+
log_error(f"Async batch embedding failed, falling back to individual embeddings: {e}")
|
|
530
|
+
# Fall back to individual embedding
|
|
531
|
+
embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
|
|
532
|
+
await asyncio.gather(*embed_tasks, return_exceptions=True)
|
|
533
|
+
else:
|
|
534
|
+
# Use individual embedding
|
|
535
|
+
embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
|
|
536
|
+
await asyncio.gather(*embed_tasks, return_exceptions=True)
|
|
501
537
|
|
|
502
538
|
with self.Session.begin() as sess:
|
|
503
539
|
counter = 0
|
|
@@ -543,8 +579,45 @@ class SingleStore(VectorDb):
|
|
|
543
579
|
filters (Optional[Dict[str, Any]]): Optional filters for the upsert.
|
|
544
580
|
batch_size (int): Number of documents to upsert in each batch.
|
|
545
581
|
"""
|
|
546
|
-
|
|
547
|
-
|
|
582
|
+
|
|
583
|
+
if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
|
|
584
|
+
# Use batch embedding when enabled and supported
|
|
585
|
+
try:
|
|
586
|
+
# Extract content from all documents
|
|
587
|
+
doc_contents = [doc.content for doc in documents]
|
|
588
|
+
|
|
589
|
+
# Get batch embeddings and usage
|
|
590
|
+
embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
|
|
591
|
+
|
|
592
|
+
# Process documents with pre-computed embeddings
|
|
593
|
+
for j, doc in enumerate(documents):
|
|
594
|
+
try:
|
|
595
|
+
if j < len(embeddings):
|
|
596
|
+
doc.embedding = embeddings[j]
|
|
597
|
+
doc.usage = usages[j] if j < len(usages) else None
|
|
598
|
+
except Exception as e:
|
|
599
|
+
log_error(f"Error assigning batch embedding to document '{doc.name}': {e}")
|
|
600
|
+
|
|
601
|
+
except Exception as e:
|
|
602
|
+
# Check if this is a rate limit error - don't fall back as it would make things worse
|
|
603
|
+
error_str = str(e).lower()
|
|
604
|
+
is_rate_limit = any(
|
|
605
|
+
phrase in error_str
|
|
606
|
+
for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
|
|
607
|
+
)
|
|
608
|
+
|
|
609
|
+
if is_rate_limit:
|
|
610
|
+
log_error(f"Rate limit detected during batch embedding. {e}")
|
|
611
|
+
raise e
|
|
612
|
+
else:
|
|
613
|
+
log_error(f"Async batch embedding failed, falling back to individual embeddings: {e}")
|
|
614
|
+
# Fall back to individual embedding
|
|
615
|
+
embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
|
|
616
|
+
await asyncio.gather(*embed_tasks, return_exceptions=True)
|
|
617
|
+
else:
|
|
618
|
+
# Use individual embedding
|
|
619
|
+
embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
|
|
620
|
+
await asyncio.gather(*embed_tasks, return_exceptions=True)
|
|
548
621
|
|
|
549
622
|
with self.Session.begin() as sess:
|
|
550
623
|
counter = 0
|