agno 2.3.16__py3-none-any.whl → 2.3.18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/__init__.py +2 -0
- agno/agent/agent.py +4 -53
- agno/agent/remote.py +351 -0
- agno/client/__init__.py +3 -0
- agno/client/os.py +2669 -0
- agno/db/base.py +20 -0
- agno/db/mongo/async_mongo.py +11 -0
- agno/db/mongo/mongo.py +10 -0
- agno/db/mysql/async_mysql.py +9 -0
- agno/db/mysql/mysql.py +9 -0
- agno/db/postgres/async_postgres.py +9 -0
- agno/db/postgres/postgres.py +9 -0
- agno/db/postgres/utils.py +3 -2
- agno/db/sqlite/async_sqlite.py +9 -0
- agno/db/sqlite/sqlite.py +11 -1
- agno/exceptions.py +23 -0
- agno/knowledge/chunking/semantic.py +123 -46
- agno/knowledge/reader/csv_reader.py +1 -1
- agno/knowledge/reader/field_labeled_csv_reader.py +1 -1
- agno/knowledge/reader/json_reader.py +1 -1
- agno/models/google/gemini.py +5 -0
- agno/os/app.py +108 -25
- agno/os/auth.py +25 -1
- agno/os/interfaces/a2a/a2a.py +7 -6
- agno/os/interfaces/a2a/router.py +13 -13
- agno/os/interfaces/agui/agui.py +5 -3
- agno/os/interfaces/agui/router.py +23 -16
- agno/os/interfaces/base.py +7 -7
- agno/os/interfaces/slack/router.py +6 -6
- agno/os/interfaces/slack/slack.py +7 -7
- agno/os/interfaces/whatsapp/router.py +29 -6
- agno/os/interfaces/whatsapp/whatsapp.py +11 -8
- agno/os/managers.py +326 -0
- agno/os/mcp.py +651 -79
- agno/os/router.py +125 -18
- agno/os/routers/agents/router.py +65 -22
- agno/os/routers/agents/schema.py +16 -4
- agno/os/routers/database.py +5 -0
- agno/os/routers/evals/evals.py +93 -11
- agno/os/routers/evals/utils.py +6 -6
- agno/os/routers/knowledge/knowledge.py +104 -16
- agno/os/routers/memory/memory.py +124 -7
- agno/os/routers/metrics/metrics.py +21 -4
- agno/os/routers/session/session.py +141 -12
- agno/os/routers/teams/router.py +40 -14
- agno/os/routers/teams/schema.py +12 -4
- agno/os/routers/traces/traces.py +54 -4
- agno/os/routers/workflows/router.py +223 -117
- agno/os/routers/workflows/schema.py +65 -1
- agno/os/schema.py +38 -12
- agno/os/utils.py +87 -166
- agno/remote/__init__.py +3 -0
- agno/remote/base.py +484 -0
- agno/run/workflow.py +1 -0
- agno/team/__init__.py +2 -0
- agno/team/remote.py +287 -0
- agno/team/team.py +25 -54
- agno/tracing/exporter.py +10 -6
- agno/tracing/setup.py +2 -1
- agno/utils/agent.py +58 -1
- agno/utils/http.py +68 -20
- agno/utils/os.py +0 -0
- agno/utils/remote.py +23 -0
- agno/vectordb/chroma/chromadb.py +452 -16
- agno/vectordb/pgvector/pgvector.py +7 -0
- agno/vectordb/redis/redisdb.py +1 -1
- agno/workflow/__init__.py +2 -0
- agno/workflow/agent.py +2 -2
- agno/workflow/remote.py +222 -0
- agno/workflow/types.py +0 -73
- agno/workflow/workflow.py +119 -68
- {agno-2.3.16.dist-info → agno-2.3.18.dist-info}/METADATA +1 -1
- {agno-2.3.16.dist-info → agno-2.3.18.dist-info}/RECORD +76 -66
- {agno-2.3.16.dist-info → agno-2.3.18.dist-info}/WHEEL +0 -0
- {agno-2.3.16.dist-info → agno-2.3.18.dist-info}/licenses/LICENSE +0 -0
- {agno-2.3.16.dist-info → agno-2.3.18.dist-info}/top_level.txt +0 -0
agno/vectordb/chroma/chromadb.py
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
import asyncio
|
|
2
2
|
import json
|
|
3
|
+
from collections import defaultdict
|
|
4
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
3
5
|
from hashlib import md5
|
|
4
|
-
from typing import Any, Dict, List, Mapping, Optional, Union, cast
|
|
6
|
+
from typing import Any, Dict, List, Mapping, Optional, Tuple, Union, cast
|
|
5
7
|
|
|
6
8
|
try:
|
|
7
9
|
from chromadb import Client as ChromaDbClient
|
|
@@ -20,9 +22,61 @@ from agno.knowledge.reranker.base import Reranker
|
|
|
20
22
|
from agno.utils.log import log_debug, log_error, log_info, log_warning, logger
|
|
21
23
|
from agno.vectordb.base import VectorDb
|
|
22
24
|
from agno.vectordb.distance import Distance
|
|
25
|
+
from agno.vectordb.search import SearchType
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def reciprocal_rank_fusion(
    ranked_lists: List[List[Tuple[str, float]]],
    k: int = 60,
) -> List[Tuple[str, float]]:
    """
    Combine multiple ranked lists using Reciprocal Rank Fusion (RRF).

    The formula is: RRF(d) = sum(1 / (k + rank_i(d))) for each ranking i,
    where rank_i(d) is the 1-based position of document d in ranking i.
    Only positions matter; the scores carried inside each input tuple are ignored.

    A document contributes at most one term per ranking: if a doc_id is
    repeated inside a single list, only its first (best) position counts.
    Later repeats still occupy their positions, so the ranks of the entries
    that follow them are unchanged.

    Args:
        ranked_lists: List of ranked results, each as [(doc_id, score), ...],
            ordered best-first.
        k: RRF constant (default 60, as per original paper by Cormack et al.).
            Expected to be non-negative; a negative value for which
            ``k + rank == 0`` raises ZeroDivisionError.

    Returns:
        Fused ranking as [(doc_id, rrf_score), ...] sorted by score descending.
        Ties keep the order in which the doc_ids were first encountered.
    """
    rrf_scores: Dict[str, float] = defaultdict(float)

    for ranked_list in ranked_lists:
        # Track ids already credited for this ranking so a duplicated id
        # cannot inflate its fused score.
        seen_in_list = set()
        for rank, (doc_id, _) in enumerate(ranked_list, start=1):
            if doc_id in seen_in_list:
                continue
            seen_in_list.add(doc_id)
            rrf_scores[doc_id] += 1.0 / (k + rank)

    return sorted(rrf_scores.items(), key=lambda item: item[1], reverse=True)
|
|
23
53
|
|
|
24
54
|
|
|
25
55
|
class ChromaDb(VectorDb):
|
|
56
|
+
"""
|
|
57
|
+
ChromaDb class for managing vector operations with ChromaDB.
|
|
58
|
+
|
|
59
|
+
Args:
|
|
60
|
+
collection: The name of the ChromaDB collection.
|
|
61
|
+
name: Name of the vector database.
|
|
62
|
+
description: Description of the vector database.
|
|
63
|
+
id: Unique identifier for this vector database instance.
|
|
64
|
+
embedder: The embedder to use when embedding the document contents.
|
|
65
|
+
distance: The distance metric to use when searching for documents.
|
|
66
|
+
path: The path to store the ChromaDB data (for persistent client).
|
|
67
|
+
persistent_client: Whether to use a persistent client.
|
|
68
|
+
search_type: The search type to use when searching for documents.
|
|
69
|
+
- SearchType.vector: Pure vector similarity search (default)
|
|
70
|
+
- SearchType.keyword: Keyword-based search using document content
|
|
71
|
+
- SearchType.hybrid: Combines vector + FTS with Reciprocal Rank Fusion
|
|
72
|
+
hybrid_rrf_k: RRF (Reciprocal Rank Fusion) constant for hybrid search.
|
|
73
|
+
Controls ranking smoothness - higher values give more weight to lower-ranked
|
|
74
|
+
results, lower values make top results more dominant. Default is 60
|
|
75
|
+
(per original RRF paper by Cormack et al.).
|
|
76
|
+
reranker: The reranker to use when reranking documents.
|
|
77
|
+
**kwargs: Additional arguments to pass to the ChromaDB client.
|
|
78
|
+
"""
|
|
79
|
+
|
|
26
80
|
def __init__(
|
|
27
81
|
self,
|
|
28
82
|
collection: str,
|
|
@@ -33,6 +87,8 @@ class ChromaDb(VectorDb):
|
|
|
33
87
|
distance: Distance = Distance.cosine,
|
|
34
88
|
path: str = "tmp/chromadb",
|
|
35
89
|
persistent_client: bool = False,
|
|
90
|
+
search_type: SearchType = SearchType.vector,
|
|
91
|
+
hybrid_rrf_k: int = 60,
|
|
36
92
|
reranker: Optional[Reranker] = None,
|
|
37
93
|
**kwargs,
|
|
38
94
|
):
|
|
@@ -72,6 +128,10 @@ class ChromaDb(VectorDb):
|
|
|
72
128
|
self.persistent_client: bool = persistent_client
|
|
73
129
|
self.path: str = path
|
|
74
130
|
|
|
131
|
+
# Search type configuration
|
|
132
|
+
self.search_type: SearchType = search_type
|
|
133
|
+
self.hybrid_rrf_k: int = hybrid_rrf_k
|
|
134
|
+
|
|
75
135
|
# Reranker instance
|
|
76
136
|
self.reranker: Optional[Reranker] = reranker
|
|
77
137
|
|
|
@@ -272,7 +332,7 @@ class ChromaDb(VectorDb):
|
|
|
272
332
|
embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
|
|
273
333
|
await asyncio.gather(*embed_tasks, return_exceptions=True)
|
|
274
334
|
except Exception as e:
|
|
275
|
-
|
|
335
|
+
logger.error(f"Error processing document: {e}")
|
|
276
336
|
|
|
277
337
|
for document in documents:
|
|
278
338
|
cleaned_content = document.content.replace("\x00", "\ufffd")
|
|
@@ -502,6 +562,41 @@ class ChromaDb(VectorDb):
|
|
|
502
562
|
if isinstance(filters, list):
|
|
503
563
|
log_warning("Filter Expressions are not yet supported in ChromaDB. No filters will be applied.")
|
|
504
564
|
filters = None
|
|
565
|
+
|
|
566
|
+
if not self._collection:
|
|
567
|
+
self._collection = self.client.get_collection(name=self.collection_name)
|
|
568
|
+
|
|
569
|
+
# Route to appropriate search method based on search_type
|
|
570
|
+
if self.search_type == SearchType.vector:
|
|
571
|
+
search_results = self._vector_search(query, limit, filters)
|
|
572
|
+
elif self.search_type == SearchType.keyword:
|
|
573
|
+
search_results = self._keyword_search(query, limit, filters)
|
|
574
|
+
elif self.search_type == SearchType.hybrid:
|
|
575
|
+
search_results = self._hybrid_search(query, limit, filters)
|
|
576
|
+
else:
|
|
577
|
+
logger.error(f"Invalid search type '{self.search_type}'.")
|
|
578
|
+
return []
|
|
579
|
+
|
|
580
|
+
if self.reranker and search_results:
|
|
581
|
+
try:
|
|
582
|
+
search_results = self.reranker.rerank(query=query, documents=search_results)
|
|
583
|
+
except Exception as e:
|
|
584
|
+
log_warning(f"Reranker failed, returning unranked results: {e}")
|
|
585
|
+
|
|
586
|
+
log_info(f"Found {len(search_results)} documents")
|
|
587
|
+
return search_results
|
|
588
|
+
|
|
589
|
+
def _vector_search(self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
|
|
590
|
+
"""Perform pure vector similarity search.
|
|
591
|
+
|
|
592
|
+
Args:
|
|
593
|
+
query (str): Query to search for.
|
|
594
|
+
limit (int): Number of results to return.
|
|
595
|
+
filters (Optional[Dict[str, Any]]): Metadata filters to apply.
|
|
596
|
+
|
|
597
|
+
Returns:
|
|
598
|
+
List[Document]: List of search results.
|
|
599
|
+
"""
|
|
505
600
|
query_embedding = self.embedder.get_embedding(query)
|
|
506
601
|
if query_embedding is None:
|
|
507
602
|
logger.error(f"Error getting embedding for Query: {query}")
|
|
@@ -516,11 +611,248 @@ class ChromaDb(VectorDb):
|
|
|
516
611
|
result: QueryResult = self._collection.query(
|
|
517
612
|
query_embeddings=query_embedding,
|
|
518
613
|
n_results=limit,
|
|
519
|
-
where=where_filter,
|
|
614
|
+
where=where_filter,
|
|
520
615
|
include=["metadatas", "documents", "embeddings", "distances", "uris"],
|
|
521
616
|
)
|
|
522
617
|
|
|
523
|
-
|
|
618
|
+
return self._build_search_results(result)
|
|
619
|
+
|
|
620
|
+
def _keyword_search(self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
    """Return documents whose stored text contains the query's leading token.

    The lookup goes through the collection's ``get`` call with a
    ``where_document`` ``$contains`` clause, so no query embedding is computed.
    Only the first whitespace-separated token of ``query`` is used in that
    clause; the full query string is handed to ``_build_get_results``.

    Args:
        query (str): Search text. An empty or whitespace-only query yields no results.
        limit (int): Maximum number of documents requested from the collection.
        filters (Optional[Dict[str, Any]]): Metadata filters; converted with
            ``_convert_filters`` when non-empty.

    Returns:
        List[Document]: Matching documents, or an empty list when there is no
        token to search for or the lookup raises.
    """
    if not self._collection:
        self._collection = self.client.get_collection(name=self.collection_name)

    # Translate simple filters into ChromaDB's format only when some were given
    where_filter = None
    if filters:
        where_filter = self._convert_filters(filters)

    tokens = query.split()
    if len(tokens) == 0:
        return []

    # The $contains clause is built from the leading token only
    contains_clause: Dict[str, Any] = {"$contains": tokens[0]}

    try:
        raw = self._collection.get(
            where=where_filter,
            where_document=cast(Any, contains_clause),
            limit=limit,
            include=["metadatas", "documents", "embeddings"],
        )
        return self._build_get_results(cast(Dict[str, Any], raw), query)
    except Exception as exc:
        logger.error(f"Error in keyword search: {exc}")
        return []
|
|
661
|
+
|
|
662
|
+
def _hybrid_search(self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
    """Rank documents by fusing a vector ranking with a keyword-filtered ranking.

    Two candidate lists are produced from the collection, in parallel:

    1. a dense ranking, in the order returned by the embedding query, and
    2. a lexical ranking, restricted to documents containing the first query
       token and ordered by the share of query terms each document contains.

    The lists are merged with ``reciprocal_rank_fusion`` using
    ``self.hybrid_rrf_k``. The best ``limit`` ids are then re-fetched to build
    the returned documents, whose metadata gains an ``rrf_score`` entry.

    Args:
        query (str): Query to search for.
        limit (int): Number of results to return.
        filters (Optional[Dict[str, Any]]): Metadata filters to apply to both candidate queries.

    Returns:
        List[Document]: Documents in fused-rank order. Empty when the query
        cannot be embedded, when neither ranking yields candidates, or when
        re-fetching the selected ids fails.
    """
    query_embedding = self.embedder.get_embedding(query)
    if query_embedding is None:
        logger.error(f"Error getting embedding for Query: {query}")
        return []

    if not self._collection:
        self._collection = self.client.get_collection(name=self.collection_name)

    # Translate simple filters into ChromaDB's format only when some were given
    where_filter = None
    if filters:
        where_filter = self._convert_filters(filters)

    # Over-fetch (3x the limit, capped at 100) so fusion sees candidates beyond the final cut
    candidate_count = min(limit * 3, 100)

    def dense_ranking() -> List[Tuple[str, float]]:
        """Embedding-similarity candidates as (doc_id, score) pairs."""
        try:
            hits = self._collection.query(  # type: ignore
                query_embeddings=query_embedding,
                n_results=candidate_count,
                where=where_filter,
                include=["documents", "metadatas", "distances"],
            )

            if not (hits.get("ids") and hits["ids"][0]):
                return []

            scored: List[Tuple[str, float]] = []
            for position, doc_id in enumerate(hits["ids"][0]):
                if hits.get("distances"):
                    distance = hits["distances"][0][position]  # type: ignore
                else:
                    distance = 0
                # Smaller distance maps to a larger score
                scored.append((doc_id, 1.0 / (1.0 + distance)))
            return scored
        except Exception as exc:
            log_error(f"Error in vector search component: {exc}")
            return []

    def lexical_ranking() -> List[Tuple[str, float]]:
        """Candidates containing the first query token, scored by term overlap."""
        try:
            tokens = query.split()
            if not tokens:
                return []

            # Only the leading token feeds the $contains clause
            contains_clause: Dict[str, Any] = {"$contains": tokens[0]}

            hits = self._collection.query(  # type: ignore
                query_embeddings=query_embedding,
                n_results=candidate_count,
                where=where_filter,
                where_document=cast(Any, contains_clause),
                include=["documents", "metadatas", "distances"],
            )

            scored: List[Tuple[str, float]] = []
            if hits.get("ids") and hits["ids"][0]:
                wanted = set(query.lower().split())
                for position, doc_id in enumerate(hits["ids"][0]):
                    text = hits["documents"][0][position] if hits.get("documents") else ""  # type: ignore
                    present = set(text.lower().split()) if text else set()
                    # Fraction of distinct query terms found in the document
                    scored.append((doc_id, len(wanted & present) / max(len(wanted), 1)))

            # Best overlap first
            scored.sort(key=lambda pair: pair[1], reverse=True)
            return scored
        except Exception as exc:
            log_error(f"Error in FTS search component: {exc}")
            return []

    # Both candidate queries are submitted before either result is awaited
    with ThreadPoolExecutor(max_workers=2) as pool:
        dense_future = pool.submit(dense_ranking)
        lexical_future = pool.submit(lexical_ranking)

        dense_hits = dense_future.result()
        lexical_hits = lexical_future.result()

    fused = reciprocal_rank_fusion(
        [dense_hits, lexical_hits],
        k=self.hybrid_rrf_k,
    )

    selected_ids = [doc_id for doc_id, _ in fused[:limit]]
    if not selected_ids:
        return []

    # Re-fetch the selected ids to get content, metadata and embeddings
    try:
        fetched = self._collection.get(
            ids=selected_ids,
            include=["documents", "metadatas", "embeddings"],
        )
    except Exception as exc:
        log_error(f"Error fetching full results: {exc}")
        return []

    fetched_ids = fetched.get("ids", [])
    fetched_docs = fetched.get("documents")
    fetched_metas = fetched.get("metadatas")
    fetched_embeds = fetched.get("embeddings")

    def value_at(column: Any, position: int) -> Any:
        """Return column[position], or None when the column is missing or too short."""
        if column is not None and position < len(column):
            return column[position]
        return None

    # Index fetched rows by id so they can be emitted in fused-rank order
    rows_by_id: Dict[str, Dict[str, Any]] = {}
    for position, doc_id in enumerate(fetched_ids if fetched_ids is not None else []):
        rows_by_id[doc_id] = {
            "document": value_at(fetched_docs, position),
            "metadata": value_at(fetched_metas, position),
            "embedding": value_at(fetched_embeds, position),
        }

    fused_scores = dict(fused)
    search_results: List[Document] = []

    for doc_id in selected_ids:
        row = rows_by_id.get(doc_id)
        if row is None:
            continue

        doc_metadata = dict(row["metadata"]) if row["metadata"] else {}
        doc_metadata["rrf_score"] = fused_scores.get(doc_id, 0.0)

        # name and content_id live in metadata; lift them out and coerce to str
        name = doc_metadata.pop("name", None)
        if name is not None and not isinstance(name, str):
            name = str(name)
        content_id = doc_metadata.pop("content_id", None)
        if content_id is not None and not isinstance(content_id, str):
            content_id = str(content_id)

        content = "" if row["document"] is None else str(row["document"])

        # Normalise the stored embedding to a plain list where possible
        raw_embedding = row["embedding"]
        embedding = None
        if raw_embedding is not None:
            if hasattr(raw_embedding, "tolist") and callable(getattr(raw_embedding, "tolist", None)):
                try:
                    embedding = list(cast(Any, raw_embedding).tolist())
                except (AttributeError, TypeError):
                    embedding = list(raw_embedding) if isinstance(raw_embedding, (list, tuple)) else None
            elif isinstance(raw_embedding, (list, tuple)):
                embedding = [float(x) for x in raw_embedding if isinstance(x, (int, float))]

        search_results.append(
            Document(
                id=doc_id,
                name=name,
                meta_data=doc_metadata,
                content=content,
                embedding=embedding,
                content_id=content_id,
            )
        )

    return search_results
|
|
846
|
+
|
|
847
|
+
def _build_search_results(self, result: QueryResult) -> List[Document]:
|
|
848
|
+
"""Build Document list from ChromaDB QueryResult.
|
|
849
|
+
|
|
850
|
+
Args:
|
|
851
|
+
result: The QueryResult from ChromaDB query.
|
|
852
|
+
|
|
853
|
+
Returns:
|
|
854
|
+
List[Document]: List of Document objects.
|
|
855
|
+
"""
|
|
524
856
|
search_results: List[Document] = []
|
|
525
857
|
|
|
526
858
|
ids_list = result.get("ids", [[]]) # type: ignore
|
|
@@ -529,13 +861,33 @@ class ChromaDb(VectorDb):
|
|
|
529
861
|
embeddings_list = result.get("embeddings") # type: ignore
|
|
530
862
|
distances_list = result.get("distances", [[]]) # type: ignore
|
|
531
863
|
|
|
532
|
-
|
|
864
|
+
# Check if we have valid results - handle numpy arrays carefully
|
|
865
|
+
if ids_list is None or len(ids_list) == 0:
|
|
866
|
+
return search_results
|
|
867
|
+
if metadata_list is None or len(metadata_list) == 0:
|
|
868
|
+
return search_results
|
|
869
|
+
if documents_list is None or len(documents_list) == 0:
|
|
870
|
+
return search_results
|
|
871
|
+
if distances_list is None or len(distances_list) == 0:
|
|
533
872
|
return search_results
|
|
534
873
|
|
|
535
874
|
ids = ids_list[0]
|
|
536
875
|
metadata = [dict(m) if m else {} for m in metadata_list[0]] # Convert to mutable dicts
|
|
537
876
|
documents = documents_list[0]
|
|
538
|
-
|
|
877
|
+
|
|
878
|
+
# Handle embeddings - may be None or numpy array
|
|
879
|
+
embeddings_raw: Any = []
|
|
880
|
+
if embeddings_list is not None:
|
|
881
|
+
try:
|
|
882
|
+
if len(embeddings_list) > 0:
|
|
883
|
+
embeddings_raw = embeddings_list[0]
|
|
884
|
+
except (TypeError, ValueError):
|
|
885
|
+
# numpy array truth value issue - try direct access
|
|
886
|
+
try:
|
|
887
|
+
embeddings_raw = embeddings_list[0]
|
|
888
|
+
except Exception:
|
|
889
|
+
embeddings_raw = []
|
|
890
|
+
|
|
539
891
|
embeddings = []
|
|
540
892
|
for e in embeddings_raw:
|
|
541
893
|
if hasattr(e, "tolist") and callable(getattr(e, "tolist", None)):
|
|
@@ -549,7 +901,8 @@ class ChromaDb(VectorDb):
|
|
|
549
901
|
embeddings.append([float(e)])
|
|
550
902
|
else:
|
|
551
903
|
embeddings.append([])
|
|
552
|
-
|
|
904
|
+
|
|
905
|
+
distances = distances_list[0] if len(distances_list) > 0 else []
|
|
553
906
|
|
|
554
907
|
for idx, distance in enumerate(distances):
|
|
555
908
|
if idx < len(metadata):
|
|
@@ -582,12 +935,95 @@ class ChromaDb(VectorDb):
|
|
|
582
935
|
)
|
|
583
936
|
)
|
|
584
937
|
except Exception as e:
|
|
585
|
-
|
|
938
|
+
log_error(f"Error building search results: {e}")
|
|
586
939
|
|
|
587
|
-
|
|
588
|
-
|
|
940
|
+
return search_results
|
|
941
|
+
|
|
942
|
+
def _build_get_results(self, result: Dict[str, Any], query: str = "") -> List[Document]:
    """Turn a ChromaDB ``get`` result into a list of Document objects.

    The result is read as parallel columns under the keys ``ids``,
    ``metadatas``, ``documents`` and ``embeddings``. When ``query`` is
    non-empty, each document with non-empty text gets a ``keyword_score``
    metadata entry: the fraction of distinct lowercased query terms present
    in the document's whitespace-split text.

    Args:
        result: The GetResult from ChromaDB get.
        query: The original query for scoring.

    Returns:
        List[Document]: One Document per id. If building fails partway, the
        error is logged and the documents built so far are returned.
    """
    built: List[Document] = []

    ids = result.get("ids", [])
    metadatas = result.get("metadatas", [])
    documents = result.get("documents", [])
    raw_embeddings = result.get("embeddings")

    # Use len() rather than truthiness: ids may be a numpy array
    if ids is None:
        return built
    try:
        no_ids = len(ids) == 0
    except (TypeError, ValueError):
        return built
    if no_ids:
        return built

    def as_plain_list(item: Any) -> List[Any]:
        """Convert one stored embedding to a plain list; [] when unrecognised."""
        if hasattr(item, "tolist") and callable(getattr(item, "tolist", None)):
            try:
                return list(cast(Any, item).tolist())
            except (AttributeError, TypeError):
                return list(item) if isinstance(item, (list, tuple)) else []
        if isinstance(item, (list, tuple)):
            return [float(x) for x in item if isinstance(x, (int, float))]
        if isinstance(item, (int, float)):
            return [float(item)]
        return []

    embeddings: List[Any] = []
    if raw_embeddings is not None:
        try:
            embeddings = [as_plain_list(item) for item in raw_embeddings]
        except (TypeError, ValueError):
            # Iteration/conversion problem: fall back to no embeddings at all
            embeddings = []

    try:
        for idx, id_ in enumerate(ids):
            doc_metadata: Dict[str, Any] = {}
            if metadatas and idx < len(metadatas) and metadatas[idx]:
                doc_metadata = dict(metadatas[idx])

            document = documents[idx] if documents and idx < len(documents) else ""

            # Simple term-overlap score, only when there is both a query and text
            if query and document:
                wanted = set(query.lower().split())
                present = set(document.lower().split())
                doc_metadata["keyword_score"] = len(wanted & present) / max(len(wanted), 1)

            # name and content_id live in metadata; lift them out and coerce to str
            name = doc_metadata.pop("name", None)
            if name is not None and not isinstance(name, str):
                name = str(name)
            content_id = doc_metadata.pop("content_id", None)
            if content_id is not None and not isinstance(content_id, str):
                content_id = str(content_id)

            content = "" if document is None else str(document)
            embedding = embeddings[idx] if idx < len(embeddings) else None

            built.append(
                Document(
                    id=id_,
                    name=name,
                    meta_data=doc_metadata,
                    content=content,
                    embedding=embedding,
                    content_id=content_id,
                )
            )
    except Exception as exc:
        log_error(f"Error building get results: {exc}")

    return built
|
|
592
1028
|
|
|
593
1029
|
def _convert_filters(self, filters: Dict[str, Any]) -> Dict[str, Any]:
|
|
@@ -892,7 +1328,7 @@ class ChromaDb(VectorDb):
|
|
|
892
1328
|
current_metadatas = []
|
|
893
1329
|
|
|
894
1330
|
if not ids:
|
|
895
|
-
|
|
1331
|
+
log_debug(f"No documents found with content_id: {content_id}")
|
|
896
1332
|
return
|
|
897
1333
|
|
|
898
1334
|
# Flatten the new metadata first
|
|
@@ -914,11 +1350,11 @@ class ChromaDb(VectorDb):
|
|
|
914
1350
|
chroma_metadatas = cast(List[Mapping[str, Union[str, int, float, bool]]], updated_metadatas)
|
|
915
1351
|
chroma_metadatas = [{k: v for k, v in m.items() if k and v} for m in chroma_metadatas]
|
|
916
1352
|
collection.update(ids=ids, metadatas=chroma_metadatas) # type: ignore
|
|
917
|
-
|
|
1353
|
+
log_debug(f"Updated metadata for {len(ids)} documents with content_id: {content_id}")
|
|
918
1354
|
|
|
919
1355
|
except TypeError as te:
|
|
920
1356
|
if "object of type 'int' has no len()" in str(te):
|
|
921
|
-
|
|
1357
|
+
log_warning(
|
|
922
1358
|
f"ChromaDB internal error (version 0.5.0 bug): {te}. Cannot update metadata for content_id '{content_id}'."
|
|
923
1359
|
)
|
|
924
1360
|
return
|
|
@@ -926,9 +1362,9 @@ class ChromaDb(VectorDb):
|
|
|
926
1362
|
raise te
|
|
927
1363
|
|
|
928
1364
|
except Exception as e:
|
|
929
|
-
|
|
1365
|
+
log_error(f"Error updating metadata for content_id '{content_id}': {e}")
|
|
930
1366
|
raise
|
|
931
1367
|
|
|
932
1368
|
def get_supported_search_types(self) -> List[str]:
    """Return the search modes this vector database supports: vector, keyword and hybrid."""
    supported = [
        SearchType.vector,
        SearchType.keyword,
        SearchType.hybrid,
    ]
    return supported
|
|
@@ -643,6 +643,13 @@ class PgVector(VectorDb):
|
|
|
643
643
|
base_id = doc.id or md5(cleaned_content.encode()).hexdigest()
|
|
644
644
|
record_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
|
|
645
645
|
|
|
646
|
+
if (
|
|
647
|
+
doc.embedding is not None
|
|
648
|
+
and isinstance(doc.embedding, list)
|
|
649
|
+
and len(doc.embedding) == 0
|
|
650
|
+
):
|
|
651
|
+
log_warning(f"Document {idx} '{doc.name}' has empty embedding (length 0)")
|
|
652
|
+
|
|
646
653
|
if (
|
|
647
654
|
doc.embedding is not None
|
|
648
655
|
and isinstance(doc.embedding, list)
|
agno/vectordb/redis/redisdb.py
CHANGED
|
@@ -477,7 +477,7 @@ class RedisDB(VectorDb):
|
|
|
477
477
|
vector_field_name="embedding",
|
|
478
478
|
text=query,
|
|
479
479
|
text_field_name="content",
|
|
480
|
-
|
|
480
|
+
linear_alpha=self.vector_score_weight,
|
|
481
481
|
return_fields=["id", "name", "content"],
|
|
482
482
|
num_results=limit,
|
|
483
483
|
)
|
agno/workflow/__init__.py
CHANGED
|
@@ -2,6 +2,7 @@ from agno.workflow.agent import WorkflowAgent
|
|
|
2
2
|
from agno.workflow.condition import Condition
|
|
3
3
|
from agno.workflow.loop import Loop
|
|
4
4
|
from agno.workflow.parallel import Parallel
|
|
5
|
+
from agno.workflow.remote import RemoteWorkflow
|
|
5
6
|
from agno.workflow.router import Router
|
|
6
7
|
from agno.workflow.step import Step
|
|
7
8
|
from agno.workflow.steps import Steps
|
|
@@ -11,6 +12,7 @@ from agno.workflow.workflow import Workflow
|
|
|
11
12
|
__all__ = [
|
|
12
13
|
"Workflow",
|
|
13
14
|
"WorkflowAgent",
|
|
15
|
+
"RemoteWorkflow",
|
|
14
16
|
"Steps",
|
|
15
17
|
"Step",
|
|
16
18
|
"Loop",
|
agno/workflow/agent.py
CHANGED
|
@@ -5,9 +5,9 @@ from typing import TYPE_CHECKING, Any, Callable, Optional
|
|
|
5
5
|
from agno.agent import Agent
|
|
6
6
|
from agno.models.base import Model
|
|
7
7
|
from agno.run import RunContext
|
|
8
|
-
from agno.workflow.types import WebSocketHandler
|
|
9
8
|
|
|
10
9
|
if TYPE_CHECKING:
|
|
10
|
+
from agno.os.managers import WebSocketHandler
|
|
11
11
|
from agno.session.workflow import WorkflowSession
|
|
12
12
|
from agno.workflow.types import WorkflowExecutionInput
|
|
13
13
|
|
|
@@ -190,7 +190,7 @@ Guidelines:
|
|
|
190
190
|
execution_input: "WorkflowExecutionInput",
|
|
191
191
|
run_context: RunContext,
|
|
192
192
|
stream: bool = False,
|
|
193
|
-
websocket_handler: Optional[WebSocketHandler] = None,
|
|
193
|
+
websocket_handler: Optional["WebSocketHandler"] = None,
|
|
194
194
|
) -> Callable:
|
|
195
195
|
"""
|
|
196
196
|
Create the async workflow execution tool that this agent can call.
|