agno 2.1.2__py3-none-any.whl → 2.3.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/agent.py +5540 -2273
- agno/api/api.py +2 -0
- agno/api/os.py +1 -1
- agno/compression/__init__.py +3 -0
- agno/compression/manager.py +247 -0
- agno/culture/__init__.py +3 -0
- agno/culture/manager.py +956 -0
- agno/db/async_postgres/__init__.py +3 -0
- agno/db/base.py +689 -6
- agno/db/dynamo/dynamo.py +933 -37
- agno/db/dynamo/schemas.py +174 -10
- agno/db/dynamo/utils.py +63 -4
- agno/db/firestore/firestore.py +831 -9
- agno/db/firestore/schemas.py +51 -0
- agno/db/firestore/utils.py +102 -4
- agno/db/gcs_json/gcs_json_db.py +660 -12
- agno/db/gcs_json/utils.py +60 -26
- agno/db/in_memory/in_memory_db.py +287 -14
- agno/db/in_memory/utils.py +60 -2
- agno/db/json/json_db.py +590 -14
- agno/db/json/utils.py +60 -26
- agno/db/migrations/manager.py +199 -0
- agno/db/migrations/v1_to_v2.py +43 -13
- agno/db/migrations/versions/__init__.py +0 -0
- agno/db/migrations/versions/v2_3_0.py +938 -0
- agno/db/mongo/__init__.py +15 -1
- agno/db/mongo/async_mongo.py +2760 -0
- agno/db/mongo/mongo.py +879 -11
- agno/db/mongo/schemas.py +42 -0
- agno/db/mongo/utils.py +80 -8
- agno/db/mysql/__init__.py +2 -1
- agno/db/mysql/async_mysql.py +2912 -0
- agno/db/mysql/mysql.py +946 -68
- agno/db/mysql/schemas.py +72 -10
- agno/db/mysql/utils.py +198 -7
- agno/db/postgres/__init__.py +2 -1
- agno/db/postgres/async_postgres.py +2579 -0
- agno/db/postgres/postgres.py +942 -57
- agno/db/postgres/schemas.py +81 -18
- agno/db/postgres/utils.py +164 -2
- agno/db/redis/redis.py +671 -7
- agno/db/redis/schemas.py +50 -0
- agno/db/redis/utils.py +65 -7
- agno/db/schemas/__init__.py +2 -1
- agno/db/schemas/culture.py +120 -0
- agno/db/schemas/evals.py +1 -0
- agno/db/schemas/memory.py +17 -2
- agno/db/singlestore/schemas.py +63 -0
- agno/db/singlestore/singlestore.py +949 -83
- agno/db/singlestore/utils.py +60 -2
- agno/db/sqlite/__init__.py +2 -1
- agno/db/sqlite/async_sqlite.py +2911 -0
- agno/db/sqlite/schemas.py +62 -0
- agno/db/sqlite/sqlite.py +965 -46
- agno/db/sqlite/utils.py +169 -8
- agno/db/surrealdb/__init__.py +3 -0
- agno/db/surrealdb/metrics.py +292 -0
- agno/db/surrealdb/models.py +334 -0
- agno/db/surrealdb/queries.py +71 -0
- agno/db/surrealdb/surrealdb.py +1908 -0
- agno/db/surrealdb/utils.py +147 -0
- agno/db/utils.py +2 -0
- agno/eval/__init__.py +10 -0
- agno/eval/accuracy.py +75 -55
- agno/eval/agent_as_judge.py +861 -0
- agno/eval/base.py +29 -0
- agno/eval/performance.py +16 -7
- agno/eval/reliability.py +28 -16
- agno/eval/utils.py +35 -17
- agno/exceptions.py +27 -2
- agno/filters.py +354 -0
- agno/guardrails/prompt_injection.py +1 -0
- agno/hooks/__init__.py +3 -0
- agno/hooks/decorator.py +164 -0
- agno/integrations/discord/client.py +1 -1
- agno/knowledge/chunking/agentic.py +13 -10
- agno/knowledge/chunking/fixed.py +4 -1
- agno/knowledge/chunking/semantic.py +9 -4
- agno/knowledge/chunking/strategy.py +59 -15
- agno/knowledge/embedder/fastembed.py +1 -1
- agno/knowledge/embedder/nebius.py +1 -1
- agno/knowledge/embedder/ollama.py +8 -0
- agno/knowledge/embedder/openai.py +8 -8
- agno/knowledge/embedder/sentence_transformer.py +6 -2
- agno/knowledge/embedder/vllm.py +262 -0
- agno/knowledge/knowledge.py +1618 -318
- agno/knowledge/reader/base.py +6 -2
- agno/knowledge/reader/csv_reader.py +8 -10
- agno/knowledge/reader/docx_reader.py +5 -6
- agno/knowledge/reader/field_labeled_csv_reader.py +16 -20
- agno/knowledge/reader/json_reader.py +5 -4
- agno/knowledge/reader/markdown_reader.py +8 -8
- agno/knowledge/reader/pdf_reader.py +17 -19
- agno/knowledge/reader/pptx_reader.py +101 -0
- agno/knowledge/reader/reader_factory.py +32 -3
- agno/knowledge/reader/s3_reader.py +3 -3
- agno/knowledge/reader/tavily_reader.py +193 -0
- agno/knowledge/reader/text_reader.py +22 -10
- agno/knowledge/reader/web_search_reader.py +1 -48
- agno/knowledge/reader/website_reader.py +10 -10
- agno/knowledge/reader/wikipedia_reader.py +33 -1
- agno/knowledge/types.py +1 -0
- agno/knowledge/utils.py +72 -7
- agno/media.py +22 -6
- agno/memory/__init__.py +14 -1
- agno/memory/manager.py +544 -83
- agno/memory/strategies/__init__.py +15 -0
- agno/memory/strategies/base.py +66 -0
- agno/memory/strategies/summarize.py +196 -0
- agno/memory/strategies/types.py +37 -0
- agno/models/aimlapi/aimlapi.py +17 -0
- agno/models/anthropic/claude.py +515 -40
- agno/models/aws/bedrock.py +102 -21
- agno/models/aws/claude.py +131 -274
- agno/models/azure/ai_foundry.py +41 -19
- agno/models/azure/openai_chat.py +39 -8
- agno/models/base.py +1249 -525
- agno/models/cerebras/cerebras.py +91 -21
- agno/models/cerebras/cerebras_openai.py +21 -2
- agno/models/cohere/chat.py +40 -6
- agno/models/cometapi/cometapi.py +18 -1
- agno/models/dashscope/dashscope.py +2 -3
- agno/models/deepinfra/deepinfra.py +18 -1
- agno/models/deepseek/deepseek.py +69 -3
- agno/models/fireworks/fireworks.py +18 -1
- agno/models/google/gemini.py +877 -80
- agno/models/google/utils.py +22 -0
- agno/models/groq/groq.py +51 -18
- agno/models/huggingface/huggingface.py +17 -6
- agno/models/ibm/watsonx.py +16 -6
- agno/models/internlm/internlm.py +18 -1
- agno/models/langdb/langdb.py +13 -1
- agno/models/litellm/chat.py +44 -9
- agno/models/litellm/litellm_openai.py +18 -1
- agno/models/message.py +28 -5
- agno/models/meta/llama.py +47 -14
- agno/models/meta/llama_openai.py +22 -17
- agno/models/mistral/mistral.py +8 -4
- agno/models/nebius/nebius.py +6 -7
- agno/models/nvidia/nvidia.py +20 -3
- agno/models/ollama/chat.py +24 -8
- agno/models/openai/chat.py +104 -29
- agno/models/openai/responses.py +101 -81
- agno/models/openrouter/openrouter.py +60 -3
- agno/models/perplexity/perplexity.py +17 -1
- agno/models/portkey/portkey.py +7 -6
- agno/models/requesty/requesty.py +24 -4
- agno/models/response.py +73 -2
- agno/models/sambanova/sambanova.py +20 -3
- agno/models/siliconflow/siliconflow.py +19 -2
- agno/models/together/together.py +20 -3
- agno/models/utils.py +254 -8
- agno/models/vercel/v0.py +20 -3
- agno/models/vertexai/__init__.py +0 -0
- agno/models/vertexai/claude.py +190 -0
- agno/models/vllm/vllm.py +19 -14
- agno/models/xai/xai.py +19 -2
- agno/os/app.py +549 -152
- agno/os/auth.py +190 -3
- agno/os/config.py +23 -0
- agno/os/interfaces/a2a/router.py +8 -11
- agno/os/interfaces/a2a/utils.py +1 -1
- agno/os/interfaces/agui/router.py +18 -3
- agno/os/interfaces/agui/utils.py +152 -39
- agno/os/interfaces/slack/router.py +55 -37
- agno/os/interfaces/slack/slack.py +9 -1
- agno/os/interfaces/whatsapp/router.py +0 -1
- agno/os/interfaces/whatsapp/security.py +3 -1
- agno/os/mcp.py +110 -52
- agno/os/middleware/__init__.py +2 -0
- agno/os/middleware/jwt.py +676 -112
- agno/os/router.py +40 -1478
- agno/os/routers/agents/__init__.py +3 -0
- agno/os/routers/agents/router.py +599 -0
- agno/os/routers/agents/schema.py +261 -0
- agno/os/routers/evals/evals.py +96 -39
- agno/os/routers/evals/schemas.py +65 -33
- agno/os/routers/evals/utils.py +80 -10
- agno/os/routers/health.py +10 -4
- agno/os/routers/knowledge/knowledge.py +196 -38
- agno/os/routers/knowledge/schemas.py +82 -22
- agno/os/routers/memory/memory.py +279 -52
- agno/os/routers/memory/schemas.py +46 -17
- agno/os/routers/metrics/metrics.py +20 -8
- agno/os/routers/metrics/schemas.py +16 -16
- agno/os/routers/session/session.py +462 -34
- agno/os/routers/teams/__init__.py +3 -0
- agno/os/routers/teams/router.py +512 -0
- agno/os/routers/teams/schema.py +257 -0
- agno/os/routers/traces/__init__.py +3 -0
- agno/os/routers/traces/schemas.py +414 -0
- agno/os/routers/traces/traces.py +499 -0
- agno/os/routers/workflows/__init__.py +3 -0
- agno/os/routers/workflows/router.py +624 -0
- agno/os/routers/workflows/schema.py +75 -0
- agno/os/schema.py +256 -693
- agno/os/scopes.py +469 -0
- agno/os/utils.py +514 -36
- agno/reasoning/anthropic.py +80 -0
- agno/reasoning/gemini.py +73 -0
- agno/reasoning/openai.py +5 -0
- agno/reasoning/vertexai.py +76 -0
- agno/run/__init__.py +6 -0
- agno/run/agent.py +155 -32
- agno/run/base.py +55 -3
- agno/run/requirement.py +181 -0
- agno/run/team.py +125 -38
- agno/run/workflow.py +72 -18
- agno/session/agent.py +102 -89
- agno/session/summary.py +56 -15
- agno/session/team.py +164 -90
- agno/session/workflow.py +405 -40
- agno/table.py +10 -0
- agno/team/team.py +3974 -1903
- agno/tools/dalle.py +2 -4
- agno/tools/eleven_labs.py +23 -25
- agno/tools/exa.py +21 -16
- agno/tools/file.py +153 -23
- agno/tools/file_generation.py +16 -10
- agno/tools/firecrawl.py +15 -7
- agno/tools/function.py +193 -38
- agno/tools/gmail.py +238 -14
- agno/tools/google_drive.py +271 -0
- agno/tools/googlecalendar.py +36 -8
- agno/tools/googlesheets.py +20 -5
- agno/tools/jira.py +20 -0
- agno/tools/mcp/__init__.py +10 -0
- agno/tools/mcp/mcp.py +331 -0
- agno/tools/mcp/multi_mcp.py +347 -0
- agno/tools/mcp/params.py +24 -0
- agno/tools/mcp_toolbox.py +3 -3
- agno/tools/models/nebius.py +5 -5
- agno/tools/models_labs.py +20 -10
- agno/tools/nano_banana.py +151 -0
- agno/tools/notion.py +204 -0
- agno/tools/parallel.py +314 -0
- agno/tools/postgres.py +76 -36
- agno/tools/redshift.py +406 -0
- agno/tools/scrapegraph.py +1 -1
- agno/tools/shopify.py +1519 -0
- agno/tools/slack.py +18 -3
- agno/tools/spotify.py +919 -0
- agno/tools/tavily.py +146 -0
- agno/tools/toolkit.py +25 -0
- agno/tools/workflow.py +8 -1
- agno/tools/yfinance.py +12 -11
- agno/tracing/__init__.py +12 -0
- agno/tracing/exporter.py +157 -0
- agno/tracing/schemas.py +276 -0
- agno/tracing/setup.py +111 -0
- agno/utils/agent.py +938 -0
- agno/utils/cryptography.py +22 -0
- agno/utils/dttm.py +33 -0
- agno/utils/events.py +151 -3
- agno/utils/gemini.py +15 -5
- agno/utils/hooks.py +118 -4
- agno/utils/http.py +113 -2
- agno/utils/knowledge.py +12 -5
- agno/utils/log.py +1 -0
- agno/utils/mcp.py +92 -2
- agno/utils/media.py +187 -1
- agno/utils/merge_dict.py +3 -3
- agno/utils/message.py +60 -0
- agno/utils/models/ai_foundry.py +9 -2
- agno/utils/models/claude.py +49 -14
- agno/utils/models/cohere.py +9 -2
- agno/utils/models/llama.py +9 -2
- agno/utils/models/mistral.py +4 -2
- agno/utils/print_response/agent.py +109 -16
- agno/utils/print_response/team.py +223 -30
- agno/utils/print_response/workflow.py +251 -34
- agno/utils/streamlit.py +1 -1
- agno/utils/team.py +98 -9
- agno/utils/tokens.py +657 -0
- agno/vectordb/base.py +39 -7
- agno/vectordb/cassandra/cassandra.py +21 -5
- agno/vectordb/chroma/chromadb.py +43 -12
- agno/vectordb/clickhouse/clickhousedb.py +21 -5
- agno/vectordb/couchbase/couchbase.py +29 -5
- agno/vectordb/lancedb/lance_db.py +92 -181
- agno/vectordb/langchaindb/langchaindb.py +24 -4
- agno/vectordb/lightrag/lightrag.py +17 -3
- agno/vectordb/llamaindex/llamaindexdb.py +25 -5
- agno/vectordb/milvus/milvus.py +50 -37
- agno/vectordb/mongodb/__init__.py +7 -1
- agno/vectordb/mongodb/mongodb.py +36 -30
- agno/vectordb/pgvector/pgvector.py +201 -77
- agno/vectordb/pineconedb/pineconedb.py +41 -23
- agno/vectordb/qdrant/qdrant.py +67 -54
- agno/vectordb/redis/__init__.py +9 -0
- agno/vectordb/redis/redisdb.py +682 -0
- agno/vectordb/singlestore/singlestore.py +50 -29
- agno/vectordb/surrealdb/surrealdb.py +31 -41
- agno/vectordb/upstashdb/upstashdb.py +34 -6
- agno/vectordb/weaviate/weaviate.py +53 -14
- agno/workflow/__init__.py +2 -0
- agno/workflow/agent.py +299 -0
- agno/workflow/condition.py +120 -18
- agno/workflow/loop.py +77 -10
- agno/workflow/parallel.py +231 -143
- agno/workflow/router.py +118 -17
- agno/workflow/step.py +609 -170
- agno/workflow/steps.py +73 -6
- agno/workflow/types.py +96 -21
- agno/workflow/workflow.py +2039 -262
- {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/METADATA +201 -66
- agno-2.3.13.dist-info/RECORD +613 -0
- agno/tools/googlesearch.py +0 -98
- agno/tools/mcp.py +0 -679
- agno/tools/memori.py +0 -339
- agno-2.1.2.dist-info/RECORD +0 -543
- {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/WHEEL +0 -0
- {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/licenses/LICENSE +0 -0
- {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from abc import ABC, abstractmethod
|
|
2
2
|
from enum import Enum
|
|
3
|
-
from typing import List
|
|
3
|
+
from typing import List, Optional
|
|
4
4
|
|
|
5
5
|
from agno.knowledge.document.base import Document
|
|
6
6
|
|
|
@@ -60,7 +60,13 @@ class ChunkingStrategyFactory:
|
|
|
60
60
|
"""Factory for creating chunking strategy instances."""
|
|
61
61
|
|
|
62
62
|
@classmethod
|
|
63
|
-
def create_strategy(
|
|
63
|
+
def create_strategy(
|
|
64
|
+
cls,
|
|
65
|
+
strategy_type: ChunkingStrategyType,
|
|
66
|
+
chunk_size: Optional[int] = None,
|
|
67
|
+
overlap: Optional[int] = None,
|
|
68
|
+
**kwargs,
|
|
69
|
+
) -> ChunkingStrategy:
|
|
64
70
|
"""Create an instance of the chunking strategy with the given parameters."""
|
|
65
71
|
strategy_map = {
|
|
66
72
|
ChunkingStrategyType.AGENTIC_CHUNKER: cls._create_agentic_chunking,
|
|
@@ -71,51 +77,89 @@ class ChunkingStrategyFactory:
|
|
|
71
77
|
ChunkingStrategyType.ROW_CHUNKER: cls._create_row_chunking,
|
|
72
78
|
ChunkingStrategyType.MARKDOWN_CHUNKER: cls._create_markdown_chunking,
|
|
73
79
|
}
|
|
74
|
-
return strategy_map[strategy_type](**kwargs)
|
|
80
|
+
return strategy_map[strategy_type](chunk_size=chunk_size, overlap=overlap, **kwargs)
|
|
75
81
|
|
|
76
82
|
@classmethod
|
|
77
|
-
def _create_agentic_chunking(
|
|
83
|
+
def _create_agentic_chunking(
|
|
84
|
+
cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
|
|
85
|
+
) -> ChunkingStrategy:
|
|
78
86
|
from agno.knowledge.chunking.agentic import AgenticChunking
|
|
79
87
|
|
|
80
|
-
#
|
|
81
|
-
if
|
|
82
|
-
kwargs["max_chunk_size"] =
|
|
88
|
+
# AgenticChunking accepts max_chunk_size (not chunk_size) and no overlap
|
|
89
|
+
if chunk_size is not None:
|
|
90
|
+
kwargs["max_chunk_size"] = chunk_size
|
|
91
|
+
# Remove overlap since AgenticChunking doesn't support it
|
|
83
92
|
return AgenticChunking(**kwargs)
|
|
84
93
|
|
|
85
94
|
@classmethod
|
|
86
|
-
def _create_document_chunking(
|
|
95
|
+
def _create_document_chunking(
|
|
96
|
+
cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
|
|
97
|
+
) -> ChunkingStrategy:
|
|
87
98
|
from agno.knowledge.chunking.document import DocumentChunking
|
|
88
99
|
|
|
100
|
+
# DocumentChunking accepts both chunk_size and overlap
|
|
101
|
+
if chunk_size is not None:
|
|
102
|
+
kwargs["chunk_size"] = chunk_size
|
|
103
|
+
if overlap is not None:
|
|
104
|
+
kwargs["overlap"] = overlap
|
|
89
105
|
return DocumentChunking(**kwargs)
|
|
90
106
|
|
|
91
107
|
@classmethod
|
|
92
|
-
def _create_recursive_chunking(
|
|
108
|
+
def _create_recursive_chunking(
|
|
109
|
+
cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
|
|
110
|
+
) -> ChunkingStrategy:
|
|
93
111
|
from agno.knowledge.chunking.recursive import RecursiveChunking
|
|
94
112
|
|
|
113
|
+
# RecursiveChunking accepts both chunk_size and overlap
|
|
114
|
+
if chunk_size is not None:
|
|
115
|
+
kwargs["chunk_size"] = chunk_size
|
|
116
|
+
if overlap is not None:
|
|
117
|
+
kwargs["overlap"] = overlap
|
|
95
118
|
return RecursiveChunking(**kwargs)
|
|
96
119
|
|
|
97
120
|
@classmethod
|
|
98
|
-
def _create_semantic_chunking(
|
|
121
|
+
def _create_semantic_chunking(
|
|
122
|
+
cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
|
|
123
|
+
) -> ChunkingStrategy:
|
|
99
124
|
from agno.knowledge.chunking.semantic import SemanticChunking
|
|
100
125
|
|
|
126
|
+
# SemanticChunking accepts chunk_size but not overlap
|
|
127
|
+
if chunk_size is not None:
|
|
128
|
+
kwargs["chunk_size"] = chunk_size
|
|
129
|
+
# Remove overlap since SemanticChunking doesn't support it
|
|
101
130
|
return SemanticChunking(**kwargs)
|
|
102
131
|
|
|
103
132
|
@classmethod
|
|
104
|
-
def _create_fixed_chunking(
|
|
133
|
+
def _create_fixed_chunking(
|
|
134
|
+
cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
|
|
135
|
+
) -> ChunkingStrategy:
|
|
105
136
|
from agno.knowledge.chunking.fixed import FixedSizeChunking
|
|
106
137
|
|
|
138
|
+
# FixedSizeChunking accepts both chunk_size and overlap
|
|
139
|
+
if chunk_size is not None:
|
|
140
|
+
kwargs["chunk_size"] = chunk_size
|
|
141
|
+
if overlap is not None:
|
|
142
|
+
kwargs["overlap"] = overlap
|
|
107
143
|
return FixedSizeChunking(**kwargs)
|
|
108
144
|
|
|
109
145
|
@classmethod
|
|
110
|
-
def _create_row_chunking(
|
|
146
|
+
def _create_row_chunking(
|
|
147
|
+
cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
|
|
148
|
+
) -> ChunkingStrategy:
|
|
111
149
|
from agno.knowledge.chunking.row import RowChunking
|
|
112
150
|
|
|
113
|
-
#
|
|
114
|
-
kwargs.pop("chunk_size", None)
|
|
151
|
+
# RowChunking doesn't accept chunk_size or overlap, only skip_header and clean_rows
|
|
115
152
|
return RowChunking(**kwargs)
|
|
116
153
|
|
|
117
154
|
@classmethod
|
|
118
|
-
def _create_markdown_chunking(
|
|
155
|
+
def _create_markdown_chunking(
|
|
156
|
+
cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
|
|
157
|
+
) -> ChunkingStrategy:
|
|
119
158
|
from agno.knowledge.chunking.markdown import MarkdownChunking
|
|
120
159
|
|
|
160
|
+
# MarkdownChunking accepts both chunk_size and overlap
|
|
161
|
+
if chunk_size is not None:
|
|
162
|
+
kwargs["chunk_size"] = chunk_size
|
|
163
|
+
if overlap is not None:
|
|
164
|
+
kwargs["overlap"] = overlap
|
|
121
165
|
return MarkdownChunking(**kwargs)
|
|
@@ -23,7 +23,7 @@ class FastEmbedEmbedder(Embedder):
|
|
|
23
23
|
"""Using BAAI/bge-small-en-v1.5 model, more models available: https://qdrant.github.io/fastembed/examples/Supported_Models/"""
|
|
24
24
|
|
|
25
25
|
id: str = "BAAI/bge-small-en-v1.5"
|
|
26
|
-
dimensions: int = 384
|
|
26
|
+
dimensions: Optional[int] = 384
|
|
27
27
|
|
|
28
28
|
def get_embedding(self, text: str) -> List[float]:
|
|
29
29
|
model = TextEmbedding(model_name=self.id)
|
|
@@ -85,6 +85,10 @@ class OllamaEmbedder(Embedder):
|
|
|
85
85
|
if self.options is not None:
|
|
86
86
|
kwargs["options"] = self.options
|
|
87
87
|
|
|
88
|
+
# Add dimensions parameter for models that support it
|
|
89
|
+
if self.dimensions is not None:
|
|
90
|
+
kwargs["dimensions"] = self.dimensions
|
|
91
|
+
|
|
88
92
|
response = self.client.embed(input=text, model=self.id, **kwargs)
|
|
89
93
|
if response and "embeddings" in response:
|
|
90
94
|
embeddings = response["embeddings"]
|
|
@@ -117,6 +121,10 @@ class OllamaEmbedder(Embedder):
|
|
|
117
121
|
if self.options is not None:
|
|
118
122
|
kwargs["options"] = self.options
|
|
119
123
|
|
|
124
|
+
# Add dimensions parameter for models that support it
|
|
125
|
+
if self.dimensions is not None:
|
|
126
|
+
kwargs["dimensions"] = self.dimensions
|
|
127
|
+
|
|
120
128
|
response = await self.aclient.embed(input=text, model=self.id, **kwargs)
|
|
121
129
|
if response and "embeddings" in response:
|
|
122
130
|
embeddings = response["embeddings"]
|
|
@@ -4,7 +4,7 @@ from typing import Any, Dict, List, Optional, Tuple
|
|
|
4
4
|
from typing_extensions import Literal
|
|
5
5
|
|
|
6
6
|
from agno.knowledge.embedder.base import Embedder
|
|
7
|
-
from agno.utils.log import
|
|
7
|
+
from agno.utils.log import log_info, log_warning
|
|
8
8
|
|
|
9
9
|
try:
|
|
10
10
|
from openai import AsyncOpenAI
|
|
@@ -82,7 +82,7 @@ class OpenAIEmbedder(Embedder):
|
|
|
82
82
|
response: CreateEmbeddingResponse = self.response(text=text)
|
|
83
83
|
return response.data[0].embedding
|
|
84
84
|
except Exception as e:
|
|
85
|
-
|
|
85
|
+
log_warning(e)
|
|
86
86
|
return []
|
|
87
87
|
|
|
88
88
|
def get_embedding_and_usage(self, text: str) -> Tuple[List[float], Optional[Dict]]:
|
|
@@ -95,7 +95,7 @@ class OpenAIEmbedder(Embedder):
|
|
|
95
95
|
return embedding, usage.model_dump()
|
|
96
96
|
return embedding, None
|
|
97
97
|
except Exception as e:
|
|
98
|
-
|
|
98
|
+
log_warning(e)
|
|
99
99
|
return [], None
|
|
100
100
|
|
|
101
101
|
async def async_get_embedding(self, text: str) -> List[float]:
|
|
@@ -115,7 +115,7 @@ class OpenAIEmbedder(Embedder):
|
|
|
115
115
|
response: CreateEmbeddingResponse = await self.aclient.embeddings.create(**req)
|
|
116
116
|
return response.data[0].embedding
|
|
117
117
|
except Exception as e:
|
|
118
|
-
|
|
118
|
+
log_warning(e)
|
|
119
119
|
return []
|
|
120
120
|
|
|
121
121
|
async def async_get_embedding_and_usage(self, text: str):
|
|
@@ -137,7 +137,7 @@ class OpenAIEmbedder(Embedder):
|
|
|
137
137
|
usage = response.usage
|
|
138
138
|
return embedding, usage.model_dump() if usage else None
|
|
139
139
|
except Exception as e:
|
|
140
|
-
|
|
140
|
+
log_warning(f"Error getting embedding: {e}")
|
|
141
141
|
return [], None
|
|
142
142
|
|
|
143
143
|
async def async_get_embeddings_batch_and_usage(
|
|
@@ -154,7 +154,7 @@ class OpenAIEmbedder(Embedder):
|
|
|
154
154
|
"""
|
|
155
155
|
all_embeddings = []
|
|
156
156
|
all_usage = []
|
|
157
|
-
|
|
157
|
+
log_info(f"Getting embeddings and usage for {len(texts)} texts in batches of {self.batch_size} (async)")
|
|
158
158
|
|
|
159
159
|
for i in range(0, len(texts), self.batch_size):
|
|
160
160
|
batch_texts = texts[i : i + self.batch_size]
|
|
@@ -180,7 +180,7 @@ class OpenAIEmbedder(Embedder):
|
|
|
180
180
|
usage_dict = response.usage.model_dump() if response.usage else None
|
|
181
181
|
all_usage.extend([usage_dict] * len(batch_embeddings))
|
|
182
182
|
except Exception as e:
|
|
183
|
-
|
|
183
|
+
log_warning(f"Error in async batch embedding: {e}")
|
|
184
184
|
# Fallback to individual calls for this batch
|
|
185
185
|
for text in batch_texts:
|
|
186
186
|
try:
|
|
@@ -188,7 +188,7 @@ class OpenAIEmbedder(Embedder):
|
|
|
188
188
|
all_embeddings.append(embedding)
|
|
189
189
|
all_usage.append(usage)
|
|
190
190
|
except Exception as e2:
|
|
191
|
-
|
|
191
|
+
log_warning(f"Error in individual async embedding fallback: {e2}")
|
|
192
192
|
all_embeddings.append([])
|
|
193
193
|
all_usage.append(None)
|
|
194
194
|
|
|
@@ -25,10 +25,14 @@ class SentenceTransformerEmbedder(Embedder):
|
|
|
25
25
|
prompt: Optional[str] = None
|
|
26
26
|
normalize_embeddings: bool = False
|
|
27
27
|
|
|
28
|
-
def
|
|
29
|
-
|
|
28
|
+
def __post_init__(self):
|
|
29
|
+
# Initialize the SentenceTransformer model eagerly to avoid race conditions in async contexts
|
|
30
|
+
if self.sentence_transformer_client is None:
|
|
30
31
|
self.sentence_transformer_client = SentenceTransformer(model_name_or_path=self.id)
|
|
31
32
|
|
|
33
|
+
def get_embedding(self, text: Union[str, List[str]]) -> List[float]:
|
|
34
|
+
if self.sentence_transformer_client is None:
|
|
35
|
+
raise RuntimeError("SentenceTransformer model not initialized")
|
|
32
36
|
model = self.sentence_transformer_client
|
|
33
37
|
embedding = model.encode(text, prompt=self.prompt, normalize_embeddings=self.normalize_embeddings)
|
|
34
38
|
try:
|
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from os import getenv
|
|
4
|
+
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
|
|
5
|
+
|
|
6
|
+
from agno.knowledge.embedder.base import Embedder
|
|
7
|
+
from agno.utils.log import logger
|
|
8
|
+
|
|
9
|
+
try:
|
|
10
|
+
from vllm import LLM # type: ignore
|
|
11
|
+
from vllm.outputs import EmbeddingRequestOutput # type: ignore
|
|
12
|
+
except ImportError:
|
|
13
|
+
raise ImportError("`vllm` not installed. Please install using `pip install vllm`.")
|
|
14
|
+
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
from openai import AsyncOpenAI
|
|
17
|
+
from openai import OpenAI as OpenAIClient
|
|
18
|
+
from openai.types.create_embedding_response import CreateEmbeddingResponse
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
|
|
22
|
+
class VLLMEmbedder(Embedder):
|
|
23
|
+
"""
|
|
24
|
+
VLLM Embedder supporting both local and remote deployment modes.
|
|
25
|
+
|
|
26
|
+
Local Mode (default):
|
|
27
|
+
- Loads model locally and runs inference on your GPU/CPU
|
|
28
|
+
- No API key required
|
|
29
|
+
- Example: VLLMEmbedder(id="intfloat/e5-mistral-7b-instruct")
|
|
30
|
+
|
|
31
|
+
Remote Mode:
|
|
32
|
+
- Connects to a remote vLLM server via OpenAI-compatible API
|
|
33
|
+
- Uses OpenAI SDK to communicate with vLLM's OpenAI-compatible endpoint
|
|
34
|
+
- Requires base_url and optionally api_key
|
|
35
|
+
- Example: VLLMEmbedder(base_url="http://localhost:8000/v1", api_key="your-key")
|
|
36
|
+
- Ref: https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html
|
|
37
|
+
"""
|
|
38
|
+
|
|
39
|
+
id: str = "sentence-transformers/all-MiniLM-L6-v2"
|
|
40
|
+
dimensions: int = 4096
|
|
41
|
+
# Local mode parameters
|
|
42
|
+
enforce_eager: bool = True
|
|
43
|
+
vllm_kwargs: Optional[Dict[str, Any]] = None
|
|
44
|
+
vllm_client: Optional[LLM] = None
|
|
45
|
+
# Remote mode parameters
|
|
46
|
+
api_key: Optional[str] = getenv("VLLM_API_KEY")
|
|
47
|
+
base_url: Optional[str] = None
|
|
48
|
+
request_params: Optional[Dict[str, Any]] = None
|
|
49
|
+
client_params: Optional[Dict[str, Any]] = None
|
|
50
|
+
remote_client: Optional["OpenAIClient"] = None # OpenAI-compatible client for vLLM server
|
|
51
|
+
async_remote_client: Optional["AsyncOpenAI"] = None # Async OpenAI-compatible client for vLLM server
|
|
52
|
+
|
|
53
|
+
@property
|
|
54
|
+
def is_remote(self) -> bool:
|
|
55
|
+
"""Determine if we should use remote mode."""
|
|
56
|
+
return self.base_url is not None
|
|
57
|
+
|
|
58
|
+
def _get_vllm_client(self) -> LLM:
|
|
59
|
+
"""Get local VLLM client."""
|
|
60
|
+
if self.vllm_client:
|
|
61
|
+
return self.vllm_client
|
|
62
|
+
|
|
63
|
+
_vllm_params: Dict[str, Any] = {
|
|
64
|
+
"model": self.id,
|
|
65
|
+
"task": "embed",
|
|
66
|
+
"enforce_eager": self.enforce_eager,
|
|
67
|
+
}
|
|
68
|
+
if self.vllm_kwargs:
|
|
69
|
+
_vllm_params.update(self.vllm_kwargs)
|
|
70
|
+
self.vllm_client = LLM(**_vllm_params)
|
|
71
|
+
return self.vllm_client
|
|
72
|
+
|
|
73
|
+
def _get_remote_client(self) -> "OpenAIClient":
|
|
74
|
+
"""Get OpenAI-compatible client for remote vLLM server."""
|
|
75
|
+
if self.remote_client:
|
|
76
|
+
return self.remote_client
|
|
77
|
+
|
|
78
|
+
try:
|
|
79
|
+
from openai import OpenAI as OpenAIClient
|
|
80
|
+
except ImportError:
|
|
81
|
+
raise ImportError("`openai` package required for remote vLLM mode. ")
|
|
82
|
+
|
|
83
|
+
_client_params: Dict[str, Any] = {
|
|
84
|
+
"api_key": self.api_key or "EMPTY", # VLLM can run without API key
|
|
85
|
+
"base_url": self.base_url,
|
|
86
|
+
}
|
|
87
|
+
if self.client_params:
|
|
88
|
+
_client_params.update(self.client_params)
|
|
89
|
+
self.remote_client = OpenAIClient(**_client_params)
|
|
90
|
+
return self.remote_client
|
|
91
|
+
|
|
92
|
+
def _get_async_remote_client(self) -> "AsyncOpenAI":
|
|
93
|
+
"""Get async OpenAI-compatible client for remote vLLM server."""
|
|
94
|
+
if self.async_remote_client:
|
|
95
|
+
return self.async_remote_client
|
|
96
|
+
|
|
97
|
+
try:
|
|
98
|
+
from openai import AsyncOpenAI
|
|
99
|
+
except ImportError:
|
|
100
|
+
raise ImportError("`openai` package required for remote vLLM mode. ")
|
|
101
|
+
|
|
102
|
+
_client_params: Dict[str, Any] = {
|
|
103
|
+
"api_key": self.api_key or "EMPTY",
|
|
104
|
+
"base_url": self.base_url,
|
|
105
|
+
}
|
|
106
|
+
if self.client_params:
|
|
107
|
+
_client_params.update(self.client_params)
|
|
108
|
+
self.async_remote_client = AsyncOpenAI(**_client_params)
|
|
109
|
+
return self.async_remote_client
|
|
110
|
+
|
|
111
|
+
def _create_embedding_local(self, text: str) -> Optional[EmbeddingRequestOutput]:
|
|
112
|
+
"""Create embedding using local VLLM."""
|
|
113
|
+
try:
|
|
114
|
+
outputs = self._get_vllm_client().embed([text])
|
|
115
|
+
return outputs[0] if outputs else None
|
|
116
|
+
except Exception as e:
|
|
117
|
+
logger.warning(f"Error creating local embedding: {e}")
|
|
118
|
+
return None
|
|
119
|
+
|
|
120
|
+
def _create_embedding_remote(self, text: str) -> "CreateEmbeddingResponse":
|
|
121
|
+
"""Create embedding using remote vLLM server."""
|
|
122
|
+
_request_params: Dict[str, Any] = {
|
|
123
|
+
"input": text,
|
|
124
|
+
"model": self.id,
|
|
125
|
+
}
|
|
126
|
+
if self.request_params:
|
|
127
|
+
_request_params.update(self.request_params)
|
|
128
|
+
return self._get_remote_client().embeddings.create(**_request_params)
|
|
129
|
+
|
|
130
|
+
def get_embedding(self, text: str) -> List[float]:
|
|
131
|
+
try:
|
|
132
|
+
if self.is_remote:
|
|
133
|
+
# Remote mode: OpenAI-compatible API
|
|
134
|
+
response: "CreateEmbeddingResponse" = self._create_embedding_remote(text=text)
|
|
135
|
+
return response.data[0].embedding
|
|
136
|
+
else:
|
|
137
|
+
# Local mode: Direct VLLM
|
|
138
|
+
output = self._create_embedding_local(text=text)
|
|
139
|
+
if output and hasattr(output, "outputs") and hasattr(output.outputs, "embedding"):
|
|
140
|
+
embedding = output.outputs.embedding
|
|
141
|
+
if len(embedding) != self.dimensions:
|
|
142
|
+
logger.warning(f"Expected embedding dimension {self.dimensions}, but got {len(embedding)}")
|
|
143
|
+
return embedding
|
|
144
|
+
return []
|
|
145
|
+
except Exception as e:
|
|
146
|
+
logger.warning(f"Error extracting embedding: {e}")
|
|
147
|
+
return []
|
|
148
|
+
|
|
149
|
+
def get_embedding_and_usage(self, text: str) -> Tuple[List[float], Optional[Dict]]:
|
|
150
|
+
if self.is_remote:
|
|
151
|
+
try:
|
|
152
|
+
response: "CreateEmbeddingResponse" = self._create_embedding_remote(text=text)
|
|
153
|
+
embedding = response.data[0].embedding
|
|
154
|
+
usage = response.usage
|
|
155
|
+
if usage:
|
|
156
|
+
return embedding, usage.model_dump()
|
|
157
|
+
return embedding, None
|
|
158
|
+
except Exception as e:
|
|
159
|
+
logger.warning(f"Error in remote embedding: {e}")
|
|
160
|
+
return [], None
|
|
161
|
+
else:
|
|
162
|
+
embedding = self.get_embedding(text=text)
|
|
163
|
+
# Local VLLM doesn't provide usage information
|
|
164
|
+
return embedding, None
|
|
165
|
+
|
|
166
|
+
async def async_get_embedding(self, text: str) -> List[float]:
|
|
167
|
+
"""Async version of get_embedding using thread executor for local mode."""
|
|
168
|
+
if self.is_remote:
|
|
169
|
+
# Remote mode: async client for vLLM server
|
|
170
|
+
try:
|
|
171
|
+
req: Dict[str, Any] = {
|
|
172
|
+
"input": text,
|
|
173
|
+
"model": self.id,
|
|
174
|
+
}
|
|
175
|
+
if self.request_params:
|
|
176
|
+
req.update(self.request_params)
|
|
177
|
+
response: "CreateEmbeddingResponse" = await self._get_async_remote_client().embeddings.create(**req)
|
|
178
|
+
return response.data[0].embedding
|
|
179
|
+
except Exception as e:
|
|
180
|
+
logger.warning(f"Error in async remote embedding: {e}")
|
|
181
|
+
return []
|
|
182
|
+
else:
|
|
183
|
+
# Local mode: use thread executor for CPU-bound operations
|
|
184
|
+
loop = asyncio.get_event_loop()
|
|
185
|
+
return await loop.run_in_executor(None, self.get_embedding, text)
|
|
186
|
+
|
|
187
|
+
async def async_get_embedding_and_usage(self, text: str) -> Tuple[List[float], Optional[Dict]]:
|
|
188
|
+
"""Async version of get_embedding_and_usage using thread executor for local mode."""
|
|
189
|
+
if self.is_remote:
|
|
190
|
+
try:
|
|
191
|
+
req: Dict[str, Any] = {
|
|
192
|
+
"input": text,
|
|
193
|
+
"model": self.id,
|
|
194
|
+
}
|
|
195
|
+
if self.request_params:
|
|
196
|
+
req.update(self.request_params)
|
|
197
|
+
response: "CreateEmbeddingResponse" = await self._get_async_remote_client().embeddings.create(**req)
|
|
198
|
+
embedding = response.data[0].embedding
|
|
199
|
+
usage = response.usage
|
|
200
|
+
return embedding, usage.model_dump() if usage else None
|
|
201
|
+
except Exception as e:
|
|
202
|
+
logger.warning(f"Error in async remote embedding: {e}")
|
|
203
|
+
return [], None
|
|
204
|
+
else:
|
|
205
|
+
# Local mode: use thread executor for CPU-bound operations
|
|
206
|
+
try:
|
|
207
|
+
loop = asyncio.get_event_loop()
|
|
208
|
+
return await loop.run_in_executor(None, self.get_embedding_and_usage, text)
|
|
209
|
+
except Exception as e:
|
|
210
|
+
logger.warning(f"Error in async local embedding: {e}")
|
|
211
|
+
return [], None
|
|
212
|
+
|
|
213
|
+
async def async_get_embeddings_batch_and_usage(
|
|
214
|
+
self, texts: List[str]
|
|
215
|
+
) -> Tuple[List[List[float]], List[Optional[Dict]]]:
|
|
216
|
+
"""
|
|
217
|
+
Get embeddings and usage for multiple texts in batches (async version).
|
|
218
|
+
|
|
219
|
+
Args:
|
|
220
|
+
texts: List of text strings to embed
|
|
221
|
+
|
|
222
|
+
Returns:
|
|
223
|
+
Tuple of (List of embedding vectors, List of usage dictionaries)
|
|
224
|
+
"""
|
|
225
|
+
all_embeddings = []
|
|
226
|
+
all_usage = []
|
|
227
|
+
logger.info(f"Getting embeddings for {len(texts)} texts in batches of {self.batch_size} (async)")
|
|
228
|
+
|
|
229
|
+
for i in range(0, len(texts), self.batch_size):
|
|
230
|
+
batch_texts = texts[i : i + self.batch_size]
|
|
231
|
+
|
|
232
|
+
try:
|
|
233
|
+
if self.is_remote:
|
|
234
|
+
# Remote mode: use batch API
|
|
235
|
+
req: Dict[str, Any] = {
|
|
236
|
+
"input": batch_texts,
|
|
237
|
+
"model": self.id,
|
|
238
|
+
}
|
|
239
|
+
if self.request_params:
|
|
240
|
+
req.update(self.request_params)
|
|
241
|
+
response: "CreateEmbeddingResponse" = await self._get_async_remote_client().embeddings.create(**req)
|
|
242
|
+
batch_embeddings = [data.embedding for data in response.data]
|
|
243
|
+
all_embeddings.extend(batch_embeddings)
|
|
244
|
+
|
|
245
|
+
# For each embedding in the batch, add the same usage information
|
|
246
|
+
usage_dict = response.usage.model_dump() if response.usage else None
|
|
247
|
+
all_usage.extend([usage_dict] * len(batch_embeddings))
|
|
248
|
+
else:
|
|
249
|
+
# Local mode: process individually using thread executor
|
|
250
|
+
for text in batch_texts:
|
|
251
|
+
embedding, usage = await self.async_get_embedding_and_usage(text)
|
|
252
|
+
all_embeddings.append(embedding)
|
|
253
|
+
all_usage.append(usage)
|
|
254
|
+
|
|
255
|
+
except Exception as e:
|
|
256
|
+
logger.warning(f"Error in async batch embedding: {e}")
|
|
257
|
+
# Fallback: add empty results for failed batch
|
|
258
|
+
for _ in batch_texts:
|
|
259
|
+
all_embeddings.append([])
|
|
260
|
+
all_usage.append(None)
|
|
261
|
+
|
|
262
|
+
return all_embeddings, all_usage
|