agno 2.1.2__py3-none-any.whl → 2.3.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (314)
  1. agno/agent/agent.py +5540 -2273
  2. agno/api/api.py +2 -0
  3. agno/api/os.py +1 -1
  4. agno/compression/__init__.py +3 -0
  5. agno/compression/manager.py +247 -0
  6. agno/culture/__init__.py +3 -0
  7. agno/culture/manager.py +956 -0
  8. agno/db/async_postgres/__init__.py +3 -0
  9. agno/db/base.py +689 -6
  10. agno/db/dynamo/dynamo.py +933 -37
  11. agno/db/dynamo/schemas.py +174 -10
  12. agno/db/dynamo/utils.py +63 -4
  13. agno/db/firestore/firestore.py +831 -9
  14. agno/db/firestore/schemas.py +51 -0
  15. agno/db/firestore/utils.py +102 -4
  16. agno/db/gcs_json/gcs_json_db.py +660 -12
  17. agno/db/gcs_json/utils.py +60 -26
  18. agno/db/in_memory/in_memory_db.py +287 -14
  19. agno/db/in_memory/utils.py +60 -2
  20. agno/db/json/json_db.py +590 -14
  21. agno/db/json/utils.py +60 -26
  22. agno/db/migrations/manager.py +199 -0
  23. agno/db/migrations/v1_to_v2.py +43 -13
  24. agno/db/migrations/versions/__init__.py +0 -0
  25. agno/db/migrations/versions/v2_3_0.py +938 -0
  26. agno/db/mongo/__init__.py +15 -1
  27. agno/db/mongo/async_mongo.py +2760 -0
  28. agno/db/mongo/mongo.py +879 -11
  29. agno/db/mongo/schemas.py +42 -0
  30. agno/db/mongo/utils.py +80 -8
  31. agno/db/mysql/__init__.py +2 -1
  32. agno/db/mysql/async_mysql.py +2912 -0
  33. agno/db/mysql/mysql.py +946 -68
  34. agno/db/mysql/schemas.py +72 -10
  35. agno/db/mysql/utils.py +198 -7
  36. agno/db/postgres/__init__.py +2 -1
  37. agno/db/postgres/async_postgres.py +2579 -0
  38. agno/db/postgres/postgres.py +942 -57
  39. agno/db/postgres/schemas.py +81 -18
  40. agno/db/postgres/utils.py +164 -2
  41. agno/db/redis/redis.py +671 -7
  42. agno/db/redis/schemas.py +50 -0
  43. agno/db/redis/utils.py +65 -7
  44. agno/db/schemas/__init__.py +2 -1
  45. agno/db/schemas/culture.py +120 -0
  46. agno/db/schemas/evals.py +1 -0
  47. agno/db/schemas/memory.py +17 -2
  48. agno/db/singlestore/schemas.py +63 -0
  49. agno/db/singlestore/singlestore.py +949 -83
  50. agno/db/singlestore/utils.py +60 -2
  51. agno/db/sqlite/__init__.py +2 -1
  52. agno/db/sqlite/async_sqlite.py +2911 -0
  53. agno/db/sqlite/schemas.py +62 -0
  54. agno/db/sqlite/sqlite.py +965 -46
  55. agno/db/sqlite/utils.py +169 -8
  56. agno/db/surrealdb/__init__.py +3 -0
  57. agno/db/surrealdb/metrics.py +292 -0
  58. agno/db/surrealdb/models.py +334 -0
  59. agno/db/surrealdb/queries.py +71 -0
  60. agno/db/surrealdb/surrealdb.py +1908 -0
  61. agno/db/surrealdb/utils.py +147 -0
  62. agno/db/utils.py +2 -0
  63. agno/eval/__init__.py +10 -0
  64. agno/eval/accuracy.py +75 -55
  65. agno/eval/agent_as_judge.py +861 -0
  66. agno/eval/base.py +29 -0
  67. agno/eval/performance.py +16 -7
  68. agno/eval/reliability.py +28 -16
  69. agno/eval/utils.py +35 -17
  70. agno/exceptions.py +27 -2
  71. agno/filters.py +354 -0
  72. agno/guardrails/prompt_injection.py +1 -0
  73. agno/hooks/__init__.py +3 -0
  74. agno/hooks/decorator.py +164 -0
  75. agno/integrations/discord/client.py +1 -1
  76. agno/knowledge/chunking/agentic.py +13 -10
  77. agno/knowledge/chunking/fixed.py +4 -1
  78. agno/knowledge/chunking/semantic.py +9 -4
  79. agno/knowledge/chunking/strategy.py +59 -15
  80. agno/knowledge/embedder/fastembed.py +1 -1
  81. agno/knowledge/embedder/nebius.py +1 -1
  82. agno/knowledge/embedder/ollama.py +8 -0
  83. agno/knowledge/embedder/openai.py +8 -8
  84. agno/knowledge/embedder/sentence_transformer.py +6 -2
  85. agno/knowledge/embedder/vllm.py +262 -0
  86. agno/knowledge/knowledge.py +1618 -318
  87. agno/knowledge/reader/base.py +6 -2
  88. agno/knowledge/reader/csv_reader.py +8 -10
  89. agno/knowledge/reader/docx_reader.py +5 -6
  90. agno/knowledge/reader/field_labeled_csv_reader.py +16 -20
  91. agno/knowledge/reader/json_reader.py +5 -4
  92. agno/knowledge/reader/markdown_reader.py +8 -8
  93. agno/knowledge/reader/pdf_reader.py +17 -19
  94. agno/knowledge/reader/pptx_reader.py +101 -0
  95. agno/knowledge/reader/reader_factory.py +32 -3
  96. agno/knowledge/reader/s3_reader.py +3 -3
  97. agno/knowledge/reader/tavily_reader.py +193 -0
  98. agno/knowledge/reader/text_reader.py +22 -10
  99. agno/knowledge/reader/web_search_reader.py +1 -48
  100. agno/knowledge/reader/website_reader.py +10 -10
  101. agno/knowledge/reader/wikipedia_reader.py +33 -1
  102. agno/knowledge/types.py +1 -0
  103. agno/knowledge/utils.py +72 -7
  104. agno/media.py +22 -6
  105. agno/memory/__init__.py +14 -1
  106. agno/memory/manager.py +544 -83
  107. agno/memory/strategies/__init__.py +15 -0
  108. agno/memory/strategies/base.py +66 -0
  109. agno/memory/strategies/summarize.py +196 -0
  110. agno/memory/strategies/types.py +37 -0
  111. agno/models/aimlapi/aimlapi.py +17 -0
  112. agno/models/anthropic/claude.py +515 -40
  113. agno/models/aws/bedrock.py +102 -21
  114. agno/models/aws/claude.py +131 -274
  115. agno/models/azure/ai_foundry.py +41 -19
  116. agno/models/azure/openai_chat.py +39 -8
  117. agno/models/base.py +1249 -525
  118. agno/models/cerebras/cerebras.py +91 -21
  119. agno/models/cerebras/cerebras_openai.py +21 -2
  120. agno/models/cohere/chat.py +40 -6
  121. agno/models/cometapi/cometapi.py +18 -1
  122. agno/models/dashscope/dashscope.py +2 -3
  123. agno/models/deepinfra/deepinfra.py +18 -1
  124. agno/models/deepseek/deepseek.py +69 -3
  125. agno/models/fireworks/fireworks.py +18 -1
  126. agno/models/google/gemini.py +877 -80
  127. agno/models/google/utils.py +22 -0
  128. agno/models/groq/groq.py +51 -18
  129. agno/models/huggingface/huggingface.py +17 -6
  130. agno/models/ibm/watsonx.py +16 -6
  131. agno/models/internlm/internlm.py +18 -1
  132. agno/models/langdb/langdb.py +13 -1
  133. agno/models/litellm/chat.py +44 -9
  134. agno/models/litellm/litellm_openai.py +18 -1
  135. agno/models/message.py +28 -5
  136. agno/models/meta/llama.py +47 -14
  137. agno/models/meta/llama_openai.py +22 -17
  138. agno/models/mistral/mistral.py +8 -4
  139. agno/models/nebius/nebius.py +6 -7
  140. agno/models/nvidia/nvidia.py +20 -3
  141. agno/models/ollama/chat.py +24 -8
  142. agno/models/openai/chat.py +104 -29
  143. agno/models/openai/responses.py +101 -81
  144. agno/models/openrouter/openrouter.py +60 -3
  145. agno/models/perplexity/perplexity.py +17 -1
  146. agno/models/portkey/portkey.py +7 -6
  147. agno/models/requesty/requesty.py +24 -4
  148. agno/models/response.py +73 -2
  149. agno/models/sambanova/sambanova.py +20 -3
  150. agno/models/siliconflow/siliconflow.py +19 -2
  151. agno/models/together/together.py +20 -3
  152. agno/models/utils.py +254 -8
  153. agno/models/vercel/v0.py +20 -3
  154. agno/models/vertexai/__init__.py +0 -0
  155. agno/models/vertexai/claude.py +190 -0
  156. agno/models/vllm/vllm.py +19 -14
  157. agno/models/xai/xai.py +19 -2
  158. agno/os/app.py +549 -152
  159. agno/os/auth.py +190 -3
  160. agno/os/config.py +23 -0
  161. agno/os/interfaces/a2a/router.py +8 -11
  162. agno/os/interfaces/a2a/utils.py +1 -1
  163. agno/os/interfaces/agui/router.py +18 -3
  164. agno/os/interfaces/agui/utils.py +152 -39
  165. agno/os/interfaces/slack/router.py +55 -37
  166. agno/os/interfaces/slack/slack.py +9 -1
  167. agno/os/interfaces/whatsapp/router.py +0 -1
  168. agno/os/interfaces/whatsapp/security.py +3 -1
  169. agno/os/mcp.py +110 -52
  170. agno/os/middleware/__init__.py +2 -0
  171. agno/os/middleware/jwt.py +676 -112
  172. agno/os/router.py +40 -1478
  173. agno/os/routers/agents/__init__.py +3 -0
  174. agno/os/routers/agents/router.py +599 -0
  175. agno/os/routers/agents/schema.py +261 -0
  176. agno/os/routers/evals/evals.py +96 -39
  177. agno/os/routers/evals/schemas.py +65 -33
  178. agno/os/routers/evals/utils.py +80 -10
  179. agno/os/routers/health.py +10 -4
  180. agno/os/routers/knowledge/knowledge.py +196 -38
  181. agno/os/routers/knowledge/schemas.py +82 -22
  182. agno/os/routers/memory/memory.py +279 -52
  183. agno/os/routers/memory/schemas.py +46 -17
  184. agno/os/routers/metrics/metrics.py +20 -8
  185. agno/os/routers/metrics/schemas.py +16 -16
  186. agno/os/routers/session/session.py +462 -34
  187. agno/os/routers/teams/__init__.py +3 -0
  188. agno/os/routers/teams/router.py +512 -0
  189. agno/os/routers/teams/schema.py +257 -0
  190. agno/os/routers/traces/__init__.py +3 -0
  191. agno/os/routers/traces/schemas.py +414 -0
  192. agno/os/routers/traces/traces.py +499 -0
  193. agno/os/routers/workflows/__init__.py +3 -0
  194. agno/os/routers/workflows/router.py +624 -0
  195. agno/os/routers/workflows/schema.py +75 -0
  196. agno/os/schema.py +256 -693
  197. agno/os/scopes.py +469 -0
  198. agno/os/utils.py +514 -36
  199. agno/reasoning/anthropic.py +80 -0
  200. agno/reasoning/gemini.py +73 -0
  201. agno/reasoning/openai.py +5 -0
  202. agno/reasoning/vertexai.py +76 -0
  203. agno/run/__init__.py +6 -0
  204. agno/run/agent.py +155 -32
  205. agno/run/base.py +55 -3
  206. agno/run/requirement.py +181 -0
  207. agno/run/team.py +125 -38
  208. agno/run/workflow.py +72 -18
  209. agno/session/agent.py +102 -89
  210. agno/session/summary.py +56 -15
  211. agno/session/team.py +164 -90
  212. agno/session/workflow.py +405 -40
  213. agno/table.py +10 -0
  214. agno/team/team.py +3974 -1903
  215. agno/tools/dalle.py +2 -4
  216. agno/tools/eleven_labs.py +23 -25
  217. agno/tools/exa.py +21 -16
  218. agno/tools/file.py +153 -23
  219. agno/tools/file_generation.py +16 -10
  220. agno/tools/firecrawl.py +15 -7
  221. agno/tools/function.py +193 -38
  222. agno/tools/gmail.py +238 -14
  223. agno/tools/google_drive.py +271 -0
  224. agno/tools/googlecalendar.py +36 -8
  225. agno/tools/googlesheets.py +20 -5
  226. agno/tools/jira.py +20 -0
  227. agno/tools/mcp/__init__.py +10 -0
  228. agno/tools/mcp/mcp.py +331 -0
  229. agno/tools/mcp/multi_mcp.py +347 -0
  230. agno/tools/mcp/params.py +24 -0
  231. agno/tools/mcp_toolbox.py +3 -3
  232. agno/tools/models/nebius.py +5 -5
  233. agno/tools/models_labs.py +20 -10
  234. agno/tools/nano_banana.py +151 -0
  235. agno/tools/notion.py +204 -0
  236. agno/tools/parallel.py +314 -0
  237. agno/tools/postgres.py +76 -36
  238. agno/tools/redshift.py +406 -0
  239. agno/tools/scrapegraph.py +1 -1
  240. agno/tools/shopify.py +1519 -0
  241. agno/tools/slack.py +18 -3
  242. agno/tools/spotify.py +919 -0
  243. agno/tools/tavily.py +146 -0
  244. agno/tools/toolkit.py +25 -0
  245. agno/tools/workflow.py +8 -1
  246. agno/tools/yfinance.py +12 -11
  247. agno/tracing/__init__.py +12 -0
  248. agno/tracing/exporter.py +157 -0
  249. agno/tracing/schemas.py +276 -0
  250. agno/tracing/setup.py +111 -0
  251. agno/utils/agent.py +938 -0
  252. agno/utils/cryptography.py +22 -0
  253. agno/utils/dttm.py +33 -0
  254. agno/utils/events.py +151 -3
  255. agno/utils/gemini.py +15 -5
  256. agno/utils/hooks.py +118 -4
  257. agno/utils/http.py +113 -2
  258. agno/utils/knowledge.py +12 -5
  259. agno/utils/log.py +1 -0
  260. agno/utils/mcp.py +92 -2
  261. agno/utils/media.py +187 -1
  262. agno/utils/merge_dict.py +3 -3
  263. agno/utils/message.py +60 -0
  264. agno/utils/models/ai_foundry.py +9 -2
  265. agno/utils/models/claude.py +49 -14
  266. agno/utils/models/cohere.py +9 -2
  267. agno/utils/models/llama.py +9 -2
  268. agno/utils/models/mistral.py +4 -2
  269. agno/utils/print_response/agent.py +109 -16
  270. agno/utils/print_response/team.py +223 -30
  271. agno/utils/print_response/workflow.py +251 -34
  272. agno/utils/streamlit.py +1 -1
  273. agno/utils/team.py +98 -9
  274. agno/utils/tokens.py +657 -0
  275. agno/vectordb/base.py +39 -7
  276. agno/vectordb/cassandra/cassandra.py +21 -5
  277. agno/vectordb/chroma/chromadb.py +43 -12
  278. agno/vectordb/clickhouse/clickhousedb.py +21 -5
  279. agno/vectordb/couchbase/couchbase.py +29 -5
  280. agno/vectordb/lancedb/lance_db.py +92 -181
  281. agno/vectordb/langchaindb/langchaindb.py +24 -4
  282. agno/vectordb/lightrag/lightrag.py +17 -3
  283. agno/vectordb/llamaindex/llamaindexdb.py +25 -5
  284. agno/vectordb/milvus/milvus.py +50 -37
  285. agno/vectordb/mongodb/__init__.py +7 -1
  286. agno/vectordb/mongodb/mongodb.py +36 -30
  287. agno/vectordb/pgvector/pgvector.py +201 -77
  288. agno/vectordb/pineconedb/pineconedb.py +41 -23
  289. agno/vectordb/qdrant/qdrant.py +67 -54
  290. agno/vectordb/redis/__init__.py +9 -0
  291. agno/vectordb/redis/redisdb.py +682 -0
  292. agno/vectordb/singlestore/singlestore.py +50 -29
  293. agno/vectordb/surrealdb/surrealdb.py +31 -41
  294. agno/vectordb/upstashdb/upstashdb.py +34 -6
  295. agno/vectordb/weaviate/weaviate.py +53 -14
  296. agno/workflow/__init__.py +2 -0
  297. agno/workflow/agent.py +299 -0
  298. agno/workflow/condition.py +120 -18
  299. agno/workflow/loop.py +77 -10
  300. agno/workflow/parallel.py +231 -143
  301. agno/workflow/router.py +118 -17
  302. agno/workflow/step.py +609 -170
  303. agno/workflow/steps.py +73 -6
  304. agno/workflow/types.py +96 -21
  305. agno/workflow/workflow.py +2039 -262
  306. {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/METADATA +201 -66
  307. agno-2.3.13.dist-info/RECORD +613 -0
  308. agno/tools/googlesearch.py +0 -98
  309. agno/tools/mcp.py +0 -679
  310. agno/tools/memori.py +0 -339
  311. agno-2.1.2.dist-info/RECORD +0 -543
  312. {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/WHEEL +0 -0
  313. {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/licenses/LICENSE +0 -0
  314. {agno-2.1.2.dist-info → agno-2.3.13.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  from abc import ABC, abstractmethod
2
2
  from enum import Enum
3
- from typing import List
3
+ from typing import List, Optional
4
4
 
5
5
  from agno.knowledge.document.base import Document
6
6
 
@@ -60,7 +60,13 @@ class ChunkingStrategyFactory:
60
60
  """Factory for creating chunking strategy instances."""
61
61
 
62
62
  @classmethod
63
- def create_strategy(cls, strategy_type: ChunkingStrategyType, **kwargs) -> ChunkingStrategy:
63
+ def create_strategy(
64
+ cls,
65
+ strategy_type: ChunkingStrategyType,
66
+ chunk_size: Optional[int] = None,
67
+ overlap: Optional[int] = None,
68
+ **kwargs,
69
+ ) -> ChunkingStrategy:
64
70
  """Create an instance of the chunking strategy with the given parameters."""
65
71
  strategy_map = {
66
72
  ChunkingStrategyType.AGENTIC_CHUNKER: cls._create_agentic_chunking,
@@ -71,51 +77,89 @@ class ChunkingStrategyFactory:
71
77
  ChunkingStrategyType.ROW_CHUNKER: cls._create_row_chunking,
72
78
  ChunkingStrategyType.MARKDOWN_CHUNKER: cls._create_markdown_chunking,
73
79
  }
74
- return strategy_map[strategy_type](**kwargs)
80
+ return strategy_map[strategy_type](chunk_size=chunk_size, overlap=overlap, **kwargs)
75
81
 
76
82
  @classmethod
77
- def _create_agentic_chunking(cls, **kwargs) -> ChunkingStrategy:
83
+ def _create_agentic_chunking(
84
+ cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
85
+ ) -> ChunkingStrategy:
78
86
  from agno.knowledge.chunking.agentic import AgenticChunking
79
87
 
80
- # Map chunk_size to max_chunk_size for AgenticChunking
81
- if "chunk_size" in kwargs and "max_chunk_size" not in kwargs:
82
- kwargs["max_chunk_size"] = kwargs.pop("chunk_size")
88
+ # AgenticChunking accepts max_chunk_size (not chunk_size) and no overlap
89
+ if chunk_size is not None:
90
+ kwargs["max_chunk_size"] = chunk_size
91
+ # Remove overlap since AgenticChunking doesn't support it
83
92
  return AgenticChunking(**kwargs)
84
93
 
85
94
  @classmethod
86
- def _create_document_chunking(cls, **kwargs) -> ChunkingStrategy:
95
+ def _create_document_chunking(
96
+ cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
97
+ ) -> ChunkingStrategy:
87
98
  from agno.knowledge.chunking.document import DocumentChunking
88
99
 
100
+ # DocumentChunking accepts both chunk_size and overlap
101
+ if chunk_size is not None:
102
+ kwargs["chunk_size"] = chunk_size
103
+ if overlap is not None:
104
+ kwargs["overlap"] = overlap
89
105
  return DocumentChunking(**kwargs)
90
106
 
91
107
  @classmethod
92
- def _create_recursive_chunking(cls, **kwargs) -> ChunkingStrategy:
108
+ def _create_recursive_chunking(
109
+ cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
110
+ ) -> ChunkingStrategy:
93
111
  from agno.knowledge.chunking.recursive import RecursiveChunking
94
112
 
113
+ # RecursiveChunking accepts both chunk_size and overlap
114
+ if chunk_size is not None:
115
+ kwargs["chunk_size"] = chunk_size
116
+ if overlap is not None:
117
+ kwargs["overlap"] = overlap
95
118
  return RecursiveChunking(**kwargs)
96
119
 
97
120
  @classmethod
98
- def _create_semantic_chunking(cls, **kwargs) -> ChunkingStrategy:
121
+ def _create_semantic_chunking(
122
+ cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
123
+ ) -> ChunkingStrategy:
99
124
  from agno.knowledge.chunking.semantic import SemanticChunking
100
125
 
126
+ # SemanticChunking accepts chunk_size but not overlap
127
+ if chunk_size is not None:
128
+ kwargs["chunk_size"] = chunk_size
129
+ # Remove overlap since SemanticChunking doesn't support it
101
130
  return SemanticChunking(**kwargs)
102
131
 
103
132
  @classmethod
104
- def _create_fixed_chunking(cls, **kwargs) -> ChunkingStrategy:
133
+ def _create_fixed_chunking(
134
+ cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
135
+ ) -> ChunkingStrategy:
105
136
  from agno.knowledge.chunking.fixed import FixedSizeChunking
106
137
 
138
+ # FixedSizeChunking accepts both chunk_size and overlap
139
+ if chunk_size is not None:
140
+ kwargs["chunk_size"] = chunk_size
141
+ if overlap is not None:
142
+ kwargs["overlap"] = overlap
107
143
  return FixedSizeChunking(**kwargs)
108
144
 
109
145
  @classmethod
110
- def _create_row_chunking(cls, **kwargs) -> ChunkingStrategy:
146
+ def _create_row_chunking(
147
+ cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
148
+ ) -> ChunkingStrategy:
111
149
  from agno.knowledge.chunking.row import RowChunking
112
150
 
113
- # Remove chunk_size if present since RowChunking doesn't use it
114
- kwargs.pop("chunk_size", None)
151
+ # RowChunking doesn't accept chunk_size or overlap, only skip_header and clean_rows
115
152
  return RowChunking(**kwargs)
116
153
 
117
154
  @classmethod
118
- def _create_markdown_chunking(cls, **kwargs) -> ChunkingStrategy:
155
+ def _create_markdown_chunking(
156
+ cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
157
+ ) -> ChunkingStrategy:
119
158
  from agno.knowledge.chunking.markdown import MarkdownChunking
120
159
 
160
+ # MarkdownChunking accepts both chunk_size and overlap
161
+ if chunk_size is not None:
162
+ kwargs["chunk_size"] = chunk_size
163
+ if overlap is not None:
164
+ kwargs["overlap"] = overlap
121
165
  return MarkdownChunking(**kwargs)
@@ -23,7 +23,7 @@ class FastEmbedEmbedder(Embedder):
23
23
  """Using BAAI/bge-small-en-v1.5 model, more models available: https://qdrant.github.io/fastembed/examples/Supported_Models/"""
24
24
 
25
25
  id: str = "BAAI/bge-small-en-v1.5"
26
- dimensions: int = 384
26
+ dimensions: Optional[int] = 384
27
27
 
28
28
  def get_embedding(self, text: str) -> List[float]:
29
29
  model = TextEmbedding(model_name=self.id)
@@ -10,4 +10,4 @@ class NebiusEmbedder(OpenAIEmbedder):
10
10
  id: str = "BAAI/bge-en-icl"
11
11
  dimensions: int = 1024
12
12
  api_key: Optional[str] = getenv("NEBIUS_API_KEY")
13
- base_url: str = "https://api.studio.nebius.com/v1/"
13
+ base_url: str = "https://api.tokenfactory.nebius.com/v1/"
@@ -85,6 +85,10 @@ class OllamaEmbedder(Embedder):
85
85
  if self.options is not None:
86
86
  kwargs["options"] = self.options
87
87
 
88
+ # Add dimensions parameter for models that support it
89
+ if self.dimensions is not None:
90
+ kwargs["dimensions"] = self.dimensions
91
+
88
92
  response = self.client.embed(input=text, model=self.id, **kwargs)
89
93
  if response and "embeddings" in response:
90
94
  embeddings = response["embeddings"]
@@ -117,6 +121,10 @@ class OllamaEmbedder(Embedder):
117
121
  if self.options is not None:
118
122
  kwargs["options"] = self.options
119
123
 
124
+ # Add dimensions parameter for models that support it
125
+ if self.dimensions is not None:
126
+ kwargs["dimensions"] = self.dimensions
127
+
120
128
  response = await self.aclient.embed(input=text, model=self.id, **kwargs)
121
129
  if response and "embeddings" in response:
122
130
  embeddings = response["embeddings"]
@@ -4,7 +4,7 @@ from typing import Any, Dict, List, Optional, Tuple
4
4
  from typing_extensions import Literal
5
5
 
6
6
  from agno.knowledge.embedder.base import Embedder
7
- from agno.utils.log import logger
7
+ from agno.utils.log import log_info, log_warning
8
8
 
9
9
  try:
10
10
  from openai import AsyncOpenAI
@@ -82,7 +82,7 @@ class OpenAIEmbedder(Embedder):
82
82
  response: CreateEmbeddingResponse = self.response(text=text)
83
83
  return response.data[0].embedding
84
84
  except Exception as e:
85
- logger.warning(e)
85
+ log_warning(e)
86
86
  return []
87
87
 
88
88
  def get_embedding_and_usage(self, text: str) -> Tuple[List[float], Optional[Dict]]:
@@ -95,7 +95,7 @@ class OpenAIEmbedder(Embedder):
95
95
  return embedding, usage.model_dump()
96
96
  return embedding, None
97
97
  except Exception as e:
98
- logger.warning(e)
98
+ log_warning(e)
99
99
  return [], None
100
100
 
101
101
  async def async_get_embedding(self, text: str) -> List[float]:
@@ -115,7 +115,7 @@ class OpenAIEmbedder(Embedder):
115
115
  response: CreateEmbeddingResponse = await self.aclient.embeddings.create(**req)
116
116
  return response.data[0].embedding
117
117
  except Exception as e:
118
- logger.warning(e)
118
+ log_warning(e)
119
119
  return []
120
120
 
121
121
  async def async_get_embedding_and_usage(self, text: str):
@@ -137,7 +137,7 @@ class OpenAIEmbedder(Embedder):
137
137
  usage = response.usage
138
138
  return embedding, usage.model_dump() if usage else None
139
139
  except Exception as e:
140
- logger.warning(e)
140
+ log_warning(f"Error getting embedding: {e}")
141
141
  return [], None
142
142
 
143
143
  async def async_get_embeddings_batch_and_usage(
@@ -154,7 +154,7 @@ class OpenAIEmbedder(Embedder):
154
154
  """
155
155
  all_embeddings = []
156
156
  all_usage = []
157
- logger.info(f"Getting embeddings and usage for {len(texts)} texts in batches of {self.batch_size} (async)")
157
+ log_info(f"Getting embeddings and usage for {len(texts)} texts in batches of {self.batch_size} (async)")
158
158
 
159
159
  for i in range(0, len(texts), self.batch_size):
160
160
  batch_texts = texts[i : i + self.batch_size]
@@ -180,7 +180,7 @@ class OpenAIEmbedder(Embedder):
180
180
  usage_dict = response.usage.model_dump() if response.usage else None
181
181
  all_usage.extend([usage_dict] * len(batch_embeddings))
182
182
  except Exception as e:
183
- logger.warning(f"Error in async batch embedding: {e}")
183
+ log_warning(f"Error in async batch embedding: {e}")
184
184
  # Fallback to individual calls for this batch
185
185
  for text in batch_texts:
186
186
  try:
@@ -188,7 +188,7 @@ class OpenAIEmbedder(Embedder):
188
188
  all_embeddings.append(embedding)
189
189
  all_usage.append(usage)
190
190
  except Exception as e2:
191
- logger.warning(f"Error in individual async embedding fallback: {e2}")
191
+ log_warning(f"Error in individual async embedding fallback: {e2}")
192
192
  all_embeddings.append([])
193
193
  all_usage.append(None)
194
194
 
@@ -25,10 +25,14 @@ class SentenceTransformerEmbedder(Embedder):
25
25
  prompt: Optional[str] = None
26
26
  normalize_embeddings: bool = False
27
27
 
28
- def get_embedding(self, text: Union[str, List[str]]) -> List[float]:
29
- if not self.sentence_transformer_client:
28
+ def __post_init__(self):
29
+ # Initialize the SentenceTransformer model eagerly to avoid race conditions in async contexts
30
+ if self.sentence_transformer_client is None:
30
31
  self.sentence_transformer_client = SentenceTransformer(model_name_or_path=self.id)
31
32
 
33
+ def get_embedding(self, text: Union[str, List[str]]) -> List[float]:
34
+ if self.sentence_transformer_client is None:
35
+ raise RuntimeError("SentenceTransformer model not initialized")
32
36
  model = self.sentence_transformer_client
33
37
  embedding = model.encode(text, prompt=self.prompt, normalize_embeddings=self.normalize_embeddings)
34
38
  try:
@@ -0,0 +1,262 @@
1
+ import asyncio
2
+ from dataclasses import dataclass
3
+ from os import getenv
4
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
5
+
6
+ from agno.knowledge.embedder.base import Embedder
7
+ from agno.utils.log import logger
8
+
9
+ try:
10
+ from vllm import LLM # type: ignore
11
+ from vllm.outputs import EmbeddingRequestOutput # type: ignore
12
+ except ImportError:
13
+ raise ImportError("`vllm` not installed. Please install using `pip install vllm`.")
14
+
15
+ if TYPE_CHECKING:
16
+ from openai import AsyncOpenAI
17
+ from openai import OpenAI as OpenAIClient
18
+ from openai.types.create_embedding_response import CreateEmbeddingResponse
19
+
20
+
21
+ @dataclass
22
+ class VLLMEmbedder(Embedder):
23
+ """
24
+ VLLM Embedder supporting both local and remote deployment modes.
25
+
26
+ Local Mode (default):
27
+ - Loads model locally and runs inference on your GPU/CPU
28
+ - No API key required
29
+ - Example: VLLMEmbedder(id="intfloat/e5-mistral-7b-instruct")
30
+
31
+ Remote Mode:
32
+ - Connects to a remote vLLM server via OpenAI-compatible API
33
+ - Uses OpenAI SDK to communicate with vLLM's OpenAI-compatible endpoint
34
+ - Requires base_url and optionally api_key
35
+ - Example: VLLMEmbedder(base_url="http://localhost:8000/v1", api_key="your-key")
36
+ - Ref: https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html
37
+ """
38
+
39
+ id: str = "sentence-transformers/all-MiniLM-L6-v2"
40
+ dimensions: int = 4096
41
+ # Local mode parameters
42
+ enforce_eager: bool = True
43
+ vllm_kwargs: Optional[Dict[str, Any]] = None
44
+ vllm_client: Optional[LLM] = None
45
+ # Remote mode parameters
46
+ api_key: Optional[str] = getenv("VLLM_API_KEY")
47
+ base_url: Optional[str] = None
48
+ request_params: Optional[Dict[str, Any]] = None
49
+ client_params: Optional[Dict[str, Any]] = None
50
+ remote_client: Optional["OpenAIClient"] = None # OpenAI-compatible client for vLLM server
51
+ async_remote_client: Optional["AsyncOpenAI"] = None # Async OpenAI-compatible client for vLLM server
52
+
53
+ @property
54
+ def is_remote(self) -> bool:
55
+ """Determine if we should use remote mode."""
56
+ return self.base_url is not None
57
+
58
+ def _get_vllm_client(self) -> LLM:
59
+ """Get local VLLM client."""
60
+ if self.vllm_client:
61
+ return self.vllm_client
62
+
63
+ _vllm_params: Dict[str, Any] = {
64
+ "model": self.id,
65
+ "task": "embed",
66
+ "enforce_eager": self.enforce_eager,
67
+ }
68
+ if self.vllm_kwargs:
69
+ _vllm_params.update(self.vllm_kwargs)
70
+ self.vllm_client = LLM(**_vllm_params)
71
+ return self.vllm_client
72
+
73
+ def _get_remote_client(self) -> "OpenAIClient":
74
+ """Get OpenAI-compatible client for remote vLLM server."""
75
+ if self.remote_client:
76
+ return self.remote_client
77
+
78
+ try:
79
+ from openai import OpenAI as OpenAIClient
80
+ except ImportError:
81
+ raise ImportError("`openai` package required for remote vLLM mode. ")
82
+
83
+ _client_params: Dict[str, Any] = {
84
+ "api_key": self.api_key or "EMPTY", # VLLM can run without API key
85
+ "base_url": self.base_url,
86
+ }
87
+ if self.client_params:
88
+ _client_params.update(self.client_params)
89
+ self.remote_client = OpenAIClient(**_client_params)
90
+ return self.remote_client
91
+
92
+ def _get_async_remote_client(self) -> "AsyncOpenAI":
93
+ """Get async OpenAI-compatible client for remote vLLM server."""
94
+ if self.async_remote_client:
95
+ return self.async_remote_client
96
+
97
+ try:
98
+ from openai import AsyncOpenAI
99
+ except ImportError:
100
+ raise ImportError("`openai` package required for remote vLLM mode. ")
101
+
102
+ _client_params: Dict[str, Any] = {
103
+ "api_key": self.api_key or "EMPTY",
104
+ "base_url": self.base_url,
105
+ }
106
+ if self.client_params:
107
+ _client_params.update(self.client_params)
108
+ self.async_remote_client = AsyncOpenAI(**_client_params)
109
+ return self.async_remote_client
110
+
111
+ def _create_embedding_local(self, text: str) -> Optional[EmbeddingRequestOutput]:
112
+ """Create embedding using local VLLM."""
113
+ try:
114
+ outputs = self._get_vllm_client().embed([text])
115
+ return outputs[0] if outputs else None
116
+ except Exception as e:
117
+ logger.warning(f"Error creating local embedding: {e}")
118
+ return None
119
+
120
+ def _create_embedding_remote(self, text: str) -> "CreateEmbeddingResponse":
121
+ """Create embedding using remote vLLM server."""
122
+ _request_params: Dict[str, Any] = {
123
+ "input": text,
124
+ "model": self.id,
125
+ }
126
+ if self.request_params:
127
+ _request_params.update(self.request_params)
128
+ return self._get_remote_client().embeddings.create(**_request_params)
129
+
130
+ def get_embedding(self, text: str) -> List[float]:
131
+ try:
132
+ if self.is_remote:
133
+ # Remote mode: OpenAI-compatible API
134
+ response: "CreateEmbeddingResponse" = self._create_embedding_remote(text=text)
135
+ return response.data[0].embedding
136
+ else:
137
+ # Local mode: Direct VLLM
138
+ output = self._create_embedding_local(text=text)
139
+ if output and hasattr(output, "outputs") and hasattr(output.outputs, "embedding"):
140
+ embedding = output.outputs.embedding
141
+ if len(embedding) != self.dimensions:
142
+ logger.warning(f"Expected embedding dimension {self.dimensions}, but got {len(embedding)}")
143
+ return embedding
144
+ return []
145
+ except Exception as e:
146
+ logger.warning(f"Error extracting embedding: {e}")
147
+ return []
148
+
149
+ def get_embedding_and_usage(self, text: str) -> Tuple[List[float], Optional[Dict]]:
150
+ if self.is_remote:
151
+ try:
152
+ response: "CreateEmbeddingResponse" = self._create_embedding_remote(text=text)
153
+ embedding = response.data[0].embedding
154
+ usage = response.usage
155
+ if usage:
156
+ return embedding, usage.model_dump()
157
+ return embedding, None
158
+ except Exception as e:
159
+ logger.warning(f"Error in remote embedding: {e}")
160
+ return [], None
161
+ else:
162
+ embedding = self.get_embedding(text=text)
163
+ # Local VLLM doesn't provide usage information
164
+ return embedding, None
165
+
166
+ async def async_get_embedding(self, text: str) -> List[float]:
167
+ """Async version of get_embedding using thread executor for local mode."""
168
+ if self.is_remote:
169
+ # Remote mode: async client for vLLM server
170
+ try:
171
+ req: Dict[str, Any] = {
172
+ "input": text,
173
+ "model": self.id,
174
+ }
175
+ if self.request_params:
176
+ req.update(self.request_params)
177
+ response: "CreateEmbeddingResponse" = await self._get_async_remote_client().embeddings.create(**req)
178
+ return response.data[0].embedding
179
+ except Exception as e:
180
+ logger.warning(f"Error in async remote embedding: {e}")
181
+ return []
182
+ else:
183
+ # Local mode: use thread executor for CPU-bound operations
184
+ loop = asyncio.get_event_loop()
185
+ return await loop.run_in_executor(None, self.get_embedding, text)
186
+
187
+ async def async_get_embedding_and_usage(self, text: str) -> Tuple[List[float], Optional[Dict]]:
188
+ """Async version of get_embedding_and_usage using thread executor for local mode."""
189
+ if self.is_remote:
190
+ try:
191
+ req: Dict[str, Any] = {
192
+ "input": text,
193
+ "model": self.id,
194
+ }
195
+ if self.request_params:
196
+ req.update(self.request_params)
197
+ response: "CreateEmbeddingResponse" = await self._get_async_remote_client().embeddings.create(**req)
198
+ embedding = response.data[0].embedding
199
+ usage = response.usage
200
+ return embedding, usage.model_dump() if usage else None
201
+ except Exception as e:
202
+ logger.warning(f"Error in async remote embedding: {e}")
203
+ return [], None
204
+ else:
205
+ # Local mode: use thread executor for CPU-bound operations
206
+ try:
207
+ loop = asyncio.get_event_loop()
208
+ return await loop.run_in_executor(None, self.get_embedding_and_usage, text)
209
+ except Exception as e:
210
+ logger.warning(f"Error in async local embedding: {e}")
211
+ return [], None
212
+
213
+ async def async_get_embeddings_batch_and_usage(
214
+ self, texts: List[str]
215
+ ) -> Tuple[List[List[float]], List[Optional[Dict]]]:
216
+ """
217
+ Get embeddings and usage for multiple texts in batches (async version).
218
+
219
+ Args:
220
+ texts: List of text strings to embed
221
+
222
+ Returns:
223
+ Tuple of (List of embedding vectors, List of usage dictionaries)
224
+ """
225
+ all_embeddings = []
226
+ all_usage = []
227
+ logger.info(f"Getting embeddings for {len(texts)} texts in batches of {self.batch_size} (async)")
228
+
229
+ for i in range(0, len(texts), self.batch_size):
230
+ batch_texts = texts[i : i + self.batch_size]
231
+
232
+ try:
233
+ if self.is_remote:
234
+ # Remote mode: use batch API
235
+ req: Dict[str, Any] = {
236
+ "input": batch_texts,
237
+ "model": self.id,
238
+ }
239
+ if self.request_params:
240
+ req.update(self.request_params)
241
+ response: "CreateEmbeddingResponse" = await self._get_async_remote_client().embeddings.create(**req)
242
+ batch_embeddings = [data.embedding for data in response.data]
243
+ all_embeddings.extend(batch_embeddings)
244
+
245
+ # For each embedding in the batch, add the same usage information
246
+ usage_dict = response.usage.model_dump() if response.usage else None
247
+ all_usage.extend([usage_dict] * len(batch_embeddings))
248
+ else:
249
+ # Local mode: process individually using thread executor
250
+ for text in batch_texts:
251
+ embedding, usage = await self.async_get_embedding_and_usage(text)
252
+ all_embeddings.append(embedding)
253
+ all_usage.append(usage)
254
+
255
+ except Exception as e:
256
+ logger.warning(f"Error in async batch embedding: {e}")
257
+ # Fallback: add empty results for failed batch
258
+ for _ in batch_texts:
259
+ all_embeddings.append([])
260
+ all_usage.append(None)
261
+
262
+ return all_embeddings, all_usage