agno 2.0.1__py3-none-any.whl → 2.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (314)
  1. agno/agent/agent.py +6015 -2823
  2. agno/api/api.py +2 -0
  3. agno/api/os.py +1 -1
  4. agno/culture/__init__.py +3 -0
  5. agno/culture/manager.py +956 -0
  6. agno/db/async_postgres/__init__.py +3 -0
  7. agno/db/base.py +385 -6
  8. agno/db/dynamo/dynamo.py +388 -81
  9. agno/db/dynamo/schemas.py +47 -10
  10. agno/db/dynamo/utils.py +63 -4
  11. agno/db/firestore/firestore.py +435 -64
  12. agno/db/firestore/schemas.py +11 -0
  13. agno/db/firestore/utils.py +102 -4
  14. agno/db/gcs_json/gcs_json_db.py +384 -42
  15. agno/db/gcs_json/utils.py +60 -26
  16. agno/db/in_memory/in_memory_db.py +351 -66
  17. agno/db/in_memory/utils.py +60 -2
  18. agno/db/json/json_db.py +339 -48
  19. agno/db/json/utils.py +60 -26
  20. agno/db/migrations/manager.py +199 -0
  21. agno/db/migrations/v1_to_v2.py +510 -37
  22. agno/db/migrations/versions/__init__.py +0 -0
  23. agno/db/migrations/versions/v2_3_0.py +938 -0
  24. agno/db/mongo/__init__.py +15 -1
  25. agno/db/mongo/async_mongo.py +2036 -0
  26. agno/db/mongo/mongo.py +653 -76
  27. agno/db/mongo/schemas.py +13 -0
  28. agno/db/mongo/utils.py +80 -8
  29. agno/db/mysql/mysql.py +687 -25
  30. agno/db/mysql/schemas.py +61 -37
  31. agno/db/mysql/utils.py +60 -2
  32. agno/db/postgres/__init__.py +2 -1
  33. agno/db/postgres/async_postgres.py +2001 -0
  34. agno/db/postgres/postgres.py +676 -57
  35. agno/db/postgres/schemas.py +43 -18
  36. agno/db/postgres/utils.py +164 -2
  37. agno/db/redis/redis.py +344 -38
  38. agno/db/redis/schemas.py +18 -0
  39. agno/db/redis/utils.py +60 -2
  40. agno/db/schemas/__init__.py +2 -1
  41. agno/db/schemas/culture.py +120 -0
  42. agno/db/schemas/memory.py +13 -0
  43. agno/db/singlestore/schemas.py +26 -1
  44. agno/db/singlestore/singlestore.py +687 -53
  45. agno/db/singlestore/utils.py +60 -2
  46. agno/db/sqlite/__init__.py +2 -1
  47. agno/db/sqlite/async_sqlite.py +2371 -0
  48. agno/db/sqlite/schemas.py +24 -0
  49. agno/db/sqlite/sqlite.py +774 -85
  50. agno/db/sqlite/utils.py +168 -5
  51. agno/db/surrealdb/__init__.py +3 -0
  52. agno/db/surrealdb/metrics.py +292 -0
  53. agno/db/surrealdb/models.py +309 -0
  54. agno/db/surrealdb/queries.py +71 -0
  55. agno/db/surrealdb/surrealdb.py +1361 -0
  56. agno/db/surrealdb/utils.py +147 -0
  57. agno/db/utils.py +50 -22
  58. agno/eval/accuracy.py +50 -43
  59. agno/eval/performance.py +6 -3
  60. agno/eval/reliability.py +6 -3
  61. agno/eval/utils.py +33 -16
  62. agno/exceptions.py +68 -1
  63. agno/filters.py +354 -0
  64. agno/guardrails/__init__.py +6 -0
  65. agno/guardrails/base.py +19 -0
  66. agno/guardrails/openai.py +144 -0
  67. agno/guardrails/pii.py +94 -0
  68. agno/guardrails/prompt_injection.py +52 -0
  69. agno/integrations/discord/client.py +1 -0
  70. agno/knowledge/chunking/agentic.py +13 -10
  71. agno/knowledge/chunking/fixed.py +1 -1
  72. agno/knowledge/chunking/semantic.py +40 -8
  73. agno/knowledge/chunking/strategy.py +59 -15
  74. agno/knowledge/embedder/aws_bedrock.py +9 -4
  75. agno/knowledge/embedder/azure_openai.py +54 -0
  76. agno/knowledge/embedder/base.py +2 -0
  77. agno/knowledge/embedder/cohere.py +184 -5
  78. agno/knowledge/embedder/fastembed.py +1 -1
  79. agno/knowledge/embedder/google.py +79 -1
  80. agno/knowledge/embedder/huggingface.py +9 -4
  81. agno/knowledge/embedder/jina.py +63 -0
  82. agno/knowledge/embedder/mistral.py +78 -11
  83. agno/knowledge/embedder/nebius.py +1 -1
  84. agno/knowledge/embedder/ollama.py +13 -0
  85. agno/knowledge/embedder/openai.py +37 -65
  86. agno/knowledge/embedder/sentence_transformer.py +8 -4
  87. agno/knowledge/embedder/vllm.py +262 -0
  88. agno/knowledge/embedder/voyageai.py +69 -16
  89. agno/knowledge/knowledge.py +594 -186
  90. agno/knowledge/reader/base.py +9 -2
  91. agno/knowledge/reader/csv_reader.py +8 -10
  92. agno/knowledge/reader/docx_reader.py +5 -6
  93. agno/knowledge/reader/field_labeled_csv_reader.py +290 -0
  94. agno/knowledge/reader/json_reader.py +6 -5
  95. agno/knowledge/reader/markdown_reader.py +13 -13
  96. agno/knowledge/reader/pdf_reader.py +43 -68
  97. agno/knowledge/reader/pptx_reader.py +101 -0
  98. agno/knowledge/reader/reader_factory.py +51 -6
  99. agno/knowledge/reader/s3_reader.py +3 -15
  100. agno/knowledge/reader/tavily_reader.py +194 -0
  101. agno/knowledge/reader/text_reader.py +13 -13
  102. agno/knowledge/reader/web_search_reader.py +2 -43
  103. agno/knowledge/reader/website_reader.py +43 -25
  104. agno/knowledge/reranker/__init__.py +2 -8
  105. agno/knowledge/types.py +9 -0
  106. agno/knowledge/utils.py +20 -0
  107. agno/media.py +72 -0
  108. agno/memory/manager.py +336 -82
  109. agno/models/aimlapi/aimlapi.py +2 -2
  110. agno/models/anthropic/claude.py +183 -37
  111. agno/models/aws/bedrock.py +52 -112
  112. agno/models/aws/claude.py +33 -1
  113. agno/models/azure/ai_foundry.py +33 -15
  114. agno/models/azure/openai_chat.py +25 -8
  115. agno/models/base.py +999 -519
  116. agno/models/cerebras/cerebras.py +19 -13
  117. agno/models/cerebras/cerebras_openai.py +8 -5
  118. agno/models/cohere/chat.py +27 -1
  119. agno/models/cometapi/__init__.py +5 -0
  120. agno/models/cometapi/cometapi.py +57 -0
  121. agno/models/dashscope/dashscope.py +1 -0
  122. agno/models/deepinfra/deepinfra.py +2 -2
  123. agno/models/deepseek/deepseek.py +2 -2
  124. agno/models/fireworks/fireworks.py +2 -2
  125. agno/models/google/gemini.py +103 -31
  126. agno/models/groq/groq.py +28 -11
  127. agno/models/huggingface/huggingface.py +2 -1
  128. agno/models/internlm/internlm.py +2 -2
  129. agno/models/langdb/langdb.py +4 -4
  130. agno/models/litellm/chat.py +18 -1
  131. agno/models/litellm/litellm_openai.py +2 -2
  132. agno/models/llama_cpp/__init__.py +5 -0
  133. agno/models/llama_cpp/llama_cpp.py +22 -0
  134. agno/models/message.py +139 -0
  135. agno/models/meta/llama.py +27 -10
  136. agno/models/meta/llama_openai.py +5 -17
  137. agno/models/nebius/nebius.py +6 -6
  138. agno/models/nexus/__init__.py +3 -0
  139. agno/models/nexus/nexus.py +22 -0
  140. agno/models/nvidia/nvidia.py +2 -2
  141. agno/models/ollama/chat.py +59 -5
  142. agno/models/openai/chat.py +69 -29
  143. agno/models/openai/responses.py +103 -106
  144. agno/models/openrouter/openrouter.py +41 -3
  145. agno/models/perplexity/perplexity.py +4 -5
  146. agno/models/portkey/portkey.py +3 -3
  147. agno/models/requesty/__init__.py +5 -0
  148. agno/models/requesty/requesty.py +52 -0
  149. agno/models/response.py +77 -1
  150. agno/models/sambanova/sambanova.py +2 -2
  151. agno/models/siliconflow/__init__.py +5 -0
  152. agno/models/siliconflow/siliconflow.py +25 -0
  153. agno/models/together/together.py +2 -2
  154. agno/models/utils.py +254 -8
  155. agno/models/vercel/v0.py +2 -2
  156. agno/models/vertexai/__init__.py +0 -0
  157. agno/models/vertexai/claude.py +96 -0
  158. agno/models/vllm/vllm.py +1 -0
  159. agno/models/xai/xai.py +3 -2
  160. agno/os/app.py +543 -178
  161. agno/os/auth.py +24 -14
  162. agno/os/config.py +1 -0
  163. agno/os/interfaces/__init__.py +1 -0
  164. agno/os/interfaces/a2a/__init__.py +3 -0
  165. agno/os/interfaces/a2a/a2a.py +42 -0
  166. agno/os/interfaces/a2a/router.py +250 -0
  167. agno/os/interfaces/a2a/utils.py +924 -0
  168. agno/os/interfaces/agui/agui.py +23 -7
  169. agno/os/interfaces/agui/router.py +27 -3
  170. agno/os/interfaces/agui/utils.py +242 -142
  171. agno/os/interfaces/base.py +6 -2
  172. agno/os/interfaces/slack/router.py +81 -23
  173. agno/os/interfaces/slack/slack.py +29 -14
  174. agno/os/interfaces/whatsapp/router.py +11 -4
  175. agno/os/interfaces/whatsapp/whatsapp.py +14 -7
  176. agno/os/mcp.py +111 -54
  177. agno/os/middleware/__init__.py +7 -0
  178. agno/os/middleware/jwt.py +233 -0
  179. agno/os/router.py +556 -139
  180. agno/os/routers/evals/evals.py +71 -34
  181. agno/os/routers/evals/schemas.py +31 -31
  182. agno/os/routers/evals/utils.py +6 -5
  183. agno/os/routers/health.py +31 -0
  184. agno/os/routers/home.py +52 -0
  185. agno/os/routers/knowledge/knowledge.py +185 -38
  186. agno/os/routers/knowledge/schemas.py +82 -22
  187. agno/os/routers/memory/memory.py +158 -53
  188. agno/os/routers/memory/schemas.py +20 -16
  189. agno/os/routers/metrics/metrics.py +20 -8
  190. agno/os/routers/metrics/schemas.py +16 -16
  191. agno/os/routers/session/session.py +499 -38
  192. agno/os/schema.py +308 -198
  193. agno/os/utils.py +401 -41
  194. agno/reasoning/anthropic.py +80 -0
  195. agno/reasoning/azure_ai_foundry.py +2 -2
  196. agno/reasoning/deepseek.py +2 -2
  197. agno/reasoning/default.py +3 -1
  198. agno/reasoning/gemini.py +73 -0
  199. agno/reasoning/groq.py +2 -2
  200. agno/reasoning/ollama.py +2 -2
  201. agno/reasoning/openai.py +7 -2
  202. agno/reasoning/vertexai.py +76 -0
  203. agno/run/__init__.py +6 -0
  204. agno/run/agent.py +248 -94
  205. agno/run/base.py +44 -5
  206. agno/run/team.py +238 -97
  207. agno/run/workflow.py +144 -33
  208. agno/session/agent.py +105 -89
  209. agno/session/summary.py +65 -25
  210. agno/session/team.py +176 -96
  211. agno/session/workflow.py +406 -40
  212. agno/team/team.py +3854 -1610
  213. agno/tools/dalle.py +2 -4
  214. agno/tools/decorator.py +4 -2
  215. agno/tools/duckduckgo.py +15 -11
  216. agno/tools/e2b.py +14 -7
  217. agno/tools/eleven_labs.py +23 -25
  218. agno/tools/exa.py +21 -16
  219. agno/tools/file.py +153 -23
  220. agno/tools/file_generation.py +350 -0
  221. agno/tools/firecrawl.py +4 -4
  222. agno/tools/function.py +250 -30
  223. agno/tools/gmail.py +238 -14
  224. agno/tools/google_drive.py +270 -0
  225. agno/tools/googlecalendar.py +36 -8
  226. agno/tools/googlesheets.py +20 -5
  227. agno/tools/jira.py +20 -0
  228. agno/tools/knowledge.py +3 -3
  229. agno/tools/mcp/__init__.py +10 -0
  230. agno/tools/mcp/mcp.py +331 -0
  231. agno/tools/mcp/multi_mcp.py +347 -0
  232. agno/tools/mcp/params.py +24 -0
  233. agno/tools/mcp_toolbox.py +284 -0
  234. agno/tools/mem0.py +11 -17
  235. agno/tools/memori.py +1 -53
  236. agno/tools/memory.py +419 -0
  237. agno/tools/models/nebius.py +5 -5
  238. agno/tools/models_labs.py +20 -10
  239. agno/tools/notion.py +204 -0
  240. agno/tools/parallel.py +314 -0
  241. agno/tools/scrapegraph.py +58 -31
  242. agno/tools/searxng.py +2 -2
  243. agno/tools/serper.py +2 -2
  244. agno/tools/slack.py +18 -3
  245. agno/tools/spider.py +2 -2
  246. agno/tools/tavily.py +146 -0
  247. agno/tools/whatsapp.py +1 -1
  248. agno/tools/workflow.py +278 -0
  249. agno/tools/yfinance.py +12 -11
  250. agno/utils/agent.py +820 -0
  251. agno/utils/audio.py +27 -0
  252. agno/utils/common.py +90 -1
  253. agno/utils/events.py +217 -2
  254. agno/utils/gemini.py +180 -22
  255. agno/utils/hooks.py +57 -0
  256. agno/utils/http.py +111 -0
  257. agno/utils/knowledge.py +12 -5
  258. agno/utils/log.py +1 -0
  259. agno/utils/mcp.py +92 -2
  260. agno/utils/media.py +188 -10
  261. agno/utils/merge_dict.py +22 -1
  262. agno/utils/message.py +60 -0
  263. agno/utils/models/claude.py +40 -11
  264. agno/utils/print_response/agent.py +105 -21
  265. agno/utils/print_response/team.py +103 -38
  266. agno/utils/print_response/workflow.py +251 -34
  267. agno/utils/reasoning.py +22 -1
  268. agno/utils/serialize.py +32 -0
  269. agno/utils/streamlit.py +16 -10
  270. agno/utils/string.py +41 -0
  271. agno/utils/team.py +98 -9
  272. agno/utils/tools.py +1 -1
  273. agno/vectordb/base.py +23 -4
  274. agno/vectordb/cassandra/cassandra.py +65 -9
  275. agno/vectordb/chroma/chromadb.py +182 -38
  276. agno/vectordb/clickhouse/clickhousedb.py +64 -11
  277. agno/vectordb/couchbase/couchbase.py +105 -10
  278. agno/vectordb/lancedb/lance_db.py +124 -133
  279. agno/vectordb/langchaindb/langchaindb.py +25 -7
  280. agno/vectordb/lightrag/lightrag.py +17 -3
  281. agno/vectordb/llamaindex/__init__.py +3 -0
  282. agno/vectordb/llamaindex/llamaindexdb.py +46 -7
  283. agno/vectordb/milvus/milvus.py +126 -9
  284. agno/vectordb/mongodb/__init__.py +7 -1
  285. agno/vectordb/mongodb/mongodb.py +112 -7
  286. agno/vectordb/pgvector/pgvector.py +142 -21
  287. agno/vectordb/pineconedb/pineconedb.py +80 -8
  288. agno/vectordb/qdrant/qdrant.py +125 -39
  289. agno/vectordb/redis/__init__.py +9 -0
  290. agno/vectordb/redis/redisdb.py +694 -0
  291. agno/vectordb/singlestore/singlestore.py +111 -25
  292. agno/vectordb/surrealdb/surrealdb.py +31 -5
  293. agno/vectordb/upstashdb/upstashdb.py +76 -8
  294. agno/vectordb/weaviate/weaviate.py +86 -15
  295. agno/workflow/__init__.py +2 -0
  296. agno/workflow/agent.py +299 -0
  297. agno/workflow/condition.py +112 -18
  298. agno/workflow/loop.py +69 -10
  299. agno/workflow/parallel.py +266 -118
  300. agno/workflow/router.py +110 -17
  301. agno/workflow/step.py +638 -129
  302. agno/workflow/steps.py +65 -6
  303. agno/workflow/types.py +61 -23
  304. agno/workflow/workflow.py +2085 -272
  305. {agno-2.0.1.dist-info → agno-2.3.0.dist-info}/METADATA +182 -58
  306. agno-2.3.0.dist-info/RECORD +577 -0
  307. agno/knowledge/reader/url_reader.py +0 -128
  308. agno/tools/googlesearch.py +0 -98
  309. agno/tools/mcp.py +0 -610
  310. agno/utils/models/aws_claude.py +0 -170
  311. agno-2.0.1.dist-info/RECORD +0 -515
  312. {agno-2.0.1.dist-info → agno-2.3.0.dist-info}/WHEEL +0 -0
  313. {agno-2.0.1.dist-info → agno-2.3.0.dist-info}/licenses/LICENSE +0 -0
  314. {agno-2.0.1.dist-info → agno-2.3.0.dist-info}/top_level.txt +0 -0
agno/knowledge/chunking/agentic.py
@@ -1,28 +1,31 @@
-from typing import List, Optional
+from typing import List, Optional, Union

 from agno.knowledge.chunking.strategy import ChunkingStrategy
 from agno.knowledge.document.base import Document
 from agno.models.base import Model
 from agno.models.defaults import DEFAULT_OPENAI_MODEL_ID
 from agno.models.message import Message
+from agno.models.utils import get_model


 class AgenticChunking(ChunkingStrategy):
     """Chunking strategy that uses an LLM to determine natural breakpoints in the text"""

-    def __init__(self, model: Optional[Model] = None, max_chunk_size: int = 5000):
+    def __init__(self, model: Optional[Union[Model, str]] = None, max_chunk_size: int = 5000):
+        # Convert model string to Model instance
+        model = get_model(model)
         if model is None:
             try:
                 from agno.models.openai import OpenAIChat
             except Exception:
                 raise ValueError("`openai` isn't installed. Please install it with `pip install openai`")
             model = OpenAIChat(DEFAULT_OPENAI_MODEL_ID)
-        self.max_chunk_size = max_chunk_size
+        self.chunk_size = max_chunk_size
         self.model = model

     def chunk(self, document: Document) -> List[Document]:
         """Split text into chunks using LLM to determine natural breakpoints based on context"""
-        if len(document.content) <= self.max_chunk_size:
+        if len(document.content) <= self.chunk_size:
             return [document]

         chunks: List[Document] = []
@@ -31,22 +34,22 @@ class AgenticChunking(ChunkingStrategy):
         chunk_number = 1

         while remaining_text:
-            # Ask model to find a good breakpoint within max_chunk_size
-            prompt = f"""Analyze this text and determine a natural breakpoint within the first {self.max_chunk_size} characters.
+            # Ask model to find a good breakpoint within chunk_size
+            prompt = f"""Analyze this text and determine a natural breakpoint within the first {self.chunk_size} characters.
             Consider semantic completeness, paragraph boundaries, and topic transitions.
             Return only the character position number of where to break the text:

-            {remaining_text[: self.max_chunk_size]}"""
+            {remaining_text[: self.chunk_size]}"""

             try:
                 response = self.model.response([Message(role="user", content=prompt)])
                 if response and response.content:
-                    break_point = min(int(response.content.strip()), self.max_chunk_size)
+                    break_point = min(int(response.content.strip()), self.chunk_size)
                 else:
-                    break_point = self.max_chunk_size
+                    break_point = self.chunk_size
             except Exception:
                 # Fallback to max size if model fails
-                break_point = self.max_chunk_size
+                break_point = self.chunk_size

             # Extract chunk and update remaining text
             chunk = remaining_text[:break_point].strip()
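With the widened signature, AgenticChunking now also accepts a plain model id string, which get_model resolves before the OpenAI fallback runs. A minimal usage sketch; the exact id format get_model accepts and the Document(content=...) constructor are assumptions, not shown in this diff:

from agno.knowledge.chunking.agentic import AgenticChunking
from agno.knowledge.document.base import Document

# A Model instance still works; a string id is assumed to be resolvable by get_model
chunker = AgenticChunking(model="gpt-4o-mini", max_chunk_size=2000)
chunks = chunker.chunk(Document(content="A long report section. " * 500))
print(len(chunks))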
agno/knowledge/chunking/fixed.py
@@ -7,7 +7,7 @@ from agno.knowledge.document.base import Document
 class FixedSizeChunking(ChunkingStrategy):
     """Chunking strategy that splits text into fixed-size chunks with optional overlap"""

-    def __init__(self, chunk_size: int = 100, overlap: int = 0):
+    def __init__(self, chunk_size: int = 5000, overlap: int = 0):
         # overlap must be less than chunk size
         if overlap >= chunk_size:
             raise ValueError(f"Invalid parameters: overlap ({overlap}) must be less than chunk size ({chunk_size}).")
agno/knowledge/chunking/semantic.py
@@ -1,16 +1,22 @@
-from typing import List, Optional
+import inspect
+from typing import Any, Dict, List, Optional

 from agno.knowledge.chunking.strategy import ChunkingStrategy
 from agno.knowledge.document.base import Document
 from agno.knowledge.embedder.base import Embedder
-from agno.knowledge.embedder.openai import OpenAIEmbedder
+from agno.utils.log import log_info


 class SemanticChunking(ChunkingStrategy):
     """Chunking strategy that splits text into semantic chunks using chonkie"""

     def __init__(self, embedder: Optional[Embedder] = None, chunk_size: int = 5000, similarity_threshold: float = 0.5):
-        self.embedder = embedder or OpenAIEmbedder(id="text-embedding-3-small")  # type: ignore
+        if embedder is None:
+            from agno.knowledge.embedder.openai import OpenAIEmbedder
+
+            embedder = OpenAIEmbedder()  # type: ignore
+            log_info("Embedder not provided, using OpenAIEmbedder as default.")
+        self.embedder = embedder
         self.chunk_size = chunk_size
         self.similarity_threshold = similarity_threshold
         self.chunker = None  # Will be initialized lazily when needed
@@ -26,11 +32,37 @@ class SemanticChunking(ChunkingStrategy):
                 "Please install it using `pip install chonkie` to use SemanticChunking."
             )

-        self.chunker = SemanticChunker(
-            embedding_model=self.embedder.id,  # type: ignore
-            chunk_size=self.chunk_size,
-            threshold=self.similarity_threshold,
-        )
+        # Build arguments dynamically based on chonkie's supported signature
+        params: Dict[str, Any] = {
+            "chunk_size": self.chunk_size,
+            "threshold": self.similarity_threshold,
+        }
+
+        try:
+            sig = inspect.signature(SemanticChunker)
+            param_names = set(sig.parameters.keys())
+
+            # Prefer passing a callable to avoid Chonkie initializing its own client
+            if "embedding_fn" in param_names:
+                params["embedding_fn"] = self.embedder.get_embedding  # type: ignore[attr-defined]
+                # If chonkie allows specifying dimensions, provide them
+                if "embedding_dimensions" in param_names and getattr(self.embedder, "dimensions", None):
+                    params["embedding_dimensions"] = self.embedder.dimensions  # type: ignore[attr-defined]
+            elif "embedder" in param_names:
+                # Some versions may accept an embedder object directly
+                params["embedder"] = self.embedder
+            else:
+                # Fallback to model id
+                params["embedding_model"] = getattr(self.embedder, "id", None) or "text-embedding-3-small"
+
+            self.chunker = SemanticChunker(**params)
+        except Exception:
+            # As a final fallback, use the original behavior
+            self.chunker = SemanticChunker(
+                embedding_model=getattr(self.embedder, "id", None) or "text-embedding-3-small",
+                chunk_size=self.chunk_size,
+                threshold=self.similarity_threshold,
+            )

     def chunk(self, document: Document) -> List[Document]:
         """Split document into semantic chunks using chonkie"""
agno/knowledge/chunking/strategy.py
@@ -1,6 +1,6 @@
 from abc import ABC, abstractmethod
 from enum import Enum
-from typing import List
+from typing import List, Optional

 from agno.knowledge.document.base import Document

@@ -60,7 +60,13 @@ class ChunkingStrategyFactory:
     """Factory for creating chunking strategy instances."""

     @classmethod
-    def create_strategy(cls, strategy_type: ChunkingStrategyType, **kwargs) -> ChunkingStrategy:
+    def create_strategy(
+        cls,
+        strategy_type: ChunkingStrategyType,
+        chunk_size: Optional[int] = None,
+        overlap: Optional[int] = None,
+        **kwargs,
+    ) -> ChunkingStrategy:
         """Create an instance of the chunking strategy with the given parameters."""
         strategy_map = {
             ChunkingStrategyType.AGENTIC_CHUNKER: cls._create_agentic_chunking,
@@ -71,51 +77,89 @@
             ChunkingStrategyType.ROW_CHUNKER: cls._create_row_chunking,
             ChunkingStrategyType.MARKDOWN_CHUNKER: cls._create_markdown_chunking,
         }
-        return strategy_map[strategy_type](**kwargs)
+        return strategy_map[strategy_type](chunk_size=chunk_size, overlap=overlap, **kwargs)

     @classmethod
-    def _create_agentic_chunking(cls, **kwargs) -> ChunkingStrategy:
+    def _create_agentic_chunking(
+        cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
+    ) -> ChunkingStrategy:
         from agno.knowledge.chunking.agentic import AgenticChunking

-        # Map chunk_size to max_chunk_size for AgenticChunking
-        if "chunk_size" in kwargs and "max_chunk_size" not in kwargs:
-            kwargs["max_chunk_size"] = kwargs.pop("chunk_size")
+        # AgenticChunking accepts max_chunk_size (not chunk_size) and no overlap
+        if chunk_size is not None:
+            kwargs["max_chunk_size"] = chunk_size
+        # Remove overlap since AgenticChunking doesn't support it
         return AgenticChunking(**kwargs)

     @classmethod
-    def _create_document_chunking(cls, **kwargs) -> ChunkingStrategy:
+    def _create_document_chunking(
+        cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
+    ) -> ChunkingStrategy:
         from agno.knowledge.chunking.document import DocumentChunking

+        # DocumentChunking accepts both chunk_size and overlap
+        if chunk_size is not None:
+            kwargs["chunk_size"] = chunk_size
+        if overlap is not None:
+            kwargs["overlap"] = overlap
         return DocumentChunking(**kwargs)

     @classmethod
-    def _create_recursive_chunking(cls, **kwargs) -> ChunkingStrategy:
+    def _create_recursive_chunking(
+        cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
+    ) -> ChunkingStrategy:
         from agno.knowledge.chunking.recursive import RecursiveChunking

+        # RecursiveChunking accepts both chunk_size and overlap
+        if chunk_size is not None:
+            kwargs["chunk_size"] = chunk_size
+        if overlap is not None:
+            kwargs["overlap"] = overlap
         return RecursiveChunking(**kwargs)

     @classmethod
-    def _create_semantic_chunking(cls, **kwargs) -> ChunkingStrategy:
+    def _create_semantic_chunking(
+        cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
+    ) -> ChunkingStrategy:
         from agno.knowledge.chunking.semantic import SemanticChunking

+        # SemanticChunking accepts chunk_size but not overlap
+        if chunk_size is not None:
+            kwargs["chunk_size"] = chunk_size
+        # Remove overlap since SemanticChunking doesn't support it
        return SemanticChunking(**kwargs)

     @classmethod
-    def _create_fixed_chunking(cls, **kwargs) -> ChunkingStrategy:
+    def _create_fixed_chunking(
+        cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
+    ) -> ChunkingStrategy:
         from agno.knowledge.chunking.fixed import FixedSizeChunking

+        # FixedSizeChunking accepts both chunk_size and overlap
+        if chunk_size is not None:
+            kwargs["chunk_size"] = chunk_size
+        if overlap is not None:
+            kwargs["overlap"] = overlap
         return FixedSizeChunking(**kwargs)

     @classmethod
-    def _create_row_chunking(cls, **kwargs) -> ChunkingStrategy:
+    def _create_row_chunking(
+        cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
+    ) -> ChunkingStrategy:
         from agno.knowledge.chunking.row import RowChunking

-        # Remove chunk_size if present since RowChunking doesn't use it
-        kwargs.pop("chunk_size", None)
+        # RowChunking doesn't accept chunk_size or overlap, only skip_header and clean_rows
         return RowChunking(**kwargs)

     @classmethod
-    def _create_markdown_chunking(cls, **kwargs) -> ChunkingStrategy:
+    def _create_markdown_chunking(
+        cls, chunk_size: Optional[int] = None, overlap: Optional[int] = None, **kwargs
+    ) -> ChunkingStrategy:
         from agno.knowledge.chunking.markdown import MarkdownChunking

+        # MarkdownChunking accepts both chunk_size and overlap
+        if chunk_size is not None:
+            kwargs["chunk_size"] = chunk_size
+        if overlap is not None:
+            kwargs["overlap"] = overlap
         return MarkdownChunking(**kwargs)
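Because create_strategy now owns the chunk_size/overlap routing, callers can pass both uniformly and let the factory drop whatever a strategy does not support. A sketch using only the enum members visible in this hunk, assuming both names are exported from agno.knowledge.chunking.strategy:

from agno.knowledge.chunking.strategy import ChunkingStrategyFactory, ChunkingStrategyType

# chunk_size is remapped to max_chunk_size for AgenticChunking; overlap is silently dropped
agentic = ChunkingStrategyFactory.create_strategy(
    ChunkingStrategyType.AGENTIC_CHUNKER, chunk_size=3000, overlap=50
)

# RowChunking ignores both and only takes its own kwargs (e.g. skip_header, clean_rows)
rows = ChunkingStrategyFactory.create_strategy(ChunkingStrategyType.ROW_CHUNKER, skip_header=True)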
agno/knowledge/embedder/aws_bedrock.py
@@ -5,7 +5,7 @@ from typing import Any, Dict, List, Optional, Tuple

 from agno.exceptions import AgnoError, ModelProviderError
 from agno.knowledge.embedder.base import Embedder
-from agno.utils.log import log_error, logger
+from agno.utils.log import log_error, log_warning

 try:
     from boto3 import client as AwsClient
@@ -69,6 +69,11 @@ class AwsBedrockEmbedder(Embedder):
     client_params: Optional[Dict[str, Any]] = None
     client: Optional[AwsClient] = None

+    def __post_init__(self):
+        if self.enable_batch:
+            log_warning("AwsBedrockEmbedder does not support batch embeddings, setting enable_batch to False")
+            self.enable_batch = False
+
     def get_client(self) -> AwsClient:
         """
         Returns an AWS Bedrock client.
@@ -220,10 +225,10 @@ class AwsBedrockEmbedder(Embedder):
             # Fallback to the first available embedding type
             for embedding_type in response["embeddings"]:
                 return response["embeddings"][embedding_type][0]
-            logger.warning("No embeddings found in response")
+            log_warning("No embeddings found in response")
             return []
         except Exception as e:
-            logger.warning(f"Error extracting embeddings: {e}")
+            log_warning(f"Error extracting embeddings: {e}")
             return []

     def get_embedding_and_usage(self, text: str) -> Tuple[List[float], Optional[Dict[str, Any]]]:
@@ -286,7 +291,7 @@ class AwsBedrockEmbedder(Embedder):
             # Fallback to the first available embedding type
             for embedding_type in response_body["embeddings"]:
                 return response_body["embeddings"][embedding_type][0]
-            logger.warning("No embeddings found in response")
+            log_warning("No embeddings found in response")
             return []
         except ClientError as e:
             log_error(f"Unexpected error calling Bedrock API: {str(e)}")
agno/knowledge/embedder/azure_openai.py
@@ -154,3 +154,57 @@ class AzureOpenAIEmbedder(Embedder):
         embedding = response.data[0].embedding
         usage = response.usage
         return embedding, usage.model_dump()
+
+    async def async_get_embeddings_batch_and_usage(
+        self, texts: List[str]
+    ) -> Tuple[List[List[float]], List[Optional[Dict]]]:
+        """
+        Get embeddings and usage for multiple texts in batches.
+
+        Args:
+            texts: List of text strings to embed
+
+        Returns:
+            Tuple of (List of embedding vectors, List of usage dictionaries)
+        """
+        all_embeddings = []
+        all_usage = []
+        logger.info(f"Getting embeddings and usage for {len(texts)} texts in batches of {self.batch_size}")
+
+        for i in range(0, len(texts), self.batch_size):
+            batch_texts = texts[i : i + self.batch_size]
+
+            req: Dict[str, Any] = {
+                "input": batch_texts,
+                "model": self.id,
+                "encoding_format": self.encoding_format,
+            }
+            if self.user is not None:
+                req["user"] = self.user
+            if self.id.startswith("text-embedding-3"):
+                req["dimensions"] = self.dimensions
+            if self.request_params:
+                req.update(self.request_params)
+
+            try:
+                response: CreateEmbeddingResponse = await self.aclient.embeddings.create(**req)
+                batch_embeddings = [data.embedding for data in response.data]
+                all_embeddings.extend(batch_embeddings)
+
+                # For each embedding in the batch, add the same usage information
+                usage_dict = response.usage.model_dump() if response.usage else None
+                all_usage.extend([usage_dict] * len(batch_embeddings))
+            except Exception as e:
+                logger.warning(f"Error in async batch embedding: {e}")
+                # Fallback to individual calls for this batch
+                for text in batch_texts:
+                    try:
+                        embedding, usage = await self.async_get_embedding_and_usage(text)
+                        all_embeddings.append(embedding)
+                        all_usage.append(usage)
+                    except Exception as e2:
+                        logger.warning(f"Error in individual async embedding fallback: {e2}")
+                        all_embeddings.append([])
+                        all_usage.append(None)
+
+        return all_embeddings, all_usage
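The new async batch helper sends texts to the embeddings endpoint in batch_size slices and falls back to per-text calls when a batch fails. A usage sketch, assuming Azure OpenAI credentials and deployment settings are already configured for AzureOpenAIEmbedder and that batch_size can be set at construction time:

import asyncio

from agno.knowledge.embedder.azure_openai import AzureOpenAIEmbedder

async def main() -> None:
    embedder = AzureOpenAIEmbedder(batch_size=50)
    texts = ["first document", "second document", "third document"]
    embeddings, usage = await embedder.async_get_embeddings_batch_and_usage(texts)
    print(len(embeddings), usage[0])

asyncio.run(main())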
agno/knowledge/embedder/base.py
@@ -7,6 +7,8 @@ class Embedder:
     """Base class for managing embedders"""

     dimensions: Optional[int] = 1536
+    enable_batch: bool = False
+    batch_size: int = 100  # Number of texts to process in each API call

     def get_embedding(self, text: str) -> List[float]:
         raise NotImplementedError
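These two base-class fields drive the provider batch paths added elsewhere in this release: enable_batch opts an embedder into batched calls and batch_size caps how many texts go into each request. A minimal sketch of slicing work by batch_size; whether these fields are settable at construction time is an assumption:

from typing import List

from agno.knowledge.embedder.openai import OpenAIEmbedder

embedder = OpenAIEmbedder(enable_batch=True, batch_size=50)  # constructor kwargs assumed, not shown in this diff
texts: List[str] = [f"chunk {i}" for i in range(120)]

# 120 texts with batch_size=50 -> requests of 50, 50, and 20 texts
batches = [texts[i : i + embedder.batch_size] for i in range(0, len(texts), embedder.batch_size)]
print([len(b) for b in batches])  # [50, 50, 20]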
agno/knowledge/embedder/cohere.py
@@ -1,8 +1,9 @@
+import time
 from dataclasses import dataclass
 from typing import Any, Dict, List, Optional, Tuple, Union

 from agno.knowledge.embedder.base import Embedder
-from agno.utils.log import logger
+from agno.utils.log import log_debug, log_error, log_info, log_warning

 try:
     from cohere import AsyncClient as AsyncCohereClient
@@ -22,6 +23,7 @@ class CohereEmbedder(Embedder):
     client_params: Optional[Dict[str, Any]] = None
     cohere_client: Optional[CohereClient] = None
     async_client: Optional[AsyncCohereClient] = None
+    exponential_backoff: bool = False  # Enable exponential backoff on rate limits

     @property
     def client(self) -> CohereClient:
@@ -61,6 +63,111 @@ class CohereEmbedder(Embedder):
             request_params.update(self.request_params)
         return self.client.embed(texts=[text], **request_params)

+    def _get_batch_request_params(self) -> Dict[str, Any]:
+        """Get request parameters for batch embedding calls."""
+        request_params: Dict[str, Any] = {}
+
+        if self.id:
+            request_params["model"] = self.id
+        if self.input_type:
+            request_params["input_type"] = self.input_type
+        if self.embedding_types:
+            request_params["embedding_types"] = self.embedding_types
+        if self.request_params:
+            request_params.update(self.request_params)
+
+        return request_params
+
+    def _is_rate_limit_error(self, error: Exception) -> bool:
+        """Check if the error is a rate limiting error."""
+        if hasattr(error, "status_code") and error.status_code == 429:
+            return True
+        error_str = str(error).lower()
+        return any(
+            phrase in error_str
+            for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
+        )
+
+    def _exponential_backoff_sleep(self, attempt: int, base_delay: float = 1.0) -> None:
+        """Sleep with exponential backoff."""
+        delay = base_delay * (2**attempt) + (time.time() % 1)  # Add jitter
+        log_debug(f"Rate limited, waiting {delay:.2f} seconds before retry (attempt {attempt + 1})")
+        time.sleep(delay)
+
+    async def _async_rate_limit_backoff_sleep(self, attempt: int) -> None:
+        """Async version of rate-limit-aware backoff for APIs with per-minute limits."""
+        import asyncio
+
+        # For 40 req/min APIs like Cohere Trial, we need longer waits
+        if attempt == 0:
+            delay = 15.0  # Wait 15 seconds (1/4 of minute window)
+        elif attempt == 1:
+            delay = 30.0  # Wait 30 seconds (1/2 of minute window)
+        else:
+            delay = 60.0  # Wait full minute for window reset
+
+        # Add small jitter
+        delay += time.time() % 3
+
+        log_debug(
+            f"Async rate limit backoff, waiting {delay:.1f} seconds for rate limit window reset (attempt {attempt + 1})"
+        )
+        await asyncio.sleep(delay)
+
+    async def _async_batch_with_retry(
+        self, texts: List[str], max_retries: int = 3
+    ) -> Tuple[List[List[float]], List[Optional[Dict]]]:
+        """Execute async batch embedding with rate-limit-aware backoff for rate limiting."""
+
+        log_debug(f"Starting async batch retry for {len(texts)} texts with max_retries={max_retries}")
+
+        for attempt in range(max_retries + 1):
+            try:
+                request_params = self._get_batch_request_params()
+                response: Union[
+                    EmbeddingsFloatsEmbedResponse, EmbeddingsByTypeEmbedResponse
+                ] = await self.aclient.embed(texts=texts, **request_params)
+
+                # Extract embeddings from response
+                if isinstance(response, EmbeddingsFloatsEmbedResponse):
+                    batch_embeddings = response.embeddings
+                elif isinstance(response, EmbeddingsByTypeEmbedResponse):
+                    batch_embeddings = response.embeddings.float_ if response.embeddings.float_ else []
+                else:
+                    log_warning("No embeddings found in response")
+                    batch_embeddings = []
+
+                # Extract usage information
+                usage = response.meta.billed_units if response.meta else None
+                usage_dict = usage.model_dump() if usage else None
+                all_usage = [usage_dict] * len(batch_embeddings)
+
+                log_debug(f"Async batch embedding succeeded on attempt {attempt + 1}")
+                return batch_embeddings, all_usage
+
+            except Exception as e:
+                if self._is_rate_limit_error(e):
+                    if not self.exponential_backoff:
+                        log_warning(
+                            "Rate limit detected. To enable automatic backoff retry, set enable_backoff=True when creating the embedder."
+                        )
+                        raise e
+
+                    log_info(f"Async rate limit detected on attempt {attempt + 1}")
+                    if attempt < max_retries:
+                        await self._async_rate_limit_backoff_sleep(attempt)
+                        continue
+                    else:
+                        log_warning(f"Async max retries ({max_retries}) reached for rate limiting")
+                        raise e
+                else:
+                    log_debug(f"Async non-rate-limit error on attempt {attempt + 1}: {e}")
+                    raise e
+
+        # This should never be reached, but just in case
+        log_error("Could not create embeddings. End of retry loop reached.")
+        return [], []
+
     def get_embedding(self, text: str) -> List[float]:
         response: Union[EmbeddingsFloatsEmbedResponse, EmbeddingsByTypeEmbedResponse] = self.response(text=text)
         try:
@@ -69,10 +176,10 @@ class CohereEmbedder(Embedder):
             elif isinstance(response, EmbeddingsByTypeEmbedResponse):
                 return response.embeddings.float_[0] if response.embeddings.float_ else []
             else:
-                logger.warning("No embeddings found")
+                log_warning("No embeddings found")
                 return []
         except Exception as e:
-            logger.warning(e)
+            log_warning(e)
             return []

     def get_embedding_and_usage(self, text: str) -> Tuple[List[float], Optional[Dict[str, Any]]]:
@@ -110,10 +217,10 @@ class CohereEmbedder(Embedder):
             elif isinstance(response, EmbeddingsByTypeEmbedResponse):
                 return response.embeddings.float_[0] if response.embeddings.float_ else []
             else:
-                logger.warning("No embeddings found")
+                log_warning("No embeddings found")
                 return []
         except Exception as e:
-            logger.warning(e)
+            log_warning(e)
             return []

     async def async_get_embedding_and_usage(self, text: str) -> Tuple[List[float], Optional[Dict[str, Any]]]:
@@ -142,3 +249,75 @@ class CohereEmbedder(Embedder):
         if usage:
             return embedding, usage.model_dump()
         return embedding, None
+
+    async def async_get_embeddings_batch_and_usage(
+        self, texts: List[str]
+    ) -> Tuple[List[List[float]], List[Optional[Dict]]]:
+        """
+        Get embeddings and usage for multiple texts in batches (async version).
+
+        Args:
+            texts: List of text strings to embed
+
+        Returns:
+            Tuple of (List of embedding vectors, List of usage dictionaries)
+        """
+        all_embeddings = []
+        all_usage = []
+        log_info(f"Getting embeddings and usage for {len(texts)} texts in batches of {self.batch_size} (async)")
+
+        for i in range(0, len(texts), self.batch_size):
+            batch_texts = texts[i : i + self.batch_size]
+
+            try:
+                # Use retry logic for batch processing
+                batch_embeddings, batch_usage = await self._async_batch_with_retry(batch_texts)
+                all_embeddings.extend(batch_embeddings)
+                all_usage.extend(batch_usage)
+
+            except Exception as e:
+                log_warning(f"Async batch embedding failed after retries: {e}")
+
+                # Check if this is a rate limit error and backoff is disabled
+                if self._is_rate_limit_error(e) and not self.exponential_backoff:
+                    log_warning("Rate limit hit and backoff is disabled. Failing immediately.")
+                    raise e
+
+                # Only fall back to individual calls for non-rate-limit errors
+                # For rate limit errors, we should reduce batch size instead
+                if self._is_rate_limit_error(e):
+                    log_warning("Rate limit hit even after retries. Consider reducing batch_size or upgrading API key.")
+                    # Try with smaller batch size
+                    if len(batch_texts) > 1:
+                        smaller_batch_size = max(1, len(batch_texts) // 2)
+                        log_info(f"Retrying with smaller batch size: {smaller_batch_size}")
+                        for j in range(0, len(batch_texts), smaller_batch_size):
+                            small_batch = batch_texts[j : j + smaller_batch_size]
+                            try:
+                                small_embeddings, small_usage = await self._async_batch_with_retry(small_batch)
+                                all_embeddings.extend(small_embeddings)
+                                all_usage.extend(small_usage)
+                            except Exception as e3:
+                                log_error(f"Failed even with reduced batch size: {e3}")
+                                # Fall back to empty results for this batch
+                                all_embeddings.extend([[] for _ in small_batch])
+                                all_usage.extend([None for _ in small_batch])
+                    else:
+                        # Single item already failed, add empty result
+                        log_debug("Single item failed, adding empty result")
+                        all_embeddings.append([])
+                        all_usage.append(None)
+                else:
+                    # For non-rate-limit errors, fall back to individual calls
+                    log_debug("Non-rate-limit error, falling back to individual calls")
+                    for text in batch_texts:
+                        try:
+                            embedding, usage = await self.async_get_embedding_and_usage(text)
+                            all_embeddings.append(embedding)
+                            all_usage.append(usage)
+                        except Exception as e2:
+                            log_warning(f"Error in individual async embedding fallback: {e2}")
+                            all_embeddings.append([])
+                            all_usage.append(None)
+
+        return all_embeddings, all_usage
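Batch embedding for Cohere is opt-in for retries: rate-limit errors only trigger the timed backoff when exponential_backoff=True, otherwise the first 429 is raised. A usage sketch, assuming a Cohere API key is already configured and the constructor accepts these fields:

import asyncio

from agno.knowledge.embedder.cohere import CohereEmbedder

async def main() -> None:
    # Without exponential_backoff=True, the first rate-limit error is re-raised immediately
    embedder = CohereEmbedder(exponential_backoff=True, batch_size=40)
    embeddings, usage = await embedder.async_get_embeddings_batch_and_usage(
        ["doc one", "doc two", "doc three"]
    )
    print(len(embeddings), usage[0])

asyncio.run(main())

One wrinkle worth noting: the warning emitted on the no-retry path tells users to set enable_backoff=True, while the field added in this diff is named exponential_backoff.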
agno/knowledge/embedder/fastembed.py
@@ -23,7 +23,7 @@ class FastEmbedEmbedder(Embedder):
     """Using BAAI/bge-small-en-v1.5 model, more models available: https://qdrant.github.io/fastembed/examples/Supported_Models/"""

     id: str = "BAAI/bge-small-en-v1.5"
-    dimensions: int = 384
+    dimensions: Optional[int] = 384

     def get_embedding(self, text: str) -> List[float]:
         model = TextEmbedding(model_name=self.id)