agno-2.2.13-py3-none-any.whl → agno-2.4.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (383):
  1. agno/agent/__init__.py +6 -0
  2. agno/agent/agent.py +5252 -3145
  3. agno/agent/remote.py +525 -0
  4. agno/api/api.py +2 -0
  5. agno/client/__init__.py +3 -0
  6. agno/client/a2a/__init__.py +10 -0
  7. agno/client/a2a/client.py +554 -0
  8. agno/client/a2a/schemas.py +112 -0
  9. agno/client/a2a/utils.py +369 -0
  10. agno/client/os.py +2669 -0
  11. agno/compression/__init__.py +3 -0
  12. agno/compression/manager.py +247 -0
  13. agno/culture/manager.py +2 -2
  14. agno/db/base.py +927 -6
  15. agno/db/dynamo/dynamo.py +788 -2
  16. agno/db/dynamo/schemas.py +128 -0
  17. agno/db/dynamo/utils.py +26 -3
  18. agno/db/firestore/firestore.py +674 -50
  19. agno/db/firestore/schemas.py +41 -0
  20. agno/db/firestore/utils.py +25 -10
  21. agno/db/gcs_json/gcs_json_db.py +506 -3
  22. agno/db/gcs_json/utils.py +14 -2
  23. agno/db/in_memory/in_memory_db.py +203 -4
  24. agno/db/in_memory/utils.py +14 -2
  25. agno/db/json/json_db.py +498 -2
  26. agno/db/json/utils.py +14 -2
  27. agno/db/migrations/manager.py +199 -0
  28. agno/db/migrations/utils.py +19 -0
  29. agno/db/migrations/v1_to_v2.py +54 -16
  30. agno/db/migrations/versions/__init__.py +0 -0
  31. agno/db/migrations/versions/v2_3_0.py +977 -0
  32. agno/db/mongo/async_mongo.py +1013 -39
  33. agno/db/mongo/mongo.py +684 -4
  34. agno/db/mongo/schemas.py +48 -0
  35. agno/db/mongo/utils.py +17 -0
  36. agno/db/mysql/__init__.py +2 -1
  37. agno/db/mysql/async_mysql.py +2958 -0
  38. agno/db/mysql/mysql.py +722 -53
  39. agno/db/mysql/schemas.py +77 -11
  40. agno/db/mysql/utils.py +151 -8
  41. agno/db/postgres/async_postgres.py +1254 -137
  42. agno/db/postgres/postgres.py +2316 -93
  43. agno/db/postgres/schemas.py +153 -21
  44. agno/db/postgres/utils.py +22 -7
  45. agno/db/redis/redis.py +531 -3
  46. agno/db/redis/schemas.py +36 -0
  47. agno/db/redis/utils.py +31 -15
  48. agno/db/schemas/evals.py +1 -0
  49. agno/db/schemas/memory.py +20 -9
  50. agno/db/singlestore/schemas.py +70 -1
  51. agno/db/singlestore/singlestore.py +737 -74
  52. agno/db/singlestore/utils.py +13 -3
  53. agno/db/sqlite/async_sqlite.py +1069 -89
  54. agno/db/sqlite/schemas.py +133 -1
  55. agno/db/sqlite/sqlite.py +2203 -165
  56. agno/db/sqlite/utils.py +21 -11
  57. agno/db/surrealdb/models.py +25 -0
  58. agno/db/surrealdb/surrealdb.py +603 -1
  59. agno/db/utils.py +60 -0
  60. agno/eval/__init__.py +26 -3
  61. agno/eval/accuracy.py +25 -12
  62. agno/eval/agent_as_judge.py +871 -0
  63. agno/eval/base.py +29 -0
  64. agno/eval/performance.py +10 -4
  65. agno/eval/reliability.py +22 -13
  66. agno/eval/utils.py +2 -1
  67. agno/exceptions.py +42 -0
  68. agno/hooks/__init__.py +3 -0
  69. agno/hooks/decorator.py +164 -0
  70. agno/integrations/discord/client.py +13 -2
  71. agno/knowledge/__init__.py +4 -0
  72. agno/knowledge/chunking/code.py +90 -0
  73. agno/knowledge/chunking/document.py +65 -4
  74. agno/knowledge/chunking/fixed.py +4 -1
  75. agno/knowledge/chunking/markdown.py +102 -11
  76. agno/knowledge/chunking/recursive.py +2 -2
  77. agno/knowledge/chunking/semantic.py +130 -48
  78. agno/knowledge/chunking/strategy.py +18 -0
  79. agno/knowledge/embedder/azure_openai.py +0 -1
  80. agno/knowledge/embedder/google.py +1 -1
  81. agno/knowledge/embedder/mistral.py +1 -1
  82. agno/knowledge/embedder/nebius.py +1 -1
  83. agno/knowledge/embedder/openai.py +16 -12
  84. agno/knowledge/filesystem.py +412 -0
  85. agno/knowledge/knowledge.py +4261 -1199
  86. agno/knowledge/protocol.py +134 -0
  87. agno/knowledge/reader/arxiv_reader.py +3 -2
  88. agno/knowledge/reader/base.py +9 -7
  89. agno/knowledge/reader/csv_reader.py +91 -42
  90. agno/knowledge/reader/docx_reader.py +9 -10
  91. agno/knowledge/reader/excel_reader.py +225 -0
  92. agno/knowledge/reader/field_labeled_csv_reader.py +38 -48
  93. agno/knowledge/reader/firecrawl_reader.py +3 -2
  94. agno/knowledge/reader/json_reader.py +16 -22
  95. agno/knowledge/reader/markdown_reader.py +15 -14
  96. agno/knowledge/reader/pdf_reader.py +33 -28
  97. agno/knowledge/reader/pptx_reader.py +9 -10
  98. agno/knowledge/reader/reader_factory.py +135 -1
  99. agno/knowledge/reader/s3_reader.py +8 -16
  100. agno/knowledge/reader/tavily_reader.py +3 -3
  101. agno/knowledge/reader/text_reader.py +15 -14
  102. agno/knowledge/reader/utils/__init__.py +17 -0
  103. agno/knowledge/reader/utils/spreadsheet.py +114 -0
  104. agno/knowledge/reader/web_search_reader.py +8 -65
  105. agno/knowledge/reader/website_reader.py +16 -13
  106. agno/knowledge/reader/wikipedia_reader.py +36 -3
  107. agno/knowledge/reader/youtube_reader.py +3 -2
  108. agno/knowledge/remote_content/__init__.py +33 -0
  109. agno/knowledge/remote_content/config.py +266 -0
  110. agno/knowledge/remote_content/remote_content.py +105 -17
  111. agno/knowledge/utils.py +76 -22
  112. agno/learn/__init__.py +71 -0
  113. agno/learn/config.py +463 -0
  114. agno/learn/curate.py +185 -0
  115. agno/learn/machine.py +725 -0
  116. agno/learn/schemas.py +1114 -0
  117. agno/learn/stores/__init__.py +38 -0
  118. agno/learn/stores/decision_log.py +1156 -0
  119. agno/learn/stores/entity_memory.py +3275 -0
  120. agno/learn/stores/learned_knowledge.py +1583 -0
  121. agno/learn/stores/protocol.py +117 -0
  122. agno/learn/stores/session_context.py +1217 -0
  123. agno/learn/stores/user_memory.py +1495 -0
  124. agno/learn/stores/user_profile.py +1220 -0
  125. agno/learn/utils.py +209 -0
  126. agno/media.py +22 -6
  127. agno/memory/__init__.py +14 -1
  128. agno/memory/manager.py +223 -8
  129. agno/memory/strategies/__init__.py +15 -0
  130. agno/memory/strategies/base.py +66 -0
  131. agno/memory/strategies/summarize.py +196 -0
  132. agno/memory/strategies/types.py +37 -0
  133. agno/models/aimlapi/aimlapi.py +17 -0
  134. agno/models/anthropic/claude.py +434 -59
  135. agno/models/aws/bedrock.py +121 -20
  136. agno/models/aws/claude.py +131 -274
  137. agno/models/azure/ai_foundry.py +10 -6
  138. agno/models/azure/openai_chat.py +33 -10
  139. agno/models/base.py +1162 -561
  140. agno/models/cerebras/cerebras.py +120 -24
  141. agno/models/cerebras/cerebras_openai.py +21 -2
  142. agno/models/cohere/chat.py +65 -6
  143. agno/models/cometapi/cometapi.py +18 -1
  144. agno/models/dashscope/dashscope.py +2 -3
  145. agno/models/deepinfra/deepinfra.py +18 -1
  146. agno/models/deepseek/deepseek.py +69 -3
  147. agno/models/fireworks/fireworks.py +18 -1
  148. agno/models/google/gemini.py +959 -89
  149. agno/models/google/utils.py +22 -0
  150. agno/models/groq/groq.py +48 -18
  151. agno/models/huggingface/huggingface.py +17 -6
  152. agno/models/ibm/watsonx.py +16 -6
  153. agno/models/internlm/internlm.py +18 -1
  154. agno/models/langdb/langdb.py +13 -1
  155. agno/models/litellm/chat.py +88 -9
  156. agno/models/litellm/litellm_openai.py +18 -1
  157. agno/models/message.py +24 -5
  158. agno/models/meta/llama.py +40 -13
  159. agno/models/meta/llama_openai.py +22 -21
  160. agno/models/metrics.py +12 -0
  161. agno/models/mistral/mistral.py +8 -4
  162. agno/models/n1n/__init__.py +3 -0
  163. agno/models/n1n/n1n.py +57 -0
  164. agno/models/nebius/nebius.py +6 -7
  165. agno/models/nvidia/nvidia.py +20 -3
  166. agno/models/ollama/__init__.py +2 -0
  167. agno/models/ollama/chat.py +17 -6
  168. agno/models/ollama/responses.py +100 -0
  169. agno/models/openai/__init__.py +2 -0
  170. agno/models/openai/chat.py +117 -26
  171. agno/models/openai/open_responses.py +46 -0
  172. agno/models/openai/responses.py +110 -32
  173. agno/models/openrouter/__init__.py +2 -0
  174. agno/models/openrouter/openrouter.py +67 -2
  175. agno/models/openrouter/responses.py +146 -0
  176. agno/models/perplexity/perplexity.py +19 -1
  177. agno/models/portkey/portkey.py +7 -6
  178. agno/models/requesty/requesty.py +19 -2
  179. agno/models/response.py +20 -2
  180. agno/models/sambanova/sambanova.py +20 -3
  181. agno/models/siliconflow/siliconflow.py +19 -2
  182. agno/models/together/together.py +20 -3
  183. agno/models/vercel/v0.py +20 -3
  184. agno/models/vertexai/claude.py +124 -4
  185. agno/models/vllm/vllm.py +19 -14
  186. agno/models/xai/xai.py +19 -2
  187. agno/os/app.py +467 -137
  188. agno/os/auth.py +253 -5
  189. agno/os/config.py +22 -0
  190. agno/os/interfaces/a2a/a2a.py +7 -6
  191. agno/os/interfaces/a2a/router.py +635 -26
  192. agno/os/interfaces/a2a/utils.py +32 -33
  193. agno/os/interfaces/agui/agui.py +5 -3
  194. agno/os/interfaces/agui/router.py +26 -16
  195. agno/os/interfaces/agui/utils.py +97 -57
  196. agno/os/interfaces/base.py +7 -7
  197. agno/os/interfaces/slack/router.py +16 -7
  198. agno/os/interfaces/slack/slack.py +7 -7
  199. agno/os/interfaces/whatsapp/router.py +35 -7
  200. agno/os/interfaces/whatsapp/security.py +3 -1
  201. agno/os/interfaces/whatsapp/whatsapp.py +11 -8
  202. agno/os/managers.py +326 -0
  203. agno/os/mcp.py +652 -79
  204. agno/os/middleware/__init__.py +4 -0
  205. agno/os/middleware/jwt.py +718 -115
  206. agno/os/middleware/trailing_slash.py +27 -0
  207. agno/os/router.py +105 -1558
  208. agno/os/routers/agents/__init__.py +3 -0
  209. agno/os/routers/agents/router.py +655 -0
  210. agno/os/routers/agents/schema.py +288 -0
  211. agno/os/routers/components/__init__.py +3 -0
  212. agno/os/routers/components/components.py +475 -0
  213. agno/os/routers/database.py +155 -0
  214. agno/os/routers/evals/evals.py +111 -18
  215. agno/os/routers/evals/schemas.py +38 -5
  216. agno/os/routers/evals/utils.py +80 -11
  217. agno/os/routers/health.py +3 -3
  218. agno/os/routers/knowledge/knowledge.py +284 -35
  219. agno/os/routers/knowledge/schemas.py +14 -2
  220. agno/os/routers/memory/memory.py +274 -11
  221. agno/os/routers/memory/schemas.py +44 -3
  222. agno/os/routers/metrics/metrics.py +30 -15
  223. agno/os/routers/metrics/schemas.py +10 -6
  224. agno/os/routers/registry/__init__.py +3 -0
  225. agno/os/routers/registry/registry.py +337 -0
  226. agno/os/routers/session/session.py +143 -14
  227. agno/os/routers/teams/__init__.py +3 -0
  228. agno/os/routers/teams/router.py +550 -0
  229. agno/os/routers/teams/schema.py +280 -0
  230. agno/os/routers/traces/__init__.py +3 -0
  231. agno/os/routers/traces/schemas.py +414 -0
  232. agno/os/routers/traces/traces.py +549 -0
  233. agno/os/routers/workflows/__init__.py +3 -0
  234. agno/os/routers/workflows/router.py +757 -0
  235. agno/os/routers/workflows/schema.py +139 -0
  236. agno/os/schema.py +157 -584
  237. agno/os/scopes.py +469 -0
  238. agno/os/settings.py +3 -0
  239. agno/os/utils.py +574 -185
  240. agno/reasoning/anthropic.py +85 -1
  241. agno/reasoning/azure_ai_foundry.py +93 -1
  242. agno/reasoning/deepseek.py +102 -2
  243. agno/reasoning/default.py +6 -7
  244. agno/reasoning/gemini.py +87 -3
  245. agno/reasoning/groq.py +109 -2
  246. agno/reasoning/helpers.py +6 -7
  247. agno/reasoning/manager.py +1238 -0
  248. agno/reasoning/ollama.py +93 -1
  249. agno/reasoning/openai.py +115 -1
  250. agno/reasoning/vertexai.py +85 -1
  251. agno/registry/__init__.py +3 -0
  252. agno/registry/registry.py +68 -0
  253. agno/remote/__init__.py +3 -0
  254. agno/remote/base.py +581 -0
  255. agno/run/__init__.py +2 -4
  256. agno/run/agent.py +134 -19
  257. agno/run/base.py +49 -1
  258. agno/run/cancel.py +65 -52
  259. agno/run/cancellation_management/__init__.py +9 -0
  260. agno/run/cancellation_management/base.py +78 -0
  261. agno/run/cancellation_management/in_memory_cancellation_manager.py +100 -0
  262. agno/run/cancellation_management/redis_cancellation_manager.py +236 -0
  263. agno/run/requirement.py +181 -0
  264. agno/run/team.py +111 -19
  265. agno/run/workflow.py +2 -1
  266. agno/session/agent.py +57 -92
  267. agno/session/summary.py +1 -1
  268. agno/session/team.py +62 -115
  269. agno/session/workflow.py +353 -57
  270. agno/skills/__init__.py +17 -0
  271. agno/skills/agent_skills.py +377 -0
  272. agno/skills/errors.py +32 -0
  273. agno/skills/loaders/__init__.py +4 -0
  274. agno/skills/loaders/base.py +27 -0
  275. agno/skills/loaders/local.py +216 -0
  276. agno/skills/skill.py +65 -0
  277. agno/skills/utils.py +107 -0
  278. agno/skills/validator.py +277 -0
  279. agno/table.py +10 -0
  280. agno/team/__init__.py +5 -1
  281. agno/team/remote.py +447 -0
  282. agno/team/team.py +3769 -2202
  283. agno/tools/brandfetch.py +27 -18
  284. agno/tools/browserbase.py +225 -16
  285. agno/tools/crawl4ai.py +3 -0
  286. agno/tools/duckduckgo.py +25 -71
  287. agno/tools/exa.py +0 -21
  288. agno/tools/file.py +14 -13
  289. agno/tools/file_generation.py +12 -6
  290. agno/tools/firecrawl.py +15 -7
  291. agno/tools/function.py +94 -113
  292. agno/tools/google_bigquery.py +11 -2
  293. agno/tools/google_drive.py +4 -3
  294. agno/tools/knowledge.py +9 -4
  295. agno/tools/mcp/mcp.py +301 -18
  296. agno/tools/mcp/multi_mcp.py +269 -14
  297. agno/tools/mem0.py +11 -10
  298. agno/tools/memory.py +47 -46
  299. agno/tools/mlx_transcribe.py +10 -7
  300. agno/tools/models/nebius.py +5 -5
  301. agno/tools/models_labs.py +20 -10
  302. agno/tools/nano_banana.py +151 -0
  303. agno/tools/parallel.py +0 -7
  304. agno/tools/postgres.py +76 -36
  305. agno/tools/python.py +14 -6
  306. agno/tools/reasoning.py +30 -23
  307. agno/tools/redshift.py +406 -0
  308. agno/tools/shopify.py +1519 -0
  309. agno/tools/spotify.py +919 -0
  310. agno/tools/tavily.py +4 -1
  311. agno/tools/toolkit.py +253 -18
  312. agno/tools/websearch.py +93 -0
  313. agno/tools/website.py +1 -1
  314. agno/tools/wikipedia.py +1 -1
  315. agno/tools/workflow.py +56 -48
  316. agno/tools/yfinance.py +12 -11
  317. agno/tracing/__init__.py +12 -0
  318. agno/tracing/exporter.py +161 -0
  319. agno/tracing/schemas.py +276 -0
  320. agno/tracing/setup.py +112 -0
  321. agno/utils/agent.py +251 -10
  322. agno/utils/cryptography.py +22 -0
  323. agno/utils/dttm.py +33 -0
  324. agno/utils/events.py +264 -7
  325. agno/utils/hooks.py +111 -3
  326. agno/utils/http.py +161 -2
  327. agno/utils/mcp.py +49 -8
  328. agno/utils/media.py +22 -1
  329. agno/utils/models/ai_foundry.py +9 -2
  330. agno/utils/models/claude.py +20 -5
  331. agno/utils/models/cohere.py +9 -2
  332. agno/utils/models/llama.py +9 -2
  333. agno/utils/models/mistral.py +4 -2
  334. agno/utils/os.py +0 -0
  335. agno/utils/print_response/agent.py +99 -16
  336. agno/utils/print_response/team.py +223 -24
  337. agno/utils/print_response/workflow.py +0 -2
  338. agno/utils/prompts.py +8 -6
  339. agno/utils/remote.py +23 -0
  340. agno/utils/response.py +1 -13
  341. agno/utils/string.py +91 -2
  342. agno/utils/team.py +62 -12
  343. agno/utils/tokens.py +657 -0
  344. agno/vectordb/base.py +15 -2
  345. agno/vectordb/cassandra/cassandra.py +1 -1
  346. agno/vectordb/chroma/__init__.py +2 -1
  347. agno/vectordb/chroma/chromadb.py +468 -23
  348. agno/vectordb/clickhouse/clickhousedb.py +1 -1
  349. agno/vectordb/couchbase/couchbase.py +6 -2
  350. agno/vectordb/lancedb/lance_db.py +7 -38
  351. agno/vectordb/lightrag/lightrag.py +7 -6
  352. agno/vectordb/milvus/milvus.py +118 -84
  353. agno/vectordb/mongodb/__init__.py +2 -1
  354. agno/vectordb/mongodb/mongodb.py +14 -31
  355. agno/vectordb/pgvector/pgvector.py +120 -66
  356. agno/vectordb/pineconedb/pineconedb.py +2 -19
  357. agno/vectordb/qdrant/__init__.py +2 -1
  358. agno/vectordb/qdrant/qdrant.py +33 -56
  359. agno/vectordb/redis/__init__.py +2 -1
  360. agno/vectordb/redis/redisdb.py +19 -31
  361. agno/vectordb/singlestore/singlestore.py +17 -9
  362. agno/vectordb/surrealdb/surrealdb.py +2 -38
  363. agno/vectordb/weaviate/__init__.py +2 -1
  364. agno/vectordb/weaviate/weaviate.py +7 -3
  365. agno/workflow/__init__.py +5 -1
  366. agno/workflow/agent.py +2 -2
  367. agno/workflow/condition.py +12 -10
  368. agno/workflow/loop.py +28 -9
  369. agno/workflow/parallel.py +21 -13
  370. agno/workflow/remote.py +362 -0
  371. agno/workflow/router.py +12 -9
  372. agno/workflow/step.py +261 -36
  373. agno/workflow/steps.py +12 -8
  374. agno/workflow/types.py +40 -77
  375. agno/workflow/workflow.py +939 -213
  376. {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/METADATA +134 -181
  377. agno-2.4.3.dist-info/RECORD +677 -0
  378. {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/WHEEL +1 -1
  379. agno/tools/googlesearch.py +0 -98
  380. agno/tools/memori.py +0 -339
  381. agno-2.2.13.dist-info/RECORD +0 -575
  382. {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/licenses/LICENSE +0 -0
  383. {agno-2.2.13.dist-info → agno-2.4.3.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,6 @@
1
- from agno.vectordb.chroma.chromadb import ChromaDb
1
+ from agno.vectordb.chroma.chromadb import ChromaDb, SearchType
2
2
 
3
3
  __all__ = [
4
4
  "ChromaDb",
5
+ "SearchType",
5
6
  ]
@@ -1,7 +1,9 @@
1
1
  import asyncio
2
2
  import json
3
+ from collections import defaultdict
4
+ from concurrent.futures import ThreadPoolExecutor
3
5
  from hashlib import md5
4
- from typing import Any, Dict, List, Mapping, Optional, Union, cast
6
+ from typing import Any, Dict, List, Mapping, Optional, Tuple, Union, cast
5
7
 
6
8
  try:
7
9
  from chromadb import Client as ChromaDbClient
@@ -20,12 +22,64 @@ from agno.knowledge.reranker.base import Reranker
20
22
  from agno.utils.log import log_debug, log_error, log_info, log_warning, logger
21
23
  from agno.vectordb.base import VectorDb
22
24
  from agno.vectordb.distance import Distance
25
+ from agno.vectordb.search import SearchType
26
+
27
+
28
def reciprocal_rank_fusion(
    ranked_lists: List[List[Tuple[str, float]]],
    k: int = 60,
) -> List[Tuple[str, float]]:
    """
    Combine multiple ranked lists using Reciprocal Rank Fusion (RRF).

    RRF is a simple yet effective method for combining multiple rankings.
    The formula is: RRF(d) = sum(1 / (k + rank_i(d))) for each ranking i

    Only the position of a document inside each list matters; the per-list
    scores carried in the tuples are ignored. Ranks are 1-based.

    Args:
        ranked_lists: List of ranked results, each as [(doc_id, score), ...],
            ordered best-first.
        k: RRF constant (default 60, as per original paper by Cormack et al.)

    Returns:
        Fused ranking as [(doc_id, rrf_score), ...] sorted by score descending.
        Documents with equal fused scores keep the order in which they were
        first encountered (the sort is stable).
    """
    rrf_scores: Dict[str, float] = defaultdict(float)

    for ranked_list in ranked_lists:
        # Each ranking contributes at most one term per document. If a doc_id
        # is repeated within the same list, only its first (best) rank counts;
        # otherwise a duplicated id would be rewarded with an inflated score.
        seen_in_list = set()
        for rank, (doc_id, _) in enumerate(ranked_list, start=1):
            if doc_id in seen_in_list:
                continue
            seen_in_list.add(doc_id)
            rrf_scores[doc_id] += 1.0 / (k + rank)

    return sorted(rrf_scores.items(), key=lambda item: item[1], reverse=True)
23
53
 
24
54
 
25
55
  class ChromaDb(VectorDb):
56
+ """
57
+ ChromaDb class for managing vector operations with ChromaDB.
58
+
59
+ Args:
60
+ collection: The name of the ChromaDB collection. If not provided, derived from 'name'.
61
+ name: Name of the vector database. Also used as collection name if 'collection' is not provided.
62
+ description: Description of the vector database.
63
+ id: Unique identifier for this vector database instance.
64
+ embedder: The embedder to use when embedding the document contents.
65
+ distance: The distance metric to use when searching for documents.
66
+ path: The path to store the ChromaDB data (for persistent client).
67
+ persistent_client: Whether to use a persistent client.
68
+ search_type: The search type to use when searching for documents.
69
+ - SearchType.vector: Pure vector similarity search (default)
70
+ - SearchType.keyword: Keyword-based search using document content
71
+ - SearchType.hybrid: Combines vector + FTS with Reciprocal Rank Fusion
72
+ hybrid_rrf_k: RRF (Reciprocal Rank Fusion) constant for hybrid search.
73
+ Controls ranking smoothness - higher values give more weight to lower-ranked
74
+ results, lower values make top results more dominant. Default is 60
75
+ (per original RRF paper by Cormack et al.).
76
+ reranker: The reranker to use when reranking documents.
77
+ **kwargs: Additional arguments to pass to the ChromaDB client.
78
+ """
79
+
26
80
  def __init__(
27
81
  self,
28
- collection: str,
82
+ collection: Optional[str] = None,
29
83
  name: Optional[str] = None,
30
84
  description: Optional[str] = None,
31
85
  id: Optional[str] = None,
@@ -33,12 +87,18 @@ class ChromaDb(VectorDb):
33
87
  distance: Distance = Distance.cosine,
34
88
  path: str = "tmp/chromadb",
35
89
  persistent_client: bool = False,
90
+ search_type: SearchType = SearchType.vector,
91
+ hybrid_rrf_k: int = 60,
36
92
  reranker: Optional[Reranker] = None,
37
93
  **kwargs,
38
94
  ):
39
- # Validate required parameters
40
- if not collection:
41
- raise ValueError("Collection name must be provided.")
95
+ # Derive collection from name if not provided
96
+ if collection is None:
97
+ if name is not None:
98
+ # Sanitize name: lowercase and replace spaces with underscores
99
+ collection = name.lower().replace(" ", "_")
100
+ else:
101
+ raise ValueError("Either 'collection' or 'name' must be provided.")
42
102
 
43
103
  # Dynamic ID generation based on unique identifiers
44
104
  if id is None:
@@ -57,7 +117,7 @@ class ChromaDb(VectorDb):
57
117
  from agno.knowledge.embedder.openai import OpenAIEmbedder
58
118
 
59
119
  embedder = OpenAIEmbedder()
60
- log_info("Embedder not provided, using OpenAIEmbedder as default.")
120
+ log_debug("Embedder not provided, using OpenAIEmbedder as default.")
61
121
  self.embedder: Embedder = embedder
62
122
  # Distance metric
63
123
  self.distance: Distance = distance
@@ -72,6 +132,10 @@ class ChromaDb(VectorDb):
72
132
  self.persistent_client: bool = persistent_client
73
133
  self.path: str = path
74
134
 
135
+ # Search type configuration
136
+ self.search_type: SearchType = search_type
137
+ self.hybrid_rrf_k: int = hybrid_rrf_k
138
+
75
139
  # Reranker instance
76
140
  self.reranker: Optional[Reranker] = reranker
77
141
 
@@ -272,11 +336,13 @@ class ChromaDb(VectorDb):
272
336
  embed_tasks = [document.async_embed(embedder=self.embedder) for document in documents]
273
337
  await asyncio.gather(*embed_tasks, return_exceptions=True)
274
338
  except Exception as e:
275
- log_error(f"Error processing document: {e}")
339
+ logger.error(f"Error processing document: {e}")
276
340
 
277
341
  for document in documents:
278
342
  cleaned_content = document.content.replace("\x00", "\ufffd")
279
- doc_id = md5(cleaned_content.encode()).hexdigest()
343
+ # Include content_hash in ID to ensure uniqueness across different content hashes
344
+ base_id = document.id or md5(cleaned_content.encode()).hexdigest()
345
+ doc_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
280
346
 
281
347
  # Handle metadata and filters
282
348
  metadata = document.meta_data or {}
@@ -435,7 +501,9 @@ class ChromaDb(VectorDb):
435
501
 
436
502
  for document in documents:
437
503
  cleaned_content = document.content.replace("\x00", "\ufffd")
438
- doc_id = md5(cleaned_content.encode()).hexdigest()
504
+ # Include content_hash in ID to ensure uniqueness across different content hashes
505
+ base_id = document.id or md5(cleaned_content.encode()).hexdigest()
506
+ doc_id = md5(f"{base_id}_{content_hash}".encode()).hexdigest()
439
507
 
440
508
  # Handle metadata and filters
441
509
  metadata = document.meta_data or {}
@@ -498,6 +566,41 @@ class ChromaDb(VectorDb):
498
566
  if isinstance(filters, list):
499
567
  log_warning("Filter Expressions are not yet supported in ChromaDB. No filters will be applied.")
500
568
  filters = None
569
+
570
+ if not self._collection:
571
+ self._collection = self.client.get_collection(name=self.collection_name)
572
+
573
+ # Route to appropriate search method based on search_type
574
+ if self.search_type == SearchType.vector:
575
+ search_results = self._vector_search(query, limit, filters)
576
+ elif self.search_type == SearchType.keyword:
577
+ search_results = self._keyword_search(query, limit, filters)
578
+ elif self.search_type == SearchType.hybrid:
579
+ search_results = self._hybrid_search(query, limit, filters)
580
+ else:
581
+ logger.error(f"Invalid search type '{self.search_type}'.")
582
+ return []
583
+
584
+ if self.reranker and search_results:
585
+ try:
586
+ search_results = self.reranker.rerank(query=query, documents=search_results)
587
+ except Exception as e:
588
+ log_warning(f"Reranker failed, returning unranked results: {e}")
589
+
590
+ log_info(f"Found {len(search_results)} documents")
591
+ return search_results
592
+
593
+ def _vector_search(self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
594
+ """Perform pure vector similarity search.
595
+
596
+ Args:
597
+ query (str): Query to search for.
598
+ limit (int): Number of results to return.
599
+ filters (Optional[Dict[str, Any]]): Metadata filters to apply.
600
+
601
+ Returns:
602
+ List[Document]: List of search results.
603
+ """
501
604
  query_embedding = self.embedder.get_embedding(query)
502
605
  if query_embedding is None:
503
606
  logger.error(f"Error getting embedding for Query: {query}")
@@ -512,11 +615,248 @@ class ChromaDb(VectorDb):
512
615
  result: QueryResult = self._collection.query(
513
616
  query_embeddings=query_embedding,
514
617
  n_results=limit,
515
- where=where_filter, # Add where filter
618
+ where=where_filter,
516
619
  include=["metadatas", "documents", "embeddings", "distances", "uris"],
517
620
  )
518
621
 
519
- # Build search results
622
+ return self._build_search_results(result)
623
+
624
def _keyword_search(self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
    """Run a keyword lookup against the stored document text.

    The lookup is expressed as a ChromaDB ``where_document`` filter using the
    ``$contains`` operator. Only the first whitespace-separated token of
    ``query`` is used for the filter; the full query string is still handed
    to ``_build_get_results``.

    Args:
        query (str): Keywords to look for in document content.
        limit (int): Maximum number of documents to fetch.
        filters (Optional[Dict[str, Any]]): Metadata filters to apply.

    Returns:
        List[Document]: Matching documents, or an empty list when the query
        has no tokens or the lookup fails.
    """
    # Lazily resolve the collection handle on first use.
    if not self._collection:
        self._collection = self.client.get_collection(name=self.collection_name)

    # Translate caller-supplied filters into ChromaDB's format.
    metadata_filter = None
    if filters:
        metadata_filter = self._convert_filters(filters)

    tokens = query.split()
    if len(tokens) == 0:
        return []

    # Content filter built from the leading token only.
    content_filter: Dict[str, Any] = {"$contains": tokens[0]}

    try:
        matches = self._collection.get(
            where=metadata_filter,
            where_document=cast(Any, content_filter),
            limit=limit,
            include=["metadatas", "documents", "embeddings"],
        )
        documents = self._build_get_results(cast(Dict[str, Any], matches), query)
    except Exception as e:
        logger.error(f"Error in keyword search: {e}")
        return []

    return documents
665
+
666
def _hybrid_search(self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
    """Perform hybrid search combining vector similarity with full-text search using RRF.

    This method combines:
    1. Dense vector similarity search (semantic search)
    2. Full-text search (keyword/lexical search), restricted to documents that
       contain the first whitespace-separated token of the query

    Both candidate lists are produced concurrently and fused with Reciprocal
    Rank Fusion (RRF) using ``self.hybrid_rrf_k``. The full records of the top
    ``limit`` fused ids are then fetched and returned in fused order, with the
    fused score stored under ``meta_data["rrf_score"]``.

    Args:
        query (str): Query to search for.
        limit (int): Number of results to return.
        filters (Optional[Dict[str, Any]]): Metadata filters to apply.

    Returns:
        List[Document]: List of search results with RRF-fused ranking. Empty
        when the query cannot be embedded, no candidates are found, or the
        final fetch fails.
    """
    query_embedding = self.embedder.get_embedding(query)
    if query_embedding is None:
        logger.error(f"Error getting embedding for Query: {query}")
        return []

    if not self._collection:
        self._collection = self.client.get_collection(name=self.collection_name)

    # Convert simple filters to ChromaDB's format if needed
    where_filter = self._convert_filters(filters) if filters else None

    # Fetch more candidates than needed for better fusion (3x, capped at 100),
    # but never fewer than the caller asked for: without the max() a limit
    # above 100 would be starved of candidates.
    fetch_k = max(limit, min(limit * 3, 100))

    def dense_vector_similarity_search() -> List[Tuple[str, float]]:
        """Dense vector similarity search; returns [(doc_id, score), ...] in query order."""
        try:
            results = self._collection.query(  # type: ignore
                query_embeddings=query_embedding,
                n_results=fetch_k,
                where=where_filter,
                include=["documents", "metadatas", "distances"],
            )

            ranked: List[Tuple[str, float]] = []
            if results.get("ids") and results["ids"][0]:
                for i, doc_id in enumerate(results["ids"][0]):
                    distance = results["distances"][0][i] if results.get("distances") else 0  # type: ignore
                    # Convert distance to similarity score (lower distance = higher score).
                    # RRF only consumes the rank order, so this score is informational.
                    # The denominator is guarded: a distance of exactly -1 would raise
                    # ZeroDivisionError, and the broad except below would then throw
                    # away the entire vector candidate list.
                    denominator = 1.0 + distance
                    score = 1.0 / denominator if denominator > 0 else float("inf")
                    ranked.append((doc_id, score))
            return ranked
        except Exception as e:
            log_error(f"Error in vector search component: {e}")
            return []

    def fts_search() -> List[Tuple[str, float]]:
        """Full-text search using ChromaDB's where_document filter."""
        try:
            query_words = query.split()
            if not query_words:
                return []

            # Use first word for $contains filter
            fts_where_document: Dict[str, Any] = {"$contains": query_words[0]}

            results = self._collection.query(  # type: ignore
                query_embeddings=query_embedding,
                n_results=fetch_k,
                where=where_filter,
                where_document=cast(Any, fts_where_document),
                include=["documents", "metadatas", "distances"],
            )

            # Loop-invariant: the lower-cased query term set is the same for every hit.
            query_terms = set(query.lower().split())
            term_count = max(len(query_terms), 1)

            ranked: List[Tuple[str, float]] = []
            if results.get("ids") and results["ids"][0]:
                for i, doc_id in enumerate(results["ids"][0]):
                    # Score = fraction of distinct query terms present in the document
                    doc = results["documents"][0][i] if results.get("documents") else ""  # type: ignore
                    doc_terms = set(doc.lower().split()) if doc else set()
                    overlap = len(query_terms & doc_terms)
                    ranked.append((doc_id, overlap / term_count))

            # Sort by score descending (stable, so ties keep the query's order)
            ranked.sort(key=lambda x: x[1], reverse=True)
            return ranked
        except Exception as e:
            log_error(f"Error in FTS search component: {e}")
            return []

    # Execute searches in parallel for better performance
    with ThreadPoolExecutor(max_workers=2) as executor:
        vector_future = executor.submit(dense_vector_similarity_search)
        fts_future = executor.submit(fts_search)

        vector_results = vector_future.result()
        fts_results = fts_future.result()

    # Apply RRF fusion
    fused_ranking = reciprocal_rank_fusion(
        [vector_results, fts_results],
        k=self.hybrid_rrf_k,
    )

    # Get top IDs from fused ranking
    top_ids = [doc_id for doc_id, _ in fused_ranking[:limit]]

    if not top_ids:
        return []

    # Fetch full document data for top results
    try:
        full_results = self._collection.get(
            ids=top_ids,
            include=["documents", "metadatas", "embeddings"],
        )
    except Exception as e:
        log_error(f"Error fetching full results: {e}")
        return []

    # Build lookup dict for results; the final ordering comes from top_ids,
    # not from the order in which get() returns the records.
    doc_lookup: Dict[str, Dict[str, Any]] = {}
    result_ids = full_results.get("ids", [])
    result_docs = full_results.get("documents")
    result_metas = full_results.get("metadatas")
    result_embeds = full_results.get("embeddings")

    for i, doc_id in enumerate(result_ids if result_ids is not None else []):
        # Explicit "is not None" checks: these values are not truth-tested directly.
        doc_lookup[doc_id] = {
            "document": result_docs[i] if result_docs is not None and i < len(result_docs) else None,
            "metadata": result_metas[i] if result_metas is not None and i < len(result_metas) else None,
            "embedding": result_embeds[i] if result_embeds is not None and i < len(result_embeds) else None,
        }

    # Build final results in fused ranking order
    search_results: List[Document] = []
    rrf_scores = dict(fused_ranking)

    for doc_id in top_ids:
        if doc_id not in doc_lookup:
            continue

        doc_data = doc_lookup[doc_id]
        doc_metadata = dict(doc_data["metadata"]) if doc_data["metadata"] else {}

        # Add RRF score to metadata
        doc_metadata["rrf_score"] = rrf_scores.get(doc_id, 0.0)

        # "name" and "content_id" are lifted out of the metadata and passed
        # to Document as dedicated fields.
        name_val = doc_metadata.pop("name", None)
        content_id_val = doc_metadata.pop("content_id", None)

        # Convert types to match Document constructor expectations
        name = str(name_val) if name_val is not None and not isinstance(name_val, str) else name_val
        content_id = (
            str(content_id_val)
            if content_id_val is not None and not isinstance(content_id_val, str)
            else content_id_val
        )
        content = str(doc_data["document"]) if doc_data["document"] is not None else ""

        # Process embedding: accept objects exposing tolist() or plain sequences
        embedding = None
        if doc_data["embedding"] is not None:
            embed_data = doc_data["embedding"]
            if hasattr(embed_data, "tolist") and callable(getattr(embed_data, "tolist", None)):
                try:
                    embedding = list(cast(Any, embed_data).tolist())
                except (AttributeError, TypeError):
                    embedding = list(embed_data) if isinstance(embed_data, (list, tuple)) else None
            elif isinstance(embed_data, (list, tuple)):
                embedding = [float(x) for x in embed_data if isinstance(x, (int, float))]

        search_results.append(
            Document(
                id=doc_id,
                name=name,
                meta_data=doc_metadata,
                content=content,
                embedding=embedding,
                content_id=content_id,
            )
        )

    return search_results
850
+
851
+ def _build_search_results(self, result: QueryResult) -> List[Document]:
852
+ """Build Document list from ChromaDB QueryResult.
853
+
854
+ Args:
855
+ result: The QueryResult from ChromaDB query.
856
+
857
+ Returns:
858
+ List[Document]: List of Document objects.
859
+ """
520
860
  search_results: List[Document] = []
521
861
 
522
862
  ids_list = result.get("ids", [[]]) # type: ignore
@@ -525,13 +865,33 @@ class ChromaDb(VectorDb):
525
865
  embeddings_list = result.get("embeddings") # type: ignore
526
866
  distances_list = result.get("distances", [[]]) # type: ignore
527
867
 
528
- if not ids_list or not metadata_list or not documents_list or embeddings_list is None or not distances_list:
868
+ # Check if we have valid results - handle numpy arrays carefully
869
+ if ids_list is None or len(ids_list) == 0:
870
+ return search_results
871
+ if metadata_list is None or len(metadata_list) == 0:
872
+ return search_results
873
+ if documents_list is None or len(documents_list) == 0:
874
+ return search_results
875
+ if distances_list is None or len(distances_list) == 0:
529
876
  return search_results
530
877
 
531
878
  ids = ids_list[0]
532
879
  metadata = [dict(m) if m else {} for m in metadata_list[0]] # Convert to mutable dicts
533
880
  documents = documents_list[0]
534
- embeddings_raw = embeddings_list[0] if embeddings_list else []
881
+
882
+ # Handle embeddings - may be None or numpy array
883
+ embeddings_raw: Any = []
884
+ if embeddings_list is not None:
885
+ try:
886
+ if len(embeddings_list) > 0:
887
+ embeddings_raw = embeddings_list[0]
888
+ except (TypeError, ValueError):
889
+ # numpy array truth value issue - try direct access
890
+ try:
891
+ embeddings_raw = embeddings_list[0]
892
+ except Exception:
893
+ embeddings_raw = []
894
+
535
895
  embeddings = []
536
896
  for e in embeddings_raw:
537
897
  if hasattr(e, "tolist") and callable(getattr(e, "tolist", None)):
@@ -545,7 +905,8 @@ class ChromaDb(VectorDb):
545
905
  embeddings.append([float(e)])
546
906
  else:
547
907
  embeddings.append([])
548
- distances = distances_list[0]
908
+
909
+ distances = distances_list[0] if len(distances_list) > 0 else []
549
910
 
550
911
  for idx, distance in enumerate(distances):
551
912
  if idx < len(metadata):
@@ -578,12 +939,95 @@ class ChromaDb(VectorDb):
578
939
  )
579
940
  )
580
941
  except Exception as e:
581
- logger.error(f"Error building search results: {e}")
942
+ log_error(f"Error building search results: {e}")
582
943
 
583
- if self.reranker:
584
- search_results = self.reranker.rerank(query=query, documents=search_results)
944
+ return search_results
945
+
946
+ def _build_get_results(self, result: Dict[str, Any], query: str = "") -> List[Document]:
947
+ """Build Document list from ChromaDB GetResult.
948
+
949
+ Args:
950
+ result: The GetResult from ChromaDB get.
951
+ query: The original query for scoring.
952
+
953
+ Returns:
954
+ List[Document]: List of Document objects.
955
+ """
956
+ search_results: List[Document] = []
957
+
958
+ ids = result.get("ids", [])
959
+ metadatas = result.get("metadatas", [])
960
+ documents = result.get("documents", [])
961
+ embeddings_raw = result.get("embeddings")
962
+
963
+ # Check ids safely (may be numpy array)
964
+ if ids is None:
965
+ return search_results
966
+ try:
967
+ if len(ids) == 0:
968
+ return search_results
969
+ except (TypeError, ValueError):
970
+ return search_results
971
+
972
+ embeddings = []
973
+ # Handle embeddings - may be None or numpy array
974
+ if embeddings_raw is not None:
975
+ try:
976
+ for e in embeddings_raw:
977
+ if hasattr(e, "tolist") and callable(getattr(e, "tolist", None)):
978
+ try:
979
+ embeddings.append(list(cast(Any, e).tolist()))
980
+ except (AttributeError, TypeError):
981
+ embeddings.append(list(e) if isinstance(e, (list, tuple)) else [])
982
+ elif isinstance(e, (list, tuple)):
983
+ embeddings.append([float(x) for x in e if isinstance(x, (int, float))])
984
+ elif isinstance(e, (int, float)):
985
+ embeddings.append([float(e)])
986
+ else:
987
+ embeddings.append([])
988
+ except (TypeError, ValueError):
989
+ # numpy array iteration issue
990
+ embeddings = []
991
+
992
+ try:
993
+ for idx, id_ in enumerate(ids):
994
+ doc_metadata = dict(metadatas[idx]) if metadatas and idx < len(metadatas) and metadatas[idx] else {}
995
+ document = documents[idx] if documents and idx < len(documents) else ""
996
+
997
+ # Calculate simple keyword score if query provided
998
+ if query and document:
999
+ query_terms = set(query.lower().split())
1000
+ doc_terms = set(document.lower().split())
1001
+ overlap = len(query_terms & doc_terms)
1002
+ doc_metadata["keyword_score"] = overlap / max(len(query_terms), 1)
1003
+
1004
+ # Extract the fields we added to metadata
1005
+ name_val = doc_metadata.pop("name", None)
1006
+ content_id_val = doc_metadata.pop("content_id", None)
1007
+
1008
+ # Convert types to match Document constructor expectations
1009
+ name = str(name_val) if name_val is not None and not isinstance(name_val, str) else name_val
1010
+ content_id = (
1011
+ str(content_id_val)
1012
+ if content_id_val is not None and not isinstance(content_id_val, str)
1013
+ else content_id_val
1014
+ )
1015
+ content = str(document) if document is not None else ""
1016
+ embedding = embeddings[idx] if idx < len(embeddings) else None
1017
+
1018
+ search_results.append(
1019
+ Document(
1020
+ id=id_,
1021
+ name=name,
1022
+ meta_data=doc_metadata,
1023
+ content=content,
1024
+ embedding=embedding,
1025
+ content_id=content_id,
1026
+ )
1027
+ )
1028
+ except Exception as e:
1029
+ log_error(f"Error building get results: {e}")
585
1030
 
586
- log_info(f"Found {len(search_results)} documents")
587
1031
  return search_results
588
1032
 
589
1033
  def _convert_filters(self, filters: Dict[str, Any]) -> Dict[str, Any]:
@@ -888,7 +1332,7 @@ class ChromaDb(VectorDb):
888
1332
  current_metadatas = []
889
1333
 
890
1334
  if not ids:
891
- logger.debug(f"No documents found with content_id: {content_id}")
1335
+ log_debug(f"No documents found with content_id: {content_id}")
892
1336
  return
893
1337
 
894
1338
  # Flatten the new metadata first
@@ -908,12 +1352,13 @@ class ChromaDb(VectorDb):
908
1352
 
909
1353
  # Convert to the expected type for ChromaDB
910
1354
  chroma_metadatas = cast(List[Mapping[str, Union[str, int, float, bool]]], updated_metadatas)
1355
+ chroma_metadatas = [{k: v for k, v in m.items() if k and v} for m in chroma_metadatas]
911
1356
  collection.update(ids=ids, metadatas=chroma_metadatas) # type: ignore
912
- logger.debug(f"Updated metadata for {len(ids)} documents with content_id: {content_id}")
1357
+ log_debug(f"Updated metadata for {len(ids)} documents with content_id: {content_id}")
913
1358
 
914
1359
  except TypeError as te:
915
1360
  if "object of type 'int' has no len()" in str(te):
916
- logger.warning(
1361
+ log_warning(
917
1362
  f"ChromaDB internal error (version 0.5.0 bug): {te}. Cannot update metadata for content_id '{content_id}'."
918
1363
  )
919
1364
  return
@@ -921,9 +1366,9 @@ class ChromaDb(VectorDb):
921
1366
  raise te
922
1367
 
923
1368
  except Exception as e:
924
- logger.error(f"Error updating metadata for content_id '{content_id}': {e}")
1369
+ log_error(f"Error updating metadata for content_id '{content_id}': {e}")
925
1370
  raise
926
1371
 
927
1372
  def get_supported_search_types(self) -> List[str]:
928
1373
  """Get the supported search types for this vector database."""
929
- return [] # ChromaDb doesn't use SearchType enum
1374
+ return [SearchType.vector, SearchType.keyword, SearchType.hybrid]
@@ -71,7 +71,7 @@ class Clickhouse(VectorDb):
71
71
  from agno.knowledge.embedder.openai import OpenAIEmbedder
72
72
 
73
73
  _embedder = OpenAIEmbedder()
74
- log_info("Embedder not provided, using OpenAIEmbedder as default.")
74
+ log_debug("Embedder not provided, using OpenAIEmbedder as default.")
75
75
  self.embedder: Embedder = _embedder
76
76
  self.dimensions: Optional[int] = self.embedder.dimensions
77
77
 
@@ -6,7 +6,6 @@ from typing import Any, Dict, List, Optional, Union
6
6
  from agno.filters import FilterExpr
7
7
  from agno.knowledge.document import Document
8
8
  from agno.knowledge.embedder import Embedder
9
- from agno.knowledge.embedder.openai import OpenAIEmbedder
10
9
  from agno.utils.log import log_debug, log_info, log_warning, logger
11
10
  from agno.vectordb.base import VectorDb
12
11
 
@@ -62,7 +61,7 @@ class CouchbaseSearch(VectorDb):
62
61
  couchbase_connection_string: str,
63
62
  cluster_options: ClusterOptions,
64
63
  search_index: Union[str, SearchIndex],
65
- embedder: Embedder = OpenAIEmbedder(),
64
+ embedder: Optional[Embedder] = None,
66
65
  overwrite: bool = False,
67
66
  is_global_level_index: bool = False,
68
67
  wait_until_index_ready: float = 0,
@@ -97,6 +96,11 @@ class CouchbaseSearch(VectorDb):
97
96
  self.collection_name = collection_name
98
97
  self.connection_string = couchbase_connection_string
99
98
  self.cluster_options = cluster_options
99
+ if embedder is None:
100
+ from agno.knowledge.embedder.openai import OpenAIEmbedder
101
+
102
+ embedder = OpenAIEmbedder()
103
+ log_debug("Embedder not provided, using OpenAIEmbedder as default.")
100
104
  self.embedder = embedder
101
105
  self.overwrite = overwrite
102
106
  self.is_global_level_index = is_global_level_index