agno 0.1.2__py3-none-any.whl → 2.3.13__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (723)
  1. agno/__init__.py +8 -0
  2. agno/agent/__init__.py +44 -5
  3. agno/agent/agent.py +10531 -2975
  4. agno/api/agent.py +14 -53
  5. agno/api/api.py +7 -46
  6. agno/api/evals.py +22 -0
  7. agno/api/os.py +17 -0
  8. agno/api/routes.py +6 -25
  9. agno/api/schemas/__init__.py +9 -0
  10. agno/api/schemas/agent.py +6 -9
  11. agno/api/schemas/evals.py +16 -0
  12. agno/api/schemas/os.py +14 -0
  13. agno/api/schemas/team.py +10 -10
  14. agno/api/schemas/utils.py +21 -0
  15. agno/api/schemas/workflows.py +16 -0
  16. agno/api/settings.py +53 -0
  17. agno/api/team.py +22 -26
  18. agno/api/workflow.py +28 -0
  19. agno/cloud/aws/base.py +214 -0
  20. agno/cloud/aws/s3/__init__.py +2 -0
  21. agno/cloud/aws/s3/api_client.py +43 -0
  22. agno/cloud/aws/s3/bucket.py +195 -0
  23. agno/cloud/aws/s3/object.py +57 -0
  24. agno/compression/__init__.py +3 -0
  25. agno/compression/manager.py +247 -0
  26. agno/culture/__init__.py +3 -0
  27. agno/culture/manager.py +956 -0
  28. agno/db/__init__.py +24 -0
  29. agno/db/async_postgres/__init__.py +3 -0
  30. agno/db/base.py +946 -0
  31. agno/db/dynamo/__init__.py +3 -0
  32. agno/db/dynamo/dynamo.py +2781 -0
  33. agno/db/dynamo/schemas.py +442 -0
  34. agno/db/dynamo/utils.py +743 -0
  35. agno/db/firestore/__init__.py +3 -0
  36. agno/db/firestore/firestore.py +2379 -0
  37. agno/db/firestore/schemas.py +181 -0
  38. agno/db/firestore/utils.py +376 -0
  39. agno/db/gcs_json/__init__.py +3 -0
  40. agno/db/gcs_json/gcs_json_db.py +1791 -0
  41. agno/db/gcs_json/utils.py +228 -0
  42. agno/db/in_memory/__init__.py +3 -0
  43. agno/db/in_memory/in_memory_db.py +1312 -0
  44. agno/db/in_memory/utils.py +230 -0
  45. agno/db/json/__init__.py +3 -0
  46. agno/db/json/json_db.py +1777 -0
  47. agno/db/json/utils.py +230 -0
  48. agno/db/migrations/manager.py +199 -0
  49. agno/db/migrations/v1_to_v2.py +635 -0
  50. agno/db/migrations/versions/v2_3_0.py +938 -0
  51. agno/db/mongo/__init__.py +17 -0
  52. agno/db/mongo/async_mongo.py +2760 -0
  53. agno/db/mongo/mongo.py +2597 -0
  54. agno/db/mongo/schemas.py +119 -0
  55. agno/db/mongo/utils.py +276 -0
  56. agno/db/mysql/__init__.py +4 -0
  57. agno/db/mysql/async_mysql.py +2912 -0
  58. agno/db/mysql/mysql.py +2923 -0
  59. agno/db/mysql/schemas.py +186 -0
  60. agno/db/mysql/utils.py +488 -0
  61. agno/db/postgres/__init__.py +4 -0
  62. agno/db/postgres/async_postgres.py +2579 -0
  63. agno/db/postgres/postgres.py +2870 -0
  64. agno/db/postgres/schemas.py +187 -0
  65. agno/db/postgres/utils.py +442 -0
  66. agno/db/redis/__init__.py +3 -0
  67. agno/db/redis/redis.py +2141 -0
  68. agno/db/redis/schemas.py +159 -0
  69. agno/db/redis/utils.py +346 -0
  70. agno/db/schemas/__init__.py +4 -0
  71. agno/db/schemas/culture.py +120 -0
  72. agno/db/schemas/evals.py +34 -0
  73. agno/db/schemas/knowledge.py +40 -0
  74. agno/db/schemas/memory.py +61 -0
  75. agno/db/singlestore/__init__.py +3 -0
  76. agno/db/singlestore/schemas.py +179 -0
  77. agno/db/singlestore/singlestore.py +2877 -0
  78. agno/db/singlestore/utils.py +384 -0
  79. agno/db/sqlite/__init__.py +4 -0
  80. agno/db/sqlite/async_sqlite.py +2911 -0
  81. agno/db/sqlite/schemas.py +181 -0
  82. agno/db/sqlite/sqlite.py +2908 -0
  83. agno/db/sqlite/utils.py +429 -0
  84. agno/db/surrealdb/__init__.py +3 -0
  85. agno/db/surrealdb/metrics.py +292 -0
  86. agno/db/surrealdb/models.py +334 -0
  87. agno/db/surrealdb/queries.py +71 -0
  88. agno/db/surrealdb/surrealdb.py +1908 -0
  89. agno/db/surrealdb/utils.py +147 -0
  90. agno/db/utils.py +118 -0
  91. agno/eval/__init__.py +24 -0
  92. agno/eval/accuracy.py +666 -276
  93. agno/eval/agent_as_judge.py +861 -0
  94. agno/eval/base.py +29 -0
  95. agno/eval/performance.py +779 -0
  96. agno/eval/reliability.py +241 -62
  97. agno/eval/utils.py +120 -0
  98. agno/exceptions.py +143 -1
  99. agno/filters.py +354 -0
  100. agno/guardrails/__init__.py +6 -0
  101. agno/guardrails/base.py +19 -0
  102. agno/guardrails/openai.py +144 -0
  103. agno/guardrails/pii.py +94 -0
  104. agno/guardrails/prompt_injection.py +52 -0
  105. agno/hooks/__init__.py +3 -0
  106. agno/hooks/decorator.py +164 -0
  107. agno/integrations/discord/__init__.py +3 -0
  108. agno/integrations/discord/client.py +203 -0
  109. agno/knowledge/__init__.py +5 -1
  110. agno/{document → knowledge}/chunking/agentic.py +22 -14
  111. agno/{document → knowledge}/chunking/document.py +2 -2
  112. agno/{document → knowledge}/chunking/fixed.py +7 -6
  113. agno/knowledge/chunking/markdown.py +151 -0
  114. agno/{document → knowledge}/chunking/recursive.py +15 -3
  115. agno/knowledge/chunking/row.py +39 -0
  116. agno/knowledge/chunking/semantic.py +91 -0
  117. agno/knowledge/chunking/strategy.py +165 -0
  118. agno/knowledge/content.py +74 -0
  119. agno/knowledge/document/__init__.py +5 -0
  120. agno/{document → knowledge/document}/base.py +12 -2
  121. agno/knowledge/embedder/__init__.py +5 -0
  122. agno/knowledge/embedder/aws_bedrock.py +343 -0
  123. agno/knowledge/embedder/azure_openai.py +210 -0
  124. agno/{embedder → knowledge/embedder}/base.py +8 -0
  125. agno/knowledge/embedder/cohere.py +323 -0
  126. agno/knowledge/embedder/fastembed.py +62 -0
  127. agno/{embedder → knowledge/embedder}/fireworks.py +1 -1
  128. agno/knowledge/embedder/google.py +258 -0
  129. agno/knowledge/embedder/huggingface.py +94 -0
  130. agno/knowledge/embedder/jina.py +182 -0
  131. agno/knowledge/embedder/langdb.py +22 -0
  132. agno/knowledge/embedder/mistral.py +206 -0
  133. agno/knowledge/embedder/nebius.py +13 -0
  134. agno/knowledge/embedder/ollama.py +154 -0
  135. agno/knowledge/embedder/openai.py +195 -0
  136. agno/knowledge/embedder/sentence_transformer.py +63 -0
  137. agno/{embedder → knowledge/embedder}/together.py +1 -1
  138. agno/knowledge/embedder/vllm.py +262 -0
  139. agno/knowledge/embedder/voyageai.py +165 -0
  140. agno/knowledge/knowledge.py +3006 -0
  141. agno/knowledge/reader/__init__.py +7 -0
  142. agno/knowledge/reader/arxiv_reader.py +81 -0
  143. agno/knowledge/reader/base.py +95 -0
  144. agno/knowledge/reader/csv_reader.py +164 -0
  145. agno/knowledge/reader/docx_reader.py +82 -0
  146. agno/knowledge/reader/field_labeled_csv_reader.py +290 -0
  147. agno/knowledge/reader/firecrawl_reader.py +201 -0
  148. agno/knowledge/reader/json_reader.py +88 -0
  149. agno/knowledge/reader/markdown_reader.py +137 -0
  150. agno/knowledge/reader/pdf_reader.py +431 -0
  151. agno/knowledge/reader/pptx_reader.py +101 -0
  152. agno/knowledge/reader/reader_factory.py +313 -0
  153. agno/knowledge/reader/s3_reader.py +89 -0
  154. agno/knowledge/reader/tavily_reader.py +193 -0
  155. agno/knowledge/reader/text_reader.py +127 -0
  156. agno/knowledge/reader/web_search_reader.py +325 -0
  157. agno/knowledge/reader/website_reader.py +455 -0
  158. agno/knowledge/reader/wikipedia_reader.py +91 -0
  159. agno/knowledge/reader/youtube_reader.py +78 -0
  160. agno/knowledge/remote_content/remote_content.py +88 -0
  161. agno/knowledge/reranker/__init__.py +3 -0
  162. agno/{reranker → knowledge/reranker}/base.py +1 -1
  163. agno/{reranker → knowledge/reranker}/cohere.py +2 -2
  164. agno/knowledge/reranker/infinity.py +195 -0
  165. agno/knowledge/reranker/sentence_transformer.py +54 -0
  166. agno/knowledge/types.py +39 -0
  167. agno/knowledge/utils.py +234 -0
  168. agno/media.py +439 -95
  169. agno/memory/__init__.py +16 -3
  170. agno/memory/manager.py +1474 -123
  171. agno/memory/strategies/__init__.py +15 -0
  172. agno/memory/strategies/base.py +66 -0
  173. agno/memory/strategies/summarize.py +196 -0
  174. agno/memory/strategies/types.py +37 -0
  175. agno/models/aimlapi/__init__.py +5 -0
  176. agno/models/aimlapi/aimlapi.py +62 -0
  177. agno/models/anthropic/__init__.py +4 -0
  178. agno/models/anthropic/claude.py +960 -496
  179. agno/models/aws/__init__.py +15 -0
  180. agno/models/aws/bedrock.py +686 -451
  181. agno/models/aws/claude.py +190 -183
  182. agno/models/azure/__init__.py +18 -1
  183. agno/models/azure/ai_foundry.py +489 -0
  184. agno/models/azure/openai_chat.py +89 -40
  185. agno/models/base.py +2477 -550
  186. agno/models/cerebras/__init__.py +12 -0
  187. agno/models/cerebras/cerebras.py +565 -0
  188. agno/models/cerebras/cerebras_openai.py +131 -0
  189. agno/models/cohere/__init__.py +4 -0
  190. agno/models/cohere/chat.py +306 -492
  191. agno/models/cometapi/__init__.py +5 -0
  192. agno/models/cometapi/cometapi.py +74 -0
  193. agno/models/dashscope/__init__.py +5 -0
  194. agno/models/dashscope/dashscope.py +90 -0
  195. agno/models/deepinfra/__init__.py +5 -0
  196. agno/models/deepinfra/deepinfra.py +45 -0
  197. agno/models/deepseek/__init__.py +4 -0
  198. agno/models/deepseek/deepseek.py +110 -9
  199. agno/models/fireworks/__init__.py +4 -0
  200. agno/models/fireworks/fireworks.py +19 -22
  201. agno/models/google/__init__.py +3 -7
  202. agno/models/google/gemini.py +1717 -662
  203. agno/models/google/utils.py +22 -0
  204. agno/models/groq/__init__.py +4 -0
  205. agno/models/groq/groq.py +391 -666
  206. agno/models/huggingface/__init__.py +4 -0
  207. agno/models/huggingface/huggingface.py +266 -538
  208. agno/models/ibm/__init__.py +5 -0
  209. agno/models/ibm/watsonx.py +432 -0
  210. agno/models/internlm/__init__.py +3 -0
  211. agno/models/internlm/internlm.py +20 -3
  212. agno/models/langdb/__init__.py +1 -0
  213. agno/models/langdb/langdb.py +60 -0
  214. agno/models/litellm/__init__.py +14 -0
  215. agno/models/litellm/chat.py +503 -0
  216. agno/models/litellm/litellm_openai.py +42 -0
  217. agno/models/llama_cpp/__init__.py +5 -0
  218. agno/models/llama_cpp/llama_cpp.py +22 -0
  219. agno/models/lmstudio/__init__.py +5 -0
  220. agno/models/lmstudio/lmstudio.py +25 -0
  221. agno/models/message.py +361 -39
  222. agno/models/meta/__init__.py +12 -0
  223. agno/models/meta/llama.py +502 -0
  224. agno/models/meta/llama_openai.py +79 -0
  225. agno/models/metrics.py +120 -0
  226. agno/models/mistral/__init__.py +4 -0
  227. agno/models/mistral/mistral.py +293 -393
  228. agno/models/nebius/__init__.py +3 -0
  229. agno/models/nebius/nebius.py +53 -0
  230. agno/models/nexus/__init__.py +3 -0
  231. agno/models/nexus/nexus.py +22 -0
  232. agno/models/nvidia/__init__.py +4 -0
  233. agno/models/nvidia/nvidia.py +22 -3
  234. agno/models/ollama/__init__.py +4 -2
  235. agno/models/ollama/chat.py +257 -492
  236. agno/models/openai/__init__.py +7 -0
  237. agno/models/openai/chat.py +725 -770
  238. agno/models/openai/like.py +16 -2
  239. agno/models/openai/responses.py +1121 -0
  240. agno/models/openrouter/__init__.py +4 -0
  241. agno/models/openrouter/openrouter.py +62 -5
  242. agno/models/perplexity/__init__.py +5 -0
  243. agno/models/perplexity/perplexity.py +203 -0
  244. agno/models/portkey/__init__.py +3 -0
  245. agno/models/portkey/portkey.py +82 -0
  246. agno/models/requesty/__init__.py +5 -0
  247. agno/models/requesty/requesty.py +69 -0
  248. agno/models/response.py +177 -7
  249. agno/models/sambanova/__init__.py +4 -0
  250. agno/models/sambanova/sambanova.py +23 -4
  251. agno/models/siliconflow/__init__.py +5 -0
  252. agno/models/siliconflow/siliconflow.py +42 -0
  253. agno/models/together/__init__.py +4 -0
  254. agno/models/together/together.py +21 -164
  255. agno/models/utils.py +266 -0
  256. agno/models/vercel/__init__.py +3 -0
  257. agno/models/vercel/v0.py +43 -0
  258. agno/models/vertexai/__init__.py +0 -1
  259. agno/models/vertexai/claude.py +190 -0
  260. agno/models/vllm/__init__.py +3 -0
  261. agno/models/vllm/vllm.py +83 -0
  262. agno/models/xai/__init__.py +2 -0
  263. agno/models/xai/xai.py +111 -7
  264. agno/os/__init__.py +3 -0
  265. agno/os/app.py +1027 -0
  266. agno/os/auth.py +244 -0
  267. agno/os/config.py +126 -0
  268. agno/os/interfaces/__init__.py +1 -0
  269. agno/os/interfaces/a2a/__init__.py +3 -0
  270. agno/os/interfaces/a2a/a2a.py +42 -0
  271. agno/os/interfaces/a2a/router.py +249 -0
  272. agno/os/interfaces/a2a/utils.py +924 -0
  273. agno/os/interfaces/agui/__init__.py +3 -0
  274. agno/os/interfaces/agui/agui.py +47 -0
  275. agno/os/interfaces/agui/router.py +147 -0
  276. agno/os/interfaces/agui/utils.py +574 -0
  277. agno/os/interfaces/base.py +25 -0
  278. agno/os/interfaces/slack/__init__.py +3 -0
  279. agno/os/interfaces/slack/router.py +148 -0
  280. agno/os/interfaces/slack/security.py +30 -0
  281. agno/os/interfaces/slack/slack.py +47 -0
  282. agno/os/interfaces/whatsapp/__init__.py +3 -0
  283. agno/os/interfaces/whatsapp/router.py +210 -0
  284. agno/os/interfaces/whatsapp/security.py +55 -0
  285. agno/os/interfaces/whatsapp/whatsapp.py +36 -0
  286. agno/os/mcp.py +293 -0
  287. agno/os/middleware/__init__.py +9 -0
  288. agno/os/middleware/jwt.py +797 -0
  289. agno/os/router.py +258 -0
  290. agno/os/routers/__init__.py +3 -0
  291. agno/os/routers/agents/__init__.py +3 -0
  292. agno/os/routers/agents/router.py +599 -0
  293. agno/os/routers/agents/schema.py +261 -0
  294. agno/os/routers/evals/__init__.py +3 -0
  295. agno/os/routers/evals/evals.py +450 -0
  296. agno/os/routers/evals/schemas.py +174 -0
  297. agno/os/routers/evals/utils.py +231 -0
  298. agno/os/routers/health.py +31 -0
  299. agno/os/routers/home.py +52 -0
  300. agno/os/routers/knowledge/__init__.py +3 -0
  301. agno/os/routers/knowledge/knowledge.py +1008 -0
  302. agno/os/routers/knowledge/schemas.py +178 -0
  303. agno/os/routers/memory/__init__.py +3 -0
  304. agno/os/routers/memory/memory.py +661 -0
  305. agno/os/routers/memory/schemas.py +88 -0
  306. agno/os/routers/metrics/__init__.py +3 -0
  307. agno/os/routers/metrics/metrics.py +190 -0
  308. agno/os/routers/metrics/schemas.py +47 -0
  309. agno/os/routers/session/__init__.py +3 -0
  310. agno/os/routers/session/session.py +997 -0
  311. agno/os/routers/teams/__init__.py +3 -0
  312. agno/os/routers/teams/router.py +512 -0
  313. agno/os/routers/teams/schema.py +257 -0
  314. agno/os/routers/traces/__init__.py +3 -0
  315. agno/os/routers/traces/schemas.py +414 -0
  316. agno/os/routers/traces/traces.py +499 -0
  317. agno/os/routers/workflows/__init__.py +3 -0
  318. agno/os/routers/workflows/router.py +624 -0
  319. agno/os/routers/workflows/schema.py +75 -0
  320. agno/os/schema.py +534 -0
  321. agno/os/scopes.py +469 -0
  322. agno/{playground → os}/settings.py +7 -15
  323. agno/os/utils.py +973 -0
  324. agno/reasoning/anthropic.py +80 -0
  325. agno/reasoning/azure_ai_foundry.py +67 -0
  326. agno/reasoning/deepseek.py +63 -0
  327. agno/reasoning/default.py +97 -0
  328. agno/reasoning/gemini.py +73 -0
  329. agno/reasoning/groq.py +71 -0
  330. agno/reasoning/helpers.py +24 -1
  331. agno/reasoning/ollama.py +67 -0
  332. agno/reasoning/openai.py +86 -0
  333. agno/reasoning/step.py +2 -1
  334. agno/reasoning/vertexai.py +76 -0
  335. agno/run/__init__.py +6 -0
  336. agno/run/agent.py +822 -0
  337. agno/run/base.py +247 -0
  338. agno/run/cancel.py +81 -0
  339. agno/run/requirement.py +181 -0
  340. agno/run/team.py +767 -0
  341. agno/run/workflow.py +708 -0
  342. agno/session/__init__.py +10 -0
  343. agno/session/agent.py +260 -0
  344. agno/session/summary.py +265 -0
  345. agno/session/team.py +342 -0
  346. agno/session/workflow.py +501 -0
  347. agno/table.py +10 -0
  348. agno/team/__init__.py +37 -0
  349. agno/team/team.py +9536 -0
  350. agno/tools/__init__.py +7 -0
  351. agno/tools/agentql.py +120 -0
  352. agno/tools/airflow.py +22 -12
  353. agno/tools/api.py +122 -0
  354. agno/tools/apify.py +276 -83
  355. agno/tools/{arxiv_toolkit.py → arxiv.py} +20 -12
  356. agno/tools/aws_lambda.py +28 -7
  357. agno/tools/aws_ses.py +66 -0
  358. agno/tools/baidusearch.py +11 -4
  359. agno/tools/bitbucket.py +292 -0
  360. agno/tools/brandfetch.py +213 -0
  361. agno/tools/bravesearch.py +106 -0
  362. agno/tools/brightdata.py +367 -0
  363. agno/tools/browserbase.py +209 -0
  364. agno/tools/calcom.py +32 -23
  365. agno/tools/calculator.py +24 -37
  366. agno/tools/cartesia.py +187 -0
  367. agno/tools/{clickup_tool.py → clickup.py} +17 -28
  368. agno/tools/confluence.py +91 -26
  369. agno/tools/crawl4ai.py +139 -43
  370. agno/tools/csv_toolkit.py +28 -22
  371. agno/tools/dalle.py +36 -22
  372. agno/tools/daytona.py +475 -0
  373. agno/tools/decorator.py +169 -14
  374. agno/tools/desi_vocal.py +23 -11
  375. agno/tools/discord.py +32 -29
  376. agno/tools/docker.py +716 -0
  377. agno/tools/duckdb.py +76 -81
  378. agno/tools/duckduckgo.py +43 -40
  379. agno/tools/e2b.py +703 -0
  380. agno/tools/eleven_labs.py +65 -54
  381. agno/tools/email.py +13 -5
  382. agno/tools/evm.py +129 -0
  383. agno/tools/exa.py +324 -42
  384. agno/tools/fal.py +39 -35
  385. agno/tools/file.py +196 -30
  386. agno/tools/file_generation.py +356 -0
  387. agno/tools/financial_datasets.py +288 -0
  388. agno/tools/firecrawl.py +108 -33
  389. agno/tools/function.py +960 -122
  390. agno/tools/giphy.py +34 -12
  391. agno/tools/github.py +1294 -97
  392. agno/tools/gmail.py +922 -0
  393. agno/tools/google_bigquery.py +117 -0
  394. agno/tools/google_drive.py +271 -0
  395. agno/tools/google_maps.py +253 -0
  396. agno/tools/googlecalendar.py +607 -107
  397. agno/tools/googlesheets.py +377 -0
  398. agno/tools/hackernews.py +20 -12
  399. agno/tools/jina.py +24 -14
  400. agno/tools/jira.py +48 -19
  401. agno/tools/knowledge.py +218 -0
  402. agno/tools/linear.py +82 -43
  403. agno/tools/linkup.py +58 -0
  404. agno/tools/local_file_system.py +15 -7
  405. agno/tools/lumalab.py +41 -26
  406. agno/tools/mcp/__init__.py +10 -0
  407. agno/tools/mcp/mcp.py +331 -0
  408. agno/tools/mcp/multi_mcp.py +347 -0
  409. agno/tools/mcp/params.py +24 -0
  410. agno/tools/mcp_toolbox.py +284 -0
  411. agno/tools/mem0.py +193 -0
  412. agno/tools/memory.py +419 -0
  413. agno/tools/mlx_transcribe.py +11 -9
  414. agno/tools/models/azure_openai.py +190 -0
  415. agno/tools/models/gemini.py +203 -0
  416. agno/tools/models/groq.py +158 -0
  417. agno/tools/models/morph.py +186 -0
  418. agno/tools/models/nebius.py +124 -0
  419. agno/tools/models_labs.py +163 -82
  420. agno/tools/moviepy_video.py +18 -13
  421. agno/tools/nano_banana.py +151 -0
  422. agno/tools/neo4j.py +134 -0
  423. agno/tools/newspaper.py +15 -4
  424. agno/tools/newspaper4k.py +19 -6
  425. agno/tools/notion.py +204 -0
  426. agno/tools/openai.py +181 -17
  427. agno/tools/openbb.py +27 -20
  428. agno/tools/opencv.py +321 -0
  429. agno/tools/openweather.py +233 -0
  430. agno/tools/oxylabs.py +385 -0
  431. agno/tools/pandas.py +25 -15
  432. agno/tools/parallel.py +314 -0
  433. agno/tools/postgres.py +238 -185
  434. agno/tools/pubmed.py +125 -13
  435. agno/tools/python.py +48 -35
  436. agno/tools/reasoning.py +283 -0
  437. agno/tools/reddit.py +207 -29
  438. agno/tools/redshift.py +406 -0
  439. agno/tools/replicate.py +69 -26
  440. agno/tools/resend.py +11 -6
  441. agno/tools/scrapegraph.py +179 -19
  442. agno/tools/searxng.py +23 -31
  443. agno/tools/serpapi.py +15 -10
  444. agno/tools/serper.py +255 -0
  445. agno/tools/shell.py +23 -12
  446. agno/tools/shopify.py +1519 -0
  447. agno/tools/slack.py +56 -14
  448. agno/tools/sleep.py +8 -6
  449. agno/tools/spider.py +35 -11
  450. agno/tools/spotify.py +919 -0
  451. agno/tools/sql.py +34 -19
  452. agno/tools/tavily.py +158 -8
  453. agno/tools/telegram.py +18 -8
  454. agno/tools/todoist.py +218 -0
  455. agno/tools/toolkit.py +134 -9
  456. agno/tools/trafilatura.py +388 -0
  457. agno/tools/trello.py +25 -28
  458. agno/tools/twilio.py +18 -9
  459. agno/tools/user_control_flow.py +78 -0
  460. agno/tools/valyu.py +228 -0
  461. agno/tools/visualization.py +467 -0
  462. agno/tools/webbrowser.py +28 -0
  463. agno/tools/webex.py +76 -0
  464. agno/tools/website.py +23 -19
  465. agno/tools/webtools.py +45 -0
  466. agno/tools/whatsapp.py +286 -0
  467. agno/tools/wikipedia.py +28 -19
  468. agno/tools/workflow.py +285 -0
  469. agno/tools/{twitter.py → x.py} +142 -46
  470. agno/tools/yfinance.py +41 -39
  471. agno/tools/youtube.py +34 -17
  472. agno/tools/zendesk.py +15 -5
  473. agno/tools/zep.py +454 -0
  474. agno/tools/zoom.py +86 -37
  475. agno/tracing/__init__.py +12 -0
  476. agno/tracing/exporter.py +157 -0
  477. agno/tracing/schemas.py +276 -0
  478. agno/tracing/setup.py +111 -0
  479. agno/utils/agent.py +938 -0
  480. agno/utils/audio.py +37 -1
  481. agno/utils/certs.py +27 -0
  482. agno/utils/code_execution.py +11 -0
  483. agno/utils/common.py +103 -20
  484. agno/utils/cryptography.py +22 -0
  485. agno/utils/dttm.py +33 -0
  486. agno/utils/events.py +700 -0
  487. agno/utils/functions.py +107 -37
  488. agno/utils/gemini.py +426 -0
  489. agno/utils/hooks.py +171 -0
  490. agno/utils/http.py +185 -0
  491. agno/utils/json_schema.py +159 -37
  492. agno/utils/knowledge.py +36 -0
  493. agno/utils/location.py +19 -0
  494. agno/utils/log.py +221 -8
  495. agno/utils/mcp.py +214 -0
  496. agno/utils/media.py +335 -14
  497. agno/utils/merge_dict.py +22 -1
  498. agno/utils/message.py +77 -2
  499. agno/utils/models/ai_foundry.py +50 -0
  500. agno/utils/models/claude.py +373 -0
  501. agno/utils/models/cohere.py +94 -0
  502. agno/utils/models/llama.py +85 -0
  503. agno/utils/models/mistral.py +100 -0
  504. agno/utils/models/openai_responses.py +140 -0
  505. agno/utils/models/schema_utils.py +153 -0
  506. agno/utils/models/watsonx.py +41 -0
  507. agno/utils/openai.py +257 -0
  508. agno/utils/pickle.py +1 -1
  509. agno/utils/pprint.py +124 -8
  510. agno/utils/print_response/agent.py +930 -0
  511. agno/utils/print_response/team.py +1914 -0
  512. agno/utils/print_response/workflow.py +1668 -0
  513. agno/utils/prompts.py +111 -0
  514. agno/utils/reasoning.py +108 -0
  515. agno/utils/response.py +163 -0
  516. agno/utils/serialize.py +32 -0
  517. agno/utils/shell.py +4 -4
  518. agno/utils/streamlit.py +487 -0
  519. agno/utils/string.py +204 -51
  520. agno/utils/team.py +139 -0
  521. agno/utils/timer.py +9 -2
  522. agno/utils/tokens.py +657 -0
  523. agno/utils/tools.py +19 -1
  524. agno/utils/whatsapp.py +305 -0
  525. agno/utils/yaml_io.py +3 -3
  526. agno/vectordb/__init__.py +2 -0
  527. agno/vectordb/base.py +87 -9
  528. agno/vectordb/cassandra/__init__.py +5 -1
  529. agno/vectordb/cassandra/cassandra.py +383 -27
  530. agno/vectordb/chroma/__init__.py +4 -0
  531. agno/vectordb/chroma/chromadb.py +748 -83
  532. agno/vectordb/clickhouse/__init__.py +7 -1
  533. agno/vectordb/clickhouse/clickhousedb.py +554 -53
  534. agno/vectordb/couchbase/__init__.py +3 -0
  535. agno/vectordb/couchbase/couchbase.py +1446 -0
  536. agno/vectordb/lancedb/__init__.py +5 -0
  537. agno/vectordb/lancedb/lance_db.py +730 -98
  538. agno/vectordb/langchaindb/__init__.py +5 -0
  539. agno/vectordb/langchaindb/langchaindb.py +163 -0
  540. agno/vectordb/lightrag/__init__.py +5 -0
  541. agno/vectordb/lightrag/lightrag.py +388 -0
  542. agno/vectordb/llamaindex/__init__.py +3 -0
  543. agno/vectordb/llamaindex/llamaindexdb.py +166 -0
  544. agno/vectordb/milvus/__init__.py +3 -0
  545. agno/vectordb/milvus/milvus.py +966 -78
  546. agno/vectordb/mongodb/__init__.py +9 -1
  547. agno/vectordb/mongodb/mongodb.py +1175 -172
  548. agno/vectordb/pgvector/__init__.py +8 -0
  549. agno/vectordb/pgvector/pgvector.py +599 -115
  550. agno/vectordb/pineconedb/__init__.py +5 -1
  551. agno/vectordb/pineconedb/pineconedb.py +406 -43
  552. agno/vectordb/qdrant/__init__.py +4 -0
  553. agno/vectordb/qdrant/qdrant.py +914 -61
  554. agno/vectordb/redis/__init__.py +9 -0
  555. agno/vectordb/redis/redisdb.py +682 -0
  556. agno/vectordb/singlestore/__init__.py +8 -1
  557. agno/vectordb/singlestore/singlestore.py +771 -0
  558. agno/vectordb/surrealdb/__init__.py +3 -0
  559. agno/vectordb/surrealdb/surrealdb.py +663 -0
  560. agno/vectordb/upstashdb/__init__.py +5 -0
  561. agno/vectordb/upstashdb/upstashdb.py +718 -0
  562. agno/vectordb/weaviate/__init__.py +8 -0
  563. agno/vectordb/weaviate/index.py +15 -0
  564. agno/vectordb/weaviate/weaviate.py +1009 -0
  565. agno/workflow/__init__.py +23 -1
  566. agno/workflow/agent.py +299 -0
  567. agno/workflow/condition.py +759 -0
  568. agno/workflow/loop.py +756 -0
  569. agno/workflow/parallel.py +853 -0
  570. agno/workflow/router.py +723 -0
  571. agno/workflow/step.py +1564 -0
  572. agno/workflow/steps.py +613 -0
  573. agno/workflow/types.py +556 -0
  574. agno/workflow/workflow.py +4327 -514
  575. agno-2.3.13.dist-info/METADATA +639 -0
  576. agno-2.3.13.dist-info/RECORD +613 -0
  577. {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/WHEEL +1 -1
  578. agno-2.3.13.dist-info/licenses/LICENSE +201 -0
  579. agno/api/playground.py +0 -91
  580. agno/api/schemas/playground.py +0 -22
  581. agno/api/schemas/user.py +0 -22
  582. agno/api/schemas/workspace.py +0 -46
  583. agno/api/user.py +0 -160
  584. agno/api/workspace.py +0 -151
  585. agno/cli/auth_server.py +0 -118
  586. agno/cli/config.py +0 -275
  587. agno/cli/console.py +0 -88
  588. agno/cli/credentials.py +0 -23
  589. agno/cli/entrypoint.py +0 -571
  590. agno/cli/operator.py +0 -355
  591. agno/cli/settings.py +0 -85
  592. agno/cli/ws/ws_cli.py +0 -817
  593. agno/constants.py +0 -13
  594. agno/document/__init__.py +0 -1
  595. agno/document/chunking/semantic.py +0 -47
  596. agno/document/chunking/strategy.py +0 -31
  597. agno/document/reader/__init__.py +0 -1
  598. agno/document/reader/arxiv_reader.py +0 -41
  599. agno/document/reader/base.py +0 -22
  600. agno/document/reader/csv_reader.py +0 -84
  601. agno/document/reader/docx_reader.py +0 -46
  602. agno/document/reader/firecrawl_reader.py +0 -99
  603. agno/document/reader/json_reader.py +0 -43
  604. agno/document/reader/pdf_reader.py +0 -219
  605. agno/document/reader/s3/pdf_reader.py +0 -46
  606. agno/document/reader/s3/text_reader.py +0 -51
  607. agno/document/reader/text_reader.py +0 -41
  608. agno/document/reader/website_reader.py +0 -175
  609. agno/document/reader/youtube_reader.py +0 -50
  610. agno/embedder/__init__.py +0 -1
  611. agno/embedder/azure_openai.py +0 -86
  612. agno/embedder/cohere.py +0 -72
  613. agno/embedder/fastembed.py +0 -37
  614. agno/embedder/google.py +0 -73
  615. agno/embedder/huggingface.py +0 -54
  616. agno/embedder/mistral.py +0 -80
  617. agno/embedder/ollama.py +0 -57
  618. agno/embedder/openai.py +0 -74
  619. agno/embedder/sentence_transformer.py +0 -38
  620. agno/embedder/voyageai.py +0 -64
  621. agno/eval/perf.py +0 -201
  622. agno/file/__init__.py +0 -1
  623. agno/file/file.py +0 -16
  624. agno/file/local/csv.py +0 -32
  625. agno/file/local/txt.py +0 -19
  626. agno/infra/app.py +0 -240
  627. agno/infra/base.py +0 -144
  628. agno/infra/context.py +0 -20
  629. agno/infra/db_app.py +0 -52
  630. agno/infra/resource.py +0 -205
  631. agno/infra/resources.py +0 -55
  632. agno/knowledge/agent.py +0 -230
  633. agno/knowledge/arxiv.py +0 -22
  634. agno/knowledge/combined.py +0 -22
  635. agno/knowledge/csv.py +0 -28
  636. agno/knowledge/csv_url.py +0 -19
  637. agno/knowledge/document.py +0 -20
  638. agno/knowledge/docx.py +0 -30
  639. agno/knowledge/json.py +0 -28
  640. agno/knowledge/langchain.py +0 -71
  641. agno/knowledge/llamaindex.py +0 -66
  642. agno/knowledge/pdf.py +0 -28
  643. agno/knowledge/pdf_url.py +0 -26
  644. agno/knowledge/s3/base.py +0 -60
  645. agno/knowledge/s3/pdf.py +0 -21
  646. agno/knowledge/s3/text.py +0 -23
  647. agno/knowledge/text.py +0 -30
  648. agno/knowledge/website.py +0 -88
  649. agno/knowledge/wikipedia.py +0 -31
  650. agno/knowledge/youtube.py +0 -22
  651. agno/memory/agent.py +0 -392
  652. agno/memory/classifier.py +0 -104
  653. agno/memory/db/__init__.py +0 -1
  654. agno/memory/db/base.py +0 -42
  655. agno/memory/db/mongodb.py +0 -189
  656. agno/memory/db/postgres.py +0 -203
  657. agno/memory/db/sqlite.py +0 -193
  658. agno/memory/memory.py +0 -15
  659. agno/memory/row.py +0 -36
  660. agno/memory/summarizer.py +0 -192
  661. agno/memory/summary.py +0 -19
  662. agno/memory/workflow.py +0 -38
  663. agno/models/google/gemini_openai.py +0 -26
  664. agno/models/ollama/hermes.py +0 -221
  665. agno/models/ollama/tools.py +0 -362
  666. agno/models/vertexai/gemini.py +0 -595
  667. agno/playground/__init__.py +0 -3
  668. agno/playground/async_router.py +0 -421
  669. agno/playground/deploy.py +0 -249
  670. agno/playground/operator.py +0 -92
  671. agno/playground/playground.py +0 -91
  672. agno/playground/schemas.py +0 -76
  673. agno/playground/serve.py +0 -55
  674. agno/playground/sync_router.py +0 -405
  675. agno/reasoning/agent.py +0 -68
  676. agno/run/response.py +0 -112
  677. agno/storage/agent/__init__.py +0 -0
  678. agno/storage/agent/base.py +0 -38
  679. agno/storage/agent/dynamodb.py +0 -350
  680. agno/storage/agent/json.py +0 -92
  681. agno/storage/agent/mongodb.py +0 -228
  682. agno/storage/agent/postgres.py +0 -367
  683. agno/storage/agent/session.py +0 -79
  684. agno/storage/agent/singlestore.py +0 -303
  685. agno/storage/agent/sqlite.py +0 -357
  686. agno/storage/agent/yaml.py +0 -93
  687. agno/storage/workflow/__init__.py +0 -0
  688. agno/storage/workflow/base.py +0 -40
  689. agno/storage/workflow/mongodb.py +0 -233
  690. agno/storage/workflow/postgres.py +0 -366
  691. agno/storage/workflow/session.py +0 -60
  692. agno/storage/workflow/sqlite.py +0 -359
  693. agno/tools/googlesearch.py +0 -88
  694. agno/utils/defaults.py +0 -57
  695. agno/utils/filesystem.py +0 -39
  696. agno/utils/git.py +0 -52
  697. agno/utils/json_io.py +0 -30
  698. agno/utils/load_env.py +0 -19
  699. agno/utils/py_io.py +0 -19
  700. agno/utils/pyproject.py +0 -18
  701. agno/utils/resource_filter.py +0 -31
  702. agno/vectordb/singlestore/s2vectordb.py +0 -390
  703. agno/vectordb/singlestore/s2vectordb2.py +0 -355
  704. agno/workspace/__init__.py +0 -0
  705. agno/workspace/config.py +0 -325
  706. agno/workspace/enums.py +0 -6
  707. agno/workspace/helpers.py +0 -48
  708. agno/workspace/operator.py +0 -758
  709. agno/workspace/settings.py +0 -63
  710. agno-0.1.2.dist-info/LICENSE +0 -375
  711. agno-0.1.2.dist-info/METADATA +0 -502
  712. agno-0.1.2.dist-info/RECORD +0 -352
  713. agno-0.1.2.dist-info/entry_points.txt +0 -3
  714. /agno/{cli → db/migrations}/__init__.py +0 -0
  715. /agno/{cli/ws → db/migrations/versions}/__init__.py +0 -0
  716. /agno/{document/chunking/__init__.py → db/schemas/metrics.py} +0 -0
  717. /agno/{document/reader/s3 → integrations}/__init__.py +0 -0
  718. /agno/{file/local → knowledge/chunking}/__init__.py +0 -0
  719. /agno/{infra → knowledge/remote_content}/__init__.py +0 -0
  720. /agno/{knowledge/s3 → tools/models}/__init__.py +0 -0
  721. /agno/{reranker → utils/models}/__init__.py +0 -0
  722. /agno/{storage → utils/print_response}/__init__.py +0 -0
  723. {agno-0.1.2.dist-info → agno-2.3.13.dist-info}/top_level.txt +0 -0
@@ -1,29 +1,60 @@
1
+ import asyncio
1
2
  import json
2
3
  from hashlib import md5
3
- from typing import Any, Dict, List, Optional
4
+ from os import getenv
5
+ from typing import Any, Dict, List, Optional, Union
4
6
 
5
7
  try:
6
8
  import lancedb
7
9
  import pyarrow as pa
8
10
  except ImportError:
9
- raise ImportError("`lancedb` not installed.")
11
+ raise ImportError("`lancedb` not installed. Please install using `pip install lancedb`")
10
12
 
11
- from agno.document import Document
12
- from agno.embedder import Embedder
13
- from agno.reranker.base import Reranker
14
- from agno.utils.log import logger
13
+ from agno.filters import FilterExpr
14
+ from agno.knowledge.document import Document
15
+ from agno.knowledge.embedder import Embedder
16
+ from agno.knowledge.reranker.base import Reranker
17
+ from agno.utils.log import log_debug, log_info, log_warning, logger
15
18
  from agno.vectordb.base import VectorDb
16
19
  from agno.vectordb.distance import Distance
17
20
  from agno.vectordb.search import SearchType
18
21
 
19
22
 
20
23
  class LanceDb(VectorDb):
24
+ """
25
+ LanceDb class for managing vector operations with LanceDb
26
+
27
+ Args:
28
+ uri: The URI of the LanceDB database.
29
+ name: Name of the vector database.
30
+ description: Description of the vector database.
31
+ connection: The LanceDB connection to use.
32
+ table: The LanceDB table instance to use.
33
+ async_connection: The LanceDB async connection to use.
34
+ async_table: The LanceDB async table instance to use.
35
+ table_name: The name of the LanceDB table to use.
36
+ api_key: The API key to use for the LanceDB connection.
37
+ embedder: The embedder to use when embedding the document contents.
38
+ search_type: The search type to use when searching for documents.
39
+ distance: The distance metric to use when searching for documents.
40
+ nprobes: The number of probes to use when searching for documents.
41
+ reranker: The reranker to use when reranking documents.
42
+ use_tantivy: Whether to use Tantivy for full text search.
43
+ on_bad_vectors: What to do if the vector is bad. One of "error", "drop", "fill", "null".
44
+ fill_value: The value to fill the vector with if on_bad_vectors is "fill".
45
+ """
46
+
21
47
  def __init__(
22
48
  self,
23
49
  uri: lancedb.URI = "/tmp/lancedb",
50
+ name: Optional[str] = None,
51
+ description: Optional[str] = None,
52
+ id: Optional[str] = None,
53
+ connection: Optional[lancedb.LanceDBConnection] = None,
24
54
  table: Optional[lancedb.db.LanceTable] = None,
55
+ async_connection: Optional[lancedb.AsyncConnection] = None,
56
+ async_table: Optional[lancedb.db.AsyncTable] = None,
25
57
  table_name: Optional[str] = None,
26
- connection: Optional[lancedb.LanceDBConnection] = None,
27
58
  api_key: Optional[str] = None,
28
59
  embedder: Optional[Embedder] = None,
29
60
  search_type: SearchType = SearchType.vector,
@@ -31,12 +62,26 @@ class LanceDb(VectorDb):
31
62
  nprobes: Optional[int] = None,
32
63
  reranker: Optional[Reranker] = None,
33
64
  use_tantivy: bool = True,
65
+ on_bad_vectors: Optional[str] = None, # One of "error", "drop", "fill", "null".
66
+ fill_value: Optional[float] = None, # Only used if on_bad_vectors is "fill"
34
67
  ):
68
+ # Dynamic ID generation based on unique identifiers
69
+ if id is None:
70
+ from agno.utils.string import generate_id
71
+
72
+ table_identifier = table_name or "default_table"
73
+ seed = f"{uri}#{table_identifier}"
74
+ id = generate_id(seed)
75
+
76
+ # Initialize base class with name, description, and generated ID
77
+ super().__init__(id=id, name=name, description=description)
78
+
35
79
  # Embedder for embedding the document contents
36
80
  if embedder is None:
37
- from agno.embedder.openai import OpenAIEmbedder
81
+ from agno.knowledge.embedder.openai import OpenAIEmbedder
38
82
 
39
83
  embedder = OpenAIEmbedder()
84
+ log_info("Embedder not provided, using OpenAIEmbedder as default.")
40
85
  self.embedder: Embedder = embedder
41
86
  self.dimensions: Optional[int] = self.embedder.dimensions
42
87
 
@@ -48,20 +93,33 @@ class LanceDb(VectorDb):
48
93
  # Distance metric
49
94
  self.distance: Distance = distance
50
95
 
96
+ # Remote LanceDB connection details
97
+ self.api_key: Optional[str] = api_key
98
+
51
99
  # LanceDB connection details
52
100
  self.uri: lancedb.URI = uri
53
- self.connection: lancedb.LanceDBConnection = connection or lancedb.connect(uri=self.uri, api_key=api_key)
54
-
101
+ self.connection: lancedb.DBConnection = connection or lancedb.connect(uri=self.uri, api_key=api_key)
55
102
  self.table: Optional[lancedb.db.LanceTable] = table
56
- self.table_name: Optional[str] = table_name
103
+
104
+ self.async_connection: Optional[lancedb.AsyncConnection] = async_connection
105
+ self.async_table: Optional[lancedb.db.AsyncTable] = async_table
57
106
 
58
107
  if table_name and table_name in self.connection.table_names():
59
108
  # Open the table if it exists
60
- self.table = self.connection.open_table(name=table_name)
61
- self.table_name = self.table.name
62
- self._vector_col = self.table.schema.names[0]
63
- self._id = self.table.schema.names[1] # type: ignore
64
-
109
+ try:
110
+ self.table = self.connection.open_table(name=table_name)
111
+ self.table_name = self.table.name
112
+ self._vector_col = self.table.schema.names[0]
113
+ self._id = self.table.schema.names[1] # type: ignore
114
+ except ValueError as e:
115
+ # Table might have been dropped by async operations but sync connection hasn't updated
116
+ if "was not found" in str(e):
117
+ log_debug(f"Table {table_name} listed but not accessible, will create if needed")
118
+ self.table = None
119
+ else:
120
+ raise
121
+
122
+ # LanceDB table details
65
123
  if self.table is None:
66
124
  # LanceDB table details
67
125
  if table:
@@ -73,7 +131,7 @@ class LanceDb(VectorDb):
73
131
  self.table = table
74
132
  self.table_name = self.table.name
75
133
  self._vector_col = self.table.schema.names[0]
76
- self._id = self.tbl.schema.names[1] # type: ignore
134
+ self._id = self.table.schema.names[1] # type: ignore
77
135
  else:
78
136
  if not table_name:
79
137
  raise ValueError("Either table or table_name should be provided.")
@@ -84,6 +142,8 @@ class LanceDb(VectorDb):
84
142
 
85
143
  self.reranker: Optional[Reranker] = reranker
86
144
  self.nprobes: Optional[int] = nprobes
145
+ self.on_bad_vectors: Optional[str] = on_bad_vectors
146
+ self.fill_value: Optional[float] = fill_value
87
147
  self.fts_index_exists = False
88
148
  self.use_tantivy = use_tantivy
89
149
 
@@ -95,91 +155,224 @@ class LanceDb(VectorDb):
95
155
  "Please install tantivy-py `pip install tantivy` to use the full text search feature." # noqa: E501
96
156
  )
97
157
 
98
- logger.debug(f"Initialized LanceDb with table: '{self.table_name}'")
158
+ log_debug(f"Initialized LanceDb with table: '{self.table_name}'")
159
+
160
+ def _prepare_vector(self, embedding) -> List[float]:
161
+ """Prepare vector embedding for insertion, ensuring correct dimensions and type."""
162
+ if embedding is not None and len(embedding) > 0:
163
+ # Convert to list of floats
164
+ vector = [float(x) for x in embedding]
165
+
166
+ # Ensure vector has correct dimensions if specified
167
+ if self.dimensions:
168
+ if len(vector) != self.dimensions:
169
+ if len(vector) > self.dimensions:
170
+ # Truncate if too long
171
+ vector = vector[: self.dimensions]
172
+ log_debug(f"Truncated vector from {len(embedding)} to {self.dimensions} dimensions")
173
+ else:
174
+ # Pad with zeros if too short
175
+ vector.extend([0.0] * (self.dimensions - len(vector)))
176
+ log_debug(f"Padded vector from {len(embedding)} to {self.dimensions} dimensions")
177
+
178
+ return vector
179
+ else:
180
+ # Fallback if embedding is None or empty
181
+ return [0.0] * (self.dimensions or 1536)
182
+
183
+ async def _get_async_connection(self) -> lancedb.AsyncConnection:
184
+ """Get or create an async connection to LanceDB."""
185
+ if self.async_connection is None:
186
+ self.async_connection = await lancedb.connect_async(self.uri)
187
+ # Only try to open table if it exists and we don't have it already
188
+ if self.async_table is None:
189
+ table_names = await self.async_connection.table_names()
190
+ if self.table_name in table_names:
191
+ try:
192
+ self.async_table = await self.async_connection.open_table(self.table_name)
193
+ except ValueError:
194
+ # Table might have been dropped by another operation
195
+ pass
196
+ return self.async_connection
197
+
198
+ def _refresh_sync_connection(self) -> None:
199
+ """Refresh the sync connection to see changes made by async operations."""
200
+ try:
201
+ # Re-establish sync connection to see async changes
202
+ if self.connection and self.table_name in self.connection.table_names():
203
+ self.table = self.connection.open_table(self.table_name)
204
+ except Exception as e:
205
+ log_debug(f"Could not refresh sync connection: {e}")
206
+ # If refresh fails, we can still function but sync methods might not see async changes
99
207
 
100
208
  def create(self) -> None:
101
209
  """Create the table if it does not exist."""
102
210
  if not self.exists():
103
- self.connection = self._init_table() # Connection update is needed
211
+ self.table = self._init_table()
104
212
 
105
- def _init_table(self) -> lancedb.db.LanceTable:
106
- schema = pa.schema(
213
+ async def async_create(self) -> None:
214
+ """Create the table asynchronously if it does not exist."""
215
+ if not await self.async_exists():
216
+ try:
217
+ conn = await self._get_async_connection()
218
+ schema = self._base_schema()
219
+
220
+ log_debug(f"Creating table asynchronously: {self.table_name}")
221
+ self.async_table = await conn.create_table(
222
+ self.table_name, schema=schema, mode="overwrite", exist_ok=True
223
+ )
224
+ log_debug(f"Successfully created async table: {self.table_name}")
225
+ except Exception as e:
226
+ logger.error(f"Error creating async table: {e}")
227
+ # Try to fall back to sync table creation
228
+ try:
229
+ log_debug("Falling back to sync table creation")
230
+ self.table = self._init_table()
231
+ log_debug("Sync table created successfully")
232
+ except Exception as sync_e:
233
+ logger.error(f"Sync table creation also failed: {sync_e}")
234
+ raise
235
+
236
+ def _base_schema(self) -> pa.Schema:
237
+ # Use fixed-size list for vector field as required by LanceDB
238
+ if self.dimensions:
239
+ vector_field = pa.field(self._vector_col, pa.list_(pa.float32(), self.dimensions))
240
+ else:
241
+ # Fallback to dynamic list if dimensions not known (should be rare)
242
+ vector_field = pa.field(self._vector_col, pa.list_(pa.float32()))
243
+
244
+ return pa.schema(
107
245
  [
108
- pa.field(
109
- self._vector_col,
110
- pa.list_(
111
- pa.float32(),
112
- len(self.embedder.get_embedding("test")), # type: ignore
113
- ),
114
- ),
246
+ vector_field,
115
247
  pa.field(self._id, pa.string()),
116
248
  pa.field("payload", pa.string()),
117
249
  ]
118
250
  )
119
251
 
120
- logger.debug(f"Creating table: {self.table_name}")
121
- tbl = self.connection.create_table(self.table_name, schema=schema, mode="overwrite", exist_ok=True)
122
- return tbl # type: ignore
123
-
124
- def doc_exists(self, document: Document) -> bool:
125
- """
126
- Validating if the document exists or not
252
+ def _init_table(self) -> lancedb.db.LanceTable:
253
+ schema = self._base_schema()
127
254
 
128
- Args:
129
- document (Document): Document to validate
130
- """
131
- if self.table is not None:
132
- cleaned_content = document.content.replace("\x00", "\ufffd")
133
- doc_id = md5(cleaned_content.encode()).hexdigest()
134
- result = self.table.search().where(f"{self._id}='{doc_id}'").to_arrow()
135
- return len(result) > 0
136
- return False
255
+ log_info(f"Creating table: {self.table_name}")
256
+ if self.api_key or getenv("LANCEDB_API_KEY"):
257
+ log_info("API key found, creating table in remote LanceDB")
258
+ tbl = self.connection.create_table(name=self.table_name, schema=schema, mode="overwrite") # type: ignore
259
+ else:
260
+ tbl = self.connection.create_table(name=self.table_name, schema=schema, mode="overwrite", exist_ok=True) # type: ignore
261
+ return tbl # type: ignore
137
262
 
138
- def insert(self, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
263
+ def insert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
139
264
  """
140
265
  Insert documents into the database.
141
266
 
142
267
  Args:
143
268
  documents (List[Document]): List of documents to insert
144
- filters (Optional[Dict[str, Any]]): Filters to apply while inserting documents
269
+ filters (Optional[Dict[str, Any]]): Filters to add as metadata to documents
145
270
  """
146
- logger.debug(f"Inserting {len(documents)} documents")
147
- data = []
148
271
  if len(documents) <= 0:
149
- logger.debug("No documents to insert")
272
+ log_info("No documents to insert")
150
273
  return
151
274
 
275
+ log_debug(f"Inserting {len(documents)} documents")
276
+ data = []
277
+
152
278
  for document in documents:
279
+ # Add filters to document metadata if provided
280
+ if filters:
281
+ meta_data = document.meta_data.copy() if document.meta_data else {}
282
+ meta_data.update(filters)
283
+ document.meta_data = meta_data
284
+
153
285
  document.embed(embedder=self.embedder)
154
286
  cleaned_content = document.content.replace("\x00", "\ufffd")
155
- doc_id = str(md5(cleaned_content.encode()).hexdigest())
287
+ # Include content_hash in ID to ensure uniqueness across different content hashes
288
+ base_id = document.id or md5(cleaned_content.encode()).hexdigest()
289
+ doc_id = str(md5(f"{base_id}_{content_hash}".encode()).hexdigest())
156
290
  payload = {
157
291
  "name": document.name,
158
292
  "meta_data": document.meta_data,
159
293
  "content": cleaned_content,
160
294
  "usage": document.usage,
295
+ "content_id": document.content_id,
296
+ "content_hash": content_hash,
161
297
  }
162
298
  data.append(
163
299
  {
164
300
  "id": doc_id,
165
- "vector": document.embedding,
301
+ "vector": self._prepare_vector(document.embedding),
166
302
  "payload": json.dumps(payload),
167
303
  }
168
304
  )
169
- logger.debug(f"Parsed document: {document.name} ({document.meta_data})")
305
+ log_debug(f"Parsed document: {document.name} ({document.meta_data})")
170
306
 
171
307
  if self.table is None:
172
308
  logger.error("Table not initialized. Please create the table first")
173
309
  return
174
310
 
175
311
  if not data:
176
- logger.debug("No new data to insert")
312
+ log_debug("No new data to insert")
177
313
  return
178
314
 
179
- self.table.add(data)
180
- logger.debug(f"Inserted {len(data)} documents")
315
+ if self.on_bad_vectors is not None:
316
+ self.table.add(data, on_bad_vectors=self.on_bad_vectors, fill_value=self.fill_value)
317
+ else:
318
+ self.table.add(data)
319
+
320
+ log_debug(f"Inserted {len(data)} documents")
321
+
322
+ async def async_insert(
323
+ self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
324
+ ) -> None:
325
+ """
326
+ Asynchronously insert documents into the database.
327
+
328
+ Note: Currently wraps sync insert method since LanceDB async insert has sync/async table
329
+ synchronization issues causing empty vectors. We still do async embedding for performance.
181
330
 
182
- def upsert(self, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
331
+ Args:
332
+ documents (List[Document]): List of documents to insert
333
+ filters (Optional[Dict[str, Any]]): Filters to apply while inserting documents
334
+ """
335
+ if len(documents) <= 0:
336
+ log_debug("No documents to insert")
337
+ return
338
+
339
+ log_debug(f"Inserting {len(documents)} documents")
340
+
341
+ # Still do async embedding for performance
342
+ if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
343
+ try:
344
+ doc_contents = [doc.content for doc in documents]
345
+ embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
346
+
347
+ for j, doc in enumerate(documents):
348
+ if j < len(embeddings):
349
+ doc.embedding = embeddings[j]
350
+ doc.usage = usages[j] if j < len(usages) else None
351
+ except Exception as e:
352
+ error_str = str(e).lower()
353
+ is_rate_limit = any(
354
+ phrase in error_str
355
+ for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
356
+ )
357
+ if is_rate_limit:
358
+ logger.error(f"Rate limit detected during batch embedding. {e}")
359
+ raise e
360
+ else:
361
+ logger.warning(f"Async batch embedding failed, falling back to individual embeddings: {e}")
362
+ embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
363
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
364
+ else:
365
+ embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
366
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
367
+
368
+ # Use sync insert to avoid sync/async table synchronization issues
369
+ self.insert(content_hash, documents, filters)
370
+
371
+ def upsert_available(self) -> bool:
372
+ """Check if upsert is available in LanceDB."""
373
+ return True
374
+
375
+ def upsert(self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
183
376
  """
184
377
  Upsert documents into the database.
185
378
 
@@ -187,28 +380,140 @@ class LanceDb(VectorDb):
187
380
  documents (List[Document]): List of documents to upsert
188
381
  filters (Optional[Dict[str, Any]]): Filters to apply while upserting
189
382
  """
190
- self.insert(documents)
383
+ if self.content_hash_exists(content_hash):
384
+ self._delete_by_content_hash(content_hash)
385
+ self.insert(content_hash=content_hash, documents=documents, filters=filters)
386
+
387
+ async def async_upsert(
388
+ self, content_hash: str, documents: List[Document], filters: Optional[Dict[str, Any]] = None
389
+ ) -> None:
390
+ """
391
+ Asynchronously upsert documents into the database.
392
+
393
+ Note: Uses async embedding for performance, then sync upsert for reliability.
394
+ """
395
+ if len(documents) > 0:
396
+ # Do async embedding for performance
397
+ if self.embedder.enable_batch and hasattr(self.embedder, "async_get_embeddings_batch_and_usage"):
398
+ try:
399
+ doc_contents = [doc.content for doc in documents]
400
+ embeddings, usages = await self.embedder.async_get_embeddings_batch_and_usage(doc_contents)
401
+ for j, doc in enumerate(documents):
402
+ if j < len(embeddings):
403
+ doc.embedding = embeddings[j]
404
+ doc.usage = usages[j] if j < len(usages) else None
405
+ except Exception as e:
406
+ error_str = str(e).lower()
407
+ is_rate_limit = any(
408
+ phrase in error_str
409
+ for phrase in ["rate limit", "too many requests", "429", "trial key", "api calls / minute"]
410
+ )
411
+ if is_rate_limit:
412
+ raise e
413
+ else:
414
+ embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
415
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
416
+ else:
417
+ embed_tasks = [doc.async_embed(embedder=self.embedder) for doc in documents]
418
+ await asyncio.gather(*embed_tasks, return_exceptions=True)
419
+
420
+ # Use sync upsert for reliability
421
+ self.upsert(content_hash=content_hash, documents=documents, filters=filters)
422
+
423
+ def search(
424
+ self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
425
+ ) -> List[Document]:
426
+ """
427
+ Search for documents matching the query.
428
+
429
+ Args:
430
+ query (str): Query string to search for
431
+ limit (int): Maximum number of results to return
432
+ filters (Optional[Dict[str, Any]]): Filters to apply to the search
433
+
434
+ Returns:
435
+ List[Document]: List of matching documents
436
+ """
437
+ if self.connection:
438
+ self.table = self.connection.open_table(name=self.table_name)
439
+
440
+ results = None
441
+
442
+ if isinstance(filters, list):
443
+ log_warning("Filter Expressions are not yet supported in LanceDB. No filters will be applied.")
444
+ filters = None
191
445
 
192
- def search(self, query: str, limit: int = 5, filters: Optional[Dict[str, Any]] = None) -> List[Document]:
193
446
  if self.search_type == SearchType.vector:
194
- return self.vector_search(query, limit)
447
+ results = self.vector_search(query, limit)
195
448
  elif self.search_type == SearchType.keyword:
196
- return self.keyword_search(query, limit)
449
+ results = self.keyword_search(query, limit)
197
450
  elif self.search_type == SearchType.hybrid:
198
- return self.hybrid_search(query, limit)
451
+ results = self.hybrid_search(query, limit)
199
452
  else:
200
453
  logger.error(f"Invalid search type '{self.search_type}'.")
201
454
  return []
202
455
 
203
- def vector_search(self, query: str, limit: int = 5) -> List[Document]:
456
+ if results is None:
457
+ return []
458
+
459
+ search_results = self._build_search_results(results)
460
+
461
+ # Filter results based on metadata if filters are provided
462
+ if filters and search_results:
463
+ filtered_results = []
464
+ for doc in search_results:
465
+ if doc.meta_data is None:
466
+ continue
467
+
468
+ # Check if all filter criteria match
469
+ match = True
470
+ for key, value in filters.items():
471
+ if key not in doc.meta_data or doc.meta_data[key] != value:
472
+ match = False
473
+ break
474
+
475
+ if match:
476
+ filtered_results.append(doc)
477
+
478
+ search_results = filtered_results
479
+
480
+ if self.reranker and search_results:
481
+ search_results = self.reranker.rerank(query=query, documents=search_results)
482
+
483
+ log_info(f"Found {len(search_results)} documents")
484
+ return search_results
485
+
486
+ async def async_search(
487
+ self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
488
+ ) -> List[Document]:
489
+ """
490
+ Asynchronously search for documents matching the query.
491
+
492
+ Note: Currently wraps sync search method since LanceDB async search has sync/async table
493
+ synchronization issues. Performance impact is minimal for search operations.
494
+
495
+ Args:
496
+ query (str): Query string to search for
497
+ limit (int): Maximum number of results to return
498
+ filters (Optional[Dict[str, Any]]): Filters to apply to the search
499
+
500
+ Returns:
501
+ List[Document]: List of matching documents
502
+ """
503
+ # Wrap sync search method to avoid sync/async table synchronization issues
504
+ return self.search(query=query, limit=limit, filters=filters)
505
+
506
+ def vector_search(
507
+ self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
508
+ ) -> List[Document]:
204
509
  query_embedding = self.embedder.get_embedding(query)
205
510
  if query_embedding is None:
206
511
  logger.error(f"Error getting embedding for Query: {query}")
207
- return []
512
+ return None
208
513
 
209
514
  if self.table is None:
210
515
  logger.error("Table not initialized. Please create the table first")
211
- return []
516
+ return None # type: ignore
212
517
 
213
518
  results = self.table.search(
214
519
  query=query_embedding,
@@ -218,22 +523,20 @@ class LanceDb(VectorDb):
218
523
  if self.nprobes:
219
524
  results.nprobes(self.nprobes)
220
525
 
221
- results = results.to_pandas()
222
- search_results = self._build_search_results(results)
223
-
224
- if self.reranker:
225
- search_results = self.reranker.rerank(query=query, documents=search_results)
526
+ return results.to_pandas()
226
527
 
227
- return search_results
228
-
229
- def hybrid_search(self, query: str, limit: int = 5) -> List[Document]:
528
+ def hybrid_search(
529
+ self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
530
+ ) -> List[Document]:
230
531
  query_embedding = self.embedder.get_embedding(query)
231
532
  if query_embedding is None:
232
533
  logger.error(f"Error getting embedding for Query: {query}")
233
534
  return []
535
+
234
536
  if self.table is None:
235
537
  logger.error("Table not initialized. Please create the table first")
236
538
  return []
539
+
237
540
  if not self.fts_index_exists:
238
541
  self.table.create_fts_index("payload", use_tantivy=self.use_tantivy, replace=True)
239
542
  self.fts_index_exists = True
@@ -251,36 +554,25 @@ class LanceDb(VectorDb):
251
554
  if self.nprobes:
252
555
  results.nprobes(self.nprobes)
253
556
 
254
- results = results.to_pandas()
255
-
256
- search_results = self._build_search_results(results)
257
-
258
- if self.reranker:
259
- search_results = self.reranker.rerank(query=query, documents=search_results)
260
-
261
- return search_results
557
+ return results.to_pandas()
262
558
 
263
- def keyword_search(self, query: str, limit: int = 5) -> List[Document]:
559
+ def keyword_search(
560
+ self, query: str, limit: int = 5, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None
561
+ ) -> List[Document]:
264
562
  if self.table is None:
265
563
  logger.error("Table not initialized. Please create the table first")
266
564
  return []
565
+
267
566
  if not self.fts_index_exists:
268
567
  self.table.create_fts_index("payload", use_tantivy=self.use_tantivy, replace=True)
269
568
  self.fts_index_exists = True
270
569
 
271
- results = (
272
- self.table.search(
273
- query=query,
274
- query_type="fts",
275
- )
276
- .limit(limit)
277
- .to_pandas()
278
- )
279
- search_results = self._build_search_results(results)
570
+ results = self.table.search(
571
+ query=query,
572
+ query_type="fts",
573
+ ).limit(limit)
280
574
 
281
- if self.reranker:
282
- search_results = self.reranker.rerank(query=query, documents=search_results)
283
- return search_results
575
+ return results.to_pandas()
284
576
 
285
577
  def _build_search_results(self, results) -> List[Document]: # TODO: typehint pandas?
286
578
  search_results: List[Document] = []
@@ -295,6 +587,7 @@ class LanceDb(VectorDb):
295
587
  embedder=self.embedder,
296
588
  embedding=item["vector"],
297
589
  usage=payload["usage"],
590
+ content_id=payload.get("content_id"),
298
591
  )
299
592
  )
300
593
 
@@ -305,16 +598,66 @@ class LanceDb(VectorDb):
305
598
 
306
599
  def drop(self) -> None:
307
600
  if self.exists():
308
- logger.debug(f"Deleting collection: {self.table_name}")
309
- self.connection.drop_table(self.table_name)
601
+ log_debug(f"Deleting collection: {self.table_name}")
602
+ self.connection.drop_table(self.table_name) # type: ignore
603
+ # Clear the table reference after dropping
604
+ self.table = None
605
+
606
+ async def async_drop(self) -> None:
607
+ """Drop the table asynchronously."""
608
+ if await self.async_exists():
609
+ log_debug(f"Deleting collection: {self.table_name}")
610
+ conn = await self._get_async_connection()
611
+ await conn.drop_table(self.table_name)
612
+ # Clear the async table reference after dropping
613
+ self.async_table = None
310
614
 
311
615
  def exists(self) -> bool:
616
+ # If we have an async table that was created, the table exists
617
+ if self.async_table is not None:
618
+ return True
312
619
  if self.connection:
313
- if self.table_name in self.connection.table_names():
314
- return True
620
+ return self.table_name in self.connection.table_names()
315
621
  return False
316
622
 
623
+ async def async_exists(self) -> bool:
624
+ """Check if the table exists asynchronously."""
625
+ # If we have an async table that was created, the table exists
626
+ if self.async_table is not None:
627
+ return True
628
+ # Check if table exists in database without trying to open it
629
+ if self.async_connection is None:
630
+ self.async_connection = await lancedb.connect_async(self.uri)
631
+ table_names = await self.async_connection.table_names()
632
+ return self.table_name in table_names
633
+
634
+ async def async_get_count(self) -> int:
635
+ """Get the number of rows in the table asynchronously."""
636
+ await self._get_async_connection()
637
+ if self.async_table is not None:
638
+ return await self.async_table.count_rows()
639
+ return 0
640
+
317
641
  def get_count(self) -> int:
642
+ # If we have data in the async table but sync table isn't available, try to get count from async table
643
+ if self.async_table is not None:
644
+ try:
645
+ import asyncio
646
+
647
+ # Check if we're already in an event loop
648
+ try:
649
+ asyncio.get_running_loop()
650
+ # We're in an async context, can't use asyncio.run
651
+ log_debug("Already in async context, falling back to sync table for count")
652
+ except RuntimeError:
653
+ # No event loop running, safe to use asyncio.run
654
+ try:
655
+ return asyncio.run(self.async_get_count())
656
+ except Exception as e:
657
+ log_debug(f"Failed to get async count: {e}")
658
+ except Exception as e:
659
+ log_debug(f"Error in async count logic: {e}")
660
+
318
661
  if self.exists() and self.table:
319
662
  return self.table.count_rows()
320
663
  return 0
@@ -326,4 +669,293 @@ class LanceDb(VectorDb):
326
669
  return False
327
670
 
328
671
  def name_exists(self, name: str) -> bool:
329
- raise NotImplementedError
672
+ """Check if a document with the given name exists in the database"""
673
+ if self.table is None:
674
+ return False
675
+
676
+ try:
677
+ result = self.table.search().select(["payload"]).to_pandas()
678
+ # Convert the JSON strings in payload column to dictionaries
679
+ payloads = result["payload"].apply(json.loads)
680
+
681
+ # Check if the name exists in any of the payloads
682
+ return any(payload.get("name") == name for payload in payloads)
683
+ except Exception as e:
684
+ logger.error(f"Error checking name existence: {e}")
685
+ return False
686
+
687
async def async_name_exists(self, name: str) -> bool:
    """Async name lookup placeholder; always raises.

    Raises:
        NotImplementedError: Always, naming the concrete class.
    """
    backend = self.__class__.__name__
    raise NotImplementedError(f"Async not supported on {backend}.")
689
+
690
def id_exists(self, id: str) -> bool:
    """Check if a document with the given ID exists in the database.

    Args:
        id (str): Document ID matched against the ``self._id`` column.

    Returns:
        bool: True if at least one row matches; False when the table is not
        initialized, nothing matches, or the lookup fails (the failure is
        logged).
    """
    if self.table is None:
        logger.error("Table not initialized")
        return False

    try:
        # The ID is embedded in a SQL string literal, so double any single
        # quote; otherwise an ID containing "'" breaks or alters the filter.
        escaped_id = str(id).replace("'", "''")
        result = self.table.search().where(f"{self._id} = '{escaped_id}'").to_pandas()
        return len(result) > 0
    except Exception as e:
        logger.error(f"Error checking id existence: {e}")
        return False
703
+
704
def delete_by_id(self, id: str) -> bool:
    """Delete content by ID.

    Args:
        id (str): Document ID matched against the ``self._id`` column.

    Returns:
        bool: True when the delete call completed without raising; False
        when the table is not initialized or the delete fails (the failure
        is logged).
    """
    if self.table is None:
        logger.error("Table not initialized")
        return False

    try:
        # Double single quotes so the ID cannot terminate the SQL string
        # literal early and change which rows the predicate deletes.
        escaped_id = str(id).replace("'", "''")
        self.table.delete(f"{self._id} = '{escaped_id}'")
        log_info(f"Deleted records with id '{id}' from table '{self.table_name}'.")
        return True
    except Exception as e:
        logger.error(f"Error deleting rows by id '{id}': {e}")
        return False
718
+
719
def delete_by_name(self, name: str) -> bool:
    """Delete every record whose payload ``"name"`` equals ``name``.

    Args:
        name (str): Name compared against each row's JSON payload.

    Returns:
        bool: True if at least one record was deleted; False when the table
        is not initialized, nothing matched, or an error occurred (logged).
    """
    if self.table is None:
        logger.error("Table not initialized")
        return False

    try:
        row_count = self.table.count_rows()
        frame = self.table.search().select(["id", "payload"]).limit(row_count).to_pandas()

        # The name lives inside the JSON payload, so matching happens in Python.
        matched_ids = [
            record["id"]
            for _, record in frame.iterrows()
            if json.loads(record["payload"]).get("name") == name
        ]

        if not matched_ids:
            log_info(f"No records found with name '{name}' to delete.")
            return False

        for doc_id in matched_ids:
            self.table.delete(f"{self._id} = '{doc_id}'")
        log_info(f"Deleted {len(matched_ids)} records with name '{name}' from table '{self.table_name}'.")
        return True

    except Exception as e:
        logger.error(f"Error deleting rows by name '{name}': {e}")
        return False
749
+
750
def delete_by_metadata(self, metadata: Dict[str, Any]) -> bool:
    """Delete every record whose payload ``"meta_data"`` contains ``metadata``.

    A record matches when each key in ``metadata`` is present in its
    ``meta_data`` with an equal value. An empty ``metadata`` therefore
    matches every record.

    Args:
        metadata (Dict[str, Any]): Key/value pairs that must all match.

    Returns:
        bool: True if at least one record was deleted; False when the table
        is not initialized, nothing matched, or an error occurred (logged).
    """
    if self.table is None:
        logger.error("Table not initialized")
        return False

    try:
        row_count = self.table.count_rows()
        frame = self.table.search().select(["id", "payload"]).limit(row_count).to_pandas()

        matched_ids = []
        for _, record in frame.iterrows():
            doc_metadata = json.loads(record["payload"]).get("meta_data", {})
            # Subset match: every requested pair must be present and equal.
            is_match = all(
                key in doc_metadata and doc_metadata[key] == value for key, value in metadata.items()
            )
            if is_match:
                matched_ids.append(record["id"])

        if not matched_ids:
            log_info(f"No records found with metadata '{metadata}' to delete.")
            return False

        for doc_id in matched_ids:
            self.table.delete(f"{self._id} = '{doc_id}'")
        log_info(
            f"Deleted {len(matched_ids)} records with metadata '{metadata}' from table '{self.table_name}'."
        )
        return True

    except Exception as e:
        logger.error(f"Error deleting rows by metadata '{metadata}': {e}")
        return False
791
+
792
def delete_by_content_id(self, content_id: str) -> bool:
    """Delete every record whose payload ``"content_id"`` equals ``content_id``.

    Args:
        content_id (str): Content ID compared against each row's JSON payload.

    Returns:
        bool: True if at least one record was deleted; False when the table
        is not initialized, nothing matched, or an error occurred (logged).
    """
    if self.table is None:
        logger.error("Table not initialized")
        return False

    try:
        row_count = self.table.count_rows()
        frame = self.table.search().select(["id", "payload"]).limit(row_count).to_pandas()

        # The content ID lives inside the JSON payload, so match in Python.
        matched_ids = [
            record["id"]
            for _, record in frame.iterrows()
            if json.loads(record["payload"]).get("content_id") == content_id
        ]

        if not matched_ids:
            log_info(f"No records found with content_id '{content_id}' to delete.")
            return False

        for doc_id in matched_ids:
            self.table.delete(f"{self._id} = '{doc_id}'")
        log_info(
            f"Deleted {len(matched_ids)} records with content_id '{content_id}' from table '{self.table_name}'."
        )
        return True

    except Exception as e:
        logger.error(f"Error deleting rows by content_id '{content_id}': {e}")
        return False
824
+
825
def _delete_by_content_hash(self, content_hash: str) -> bool:
    """Delete every record whose payload ``"content_hash"`` equals ``content_hash``.

    Args:
        content_hash (str): Hash compared against each row's JSON payload.

    Returns:
        bool: True if at least one record was deleted; False when the table
        is not initialized, nothing matched, or an error occurred (logged).
    """
    if self.table is None:
        logger.error("Table not initialized")
        return False

    try:
        row_count = self.table.count_rows()
        frame = self.table.search().select(["id", "payload"]).limit(row_count).to_pandas()

        # The hash lives inside the JSON payload, so match in Python.
        matched_ids = [
            record["id"]
            for _, record in frame.iterrows()
            if json.loads(record["payload"]).get("content_hash") == content_hash
        ]

        if not matched_ids:
            log_info(f"No records found with content_hash '{content_hash}' to delete.")
            return False

        for doc_id in matched_ids:
            self.table.delete(f"{self._id} = '{doc_id}'")
        log_info(
            f"Deleted {len(matched_ids)} records with content_hash '{content_hash}' from table '{self.table_name}'."
        )
        return True

    except Exception as e:
        logger.error(f"Error deleting rows by content_hash '{content_hash}': {e}")
        return False
857
+
858
def content_hash_exists(self, content_hash: str) -> bool:
    """Check if documents with the given content hash exist.

    Args:
        content_hash (str): Hash compared against the ``"content_hash"`` key
            of each row's JSON payload.

    Returns:
        bool: True on the first matching row; False when the table is not
        initialized, nothing matches, or the lookup fails (logged).
    """
    if self.table is None:
        logger.error("Table not initialized")
        return False

    try:
        row_count = self.table.count_rows()
        frame = self.table.search().select(["id", "payload"]).limit(row_count).to_pandas()

        # Generator keeps the short-circuit: parsing stops at the first match.
        return any(
            json.loads(record["payload"]).get("content_hash") == content_hash
            for _, record in frame.iterrows()
        )

    except Exception as e:
        logger.error(f"Error checking content_hash existence '{content_hash}': {e}")
        return False
879
+
880
def update_metadata(self, content_id: str, metadata: Dict[str, Any]) -> None:
    """
    Update the metadata for documents with the given content_id.

    The new metadata is merged into both the ``"meta_data"`` and ``"filters"``
    entries of each matching row's JSON payload. Only the ``payload`` column
    is rewritten; all other columns of the row are left as stored.

    Args:
        content_id (str): The content ID to update
        metadata (Dict[str, Any]): The metadata to update

    Raises:
        Exception: Any error raised while reading or updating is logged and
            re-raised.
    """
    import json

    try:
        if self.table is None:
            logger.error("Table not initialized")
            return

        # Get all documents and filter in Python (LanceDB doesn't support JSON operators)
        total_count = self.table.count_rows()
        results = self.table.search().select(["id", "payload"]).limit(total_count).to_pandas()

        if results.empty:
            logger.debug("No documents found")
            return

        # Parse every payload once and collect the matches before writing
        # anything, so a malformed payload aborts before any row is modified.
        matches = []
        for _, row in results.iterrows():
            payload = json.loads(row["payload"])
            if payload.get("content_id") == content_id:
                matches.append((row["id"], payload))

        if not matches:
            logger.debug(f"No documents found with content_id: {content_id}")
            return

        updated_count = 0
        for row_id, payload in matches:
            # Merge into an existing dict; replace anything else (missing,
            # null, or a non-dict value) with a copy of the new metadata.
            for key in ("meta_data", "filters"):
                current = payload.get(key)
                if isinstance(current, dict):
                    current.update(metadata)
                else:
                    payload[key] = dict(metadata)

            # Update the payload column in place. The previous delete and
            # re-add rebuilt the row from a query that selected only "id"
            # and "payload", so the vector and text columns were lost.
            escaped_id = str(row_id).replace("'", "''")
            self.table.update(where=f"id = '{escaped_id}'", values={"payload": json.dumps(payload)})
            updated_count += 1

        logger.debug(f"Updated metadata for {updated_count} documents with content_id: {content_id}")

    except Exception as e:
        logger.error(f"Error updating metadata for content_id '{content_id}': {e}")
        raise
958
+
959
def get_supported_search_types(self) -> List[str]:
    """Return the search modes this vector database supports.

    Returns:
        List[str]: The vector, keyword and hybrid ``SearchType`` members,
        in that order.
    """
    supported = (SearchType.vector, SearchType.keyword, SearchType.hybrid)
    return list(supported)