agno 2.2.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (575) hide show
  1. agno/__init__.py +8 -0
  2. agno/agent/__init__.py +51 -0
  3. agno/agent/agent.py +10405 -0
  4. agno/api/__init__.py +0 -0
  5. agno/api/agent.py +28 -0
  6. agno/api/api.py +40 -0
  7. agno/api/evals.py +22 -0
  8. agno/api/os.py +17 -0
  9. agno/api/routes.py +13 -0
  10. agno/api/schemas/__init__.py +9 -0
  11. agno/api/schemas/agent.py +16 -0
  12. agno/api/schemas/evals.py +16 -0
  13. agno/api/schemas/os.py +14 -0
  14. agno/api/schemas/response.py +6 -0
  15. agno/api/schemas/team.py +16 -0
  16. agno/api/schemas/utils.py +21 -0
  17. agno/api/schemas/workflows.py +16 -0
  18. agno/api/settings.py +53 -0
  19. agno/api/team.py +30 -0
  20. agno/api/workflow.py +28 -0
  21. agno/cloud/aws/base.py +214 -0
  22. agno/cloud/aws/s3/__init__.py +2 -0
  23. agno/cloud/aws/s3/api_client.py +43 -0
  24. agno/cloud/aws/s3/bucket.py +195 -0
  25. agno/cloud/aws/s3/object.py +57 -0
  26. agno/culture/__init__.py +3 -0
  27. agno/culture/manager.py +956 -0
  28. agno/db/__init__.py +24 -0
  29. agno/db/async_postgres/__init__.py +3 -0
  30. agno/db/base.py +598 -0
  31. agno/db/dynamo/__init__.py +3 -0
  32. agno/db/dynamo/dynamo.py +2042 -0
  33. agno/db/dynamo/schemas.py +314 -0
  34. agno/db/dynamo/utils.py +743 -0
  35. agno/db/firestore/__init__.py +3 -0
  36. agno/db/firestore/firestore.py +1795 -0
  37. agno/db/firestore/schemas.py +140 -0
  38. agno/db/firestore/utils.py +376 -0
  39. agno/db/gcs_json/__init__.py +3 -0
  40. agno/db/gcs_json/gcs_json_db.py +1335 -0
  41. agno/db/gcs_json/utils.py +228 -0
  42. agno/db/in_memory/__init__.py +3 -0
  43. agno/db/in_memory/in_memory_db.py +1160 -0
  44. agno/db/in_memory/utils.py +230 -0
  45. agno/db/json/__init__.py +3 -0
  46. agno/db/json/json_db.py +1328 -0
  47. agno/db/json/utils.py +230 -0
  48. agno/db/migrations/__init__.py +0 -0
  49. agno/db/migrations/v1_to_v2.py +635 -0
  50. agno/db/mongo/__init__.py +17 -0
  51. agno/db/mongo/async_mongo.py +2026 -0
  52. agno/db/mongo/mongo.py +1982 -0
  53. agno/db/mongo/schemas.py +87 -0
  54. agno/db/mongo/utils.py +259 -0
  55. agno/db/mysql/__init__.py +3 -0
  56. agno/db/mysql/mysql.py +2308 -0
  57. agno/db/mysql/schemas.py +138 -0
  58. agno/db/mysql/utils.py +355 -0
  59. agno/db/postgres/__init__.py +4 -0
  60. agno/db/postgres/async_postgres.py +1927 -0
  61. agno/db/postgres/postgres.py +2260 -0
  62. agno/db/postgres/schemas.py +139 -0
  63. agno/db/postgres/utils.py +442 -0
  64. agno/db/redis/__init__.py +3 -0
  65. agno/db/redis/redis.py +1660 -0
  66. agno/db/redis/schemas.py +123 -0
  67. agno/db/redis/utils.py +346 -0
  68. agno/db/schemas/__init__.py +4 -0
  69. agno/db/schemas/culture.py +120 -0
  70. agno/db/schemas/evals.py +33 -0
  71. agno/db/schemas/knowledge.py +40 -0
  72. agno/db/schemas/memory.py +46 -0
  73. agno/db/schemas/metrics.py +0 -0
  74. agno/db/singlestore/__init__.py +3 -0
  75. agno/db/singlestore/schemas.py +130 -0
  76. agno/db/singlestore/singlestore.py +2272 -0
  77. agno/db/singlestore/utils.py +384 -0
  78. agno/db/sqlite/__init__.py +4 -0
  79. agno/db/sqlite/async_sqlite.py +2293 -0
  80. agno/db/sqlite/schemas.py +133 -0
  81. agno/db/sqlite/sqlite.py +2288 -0
  82. agno/db/sqlite/utils.py +431 -0
  83. agno/db/surrealdb/__init__.py +3 -0
  84. agno/db/surrealdb/metrics.py +292 -0
  85. agno/db/surrealdb/models.py +309 -0
  86. agno/db/surrealdb/queries.py +71 -0
  87. agno/db/surrealdb/surrealdb.py +1353 -0
  88. agno/db/surrealdb/utils.py +147 -0
  89. agno/db/utils.py +116 -0
  90. agno/debug.py +18 -0
  91. agno/eval/__init__.py +14 -0
  92. agno/eval/accuracy.py +834 -0
  93. agno/eval/performance.py +773 -0
  94. agno/eval/reliability.py +306 -0
  95. agno/eval/utils.py +119 -0
  96. agno/exceptions.py +161 -0
  97. agno/filters.py +354 -0
  98. agno/guardrails/__init__.py +6 -0
  99. agno/guardrails/base.py +19 -0
  100. agno/guardrails/openai.py +144 -0
  101. agno/guardrails/pii.py +94 -0
  102. agno/guardrails/prompt_injection.py +52 -0
  103. agno/integrations/__init__.py +0 -0
  104. agno/integrations/discord/__init__.py +3 -0
  105. agno/integrations/discord/client.py +203 -0
  106. agno/knowledge/__init__.py +5 -0
  107. agno/knowledge/chunking/__init__.py +0 -0
  108. agno/knowledge/chunking/agentic.py +79 -0
  109. agno/knowledge/chunking/document.py +91 -0
  110. agno/knowledge/chunking/fixed.py +57 -0
  111. agno/knowledge/chunking/markdown.py +151 -0
  112. agno/knowledge/chunking/recursive.py +63 -0
  113. agno/knowledge/chunking/row.py +39 -0
  114. agno/knowledge/chunking/semantic.py +86 -0
  115. agno/knowledge/chunking/strategy.py +165 -0
  116. agno/knowledge/content.py +74 -0
  117. agno/knowledge/document/__init__.py +5 -0
  118. agno/knowledge/document/base.py +58 -0
  119. agno/knowledge/embedder/__init__.py +5 -0
  120. agno/knowledge/embedder/aws_bedrock.py +343 -0
  121. agno/knowledge/embedder/azure_openai.py +210 -0
  122. agno/knowledge/embedder/base.py +23 -0
  123. agno/knowledge/embedder/cohere.py +323 -0
  124. agno/knowledge/embedder/fastembed.py +62 -0
  125. agno/knowledge/embedder/fireworks.py +13 -0
  126. agno/knowledge/embedder/google.py +258 -0
  127. agno/knowledge/embedder/huggingface.py +94 -0
  128. agno/knowledge/embedder/jina.py +182 -0
  129. agno/knowledge/embedder/langdb.py +22 -0
  130. agno/knowledge/embedder/mistral.py +206 -0
  131. agno/knowledge/embedder/nebius.py +13 -0
  132. agno/knowledge/embedder/ollama.py +154 -0
  133. agno/knowledge/embedder/openai.py +195 -0
  134. agno/knowledge/embedder/sentence_transformer.py +63 -0
  135. agno/knowledge/embedder/together.py +13 -0
  136. agno/knowledge/embedder/vllm.py +262 -0
  137. agno/knowledge/embedder/voyageai.py +165 -0
  138. agno/knowledge/knowledge.py +1988 -0
  139. agno/knowledge/reader/__init__.py +7 -0
  140. agno/knowledge/reader/arxiv_reader.py +81 -0
  141. agno/knowledge/reader/base.py +95 -0
  142. agno/knowledge/reader/csv_reader.py +166 -0
  143. agno/knowledge/reader/docx_reader.py +82 -0
  144. agno/knowledge/reader/field_labeled_csv_reader.py +292 -0
  145. agno/knowledge/reader/firecrawl_reader.py +201 -0
  146. agno/knowledge/reader/json_reader.py +87 -0
  147. agno/knowledge/reader/markdown_reader.py +137 -0
  148. agno/knowledge/reader/pdf_reader.py +431 -0
  149. agno/knowledge/reader/pptx_reader.py +101 -0
  150. agno/knowledge/reader/reader_factory.py +313 -0
  151. agno/knowledge/reader/s3_reader.py +89 -0
  152. agno/knowledge/reader/tavily_reader.py +194 -0
  153. agno/knowledge/reader/text_reader.py +115 -0
  154. agno/knowledge/reader/web_search_reader.py +372 -0
  155. agno/knowledge/reader/website_reader.py +455 -0
  156. agno/knowledge/reader/wikipedia_reader.py +59 -0
  157. agno/knowledge/reader/youtube_reader.py +78 -0
  158. agno/knowledge/remote_content/__init__.py +0 -0
  159. agno/knowledge/remote_content/remote_content.py +88 -0
  160. agno/knowledge/reranker/__init__.py +3 -0
  161. agno/knowledge/reranker/base.py +14 -0
  162. agno/knowledge/reranker/cohere.py +64 -0
  163. agno/knowledge/reranker/infinity.py +195 -0
  164. agno/knowledge/reranker/sentence_transformer.py +54 -0
  165. agno/knowledge/types.py +39 -0
  166. agno/knowledge/utils.py +189 -0
  167. agno/media.py +462 -0
  168. agno/memory/__init__.py +3 -0
  169. agno/memory/manager.py +1327 -0
  170. agno/models/__init__.py +0 -0
  171. agno/models/aimlapi/__init__.py +5 -0
  172. agno/models/aimlapi/aimlapi.py +45 -0
  173. agno/models/anthropic/__init__.py +5 -0
  174. agno/models/anthropic/claude.py +757 -0
  175. agno/models/aws/__init__.py +15 -0
  176. agno/models/aws/bedrock.py +701 -0
  177. agno/models/aws/claude.py +378 -0
  178. agno/models/azure/__init__.py +18 -0
  179. agno/models/azure/ai_foundry.py +485 -0
  180. agno/models/azure/openai_chat.py +131 -0
  181. agno/models/base.py +2175 -0
  182. agno/models/cerebras/__init__.py +12 -0
  183. agno/models/cerebras/cerebras.py +501 -0
  184. agno/models/cerebras/cerebras_openai.py +112 -0
  185. agno/models/cohere/__init__.py +5 -0
  186. agno/models/cohere/chat.py +389 -0
  187. agno/models/cometapi/__init__.py +5 -0
  188. agno/models/cometapi/cometapi.py +57 -0
  189. agno/models/dashscope/__init__.py +5 -0
  190. agno/models/dashscope/dashscope.py +91 -0
  191. agno/models/deepinfra/__init__.py +5 -0
  192. agno/models/deepinfra/deepinfra.py +28 -0
  193. agno/models/deepseek/__init__.py +5 -0
  194. agno/models/deepseek/deepseek.py +61 -0
  195. agno/models/defaults.py +1 -0
  196. agno/models/fireworks/__init__.py +5 -0
  197. agno/models/fireworks/fireworks.py +26 -0
  198. agno/models/google/__init__.py +5 -0
  199. agno/models/google/gemini.py +1085 -0
  200. agno/models/groq/__init__.py +5 -0
  201. agno/models/groq/groq.py +556 -0
  202. agno/models/huggingface/__init__.py +5 -0
  203. agno/models/huggingface/huggingface.py +491 -0
  204. agno/models/ibm/__init__.py +5 -0
  205. agno/models/ibm/watsonx.py +422 -0
  206. agno/models/internlm/__init__.py +3 -0
  207. agno/models/internlm/internlm.py +26 -0
  208. agno/models/langdb/__init__.py +1 -0
  209. agno/models/langdb/langdb.py +48 -0
  210. agno/models/litellm/__init__.py +14 -0
  211. agno/models/litellm/chat.py +468 -0
  212. agno/models/litellm/litellm_openai.py +25 -0
  213. agno/models/llama_cpp/__init__.py +5 -0
  214. agno/models/llama_cpp/llama_cpp.py +22 -0
  215. agno/models/lmstudio/__init__.py +5 -0
  216. agno/models/lmstudio/lmstudio.py +25 -0
  217. agno/models/message.py +434 -0
  218. agno/models/meta/__init__.py +12 -0
  219. agno/models/meta/llama.py +475 -0
  220. agno/models/meta/llama_openai.py +78 -0
  221. agno/models/metrics.py +120 -0
  222. agno/models/mistral/__init__.py +5 -0
  223. agno/models/mistral/mistral.py +432 -0
  224. agno/models/nebius/__init__.py +3 -0
  225. agno/models/nebius/nebius.py +54 -0
  226. agno/models/nexus/__init__.py +3 -0
  227. agno/models/nexus/nexus.py +22 -0
  228. agno/models/nvidia/__init__.py +5 -0
  229. agno/models/nvidia/nvidia.py +28 -0
  230. agno/models/ollama/__init__.py +5 -0
  231. agno/models/ollama/chat.py +441 -0
  232. agno/models/openai/__init__.py +9 -0
  233. agno/models/openai/chat.py +883 -0
  234. agno/models/openai/like.py +27 -0
  235. agno/models/openai/responses.py +1050 -0
  236. agno/models/openrouter/__init__.py +5 -0
  237. agno/models/openrouter/openrouter.py +66 -0
  238. agno/models/perplexity/__init__.py +5 -0
  239. agno/models/perplexity/perplexity.py +187 -0
  240. agno/models/portkey/__init__.py +3 -0
  241. agno/models/portkey/portkey.py +81 -0
  242. agno/models/requesty/__init__.py +5 -0
  243. agno/models/requesty/requesty.py +52 -0
  244. agno/models/response.py +199 -0
  245. agno/models/sambanova/__init__.py +5 -0
  246. agno/models/sambanova/sambanova.py +28 -0
  247. agno/models/siliconflow/__init__.py +5 -0
  248. agno/models/siliconflow/siliconflow.py +25 -0
  249. agno/models/together/__init__.py +5 -0
  250. agno/models/together/together.py +25 -0
  251. agno/models/utils.py +266 -0
  252. agno/models/vercel/__init__.py +3 -0
  253. agno/models/vercel/v0.py +26 -0
  254. agno/models/vertexai/__init__.py +0 -0
  255. agno/models/vertexai/claude.py +70 -0
  256. agno/models/vllm/__init__.py +3 -0
  257. agno/models/vllm/vllm.py +78 -0
  258. agno/models/xai/__init__.py +3 -0
  259. agno/models/xai/xai.py +113 -0
  260. agno/os/__init__.py +3 -0
  261. agno/os/app.py +876 -0
  262. agno/os/auth.py +57 -0
  263. agno/os/config.py +104 -0
  264. agno/os/interfaces/__init__.py +1 -0
  265. agno/os/interfaces/a2a/__init__.py +3 -0
  266. agno/os/interfaces/a2a/a2a.py +42 -0
  267. agno/os/interfaces/a2a/router.py +250 -0
  268. agno/os/interfaces/a2a/utils.py +924 -0
  269. agno/os/interfaces/agui/__init__.py +3 -0
  270. agno/os/interfaces/agui/agui.py +47 -0
  271. agno/os/interfaces/agui/router.py +144 -0
  272. agno/os/interfaces/agui/utils.py +534 -0
  273. agno/os/interfaces/base.py +25 -0
  274. agno/os/interfaces/slack/__init__.py +3 -0
  275. agno/os/interfaces/slack/router.py +148 -0
  276. agno/os/interfaces/slack/security.py +30 -0
  277. agno/os/interfaces/slack/slack.py +47 -0
  278. agno/os/interfaces/whatsapp/__init__.py +3 -0
  279. agno/os/interfaces/whatsapp/router.py +211 -0
  280. agno/os/interfaces/whatsapp/security.py +53 -0
  281. agno/os/interfaces/whatsapp/whatsapp.py +36 -0
  282. agno/os/mcp.py +292 -0
  283. agno/os/middleware/__init__.py +7 -0
  284. agno/os/middleware/jwt.py +233 -0
  285. agno/os/router.py +1763 -0
  286. agno/os/routers/__init__.py +3 -0
  287. agno/os/routers/evals/__init__.py +3 -0
  288. agno/os/routers/evals/evals.py +430 -0
  289. agno/os/routers/evals/schemas.py +142 -0
  290. agno/os/routers/evals/utils.py +162 -0
  291. agno/os/routers/health.py +31 -0
  292. agno/os/routers/home.py +52 -0
  293. agno/os/routers/knowledge/__init__.py +3 -0
  294. agno/os/routers/knowledge/knowledge.py +997 -0
  295. agno/os/routers/knowledge/schemas.py +178 -0
  296. agno/os/routers/memory/__init__.py +3 -0
  297. agno/os/routers/memory/memory.py +515 -0
  298. agno/os/routers/memory/schemas.py +62 -0
  299. agno/os/routers/metrics/__init__.py +3 -0
  300. agno/os/routers/metrics/metrics.py +190 -0
  301. agno/os/routers/metrics/schemas.py +47 -0
  302. agno/os/routers/session/__init__.py +3 -0
  303. agno/os/routers/session/session.py +997 -0
  304. agno/os/schema.py +1055 -0
  305. agno/os/settings.py +43 -0
  306. agno/os/utils.py +630 -0
  307. agno/py.typed +0 -0
  308. agno/reasoning/__init__.py +0 -0
  309. agno/reasoning/anthropic.py +80 -0
  310. agno/reasoning/azure_ai_foundry.py +67 -0
  311. agno/reasoning/deepseek.py +63 -0
  312. agno/reasoning/default.py +97 -0
  313. agno/reasoning/gemini.py +73 -0
  314. agno/reasoning/groq.py +71 -0
  315. agno/reasoning/helpers.py +63 -0
  316. agno/reasoning/ollama.py +67 -0
  317. agno/reasoning/openai.py +86 -0
  318. agno/reasoning/step.py +31 -0
  319. agno/reasoning/vertexai.py +76 -0
  320. agno/run/__init__.py +6 -0
  321. agno/run/agent.py +787 -0
  322. agno/run/base.py +229 -0
  323. agno/run/cancel.py +81 -0
  324. agno/run/messages.py +32 -0
  325. agno/run/team.py +753 -0
  326. agno/run/workflow.py +708 -0
  327. agno/session/__init__.py +10 -0
  328. agno/session/agent.py +295 -0
  329. agno/session/summary.py +265 -0
  330. agno/session/team.py +392 -0
  331. agno/session/workflow.py +205 -0
  332. agno/team/__init__.py +37 -0
  333. agno/team/team.py +8793 -0
  334. agno/tools/__init__.py +10 -0
  335. agno/tools/agentql.py +120 -0
  336. agno/tools/airflow.py +69 -0
  337. agno/tools/api.py +122 -0
  338. agno/tools/apify.py +314 -0
  339. agno/tools/arxiv.py +127 -0
  340. agno/tools/aws_lambda.py +53 -0
  341. agno/tools/aws_ses.py +66 -0
  342. agno/tools/baidusearch.py +89 -0
  343. agno/tools/bitbucket.py +292 -0
  344. agno/tools/brandfetch.py +213 -0
  345. agno/tools/bravesearch.py +106 -0
  346. agno/tools/brightdata.py +367 -0
  347. agno/tools/browserbase.py +209 -0
  348. agno/tools/calcom.py +255 -0
  349. agno/tools/calculator.py +151 -0
  350. agno/tools/cartesia.py +187 -0
  351. agno/tools/clickup.py +244 -0
  352. agno/tools/confluence.py +240 -0
  353. agno/tools/crawl4ai.py +158 -0
  354. agno/tools/csv_toolkit.py +185 -0
  355. agno/tools/dalle.py +110 -0
  356. agno/tools/daytona.py +475 -0
  357. agno/tools/decorator.py +262 -0
  358. agno/tools/desi_vocal.py +108 -0
  359. agno/tools/discord.py +161 -0
  360. agno/tools/docker.py +716 -0
  361. agno/tools/duckdb.py +379 -0
  362. agno/tools/duckduckgo.py +91 -0
  363. agno/tools/e2b.py +703 -0
  364. agno/tools/eleven_labs.py +196 -0
  365. agno/tools/email.py +67 -0
  366. agno/tools/evm.py +129 -0
  367. agno/tools/exa.py +396 -0
  368. agno/tools/fal.py +127 -0
  369. agno/tools/file.py +240 -0
  370. agno/tools/file_generation.py +350 -0
  371. agno/tools/financial_datasets.py +288 -0
  372. agno/tools/firecrawl.py +143 -0
  373. agno/tools/function.py +1187 -0
  374. agno/tools/giphy.py +93 -0
  375. agno/tools/github.py +1760 -0
  376. agno/tools/gmail.py +922 -0
  377. agno/tools/google_bigquery.py +117 -0
  378. agno/tools/google_drive.py +270 -0
  379. agno/tools/google_maps.py +253 -0
  380. agno/tools/googlecalendar.py +674 -0
  381. agno/tools/googlesearch.py +98 -0
  382. agno/tools/googlesheets.py +377 -0
  383. agno/tools/hackernews.py +77 -0
  384. agno/tools/jina.py +101 -0
  385. agno/tools/jira.py +170 -0
  386. agno/tools/knowledge.py +218 -0
  387. agno/tools/linear.py +426 -0
  388. agno/tools/linkup.py +58 -0
  389. agno/tools/local_file_system.py +90 -0
  390. agno/tools/lumalab.py +183 -0
  391. agno/tools/mcp/__init__.py +10 -0
  392. agno/tools/mcp/mcp.py +331 -0
  393. agno/tools/mcp/multi_mcp.py +347 -0
  394. agno/tools/mcp/params.py +24 -0
  395. agno/tools/mcp_toolbox.py +284 -0
  396. agno/tools/mem0.py +193 -0
  397. agno/tools/memori.py +339 -0
  398. agno/tools/memory.py +419 -0
  399. agno/tools/mlx_transcribe.py +139 -0
  400. agno/tools/models/__init__.py +0 -0
  401. agno/tools/models/azure_openai.py +190 -0
  402. agno/tools/models/gemini.py +203 -0
  403. agno/tools/models/groq.py +158 -0
  404. agno/tools/models/morph.py +186 -0
  405. agno/tools/models/nebius.py +124 -0
  406. agno/tools/models_labs.py +195 -0
  407. agno/tools/moviepy_video.py +349 -0
  408. agno/tools/neo4j.py +134 -0
  409. agno/tools/newspaper.py +46 -0
  410. agno/tools/newspaper4k.py +93 -0
  411. agno/tools/notion.py +204 -0
  412. agno/tools/openai.py +202 -0
  413. agno/tools/openbb.py +160 -0
  414. agno/tools/opencv.py +321 -0
  415. agno/tools/openweather.py +233 -0
  416. agno/tools/oxylabs.py +385 -0
  417. agno/tools/pandas.py +102 -0
  418. agno/tools/parallel.py +314 -0
  419. agno/tools/postgres.py +257 -0
  420. agno/tools/pubmed.py +188 -0
  421. agno/tools/python.py +205 -0
  422. agno/tools/reasoning.py +283 -0
  423. agno/tools/reddit.py +467 -0
  424. agno/tools/replicate.py +117 -0
  425. agno/tools/resend.py +62 -0
  426. agno/tools/scrapegraph.py +222 -0
  427. agno/tools/searxng.py +152 -0
  428. agno/tools/serpapi.py +116 -0
  429. agno/tools/serper.py +255 -0
  430. agno/tools/shell.py +53 -0
  431. agno/tools/slack.py +136 -0
  432. agno/tools/sleep.py +20 -0
  433. agno/tools/spider.py +116 -0
  434. agno/tools/sql.py +154 -0
  435. agno/tools/streamlit/__init__.py +0 -0
  436. agno/tools/streamlit/components.py +113 -0
  437. agno/tools/tavily.py +254 -0
  438. agno/tools/telegram.py +48 -0
  439. agno/tools/todoist.py +218 -0
  440. agno/tools/tool_registry.py +1 -0
  441. agno/tools/toolkit.py +146 -0
  442. agno/tools/trafilatura.py +388 -0
  443. agno/tools/trello.py +274 -0
  444. agno/tools/twilio.py +186 -0
  445. agno/tools/user_control_flow.py +78 -0
  446. agno/tools/valyu.py +228 -0
  447. agno/tools/visualization.py +467 -0
  448. agno/tools/webbrowser.py +28 -0
  449. agno/tools/webex.py +76 -0
  450. agno/tools/website.py +54 -0
  451. agno/tools/webtools.py +45 -0
  452. agno/tools/whatsapp.py +286 -0
  453. agno/tools/wikipedia.py +63 -0
  454. agno/tools/workflow.py +278 -0
  455. agno/tools/x.py +335 -0
  456. agno/tools/yfinance.py +257 -0
  457. agno/tools/youtube.py +184 -0
  458. agno/tools/zendesk.py +82 -0
  459. agno/tools/zep.py +454 -0
  460. agno/tools/zoom.py +382 -0
  461. agno/utils/__init__.py +0 -0
  462. agno/utils/agent.py +820 -0
  463. agno/utils/audio.py +49 -0
  464. agno/utils/certs.py +27 -0
  465. agno/utils/code_execution.py +11 -0
  466. agno/utils/common.py +132 -0
  467. agno/utils/dttm.py +13 -0
  468. agno/utils/enum.py +22 -0
  469. agno/utils/env.py +11 -0
  470. agno/utils/events.py +696 -0
  471. agno/utils/format_str.py +16 -0
  472. agno/utils/functions.py +166 -0
  473. agno/utils/gemini.py +426 -0
  474. agno/utils/hooks.py +57 -0
  475. agno/utils/http.py +74 -0
  476. agno/utils/json_schema.py +234 -0
  477. agno/utils/knowledge.py +36 -0
  478. agno/utils/location.py +19 -0
  479. agno/utils/log.py +255 -0
  480. agno/utils/mcp.py +214 -0
  481. agno/utils/media.py +352 -0
  482. agno/utils/merge_dict.py +41 -0
  483. agno/utils/message.py +118 -0
  484. agno/utils/models/__init__.py +0 -0
  485. agno/utils/models/ai_foundry.py +43 -0
  486. agno/utils/models/claude.py +358 -0
  487. agno/utils/models/cohere.py +87 -0
  488. agno/utils/models/llama.py +78 -0
  489. agno/utils/models/mistral.py +98 -0
  490. agno/utils/models/openai_responses.py +140 -0
  491. agno/utils/models/schema_utils.py +153 -0
  492. agno/utils/models/watsonx.py +41 -0
  493. agno/utils/openai.py +257 -0
  494. agno/utils/pickle.py +32 -0
  495. agno/utils/pprint.py +178 -0
  496. agno/utils/print_response/__init__.py +0 -0
  497. agno/utils/print_response/agent.py +842 -0
  498. agno/utils/print_response/team.py +1724 -0
  499. agno/utils/print_response/workflow.py +1668 -0
  500. agno/utils/prompts.py +111 -0
  501. agno/utils/reasoning.py +108 -0
  502. agno/utils/response.py +163 -0
  503. agno/utils/response_iterator.py +17 -0
  504. agno/utils/safe_formatter.py +24 -0
  505. agno/utils/serialize.py +32 -0
  506. agno/utils/shell.py +22 -0
  507. agno/utils/streamlit.py +487 -0
  508. agno/utils/string.py +231 -0
  509. agno/utils/team.py +139 -0
  510. agno/utils/timer.py +41 -0
  511. agno/utils/tools.py +102 -0
  512. agno/utils/web.py +23 -0
  513. agno/utils/whatsapp.py +305 -0
  514. agno/utils/yaml_io.py +25 -0
  515. agno/vectordb/__init__.py +3 -0
  516. agno/vectordb/base.py +127 -0
  517. agno/vectordb/cassandra/__init__.py +5 -0
  518. agno/vectordb/cassandra/cassandra.py +501 -0
  519. agno/vectordb/cassandra/extra_param_mixin.py +11 -0
  520. agno/vectordb/cassandra/index.py +13 -0
  521. agno/vectordb/chroma/__init__.py +5 -0
  522. agno/vectordb/chroma/chromadb.py +929 -0
  523. agno/vectordb/clickhouse/__init__.py +9 -0
  524. agno/vectordb/clickhouse/clickhousedb.py +835 -0
  525. agno/vectordb/clickhouse/index.py +9 -0
  526. agno/vectordb/couchbase/__init__.py +3 -0
  527. agno/vectordb/couchbase/couchbase.py +1442 -0
  528. agno/vectordb/distance.py +7 -0
  529. agno/vectordb/lancedb/__init__.py +6 -0
  530. agno/vectordb/lancedb/lance_db.py +995 -0
  531. agno/vectordb/langchaindb/__init__.py +5 -0
  532. agno/vectordb/langchaindb/langchaindb.py +163 -0
  533. agno/vectordb/lightrag/__init__.py +5 -0
  534. agno/vectordb/lightrag/lightrag.py +388 -0
  535. agno/vectordb/llamaindex/__init__.py +3 -0
  536. agno/vectordb/llamaindex/llamaindexdb.py +166 -0
  537. agno/vectordb/milvus/__init__.py +4 -0
  538. agno/vectordb/milvus/milvus.py +1182 -0
  539. agno/vectordb/mongodb/__init__.py +9 -0
  540. agno/vectordb/mongodb/mongodb.py +1417 -0
  541. agno/vectordb/pgvector/__init__.py +12 -0
  542. agno/vectordb/pgvector/index.py +23 -0
  543. agno/vectordb/pgvector/pgvector.py +1462 -0
  544. agno/vectordb/pineconedb/__init__.py +5 -0
  545. agno/vectordb/pineconedb/pineconedb.py +747 -0
  546. agno/vectordb/qdrant/__init__.py +5 -0
  547. agno/vectordb/qdrant/qdrant.py +1134 -0
  548. agno/vectordb/redis/__init__.py +9 -0
  549. agno/vectordb/redis/redisdb.py +694 -0
  550. agno/vectordb/search.py +7 -0
  551. agno/vectordb/singlestore/__init__.py +10 -0
  552. agno/vectordb/singlestore/index.py +41 -0
  553. agno/vectordb/singlestore/singlestore.py +763 -0
  554. agno/vectordb/surrealdb/__init__.py +3 -0
  555. agno/vectordb/surrealdb/surrealdb.py +699 -0
  556. agno/vectordb/upstashdb/__init__.py +5 -0
  557. agno/vectordb/upstashdb/upstashdb.py +718 -0
  558. agno/vectordb/weaviate/__init__.py +8 -0
  559. agno/vectordb/weaviate/index.py +15 -0
  560. agno/vectordb/weaviate/weaviate.py +1005 -0
  561. agno/workflow/__init__.py +23 -0
  562. agno/workflow/agent.py +299 -0
  563. agno/workflow/condition.py +738 -0
  564. agno/workflow/loop.py +735 -0
  565. agno/workflow/parallel.py +824 -0
  566. agno/workflow/router.py +702 -0
  567. agno/workflow/step.py +1432 -0
  568. agno/workflow/steps.py +592 -0
  569. agno/workflow/types.py +520 -0
  570. agno/workflow/workflow.py +4321 -0
  571. agno-2.2.13.dist-info/METADATA +614 -0
  572. agno-2.2.13.dist-info/RECORD +575 -0
  573. agno-2.2.13.dist-info/WHEEL +5 -0
  574. agno-2.2.13.dist-info/licenses/LICENSE +201 -0
  575. agno-2.2.13.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1988 @@
1
+ import asyncio
2
+ import hashlib
3
+ import io
4
+ import time
5
+ from dataclasses import dataclass
6
+ from enum import Enum
7
+ from io import BytesIO
8
+ from os.path import basename
9
+ from pathlib import Path
10
+ from typing import Any, Dict, List, Optional, Set, Tuple, Union, cast, overload
11
+
12
+ from httpx import AsyncClient
13
+
14
+ from agno.db.base import AsyncBaseDb, BaseDb
15
+ from agno.db.schemas.knowledge import KnowledgeRow
16
+ from agno.filters import FilterExpr
17
+ from agno.knowledge.content import Content, ContentAuth, ContentStatus, FileData
18
+ from agno.knowledge.document import Document
19
+ from agno.knowledge.reader import Reader, ReaderFactory
20
+ from agno.knowledge.remote_content.remote_content import GCSContent, RemoteContent, S3Content
21
+ from agno.utils.http import async_fetch_with_retry
22
+ from agno.utils.log import log_debug, log_error, log_info, log_warning
23
+ from agno.utils.string import generate_id
24
+
25
+ ContentDict = Dict[str, Union[str, Dict[str, str]]]
26
+
27
+
28
class KnowledgeContentOrigin(Enum):
    """String tags naming where a piece of knowledge content came from.

    The four values mirror the content sources accepted by
    ``Knowledge.add_content_async`` (``path``, ``url``, ``topics`` and
    ``text_content``).
    NOTE(review): the call sites that consume this enum are outside the
    visible part of the file -- confirm the mapping before relying on it.
    """

    # Presumably content read from a local filesystem path -- TODO confirm.
    PATH = "path"
    # Presumably content fetched from a URL -- TODO confirm.
    URL = "url"
    # Presumably content looked up by topic name -- TODO confirm.
    TOPIC = "topic"
    # Presumably raw content supplied directly by the caller -- TODO confirm.
    CONTENT = "content"
33
+
34
+
35
+ @dataclass
36
+ class Knowledge:
37
+ """Knowledge class"""
38
+
39
+ name: Optional[str] = None
40
+ description: Optional[str] = None
41
+ vector_db: Optional[Any] = None
42
+ contents_db: Optional[Union[BaseDb, AsyncBaseDb]] = None
43
+ max_results: int = 10
44
+ readers: Optional[Dict[str, Reader]] = None
45
+
46
def __post_init__(self):
    """Finish dataclass construction.

    Steps, in order:
      1. Re-bind ``self.vector_db`` through ``typing.cast`` so type checkers
         treat it as a ``VectorDb`` (``cast`` is a no-op at runtime).
      2. If a vector database was supplied and its ``exists()`` check is
         falsy, call its ``create()``.
      3. Call ``self.construct_readers()``.
      4. Start ``self.valid_metadata_filters`` as an empty set.
    """
    # Imported here rather than at module level, as in the original code.
    from agno.vectordb import VectorDb

    vector_store = cast(VectorDb, self.vector_db)
    self.vector_db = vector_store

    # Only touch the store when one was actually configured.
    if vector_store:
        if not vector_store.exists():
            vector_store.create()

    self.construct_readers()
    self.valid_metadata_filters = set()
55
+
56
+ # --- SDK Specific Methods ---
57
+
58
+ # --- Add Contents ---
59
@overload
async def add_contents_async(self, contents: List[ContentDict]) -> None: ...

@overload
async def add_contents_async(
    self,
    *,
    paths: Optional[List[str]] = None,
    urls: Optional[List[str]] = None,
    metadata: Optional[Dict[str, str]] = None,
    topics: Optional[List[str]] = None,
    text_contents: Optional[List[str]] = None,
    reader: Optional[Reader] = None,
    include: Optional[List[str]] = None,
    exclude: Optional[List[str]] = None,
    upsert: bool = True,
    skip_if_exists: bool = False,
    remote_content: Optional[RemoteContent] = None,
) -> None: ...

async def add_contents_async(self, *args, **kwargs) -> None:
    """Add several content items by delegating to ``add_content_async``.

    Two calling conventions are supported:

    1. List form -- the first positional argument is a list of dicts. Each
       dict is read with ``.get`` for the keys ``name``, ``description``,
       ``path``, ``url``, ``metadata``, ``topics``, ``text_content``,
       ``reader``, ``include``, ``exclude``, ``upsert``, ``skip_if_exists``
       and ``remote_content``, and one ``add_content_async`` call is made
       per dict. A per-item ``upsert`` / ``skip_if_exists`` wins over the
       call-level keyword of the same name (defaults ``True`` / ``False``).

    2. Keyword form -- one ``add_content_async`` call is made for every
       entry of ``paths``, every entry of ``urls`` and every entry of
       ``text_contents`` (named ``"<name>_<i>"`` or ``"text_content_<i>"``),
       plus one call for ``topics`` when it is non-empty and one for
       ``remote_content`` when it is truthy. ``name`` and ``description``
       keywords are also accepted and forwarded to every call.

    Args:
        *args: Optionally a single list of content dictionaries.
        **kwargs: See the overload signatures; unrecognised keys are ignored.

    Raises:
        ValueError: If neither a leading list nor any keyword is supplied.
    """
    if args and isinstance(args[0], list):
        default_upsert = kwargs.get("upsert", True)
        default_skip_if_exists = kwargs.get("skip_if_exists", False)
        for item in args[0]:
            await self.add_content_async(
                name=item.get("name"),
                description=item.get("description"),
                path=item.get("path"),
                url=item.get("url"),
                metadata=item.get("metadata"),
                topics=item.get("topics"),
                text_content=item.get("text_content"),
                reader=item.get("reader"),
                include=item.get("include"),
                exclude=item.get("exclude"),
                upsert=item.get("upsert", default_upsert),
                skip_if_exists=item.get("skip_if_exists", default_skip_if_exists),
                remote_content=item.get("remote_content", None),
            )

    elif kwargs:
        # ``name`` / ``description`` are Optional[str] downstream, so a missing
        # key must become None rather than an empty list.
        name = kwargs.get("name")
        description = kwargs.get("description")
        metadata = kwargs.get("metadata", {})
        topics = kwargs.get("topics", [])
        remote_content = kwargs.get("remote_content", None)

        # The overloads advertise ``None`` as the default for these lists, so
        # an explicit ``paths=None`` must behave like "nothing to add" instead
        # of raising ``TypeError: 'NoneType' object is not iterable``.
        paths = kwargs.get("paths") or []
        urls = kwargs.get("urls") or []
        text_contents = kwargs.get("text_contents") or []

        # Arguments forwarded unchanged to every delegated call.
        common = {
            "description": description,
            "metadata": metadata,
            "upsert": kwargs.get("upsert", True),
            "skip_if_exists": kwargs.get("skip_if_exists", False),
            "reader": kwargs.get("reader", None),
        }
        # The remote-content call never received include/exclude, so keep
        # those in a separate mapping used by the other sources only.
        filtered = {
            **common,
            "include": kwargs.get("include"),
            "exclude": kwargs.get("exclude"),
        }

        for path in paths:
            await self.add_content_async(name=name, path=path, **filtered)

        for url in urls:
            await self.add_content_async(name=name, url=url, **filtered)

        for i, text_content in enumerate(text_contents):
            content_name = f"{name}_{i}" if name else f"text_content_{i}"
            log_debug(f"Adding text content: {content_name}")
            await self.add_content_async(name=content_name, text_content=text_content, **filtered)

        if topics:
            await self.add_content_async(name=name, topics=topics, **filtered)

        if remote_content:
            await self.add_content_async(name=name, remote_content=remote_content, **common)

    else:
        raise ValueError("Invalid usage of add_contents.")
179
+
180
@overload
def add_contents(self, contents: List[ContentDict]) -> None: ...

@overload
def add_contents(
    self,
    *,
    paths: Optional[List[str]] = None,
    urls: Optional[List[str]] = None,
    metadata: Optional[Dict[str, str]] = None,
    topics: Optional[List[str]] = None,
    text_contents: Optional[List[str]] = None,
    reader: Optional[Reader] = None,
    include: Optional[List[str]] = None,
    exclude: Optional[List[str]] = None,
    upsert: bool = True,
    skip_if_exists: bool = False,
    remote_content: Optional[RemoteContent] = None,
) -> None: ...

def add_contents(self, *args, **kwargs) -> None:
    """Blocking counterpart of ``add_contents_async``.

    All positional and keyword arguments are forwarded untouched to
    ``add_contents_async`` and the resulting coroutine is driven to
    completion with ``asyncio.run``.

    Accepted call shapes (identical to ``add_contents_async``):
      * a single list of content dictionaries as the first positional
        argument, or
      * keyword arguments only.

    Args:
        contents: List of content dictionaries (list form).
        paths: File paths to load content from.
        urls: URLs to load content from.
        metadata: Metadata dictionary applied to every item.
        topics: Topics to add.
        text_contents: Raw text strings to add.
        reader: Reader used to process the content.
        include: File patterns to include.
        exclude: File patterns to exclude.
        upsert: Whether existing content should be updated.
        skip_if_exists: Whether content that already exists is skipped.
        remote_content: Remote content (S3, GCS, etc.) to add.

    Note:
        ``asyncio.run`` starts its own event loop, so call this from
        synchronous code; from within a running loop await
        ``add_contents_async`` instead.
    """
    pending = self.add_contents_async(*args, **kwargs)
    asyncio.run(pending)
225
+
226
+ # --- Add Content ---
227
+
228
@overload
async def add_content_async(
    self,
    *,
    path: Optional[str] = None,
    url: Optional[str] = None,
    text_content: Optional[str] = None,
    metadata: Optional[Dict[str, str]] = None,
    include: Optional[List[str]] = None,
    exclude: Optional[List[str]] = None,
    upsert: bool = True,
    skip_if_exists: bool = False,
    reader: Optional[Reader] = None,
    auth: Optional[ContentAuth] = None,
) -> None: ...


@overload
async def add_content_async(self, *args, **kwargs) -> None: ...


async def add_content_async(
    self,
    name: Optional[str] = None,
    description: Optional[str] = None,
    path: Optional[str] = None,
    url: Optional[str] = None,
    text_content: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
    topics: Optional[List[str]] = None,
    remote_content: Optional[RemoteContent] = None,
    reader: Optional[Reader] = None,
    include: Optional[List[str]] = None,
    exclude: Optional[List[str]] = None,
    upsert: bool = True,
    skip_if_exists: bool = True,
    auth: Optional[ContentAuth] = None,
) -> None:
    """
    Build a ``Content`` record from the given source and load it.

    At least one of ``path``, ``url``, ``text_content``, ``topics`` or
    ``remote_content`` must be supplied; otherwise a warning is logged and
    nothing is loaded.

    Args:
        name: Optional name for the content
        description: Optional description for the content
        path: Optional file path to load content from
        url: Optional URL to load content from
        text_content: Optional text to add directly (wrapped in a "Text" FileData)
        metadata: Optional metadata dictionary
        topics: Optional list of topics
        remote_content: Optional remote content configuration
        reader: Optional reader used to process the content
        include: Optional list of file patterns to include
        exclude: Optional list of file patterns to exclude
        upsert: Whether to upsert; forced to False when skip_if_exists is False
        skip_if_exists: Whether to skip content that already exists
        auth: Optional authentication details passed along on the Content
    """
    # NOTE(review): the implementation default for skip_if_exists is True while
    # the typed overload above declares False - confirm which one is intended.
    sources = (path, url, text_content, topics, remote_content)
    if all(source is None for source in sources):
        log_warning(
            "At least one of 'path', 'url', 'text_content', 'topics', or 'remote_content' must be provided."
        )
        return

    if not skip_if_exists:
        log_info("skip_if_exists is disabled, disabling upsert")
        upsert = False

    # Inline text travels as a FileData payload; every other source leaves it unset.
    file_data = FileData(content=text_content, type="Text") if text_content else None

    content = Content(
        name=name,
        description=description,
        path=path,
        url=url,
        file_data=file_data or None,
        metadata=metadata,
        topics=topics,
        remote_content=remote_content,
        reader=reader,
        auth=auth,
    )
    # The id is derived from the hash, so the hash has to be assigned first.
    content.content_hash = self._build_content_hash(content)
    content.id = generate_id(content.content_hash)

    await self._load_content(content, upsert, skip_if_exists, include, exclude)
296
+
297
@overload
def add_content(
    self,
    *,
    path: Optional[str] = None,
    url: Optional[str] = None,
    text_content: Optional[str] = None,
    metadata: Optional[Dict[str, str]] = None,
    include: Optional[List[str]] = None,
    exclude: Optional[List[str]] = None,
    upsert: bool = True,
    skip_if_exists: bool = False,
    reader: Optional[Reader] = None,
    auth: Optional[ContentAuth] = None,
) -> None: ...


@overload
def add_content(self, *args, **kwargs) -> None: ...


def add_content(
    self,
    name: Optional[str] = None,
    description: Optional[str] = None,
    path: Optional[str] = None,
    url: Optional[str] = None,
    text_content: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
    topics: Optional[List[str]] = None,
    remote_content: Optional[RemoteContent] = None,
    reader: Optional[Reader] = None,
    include: Optional[List[str]] = None,
    exclude: Optional[List[str]] = None,
    upsert: bool = True,
    skip_if_exists: bool = False,
    auth: Optional[ContentAuth] = None,
) -> None:
    """
    Blocking wrapper around ``add_content_async``.

    Every argument is forwarded by keyword and the coroutine is run to
    completion with ``asyncio.run``.

    Args:
        name: Optional name for the content
        description: Optional description for the content
        path: Optional file path to load content from
        url: Optional URL to load content from
        text_content: Optional text content to add directly
        metadata: Optional metadata dictionary
        topics: Optional list of topics
        remote_content: Optional cloud storage configuration
        reader: Optional custom reader for processing the content
        include: Optional list of file patterns to include
        exclude: Optional list of file patterns to exclude
        upsert: Whether to update existing content if it already exists
        skip_if_exists: Whether to skip adding content if it already exists
        auth: Optional authentication details forwarded to the async method
    """
    forwarded = dict(
        name=name,
        description=description,
        path=path,
        url=url,
        text_content=text_content,
        metadata=metadata,
        topics=topics,
        remote_content=remote_content,
        reader=reader,
        include=include,
        exclude=exclude,
        upsert=upsert,
        skip_if_exists=skip_if_exists,
        auth=auth,
    )
    asyncio.run(self.add_content_async(**forwarded))
369
+
370
def _should_skip(self, content_hash: str, skip_if_exists: bool) -> bool:
    """
    Decide whether loading can be skipped because the content is already stored.

    Args:
        content_hash: The content hash string to check for existence
        skip_if_exists: Whether to skip if content already exists

    Returns:
        bool: True only when skip_if_exists is set, a vector database is
        configured and it reports that the hash already exists.
    """
    # Without skip_if_exists the answer is always False, so avoid the
    # vector-database lookup entirely.
    if not skip_if_exists:
        return False

    from agno.vectordb import VectorDb

    self.vector_db = cast(VectorDb, self.vector_db)
    if self.vector_db and self.vector_db.content_hash_exists(content_hash):
        log_debug(f"Content already exists: {content_hash}, skipping...")
        return True

    return False
389
+
390
async def _load_from_path(
    self,
    content: Content,
    upsert: bool,
    skip_if_exists: bool,
    include: Optional[List[str]] = None,
    exclude: Optional[List[str]] = None,
):
    """
    Load content from a local file, or from every entry of a directory.

    For a file: apply the include/exclude filters, register the content in the
    contents database, honour skip_if_exists, read the file with the provided
    reader (or one chosen by file extension) and insert the documents into the
    vector database. For a directory: build one Content per entry and recurse.
    Any other path only produces a warning.

    Args:
        content: Content whose ``path`` points at the file or directory
        upsert: Forwarded to the vector database insert step
        skip_if_exists: Whether already-stored content is skipped
        include: Optional list of file patterns to include
        exclude: Optional list of file patterns to exclude
    """
    import inspect

    from agno.vectordb import VectorDb

    self.vector_db = cast(VectorDb, self.vector_db)

    log_info(f"Adding content from path, {content.id}, {content.name}, {content.path}, {content.description}")
    path = Path(content.path)  # type: ignore

    if path.is_file():
        if not self._should_include_file(str(path), include, exclude):
            return
        log_debug(f"Adding file {path} due to include/exclude filters")

        await self._add_to_contents_db(content)
        if self._should_skip(content.content_hash, skip_if_exists):  # type: ignore[arg-type]
            content.status = ContentStatus.COMPLETED
            await self._aupdate_content(content)
            return

        # Handle LightRAG special case - read file and upload directly
        if self.vector_db.__class__.__name__ == "LightRag":
            await self._process_lightrag_content(content, KnowledgeContentOrigin.PATH)
            return

        reader = content.reader
        if not reader:
            reader = ReaderFactory.get_reader_for_extension(path.suffix)
            log_info(f"Using Reader: {reader.__class__.__name__}")

        # Start from an empty list so the steps below never hit an unbound
        # name when no reader could be resolved.
        read_documents = []
        if reader:
            # TODO: We will refactor this to eventually pass authorization to all readers
            read_kwargs = {"name": content.name or path.name}
            accepts_password = "password" in inspect.signature(reader.read).parameters
            if accepts_password and content.auth and content.auth.password:
                read_kwargs["password"] = content.auth.password
            read_documents = reader.read(path, **read_kwargs)

        if not content.file_type:
            content.file_type = path.suffix

        if not content.size and content.file_data:
            content.size = len(content.file_data.content)  # type: ignore
        if not content.size:
            try:
                content.size = path.stat().st_size
            except (OSError, IOError) as e:
                log_warning(f"Could not get file size for {path}: {e}")
                content.size = 0

        for read_document in read_documents:
            read_document.content_id = content.id

        await self._handle_vector_db_insert(content, read_documents, upsert)

    elif path.is_dir():
        for file_path in path.iterdir():
            # Apply include/exclude filtering
            if not self._should_include_file(str(file_path), include, exclude):
                log_debug(f"Skipping file {file_path} due to include/exclude filters")
                continue

            file_content = Content(
                name=content.name,
                path=str(file_path),
                metadata=content.metadata,
                description=content.description,
                reader=content.reader,
                # Carry credentials along so password-aware readers still work
                # for files discovered inside a directory.
                auth=content.auth,
            )
            file_content.content_hash = self._build_content_hash(file_content)
            file_content.id = generate_id(file_content.content_hash)

            await self._load_from_path(file_content, upsert, skip_if_exists, include, exclude)
    else:
        log_warning(f"Invalid path: {path}")
484
+
485
async def _load_from_url(
    self,
    content: Content,
    upsert: bool,
    skip_if_exists: bool,
):
    """Load the content behind ``content.url``.

    1. Add the content to the contents database (and honour skip_if_exists)
    2. Validate the URL
    3. Fetch the bytes when the URL path has a file extension
    4. Select a reader
    5. Read the content
    6. Chunk the documents if needed
    7. Prepare and insert the content in the vector database

    Raises:
        ValueError: if ``content.url`` is empty.
    """
    import inspect
    from urllib.parse import urlparse

    from agno.vectordb import VectorDb

    self.vector_db = cast(VectorDb, self.vector_db)

    log_info(f"Adding content from URL {content.url}")
    content.file_type = "url"

    if not content.url:
        raise ValueError("No url provided")

    # 1. Add content to contents database
    await self._add_to_contents_db(content)
    if self._should_skip(content.content_hash, skip_if_exists):  # type: ignore[arg-type]
        content.status = ContentStatus.COMPLETED
        await self._aupdate_content(content)
        return

    if self.vector_db.__class__.__name__ == "LightRag":
        await self._process_lightrag_content(content, KnowledgeContentOrigin.URL)
        return

    # 2. Validate URL. A URL that fails validation is recorded as FAILED and
    # processing stops here instead of carrying on with an unusable URL.
    failure_message = None
    try:
        parsed_url = urlparse(content.url)
        if not all([parsed_url.scheme, parsed_url.netloc]):
            failure_message = f"Invalid URL format: {content.url}"
    except Exception as e:
        failure_message = f"Invalid URL: {content.url} - {str(e)}"

    if failure_message is not None:
        content.status = ContentStatus.FAILED
        content.status_message = failure_message
        await self._aupdate_content(content)
        log_warning(failure_message)
        return

    # 3. Fetch and load content if file has an extension
    url_path = Path(parsed_url.path)
    file_extension = url_path.suffix.lower()

    bytes_content = None
    if file_extension:
        async with AsyncClient() as client:
            response = await async_fetch_with_retry(content.url, client=client)
        bytes_content = BytesIO(response.content)

    # 4. Select reader
    # If a reader was provided by the user, use it
    reader = content.reader
    name = content.name if content.name else content.url
    # Else select based on file extension
    if reader is None:
        if file_extension == ".csv":
            name = basename(parsed_url.path) or "data.csv"
        # Attribute names are resolved lazily so only the chosen reader is accessed.
        reader_attribute = {
            ".csv": "csv_reader",
            ".pdf": "pdf_reader",
            ".docx": "docx_reader",
            ".pptx": "pptx_reader",
            ".json": "json_reader",
            ".markdown": "markdown_reader",
        }.get(file_extension, "text_reader")
        reader = getattr(self, reader_attribute)

    # 5. Read content
    try:
        read_documents = []
        if reader is not None:
            # TODO: We will refactor this to eventually pass authorization to all readers
            read_signature = inspect.signature(reader.read)
            is_youtube = reader.__class__.__name__ == "YouTubeReader"

            # The YouTube reader is always handed the URL; other readers get the
            # downloaded bytes when they are available.
            source = content.url if (is_youtube or bytes_content is None) else bytes_content
            read_kwargs = {"name": name}
            if (
                not is_youtube
                and "password" in read_signature.parameters
                and content.auth
                and content.auth.password
            ):
                read_kwargs["password"] = content.auth.password
            read_documents = reader.read(source, **read_kwargs)

    except Exception as e:
        log_error(f"Error reading URL: {content.url} - {str(e)}")
        content.status = ContentStatus.FAILED
        content.status_message = f"Error reading URL: {content.url} - {str(e)}"
        await self._aupdate_content(content)
        return

    # 6. Chunk documents if needed
    # NOTE(review): chunking runs here only when reader.chunk is falsy - confirm intended.
    if reader and not reader.chunk:
        read_documents = await reader.chunk_documents_async(read_documents)

    # 7. Prepare and insert the content in the vector database
    if read_documents:
        for read_document in read_documents:
            read_document.content_id = content.id
    await self._handle_vector_db_insert(content, read_documents, upsert)
606
+
607
async def _load_from_content(
    self,
    content: Content,
    upsert: bool = True,
    skip_if_exists: bool = False,
):
    """
    Load content that was supplied inline through ``content.file_data``.

    A display name is derived from the first ten characters of the payload when
    the content has no name. The content is then registered, the skip and
    LightRag special cases are handled, the payload is read into documents and
    the documents are inserted into the vector database.

    Args:
        content: Content carrying the inline payload
        upsert: Forwarded to the vector database insert step
        skip_if_exists: Whether already-stored content is skipped
    """
    from agno.vectordb import VectorDb

    self.vector_db = cast(VectorDb, self.vector_db)

    # Derive a name: explicit name first, otherwise a short prefix of the payload.
    if content.name:
        name = content.name
    elif content.file_data and content.file_data.content:
        payload = content.file_data.content
        if isinstance(payload, bytes):
            name = payload[:10].decode("utf-8", errors="ignore")
        elif isinstance(payload, str):
            name = payload[:10]
        else:
            name = str(payload)[:10]
    else:
        name = None

    if name is not None:
        content.name = name

    log_info(f"Adding content from {content.name}")

    await self._add_to_contents_db(content)
    if self._should_skip(content.content_hash, skip_if_exists):  # type: ignore[arg-type]
        content.status = ContentStatus.COMPLETED
        await self._aupdate_content(content)
        return

    if content.file_data and self.vector_db.__class__.__name__ == "LightRag":
        await self._process_lightrag_content(content, KnowledgeContentOrigin.CONTENT)
        return

    read_documents = []
    file_data = content.file_data

    if isinstance(file_data, str):
        # Plain string payload: expose it to the reader as a UTF-8 byte stream.
        content_io = io.BytesIO(file_data.encode("utf-8", errors="replace"))

        if content.reader:
            log_info(f"Using reader: {content.reader.__class__.__name__} to read content")
            read_documents = content.reader.read(content_io, name=name)
        else:
            text_reader = self.text_reader
            if not text_reader:
                content.status = ContentStatus.FAILED
                content.status_message = "Text reader not available"
                await self._aupdate_content(content)
                return
            read_documents = text_reader.read(content_io, name=name)

    elif isinstance(file_data, FileData):
        if file_data.type:
            raw = file_data.content
            if isinstance(raw, bytes):
                content_io = io.BytesIO(raw)
            elif isinstance(raw, str):
                content_io = io.BytesIO(raw.encode("utf-8", errors="replace"))
            else:
                content_io = raw  # type: ignore

            # Respect an explicitly provided reader; otherwise select based on file type
            if content.reader:
                log_info(f"Using reader: {content.reader.__class__.__name__} to read content")
                reader = content.reader
            else:
                reader = self._select_reader(file_data.type)
            name = content.name if content.name else f"content_{file_data.type}"
            read_documents = reader.read(content_io, name=name)
            for read_document in read_documents:
                if content.metadata:
                    read_document.meta_data.update(content.metadata)
                read_document.content_id = content.id

            if len(read_documents) == 0:
                content.status = ContentStatus.FAILED
                content.status_message = "Content could not be read"
                await self._aupdate_content(content)
                return

    else:
        content.status = ContentStatus.FAILED
        content.status_message = "No content provided"
        await self._aupdate_content(content)
        return

    await self._handle_vector_db_insert(content, read_documents, upsert)
703
+
704
async def _load_from_topics(
    self,
    content: Content,
    upsert: bool,
    skip_if_exists: bool,
):
    """
    Load one piece of content per topic listed in ``content.topics``.

    Each topic gets its own Content (own hash and id), is registered in the
    contents database, and is read through ``content.reader``. Topics are
    handled independently: a topic that is skipped, handed to LightRag, or
    fails does not stop the remaining topics from being processed.

    Args:
        content: Content carrying the topics, reader and metadata
        upsert: Forwarded to the vector database insert step
        skip_if_exists: Whether already-stored topics are skipped
    """
    from agno.vectordb import VectorDb

    self.vector_db = cast(VectorDb, self.vector_db)
    log_info(f"Adding content from topics: {content.topics}")

    if content.topics is None:
        log_warning("No topics provided for content")
        return

    for topic in content.topics:
        # Use a separate name so the caller's Content is not rebound mid-loop.
        topic_content = Content(
            name=topic,
            metadata=content.metadata,
            reader=content.reader,
            status=ContentStatus.PROCESSING if content.reader else ContentStatus.FAILED,
            file_data=FileData(
                type="Topic",
            ),
            topics=[topic],
        )
        topic_content.content_hash = self._build_content_hash(topic_content)
        topic_content.id = generate_id(topic_content.content_hash)

        await self._add_to_contents_db(topic_content)
        if self._should_skip(topic_content.content_hash, skip_if_exists):
            topic_content.status = ContentStatus.COMPLETED
            await self._aupdate_content(topic_content)
            continue

        if self.vector_db.__class__.__name__ == "LightRag":
            await self._process_lightrag_content(topic_content, KnowledgeContentOrigin.TOPIC)
            continue

        if topic_content.reader is None:
            log_error(f"No reader available for topic: {topic}")
            topic_content.status = ContentStatus.FAILED
            topic_content.status_message = "No reader available for topic"
            await self._aupdate_content(topic_content)
            continue

        read_documents = topic_content.reader.read(topic)
        if len(read_documents) == 0:
            # Nothing to insert: keep the FAILED status instead of letting the
            # insert step overwrite it with COMPLETED.
            topic_content.status = ContentStatus.FAILED
            topic_content.status_message = "No content found for topic"
            await self._aupdate_content(topic_content)
            continue

        for read_document in read_documents:
            read_document.content_id = topic_content.id
            if read_document.content:
                read_document.size = len(read_document.content.encode("utf-8"))

        await self._handle_vector_db_insert(topic_content, read_documents, upsert)
767
+
768
async def _load_from_remote_content(
    self,
    content: Content,
    upsert: bool,
    skip_if_exists: bool,
):
    """
    Dispatch remote content to the loader matching its type.

    S3Content goes to ``_load_from_s3`` and GCSContent to ``_load_from_gcs``.
    Missing or unsupported remote content only produces a warning.
    """
    remote_content = content.remote_content
    if remote_content is None:
        log_warning("No remote content provided for content")
        return

    if isinstance(remote_content, S3Content):
        await self._load_from_s3(content, upsert, skip_if_exists)
        return

    if isinstance(remote_content, GCSContent):
        await self._load_from_gcs(content, upsert, skip_if_exists)
        return

    log_warning(f"Unsupported remote content type: {type(remote_content)}")
788
+
789
async def _load_from_s3(self, content: Content, upsert: bool, skip_if_exists: bool):
    """Load the contextual S3 content.

    1. Identify objects to read
    2. Setup Content object
    3. Hash content and add it to the contents database
    4. Select reader
    5. Fetch and load the content
    6. Read the content
    7. Prepare and insert the content in the vector database
    8. Remove temporary file if needed

    Objects are processed independently: an object that already exists (with
    skip_if_exists set) is marked COMPLETED and the loop moves on to the next.
    """
    from agno.cloud.aws.s3.object import S3Object

    remote_content: S3Content = cast(S3Content, content.remote_content)

    # 1. Identify objects to read
    objects_to_read: List[S3Object] = []
    if remote_content.bucket is not None:
        if remote_content.key is not None:
            _object = S3Object(bucket_name=remote_content.bucket.name, name=remote_content.key)
            objects_to_read.append(_object)
        elif remote_content.object is not None:
            objects_to_read.append(remote_content.object)
        elif remote_content.prefix is not None:
            objects_to_read.extend(remote_content.bucket.get_objects(prefix=remote_content.prefix))
        else:
            objects_to_read.extend(remote_content.bucket.get_objects())

    # URI suffix -> name of the reader attribute; anything else uses the text reader.
    suffix_readers = (
        (".pdf", "pdf_reader"),
        (".csv", "csv_reader"),
        (".docx", "docx_reader"),
        (".pptx", "pptx_reader"),
        (".json", "json_reader"),
        (".markdown", "markdown_reader"),
    )

    for s3_object in objects_to_read:
        # 2. Setup Content object
        content_name = content.name or ""
        content_name += "_" + (s3_object.name or "")
        content_entry = Content(
            name=content_name,
            description=content.description,
            status=ContentStatus.PROCESSING,
            metadata=content.metadata,
            file_type="s3",
        )

        # 3. Hash content and add it to the contents database
        content_entry.content_hash = self._build_content_hash(content_entry)
        content_entry.id = generate_id(content_entry.content_hash)
        await self._add_to_contents_db(content_entry)
        if self._should_skip(content_entry.content_hash, skip_if_exists):
            content_entry.status = ContentStatus.COMPLETED
            await self._aupdate_content(content_entry)
            # Only this object is skipped; keep going with the remaining ones.
            continue

        # 4. Select reader
        reader = content.reader
        if reader is None:
            for suffix, reader_attribute in suffix_readers:
                if s3_object.uri.endswith(suffix):
                    reader = getattr(self, reader_attribute)
                    break
            else:
                reader = self.text_reader
        reader = cast(Reader, reader)

        # 5. Fetch and load the content
        temporary_file = None
        obj_name = content_name or s3_object.name.split("/")[-1]
        readable_content: Optional[Union[BytesIO, Path]] = None
        try:
            if s3_object.uri.endswith(".pdf"):
                readable_content = BytesIO(s3_object.get_resource().get()["Body"].read())
            else:
                temporary_file = Path("storage").joinpath(obj_name)
                readable_content = temporary_file
                s3_object.download(readable_content)  # type: ignore

            # 6. Read the content
            read_documents = reader.read(readable_content, name=obj_name)

            # 7. Prepare and insert the content in the vector database
            # NOTE(review): documents are tagged with the parent content id, not
            # content_entry.id - confirm this is intended.
            for read_document in read_documents:
                read_document.content_id = content.id
            await self._handle_vector_db_insert(content_entry, read_documents, upsert)
        finally:
            # 8. Remove temporary file if needed, even when a step above raised
            if temporary_file is not None and temporary_file.exists():
                temporary_file.unlink()
880
+
881
async def _load_from_gcs(self, content: Content, upsert: bool, skip_if_exists: bool):
    """Load the contextual GCS content.

    1. Identify objects to read
    2. Setup Content object
    3. Hash content and add it to the contents database
    4. Select reader
    5. Fetch and load the content
    6. Read the content
    7. Prepare and insert the content in the vector database

    Blobs are processed independently: a blob that already exists (with
    skip_if_exists set) is marked COMPLETED and the loop moves on to the next.
    """
    remote_content: GCSContent = cast(GCSContent, content.remote_content)

    # 1. Identify objects to read
    objects_to_read = []
    if remote_content.blob_name is not None:
        objects_to_read.append(remote_content.bucket.blob(remote_content.blob_name))  # type: ignore
    elif remote_content.prefix is not None:
        objects_to_read.extend(remote_content.bucket.list_blobs(prefix=remote_content.prefix))  # type: ignore
    else:
        objects_to_read.extend(remote_content.bucket.list_blobs())  # type: ignore

    # Blob-name suffix -> name of the reader attribute; anything else uses the text reader.
    suffix_readers = (
        (".pdf", "pdf_reader"),
        (".csv", "csv_reader"),
        (".docx", "docx_reader"),
        (".pptx", "pptx_reader"),
        (".json", "json_reader"),
        (".markdown", "markdown_reader"),
    )

    for gcs_object in objects_to_read:
        # 2. Setup Content object
        name = (content.name or "content") + "_" + gcs_object.name
        content_entry = Content(
            name=name,
            description=content.description,
            status=ContentStatus.PROCESSING,
            metadata=content.metadata,
            file_type="gcs",
        )

        # 3. Hash content and add it to the contents database
        content_entry.content_hash = self._build_content_hash(content_entry)
        content_entry.id = generate_id(content_entry.content_hash)
        await self._add_to_contents_db(content_entry)
        if self._should_skip(content_entry.content_hash, skip_if_exists):
            content_entry.status = ContentStatus.COMPLETED
            await self._aupdate_content(content_entry)
            # Only this blob is skipped; keep going with the remaining ones.
            continue

        # 4. Select reader
        reader = content.reader
        if reader is None:
            for suffix, reader_attribute in suffix_readers:
                if gcs_object.name.endswith(suffix):
                    reader = getattr(self, reader_attribute)
                    break
            else:
                reader = self.text_reader
        reader = cast(Reader, reader)

        # 5. Fetch and load the content
        readable_content = BytesIO(gcs_object.download_as_bytes())

        # 6. Read the content
        read_documents = reader.read(readable_content, name=name)

        # 7. Prepare and insert the content in the vector database
        # NOTE(review): documents are tagged with the parent content id, not
        # content_entry.id - confirm this is intended.
        for read_document in read_documents:
            read_document.content_id = content.id
        await self._handle_vector_db_insert(content_entry, read_documents, upsert)
952
+
953
async def _handle_vector_db_insert(self, content: Content, read_documents, upsert):
    """
    Write the read documents to the vector database and record the outcome.

    Upserts when the vector database supports it and ``upsert`` is set,
    otherwise inserts. On success the content is marked COMPLETED; when no
    vector database is configured or the write raises, it is marked FAILED
    with a status message. The status is persisted via ``_aupdate_content``.
    """
    from agno.vectordb import VectorDb

    self.vector_db = cast(VectorDb, self.vector_db)

    async def _mark_failed(reason: str) -> None:
        # Shared failure bookkeeping for every error path below.
        content.status = ContentStatus.FAILED
        content.status_message = reason
        await self._aupdate_content(content)

    if not self.vector_db:
        log_error("No vector database configured")
        await _mark_failed("No vector database configured")
        return

    use_upsert = self.vector_db.upsert_available() and upsert
    if use_upsert:
        try:
            await self.vector_db.async_upsert(content.content_hash, read_documents, content.metadata)  # type: ignore[arg-type]
        except Exception as exc:
            log_error(f"Error upserting document: {exc}")
            await _mark_failed("Could not upsert embedding")
            return
    else:
        try:
            await self.vector_db.async_insert(
                content.content_hash,  # type: ignore[arg-type]
                documents=read_documents,
                filters=content.metadata,  # type: ignore[arg-type]
            )
        except Exception as exc:
            log_error(f"Error inserting document: {exc}")
            await _mark_failed("Could not insert embedding")
            return

    content.status = ContentStatus.COMPLETED
    await self._aupdate_content(content)
990
+
991
async def _load_content(
    self,
    content: Content,
    upsert: bool,
    skip_if_exists: bool,
    include: Optional[List[str]] = None,
    exclude: Optional[List[str]] = None,
) -> None:
    """
    Run every loader whose source field is set on ``content``.

    Metadata, when present, is first registered through ``add_filters``. The
    sources are then checked in a fixed order (path, url, file_data, topics,
    remote_content) and each one that is truthy is loaded; several sources on
    the same Content are all processed.
    """
    log_info(f"Loading content: {content.id}")

    if content.metadata:
        self.add_filters(content.metadata)

    # (source attribute, loader factory) pairs, in processing order. Each
    # attribute is re-read right before its loader runs.
    loaders = (
        ("path", lambda: self._load_from_path(content, upsert, skip_if_exists, include, exclude)),
        ("url", lambda: self._load_from_url(content, upsert, skip_if_exists)),
        ("file_data", lambda: self._load_from_content(content, upsert, skip_if_exists)),
        ("topics", lambda: self._load_from_topics(content, upsert, skip_if_exists)),
        ("remote_content", lambda: self._load_from_remote_content(content, upsert, skip_if_exists)),
    )
    for source_field, start_loader in loaders:
        if getattr(content, source_field):
            await start_loader()
1018
+
1019
def _build_content_hash(self, content: Content) -> str:
    """
    Build the SHA-256 hex digest that identifies a piece of content.

    The digest input is chosen by the first source that is set:
        - path: the path string
        - url: the URL
        - file_data with content: the content name (or "content" when unnamed)
        - topics: "<first topic>-<reader class name>" ("unknown" without a reader)
        - otherwise: the name, the id, or "unknown_content" plus six random
          lowercase/digit characters

    Returns:
        str: 64-character hexadecimal digest.
    """

    def _sha256(text: str) -> str:
        return hashlib.sha256(text.encode()).hexdigest()

    if content.path:
        return _sha256(str(content.path))
    if content.url:
        return _sha256(content.url)
    if content.file_data and content.file_data.content:
        return _sha256(content.name or "content")
    if content.topics:
        reader_name = type(content.reader).__name__ if content.reader else "unknown"
        return _sha256(f"{content.topics[0]}-{reader_name}")

    # Fallback for edge cases
    import random
    import string

    fallback = (
        content.name
        or content.id
        or ("unknown_content" + "".join(random.choices(string.ascii_lowercase + string.digits, k=6)))
    )
    return _sha256(fallback)
1046
+
1047
def _ensure_string_field(self, value: Any, field_name: str, default: str = "") -> str:
    """
    Coerce a field value to a string, handling None, lists and other types.

    Args:
        value: The value to convert to string
        field_name: Name of the field, used only in log messages
        default: String returned when there is nothing usable to convert

    Returns:
        str: ``default`` for None, "", an empty list, or a list holding only
        None; the single item of a one-item list; the non-None items of a
        longer list joined with " | "; ``str(value)`` for any other
        non-string; the value itself when it is already a string.
    """
    # Handle None/empty values
    if value is None or value == "":
        return default

    # Handle unexpected list types (the root cause of our Pydantic warning)
    if isinstance(value, list):
        if not value:
            log_debug(f"Empty list found for {field_name}, using default: '{default}'")
            return default
        if len(value) == 1:
            # Single item list, extract the item
            log_debug(f"Single-item list found for {field_name}, extracting: '{value[0]}'")
            return str(value[0]) if value[0] is not None else default
        # Multiple items, join them
        log_debug(f"Multi-item list found for {field_name}, joining: {value}")
        parts = [str(item) for item in value if item is not None]
        # A list holding only None has nothing to join; fall back to the
        # default, matching the single-item [None] case.
        return " | ".join(parts) if parts else default

    # Handle other unexpected types
    if not isinstance(value, str):
        log_debug(f"Non-string type {type(value)} found for {field_name}, converting: '{value}'")
        try:
            return str(value)
        except Exception as e:
            log_warning(f"Failed to convert {field_name} to string: {e}, using default")
            return default

    # Already a string, return as-is
    return value
1088
+
1089
async def _add_to_contents_db(self, content: Content):
    """
    Upsert a row describing ``content`` into the contents database.

    Does nothing when no contents database is configured. Missing timestamps
    default to the current time, the type falls back to the file_data type,
    the size falls back to the length of the file_data content, and the
    status defaults to PROCESSING. The upsert is awaited for an async
    database and called directly otherwise.
    """
    if not self.contents_db:
        return

    created_at = content.created_at or int(time.time())
    updated_at = content.updated_at or int(time.time())

    # Prefer the explicit file type, then whatever the inline payload declares.
    if content.file_type:
        file_type = content.file_type
    elif content.file_data and content.file_data.type:
        file_type = content.file_data.type
    else:
        file_type = None

    # Safely handle string fields with proper type checking
    safe_name = self._ensure_string_field(content.name, "content.name", default="")
    safe_description = self._ensure_string_field(content.description, "content.description", default="")
    safe_linked_to = self._ensure_string_field(self.name, "knowledge.name", default="")
    safe_status_message = self._ensure_string_field(
        content.status_message, "content.status_message", default=""
    )

    # Prefer the recorded size, then the length of the inline payload.
    if content.size:
        size = content.size
    elif content.file_data and content.file_data.content:
        size = len(content.file_data.content)
    else:
        size = None

    content_row = KnowledgeRow(
        id=content.id,
        name=safe_name,
        description=safe_description,
        metadata=content.metadata,
        type=file_type,
        size=size,
        linked_to=safe_linked_to,
        access_count=0,
        status=content.status or ContentStatus.PROCESSING,
        status_message=safe_status_message,
        created_at=created_at,
        updated_at=updated_at,
    )
    if isinstance(self.contents_db, AsyncBaseDb):
        await self.contents_db.upsert_knowledge_content(knowledge_row=content_row)
    else:
        self.contents_db.upsert_knowledge_content(knowledge_row=content_row)
1131
+
1132
def _update_content(self, content: Content) -> Optional[Dict[str, Any]]:
    """Apply the non-``None`` fields of ``content`` to its stored row (sync).

    Returns the updated row as a dict, or ``None`` when there is no contents
    DB, ``content.id`` is missing, or no row exists for that id.

    Raises:
        ValueError: If the contents DB is an ``AsyncBaseDb``.
    """
    from agno.vectordb import VectorDb

    self.vector_db = cast(VectorDb, self.vector_db)

    if not self.contents_db:
        if self.name:
            log_warning(f"Contents DB not found for knowledge base: {self.name}")
        else:
            log_warning("Contents DB not found for knowledge base")
        return None

    if isinstance(self.contents_db, AsyncBaseDb):
        raise ValueError("update_content() is not supported with an async DB. Please use aupdate_content() instead.")

    if not content.id:
        log_warning("Content id is required to update Knowledge content")
        return None

    # TODO: we shouldn't check for content here, we should trust the upsert method to handle conflicts
    row = self.contents_db.get_knowledge_content(content.id)
    if row is None:
        log_warning(f"Content row not found for id: {content.id}, cannot update status")
        return None

    # Only fields that were explicitly provided are copied; string-typed
    # fields are routed through the safe string conversion first.
    string_fields = ("name", "description", "status_message", "external_id")
    for field in ("name", "description", "metadata", "status", "status_message", "external_id"):
        value = getattr(content, field)
        if value is None:
            continue
        if field in string_fields:
            value = self._ensure_string_field(value, f"content.{field}", default="")
        setattr(row, field, value)

    row.updated_at = int(time.time())
    self.contents_db.upsert_knowledge_content(knowledge_row=row)

    if self.vector_db and content.metadata:
        self.vector_db.update_metadata(content_id=content.id, metadata=content.metadata)

    if content.metadata:
        self.add_filters(content.metadata)

    return row.to_dict()
1188
+
1189
async def _aupdate_content(self, content: Content) -> Optional[Dict[str, Any]]:
    """Apply the non-``None`` fields of ``content`` to its stored row (async).

    Works with both ``AsyncBaseDb`` (awaited) and synchronous contents DBs.
    String fields go through ``_ensure_string_field`` exactly as in the
    synchronous ``_update_content``, so unexpected list or non-string values
    are not written to the row as-is.

    Returns the updated row as a dict, or ``None`` when there is no contents
    DB, ``content.id`` is missing, or no row exists for that id.
    """
    if not self.contents_db:
        # Avoid logging "...: None" for an unnamed knowledge base.
        if self.name:
            log_warning(f"Contents DB not found for knowledge base: {self.name}")
        else:
            log_warning("Contents DB not found for knowledge base")
        return None

    if not content.id:
        log_warning("Content id is required to update Knowledge content")
        return None

    is_async_db = isinstance(self.contents_db, AsyncBaseDb)

    # TODO: we shouldn't check for content here, we should trust the upsert method to handle conflicts
    if is_async_db:
        content_row = await self.contents_db.get_knowledge_content(content.id)
    else:
        content_row = self.contents_db.get_knowledge_content(content.id)
    if content_row is None:
        log_warning(f"Content row not found for id: {content.id}, cannot update status")
        return None

    # Apply safe string handling for updates, matching the sync path.
    if content.name is not None:
        content_row.name = self._ensure_string_field(content.name, "content.name", default="")
    if content.description is not None:
        content_row.description = self._ensure_string_field(
            content.description, "content.description", default=""
        )
    if content.metadata is not None:
        content_row.metadata = content.metadata
    if content.status is not None:
        content_row.status = content.status
    if content.status_message is not None:
        content_row.status_message = self._ensure_string_field(
            content.status_message, "content.status_message", default=""
        )
    if content.external_id is not None:
        content_row.external_id = self._ensure_string_field(
            content.external_id, "content.external_id", default=""
        )

    content_row.updated_at = int(time.time())
    if is_async_db:
        await self.contents_db.upsert_knowledge_content(knowledge_row=content_row)
    else:
        self.contents_db.upsert_knowledge_content(knowledge_row=content_row)

    if self.vector_db and content.metadata:
        self.vector_db.update_metadata(content_id=content.id, metadata=content.metadata)

    if content.metadata:
        self.add_filters(content.metadata)

    return content_row.to_dict()
1234
+
1235
async def _process_lightrag_content(self, content: Content, content_type: KnowledgeContentOrigin) -> None:
    """Register ``content`` in the contents DB and upload it to LightRAG.

    The upload strategy depends on ``content_type``:

    * ``PATH``    - read the file at ``content.path`` and send its bytes.
    * ``URL``     - read the URL with ``content.reader`` (or the website reader)
                    and send the first document's text.
    * ``CONTENT`` - send the bytes held in ``content.file_data``.
    * ``TOPIC``   - read ``content.topics`` with ``content.reader`` and send the
                    first document's text.

    Every outcome is persisted through ``_aupdate_content``: success stores the
    returned external id with status COMPLETED, and every failure (including
    missing inputs and exceptions in any branch) stores status FAILED with a
    status message, so a row is not left in PROCESSING.
    """
    from agno.vectordb import VectorDb

    self.vector_db = cast(VectorDb, self.vector_db)

    async def _mark_failed(message: str) -> None:
        # Persist a failure together with a human-readable reason.
        content.status = ContentStatus.FAILED
        content.status_message = message
        await self._aupdate_content(content)

    async def _mark_completed(external_id: Any) -> None:
        # Persist the id returned by the vector DB and flag success.
        content.external_id = external_id
        content.status = ContentStatus.COMPLETED
        await self._aupdate_content(content)

    def _supports(method_name: str) -> bool:
        # The vector DB is duck-typed: only some backends expose these methods.
        return bool(self.vector_db) and hasattr(self.vector_db, method_name)

    await self._add_to_contents_db(content)

    if content_type == KnowledgeContentOrigin.PATH:
        if content.file_data is None:
            log_warning("No file data provided")

        if content.path is None:
            log_error("No path provided for content")
            await _mark_failed("No path provided for content")
            return

        path = Path(content.path)
        log_info(f"Uploading file to LightRAG from path: {path}")
        try:
            # Read the file content from path
            with open(path, "rb") as f:
                file_content = f.read()

            # Get file type from extension or content.file_type
            file_type = content.file_type or path.suffix

            if not _supports("insert_file_bytes"):
                log_error("Vector database does not support file insertion")
                await _mark_failed("Vector database does not support file insertion")
                return

            result = await self.vector_db.insert_file_bytes(
                file_content=file_content,
                filename=path.name,  # Use the original filename with extension
                content_type=file_type,
                send_metadata=True,  # Enable metadata so server knows the file type
            )
            await _mark_completed(result)
        except Exception as e:
            log_error(f"Error uploading file to LightRAG: {e}")
            await _mark_failed(f"Could not upload to LightRAG: {str(e)}")
        return

    if content_type == KnowledgeContentOrigin.URL:
        log_info(f"Uploading file to LightRAG from URL: {content.url}")
        try:
            reader = content.reader or self.website_reader
            if reader is None:
                log_error("No URL reader available")
                await _mark_failed("No URL reader available")
                return

            reader.chunk = False
            read_documents = reader.read(content.url, name=content.name)

            for read_document in read_documents:
                read_document.content_id = content.id

            if not read_documents:
                log_error("No documents read from URL")
                await _mark_failed("No documents read from URL")
                return

            if not _supports("insert_text"):
                log_error("Vector database does not support text insertion")
                await _mark_failed("Vector database does not support text insertion")
                return

            result = await self.vector_db.insert_text(
                file_source=content.url,
                text=read_documents[0].content,
            )
            await _mark_completed(result)
        except Exception as e:
            log_error(f"Error uploading file to LightRAG: {e}")
            await _mark_failed(f"Could not upload to LightRAG: {str(e)}")
        return

    if content_type == KnowledgeContentOrigin.CONTENT:
        filename = content.file_data.filename if content.file_data and content.file_data.filename else "uploaded_file"
        log_info(f"Uploading file to LightRAG: {filename}")

        # Use the content from file_data
        if not (content.file_data and content.file_data.content):
            log_warning(f"No file data available for LightRAG upload: {content.name}")
            await _mark_failed("No file data available for LightRAG upload")
            return

        try:
            if not _supports("insert_file_bytes"):
                log_error("Vector database does not support file insertion")
                await _mark_failed("Vector database does not support file insertion")
                return

            result = await self.vector_db.insert_file_bytes(
                file_content=content.file_data.content,
                filename=filename,
                content_type=content.file_data.type,
                send_metadata=True,  # Enable metadata so server knows the file type
            )
            await _mark_completed(result)
        except Exception as e:
            log_error(f"Error uploading file to LightRAG: {e}")
            await _mark_failed(f"Could not upload to LightRAG: {str(e)}")
        return

    if content_type == KnowledgeContentOrigin.TOPIC:
        log_info(f"Uploading file to LightRAG: {content.name}")

        if content.reader is None:
            log_error("No reader available for topic content")
            await _mark_failed("No reader available for topic content")
            return

        if not content.topics:
            log_error("No topics available for content")
            await _mark_failed("No topics available for content")
            return

        try:
            read_documents = content.reader.read(content.topics)
            if len(read_documents) == 0:
                log_warning(f"No documents found for LightRAG upload: {content.name}")
                await _mark_failed("No documents found for LightRAG upload")
                return

            if not _supports("insert_text"):
                log_error("Vector database does not support text insertion")
                await _mark_failed("Vector database does not support text insertion")
                return

            result = await self.vector_db.insert_text(
                file_source=content.topics[0],
                text=read_documents[0].content,
            )
            await _mark_completed(result)
        except Exception as e:
            log_error(f"Error uploading file to LightRAG: {e}")
            await _mark_failed(f"Could not upload to LightRAG: {str(e)}")
        return
1391
+
1392
def search(
    self,
    query: str,
    max_results: Optional[int] = None,
    filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None,
    search_type: Optional[str] = None,
) -> List[Document]:
    """Return documents from the vector DB that are relevant to ``query``.

    Args:
        query: Text to search for.
        max_results: Result limit; falls back to ``self.max_results`` when falsy.
        filters: Metadata filters forwarded unchanged to the vector DB.
        search_type: Optional search type name. It is applied to the vector DB
            only if that DB already has a ``SearchType``-valued ``search_type``.

    Returns:
        The matching documents, or an empty list when there is no vector DB or
        the search raises.
    """
    from agno.vectordb import VectorDb
    from agno.vectordb.search import SearchType

    self.vector_db = cast(VectorDb, self.vector_db)

    if hasattr(self.vector_db, "search_type"):
        if isinstance(self.vector_db.search_type, SearchType) and search_type:
            self.vector_db.search_type = SearchType(search_type)

    try:
        if self.vector_db is None:
            log_warning("No vector db provided")
            return []

        limit = max_results or self.max_results
        log_debug(f"Getting {limit} relevant documents for query: {query}")
        return self.vector_db.search(query=query, limit=limit, filters=filters)
    except Exception as e:
        log_error(f"Error searching for documents: {e}")
        return []
1422
+
1423
async def async_search(
    self,
    query: str,
    max_results: Optional[int] = None,
    filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None,
    search_type: Optional[str] = None,
) -> List[Document]:
    """Asynchronously return documents relevant to ``query``.

    Uses the vector DB's ``async_search`` and falls back to the synchronous
    ``search`` when the DB raises ``NotImplementedError``. Returns an empty
    list when there is no vector DB or any other error occurs.
    """
    from agno.vectordb import VectorDb
    from agno.vectordb.search import SearchType

    self.vector_db = cast(VectorDb, self.vector_db)

    if hasattr(self.vector_db, "search_type"):
        if isinstance(self.vector_db.search_type, SearchType) and search_type:
            self.vector_db.search_type = SearchType(search_type)

    try:
        if self.vector_db is None:
            log_warning("No vector db provided")
            return []

        limit = max_results or self.max_results
        log_debug(f"Getting {limit} relevant documents for query: {query}")
        try:
            return await self.vector_db.async_search(query=query, limit=limit, filters=filters)
        except NotImplementedError:
            log_info("Vector db does not support async search")
            return self.search(query=query, max_results=limit, filters=filters)
    except Exception as e:
        log_error(f"Error searching for documents: {e}")
        return []
1456
+
1457
def get_valid_filters(self) -> Set[str]:
    """Return the known metadata filter keys, loading them from the DB once.

    The DB is consulted only while ``valid_metadata_filters`` is still ``None``.
    """
    if self.valid_metadata_filters is not None:
        return self.valid_metadata_filters

    self.valid_metadata_filters = set()
    db_keys = self._get_filters_from_db()
    self.valid_metadata_filters.update(db_keys)
    return self.valid_metadata_filters
1462
+
1463
async def aget_valid_filters(self) -> Set[str]:
    """Async variant: return the known metadata filter keys, loading them once.

    The DB is consulted only while ``valid_metadata_filters`` is still ``None``.
    """
    if self.valid_metadata_filters is not None:
        return self.valid_metadata_filters

    self.valid_metadata_filters = set()
    db_keys = await self._aget_filters_from_db()
    self.valid_metadata_filters.update(db_keys)
    return self.valid_metadata_filters
1468
+
1469
+ def _validate_filters(self, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]]) -> Tuple[Any, List[str]]:
1470
+ """Internal method to validate filters against known metadata keys."""
1471
+ if not filters:
1472
+ return None, []
1473
+
1474
+ valid_filters: Optional[Dict[str, Any]] = None
1475
+ invalid_keys = []
1476
+
1477
+ if isinstance(filters, dict):
1478
+ # If no metadata filters tracked yet, all keys are considered invalid
1479
+ if self.valid_metadata_filters is None:
1480
+ invalid_keys = list(filters.keys())
1481
+ log_debug(f"No valid metadata filters tracked yet. All filter keys considered invalid: {invalid_keys}")
1482
+ return None, invalid_keys
1483
+
1484
+ valid_filters = {}
1485
+ for key, value in filters.items():
1486
+ # Handle both normal keys and prefixed keys like meta_data.key
1487
+ base_key = key.split(".")[-1] if "." in key else key
1488
+ if base_key in self.valid_metadata_filters or key in self.valid_metadata_filters:
1489
+ valid_filters[key] = value
1490
+ else:
1491
+ invalid_keys.append(key)
1492
+ log_debug(f"Invalid filter key: {key} - not present in knowledge base")
1493
+
1494
+ elif isinstance(filters, List):
1495
+ # Validate that list contains FilterExpr instances
1496
+ for i, filter_item in enumerate(filters):
1497
+ if not isinstance(filter_item, FilterExpr):
1498
+ log_warning(
1499
+ f"Invalid filter at index {i}: expected FilterExpr instance, "
1500
+ f"got {type(filter_item).__name__}. "
1501
+ f"Use filter expressions like EQ('key', 'value'), IN('key', [values]), "
1502
+ f"AND(...), OR(...), NOT(...) from agno.filters"
1503
+ )
1504
+
1505
+ # Filter expressions are already validated, return empty dict/list
1506
+ # The actual filtering happens in the vector_db layer
1507
+ return filters, []
1508
+
1509
+ return valid_filters, invalid_keys
1510
+
1511
def validate_filters(self, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]]) -> Tuple[Any, List[str]]:
    """Validate ``filters``, first loading known keys from the DB if untracked.

    Returns the ``(valid_filters, invalid_keys)`` pair from ``_validate_filters``.
    """
    if self.valid_metadata_filters is None:
        self.valid_metadata_filters = set()
        db_keys = self._get_filters_from_db()
        self.valid_metadata_filters.update(db_keys)

    outcome = self._validate_filters(filters)
    return outcome
1517
+
1518
async def async_validate_filters(
    self, filters: Optional[Union[Dict[str, Any], List[FilterExpr]]]
) -> Tuple[Any, List[str]]:
    """Async variant of ``validate_filters``.

    Loads known keys through ``_aget_filters_from_db`` when none are tracked,
    then returns the ``(valid_filters, invalid_keys)`` pair.
    """
    if self.valid_metadata_filters is None:
        self.valid_metadata_filters = set()
        db_keys = await self._aget_filters_from_db()
        self.valid_metadata_filters.update(db_keys)

    outcome = self._validate_filters(filters)
    return outcome
1526
+
1527
def add_filters(self, metadata: Dict[str, Any]) -> None:
    """Record every key of ``metadata`` as a valid metadata filter key.

    The tracking set is created on first use; a ``None`` metadata adds nothing.
    """
    if self.valid_metadata_filters is None:
        self.valid_metadata_filters = set()

    if metadata is None:
        return

    known = self.valid_metadata_filters
    for key in metadata.keys():
        known.add(key)
1534
+
1535
+ def _get_filters_from_db(self) -> Set[str]:
1536
+ if self.contents_db is None:
1537
+ return set()
1538
+ contents, _ = self.get_content()
1539
+ valid_filters: Set[str] = set()
1540
+ for content in contents:
1541
+ if content.metadata:
1542
+ valid_filters.update(content.metadata.keys())
1543
+ return valid_filters
1544
+
1545
+ async def _aget_filters_from_db(self) -> Set[str]:
1546
+ if self.contents_db is None:
1547
+ return set()
1548
+ contents, _ = await self.aget_content()
1549
+ valid_filters: Set[str] = set()
1550
+ for content in contents:
1551
+ if content.metadata:
1552
+ valid_filters.update(content.metadata.keys())
1553
+ return valid_filters
1554
+
1555
def remove_vector_by_id(self, id: str) -> bool:
    """Delete the vector with the given id and return the vector DB's result.

    Returns ``False`` (after a warning) when no vector DB is configured.
    """
    from agno.vectordb import VectorDb

    self.vector_db = cast(VectorDb, self.vector_db)
    store = self.vector_db
    if store is not None:
        return store.delete_by_id(id)

    log_warning("No vector DB provided")
    return False
1563
+
1564
def remove_vectors_by_name(self, name: str) -> bool:
    """Delete vectors matching ``name`` and return the vector DB's result.

    Returns ``False`` (after a warning) when no vector DB is configured.
    """
    from agno.vectordb import VectorDb

    self.vector_db = cast(VectorDb, self.vector_db)
    store = self.vector_db
    if store is not None:
        return store.delete_by_name(name)

    log_warning("No vector DB provided")
    return False
1572
+
1573
def remove_vectors_by_metadata(self, metadata: Dict[str, Any]) -> bool:
    """Delete vectors matching ``metadata`` and return the vector DB's result.

    Returns ``False`` (after a warning) when no vector DB is configured.
    """
    from agno.vectordb import VectorDb

    self.vector_db = cast(VectorDb, self.vector_db)
    store = self.vector_db
    if store is not None:
        return store.delete_by_metadata(metadata)

    log_warning("No vector DB provided")
    return False
1581
+
1582
+ # --- API Only Methods ---
1583
+
1584
def patch_content(self, content: Content) -> Optional[Dict[str, Any]]:
    """Update stored content via ``_update_content`` and return its result."""
    updated = self._update_content(content)
    return updated
1586
+
1587
async def apatch_content(self, content: Content) -> Optional[Dict[str, Any]]:
    """Async variant: update stored content via ``_aupdate_content`` and return its result."""
    updated = await self._aupdate_content(content)
    return updated
1589
+
1590
def get_content_by_id(self, content_id: str) -> Optional[Content]:
    """Load one content row by id and convert it to a ``Content`` object.

    Returns ``None`` when the row does not exist.

    Raises:
        ValueError: If no contents DB is configured or it is an ``AsyncBaseDb``.
    """
    if self.contents_db is None:
        raise ValueError("No contents db provided")

    if isinstance(self.contents_db, AsyncBaseDb):
        raise ValueError(
            "get_content_by_id() is not supported for async databases. Please use aget_content_by_id() instead."
        )

    row = self.contents_db.get_knowledge_content(content_id)
    if row is None:
        return None

    # A row without an update timestamp reports its creation time instead.
    return Content(
        id=row.id,
        name=row.name,
        description=row.description,
        metadata=row.metadata,
        file_type=row.type,
        size=row.size,
        status=ContentStatus(row.status) if row.status else None,
        status_message=row.status_message,
        created_at=row.created_at,
        updated_at=row.updated_at if row.updated_at else row.created_at,
        external_id=row.external_id,
    )
1617
+
1618
async def aget_content_by_id(self, content_id: str) -> Optional[Content]:
    """Async variant: load one content row by id as a ``Content`` object.

    Awaits the lookup for ``AsyncBaseDb`` instances and calls it synchronously
    otherwise. Returns ``None`` when the row does not exist.

    Raises:
        ValueError: If no contents DB is configured.
    """
    if self.contents_db is None:
        raise ValueError("No contents db provided")

    if isinstance(self.contents_db, AsyncBaseDb):
        row = await self.contents_db.get_knowledge_content(content_id)
    else:
        row = self.contents_db.get_knowledge_content(content_id)

    if row is None:
        return None

    # A row without an update timestamp reports its creation time instead.
    return Content(
        id=row.id,
        name=row.name,
        description=row.description,
        metadata=row.metadata,
        file_type=row.type,
        size=row.size,
        status=ContentStatus(row.status) if row.status else None,
        status_message=row.status_message,
        created_at=row.created_at,
        updated_at=row.updated_at if row.updated_at else row.created_at,
        external_id=row.external_id,
    )
1643
+
1644
def get_content(
    self,
    limit: Optional[int] = None,
    page: Optional[int] = None,
    sort_by: Optional[str] = None,
    sort_order: Optional[str] = None,
) -> Tuple[List[Content], int]:
    """List stored content as ``Content`` objects together with the DB's count.

    The paging and sorting arguments are forwarded unchanged to the contents DB.

    Raises:
        ValueError: If no contents DB is configured or it is an ``AsyncBaseDb``.
    """
    if self.contents_db is None:
        raise ValueError("No contents db provided")

    if isinstance(self.contents_db, AsyncBaseDb):
        raise ValueError("get_content() is not supported for async databases. Please use aget_content() instead.")

    rows, count = self.contents_db.get_knowledge_contents(
        limit=limit, page=page, sort_by=sort_by, sort_order=sort_order
    )

    # Create Content from each database row
    result = [
        Content(
            id=row.id,
            name=row.name,
            description=row.description,
            metadata=row.metadata,
            size=row.size,
            file_type=row.type,
            status=ContentStatus(row.status) if row.status else None,
            status_message=row.status_message,
            created_at=row.created_at,
            updated_at=row.updated_at if row.updated_at else row.created_at,
            external_id=row.external_id,
        )
        for row in rows
    ]
    return result, count
1679
+
1680
async def aget_content(
    self,
    limit: Optional[int] = None,
    page: Optional[int] = None,
    sort_by: Optional[str] = None,
    sort_order: Optional[str] = None,
) -> Tuple[List[Content], int]:
    """Async variant: list stored content as ``Content`` objects with the DB's count.

    Awaits the query for ``AsyncBaseDb`` instances and calls it synchronously
    otherwise. The paging and sorting arguments are forwarded unchanged.

    Raises:
        ValueError: If no contents DB is configured.
    """
    if self.contents_db is None:
        raise ValueError("No contents db provided")

    query = dict(limit=limit, page=page, sort_by=sort_by, sort_order=sort_order)
    if isinstance(self.contents_db, AsyncBaseDb):
        rows, count = await self.contents_db.get_knowledge_contents(**query)
    else:
        rows, count = self.contents_db.get_knowledge_contents(**query)

    # Create Content from each database row
    result = [
        Content(
            id=row.id,
            name=row.name,
            description=row.description,
            metadata=row.metadata,
            size=row.size,
            file_type=row.type,
            status=ContentStatus(row.status) if row.status else None,
            status_message=row.status_message,
            created_at=row.created_at,
            updated_at=row.updated_at if row.updated_at else row.created_at,
            external_id=row.external_id,
        )
        for row in rows
    ]
    return result, count
1717
+
1718
def get_content_status(self, content_id: str) -> Tuple[Optional[ContentStatus], Optional[str]]:
    """Return ``(status, status_message)`` for one content row.

    A missing row yields ``(None, "Content not found")``. An empty status maps
    to PROCESSING. A stored status that is not a valid ``ContentStatus`` value
    maps to FAILED or COMPLETED when it contains that word, else PROCESSING.

    Raises:
        ValueError: If no contents DB is configured or it is an ``AsyncBaseDb``.
    """
    if self.contents_db is None:
        raise ValueError("No contents db provided")

    if isinstance(self.contents_db, AsyncBaseDb):
        raise ValueError(
            "get_content_status() is not supported for async databases. Please use aget_content_status() instead."
        )

    row = self.contents_db.get_knowledge_content(content_id)
    if row is None:
        return None, "Content not found"

    # Convert string status to enum, defaulting to PROCESSING if unknown
    raw_status = row.status
    if not raw_status:
        status = ContentStatus.PROCESSING
    else:
        lowered = raw_status.lower()
        try:
            status = ContentStatus(lowered)
        except ValueError:
            # Handle legacy or unknown statuses
            if "failed" in lowered:
                status = ContentStatus.FAILED
            elif "completed" in lowered:
                status = ContentStatus.COMPLETED
            else:
                status = ContentStatus.PROCESSING

    return status, row.status_message
1745
+
1746
async def aget_content_status(self, content_id: str) -> Tuple[Optional[ContentStatus], Optional[str]]:
    """Async variant: return ``(status, status_message)`` for one content row.

    A missing row yields ``(None, "Content not found")``. An empty status maps
    to PROCESSING. A stored status that is not a valid ``ContentStatus`` value
    maps to FAILED or COMPLETED when it contains that word, else PROCESSING.

    Raises:
        ValueError: If no contents DB is configured.
    """
    if self.contents_db is None:
        raise ValueError("No contents db provided")

    if isinstance(self.contents_db, AsyncBaseDb):
        row = await self.contents_db.get_knowledge_content(content_id)
    else:
        row = self.contents_db.get_knowledge_content(content_id)

    if row is None:
        return None, "Content not found"

    # Convert string status to enum, defaulting to PROCESSING if unknown
    raw_status = row.status
    if not raw_status:
        status = ContentStatus.PROCESSING
    else:
        lowered = raw_status.lower()
        try:
            status = ContentStatus(lowered)
        except ValueError:
            # Handle legacy or unknown statuses
            if "failed" in lowered:
                status = ContentStatus.FAILED
            elif "completed" in lowered:
                status = ContentStatus.COMPLETED
            else:
                status = ContentStatus.PROCESSING

    return status, row.status_message
1772
+
1773
def remove_content_by_id(self, content_id: str):
    """Delete one piece of content from the vector DB and the contents DB (sync).

    For a ``LightRag`` vector DB the content's ``external_id`` is looked up
    first and used for deletion; other vector DBs delete by content id.

    Raises:
        ValueError: If the contents DB is an ``AsyncBaseDb``. Its delete call
            would return a coroutine that is never awaited, removing the
            vectors while leaving the row behind. The check runs before
            anything is deleted.
    """
    from agno.vectordb import VectorDb

    if isinstance(self.contents_db, AsyncBaseDb):
        raise ValueError(
            "remove_content_by_id() is not supported for async databases. Please use aremove_content_by_id() instead."
        )

    self.vector_db = cast(VectorDb, self.vector_db)
    if self.vector_db is not None:
        if self.vector_db.__class__.__name__ == "LightRag":
            # For LightRAG, get the content first to find the external_id
            content = self.get_content_by_id(content_id)
            if content and content.external_id:
                self.vector_db.delete_by_external_id(content.external_id)  # type: ignore
            else:
                log_warning(f"No external_id found for content {content_id}, cannot delete from LightRAG")
        else:
            self.vector_db.delete_by_content_id(content_id)

    if self.contents_db is not None:
        self.contents_db.delete_knowledge_content(content_id)
1790
+
1791
async def aremove_content_by_id(self, content_id: str):
    """Async variant: delete one piece of content from the vector DB and contents DB.

    For a ``LightRag`` vector DB the content's ``external_id`` is looked up
    first and used for deletion; other vector DBs delete by content id. The
    contents-DB delete is awaited for ``AsyncBaseDb`` instances.
    """
    store = self.vector_db
    if store is not None:
        if store.__class__.__name__ != "LightRag":
            store.delete_by_content_id(content_id)
        else:
            # For LightRAG, get the content first to find the external_id
            content = await self.aget_content_by_id(content_id)
            if content and content.external_id:
                self.vector_db.delete_by_external_id(content.external_id)  # type: ignore
            else:
                log_warning(f"No external_id found for content {content_id}, cannot delete from LightRAG")

    if self.contents_db is None:
        return

    if isinstance(self.contents_db, AsyncBaseDb):
        await self.contents_db.delete_knowledge_content(content_id)
    else:
        self.contents_db.delete_knowledge_content(content_id)
1808
+
1809
def remove_all_content(self):
    """Remove every content item returned by ``get_content`` that has an id."""
    contents, _ = self.get_content()
    for item in contents:
        if item.id is None:
            continue
        self.remove_content_by_id(item.id)
1814
+
1815
async def aremove_all_content(self):
    """Async variant: remove every content item from ``aget_content`` that has an id."""
    contents, _ = await self.aget_content()
    for item in contents:
        if item.id is None:
            continue
        await self.aremove_content_by_id(item.id)
1820
+
1821
+ # --- Reader Factory Integration ---
1822
+
1823
def construct_readers(self):
    """Ensure ``self.readers`` is a dict so readers can be added on demand."""
    if self.readers is not None:
        return
    # Start empty - readers are created lazily when first requested.
    self.readers = {}
1828
+
1829
def add_reader(self, reader: Reader):
    """Register a custom reader under its generated key and return it.

    An existing reader stored under the same key is replaced.
    """
    if self.readers is None:
        self.readers = {}

    # The key comes from the reader's name, or its class name when unnamed.
    key = self._generate_reader_key(reader)
    self.readers.update({key: reader})
    return reader
1838
+
1839
def get_readers(self) -> Dict[str, Reader]:
    """Return the dict of readers held so far, creating it when it is ``None``."""
    if self.readers is not None:
        return self.readers

    self.readers = {}
    return self.readers
1845
+
1846
+ def _generate_reader_key(self, reader: Reader) -> str:
1847
+ """Generate a key for a reader instance."""
1848
+ if reader.name:
1849
+ return f"{reader.name.lower().replace(' ', '_')}"
1850
+ else:
1851
+ return f"{reader.__class__.__name__.lower().replace(' ', '_')}"
1852
+
1853
def _select_reader(self, extension: str) -> Reader:
    """Log the requested extension and return the ``ReaderFactory`` reader for it."""
    log_info(f"Selecting reader for extension: {extension}")
    selected = ReaderFactory.get_reader_for_extension(extension)
    return selected
1857
+
1858
def get_filters(self) -> List[str]:
    """Return a fixed list of two filter tag names.

    NOTE(review): the values are hard-coded and look like placeholders;
    confirm whether callers depend on them.
    """
    filter_tags = ["filter_tag_1", "filter_tag2"]
    return filter_tags
1863
+
1864
+ # --- Convenience Properties for Backward Compatibility ---
1865
+
1866
+ def _is_text_mime_type(self, mime_type: str) -> bool:
1867
+ """
1868
+ Check if a MIME type represents text content that can be safely encoded as UTF-8.
1869
+
1870
+ Args:
1871
+ mime_type: The MIME type to check
1872
+
1873
+ Returns:
1874
+ bool: True if it's a text type, False if binary
1875
+ """
1876
+ if not mime_type:
1877
+ return False
1878
+
1879
+ text_types = [
1880
+ "text/",
1881
+ "application/json",
1882
+ "application/xml",
1883
+ "application/javascript",
1884
+ "application/csv",
1885
+ "application/sql",
1886
+ ]
1887
+
1888
+ return any(mime_type.startswith(t) for t in text_types)
1889
+
1890
+ def _should_include_file(self, file_path: str, include: Optional[List[str]], exclude: Optional[List[str]]) -> bool:
1891
+ """
1892
+ Determine if a file should be included based on include/exclude patterns.
1893
+
1894
+ Logic:
1895
+ 1. If include is specified, file must match at least one include pattern
1896
+ 2. If exclude is specified, file must not match any exclude pattern
1897
+ 3. If neither specified, include all files
1898
+
1899
+ Args:
1900
+ file_path: Path to the file to check
1901
+ include: Optional list of include patterns (glob-style)
1902
+ exclude: Optional list of exclude patterns (glob-style)
1903
+
1904
+ Returns:
1905
+ bool: True if file should be included, False otherwise
1906
+ """
1907
+ import fnmatch
1908
+
1909
+ # If include patterns specified, file must match at least one
1910
+ if include:
1911
+ if not any(fnmatch.fnmatch(file_path, pattern) for pattern in include):
1912
+ return False
1913
+
1914
+ # If exclude patterns specified, file must not match any
1915
+ if exclude:
1916
+ if any(fnmatch.fnmatch(file_path, pattern) for pattern in exclude):
1917
+ return False
1918
+
1919
+ return True
1920
+
1921
+ def _get_reader(self, reader_type: str) -> Optional[Reader]:
1922
+ """Get a cached reader or create it if not cached, handling missing dependencies gracefully."""
1923
+ if self.readers is None:
1924
+ self.readers = {}
1925
+
1926
+ if reader_type not in self.readers:
1927
+ try:
1928
+ reader = ReaderFactory.create_reader(reader_type)
1929
+ if reader:
1930
+ self.readers[reader_type] = reader
1931
+ else:
1932
+ return None
1933
+
1934
+ except Exception as e:
1935
+ log_warning(f"Cannot create {reader_type} reader {e}")
1936
+ return None
1937
+
1938
+ return self.readers.get(reader_type)
1939
+
1940
@property
def pdf_reader(self) -> Optional[Reader]:
    """Reader for PDF content, obtained lazily through ``_get_reader("pdf")``."""
    reader_type = "pdf"
    return self._get_reader(reader_type)
1944
+
1945
@property
def csv_reader(self) -> Optional[Reader]:
    """Reader for CSV content, obtained lazily through ``_get_reader("csv")``."""
    reader_type = "csv"
    return self._get_reader(reader_type)
1949
+
1950
@property
def docx_reader(self) -> Optional[Reader]:
    """Reader for Docx content, obtained lazily through ``_get_reader("docx")``."""
    reader_type = "docx"
    return self._get_reader(reader_type)
1954
+
1955
@property
def pptx_reader(self) -> Optional[Reader]:
    """Reader for PPTX content, obtained lazily through ``_get_reader("pptx")``."""
    reader_type = "pptx"
    return self._get_reader(reader_type)
1959
+
1960
@property
def json_reader(self) -> Optional[Reader]:
    """Reader for JSON content, obtained lazily through ``_get_reader("json")``."""
    reader_type = "json"
    return self._get_reader(reader_type)
1964
+
1965
@property
def markdown_reader(self) -> Optional[Reader]:
    """Reader for Markdown content, obtained lazily through ``_get_reader("markdown")``."""
    reader_type = "markdown"
    return self._get_reader(reader_type)
1969
+
1970
@property
def text_reader(self) -> Optional[Reader]:
    """Reader for plain-text content, obtained lazily through ``_get_reader("text")``."""
    reader_type = "text"
    return self._get_reader(reader_type)
1974
+
1975
@property
def website_reader(self) -> Optional[Reader]:
    """Reader for website content, obtained lazily through ``_get_reader("website")``."""
    reader_type = "website"
    return self._get_reader(reader_type)
1979
+
1980
@property
def firecrawl_reader(self) -> Optional[Reader]:
    """Firecrawl reader, obtained lazily through ``_get_reader("firecrawl")``."""
    reader_type = "firecrawl"
    return self._get_reader(reader_type)
1984
+
1985
@property
def youtube_reader(self) -> Optional[Reader]:
    """YouTube reader, obtained lazily through ``_get_reader("youtube")``."""
    reader_type = "youtube"
    return self._get_reader(reader_type)