agno 2.0.0rc2__py3-none-any.whl → 2.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (331) hide show
  1. agno/agent/agent.py +6009 -2874
  2. agno/api/api.py +2 -0
  3. agno/api/os.py +1 -1
  4. agno/culture/__init__.py +3 -0
  5. agno/culture/manager.py +956 -0
  6. agno/db/async_postgres/__init__.py +3 -0
  7. agno/db/base.py +385 -6
  8. agno/db/dynamo/dynamo.py +388 -81
  9. agno/db/dynamo/schemas.py +47 -10
  10. agno/db/dynamo/utils.py +63 -4
  11. agno/db/firestore/firestore.py +435 -64
  12. agno/db/firestore/schemas.py +11 -0
  13. agno/db/firestore/utils.py +102 -4
  14. agno/db/gcs_json/gcs_json_db.py +384 -42
  15. agno/db/gcs_json/utils.py +60 -26
  16. agno/db/in_memory/in_memory_db.py +351 -66
  17. agno/db/in_memory/utils.py +60 -2
  18. agno/db/json/json_db.py +339 -48
  19. agno/db/json/utils.py +60 -26
  20. agno/db/migrations/manager.py +199 -0
  21. agno/db/migrations/v1_to_v2.py +510 -37
  22. agno/db/migrations/versions/__init__.py +0 -0
  23. agno/db/migrations/versions/v2_3_0.py +938 -0
  24. agno/db/mongo/__init__.py +15 -1
  25. agno/db/mongo/async_mongo.py +2036 -0
  26. agno/db/mongo/mongo.py +653 -76
  27. agno/db/mongo/schemas.py +13 -0
  28. agno/db/mongo/utils.py +80 -8
  29. agno/db/mysql/mysql.py +687 -25
  30. agno/db/mysql/schemas.py +61 -37
  31. agno/db/mysql/utils.py +60 -2
  32. agno/db/postgres/__init__.py +2 -1
  33. agno/db/postgres/async_postgres.py +2001 -0
  34. agno/db/postgres/postgres.py +676 -57
  35. agno/db/postgres/schemas.py +43 -18
  36. agno/db/postgres/utils.py +164 -2
  37. agno/db/redis/redis.py +344 -38
  38. agno/db/redis/schemas.py +18 -0
  39. agno/db/redis/utils.py +60 -2
  40. agno/db/schemas/__init__.py +2 -1
  41. agno/db/schemas/culture.py +120 -0
  42. agno/db/schemas/memory.py +13 -0
  43. agno/db/singlestore/schemas.py +26 -1
  44. agno/db/singlestore/singlestore.py +687 -53
  45. agno/db/singlestore/utils.py +60 -2
  46. agno/db/sqlite/__init__.py +2 -1
  47. agno/db/sqlite/async_sqlite.py +2371 -0
  48. agno/db/sqlite/schemas.py +24 -0
  49. agno/db/sqlite/sqlite.py +774 -85
  50. agno/db/sqlite/utils.py +168 -5
  51. agno/db/surrealdb/__init__.py +3 -0
  52. agno/db/surrealdb/metrics.py +292 -0
  53. agno/db/surrealdb/models.py +309 -0
  54. agno/db/surrealdb/queries.py +71 -0
  55. agno/db/surrealdb/surrealdb.py +1361 -0
  56. agno/db/surrealdb/utils.py +147 -0
  57. agno/db/utils.py +50 -22
  58. agno/eval/accuracy.py +50 -43
  59. agno/eval/performance.py +6 -3
  60. agno/eval/reliability.py +6 -3
  61. agno/eval/utils.py +33 -16
  62. agno/exceptions.py +68 -1
  63. agno/filters.py +354 -0
  64. agno/guardrails/__init__.py +6 -0
  65. agno/guardrails/base.py +19 -0
  66. agno/guardrails/openai.py +144 -0
  67. agno/guardrails/pii.py +94 -0
  68. agno/guardrails/prompt_injection.py +52 -0
  69. agno/integrations/discord/client.py +1 -0
  70. agno/knowledge/chunking/agentic.py +13 -10
  71. agno/knowledge/chunking/fixed.py +1 -1
  72. agno/knowledge/chunking/semantic.py +40 -8
  73. agno/knowledge/chunking/strategy.py +59 -15
  74. agno/knowledge/embedder/aws_bedrock.py +9 -4
  75. agno/knowledge/embedder/azure_openai.py +54 -0
  76. agno/knowledge/embedder/base.py +2 -0
  77. agno/knowledge/embedder/cohere.py +184 -5
  78. agno/knowledge/embedder/fastembed.py +1 -1
  79. agno/knowledge/embedder/google.py +79 -1
  80. agno/knowledge/embedder/huggingface.py +9 -4
  81. agno/knowledge/embedder/jina.py +63 -0
  82. agno/knowledge/embedder/mistral.py +78 -11
  83. agno/knowledge/embedder/nebius.py +1 -1
  84. agno/knowledge/embedder/ollama.py +13 -0
  85. agno/knowledge/embedder/openai.py +37 -65
  86. agno/knowledge/embedder/sentence_transformer.py +8 -4
  87. agno/knowledge/embedder/vllm.py +262 -0
  88. agno/knowledge/embedder/voyageai.py +69 -16
  89. agno/knowledge/knowledge.py +595 -187
  90. agno/knowledge/reader/base.py +9 -2
  91. agno/knowledge/reader/csv_reader.py +8 -10
  92. agno/knowledge/reader/docx_reader.py +5 -6
  93. agno/knowledge/reader/field_labeled_csv_reader.py +290 -0
  94. agno/knowledge/reader/json_reader.py +6 -5
  95. agno/knowledge/reader/markdown_reader.py +13 -13
  96. agno/knowledge/reader/pdf_reader.py +43 -68
  97. agno/knowledge/reader/pptx_reader.py +101 -0
  98. agno/knowledge/reader/reader_factory.py +51 -6
  99. agno/knowledge/reader/s3_reader.py +3 -15
  100. agno/knowledge/reader/tavily_reader.py +194 -0
  101. agno/knowledge/reader/text_reader.py +13 -13
  102. agno/knowledge/reader/web_search_reader.py +2 -43
  103. agno/knowledge/reader/website_reader.py +43 -25
  104. agno/knowledge/reranker/__init__.py +3 -0
  105. agno/knowledge/types.py +9 -0
  106. agno/knowledge/utils.py +20 -0
  107. agno/media.py +339 -266
  108. agno/memory/manager.py +336 -82
  109. agno/models/aimlapi/aimlapi.py +2 -2
  110. agno/models/anthropic/claude.py +183 -37
  111. agno/models/aws/bedrock.py +52 -112
  112. agno/models/aws/claude.py +33 -1
  113. agno/models/azure/ai_foundry.py +33 -15
  114. agno/models/azure/openai_chat.py +25 -8
  115. agno/models/base.py +1011 -566
  116. agno/models/cerebras/cerebras.py +19 -13
  117. agno/models/cerebras/cerebras_openai.py +8 -5
  118. agno/models/cohere/chat.py +27 -1
  119. agno/models/cometapi/__init__.py +5 -0
  120. agno/models/cometapi/cometapi.py +57 -0
  121. agno/models/dashscope/dashscope.py +1 -0
  122. agno/models/deepinfra/deepinfra.py +2 -2
  123. agno/models/deepseek/deepseek.py +2 -2
  124. agno/models/fireworks/fireworks.py +2 -2
  125. agno/models/google/gemini.py +110 -37
  126. agno/models/groq/groq.py +28 -11
  127. agno/models/huggingface/huggingface.py +2 -1
  128. agno/models/internlm/internlm.py +2 -2
  129. agno/models/langdb/langdb.py +4 -4
  130. agno/models/litellm/chat.py +18 -1
  131. agno/models/litellm/litellm_openai.py +2 -2
  132. agno/models/llama_cpp/__init__.py +5 -0
  133. agno/models/llama_cpp/llama_cpp.py +22 -0
  134. agno/models/message.py +143 -4
  135. agno/models/meta/llama.py +27 -10
  136. agno/models/meta/llama_openai.py +5 -17
  137. agno/models/nebius/nebius.py +6 -6
  138. agno/models/nexus/__init__.py +3 -0
  139. agno/models/nexus/nexus.py +22 -0
  140. agno/models/nvidia/nvidia.py +2 -2
  141. agno/models/ollama/chat.py +60 -6
  142. agno/models/openai/chat.py +102 -43
  143. agno/models/openai/responses.py +103 -106
  144. agno/models/openrouter/openrouter.py +41 -3
  145. agno/models/perplexity/perplexity.py +4 -5
  146. agno/models/portkey/portkey.py +3 -3
  147. agno/models/requesty/__init__.py +5 -0
  148. agno/models/requesty/requesty.py +52 -0
  149. agno/models/response.py +81 -5
  150. agno/models/sambanova/sambanova.py +2 -2
  151. agno/models/siliconflow/__init__.py +5 -0
  152. agno/models/siliconflow/siliconflow.py +25 -0
  153. agno/models/together/together.py +2 -2
  154. agno/models/utils.py +254 -8
  155. agno/models/vercel/v0.py +2 -2
  156. agno/models/vertexai/__init__.py +0 -0
  157. agno/models/vertexai/claude.py +96 -0
  158. agno/models/vllm/vllm.py +1 -0
  159. agno/models/xai/xai.py +3 -2
  160. agno/os/app.py +543 -175
  161. agno/os/auth.py +24 -14
  162. agno/os/config.py +1 -0
  163. agno/os/interfaces/__init__.py +1 -0
  164. agno/os/interfaces/a2a/__init__.py +3 -0
  165. agno/os/interfaces/a2a/a2a.py +42 -0
  166. agno/os/interfaces/a2a/router.py +250 -0
  167. agno/os/interfaces/a2a/utils.py +924 -0
  168. agno/os/interfaces/agui/agui.py +23 -7
  169. agno/os/interfaces/agui/router.py +27 -3
  170. agno/os/interfaces/agui/utils.py +242 -142
  171. agno/os/interfaces/base.py +6 -2
  172. agno/os/interfaces/slack/router.py +81 -23
  173. agno/os/interfaces/slack/slack.py +29 -14
  174. agno/os/interfaces/whatsapp/router.py +11 -4
  175. agno/os/interfaces/whatsapp/whatsapp.py +14 -7
  176. agno/os/mcp.py +111 -54
  177. agno/os/middleware/__init__.py +7 -0
  178. agno/os/middleware/jwt.py +233 -0
  179. agno/os/router.py +556 -139
  180. agno/os/routers/evals/evals.py +71 -34
  181. agno/os/routers/evals/schemas.py +31 -31
  182. agno/os/routers/evals/utils.py +6 -5
  183. agno/os/routers/health.py +31 -0
  184. agno/os/routers/home.py +52 -0
  185. agno/os/routers/knowledge/knowledge.py +185 -38
  186. agno/os/routers/knowledge/schemas.py +82 -22
  187. agno/os/routers/memory/memory.py +158 -53
  188. agno/os/routers/memory/schemas.py +20 -16
  189. agno/os/routers/metrics/metrics.py +20 -8
  190. agno/os/routers/metrics/schemas.py +16 -16
  191. agno/os/routers/session/session.py +499 -38
  192. agno/os/schema.py +308 -198
  193. agno/os/utils.py +401 -41
  194. agno/reasoning/anthropic.py +80 -0
  195. agno/reasoning/azure_ai_foundry.py +2 -2
  196. agno/reasoning/deepseek.py +2 -2
  197. agno/reasoning/default.py +3 -1
  198. agno/reasoning/gemini.py +73 -0
  199. agno/reasoning/groq.py +2 -2
  200. agno/reasoning/ollama.py +2 -2
  201. agno/reasoning/openai.py +7 -2
  202. agno/reasoning/vertexai.py +76 -0
  203. agno/run/__init__.py +6 -0
  204. agno/run/agent.py +266 -112
  205. agno/run/base.py +53 -24
  206. agno/run/team.py +252 -111
  207. agno/run/workflow.py +156 -45
  208. agno/session/agent.py +105 -89
  209. agno/session/summary.py +65 -25
  210. agno/session/team.py +176 -96
  211. agno/session/workflow.py +406 -40
  212. agno/team/team.py +3854 -1692
  213. agno/tools/brightdata.py +3 -3
  214. agno/tools/cartesia.py +3 -5
  215. agno/tools/dalle.py +9 -8
  216. agno/tools/decorator.py +4 -2
  217. agno/tools/desi_vocal.py +2 -2
  218. agno/tools/duckduckgo.py +15 -11
  219. agno/tools/e2b.py +20 -13
  220. agno/tools/eleven_labs.py +26 -28
  221. agno/tools/exa.py +21 -16
  222. agno/tools/fal.py +4 -4
  223. agno/tools/file.py +153 -23
  224. agno/tools/file_generation.py +350 -0
  225. agno/tools/firecrawl.py +4 -4
  226. agno/tools/function.py +257 -37
  227. agno/tools/giphy.py +2 -2
  228. agno/tools/gmail.py +238 -14
  229. agno/tools/google_drive.py +270 -0
  230. agno/tools/googlecalendar.py +36 -8
  231. agno/tools/googlesheets.py +20 -5
  232. agno/tools/jira.py +20 -0
  233. agno/tools/knowledge.py +3 -3
  234. agno/tools/lumalab.py +3 -3
  235. agno/tools/mcp/__init__.py +10 -0
  236. agno/tools/mcp/mcp.py +331 -0
  237. agno/tools/mcp/multi_mcp.py +347 -0
  238. agno/tools/mcp/params.py +24 -0
  239. agno/tools/mcp_toolbox.py +284 -0
  240. agno/tools/mem0.py +11 -17
  241. agno/tools/memori.py +1 -53
  242. agno/tools/memory.py +419 -0
  243. agno/tools/models/azure_openai.py +2 -2
  244. agno/tools/models/gemini.py +3 -3
  245. agno/tools/models/groq.py +3 -5
  246. agno/tools/models/nebius.py +7 -7
  247. agno/tools/models_labs.py +25 -15
  248. agno/tools/notion.py +204 -0
  249. agno/tools/openai.py +4 -9
  250. agno/tools/opencv.py +3 -3
  251. agno/tools/parallel.py +314 -0
  252. agno/tools/replicate.py +7 -7
  253. agno/tools/scrapegraph.py +58 -31
  254. agno/tools/searxng.py +2 -2
  255. agno/tools/serper.py +2 -2
  256. agno/tools/slack.py +18 -3
  257. agno/tools/spider.py +2 -2
  258. agno/tools/tavily.py +146 -0
  259. agno/tools/whatsapp.py +1 -1
  260. agno/tools/workflow.py +278 -0
  261. agno/tools/yfinance.py +12 -11
  262. agno/utils/agent.py +820 -0
  263. agno/utils/audio.py +27 -0
  264. agno/utils/common.py +90 -1
  265. agno/utils/events.py +222 -7
  266. agno/utils/gemini.py +181 -23
  267. agno/utils/hooks.py +57 -0
  268. agno/utils/http.py +111 -0
  269. agno/utils/knowledge.py +12 -5
  270. agno/utils/log.py +1 -0
  271. agno/utils/mcp.py +95 -5
  272. agno/utils/media.py +188 -10
  273. agno/utils/merge_dict.py +22 -1
  274. agno/utils/message.py +60 -0
  275. agno/utils/models/claude.py +40 -11
  276. agno/utils/models/cohere.py +1 -1
  277. agno/utils/models/watsonx.py +1 -1
  278. agno/utils/openai.py +1 -1
  279. agno/utils/print_response/agent.py +105 -21
  280. agno/utils/print_response/team.py +103 -38
  281. agno/utils/print_response/workflow.py +251 -34
  282. agno/utils/reasoning.py +22 -1
  283. agno/utils/serialize.py +32 -0
  284. agno/utils/streamlit.py +16 -10
  285. agno/utils/string.py +41 -0
  286. agno/utils/team.py +98 -9
  287. agno/utils/tools.py +1 -1
  288. agno/vectordb/base.py +23 -4
  289. agno/vectordb/cassandra/cassandra.py +65 -9
  290. agno/vectordb/chroma/chromadb.py +182 -38
  291. agno/vectordb/clickhouse/clickhousedb.py +64 -11
  292. agno/vectordb/couchbase/couchbase.py +105 -10
  293. agno/vectordb/lancedb/lance_db.py +183 -135
  294. agno/vectordb/langchaindb/langchaindb.py +25 -7
  295. agno/vectordb/lightrag/lightrag.py +17 -3
  296. agno/vectordb/llamaindex/__init__.py +3 -0
  297. agno/vectordb/llamaindex/llamaindexdb.py +46 -7
  298. agno/vectordb/milvus/milvus.py +126 -9
  299. agno/vectordb/mongodb/__init__.py +7 -1
  300. agno/vectordb/mongodb/mongodb.py +112 -7
  301. agno/vectordb/pgvector/pgvector.py +142 -21
  302. agno/vectordb/pineconedb/pineconedb.py +80 -8
  303. agno/vectordb/qdrant/qdrant.py +125 -39
  304. agno/vectordb/redis/__init__.py +9 -0
  305. agno/vectordb/redis/redisdb.py +694 -0
  306. agno/vectordb/singlestore/singlestore.py +111 -25
  307. agno/vectordb/surrealdb/surrealdb.py +31 -5
  308. agno/vectordb/upstashdb/upstashdb.py +76 -8
  309. agno/vectordb/weaviate/weaviate.py +86 -15
  310. agno/workflow/__init__.py +2 -0
  311. agno/workflow/agent.py +299 -0
  312. agno/workflow/condition.py +112 -18
  313. agno/workflow/loop.py +69 -10
  314. agno/workflow/parallel.py +266 -118
  315. agno/workflow/router.py +110 -17
  316. agno/workflow/step.py +645 -136
  317. agno/workflow/steps.py +65 -6
  318. agno/workflow/types.py +71 -33
  319. agno/workflow/workflow.py +2113 -300
  320. agno-2.3.0.dist-info/METADATA +618 -0
  321. agno-2.3.0.dist-info/RECORD +577 -0
  322. agno-2.3.0.dist-info/licenses/LICENSE +201 -0
  323. agno/knowledge/reader/url_reader.py +0 -128
  324. agno/tools/googlesearch.py +0 -98
  325. agno/tools/mcp.py +0 -610
  326. agno/utils/models/aws_claude.py +0 -170
  327. agno-2.0.0rc2.dist-info/METADATA +0 -355
  328. agno-2.0.0rc2.dist-info/RECORD +0 -515
  329. agno-2.0.0rc2.dist-info/licenses/LICENSE +0 -375
  330. {agno-2.0.0rc2.dist-info → agno-2.3.0.dist-info}/WHEEL +0 -0
  331. {agno-2.0.0rc2.dist-info → agno-2.3.0.dist-info}/top_level.txt +0 -0
@@ -4,24 +4,23 @@ import io
4
4
  import time
5
5
  from dataclasses import dataclass
6
6
  from enum import Enum
7
- from functools import cached_property
8
7
  from io import BytesIO
9
8
  from os.path import basename
10
9
  from pathlib import Path
11
10
  from typing import Any, Dict, List, Optional, Set, Tuple, Union, cast, overload
12
- from uuid import uuid4
13
11
 
14
12
  from httpx import AsyncClient
15
13
 
16
- from agno.db.base import BaseDb
14
+ from agno.db.base import AsyncBaseDb, BaseDb
17
15
  from agno.db.schemas.knowledge import KnowledgeRow
16
+ from agno.filters import FilterExpr
18
17
  from agno.knowledge.content import Content, ContentAuth, ContentStatus, FileData
19
18
  from agno.knowledge.document import Document
20
19
  from agno.knowledge.reader import Reader, ReaderFactory
21
20
  from agno.knowledge.remote_content.remote_content import GCSContent, RemoteContent, S3Content
22
21
  from agno.utils.http import async_fetch_with_retry
23
22
  from agno.utils.log import log_debug, log_error, log_info, log_warning
24
- from agno.vectordb import VectorDb
23
+ from agno.utils.string import generate_id
25
24
 
26
25
  ContentDict = Dict[str, Union[str, Dict[str, str]]]
27
26
 
@@ -39,19 +38,19 @@ class Knowledge:
39
38
 
40
39
  name: Optional[str] = None
41
40
  description: Optional[str] = None
42
- vector_db: Optional[VectorDb] = None
43
- contents_db: Optional[BaseDb] = None
41
+ vector_db: Optional[Any] = None
42
+ contents_db: Optional[Union[BaseDb, AsyncBaseDb]] = None
44
43
  max_results: int = 10
45
44
  readers: Optional[Dict[str, Reader]] = None
46
45
 
47
46
  def __post_init__(self):
47
+ from agno.vectordb import VectorDb
48
+
49
+ self.vector_db = cast(VectorDb, self.vector_db)
48
50
  if self.vector_db and not self.vector_db.exists():
49
51
  self.vector_db.create()
50
52
 
51
53
  self.construct_readers()
52
- self.valid_metadata_filters = set()
53
-
54
- # --- SDK Specific Methods ---
55
54
 
56
55
  # --- Add Contents ---
57
56
  @overload
@@ -64,9 +63,12 @@ class Knowledge:
64
63
  paths: Optional[List[str]] = None,
65
64
  urls: Optional[List[str]] = None,
66
65
  metadata: Optional[Dict[str, str]] = None,
66
+ topics: Optional[List[str]] = None,
67
+ text_contents: Optional[List[str]] = None,
68
+ reader: Optional[Reader] = None,
67
69
  include: Optional[List[str]] = None,
68
70
  exclude: Optional[List[str]] = None,
69
- upsert: bool = False,
71
+ upsert: bool = True,
70
72
  skip_if_exists: bool = False,
71
73
  remote_content: Optional[RemoteContent] = None,
72
74
  ) -> None: ...
@@ -74,6 +76,8 @@ class Knowledge:
74
76
  async def add_contents_async(self, *args, **kwargs) -> None:
75
77
  if args and isinstance(args[0], list):
76
78
  arguments = args[0]
79
+ upsert = kwargs.get("upsert", True)
80
+ skip_if_exists = kwargs.get("skip_if_exists", False)
77
81
  for argument in arguments:
78
82
  await self.add_content_async(
79
83
  name=argument.get("name"),
@@ -82,11 +86,12 @@ class Knowledge:
82
86
  url=argument.get("url"),
83
87
  metadata=argument.get("metadata"),
84
88
  topics=argument.get("topics"),
89
+ text_content=argument.get("text_content"),
85
90
  reader=argument.get("reader"),
86
91
  include=argument.get("include"),
87
92
  exclude=argument.get("exclude"),
88
- upsert=argument.get("upsert", False),
89
- skip_if_exists=argument.get("skip_if_exists", False),
93
+ upsert=argument.get("upsert", upsert),
94
+ skip_if_exists=argument.get("skip_if_exists", skip_if_exists),
90
95
  remote_content=argument.get("remote_content", None),
91
96
  )
92
97
 
@@ -95,14 +100,15 @@ class Knowledge:
95
100
  metadata = kwargs.get("metadata", {})
96
101
  description = kwargs.get("description", [])
97
102
  topics = kwargs.get("topics", [])
103
+ reader = kwargs.get("reader", None)
98
104
  paths = kwargs.get("paths", [])
99
105
  urls = kwargs.get("urls", [])
106
+ text_contents = kwargs.get("text_contents", [])
100
107
  include = kwargs.get("include")
101
108
  exclude = kwargs.get("exclude")
102
- upsert = kwargs.get("upsert", False)
109
+ upsert = kwargs.get("upsert", True)
103
110
  skip_if_exists = kwargs.get("skip_if_exists", False)
104
111
  remote_content = kwargs.get("remote_content", None)
105
-
106
112
  for path in paths:
107
113
  await self.add_content_async(
108
114
  name=name,
@@ -113,6 +119,7 @@ class Knowledge:
113
119
  exclude=exclude,
114
120
  upsert=upsert,
115
121
  skip_if_exists=skip_if_exists,
122
+ reader=reader,
116
123
  )
117
124
  for url in urls:
118
125
  await self.add_content_async(
@@ -124,6 +131,21 @@ class Knowledge:
124
131
  exclude=exclude,
125
132
  upsert=upsert,
126
133
  skip_if_exists=skip_if_exists,
134
+ reader=reader,
135
+ )
136
+ for i, text_content in enumerate(text_contents):
137
+ content_name = f"{name}_{i}" if name else f"text_content_{i}"
138
+ log_debug(f"Adding text content: {content_name}")
139
+ await self.add_content_async(
140
+ name=content_name,
141
+ description=description,
142
+ text_content=text_content,
143
+ metadata=metadata,
144
+ include=include,
145
+ exclude=exclude,
146
+ upsert=upsert,
147
+ skip_if_exists=skip_if_exists,
148
+ reader=reader,
127
149
  )
128
150
  if topics:
129
151
  await self.add_content_async(
@@ -135,6 +157,7 @@ class Knowledge:
135
157
  exclude=exclude,
136
158
  upsert=upsert,
137
159
  skip_if_exists=skip_if_exists,
160
+ reader=reader,
138
161
  )
139
162
 
140
163
  if remote_content:
@@ -145,6 +168,7 @@ class Knowledge:
145
168
  remote_content=remote_content,
146
169
  upsert=upsert,
147
170
  skip_if_exists=skip_if_exists,
171
+ reader=reader,
148
172
  )
149
173
 
150
174
  else:
@@ -160,10 +184,14 @@ class Knowledge:
160
184
  paths: Optional[List[str]] = None,
161
185
  urls: Optional[List[str]] = None,
162
186
  metadata: Optional[Dict[str, str]] = None,
187
+ topics: Optional[List[str]] = None,
188
+ text_contents: Optional[List[str]] = None,
189
+ reader: Optional[Reader] = None,
163
190
  include: Optional[List[str]] = None,
164
191
  exclude: Optional[List[str]] = None,
165
- upsert: bool = False,
192
+ upsert: bool = True,
166
193
  skip_if_exists: bool = False,
194
+ remote_content: Optional[RemoteContent] = None,
167
195
  ) -> None: ...
168
196
 
169
197
  def add_contents(self, *args, **kwargs) -> None:
@@ -181,10 +209,14 @@ class Knowledge:
181
209
  paths: Optional list of file paths to load content from
182
210
  urls: Optional list of URLs to load content from
183
211
  metadata: Optional metadata dictionary to apply to all content
212
+ topics: Optional list of topics to add
213
+ text_contents: Optional list of text content strings to add
214
+ reader: Optional reader to use for processing content
184
215
  include: Optional list of file patterns to include
185
216
  exclude: Optional list of file patterns to exclude
186
217
  upsert: Whether to update existing content if it already exists
187
218
  skip_if_exists: Whether to skip adding content if it already exists
219
+ remote_content: Optional remote content (S3, GCS, etc.) to add
188
220
  """
189
221
  asyncio.run(self.add_contents_async(*args, **kwargs))
190
222
 
@@ -200,7 +232,7 @@ class Knowledge:
200
232
  metadata: Optional[Dict[str, str]] = None,
201
233
  include: Optional[List[str]] = None,
202
234
  exclude: Optional[List[str]] = None,
203
- upsert: bool = False,
235
+ upsert: bool = True,
204
236
  skip_if_exists: bool = False,
205
237
  reader: Optional[Reader] = None,
206
238
  auth: Optional[ContentAuth] = None,
@@ -228,11 +260,13 @@ class Knowledge:
228
260
  ) -> None:
229
261
  # Validation: At least one of the parameters must be provided
230
262
  if all(argument is None for argument in [path, url, text_content, topics, remote_content]):
231
- log_info("At least one of 'path', 'url', 'text_content', 'topics', or 'remote_content' must be provided.")
263
+ log_warning(
264
+ "At least one of 'path', 'url', 'text_content', 'topics', or 'remote_content' must be provided."
265
+ )
232
266
  return
233
267
 
234
268
  if not skip_if_exists:
235
- log_info("skip_if_exists is disabled, disabling upsert")
269
+ log_debug("skip_if_exists is disabled, disabling upsert")
236
270
  upsert = False
237
271
 
238
272
  content = None
@@ -241,7 +275,6 @@ class Knowledge:
241
275
  file_data = FileData(content=text_content, type="Text")
242
276
 
243
277
  content = Content(
244
- id=str(uuid4()),
245
278
  name=name,
246
279
  description=description,
247
280
  path=path,
@@ -253,6 +286,8 @@ class Knowledge:
253
286
  reader=reader,
254
287
  auth=auth,
255
288
  )
289
+ content.content_hash = self._build_content_hash(content)
290
+ content.id = generate_id(content.content_hash)
256
291
 
257
292
  await self._load_content(content, upsert, skip_if_exists, include, exclude)
258
293
 
@@ -266,7 +301,7 @@ class Knowledge:
266
301
  metadata: Optional[Dict[str, str]] = None,
267
302
  include: Optional[List[str]] = None,
268
303
  exclude: Optional[List[str]] = None,
269
- upsert: bool = False,
304
+ upsert: bool = True,
270
305
  skip_if_exists: bool = False,
271
306
  reader: Optional[Reader] = None,
272
307
  auth: Optional[ContentAuth] = None,
@@ -289,7 +324,7 @@ class Knowledge:
289
324
  include: Optional[List[str]] = None,
290
325
  exclude: Optional[List[str]] = None,
291
326
  upsert: bool = True,
292
- skip_if_exists: bool = True,
327
+ skip_if_exists: bool = False,
293
328
  auth: Optional[ContentAuth] = None,
294
329
  ) -> None:
295
330
  """
@@ -303,7 +338,7 @@ class Knowledge:
303
338
  text_content: Optional text content to add directly
304
339
  metadata: Optional metadata dictionary
305
340
  topics: Optional list of topics
306
- config: Optional cloud storage configuration
341
+ remote_content: Optional cloud storage configuration
307
342
  reader: Optional custom reader for processing the content
308
343
  include: Optional list of file patterns to include
309
344
  exclude: Optional list of file patterns to exclude
@@ -329,6 +364,26 @@ class Knowledge:
329
364
  )
330
365
  )
331
366
 
367
+ def _should_skip(self, content_hash: str, skip_if_exists: bool) -> bool:
368
+ """
369
+ Handle the skip_if_exists logic for content that already exists in the vector database.
370
+
371
+ Args:
372
+ content_hash: The content hash string to check for existence
373
+ skip_if_exists: Whether to skip if content already exists
374
+
375
+ Returns:
376
+ bool: True if should skip processing, False if should continue
377
+ """
378
+ from agno.vectordb import VectorDb
379
+
380
+ self.vector_db = cast(VectorDb, self.vector_db)
381
+ if self.vector_db and self.vector_db.content_hash_exists(content_hash) and skip_if_exists:
382
+ log_debug(f"Content already exists: {content_hash}, skipping...")
383
+ return True
384
+
385
+ return False
386
+
332
387
  async def _load_from_path(
333
388
  self,
334
389
  content: Content,
@@ -337,25 +392,28 @@ class Knowledge:
337
392
  include: Optional[List[str]] = None,
338
393
  exclude: Optional[List[str]] = None,
339
394
  ):
395
+ from agno.vectordb import VectorDb
396
+
397
+ self.vector_db = cast(VectorDb, self.vector_db)
398
+
340
399
  log_info(f"Adding content from path, {content.id}, {content.name}, {content.path}, {content.description}")
341
400
  path = Path(content.path) # type: ignore
342
401
 
343
402
  if path.is_file():
344
403
  if self._should_include_file(str(path), include, exclude):
345
- log_info(f"Adding file {path} due to include/exclude filters")
404
+ log_debug(f"Adding file {path} due to include/exclude filters")
405
+
406
+ await self._add_to_contents_db(content)
407
+ if self._should_skip(content.content_hash, skip_if_exists): # type: ignore[arg-type]
408
+ content.status = ContentStatus.COMPLETED
409
+ await self._aupdate_content(content)
410
+ return
346
411
 
347
412
  # Handle LightRAG special case - read file and upload directly
348
413
  if self.vector_db.__class__.__name__ == "LightRag":
349
414
  await self._process_lightrag_content(content, KnowledgeContentOrigin.PATH)
350
415
  return
351
416
 
352
- content.content_hash = self._build_content_hash(content)
353
- if self.vector_db and self.vector_db.content_hash_exists(content.content_hash) and skip_if_exists:
354
- log_info(f"Content {content.content_hash} already exists, skipping")
355
- return
356
-
357
- self._add_to_contents_db(content)
358
-
359
417
  if content.reader:
360
418
  # TODO: We will refactor this to eventually pass authorization to all readers
361
419
  import inspect
@@ -370,7 +428,7 @@ class Knowledge:
370
428
 
371
429
  else:
372
430
  reader = ReaderFactory.get_reader_for_extension(path.suffix)
373
- log_info(f"Using Reader: {reader.__class__.__name__}")
431
+ log_debug(f"Using Reader: {reader.__class__.__name__}")
374
432
  if reader:
375
433
  # TODO: We will refactor this to eventually pass authorization to all readers
376
434
  import inspect
@@ -407,15 +465,16 @@ class Knowledge:
407
465
  log_debug(f"Skipping file {file_path} due to include/exclude filters")
408
466
  continue
409
467
 
410
- id = str(uuid4())
411
468
  file_content = Content(
412
- id=id,
413
469
  name=content.name,
414
470
  path=str(file_path),
415
471
  metadata=content.metadata,
416
472
  description=content.description,
417
473
  reader=content.reader,
418
474
  )
475
+ file_content.content_hash = self._build_content_hash(file_content)
476
+ file_content.id = generate_id(file_content.content_hash)
477
+
419
478
  await self._load_from_path(file_content, upsert, skip_if_exists, include, exclude)
420
479
  else:
421
480
  log_warning(f"Invalid path: {path}")
@@ -433,22 +492,26 @@ class Knowledge:
433
492
  3. Read the content
434
493
  4. Prepare and insert the content in the vector database
435
494
  """
495
+ from agno.vectordb import VectorDb
496
+
497
+ self.vector_db = cast(VectorDb, self.vector_db)
498
+
436
499
  log_info(f"Adding content from URL {content.url}")
437
500
  content.file_type = "url"
438
501
 
439
502
  if not content.url:
440
503
  raise ValueError("No url provided")
441
504
 
442
- if self.vector_db.__class__.__name__ == "LightRag":
443
- await self._process_lightrag_content(content, KnowledgeContentOrigin.URL)
505
+ # 1. Add content to contents database
506
+ await self._add_to_contents_db(content)
507
+ if self._should_skip(content.content_hash, skip_if_exists): # type: ignore[arg-type]
508
+ content.status = ContentStatus.COMPLETED
509
+ await self._aupdate_content(content)
444
510
  return
445
511
 
446
- # 1. Set content hash
447
- content.content_hash = self._build_content_hash(content)
448
- if self.vector_db and self.vector_db.content_hash_exists(content.content_hash) and skip_if_exists:
449
- log_info(f"Content {content.content_hash} already exists, skipping")
512
+ if self.vector_db.__class__.__name__ == "LightRag":
513
+ await self._process_lightrag_content(content, KnowledgeContentOrigin.URL)
450
514
  return
451
- self._add_to_contents_db(content)
452
515
 
453
516
  # 2. Validate URL
454
517
  try:
@@ -458,27 +521,30 @@ class Knowledge:
458
521
  if not all([parsed_url.scheme, parsed_url.netloc]):
459
522
  content.status = ContentStatus.FAILED
460
523
  content.status_message = f"Invalid URL format: {content.url}"
461
- self._update_content(content)
524
+ await self._aupdate_content(content)
462
525
  log_warning(f"Invalid URL format: {content.url}")
463
526
  except Exception as e:
464
527
  content.status = ContentStatus.FAILED
465
528
  content.status_message = f"Invalid URL: {content.url} - {str(e)}"
466
- self._update_content(content)
529
+ await self._aupdate_content(content)
467
530
  log_warning(f"Invalid URL: {content.url} - {str(e)}")
468
531
 
469
- # 3. Fetch and load content
470
- async with AsyncClient() as client:
471
- response = await async_fetch_with_retry(content.url, client=client)
472
- bytes_content = BytesIO(response.content)
532
+ # 3. Fetch and load content if file has an extension
533
+ url_path = Path(parsed_url.path)
534
+ file_extension = url_path.suffix.lower()
535
+
536
+ bytes_content = None
537
+ if file_extension:
538
+ async with AsyncClient() as client:
539
+ response = await async_fetch_with_retry(content.url, client=client)
540
+ bytes_content = BytesIO(response.content)
473
541
 
474
542
  # 4. Select reader
475
543
  # If a reader was provided by the user, use it
476
544
  reader = content.reader
477
- name = content.name
545
+ name = content.name if content.name else content.url
478
546
  # Else select based on file extension
479
547
  if reader is None:
480
- url_path = Path(parsed_url.path)
481
- file_extension = url_path.suffix.lower()
482
548
  if file_extension == ".csv":
483
549
  name = basename(parsed_url.path) or "data.csv"
484
550
  reader = self.csv_reader
@@ -486,6 +552,8 @@ class Knowledge:
486
552
  reader = self.pdf_reader
487
553
  elif file_extension == ".docx":
488
554
  reader = self.docx_reader
555
+ elif file_extension == ".pptx":
556
+ reader = self.pptx_reader
489
557
  elif file_extension == ".json":
490
558
  reader = self.json_reader
491
559
  elif file_extension == ".markdown":
@@ -504,20 +572,26 @@ class Knowledge:
504
572
  if reader.__class__.__name__ == "YouTubeReader":
505
573
  read_documents = reader.read(content.url, name=name)
506
574
  elif "password" in read_signature.parameters and content.auth and content.auth.password:
507
- read_documents = reader.read(bytes_content, name=name, password=content.auth.password)
575
+ if bytes_content:
576
+ read_documents = reader.read(bytes_content, name=name, password=content.auth.password)
577
+ else:
578
+ read_documents = reader.read(content.url, name=name, password=content.auth.password)
508
579
  else:
509
- read_documents = reader.read(bytes_content, name=name)
580
+ if bytes_content:
581
+ read_documents = reader.read(bytes_content, name=name)
582
+ else:
583
+ read_documents = reader.read(content.url, name=name)
584
+
510
585
  except Exception as e:
511
586
  log_error(f"Error reading URL: {content.url} - {str(e)}")
512
587
  content.status = ContentStatus.FAILED
513
588
  content.status_message = f"Error reading URL: {content.url} - {str(e)}"
514
- self._update_content(content)
589
+ await self._aupdate_content(content)
515
590
  return
516
591
 
517
592
  # 6. Chunk documents if needed
518
593
  if reader and not reader.chunk:
519
594
  read_documents = await reader.chunk_documents_async(read_documents)
520
-
521
595
  # 7. Prepare and insert the content in the vector database
522
596
  file_size = 0
523
597
  if read_documents:
@@ -531,8 +605,12 @@ class Knowledge:
531
605
  self,
532
606
  content: Content,
533
607
  upsert: bool = True,
534
- skip_if_exists: bool = True,
608
+ skip_if_exists: bool = False,
535
609
  ):
610
+ from agno.vectordb import VectorDb
611
+
612
+ self.vector_db = cast(VectorDb, self.vector_db)
613
+
536
614
  if content.name:
537
615
  name = content.name
538
616
  elif content.file_data and content.file_data.content:
@@ -554,28 +632,24 @@ class Knowledge:
554
632
 
555
633
  log_info(f"Adding content from {content.name}")
556
634
 
557
- if content.file_data and self.vector_db.__class__.__name__ == "LightRag":
558
- await self._process_lightrag_content(content, KnowledgeContentOrigin.CONTENT)
635
+ await self._add_to_contents_db(content)
636
+ if self._should_skip(content.content_hash, skip_if_exists): # type: ignore[arg-type]
637
+ content.status = ContentStatus.COMPLETED
638
+ await self._aupdate_content(content)
559
639
  return
560
640
 
561
- content.content_hash = self._build_content_hash(content)
562
- if self.vector_db and self.vector_db.content_hash_exists(content.content_hash) and skip_if_exists:
563
- log_info(f"Content {content.content_hash} already exists, skipping")
564
-
641
+ if content.file_data and self.vector_db.__class__.__name__ == "LightRag":
642
+ await self._process_lightrag_content(content, KnowledgeContentOrigin.CONTENT)
565
643
  return
566
- self._add_to_contents_db(content)
567
644
 
568
645
  read_documents = []
569
646
 
570
647
  if isinstance(content.file_data, str):
571
- try:
572
- content_bytes = content.file_data.encode("utf-8")
573
- except UnicodeEncodeError:
574
- content_bytes = content.file_data.encode("latin-1")
648
+ content_bytes = content.file_data.encode("utf-8", errors="replace")
575
649
  content_io = io.BytesIO(content_bytes)
576
650
 
577
651
  if content.reader:
578
- log_info(f"Using reader: {content.reader.__class__.__name__} to read content")
652
+ log_debug(f"Using reader: {content.reader.__class__.__name__} to read content")
579
653
  read_documents = content.reader.read(content_io, name=name)
580
654
  else:
581
655
  text_reader = self.text_reader
@@ -584,7 +658,7 @@ class Knowledge:
584
658
  else:
585
659
  content.status = ContentStatus.FAILED
586
660
  content.status_message = "Text reader not available"
587
- self._update_content(content)
661
+ await self._aupdate_content(content)
588
662
  return
589
663
 
590
664
  elif isinstance(content.file_data, FileData):
@@ -592,27 +666,19 @@ class Knowledge:
592
666
  if isinstance(content.file_data.content, bytes):
593
667
  content_io = io.BytesIO(content.file_data.content)
594
668
  elif isinstance(content.file_data.content, str):
595
- if self._is_text_mime_type(content.file_data.type):
596
- try:
597
- content_bytes = content.file_data.content.encode("utf-8")
598
- except UnicodeEncodeError:
599
- log_debug(f"UTF-8 encoding failed for {content.file_data.type}, using latin-1")
600
- content_bytes = content.file_data.content.encode("latin-1")
601
- else:
602
- content_bytes = content.file_data.content.encode("latin-1")
669
+ content_bytes = content.file_data.content.encode("utf-8", errors="replace")
603
670
  content_io = io.BytesIO(content_bytes)
604
671
  else:
605
672
  content_io = content.file_data.content # type: ignore
606
673
 
607
674
  # Respect an explicitly provided reader; otherwise select based on file type
608
675
  if content.reader:
609
- log_info(f"Using reader: {content.reader.__class__.__name__} to read content")
676
+ log_debug(f"Using reader: {content.reader.__class__.__name__} to read content")
610
677
  reader = content.reader
611
678
  else:
612
679
  reader = self._select_reader(content.file_data.type)
613
680
  name = content.name if content.name else f"content_{content.file_data.type}"
614
681
  read_documents = reader.read(content_io, name=name)
615
-
616
682
  for read_document in read_documents:
617
683
  if content.metadata:
618
684
  read_document.meta_data.update(content.metadata)
@@ -621,12 +687,13 @@ class Knowledge:
621
687
  if len(read_documents) == 0:
622
688
  content.status = ContentStatus.FAILED
623
689
  content.status_message = "Content could not be read"
624
- self._update_content(content)
690
+ await self._aupdate_content(content)
691
+ return
625
692
 
626
693
  else:
627
694
  content.status = ContentStatus.FAILED
628
695
  content.status_message = "No content provided"
629
- self._update_content(content)
696
+ await self._aupdate_content(content)
630
697
  return
631
698
 
632
699
  await self._handle_vector_db_insert(content, read_documents, upsert)
@@ -637,6 +704,9 @@ class Knowledge:
637
704
  upsert: bool,
638
705
  skip_if_exists: bool,
639
706
  ):
707
+ from agno.vectordb import VectorDb
708
+
709
+ self.vector_db = cast(VectorDb, self.vector_db)
640
710
  log_info(f"Adding content from topics: {content.topics}")
641
711
 
642
712
  if content.topics is None:
@@ -644,9 +714,7 @@ class Knowledge:
644
714
  return
645
715
 
646
716
  for topic in content.topics:
647
- id = str(uuid4())
648
717
  content = Content(
649
- id=id,
650
718
  name=topic,
651
719
  metadata=content.metadata,
652
720
  reader=content.reader,
@@ -656,30 +724,41 @@ class Knowledge:
656
724
  ),
657
725
  topics=[topic],
658
726
  )
727
+ content.content_hash = self._build_content_hash(content)
728
+ content.id = generate_id(content.content_hash)
729
+
730
+ await self._add_to_contents_db(content)
731
+ if self._should_skip(content.content_hash, skip_if_exists):
732
+ content.status = ContentStatus.COMPLETED
733
+ await self._aupdate_content(content)
734
+ return
659
735
 
660
736
  if self.vector_db.__class__.__name__ == "LightRag":
661
737
  await self._process_lightrag_content(content, KnowledgeContentOrigin.TOPIC)
662
738
  return
663
739
 
664
- content.content_hash = self._build_content_hash(content)
665
740
  if self.vector_db and self.vector_db.content_hash_exists(content.content_hash) and skip_if_exists:
666
741
  log_info(f"Content {content.content_hash} already exists, skipping")
667
742
  continue
668
743
 
669
- self._add_to_contents_db(content)
744
+ await self._add_to_contents_db(content)
670
745
  if content.reader is None:
671
746
  log_error(f"No reader available for topic: {topic}")
747
+ content.status = ContentStatus.FAILED
748
+ content.status_message = "No reader available for topic"
749
+ await self._aupdate_content(content)
672
750
  continue
751
+
673
752
  read_documents = content.reader.read(topic)
674
753
  if len(read_documents) > 0:
675
754
  for read_document in read_documents:
676
- read_document.content_id = id
755
+ read_document.content_id = content.id
677
756
  if read_document.content:
678
757
  read_document.size = len(read_document.content.encode("utf-8"))
679
758
  else:
680
759
  content.status = ContentStatus.FAILED
681
760
  content.status_message = "No content found for topic"
682
- self._update_content(content)
761
+ await self._aupdate_content(content)
683
762
 
684
763
  await self._handle_vector_db_insert(content, read_documents, upsert)
685
764
 
@@ -735,11 +814,9 @@ class Knowledge:
735
814
 
736
815
  for s3_object in objects_to_read:
737
816
  # 2. Setup Content object
738
- id = str(uuid4())
739
817
  content_name = content.name or ""
740
818
  content_name += "_" + (s3_object.name or "")
741
819
  content_entry = Content(
742
- id=id,
743
820
  name=content_name,
744
821
  description=content.description,
745
822
  status=ContentStatus.PROCESSING,
@@ -748,11 +825,13 @@ class Knowledge:
748
825
  )
749
826
 
750
827
  # 3. Hash content and add it to the contents database
751
- content_hash = self._build_content_hash(content_entry)
752
- if self.vector_db and self.vector_db.content_hash_exists(content_hash) and skip_if_exists:
753
- log_info(f"Content {content_hash} already exists, skipping")
754
- continue
755
- self._add_to_contents_db(content_entry)
828
+ content_entry.content_hash = self._build_content_hash(content_entry)
829
+ content_entry.id = generate_id(content_entry.content_hash)
830
+ await self._add_to_contents_db(content_entry)
831
+ if self._should_skip(content_entry.content_hash, skip_if_exists):
832
+ content_entry.status = ContentStatus.COMPLETED
833
+ await self._aupdate_content(content_entry)
834
+ return
756
835
 
757
836
  # 4. Select reader
758
837
  reader = content.reader
@@ -763,6 +842,8 @@ class Knowledge:
763
842
  reader = self.csv_reader
764
843
  elif s3_object.uri.endswith(".docx"):
765
844
  reader = self.docx_reader
845
+ elif s3_object.uri.endswith(".pptx"):
846
+ reader = self.pptx_reader
766
847
  elif s3_object.uri.endswith(".json"):
767
848
  reader = self.json_reader
768
849
  elif s3_object.uri.endswith(".markdown"):
@@ -818,10 +899,8 @@ class Knowledge:
818
899
 
819
900
  for gcs_object in objects_to_read:
820
901
  # 2. Setup Content object
821
- id = str(uuid4())
822
902
  name = (content.name or "content") + "_" + gcs_object.name
823
903
  content_entry = Content(
824
- id=id,
825
904
  name=name,
826
905
  description=content.description,
827
906
  status=ContentStatus.PROCESSING,
@@ -830,15 +909,15 @@ class Knowledge:
830
909
  )
831
910
 
832
911
  # 3. Hash content and add it to the contents database
833
- content_hash = self._build_content_hash(content_entry)
834
- if self.vector_db and self.vector_db.content_hash_exists(content_hash) and skip_if_exists:
835
- log_info(f"Content {content_hash} already exists, skipping")
836
- continue
837
-
838
- # 4. Add it to the contents database
839
- self._add_to_contents_db(content_entry)
912
+ content_entry.content_hash = self._build_content_hash(content_entry)
913
+ content_entry.id = generate_id(content_entry.content_hash)
914
+ await self._add_to_contents_db(content_entry)
915
+ if self._should_skip(content_entry.content_hash, skip_if_exists):
916
+ content_entry.status = ContentStatus.COMPLETED
917
+ await self._aupdate_content(content_entry)
918
+ return
840
919
 
841
- # 5. Select reader
920
+ # 4. Select reader
842
921
  reader = content.reader
843
922
  if reader is None:
844
923
  if gcs_object.name.endswith(".pdf"):
@@ -847,6 +926,8 @@ class Knowledge:
847
926
  reader = self.csv_reader
848
927
  elif gcs_object.name.endswith(".docx"):
849
928
  reader = self.docx_reader
929
+ elif gcs_object.name.endswith(".pptx"):
930
+ reader = self.pptx_reader
850
931
  elif gcs_object.name.endswith(".json"):
851
932
  reader = self.json_reader
852
933
  elif gcs_object.name.endswith(".markdown"):
@@ -866,37 +947,43 @@ class Knowledge:
866
947
  read_document.content_id = content.id
867
948
  await self._handle_vector_db_insert(content_entry, read_documents, upsert)
868
949
 
869
- async def _handle_vector_db_insert(self, content, read_documents, upsert):
950
+ async def _handle_vector_db_insert(self, content: Content, read_documents, upsert):
951
+ from agno.vectordb import VectorDb
952
+
953
+ self.vector_db = cast(VectorDb, self.vector_db)
954
+
870
955
  if not self.vector_db:
871
956
  log_error("No vector database configured")
872
957
  content.status = ContentStatus.FAILED
873
958
  content.status_message = "No vector database configured"
874
- self._update_content(content)
959
+ await self._aupdate_content(content)
875
960
  return
876
961
 
877
962
  if self.vector_db.upsert_available() and upsert:
878
963
  try:
879
- await self.vector_db.async_upsert(content.content_hash, read_documents, content.metadata)
964
+ await self.vector_db.async_upsert(content.content_hash, read_documents, content.metadata) # type: ignore[arg-type]
880
965
  except Exception as e:
881
966
  log_error(f"Error upserting document: {e}")
882
967
  content.status = ContentStatus.FAILED
883
968
  content.status_message = "Could not upsert embedding"
884
- self._update_content(content)
969
+ await self._aupdate_content(content)
885
970
  return
886
971
  else:
887
972
  try:
888
973
  await self.vector_db.async_insert(
889
- content.content_hash, documents=read_documents, filters=content.metadata
974
+ content.content_hash, # type: ignore[arg-type]
975
+ documents=read_documents,
976
+ filters=content.metadata, # type: ignore[arg-type]
890
977
  )
891
978
  except Exception as e:
892
979
  log_error(f"Error inserting document: {e}")
893
980
  content.status = ContentStatus.FAILED
894
981
  content.status_message = "Could not insert embedding"
895
- self._update_content(content)
982
+ await self._aupdate_content(content)
896
983
  return
897
984
 
898
985
  content.status = ContentStatus.COMPLETED
899
- self._update_content(content)
986
+ await self._aupdate_content(content)
900
987
 
901
988
  async def _load_content(
902
989
  self,
@@ -906,11 +993,6 @@ class Knowledge:
906
993
  include: Optional[List[str]] = None,
907
994
  exclude: Optional[List[str]] = None,
908
995
  ) -> None:
909
- log_info(f"Loading content: {content.id}")
910
-
911
- if content.metadata:
912
- self.add_filters(content.metadata)
913
-
914
996
  if content.path:
915
997
  await self._load_from_path(content, upsert, skip_if_exists, include, exclude)
916
998
 
@@ -954,7 +1036,49 @@ class Knowledge:
954
1036
  )
955
1037
  return hashlib.sha256(fallback.encode()).hexdigest()
956
1038
 
957
- def _add_to_contents_db(self, content: Content):
1039
+ def _ensure_string_field(self, value: Any, field_name: str, default: str = "") -> str:
1040
+ """
1041
+ Safely ensure a field is a string, handling various edge cases.
1042
+
1043
+ Args:
1044
+ value: The value to convert to string
1045
+ field_name: Name of the field for logging purposes
1046
+ default: Default string value if conversion fails
1047
+
1048
+ Returns:
1049
+ str: A safe string value
1050
+ """
1051
+ # Handle None/falsy values
1052
+ if value is None or value == "":
1053
+ return default
1054
+
1055
+ # Handle unexpected list types (the root cause of our Pydantic warning)
1056
+ if isinstance(value, list):
1057
+ if len(value) == 0:
1058
+ log_debug(f"Empty list found for {field_name}, using default: '{default}'")
1059
+ return default
1060
+ elif len(value) == 1:
1061
+ # Single item list, extract the item
1062
+ log_debug(f"Single-item list found for {field_name}, extracting: '{value[0]}'")
1063
+ return str(value[0]) if value[0] is not None else default
1064
+ else:
1065
+ # Multiple items, join them
1066
+ log_debug(f"Multi-item list found for {field_name}, joining: {value}")
1067
+ return " | ".join(str(item) for item in value if item is not None)
1068
+
1069
+ # Handle other unexpected types
1070
+ if not isinstance(value, str):
1071
+ log_debug(f"Non-string type {type(value)} found for {field_name}, converting: '{value}'")
1072
+ try:
1073
+ return str(value)
1074
+ except Exception as e:
1075
+ log_warning(f"Failed to convert {field_name} to string: {e}, using default")
1076
+ return default
1077
+
1078
+ # Already a string, return as-is
1079
+ return value
1080
+
1081
+ async def _add_to_contents_db(self, content: Content):
958
1082
  if self.contents_db:
959
1083
  created_at = content.created_at if content.created_at else int(time.time())
960
1084
  updated_at = content.updated_at if content.updated_at else int(time.time())
@@ -966,10 +1090,18 @@ class Knowledge:
966
1090
  if content.file_data and content.file_data.type
967
1091
  else None
968
1092
  )
1093
+ # Safely handle string fields with proper type checking
1094
+ safe_name = self._ensure_string_field(content.name, "content.name", default="")
1095
+ safe_description = self._ensure_string_field(content.description, "content.description", default="")
1096
+ safe_linked_to = self._ensure_string_field(self.name, "knowledge.name", default="")
1097
+ safe_status_message = self._ensure_string_field(
1098
+ content.status_message, "content.status_message", default=""
1099
+ )
1100
+
969
1101
  content_row = KnowledgeRow(
970
1102
  id=content.id,
971
- name=content.name if content.name else "",
972
- description=content.description if content.description else "",
1103
+ name=safe_name,
1104
+ description=safe_description,
973
1105
  metadata=content.metadata,
974
1106
  type=file_type,
975
1107
  size=content.size
@@ -977,17 +1109,28 @@ class Knowledge:
977
1109
  else len(content.file_data.content)
978
1110
  if content.file_data and content.file_data.content
979
1111
  else None,
980
- linked_to=self.name,
1112
+ linked_to=safe_linked_to,
981
1113
  access_count=0,
982
1114
  status=content.status if content.status else ContentStatus.PROCESSING,
983
- status_message="",
1115
+ status_message=safe_status_message,
984
1116
  created_at=created_at,
985
1117
  updated_at=updated_at,
986
1118
  )
987
- self.contents_db.upsert_knowledge_content(knowledge_row=content_row)
1119
+ if isinstance(self.contents_db, AsyncBaseDb):
1120
+ await self.contents_db.upsert_knowledge_content(knowledge_row=content_row)
1121
+ else:
1122
+ self.contents_db.upsert_knowledge_content(knowledge_row=content_row)
988
1123
 
989
1124
  def _update_content(self, content: Content) -> Optional[Dict[str, Any]]:
1125
+ from agno.vectordb import VectorDb
1126
+
1127
+ self.vector_db = cast(VectorDb, self.vector_db)
990
1128
  if self.contents_db:
1129
+ if isinstance(self.contents_db, AsyncBaseDb):
1130
+ raise ValueError(
1131
+ "update_content() is not supported with an async DB. Please use aupdate_content() instead."
1132
+ )
1133
+
991
1134
  if not content.id:
992
1135
  log_warning("Content id is required to update Knowledge content")
993
1136
  return None
@@ -998,6 +1141,55 @@ class Knowledge:
998
1141
  log_warning(f"Content row not found for id: {content.id}, cannot update status")
999
1142
  return None
1000
1143
 
1144
+ # Apply safe string handling for updates as well
1145
+ if content.name is not None:
1146
+ content_row.name = self._ensure_string_field(content.name, "content.name", default="")
1147
+ if content.description is not None:
1148
+ content_row.description = self._ensure_string_field(
1149
+ content.description, "content.description", default=""
1150
+ )
1151
+ if content.metadata is not None:
1152
+ content_row.metadata = content.metadata
1153
+ if content.status is not None:
1154
+ content_row.status = content.status
1155
+ if content.status_message is not None:
1156
+ content_row.status_message = self._ensure_string_field(
1157
+ content.status_message, "content.status_message", default=""
1158
+ )
1159
+ if content.external_id is not None:
1160
+ content_row.external_id = self._ensure_string_field(
1161
+ content.external_id, "content.external_id", default=""
1162
+ )
1163
+ content_row.updated_at = int(time.time())
1164
+ self.contents_db.upsert_knowledge_content(knowledge_row=content_row)
1165
+
1166
+ if self.vector_db and content.metadata:
1167
+ self.vector_db.update_metadata(content_id=content.id, metadata=content.metadata)
1168
+
1169
+ return content_row.to_dict()
1170
+
1171
+ else:
1172
+ if self.name:
1173
+ log_warning(f"Contents DB not found for knowledge base: {self.name}")
1174
+ else:
1175
+ log_warning("Contents DB not found for knowledge base")
1176
+ return None
1177
+
1178
+ async def _aupdate_content(self, content: Content) -> Optional[Dict[str, Any]]:
1179
+ if self.contents_db:
1180
+ if not content.id:
1181
+ log_warning("Content id is required to update Knowledge content")
1182
+ return None
1183
+
1184
+ # TODO: we shouldn't check for content here, we should trust the upsert method to handle conflicts
1185
+ if isinstance(self.contents_db, AsyncBaseDb):
1186
+ content_row = await self.contents_db.get_knowledge_content(content.id)
1187
+ else:
1188
+ content_row = self.contents_db.get_knowledge_content(content.id)
1189
+ if content_row is None:
1190
+ log_warning(f"Content row not found for id: {content.id}, cannot update status")
1191
+ return None
1192
+
1001
1193
  if content.name is not None:
1002
1194
  content_row.name = content.name
1003
1195
  if content.description is not None:
@@ -1012,22 +1204,29 @@ class Knowledge:
1012
1204
  content_row.external_id = content.external_id
1013
1205
 
1014
1206
  content_row.updated_at = int(time.time())
1015
- self.contents_db.upsert_knowledge_content(knowledge_row=content_row)
1207
+ if isinstance(self.contents_db, AsyncBaseDb):
1208
+ await self.contents_db.upsert_knowledge_content(knowledge_row=content_row)
1209
+ else:
1210
+ self.contents_db.upsert_knowledge_content(knowledge_row=content_row)
1016
1211
 
1017
1212
  if self.vector_db and content.metadata:
1018
1213
  self.vector_db.update_metadata(content_id=content.id, metadata=content.metadata)
1019
1214
 
1020
- if content.metadata:
1021
- self.add_filters(content.metadata)
1022
-
1023
1215
  return content_row.to_dict()
1024
1216
 
1025
1217
  else:
1026
- log_warning(f"Contents DB not found for knowledge base: {self.name}")
1218
+ if self.name:
1219
+ log_warning(f"Contents DB not found for knowledge base: {self.name}")
1220
+ else:
1221
+ log_warning("Contents DB not found for knowledge base")
1027
1222
  return None
1028
1223
 
1029
1224
  async def _process_lightrag_content(self, content: Content, content_type: KnowledgeContentOrigin) -> None:
1030
- self._add_to_contents_db(content)
1225
+ from agno.vectordb import VectorDb
1226
+
1227
+ self.vector_db = cast(VectorDb, self.vector_db)
1228
+
1229
+ await self._add_to_contents_db(content)
1031
1230
  if content_type == KnowledgeContentOrigin.PATH:
1032
1231
  if content.file_data is None:
1033
1232
  log_warning("No file data provided")
@@ -1058,18 +1257,18 @@ class Knowledge:
1058
1257
  else:
1059
1258
  log_error("Vector database does not support file insertion")
1060
1259
  content.status = ContentStatus.FAILED
1061
- self._update_content(content)
1260
+ await self._aupdate_content(content)
1062
1261
  return
1063
1262
  content.external_id = result
1064
1263
  content.status = ContentStatus.COMPLETED
1065
- self._update_content(content)
1264
+ await self._aupdate_content(content)
1066
1265
  return
1067
1266
 
1068
1267
  except Exception as e:
1069
1268
  log_error(f"Error uploading file to LightRAG: {e}")
1070
1269
  content.status = ContentStatus.FAILED
1071
1270
  content.status_message = f"Could not upload to LightRAG: {str(e)}"
1072
- self._update_content(content)
1271
+ await self._aupdate_content(content)
1073
1272
  return
1074
1273
 
1075
1274
  elif content_type == KnowledgeContentOrigin.URL:
@@ -1079,7 +1278,7 @@ class Knowledge:
1079
1278
  if reader is None:
1080
1279
  log_error("No URL reader available")
1081
1280
  content.status = ContentStatus.FAILED
1082
- self._update_content(content)
1281
+ await self._aupdate_content(content)
1083
1282
  return
1084
1283
 
1085
1284
  reader.chunk = False
@@ -1091,7 +1290,7 @@ class Knowledge:
1091
1290
  if not read_documents:
1092
1291
  log_error("No documents read from URL")
1093
1292
  content.status = ContentStatus.FAILED
1094
- self._update_content(content)
1293
+ await self._aupdate_content(content)
1095
1294
  return
1096
1295
 
1097
1296
  if self.vector_db and hasattr(self.vector_db, "insert_text"):
@@ -1102,19 +1301,19 @@ class Knowledge:
1102
1301
  else:
1103
1302
  log_error("Vector database does not support text insertion")
1104
1303
  content.status = ContentStatus.FAILED
1105
- self._update_content(content)
1304
+ await self._aupdate_content(content)
1106
1305
  return
1107
1306
 
1108
1307
  content.external_id = result
1109
1308
  content.status = ContentStatus.COMPLETED
1110
- self._update_content(content)
1309
+ await self._aupdate_content(content)
1111
1310
  return
1112
1311
 
1113
1312
  except Exception as e:
1114
1313
  log_error(f"Error uploading file to LightRAG: {e}")
1115
1314
  content.status = ContentStatus.FAILED
1116
1315
  content.status_message = f"Could not upload to LightRAG: {str(e)}"
1117
- self._update_content(content)
1316
+ await self._aupdate_content(content)
1118
1317
  return
1119
1318
 
1120
1319
  elif content_type == KnowledgeContentOrigin.CONTENT:
@@ -1135,11 +1334,11 @@ class Knowledge:
1135
1334
  else:
1136
1335
  log_error("Vector database does not support file insertion")
1137
1336
  content.status = ContentStatus.FAILED
1138
- self._update_content(content)
1337
+ await self._aupdate_content(content)
1139
1338
  return
1140
1339
  content.external_id = result
1141
1340
  content.status = ContentStatus.COMPLETED
1142
- self._update_content(content)
1341
+ await self._aupdate_content(content)
1143
1342
  else:
1144
1343
  log_warning(f"No file data available for LightRAG upload: {content.name}")
1145
1344
  return
@@ -1150,20 +1349,17 @@ class Knowledge:
1150
1349
  if content.reader is None:
1151
1350
  log_error("No reader available for topic content")
1152
1351
  content.status = ContentStatus.FAILED
1153
- self._update_content(content)
1352
+ await self._aupdate_content(content)
1154
1353
  return
1155
1354
 
1156
1355
  if not content.topics:
1157
1356
  log_error("No topics available for content")
1158
1357
  content.status = ContentStatus.FAILED
1159
- self._update_content(content)
1358
+ await self._aupdate_content(content)
1160
1359
  return
1161
1360
 
1162
1361
  read_documents = content.reader.read(content.topics)
1163
1362
  if len(read_documents) > 0:
1164
- print("READ DOCUMENTS: ", len(read_documents))
1165
- print("READ DOCUMENTS: ", read_documents[0])
1166
-
1167
1363
  if self.vector_db and hasattr(self.vector_db, "insert_text"):
1168
1364
  result = await self.vector_db.insert_text(
1169
1365
  file_source=content.topics[0],
@@ -1172,21 +1368,35 @@ class Knowledge:
1172
1368
  else:
1173
1369
  log_error("Vector database does not support text insertion")
1174
1370
  content.status = ContentStatus.FAILED
1175
- self._update_content(content)
1371
+ await self._aupdate_content(content)
1176
1372
  return
1177
1373
  content.external_id = result
1178
1374
  content.status = ContentStatus.COMPLETED
1179
- self._update_content(content)
1375
+ await self._aupdate_content(content)
1180
1376
  return
1181
1377
  else:
1182
1378
  log_warning(f"No documents found for LightRAG upload: {content.name}")
1183
1379
  return
1184
1380
 
1185
1381
  def search(
1186
- self, query: str, max_results: Optional[int] = None, filters: Optional[Dict[str, Any]] = None
1382
+ self,
1383
+ query: str,
1384
+ max_results: Optional[int] = None,
1385
+ filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None,
1386
+ search_type: Optional[str] = None,
1187
1387
  ) -> List[Document]:
1188
1388
  """Returns relevant documents matching a query"""
1389
+ from agno.vectordb import VectorDb
1390
+ from agno.vectordb.search import SearchType
1391
+
1392
+ self.vector_db = cast(VectorDb, self.vector_db)
1189
1393
 
1394
+ if (
1395
+ hasattr(self.vector_db, "search_type")
1396
+ and isinstance(self.vector_db.search_type, SearchType)
1397
+ and search_type
1398
+ ):
1399
+ self.vector_db.search_type = SearchType(search_type)
1190
1400
  try:
1191
1401
  if self.vector_db is None:
1192
1402
  log_warning("No vector db provided")
@@ -1200,10 +1410,23 @@ class Knowledge:
1200
1410
  return []
1201
1411
 
1202
1412
  async def async_search(
1203
- self, query: str, max_results: Optional[int] = None, filters: Optional[Dict[str, Any]] = None
1413
+ self,
1414
+ query: str,
1415
+ max_results: Optional[int] = None,
1416
+ filters: Optional[Union[Dict[str, Any], List[FilterExpr]]] = None,
1417
+ search_type: Optional[str] = None,
1204
1418
  ) -> List[Document]:
1205
1419
  """Returns relevant documents matching a query"""
1206
-
1420
+ from agno.vectordb import VectorDb
1421
+ from agno.vectordb.search import SearchType
1422
+
1423
+ self.vector_db = cast(VectorDb, self.vector_db)
1424
+ if (
1425
+ hasattr(self.vector_db, "search_type")
1426
+ and isinstance(self.vector_db.search_type, SearchType)
1427
+ and search_type
1428
+ ):
1429
+ self.vector_db.search_type = SearchType(search_type)
1207
1430
  try:
1208
1431
  if self.vector_db is None:
1209
1432
  log_warning("No vector db provided")
@@ -1220,66 +1443,114 @@ class Knowledge:
1220
1443
  log_error(f"Error searching for documents: {e}")
1221
1444
  return []
1222
1445
 
1223
- def validate_filters(self, filters: Optional[Dict[str, Any]]) -> Tuple[Dict[str, Any], List[str]]:
1224
- if self.valid_metadata_filters is None:
1225
- self.valid_metadata_filters = set()
1226
- self.valid_metadata_filters.update(self._get_filters_from_db)
1446
+ def get_valid_filters(self) -> Set[str]:
1447
+ if self.contents_db is None:
1448
+ log_warning("No contents db provided. This is required for filtering.")
1449
+ return set()
1450
+ contents, _ = self.get_content()
1451
+ valid_filters: Set[str] = set()
1452
+ for content in contents:
1453
+ if content.metadata:
1454
+ valid_filters.update(content.metadata.keys())
1455
+
1456
+ return valid_filters
1457
+
1458
+ async def async_get_valid_filters(self) -> Set[str]:
1459
+ if self.contents_db is None:
1460
+ log_warning("No contents db provided. This is required for filtering.")
1461
+ return set()
1462
+ contents, _ = await self.aget_content()
1463
+ valid_filters: Set[str] = set()
1464
+ for content in contents:
1465
+ if content.metadata:
1466
+ valid_filters.update(content.metadata.keys())
1467
+
1468
+ return valid_filters
1227
1469
 
1470
+ def _validate_filters(
1471
+ self, filters: Union[Dict[str, Any], List[FilterExpr]], valid_metadata_filters: Set[str]
1472
+ ) -> Tuple[Union[Dict[str, Any], List[FilterExpr]], List[str]]:
1228
1473
  if not filters:
1229
1474
  return {}, []
1230
1475
 
1231
- valid_filters: Dict[str, Any] = {}
1476
+ valid_filters: Union[Dict[str, Any], List[FilterExpr]] = {}
1232
1477
  invalid_keys = []
1233
1478
 
1234
- # If no metadata filters tracked yet, all keys are considered invalid
1235
- if self.valid_metadata_filters is None:
1236
- invalid_keys = list(filters.keys())
1237
- log_debug(f"No valid metadata filters tracked yet. All filter keys considered invalid: {invalid_keys}")
1238
- return {}, invalid_keys
1239
-
1240
- for key, value in filters.items():
1241
- # Handle both normal keys and prefixed keys like meta_data.key
1242
- base_key = key.split(".")[-1] if "." in key else key
1243
- if base_key in self.valid_metadata_filters or key in self.valid_metadata_filters:
1244
- valid_filters[key] = value
1245
- else:
1246
- invalid_keys.append(key)
1247
- log_debug(f"Invalid filter key: {key} - not present in knowledge base")
1479
+ if isinstance(filters, dict):
1480
+ # If no metadata filters tracked yet, all keys are considered invalid
1481
+ if valid_metadata_filters is None or not valid_metadata_filters:
1482
+ invalid_keys = list(filters.keys())
1483
+ log_warning(
1484
+ f"No valid metadata filters tracked yet. All filter keys considered invalid: {invalid_keys}"
1485
+ )
1486
+ return {}, invalid_keys
1487
+
1488
+ for key, value in filters.items():
1489
+ # Handle both normal keys and prefixed keys like meta_data.key
1490
+ base_key = key.split(".")[-1] if "." in key else key
1491
+ if base_key in valid_metadata_filters or key in valid_metadata_filters:
1492
+ valid_filters[key] = value # type: ignore
1493
+ else:
1494
+ invalid_keys.append(key)
1495
+ log_warning(f"Invalid filter key: {key} - not present in knowledge base")
1496
+
1497
+ elif isinstance(filters, List):
1498
+ # Validate that list contains FilterExpr instances
1499
+ for i, filter_item in enumerate(filters):
1500
+ if not isinstance(filter_item, FilterExpr):
1501
+ log_warning(
1502
+ f"Invalid filter at index {i}: expected FilterExpr instance, "
1503
+ f"got {type(filter_item).__name__}. "
1504
+ f"Use filter expressions like EQ('key', 'value'), IN('key', [values]), "
1505
+ f"AND(...), OR(...), NOT(...) from agno.filters"
1506
+ )
1507
+ # Filter expressions are already validated, return empty dict/list
1508
+ # The actual filtering happens in the vector_db layer
1509
+ return filters, []
1248
1510
 
1249
1511
  return valid_filters, invalid_keys
1250
1512
 
1251
- def add_filters(self, metadata: Dict[str, Any]) -> None:
1252
- if self.valid_metadata_filters is None:
1253
- self.valid_metadata_filters = set()
1513
+ def validate_filters(
1514
+ self, filters: Union[Dict[str, Any], List[FilterExpr]]
1515
+ ) -> Tuple[Union[Dict[str, Any], List[FilterExpr]], List[str]]:
1516
+ valid_filters_from_db = self.get_valid_filters()
1254
1517
 
1255
- if metadata is not None:
1256
- for key in metadata.keys():
1257
- self.valid_metadata_filters.add(key)
1518
+ valid_filters, invalid_keys = self._validate_filters(filters, valid_filters_from_db)
1258
1519
 
1259
- @cached_property
1260
- def _get_filters_from_db(self) -> Set[str]:
1261
- if self.contents_db is None:
1262
- return set()
1263
- contents, _ = self.get_content()
1264
- valid_filters: Set[str] = set()
1265
- for content in contents:
1266
- if content.metadata:
1267
- valid_filters.update(content.metadata.keys())
1268
- return valid_filters
1520
+ return valid_filters, invalid_keys
1521
+
1522
+ async def async_validate_filters(
1523
+ self, filters: Union[Dict[str, Any], List[FilterExpr]]
1524
+ ) -> Tuple[Union[Dict[str, Any], List[FilterExpr]], List[str]]:
1525
+ """Return a tuple containing a dict with all valid filters and a list of invalid filter keys"""
1526
+ valid_filters_from_db = await self.async_get_valid_filters()
1527
+
1528
+ valid_filters, invalid_keys = self._validate_filters(filters, valid_filters_from_db)
1529
+
1530
+ return valid_filters, invalid_keys
1269
1531
 
1270
1532
  def remove_vector_by_id(self, id: str) -> bool:
1533
+ from agno.vectordb import VectorDb
1534
+
1535
+ self.vector_db = cast(VectorDb, self.vector_db)
1271
1536
  if self.vector_db is None:
1272
1537
  log_warning("No vector DB provided")
1273
1538
  return False
1274
1539
  return self.vector_db.delete_by_id(id)
1275
1540
 
1276
1541
  def remove_vectors_by_name(self, name: str) -> bool:
1542
+ from agno.vectordb import VectorDb
1543
+
1544
+ self.vector_db = cast(VectorDb, self.vector_db)
1277
1545
  if self.vector_db is None:
1278
1546
  log_warning("No vector DB provided")
1279
1547
  return False
1280
1548
  return self.vector_db.delete_by_name(name)
1281
1549
 
1282
1550
  def remove_vectors_by_metadata(self, metadata: Dict[str, Any]) -> bool:
1551
+ from agno.vectordb import VectorDb
1552
+
1553
+ self.vector_db = cast(VectorDb, self.vector_db)
1283
1554
  if self.vector_db is None:
1284
1555
  log_warning("No vector DB provided")
1285
1556
  return False
@@ -1290,10 +1561,46 @@ class Knowledge:
1290
1561
  def patch_content(self, content: Content) -> Optional[Dict[str, Any]]:
1291
1562
  return self._update_content(content)
1292
1563
 
1564
+ async def apatch_content(self, content: Content) -> Optional[Dict[str, Any]]:
1565
+ return await self._aupdate_content(content)
1566
+
1293
1567
  def get_content_by_id(self, content_id: str) -> Optional[Content]:
1294
1568
  if self.contents_db is None:
1295
1569
  raise ValueError("No contents db provided")
1570
+
1571
+ if isinstance(self.contents_db, AsyncBaseDb):
1572
+ raise ValueError(
1573
+ "get_content_by_id() is not supported for async databases. Please use aget_content_by_id() instead."
1574
+ )
1575
+
1296
1576
  content_row = self.contents_db.get_knowledge_content(content_id)
1577
+
1578
+ if content_row is None:
1579
+ return None
1580
+ content = Content(
1581
+ id=content_row.id,
1582
+ name=content_row.name,
1583
+ description=content_row.description,
1584
+ metadata=content_row.metadata,
1585
+ file_type=content_row.type,
1586
+ size=content_row.size,
1587
+ status=ContentStatus(content_row.status) if content_row.status else None,
1588
+ status_message=content_row.status_message,
1589
+ created_at=content_row.created_at,
1590
+ updated_at=content_row.updated_at if content_row.updated_at else content_row.created_at,
1591
+ external_id=content_row.external_id,
1592
+ )
1593
+ return content
1594
+
1595
+ async def aget_content_by_id(self, content_id: str) -> Optional[Content]:
1596
+ if self.contents_db is None:
1597
+ raise ValueError("No contents db provided")
1598
+
1599
+ if isinstance(self.contents_db, AsyncBaseDb):
1600
+ content_row = await self.contents_db.get_knowledge_content(content_id)
1601
+ else:
1602
+ content_row = self.contents_db.get_knowledge_content(content_id)
1603
+
1297
1604
  if content_row is None:
1298
1605
  return None
1299
1606
  content = Content(
@@ -1320,6 +1627,10 @@ class Knowledge:
1320
1627
  ) -> Tuple[List[Content], int]:
1321
1628
  if self.contents_db is None:
1322
1629
  raise ValueError("No contents db provided")
1630
+
1631
+ if isinstance(self.contents_db, AsyncBaseDb):
1632
+ raise ValueError("get_content() is not supported for async databases. Please use aget_content() instead.")
1633
+
1323
1634
  contents, count = self.contents_db.get_knowledge_contents(
1324
1635
  limit=limit, page=page, sort_by=sort_by, sort_order=sort_order
1325
1636
  )
@@ -1343,9 +1654,53 @@ class Knowledge:
1343
1654
  result.append(content)
1344
1655
  return result, count
1345
1656
 
1657
+ async def aget_content(
1658
+ self,
1659
+ limit: Optional[int] = None,
1660
+ page: Optional[int] = None,
1661
+ sort_by: Optional[str] = None,
1662
+ sort_order: Optional[str] = None,
1663
+ ) -> Tuple[List[Content], int]:
1664
+ if self.contents_db is None:
1665
+ raise ValueError("No contents db provided")
1666
+
1667
+ if isinstance(self.contents_db, AsyncBaseDb):
1668
+ contents, count = await self.contents_db.get_knowledge_contents(
1669
+ limit=limit, page=page, sort_by=sort_by, sort_order=sort_order
1670
+ )
1671
+ else:
1672
+ contents, count = self.contents_db.get_knowledge_contents(
1673
+ limit=limit, page=page, sort_by=sort_by, sort_order=sort_order
1674
+ )
1675
+
1676
+ result = []
1677
+ for content_row in contents:
1678
+ # Create Content from database row
1679
+ content = Content(
1680
+ id=content_row.id,
1681
+ name=content_row.name,
1682
+ description=content_row.description,
1683
+ metadata=content_row.metadata,
1684
+ size=content_row.size,
1685
+ file_type=content_row.type,
1686
+ status=ContentStatus(content_row.status) if content_row.status else None,
1687
+ status_message=content_row.status_message,
1688
+ created_at=content_row.created_at,
1689
+ updated_at=content_row.updated_at if content_row.updated_at else content_row.created_at,
1690
+ external_id=content_row.external_id,
1691
+ )
1692
+ result.append(content)
1693
+ return result, count
1694
+
1346
1695
  def get_content_status(self, content_id: str) -> Tuple[Optional[ContentStatus], Optional[str]]:
1347
1696
  if self.contents_db is None:
1348
1697
  raise ValueError("No contents db provided")
1698
+
1699
+ if isinstance(self.contents_db, AsyncBaseDb):
1700
+ raise ValueError(
1701
+ "get_content_status() is not supported for async databases. Please use aget_content_status() instead."
1702
+ )
1703
+
1349
1704
  content_row = self.contents_db.get_knowledge_content(content_id)
1350
1705
  if content_row is None:
1351
1706
  return None, "Content not found"
@@ -1365,7 +1720,37 @@ class Knowledge:
1365
1720
 
1366
1721
  return status, content_row.status_message
1367
1722
 
1723
+ async def aget_content_status(self, content_id: str) -> Tuple[Optional[ContentStatus], Optional[str]]:
1724
+ if self.contents_db is None:
1725
+ raise ValueError("No contents db provided")
1726
+
1727
+ if isinstance(self.contents_db, AsyncBaseDb):
1728
+ content_row = await self.contents_db.get_knowledge_content(content_id)
1729
+ else:
1730
+ content_row = self.contents_db.get_knowledge_content(content_id)
1731
+
1732
+ if content_row is None:
1733
+ return None, "Content not found"
1734
+
1735
+ # Convert string status to enum, defaulting to PROCESSING if unknown
1736
+ status_str = content_row.status
1737
+ try:
1738
+ status = ContentStatus(status_str.lower()) if status_str else ContentStatus.PROCESSING
1739
+ except ValueError:
1740
+ # Handle legacy or unknown statuses
1741
+ if status_str and "failed" in status_str.lower():
1742
+ status = ContentStatus.FAILED
1743
+ elif status_str and "completed" in status_str.lower():
1744
+ status = ContentStatus.COMPLETED
1745
+ else:
1746
+ status = ContentStatus.PROCESSING
1747
+
1748
+ return status, content_row.status_message
1749
+
1368
1750
  def remove_content_by_id(self, content_id: str):
1751
+ from agno.vectordb import VectorDb
1752
+
1753
+ self.vector_db = cast(VectorDb, self.vector_db)
1369
1754
  if self.vector_db is not None:
1370
1755
  if self.vector_db.__class__.__name__ == "LightRag":
1371
1756
  # For LightRAG, get the content first to find the external_id
@@ -1380,12 +1765,36 @@ class Knowledge:
1380
1765
  if self.contents_db is not None:
1381
1766
  self.contents_db.delete_knowledge_content(content_id)
1382
1767
 
1768
+ async def aremove_content_by_id(self, content_id: str):
1769
+ if self.vector_db is not None:
1770
+ if self.vector_db.__class__.__name__ == "LightRag":
1771
+ # For LightRAG, get the content first to find the external_id
1772
+ content = await self.aget_content_by_id(content_id)
1773
+ if content and content.external_id:
1774
+ self.vector_db.delete_by_external_id(content.external_id) # type: ignore
1775
+ else:
1776
+ log_warning(f"No external_id found for content {content_id}, cannot delete from LightRAG")
1777
+ else:
1778
+ self.vector_db.delete_by_content_id(content_id)
1779
+
1780
+ if self.contents_db is not None:
1781
+ if isinstance(self.contents_db, AsyncBaseDb):
1782
+ await self.contents_db.delete_knowledge_content(content_id)
1783
+ else:
1784
+ self.contents_db.delete_knowledge_content(content_id)
1785
+
1383
1786
  def remove_all_content(self):
1384
1787
  contents, _ = self.get_content()
1385
1788
  for content in contents:
1386
1789
  if content.id is not None:
1387
1790
  self.remove_content_by_id(content.id)
1388
1791
 
1792
+ async def aremove_all_content(self):
1793
+ contents, _ = await self.aget_content()
1794
+ for content in contents:
1795
+ if content.id is not None:
1796
+ await self.aremove_content_by_id(content.id)
1797
+
1389
1798
  # --- Reader Factory Integration ---
1390
1799
 
1391
1800
  def construct_readers(self):
@@ -1423,12 +1832,6 @@ class Knowledge:
1423
1832
  log_info(f"Selecting reader for extension: {extension}")
1424
1833
  return ReaderFactory.get_reader_for_extension(extension)
1425
1834
 
1426
- def get_filters(self) -> List[str]:
1427
- return [
1428
- "filter_tag_1",
1429
- "filter_tag2",
1430
- ]
1431
-
1432
1835
  # --- Convenience Properties for Backward Compatibility ---
1433
1836
 
1434
1837
  def _is_text_mime_type(self, mime_type: str) -> bool:
@@ -1520,6 +1923,11 @@ class Knowledge:
1520
1923
  """Docx reader - lazy loaded via factory."""
1521
1924
  return self._get_reader("docx")
1522
1925
 
1926
+ @property
1927
+ def pptx_reader(self) -> Optional[Reader]:
1928
+ """PPTX reader - lazy loaded via factory."""
1929
+ return self._get_reader("pptx")
1930
+
1523
1931
  @property
1524
1932
  def json_reader(self) -> Optional[Reader]:
1525
1933
  """JSON reader - lazy loaded via factory."""