agno 2.3.22__py3-none-any.whl → 2.3.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. agno/agent/agent.py +28 -1
  2. agno/agent/remote.py +1 -1
  3. agno/db/mongo/mongo.py +9 -1
  4. agno/db/mysql/async_mysql.py +5 -7
  5. agno/db/mysql/mysql.py +5 -7
  6. agno/db/mysql/schemas.py +39 -21
  7. agno/db/postgres/async_postgres.py +10 -2
  8. agno/db/postgres/postgres.py +5 -7
  9. agno/db/postgres/schemas.py +39 -21
  10. agno/db/singlestore/schemas.py +41 -21
  11. agno/db/singlestore/singlestore.py +14 -3
  12. agno/db/sqlite/async_sqlite.py +7 -2
  13. agno/db/sqlite/schemas.py +36 -21
  14. agno/db/sqlite/sqlite.py +3 -7
  15. agno/knowledge/chunking/markdown.py +94 -8
  16. agno/knowledge/chunking/semantic.py +2 -2
  17. agno/knowledge/knowledge.py +215 -207
  18. agno/models/base.py +32 -8
  19. agno/models/google/gemini.py +27 -4
  20. agno/os/routers/agents/router.py +1 -1
  21. agno/os/routers/evals/evals.py +2 -2
  22. agno/os/routers/knowledge/knowledge.py +21 -5
  23. agno/os/routers/knowledge/schemas.py +1 -1
  24. agno/os/routers/memory/memory.py +4 -4
  25. agno/os/routers/session/session.py +2 -2
  26. agno/os/routers/teams/router.py +2 -2
  27. agno/os/routers/traces/traces.py +3 -3
  28. agno/os/routers/workflows/router.py +1 -1
  29. agno/os/schema.py +1 -1
  30. agno/os/utils.py +1 -1
  31. agno/remote/base.py +1 -1
  32. agno/team/remote.py +1 -1
  33. agno/team/team.py +24 -4
  34. agno/tools/brandfetch.py +27 -18
  35. agno/tools/browserbase.py +150 -13
  36. agno/tools/crawl4ai.py +3 -0
  37. agno/tools/file.py +14 -13
  38. agno/tools/function.py +15 -2
  39. agno/tools/mcp/mcp.py +1 -0
  40. agno/tools/mlx_transcribe.py +10 -7
  41. agno/tools/python.py +14 -6
  42. agno/tools/toolkit.py +122 -23
  43. agno/vectordb/cassandra/cassandra.py +1 -1
  44. agno/vectordb/chroma/chromadb.py +1 -1
  45. agno/vectordb/clickhouse/clickhousedb.py +1 -1
  46. agno/vectordb/couchbase/couchbase.py +1 -1
  47. agno/vectordb/milvus/milvus.py +1 -1
  48. agno/vectordb/mongodb/mongodb.py +13 -3
  49. agno/vectordb/pgvector/pgvector.py +1 -1
  50. agno/vectordb/pineconedb/pineconedb.py +2 -2
  51. agno/vectordb/qdrant/qdrant.py +1 -1
  52. agno/vectordb/redis/redisdb.py +2 -2
  53. agno/vectordb/singlestore/singlestore.py +1 -1
  54. agno/vectordb/surrealdb/surrealdb.py +2 -2
  55. agno/vectordb/weaviate/weaviate.py +1 -1
  56. agno/workflow/remote.py +1 -1
  57. agno/workflow/workflow.py +14 -0
  58. {agno-2.3.22.dist-info → agno-2.3.24.dist-info}/METADATA +1 -1
  59. {agno-2.3.22.dist-info → agno-2.3.24.dist-info}/RECORD +62 -62
  60. {agno-2.3.22.dist-info → agno-2.3.24.dist-info}/WHEEL +0 -0
  61. {agno-2.3.22.dist-info → agno-2.3.24.dist-info}/licenses/LICENSE +0 -0
  62. {agno-2.3.22.dist-info → agno-2.3.24.dist-info}/top_level.txt +0 -0
@@ -34,7 +34,14 @@ class KnowledgeContentOrigin(Enum):
34
34
 
35
35
  @dataclass
36
36
  class Knowledge:
37
- """Knowledge class"""
37
+ """Knowledge class
38
+
39
+ Args:
40
+ vector_db: Vector database for storing and searching embeddings (required)
41
+ contents_db: Optional contents database for metadata tracking and filter validation.
42
+ When configured, enables validation of agentic filter keys.
43
+ When not configured, filters are passed directly to vector_db.
44
+ """
38
45
 
39
46
  name: Optional[str] = None
40
47
  description: Optional[str] = None
@@ -2027,13 +2034,7 @@ class Knowledge:
2027
2034
  self.vector_db.update_metadata(content_id=content.id, metadata=content.metadata or {})
2028
2035
 
2029
2036
  return content_row.to_dict()
2030
-
2031
- else:
2032
- if self.name:
2033
- log_warning(f"Contents DB not found for knowledge base: {self.name}")
2034
- else:
2035
- log_warning("Contents DB not found for knowledge base")
2036
- return None
2037
+ return None
2037
2038
 
2038
2039
  async def _aupdate_content(self, content: Content) -> Optional[Dict[str, Any]]:
2039
2040
  if self.contents_db:
@@ -2073,91 +2074,145 @@ class Knowledge:
2073
2074
  self.vector_db.update_metadata(content_id=content.id, metadata=content.metadata or {})
2074
2075
 
2075
2076
  return content_row.to_dict()
2077
+ return None
2076
2078
 
2077
- else:
2078
- if self.name:
2079
- log_warning(f"Contents DB not found for knowledge base: {self.name}")
2080
- else:
2081
- log_warning("Contents DB not found for knowledge base")
2079
+ def _prepare_lightrag_path_data(self, content: Content) -> Optional[Tuple[bytes, str, str]]:
2080
+ """Prepare file data from a path for LightRAG upload.
2081
+
2082
+ Returns:
2083
+ Tuple of (file_content, filename, file_type) or None if preparation fails.
2084
+ """
2085
+ if content.file_data is None:
2086
+ log_warning("No file data provided")
2087
+
2088
+ if content.path is None:
2089
+ log_error("No path provided for content")
2090
+ return None
2091
+
2092
+ path = Path(content.path)
2093
+ log_info(f"Uploading file to LightRAG from path: {path}")
2094
+
2095
+ with open(path, "rb") as f:
2096
+ file_content = f.read()
2097
+
2098
+ file_type = content.file_type or path.suffix
2099
+ return (file_content, path.name, file_type)
2100
+
2101
+ def _prepare_lightrag_url_data(self, content: Content) -> Optional[Tuple[str, str]]:
2102
+ """Prepare text data from a URL for LightRAG upload.
2103
+
2104
+ Returns:
2105
+ Tuple of (file_source, text) or None if preparation fails.
2106
+ """
2107
+ log_info(f"Uploading file to LightRAG from URL: {content.url}")
2108
+
2109
+ reader = content.reader or self.website_reader
2110
+ if reader is None:
2111
+ log_error("No URL reader available")
2112
+ return None
2113
+
2114
+ reader.chunk = False
2115
+ read_documents = reader.read(content.url, name=content.name)
2116
+ if not content.id:
2117
+ content.id = generate_id(content.content_hash or "")
2118
+ self._prepare_documents_for_insert(read_documents, content.id)
2119
+
2120
+ if not read_documents:
2121
+ log_error("No documents read from URL")
2082
2122
  return None
2083
2123
 
2124
+ return (content.url or "", read_documents[0].content)
2125
+
2126
+ def _prepare_lightrag_topic_data(self, content: Content) -> Optional[Tuple[str, str]]:
2127
+ """Prepare text data from topics for LightRAG upload.
2128
+
2129
+ Returns:
2130
+ Tuple of (file_source, text) or None if preparation fails.
2131
+ """
2132
+ log_info(f"Uploading file to LightRAG: {content.name}")
2133
+
2134
+ if content.reader is None:
2135
+ log_error("No reader available for topic content")
2136
+ return None
2137
+
2138
+ if not content.topics:
2139
+ log_error("No topics available for content")
2140
+ return None
2141
+
2142
+ read_documents = content.reader.read(content.topics)
2143
+ if not read_documents:
2144
+ log_warning(f"No documents found for LightRAG upload: {content.name}")
2145
+ return None
2146
+
2147
+ return (content.topics[0], read_documents[0].content)
2148
+
2149
+ def _prepare_lightrag_file_data(self, content: Content) -> Optional[Tuple[Union[str, bytes], str, Optional[str]]]:
2150
+ """Prepare file data from file_data content for LightRAG upload.
2151
+
2152
+ Returns:
2153
+ Tuple of (file_content, filename, content_type) or None if preparation fails.
2154
+ """
2155
+ filename = content.file_data.filename if content.file_data and content.file_data.filename else "uploaded_file"
2156
+ log_info(f"Uploading file to LightRAG: {filename}")
2157
+
2158
+ if not (content.file_data and content.file_data.content):
2159
+ log_warning(f"No file data available for LightRAG upload: {content.name}")
2160
+ return None
2161
+
2162
+ return (content.file_data.content, filename, content.file_data.type)
2163
+
2084
2164
  async def _process_lightrag_content_async(self, content: Content, content_type: KnowledgeContentOrigin) -> None:
2085
2165
  from agno.vectordb import VectorDb
2086
2166
 
2087
2167
  self.vector_db = cast(VectorDb, self.vector_db)
2088
2168
 
2089
2169
  await self._add_to_contents_db_async(content)
2090
- if content_type == KnowledgeContentOrigin.PATH:
2091
- if content.file_data is None:
2092
- log_warning("No file data provided")
2093
-
2094
- if content.path is None:
2095
- log_error("No path provided for content")
2096
- return
2097
2170
 
2098
- path = Path(content.path)
2099
-
2100
- log_info(f"Uploading file to LightRAG from path: {path}")
2171
+ if content_type == KnowledgeContentOrigin.PATH:
2101
2172
  try:
2102
- # Read the file content from path
2103
- with open(path, "rb") as f:
2104
- file_content = f.read()
2173
+ path_data = self._prepare_lightrag_path_data(content)
2174
+ if path_data is None:
2175
+ content.status = ContentStatus.FAILED
2176
+ await self._aupdate_content(content)
2177
+ return
2105
2178
 
2106
- # Get file type from extension or content.file_type
2107
- file_type = content.file_type or path.suffix
2179
+ file_content, filename, file_type = path_data
2108
2180
 
2109
2181
  if self.vector_db and hasattr(self.vector_db, "insert_file_bytes"):
2110
2182
  result = await self.vector_db.insert_file_bytes(
2111
2183
  file_content=file_content,
2112
- filename=path.name, # Use the original filename with extension
2184
+ filename=filename,
2113
2185
  content_type=file_type,
2114
- send_metadata=True, # Enable metadata so server knows the file type
2186
+ send_metadata=True,
2115
2187
  )
2116
-
2117
2188
  else:
2118
2189
  log_error("Vector database does not support file insertion")
2119
2190
  content.status = ContentStatus.FAILED
2120
2191
  await self._aupdate_content(content)
2121
2192
  return
2193
+
2122
2194
  content.external_id = result
2123
2195
  content.status = ContentStatus.COMPLETED
2124
2196
  await self._aupdate_content(content)
2125
- return
2126
2197
 
2127
2198
  except Exception as e:
2128
2199
  log_error(f"Error uploading file to LightRAG: {e}")
2129
2200
  content.status = ContentStatus.FAILED
2130
2201
  content.status_message = f"Could not upload to LightRAG: {str(e)}"
2131
2202
  await self._aupdate_content(content)
2132
- return
2133
2203
 
2134
2204
  elif content_type == KnowledgeContentOrigin.URL:
2135
- log_info(f"Uploading file to LightRAG from URL: {content.url}")
2136
2205
  try:
2137
- reader = content.reader or self.website_reader
2138
- if reader is None:
2139
- log_error("No URL reader available")
2206
+ url_data = self._prepare_lightrag_url_data(content)
2207
+ if url_data is None:
2140
2208
  content.status = ContentStatus.FAILED
2141
2209
  await self._aupdate_content(content)
2142
2210
  return
2143
2211
 
2144
- reader.chunk = False
2145
- read_documents = reader.read(content.url, name=content.name)
2146
- if not content.id:
2147
- content.id = generate_id(content.content_hash or "")
2148
- self._prepare_documents_for_insert(read_documents, content.id)
2149
-
2150
- if not read_documents:
2151
- log_error("No documents read from URL")
2152
- content.status = ContentStatus.FAILED
2153
- await self._aupdate_content(content)
2154
- return
2212
+ file_source, text = url_data
2155
2213
 
2156
2214
  if self.vector_db and hasattr(self.vector_db, "insert_text"):
2157
- result = await self.vector_db.insert_text(
2158
- file_source=content.url,
2159
- text=read_documents[0].content,
2160
- )
2215
+ result = await self.vector_db.insert_text(file_source=file_source, text=text)
2161
2216
  else:
2162
2217
  log_error("Vector database does not support text insertion")
2163
2218
  content.status = ContentStatus.FAILED
@@ -2167,77 +2222,58 @@ class Knowledge:
2167
2222
  content.external_id = result
2168
2223
  content.status = ContentStatus.COMPLETED
2169
2224
  await self._aupdate_content(content)
2170
- return
2171
2225
 
2172
2226
  except Exception as e:
2173
2227
  log_error(f"Error uploading file to LightRAG: {e}")
2174
2228
  content.status = ContentStatus.FAILED
2175
2229
  content.status_message = f"Could not upload to LightRAG: {str(e)}"
2176
2230
  await self._aupdate_content(content)
2177
- return
2178
2231
 
2179
2232
  elif content_type == KnowledgeContentOrigin.CONTENT:
2180
- filename = (
2181
- content.file_data.filename if content.file_data and content.file_data.filename else "uploaded_file"
2182
- )
2183
- log_info(f"Uploading file to LightRAG: {filename}")
2184
-
2185
- # Use the content from file_data
2186
- if content.file_data and content.file_data.content:
2187
- if self.vector_db and hasattr(self.vector_db, "insert_file_bytes"):
2188
- result = await self.vector_db.insert_file_bytes(
2189
- file_content=content.file_data.content,
2190
- filename=filename,
2191
- content_type=content.file_data.type,
2192
- send_metadata=True, # Enable metadata so server knows the file type
2193
- )
2194
- else:
2195
- log_error("Vector database does not support file insertion")
2196
- content.status = ContentStatus.FAILED
2197
- await self._aupdate_content(content)
2198
- return
2199
- content.external_id = result
2200
- content.status = ContentStatus.COMPLETED
2201
- await self._aupdate_content(content)
2202
- else:
2203
- log_warning(f"No file data available for LightRAG upload: {content.name}")
2204
- return
2233
+ file_data = self._prepare_lightrag_file_data(content)
2234
+ if file_data is None:
2235
+ return
2205
2236
 
2206
- elif content_type == KnowledgeContentOrigin.TOPIC:
2207
- log_info(f"Uploading file to LightRAG: {content.name}")
2237
+ file_content_data, filename_data, content_type_str = file_data
2208
2238
 
2209
- if content.reader is None:
2210
- log_error("No reader available for topic content")
2239
+ if self.vector_db and hasattr(self.vector_db, "insert_file_bytes"):
2240
+ result = await self.vector_db.insert_file_bytes(
2241
+ file_content=file_content_data,
2242
+ filename=filename_data,
2243
+ content_type=content_type_str,
2244
+ send_metadata=True,
2245
+ )
2246
+ else:
2247
+ log_error("Vector database does not support file insertion")
2211
2248
  content.status = ContentStatus.FAILED
2212
2249
  await self._aupdate_content(content)
2213
2250
  return
2214
2251
 
2215
- if not content.topics:
2216
- log_error("No topics available for content")
2252
+ content.external_id = result
2253
+ content.status = ContentStatus.COMPLETED
2254
+ await self._aupdate_content(content)
2255
+
2256
+ elif content_type == KnowledgeContentOrigin.TOPIC:
2257
+ topic_data = self._prepare_lightrag_topic_data(content)
2258
+ if topic_data is None:
2217
2259
  content.status = ContentStatus.FAILED
2218
2260
  await self._aupdate_content(content)
2219
2261
  return
2220
2262
 
2221
- read_documents = content.reader.read(content.topics)
2222
- if len(read_documents) > 0:
2223
- if self.vector_db and hasattr(self.vector_db, "insert_text"):
2224
- result = await self.vector_db.insert_text(
2225
- file_source=content.topics[0],
2226
- text=read_documents[0].content,
2227
- )
2228
- else:
2229
- log_error("Vector database does not support text insertion")
2230
- content.status = ContentStatus.FAILED
2231
- await self._aupdate_content(content)
2232
- return
2233
- content.external_id = result
2234
- content.status = ContentStatus.COMPLETED
2235
- await self._aupdate_content(content)
2236
- return
2263
+ file_source, text = topic_data
2264
+
2265
+ if self.vector_db and hasattr(self.vector_db, "insert_text"):
2266
+ result = await self.vector_db.insert_text(file_source=file_source, text=text)
2237
2267
  else:
2238
- log_warning(f"No documents found for LightRAG upload: {content.name}")
2268
+ log_error("Vector database does not support text insertion")
2269
+ content.status = ContentStatus.FAILED
2270
+ await self._aupdate_content(content)
2239
2271
  return
2240
2272
 
2273
+ content.external_id = result
2274
+ content.status = ContentStatus.COMPLETED
2275
+ await self._aupdate_content(content)
2276
+
2241
2277
  def _process_lightrag_content(self, content: Content, content_type: KnowledgeContentOrigin) -> None:
2242
2278
  """Synchronously process LightRAG content. Uses asyncio.run() only for LightRAG-specific async methods."""
2243
2279
  from agno.vectordb import VectorDb
@@ -2245,31 +2281,22 @@ class Knowledge:
2245
2281
  self.vector_db = cast(VectorDb, self.vector_db)
2246
2282
 
2247
2283
  self._add_to_contents_db(content)
2248
- if content_type == KnowledgeContentOrigin.PATH:
2249
- if content.file_data is None:
2250
- log_warning("No file data provided")
2251
2284
 
2252
- if content.path is None:
2253
- log_error("No path provided for content")
2254
- return
2255
-
2256
- path = Path(content.path)
2257
-
2258
- log_info(f"Uploading file to LightRAG from path: {path}")
2285
+ if content_type == KnowledgeContentOrigin.PATH:
2259
2286
  try:
2260
- # Read the file content from path
2261
- with open(path, "rb") as f:
2262
- file_content = f.read()
2287
+ path_data = self._prepare_lightrag_path_data(content)
2288
+ if path_data is None:
2289
+ content.status = ContentStatus.FAILED
2290
+ self._update_content(content)
2291
+ return
2263
2292
 
2264
- # Get file type from extension or content.file_type
2265
- file_type = content.file_type or path.suffix
2293
+ file_content, filename, file_type = path_data
2266
2294
 
2267
2295
  if self.vector_db and hasattr(self.vector_db, "insert_file_bytes"):
2268
- # LightRAG only has async methods, use asyncio.run() here
2269
2296
  result = asyncio.run(
2270
2297
  self.vector_db.insert_file_bytes(
2271
2298
  file_content=file_content,
2272
- filename=path.name,
2299
+ filename=filename,
2273
2300
  content_type=file_type,
2274
2301
  send_metadata=True,
2275
2302
  )
@@ -2279,48 +2306,29 @@ class Knowledge:
2279
2306
  content.status = ContentStatus.FAILED
2280
2307
  self._update_content(content)
2281
2308
  return
2309
+
2282
2310
  content.external_id = result
2283
2311
  content.status = ContentStatus.COMPLETED
2284
2312
  self._update_content(content)
2285
- return
2286
2313
 
2287
2314
  except Exception as e:
2288
2315
  log_error(f"Error uploading file to LightRAG: {e}")
2289
2316
  content.status = ContentStatus.FAILED
2290
2317
  content.status_message = f"Could not upload to LightRAG: {str(e)}"
2291
2318
  self._update_content(content)
2292
- return
2293
2319
 
2294
2320
  elif content_type == KnowledgeContentOrigin.URL:
2295
- log_info(f"Uploading file to LightRAG from URL: {content.url}")
2296
2321
  try:
2297
- reader = content.reader or self.website_reader
2298
- if reader is None:
2299
- log_error("No URL reader available")
2322
+ url_data = self._prepare_lightrag_url_data(content)
2323
+ if url_data is None:
2300
2324
  content.status = ContentStatus.FAILED
2301
2325
  self._update_content(content)
2302
2326
  return
2303
2327
 
2304
- reader.chunk = False
2305
- read_documents = reader.read(content.url, name=content.name)
2306
- if not content.id:
2307
- content.id = generate_id(content.content_hash or "")
2308
- self._prepare_documents_for_insert(read_documents, content.id)
2309
-
2310
- if not read_documents:
2311
- log_error("No documents read from URL")
2312
- content.status = ContentStatus.FAILED
2313
- self._update_content(content)
2314
- return
2328
+ file_source, text = url_data
2315
2329
 
2316
2330
  if self.vector_db and hasattr(self.vector_db, "insert_text"):
2317
- # LightRAG only has async methods, use asyncio.run() here
2318
- result = asyncio.run(
2319
- self.vector_db.insert_text(
2320
- file_source=content.url,
2321
- text=read_documents[0].content,
2322
- )
2323
- )
2331
+ result = asyncio.run(self.vector_db.insert_text(file_source=file_source, text=text))
2324
2332
  else:
2325
2333
  log_error("Vector database does not support text insertion")
2326
2334
  content.status = ContentStatus.FAILED
@@ -2330,83 +2338,60 @@ class Knowledge:
2330
2338
  content.external_id = result
2331
2339
  content.status = ContentStatus.COMPLETED
2332
2340
  self._update_content(content)
2333
- return
2334
2341
 
2335
2342
  except Exception as e:
2336
2343
  log_error(f"Error uploading file to LightRAG: {e}")
2337
2344
  content.status = ContentStatus.FAILED
2338
2345
  content.status_message = f"Could not upload to LightRAG: {str(e)}"
2339
2346
  self._update_content(content)
2340
- return
2341
2347
 
2342
2348
  elif content_type == KnowledgeContentOrigin.CONTENT:
2343
- filename = (
2344
- content.file_data.filename if content.file_data and content.file_data.filename else "uploaded_file"
2345
- )
2346
- log_info(f"Uploading file to LightRAG: {filename}")
2349
+ file_data = self._prepare_lightrag_file_data(content)
2350
+ if file_data is None:
2351
+ return
2347
2352
 
2348
- # Use the content from file_data
2349
- if content.file_data and content.file_data.content:
2350
- if self.vector_db and hasattr(self.vector_db, "insert_file_bytes"):
2351
- # LightRAG only has async methods, use asyncio.run() here
2352
- result = asyncio.run(
2353
- self.vector_db.insert_file_bytes(
2354
- file_content=content.file_data.content,
2355
- filename=filename,
2356
- content_type=content.file_data.type,
2357
- send_metadata=True,
2358
- )
2353
+ file_content_data, filename_data, content_type_str = file_data
2354
+
2355
+ if self.vector_db and hasattr(self.vector_db, "insert_file_bytes"):
2356
+ result = asyncio.run(
2357
+ self.vector_db.insert_file_bytes(
2358
+ file_content=file_content_data,
2359
+ filename=filename_data,
2360
+ content_type=content_type_str,
2361
+ send_metadata=True,
2359
2362
  )
2360
- else:
2361
- log_error("Vector database does not support file insertion")
2362
- content.status = ContentStatus.FAILED
2363
- self._update_content(content)
2364
- return
2365
- content.external_id = result
2366
- content.status = ContentStatus.COMPLETED
2367
- self._update_content(content)
2363
+ )
2368
2364
  else:
2369
- log_warning(f"No file data available for LightRAG upload: {content.name}")
2370
- return
2371
-
2372
- elif content_type == KnowledgeContentOrigin.TOPIC:
2373
- log_info(f"Uploading file to LightRAG: {content.name}")
2374
-
2375
- if content.reader is None:
2376
- log_error("No reader available for topic content")
2365
+ log_error("Vector database does not support file insertion")
2377
2366
  content.status = ContentStatus.FAILED
2378
2367
  self._update_content(content)
2379
2368
  return
2380
2369
 
2381
- if not content.topics:
2382
- log_error("No topics available for content")
2370
+ content.external_id = result
2371
+ content.status = ContentStatus.COMPLETED
2372
+ self._update_content(content)
2373
+
2374
+ elif content_type == KnowledgeContentOrigin.TOPIC:
2375
+ topic_data = self._prepare_lightrag_topic_data(content)
2376
+ if topic_data is None:
2383
2377
  content.status = ContentStatus.FAILED
2384
2378
  self._update_content(content)
2385
2379
  return
2386
2380
 
2387
- read_documents = content.reader.read(content.topics)
2388
- if len(read_documents) > 0:
2389
- if self.vector_db and hasattr(self.vector_db, "insert_text"):
2390
- # LightRAG only has async methods, use asyncio.run() here
2391
- result = asyncio.run(
2392
- self.vector_db.insert_text(
2393
- file_source=content.topics[0],
2394
- text=read_documents[0].content,
2395
- )
2396
- )
2397
- else:
2398
- log_error("Vector database does not support text insertion")
2399
- content.status = ContentStatus.FAILED
2400
- self._update_content(content)
2401
- return
2402
- content.external_id = result
2403
- content.status = ContentStatus.COMPLETED
2404
- self._update_content(content)
2405
- return
2381
+ file_source, text = topic_data
2382
+
2383
+ if self.vector_db and hasattr(self.vector_db, "insert_text"):
2384
+ result = asyncio.run(self.vector_db.insert_text(file_source=file_source, text=text))
2406
2385
  else:
2407
- log_warning(f"No documents found for LightRAG upload: {content.name}")
2386
+ log_error("Vector database does not support text insertion")
2387
+ content.status = ContentStatus.FAILED
2388
+ self._update_content(content)
2408
2389
  return
2409
2390
 
2391
+ content.external_id = result
2392
+ content.status = ContentStatus.COMPLETED
2393
+ self._update_content(content)
2394
+
2410
2395
  def search(
2411
2396
  self,
2412
2397
  query: str,
@@ -2473,8 +2458,16 @@ class Knowledge:
2473
2458
  return []
2474
2459
 
2475
2460
  def get_valid_filters(self) -> Set[str]:
2461
+ """Get set of valid filter keys from ContentsDB metadata.
2462
+
2463
+ Returns:
2464
+ Set of metadata keys available for filtering. Empty set if ContentsDB not configured.
2465
+
2466
+ Note:
2467
+ When ContentsDB is not configured, filtering still works - filters are passed
2468
+ directly to the vector database without validation.
2469
+ """
2476
2470
  if self.contents_db is None:
2477
- log_warning("No contents db provided. This is required for filtering.")
2478
2471
  return set()
2479
2472
  contents, _ = self.get_content()
2480
2473
  valid_filters: Set[str] = set()
@@ -2486,7 +2479,9 @@ class Knowledge:
2486
2479
 
2487
2480
  async def async_get_valid_filters(self) -> Set[str]:
2488
2481
  if self.contents_db is None:
2489
- log_warning("No contents db provided. This is required for filtering.")
2482
+ log_info(
2483
+ "ContentsDB not configured. For improved filter validation and reliability, consider adding a ContentsDB."
2484
+ )
2490
2485
  return set()
2491
2486
  contents, _ = await self.aget_content()
2492
2487
  valid_filters: Set[str] = set()
@@ -2506,13 +2501,10 @@ class Knowledge:
2506
2501
  invalid_keys = []
2507
2502
 
2508
2503
  if isinstance(filters, dict):
2509
- # If no metadata filters tracked yet, all keys are considered invalid
2504
+ # If no metadata filters tracked yet, pass all filters through without validation
2510
2505
  if valid_metadata_filters is None or not valid_metadata_filters:
2511
- invalid_keys = list(filters.keys())
2512
- log_warning(
2513
- f"No valid metadata filters tracked yet. All filter keys considered invalid: {invalid_keys}"
2514
- )
2515
- return {}, invalid_keys
2506
+ log_debug("No metadata filter validation available. Passing filters to vector DB without validation.")
2507
+ return filters, []
2516
2508
 
2517
2509
  for key, value in filters.items():
2518
2510
  # Handle both normal keys and prefixed keys like meta_data.key
@@ -2542,10 +2534,26 @@ class Knowledge:
2542
2534
  def validate_filters(
2543
2535
  self, filters: Union[Dict[str, Any], List[FilterExpr]]
2544
2536
  ) -> Tuple[Union[Dict[str, Any], List[FilterExpr]], List[str]]:
2537
+ """Validate filters against known metadata keys from ContentsDB.
2538
+
2539
+ Args:
2540
+ filters: Filters to validate
2541
+
2542
+ Returns:
2543
+ Tuple of (valid_filters, invalid_keys)
2544
+
2545
+ Note:
2546
+ When ContentsDB is not configured, returns (filters, []) - all filters
2547
+ are considered valid and passed through without validation.
2548
+ """
2549
+ if self.contents_db is None:
2550
+ log_info(
2551
+ "ContentsDB not configured. For improved filter validation and reliability, consider adding a ContentsDB."
2552
+ )
2553
+ return filters, []
2545
2554
  valid_filters_from_db = self.get_valid_filters()
2546
2555
 
2547
2556
  valid_filters, invalid_keys = self._validate_filters(filters, valid_filters_from_db)
2548
-
2549
2557
  return valid_filters, invalid_keys
2550
2558
 
2551
2559
  async def async_validate_filters(