agno 2.1.4__py3-none-any.whl → 2.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. agno/agent/agent.py +1767 -535
  2. agno/db/async_postgres/__init__.py +3 -0
  3. agno/db/async_postgres/async_postgres.py +1668 -0
  4. agno/db/async_postgres/schemas.py +124 -0
  5. agno/db/async_postgres/utils.py +289 -0
  6. agno/db/base.py +237 -2
  7. agno/db/dynamo/dynamo.py +2 -2
  8. agno/db/firestore/firestore.py +2 -2
  9. agno/db/firestore/utils.py +4 -2
  10. agno/db/gcs_json/gcs_json_db.py +2 -2
  11. agno/db/in_memory/in_memory_db.py +2 -2
  12. agno/db/json/json_db.py +2 -2
  13. agno/db/migrations/v1_to_v2.py +30 -13
  14. agno/db/mongo/mongo.py +18 -6
  15. agno/db/mysql/mysql.py +35 -13
  16. agno/db/postgres/postgres.py +29 -6
  17. agno/db/redis/redis.py +2 -2
  18. agno/db/singlestore/singlestore.py +2 -2
  19. agno/db/sqlite/sqlite.py +34 -12
  20. agno/db/sqlite/utils.py +8 -3
  21. agno/eval/accuracy.py +50 -43
  22. agno/eval/performance.py +6 -3
  23. agno/eval/reliability.py +6 -3
  24. agno/eval/utils.py +33 -16
  25. agno/exceptions.py +8 -2
  26. agno/knowledge/knowledge.py +260 -46
  27. agno/knowledge/reader/pdf_reader.py +4 -6
  28. agno/knowledge/reader/reader_factory.py +2 -3
  29. agno/memory/manager.py +241 -33
  30. agno/models/anthropic/claude.py +37 -0
  31. agno/os/app.py +8 -7
  32. agno/os/interfaces/a2a/router.py +3 -5
  33. agno/os/interfaces/agui/router.py +4 -1
  34. agno/os/interfaces/agui/utils.py +27 -6
  35. agno/os/interfaces/slack/router.py +2 -4
  36. agno/os/mcp.py +98 -41
  37. agno/os/router.py +23 -0
  38. agno/os/routers/evals/evals.py +52 -20
  39. agno/os/routers/evals/utils.py +14 -14
  40. agno/os/routers/knowledge/knowledge.py +130 -9
  41. agno/os/routers/knowledge/schemas.py +57 -0
  42. agno/os/routers/memory/memory.py +116 -44
  43. agno/os/routers/metrics/metrics.py +16 -6
  44. agno/os/routers/session/session.py +65 -22
  45. agno/os/schema.py +36 -0
  46. agno/os/utils.py +67 -12
  47. agno/reasoning/anthropic.py +80 -0
  48. agno/reasoning/gemini.py +73 -0
  49. agno/reasoning/openai.py +5 -0
  50. agno/reasoning/vertexai.py +76 -0
  51. agno/session/workflow.py +3 -3
  52. agno/team/team.py +918 -175
  53. agno/tools/googlesheets.py +20 -5
  54. agno/tools/mcp_toolbox.py +3 -3
  55. agno/tools/scrapegraph.py +1 -1
  56. agno/utils/models/claude.py +3 -1
  57. agno/utils/streamlit.py +1 -1
  58. agno/vectordb/base.py +22 -1
  59. agno/vectordb/cassandra/cassandra.py +9 -0
  60. agno/vectordb/chroma/chromadb.py +26 -6
  61. agno/vectordb/clickhouse/clickhousedb.py +9 -1
  62. agno/vectordb/couchbase/couchbase.py +11 -0
  63. agno/vectordb/lancedb/lance_db.py +20 -0
  64. agno/vectordb/langchaindb/langchaindb.py +11 -0
  65. agno/vectordb/lightrag/lightrag.py +9 -0
  66. agno/vectordb/llamaindex/llamaindexdb.py +15 -1
  67. agno/vectordb/milvus/milvus.py +23 -0
  68. agno/vectordb/mongodb/mongodb.py +22 -0
  69. agno/vectordb/pgvector/pgvector.py +19 -0
  70. agno/vectordb/pineconedb/pineconedb.py +35 -4
  71. agno/vectordb/qdrant/qdrant.py +24 -0
  72. agno/vectordb/singlestore/singlestore.py +25 -17
  73. agno/vectordb/surrealdb/surrealdb.py +18 -1
  74. agno/vectordb/upstashdb/upstashdb.py +26 -1
  75. agno/vectordb/weaviate/weaviate.py +18 -0
  76. agno/workflow/condition.py +4 -0
  77. agno/workflow/loop.py +4 -0
  78. agno/workflow/parallel.py +4 -0
  79. agno/workflow/router.py +4 -0
  80. agno/workflow/step.py +22 -14
  81. agno/workflow/steps.py +4 -0
  82. agno/workflow/types.py +2 -2
  83. agno/workflow/workflow.py +328 -61
  84. {agno-2.1.4.dist-info → agno-2.1.5.dist-info}/METADATA +100 -41
  85. {agno-2.1.4.dist-info → agno-2.1.5.dist-info}/RECORD +88 -81
  86. {agno-2.1.4.dist-info → agno-2.1.5.dist-info}/WHEEL +0 -0
  87. {agno-2.1.4.dist-info → agno-2.1.5.dist-info}/licenses/LICENSE +0 -0
  88. {agno-2.1.4.dist-info → agno-2.1.5.dist-info}/top_level.txt +0 -0
@@ -12,7 +12,7 @@ from typing import Any, Dict, List, Optional, Set, Tuple, Union, cast, overload
12
12
 
13
13
  from httpx import AsyncClient
14
14
 
15
- from agno.db.base import BaseDb
15
+ from agno.db.base import AsyncBaseDb, BaseDb
16
16
  from agno.db.schemas.knowledge import KnowledgeRow
17
17
  from agno.knowledge.content import Content, ContentAuth, ContentStatus, FileData
18
18
  from agno.knowledge.document import Document
@@ -39,7 +39,7 @@ class Knowledge:
39
39
  name: Optional[str] = None
40
40
  description: Optional[str] = None
41
41
  vector_db: Optional[Any] = None
42
- contents_db: Optional[BaseDb] = None
42
+ contents_db: Optional[Union[BaseDb, AsyncBaseDb]] = None
43
43
  max_results: int = 10
44
44
  readers: Optional[Dict[str, Reader]] = None
45
45
 
@@ -394,10 +394,10 @@ class Knowledge:
394
394
  if self._should_include_file(str(path), include, exclude):
395
395
  log_info(f"Adding file {path} due to include/exclude filters")
396
396
 
397
- self._add_to_contents_db(content)
397
+ await self._add_to_contents_db(content)
398
398
  if self._should_skip(content.content_hash, skip_if_exists): # type: ignore[arg-type]
399
399
  content.status = ContentStatus.COMPLETED
400
- self._update_content(content)
400
+ await self._aupdate_content(content)
401
401
  return
402
402
 
403
403
  # Handle LightRAG special case - read file and upload directly
@@ -483,7 +483,6 @@ class Knowledge:
483
483
  3. Read the content
484
484
  4. Prepare and insert the content in the vector database
485
485
  """
486
-
487
486
  from agno.vectordb import VectorDb
488
487
 
489
488
  self.vector_db = cast(VectorDb, self.vector_db)
@@ -495,7 +494,7 @@ class Knowledge:
495
494
  raise ValueError("No url provided")
496
495
 
497
496
  # 1. Add content to contents database
498
- self._add_to_contents_db(content)
497
+ await self._add_to_contents_db(content)
499
498
  if self._should_skip(content.content_hash, skip_if_exists): # type: ignore[arg-type]
500
499
  content.status = ContentStatus.COMPLETED
501
500
  self._update_content(content)
@@ -513,12 +512,12 @@ class Knowledge:
513
512
  if not all([parsed_url.scheme, parsed_url.netloc]):
514
513
  content.status = ContentStatus.FAILED
515
514
  content.status_message = f"Invalid URL format: {content.url}"
516
- self._update_content(content)
515
+ await self._aupdate_content(content)
517
516
  log_warning(f"Invalid URL format: {content.url}")
518
517
  except Exception as e:
519
518
  content.status = ContentStatus.FAILED
520
519
  content.status_message = f"Invalid URL: {content.url} - {str(e)}"
521
- self._update_content(content)
520
+ await self._aupdate_content(content)
522
521
  log_warning(f"Invalid URL: {content.url} - {str(e)}")
523
522
 
524
523
  # 3. Fetch and load content if file has an extension
@@ -576,7 +575,7 @@ class Knowledge:
576
575
  log_error(f"Error reading URL: {content.url} - {str(e)}")
577
576
  content.status = ContentStatus.FAILED
578
577
  content.status_message = f"Error reading URL: {content.url} - {str(e)}"
579
- self._update_content(content)
578
+ await self._aupdate_content(content)
580
579
  return
581
580
 
582
581
  # 6. Chunk documents if needed
@@ -622,10 +621,10 @@ class Knowledge:
622
621
 
623
622
  log_info(f"Adding content from {content.name}")
624
623
 
625
- self._add_to_contents_db(content)
624
+ await self._add_to_contents_db(content)
626
625
  if self._should_skip(content.content_hash, skip_if_exists): # type: ignore[arg-type]
627
626
  content.status = ContentStatus.COMPLETED
628
- self._update_content(content)
627
+ await self._aupdate_content(content)
629
628
  return
630
629
 
631
630
  if content.file_data and self.vector_db.__class__.__name__ == "LightRag":
@@ -648,7 +647,7 @@ class Knowledge:
648
647
  else:
649
648
  content.status = ContentStatus.FAILED
650
649
  content.status_message = "Text reader not available"
651
- self._update_content(content)
650
+ await self._aupdate_content(content)
652
651
  return
653
652
 
654
653
  elif isinstance(content.file_data, FileData):
@@ -677,12 +676,13 @@ class Knowledge:
677
676
  if len(read_documents) == 0:
678
677
  content.status = ContentStatus.FAILED
679
678
  content.status_message = "Content could not be read"
680
- self._update_content(content)
679
+ await self._aupdate_content(content)
680
+ return
681
681
 
682
682
  else:
683
683
  content.status = ContentStatus.FAILED
684
684
  content.status_message = "No content provided"
685
- self._update_content(content)
685
+ await self._aupdate_content(content)
686
686
  return
687
687
 
688
688
  await self._handle_vector_db_insert(content, read_documents, upsert)
@@ -716,7 +716,7 @@ class Knowledge:
716
716
  content.content_hash = self._build_content_hash(content)
717
717
  content.id = generate_id(content.content_hash)
718
718
 
719
- self._add_to_contents_db(content)
719
+ await self._add_to_contents_db(content)
720
720
  if self._should_skip(content.content_hash, skip_if_exists):
721
721
  content.status = ContentStatus.COMPLETED
722
722
  self._update_content(content)
@@ -726,6 +726,11 @@ class Knowledge:
726
726
  await self._process_lightrag_content(content, KnowledgeContentOrigin.TOPIC)
727
727
  return
728
728
 
729
+ if self.vector_db and self.vector_db.content_hash_exists(content.content_hash) and skip_if_exists:
730
+ log_info(f"Content {content.content_hash} already exists, skipping")
731
+ continue
732
+
733
+ await self._add_to_contents_db(content)
729
734
  if content.reader is None:
730
735
  log_error(f"No reader available for topic: {topic}")
731
736
  content.status = ContentStatus.FAILED
@@ -742,8 +747,7 @@ class Knowledge:
742
747
  else:
743
748
  content.status = ContentStatus.FAILED
744
749
  content.status_message = "No content found for topic"
745
- self._update_content(content)
746
- continue
750
+ await self._aupdate_content(content)
747
751
 
748
752
  await self._handle_vector_db_insert(content, read_documents, upsert)
749
753
 
@@ -812,10 +816,10 @@ class Knowledge:
812
816
  # 3. Hash content and add it to the contents database
813
817
  content_entry.content_hash = self._build_content_hash(content_entry)
814
818
  content_entry.id = generate_id(content_entry.content_hash)
815
- self._add_to_contents_db(content_entry)
819
+ await self._add_to_contents_db(content_entry)
816
820
  if self._should_skip(content_entry.content_hash, skip_if_exists):
817
821
  content_entry.status = ContentStatus.COMPLETED
818
- self._update_content(content_entry)
822
+ await self._aupdate_content(content_entry)
819
823
  return
820
824
 
821
825
  # 4. Select reader
@@ -894,10 +898,10 @@ class Knowledge:
894
898
  # 3. Hash content and add it to the contents database
895
899
  content_entry.content_hash = self._build_content_hash(content_entry)
896
900
  content_entry.id = generate_id(content_entry.content_hash)
897
- self._add_to_contents_db(content_entry)
901
+ await self._add_to_contents_db(content_entry)
898
902
  if self._should_skip(content_entry.content_hash, skip_if_exists):
899
903
  content_entry.status = ContentStatus.COMPLETED
900
- self._update_content(content_entry)
904
+ await self._aupdate_content(content_entry)
901
905
  return
902
906
 
903
907
  # 4. Select reader
@@ -937,7 +941,7 @@ class Knowledge:
937
941
  log_error("No vector database configured")
938
942
  content.status = ContentStatus.FAILED
939
943
  content.status_message = "No vector database configured"
940
- self._update_content(content)
944
+ await self._aupdate_content(content)
941
945
  return
942
946
 
943
947
  if self.vector_db.upsert_available() and upsert:
@@ -947,7 +951,7 @@ class Knowledge:
947
951
  log_error(f"Error upserting document: {e}")
948
952
  content.status = ContentStatus.FAILED
949
953
  content.status_message = "Could not upsert embedding"
950
- self._update_content(content)
954
+ await self._aupdate_content(content)
951
955
  return
952
956
  else:
953
957
  try:
@@ -960,11 +964,11 @@ class Knowledge:
960
964
  log_error(f"Error inserting document: {e}")
961
965
  content.status = ContentStatus.FAILED
962
966
  content.status_message = "Could not insert embedding"
963
- self._update_content(content)
967
+ await self._aupdate_content(content)
964
968
  return
965
969
 
966
970
  content.status = ContentStatus.COMPLETED
967
- self._update_content(content)
971
+ await self._aupdate_content(content)
968
972
 
969
973
  async def _load_content(
970
974
  self,
@@ -1064,7 +1068,7 @@ class Knowledge:
1064
1068
  # Already a string, return as-is
1065
1069
  return value
1066
1070
 
1067
- def _add_to_contents_db(self, content: Content):
1071
+ async def _add_to_contents_db(self, content: Content):
1068
1072
  if self.contents_db:
1069
1073
  created_at = content.created_at if content.created_at else int(time.time())
1070
1074
  updated_at = content.updated_at if content.updated_at else int(time.time())
@@ -1102,13 +1106,21 @@ class Knowledge:
1102
1106
  created_at=created_at,
1103
1107
  updated_at=updated_at,
1104
1108
  )
1105
- self.contents_db.upsert_knowledge_content(knowledge_row=content_row)
1109
+ if isinstance(self.contents_db, AsyncBaseDb):
1110
+ await self.contents_db.upsert_knowledge_content(knowledge_row=content_row)
1111
+ else:
1112
+ self.contents_db.upsert_knowledge_content(knowledge_row=content_row)
1106
1113
 
1107
1114
  def _update_content(self, content: Content) -> Optional[Dict[str, Any]]:
1108
1115
  from agno.vectordb import VectorDb
1109
1116
 
1110
1117
  self.vector_db = cast(VectorDb, self.vector_db)
1111
1118
  if self.contents_db:
1119
+ if isinstance(self.contents_db, AsyncBaseDb):
1120
+ raise ValueError(
1121
+ "update_content() is not supported with an async DB. Please use aupdate_content() instead."
1122
+ )
1123
+
1112
1124
  if not content.id:
1113
1125
  log_warning("Content id is required to update Knowledge content")
1114
1126
  return None
@@ -1156,12 +1168,58 @@ class Knowledge:
1156
1168
  log_warning("Contents DB not found for knowledge base")
1157
1169
  return None
1158
1170
 
1171
+ async def _aupdate_content(self, content: Content) -> Optional[Dict[str, Any]]:
1172
+ if self.contents_db:
1173
+ if not content.id:
1174
+ log_warning("Content id is required to update Knowledge content")
1175
+ return None
1176
+
1177
+ # TODO: we shouldn't check for content here, we should trust the upsert method to handle conflicts
1178
+ if isinstance(self.contents_db, AsyncBaseDb):
1179
+ content_row = await self.contents_db.get_knowledge_content(content.id)
1180
+ else:
1181
+ content_row = self.contents_db.get_knowledge_content(content.id)
1182
+ if content_row is None:
1183
+ log_warning(f"Content row not found for id: {content.id}, cannot update status")
1184
+ return None
1185
+
1186
+ if content.name is not None:
1187
+ content_row.name = content.name
1188
+ if content.description is not None:
1189
+ content_row.description = content.description
1190
+ if content.metadata is not None:
1191
+ content_row.metadata = content.metadata
1192
+ if content.status is not None:
1193
+ content_row.status = content.status
1194
+ if content.status_message is not None:
1195
+ content_row.status_message = content.status_message if content.status_message else ""
1196
+ if content.external_id is not None:
1197
+ content_row.external_id = content.external_id
1198
+
1199
+ content_row.updated_at = int(time.time())
1200
+ if isinstance(self.contents_db, AsyncBaseDb):
1201
+ await self.contents_db.upsert_knowledge_content(knowledge_row=content_row)
1202
+ else:
1203
+ self.contents_db.upsert_knowledge_content(knowledge_row=content_row)
1204
+
1205
+ if self.vector_db and content.metadata:
1206
+ self.vector_db.update_metadata(content_id=content.id, metadata=content.metadata)
1207
+
1208
+ if content.metadata:
1209
+ self.add_filters(content.metadata)
1210
+
1211
+ return content_row.to_dict()
1212
+
1213
+ else:
1214
+ log_warning(f"Contents DB not found for knowledge base: {self.name}")
1215
+ return None
1216
+
1159
1217
  async def _process_lightrag_content(self, content: Content, content_type: KnowledgeContentOrigin) -> None:
1160
1218
  from agno.vectordb import VectorDb
1161
1219
 
1162
1220
  self.vector_db = cast(VectorDb, self.vector_db)
1163
1221
 
1164
- self._add_to_contents_db(content)
1222
+ await self._add_to_contents_db(content)
1165
1223
  if content_type == KnowledgeContentOrigin.PATH:
1166
1224
  if content.file_data is None:
1167
1225
  log_warning("No file data provided")
@@ -1192,18 +1250,18 @@ class Knowledge:
1192
1250
  else:
1193
1251
  log_error("Vector database does not support file insertion")
1194
1252
  content.status = ContentStatus.FAILED
1195
- self._update_content(content)
1253
+ await self._aupdate_content(content)
1196
1254
  return
1197
1255
  content.external_id = result
1198
1256
  content.status = ContentStatus.COMPLETED
1199
- self._update_content(content)
1257
+ await self._aupdate_content(content)
1200
1258
  return
1201
1259
 
1202
1260
  except Exception as e:
1203
1261
  log_error(f"Error uploading file to LightRAG: {e}")
1204
1262
  content.status = ContentStatus.FAILED
1205
1263
  content.status_message = f"Could not upload to LightRAG: {str(e)}"
1206
- self._update_content(content)
1264
+ await self._aupdate_content(content)
1207
1265
  return
1208
1266
 
1209
1267
  elif content_type == KnowledgeContentOrigin.URL:
@@ -1213,7 +1271,7 @@ class Knowledge:
1213
1271
  if reader is None:
1214
1272
  log_error("No URL reader available")
1215
1273
  content.status = ContentStatus.FAILED
1216
- self._update_content(content)
1274
+ await self._aupdate_content(content)
1217
1275
  return
1218
1276
 
1219
1277
  reader.chunk = False
@@ -1225,7 +1283,7 @@ class Knowledge:
1225
1283
  if not read_documents:
1226
1284
  log_error("No documents read from URL")
1227
1285
  content.status = ContentStatus.FAILED
1228
- self._update_content(content)
1286
+ await self._aupdate_content(content)
1229
1287
  return
1230
1288
 
1231
1289
  if self.vector_db and hasattr(self.vector_db, "insert_text"):
@@ -1236,19 +1294,19 @@ class Knowledge:
1236
1294
  else:
1237
1295
  log_error("Vector database does not support text insertion")
1238
1296
  content.status = ContentStatus.FAILED
1239
- self._update_content(content)
1297
+ await self._aupdate_content(content)
1240
1298
  return
1241
1299
 
1242
1300
  content.external_id = result
1243
1301
  content.status = ContentStatus.COMPLETED
1244
- self._update_content(content)
1302
+ await self._aupdate_content(content)
1245
1303
  return
1246
1304
 
1247
1305
  except Exception as e:
1248
1306
  log_error(f"Error uploading file to LightRAG: {e}")
1249
1307
  content.status = ContentStatus.FAILED
1250
1308
  content.status_message = f"Could not upload to LightRAG: {str(e)}"
1251
- self._update_content(content)
1309
+ await self._aupdate_content(content)
1252
1310
  return
1253
1311
 
1254
1312
  elif content_type == KnowledgeContentOrigin.CONTENT:
@@ -1269,11 +1327,11 @@ class Knowledge:
1269
1327
  else:
1270
1328
  log_error("Vector database does not support file insertion")
1271
1329
  content.status = ContentStatus.FAILED
1272
- self._update_content(content)
1330
+ await self._aupdate_content(content)
1273
1331
  return
1274
1332
  content.external_id = result
1275
1333
  content.status = ContentStatus.COMPLETED
1276
- self._update_content(content)
1334
+ await self._aupdate_content(content)
1277
1335
  else:
1278
1336
  log_warning(f"No file data available for LightRAG upload: {content.name}")
1279
1337
  return
@@ -1284,13 +1342,13 @@ class Knowledge:
1284
1342
  if content.reader is None:
1285
1343
  log_error("No reader available for topic content")
1286
1344
  content.status = ContentStatus.FAILED
1287
- self._update_content(content)
1345
+ await self._aupdate_content(content)
1288
1346
  return
1289
1347
 
1290
1348
  if not content.topics:
1291
1349
  log_error("No topics available for content")
1292
1350
  content.status = ContentStatus.FAILED
1293
- self._update_content(content)
1351
+ await self._aupdate_content(content)
1294
1352
  return
1295
1353
 
1296
1354
  read_documents = content.reader.read(content.topics)
@@ -1303,24 +1361,35 @@ class Knowledge:
1303
1361
  else:
1304
1362
  log_error("Vector database does not support text insertion")
1305
1363
  content.status = ContentStatus.FAILED
1306
- self._update_content(content)
1364
+ await self._aupdate_content(content)
1307
1365
  return
1308
1366
  content.external_id = result
1309
1367
  content.status = ContentStatus.COMPLETED
1310
- self._update_content(content)
1368
+ await self._aupdate_content(content)
1311
1369
  return
1312
1370
  else:
1313
1371
  log_warning(f"No documents found for LightRAG upload: {content.name}")
1314
1372
  return
1315
1373
 
1316
1374
  def search(
1317
- self, query: str, max_results: Optional[int] = None, filters: Optional[Dict[str, Any]] = None
1375
+ self,
1376
+ query: str,
1377
+ max_results: Optional[int] = None,
1378
+ filters: Optional[Dict[str, Any]] = None,
1379
+ search_type: Optional[str] = None,
1318
1380
  ) -> List[Document]:
1319
1381
  """Returns relevant documents matching a query"""
1320
-
1321
1382
  from agno.vectordb import VectorDb
1383
+ from agno.vectordb.search import SearchType
1322
1384
 
1323
1385
  self.vector_db = cast(VectorDb, self.vector_db)
1386
+
1387
+ if (
1388
+ hasattr(self.vector_db, "search_type")
1389
+ and isinstance(self.vector_db.search_type, SearchType)
1390
+ and search_type
1391
+ ):
1392
+ self.vector_db.search_type = SearchType(search_type)
1324
1393
  try:
1325
1394
  if self.vector_db is None:
1326
1395
  log_warning("No vector db provided")
@@ -1334,13 +1403,23 @@ class Knowledge:
1334
1403
  return []
1335
1404
 
1336
1405
  async def async_search(
1337
- self, query: str, max_results: Optional[int] = None, filters: Optional[Dict[str, Any]] = None
1406
+ self,
1407
+ query: str,
1408
+ max_results: Optional[int] = None,
1409
+ filters: Optional[Dict[str, Any]] = None,
1410
+ search_type: Optional[str] = None,
1338
1411
  ) -> List[Document]:
1339
1412
  """Returns relevant documents matching a query"""
1340
-
1341
1413
  from agno.vectordb import VectorDb
1414
+ from agno.vectordb.search import SearchType
1342
1415
 
1343
1416
  self.vector_db = cast(VectorDb, self.vector_db)
1417
+ if (
1418
+ hasattr(self.vector_db, "search_type")
1419
+ and isinstance(self.vector_db.search_type, SearchType)
1420
+ and search_type
1421
+ ):
1422
+ self.vector_db.search_type = SearchType(search_type)
1344
1423
  try:
1345
1424
  if self.vector_db is None:
1346
1425
  log_warning("No vector db provided")
@@ -1442,10 +1521,46 @@ class Knowledge:
1442
1521
  def patch_content(self, content: Content) -> Optional[Dict[str, Any]]:
1443
1522
  return self._update_content(content)
1444
1523
 
1524
+ async def apatch_content(self, content: Content) -> Optional[Dict[str, Any]]:
1525
+ return await self._aupdate_content(content)
1526
+
1445
1527
  def get_content_by_id(self, content_id: str) -> Optional[Content]:
1446
1528
  if self.contents_db is None:
1447
1529
  raise ValueError("No contents db provided")
1530
+
1531
+ if isinstance(self.contents_db, AsyncBaseDb):
1532
+ raise ValueError(
1533
+ "get_content_by_id() is not supported for async databases. Please use aget_content_by_id() instead."
1534
+ )
1535
+
1448
1536
  content_row = self.contents_db.get_knowledge_content(content_id)
1537
+
1538
+ if content_row is None:
1539
+ return None
1540
+ content = Content(
1541
+ id=content_row.id,
1542
+ name=content_row.name,
1543
+ description=content_row.description,
1544
+ metadata=content_row.metadata,
1545
+ file_type=content_row.type,
1546
+ size=content_row.size,
1547
+ status=ContentStatus(content_row.status) if content_row.status else None,
1548
+ status_message=content_row.status_message,
1549
+ created_at=content_row.created_at,
1550
+ updated_at=content_row.updated_at if content_row.updated_at else content_row.created_at,
1551
+ external_id=content_row.external_id,
1552
+ )
1553
+ return content
1554
+
1555
+ async def aget_content_by_id(self, content_id: str) -> Optional[Content]:
1556
+ if self.contents_db is None:
1557
+ raise ValueError("No contents db provided")
1558
+
1559
+ if isinstance(self.contents_db, AsyncBaseDb):
1560
+ content_row = await self.contents_db.get_knowledge_content(content_id)
1561
+ else:
1562
+ content_row = self.contents_db.get_knowledge_content(content_id)
1563
+
1449
1564
  if content_row is None:
1450
1565
  return None
1451
1566
  content = Content(
@@ -1472,6 +1587,10 @@ class Knowledge:
1472
1587
  ) -> Tuple[List[Content], int]:
1473
1588
  if self.contents_db is None:
1474
1589
  raise ValueError("No contents db provided")
1590
+
1591
+ if isinstance(self.contents_db, AsyncBaseDb):
1592
+ raise ValueError("get_content() is not supported for async databases. Please use aget_content() instead.")
1593
+
1475
1594
  contents, count = self.contents_db.get_knowledge_contents(
1476
1595
  limit=limit, page=page, sort_by=sort_by, sort_order=sort_order
1477
1596
  )
@@ -1495,9 +1614,53 @@ class Knowledge:
1495
1614
  result.append(content)
1496
1615
  return result, count
1497
1616
 
1617
+ async def aget_content(
1618
+ self,
1619
+ limit: Optional[int] = None,
1620
+ page: Optional[int] = None,
1621
+ sort_by: Optional[str] = None,
1622
+ sort_order: Optional[str] = None,
1623
+ ) -> Tuple[List[Content], int]:
1624
+ if self.contents_db is None:
1625
+ raise ValueError("No contents db provided")
1626
+
1627
+ if isinstance(self.contents_db, AsyncBaseDb):
1628
+ contents, count = await self.contents_db.get_knowledge_contents(
1629
+ limit=limit, page=page, sort_by=sort_by, sort_order=sort_order
1630
+ )
1631
+ else:
1632
+ contents, count = self.contents_db.get_knowledge_contents(
1633
+ limit=limit, page=page, sort_by=sort_by, sort_order=sort_order
1634
+ )
1635
+
1636
+ result = []
1637
+ for content_row in contents:
1638
+ # Create Content from database row
1639
+ content = Content(
1640
+ id=content_row.id,
1641
+ name=content_row.name,
1642
+ description=content_row.description,
1643
+ metadata=content_row.metadata,
1644
+ size=content_row.size,
1645
+ file_type=content_row.type,
1646
+ status=ContentStatus(content_row.status) if content_row.status else None,
1647
+ status_message=content_row.status_message,
1648
+ created_at=content_row.created_at,
1649
+ updated_at=content_row.updated_at if content_row.updated_at else content_row.created_at,
1650
+ external_id=content_row.external_id,
1651
+ )
1652
+ result.append(content)
1653
+ return result, count
1654
+
1498
1655
  def get_content_status(self, content_id: str) -> Tuple[Optional[ContentStatus], Optional[str]]:
1499
1656
  if self.contents_db is None:
1500
1657
  raise ValueError("No contents db provided")
1658
+
1659
+ if isinstance(self.contents_db, AsyncBaseDb):
1660
+ raise ValueError(
1661
+ "get_content_status() is not supported for async databases. Please use aget_content_status() instead."
1662
+ )
1663
+
1501
1664
  content_row = self.contents_db.get_knowledge_content(content_id)
1502
1665
  if content_row is None:
1503
1666
  return None, "Content not found"
@@ -1517,6 +1680,33 @@ class Knowledge:
1517
1680
 
1518
1681
  return status, content_row.status_message
1519
1682
 
1683
+ async def aget_content_status(self, content_id: str) -> Tuple[Optional[ContentStatus], Optional[str]]:
1684
+ if self.contents_db is None:
1685
+ raise ValueError("No contents db provided")
1686
+
1687
+ if isinstance(self.contents_db, AsyncBaseDb):
1688
+ content_row = await self.contents_db.get_knowledge_content(content_id)
1689
+ else:
1690
+ content_row = self.contents_db.get_knowledge_content(content_id)
1691
+
1692
+ if content_row is None:
1693
+ return None, "Content not found"
1694
+
1695
+ # Convert string status to enum, defaulting to PROCESSING if unknown
1696
+ status_str = content_row.status
1697
+ try:
1698
+ status = ContentStatus(status_str.lower()) if status_str else ContentStatus.PROCESSING
1699
+ except ValueError:
1700
+ # Handle legacy or unknown statuses
1701
+ if status_str and "failed" in status_str.lower():
1702
+ status = ContentStatus.FAILED
1703
+ elif status_str and "completed" in status_str.lower():
1704
+ status = ContentStatus.COMPLETED
1705
+ else:
1706
+ status = ContentStatus.PROCESSING
1707
+
1708
+ return status, content_row.status_message
1709
+
1520
1710
  def remove_content_by_id(self, content_id: str):
1521
1711
  from agno.vectordb import VectorDb
1522
1712
 
@@ -1535,12 +1725,36 @@ class Knowledge:
1535
1725
  if self.contents_db is not None:
1536
1726
  self.contents_db.delete_knowledge_content(content_id)
1537
1727
 
1728
+ async def aremove_content_by_id(self, content_id: str):
1729
+ if self.vector_db is not None:
1730
+ if self.vector_db.__class__.__name__ == "LightRag":
1731
+ # For LightRAG, get the content first to find the external_id
1732
+ content = await self.aget_content_by_id(content_id)
1733
+ if content and content.external_id:
1734
+ self.vector_db.delete_by_external_id(content.external_id) # type: ignore
1735
+ else:
1736
+ log_warning(f"No external_id found for content {content_id}, cannot delete from LightRAG")
1737
+ else:
1738
+ self.vector_db.delete_by_content_id(content_id)
1739
+
1740
+ if self.contents_db is not None:
1741
+ if isinstance(self.contents_db, AsyncBaseDb):
1742
+ await self.contents_db.delete_knowledge_content(content_id)
1743
+ else:
1744
+ self.contents_db.delete_knowledge_content(content_id)
1745
+
1538
1746
  def remove_all_content(self):
1539
1747
  contents, _ = self.get_content()
1540
1748
  for content in contents:
1541
1749
  if content.id is not None:
1542
1750
  self.remove_content_by_id(content.id)
1543
1751
 
1752
+ async def aremove_all_content(self):
1753
+ contents, _ = await self.aget_content()
1754
+ for content in contents:
1755
+ if content.id is not None:
1756
+ await self.aremove_content_by_id(content.id)
1757
+
1544
1758
  # --- Reader Factory Integration ---
1545
1759
 
1546
1760
  def construct_readers(self):
@@ -4,7 +4,8 @@ from pathlib import Path
4
4
  from typing import IO, Any, List, Optional, Tuple, Union
5
5
  from uuid import uuid4
6
6
 
7
- from agno.knowledge.chunking.strategy import ChunkingStrategyType
7
+ from agno.knowledge.chunking.document import DocumentChunking
8
+ from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
8
9
  from agno.knowledge.document.base import Document
9
10
  from agno.knowledge.reader.base import Reader
10
11
  from agno.knowledge.types import ContentType
@@ -183,6 +184,7 @@ class BasePDFReader(Reader):
183
184
  page_start_numbering_format: Optional[str] = None,
184
185
  page_end_numbering_format: Optional[str] = None,
185
186
  password: Optional[str] = None,
187
+ chunking_strategy: Optional[ChunkingStrategy] = DocumentChunking(chunk_size=5000),
186
188
  **kwargs,
187
189
  ):
188
190
  if page_start_numbering_format is None:
@@ -195,11 +197,7 @@ class BasePDFReader(Reader):
195
197
  self.page_end_numbering_format = page_end_numbering_format
196
198
  self.password = password
197
199
 
198
- if self.chunking_strategy is None:
199
- from agno.knowledge.chunking.document import DocumentChunking
200
-
201
- self.chunking_strategy = DocumentChunking(chunk_size=5000)
202
- super().__init__(**kwargs)
200
+ super().__init__(chunking_strategy=chunking_strategy, **kwargs)
203
201
 
204
202
  @classmethod
205
203
  def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
@@ -16,8 +16,7 @@ class ReaderFactory:
16
16
  from agno.knowledge.reader.pdf_reader import PDFReader
17
17
 
18
18
  config: Dict[str, Any] = {
19
- "chunk": True,
20
- "chunk_size": 100,
19
+ "name": "PDF Reader",
21
20
  "description": "Processes PDF documents with OCR support for images and text extraction",
22
21
  }
23
22
  config.update(kwargs)
@@ -201,7 +200,7 @@ class ReaderFactory:
201
200
  return cls.create_reader("pdf")
202
201
  elif extension in [".csv", "text/csv"]:
203
202
  return cls.create_reader("csv")
204
- elif extension in [".docx", ".doc"]:
203
+ elif extension in [".docx", ".doc", "application/vnd.openxmlformats-officedocument.wordprocessingml.document"]:
205
204
  return cls.create_reader("docx")
206
205
  elif extension == ".json":
207
206
  return cls.create_reader("json")