agno 2.1.4__py3-none-any.whl → 2.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/agent.py +1767 -535
- agno/db/async_postgres/__init__.py +3 -0
- agno/db/async_postgres/async_postgres.py +1668 -0
- agno/db/async_postgres/schemas.py +124 -0
- agno/db/async_postgres/utils.py +289 -0
- agno/db/base.py +237 -2
- agno/db/dynamo/dynamo.py +2 -2
- agno/db/firestore/firestore.py +2 -2
- agno/db/firestore/utils.py +4 -2
- agno/db/gcs_json/gcs_json_db.py +2 -2
- agno/db/in_memory/in_memory_db.py +2 -2
- agno/db/json/json_db.py +2 -2
- agno/db/migrations/v1_to_v2.py +30 -13
- agno/db/mongo/mongo.py +18 -6
- agno/db/mysql/mysql.py +35 -13
- agno/db/postgres/postgres.py +29 -6
- agno/db/redis/redis.py +2 -2
- agno/db/singlestore/singlestore.py +2 -2
- agno/db/sqlite/sqlite.py +34 -12
- agno/db/sqlite/utils.py +8 -3
- agno/eval/accuracy.py +50 -43
- agno/eval/performance.py +6 -3
- agno/eval/reliability.py +6 -3
- agno/eval/utils.py +33 -16
- agno/exceptions.py +8 -2
- agno/knowledge/knowledge.py +260 -46
- agno/knowledge/reader/pdf_reader.py +4 -6
- agno/knowledge/reader/reader_factory.py +2 -3
- agno/memory/manager.py +241 -33
- agno/models/anthropic/claude.py +37 -0
- agno/os/app.py +8 -7
- agno/os/interfaces/a2a/router.py +3 -5
- agno/os/interfaces/agui/router.py +4 -1
- agno/os/interfaces/agui/utils.py +27 -6
- agno/os/interfaces/slack/router.py +2 -4
- agno/os/mcp.py +98 -41
- agno/os/router.py +23 -0
- agno/os/routers/evals/evals.py +52 -20
- agno/os/routers/evals/utils.py +14 -14
- agno/os/routers/knowledge/knowledge.py +130 -9
- agno/os/routers/knowledge/schemas.py +57 -0
- agno/os/routers/memory/memory.py +116 -44
- agno/os/routers/metrics/metrics.py +16 -6
- agno/os/routers/session/session.py +65 -22
- agno/os/schema.py +36 -0
- agno/os/utils.py +67 -12
- agno/reasoning/anthropic.py +80 -0
- agno/reasoning/gemini.py +73 -0
- agno/reasoning/openai.py +5 -0
- agno/reasoning/vertexai.py +76 -0
- agno/session/workflow.py +3 -3
- agno/team/team.py +918 -175
- agno/tools/googlesheets.py +20 -5
- agno/tools/mcp_toolbox.py +3 -3
- agno/tools/scrapegraph.py +1 -1
- agno/utils/models/claude.py +3 -1
- agno/utils/streamlit.py +1 -1
- agno/vectordb/base.py +22 -1
- agno/vectordb/cassandra/cassandra.py +9 -0
- agno/vectordb/chroma/chromadb.py +26 -6
- agno/vectordb/clickhouse/clickhousedb.py +9 -1
- agno/vectordb/couchbase/couchbase.py +11 -0
- agno/vectordb/lancedb/lance_db.py +20 -0
- agno/vectordb/langchaindb/langchaindb.py +11 -0
- agno/vectordb/lightrag/lightrag.py +9 -0
- agno/vectordb/llamaindex/llamaindexdb.py +15 -1
- agno/vectordb/milvus/milvus.py +23 -0
- agno/vectordb/mongodb/mongodb.py +22 -0
- agno/vectordb/pgvector/pgvector.py +19 -0
- agno/vectordb/pineconedb/pineconedb.py +35 -4
- agno/vectordb/qdrant/qdrant.py +24 -0
- agno/vectordb/singlestore/singlestore.py +25 -17
- agno/vectordb/surrealdb/surrealdb.py +18 -1
- agno/vectordb/upstashdb/upstashdb.py +26 -1
- agno/vectordb/weaviate/weaviate.py +18 -0
- agno/workflow/condition.py +4 -0
- agno/workflow/loop.py +4 -0
- agno/workflow/parallel.py +4 -0
- agno/workflow/router.py +4 -0
- agno/workflow/step.py +22 -14
- agno/workflow/steps.py +4 -0
- agno/workflow/types.py +2 -2
- agno/workflow/workflow.py +328 -61
- {agno-2.1.4.dist-info → agno-2.1.5.dist-info}/METADATA +100 -41
- {agno-2.1.4.dist-info → agno-2.1.5.dist-info}/RECORD +88 -81
- {agno-2.1.4.dist-info → agno-2.1.5.dist-info}/WHEEL +0 -0
- {agno-2.1.4.dist-info → agno-2.1.5.dist-info}/licenses/LICENSE +0 -0
- {agno-2.1.4.dist-info → agno-2.1.5.dist-info}/top_level.txt +0 -0
agno/knowledge/knowledge.py
CHANGED
|
@@ -12,7 +12,7 @@ from typing import Any, Dict, List, Optional, Set, Tuple, Union, cast, overload
|
|
|
12
12
|
|
|
13
13
|
from httpx import AsyncClient
|
|
14
14
|
|
|
15
|
-
from agno.db.base import BaseDb
|
|
15
|
+
from agno.db.base import AsyncBaseDb, BaseDb
|
|
16
16
|
from agno.db.schemas.knowledge import KnowledgeRow
|
|
17
17
|
from agno.knowledge.content import Content, ContentAuth, ContentStatus, FileData
|
|
18
18
|
from agno.knowledge.document import Document
|
|
@@ -39,7 +39,7 @@ class Knowledge:
|
|
|
39
39
|
name: Optional[str] = None
|
|
40
40
|
description: Optional[str] = None
|
|
41
41
|
vector_db: Optional[Any] = None
|
|
42
|
-
contents_db: Optional[BaseDb] = None
|
|
42
|
+
contents_db: Optional[Union[BaseDb, AsyncBaseDb]] = None
|
|
43
43
|
max_results: int = 10
|
|
44
44
|
readers: Optional[Dict[str, Reader]] = None
|
|
45
45
|
|
|
@@ -394,10 +394,10 @@ class Knowledge:
|
|
|
394
394
|
if self._should_include_file(str(path), include, exclude):
|
|
395
395
|
log_info(f"Adding file {path} due to include/exclude filters")
|
|
396
396
|
|
|
397
|
-
self._add_to_contents_db(content)
|
|
397
|
+
await self._add_to_contents_db(content)
|
|
398
398
|
if self._should_skip(content.content_hash, skip_if_exists): # type: ignore[arg-type]
|
|
399
399
|
content.status = ContentStatus.COMPLETED
|
|
400
|
-
self._update_content(content)
|
|
400
|
+
await self._aupdate_content(content)
|
|
401
401
|
return
|
|
402
402
|
|
|
403
403
|
# Handle LightRAG special case - read file and upload directly
|
|
@@ -483,7 +483,6 @@ class Knowledge:
|
|
|
483
483
|
3. Read the content
|
|
484
484
|
4. Prepare and insert the content in the vector database
|
|
485
485
|
"""
|
|
486
|
-
|
|
487
486
|
from agno.vectordb import VectorDb
|
|
488
487
|
|
|
489
488
|
self.vector_db = cast(VectorDb, self.vector_db)
|
|
@@ -495,7 +494,7 @@ class Knowledge:
|
|
|
495
494
|
raise ValueError("No url provided")
|
|
496
495
|
|
|
497
496
|
# 1. Add content to contents database
|
|
498
|
-
self._add_to_contents_db(content)
|
|
497
|
+
await self._add_to_contents_db(content)
|
|
499
498
|
if self._should_skip(content.content_hash, skip_if_exists): # type: ignore[arg-type]
|
|
500
499
|
content.status = ContentStatus.COMPLETED
|
|
501
500
|
self._update_content(content)
|
|
@@ -513,12 +512,12 @@ class Knowledge:
|
|
|
513
512
|
if not all([parsed_url.scheme, parsed_url.netloc]):
|
|
514
513
|
content.status = ContentStatus.FAILED
|
|
515
514
|
content.status_message = f"Invalid URL format: {content.url}"
|
|
516
|
-
self._update_content(content)
|
|
515
|
+
await self._aupdate_content(content)
|
|
517
516
|
log_warning(f"Invalid URL format: {content.url}")
|
|
518
517
|
except Exception as e:
|
|
519
518
|
content.status = ContentStatus.FAILED
|
|
520
519
|
content.status_message = f"Invalid URL: {content.url} - {str(e)}"
|
|
521
|
-
self._update_content(content)
|
|
520
|
+
await self._aupdate_content(content)
|
|
522
521
|
log_warning(f"Invalid URL: {content.url} - {str(e)}")
|
|
523
522
|
|
|
524
523
|
# 3. Fetch and load content if file has an extension
|
|
@@ -576,7 +575,7 @@ class Knowledge:
|
|
|
576
575
|
log_error(f"Error reading URL: {content.url} - {str(e)}")
|
|
577
576
|
content.status = ContentStatus.FAILED
|
|
578
577
|
content.status_message = f"Error reading URL: {content.url} - {str(e)}"
|
|
579
|
-
self._update_content(content)
|
|
578
|
+
await self._aupdate_content(content)
|
|
580
579
|
return
|
|
581
580
|
|
|
582
581
|
# 6. Chunk documents if needed
|
|
@@ -622,10 +621,10 @@ class Knowledge:
|
|
|
622
621
|
|
|
623
622
|
log_info(f"Adding content from {content.name}")
|
|
624
623
|
|
|
625
|
-
self._add_to_contents_db(content)
|
|
624
|
+
await self._add_to_contents_db(content)
|
|
626
625
|
if self._should_skip(content.content_hash, skip_if_exists): # type: ignore[arg-type]
|
|
627
626
|
content.status = ContentStatus.COMPLETED
|
|
628
|
-
self._update_content(content)
|
|
627
|
+
await self._aupdate_content(content)
|
|
629
628
|
return
|
|
630
629
|
|
|
631
630
|
if content.file_data and self.vector_db.__class__.__name__ == "LightRag":
|
|
@@ -648,7 +647,7 @@ class Knowledge:
|
|
|
648
647
|
else:
|
|
649
648
|
content.status = ContentStatus.FAILED
|
|
650
649
|
content.status_message = "Text reader not available"
|
|
651
|
-
self._update_content(content)
|
|
650
|
+
await self._aupdate_content(content)
|
|
652
651
|
return
|
|
653
652
|
|
|
654
653
|
elif isinstance(content.file_data, FileData):
|
|
@@ -677,12 +676,13 @@ class Knowledge:
|
|
|
677
676
|
if len(read_documents) == 0:
|
|
678
677
|
content.status = ContentStatus.FAILED
|
|
679
678
|
content.status_message = "Content could not be read"
|
|
680
|
-
self._update_content(content)
|
|
679
|
+
await self._aupdate_content(content)
|
|
680
|
+
return
|
|
681
681
|
|
|
682
682
|
else:
|
|
683
683
|
content.status = ContentStatus.FAILED
|
|
684
684
|
content.status_message = "No content provided"
|
|
685
|
-
self._update_content(content)
|
|
685
|
+
await self._aupdate_content(content)
|
|
686
686
|
return
|
|
687
687
|
|
|
688
688
|
await self._handle_vector_db_insert(content, read_documents, upsert)
|
|
@@ -716,7 +716,7 @@ class Knowledge:
|
|
|
716
716
|
content.content_hash = self._build_content_hash(content)
|
|
717
717
|
content.id = generate_id(content.content_hash)
|
|
718
718
|
|
|
719
|
-
self._add_to_contents_db(content)
|
|
719
|
+
await self._add_to_contents_db(content)
|
|
720
720
|
if self._should_skip(content.content_hash, skip_if_exists):
|
|
721
721
|
content.status = ContentStatus.COMPLETED
|
|
722
722
|
self._update_content(content)
|
|
@@ -726,6 +726,11 @@ class Knowledge:
|
|
|
726
726
|
await self._process_lightrag_content(content, KnowledgeContentOrigin.TOPIC)
|
|
727
727
|
return
|
|
728
728
|
|
|
729
|
+
if self.vector_db and self.vector_db.content_hash_exists(content.content_hash) and skip_if_exists:
|
|
730
|
+
log_info(f"Content {content.content_hash} already exists, skipping")
|
|
731
|
+
continue
|
|
732
|
+
|
|
733
|
+
await self._add_to_contents_db(content)
|
|
729
734
|
if content.reader is None:
|
|
730
735
|
log_error(f"No reader available for topic: {topic}")
|
|
731
736
|
content.status = ContentStatus.FAILED
|
|
@@ -742,8 +747,7 @@ class Knowledge:
|
|
|
742
747
|
else:
|
|
743
748
|
content.status = ContentStatus.FAILED
|
|
744
749
|
content.status_message = "No content found for topic"
|
|
745
|
-
self._update_content(content)
|
|
746
|
-
continue
|
|
750
|
+
await self._aupdate_content(content)
|
|
747
751
|
|
|
748
752
|
await self._handle_vector_db_insert(content, read_documents, upsert)
|
|
749
753
|
|
|
@@ -812,10 +816,10 @@ class Knowledge:
|
|
|
812
816
|
# 3. Hash content and add it to the contents database
|
|
813
817
|
content_entry.content_hash = self._build_content_hash(content_entry)
|
|
814
818
|
content_entry.id = generate_id(content_entry.content_hash)
|
|
815
|
-
self._add_to_contents_db(content_entry)
|
|
819
|
+
await self._add_to_contents_db(content_entry)
|
|
816
820
|
if self._should_skip(content_entry.content_hash, skip_if_exists):
|
|
817
821
|
content_entry.status = ContentStatus.COMPLETED
|
|
818
|
-
self._update_content(content_entry)
|
|
822
|
+
await self._aupdate_content(content_entry)
|
|
819
823
|
return
|
|
820
824
|
|
|
821
825
|
# 4. Select reader
|
|
@@ -894,10 +898,10 @@ class Knowledge:
|
|
|
894
898
|
# 3. Hash content and add it to the contents database
|
|
895
899
|
content_entry.content_hash = self._build_content_hash(content_entry)
|
|
896
900
|
content_entry.id = generate_id(content_entry.content_hash)
|
|
897
|
-
self._add_to_contents_db(content_entry)
|
|
901
|
+
await self._add_to_contents_db(content_entry)
|
|
898
902
|
if self._should_skip(content_entry.content_hash, skip_if_exists):
|
|
899
903
|
content_entry.status = ContentStatus.COMPLETED
|
|
900
|
-
self._update_content(content_entry)
|
|
904
|
+
await self._aupdate_content(content_entry)
|
|
901
905
|
return
|
|
902
906
|
|
|
903
907
|
# 4. Select reader
|
|
@@ -937,7 +941,7 @@ class Knowledge:
|
|
|
937
941
|
log_error("No vector database configured")
|
|
938
942
|
content.status = ContentStatus.FAILED
|
|
939
943
|
content.status_message = "No vector database configured"
|
|
940
|
-
self._update_content(content)
|
|
944
|
+
await self._aupdate_content(content)
|
|
941
945
|
return
|
|
942
946
|
|
|
943
947
|
if self.vector_db.upsert_available() and upsert:
|
|
@@ -947,7 +951,7 @@ class Knowledge:
|
|
|
947
951
|
log_error(f"Error upserting document: {e}")
|
|
948
952
|
content.status = ContentStatus.FAILED
|
|
949
953
|
content.status_message = "Could not upsert embedding"
|
|
950
|
-
self._update_content(content)
|
|
954
|
+
await self._aupdate_content(content)
|
|
951
955
|
return
|
|
952
956
|
else:
|
|
953
957
|
try:
|
|
@@ -960,11 +964,11 @@ class Knowledge:
|
|
|
960
964
|
log_error(f"Error inserting document: {e}")
|
|
961
965
|
content.status = ContentStatus.FAILED
|
|
962
966
|
content.status_message = "Could not insert embedding"
|
|
963
|
-
self._update_content(content)
|
|
967
|
+
await self._aupdate_content(content)
|
|
964
968
|
return
|
|
965
969
|
|
|
966
970
|
content.status = ContentStatus.COMPLETED
|
|
967
|
-
self._update_content(content)
|
|
971
|
+
await self._aupdate_content(content)
|
|
968
972
|
|
|
969
973
|
async def _load_content(
|
|
970
974
|
self,
|
|
@@ -1064,7 +1068,7 @@ class Knowledge:
|
|
|
1064
1068
|
# Already a string, return as-is
|
|
1065
1069
|
return value
|
|
1066
1070
|
|
|
1067
|
-
def _add_to_contents_db(self, content: Content):
|
|
1071
|
+
async def _add_to_contents_db(self, content: Content):
|
|
1068
1072
|
if self.contents_db:
|
|
1069
1073
|
created_at = content.created_at if content.created_at else int(time.time())
|
|
1070
1074
|
updated_at = content.updated_at if content.updated_at else int(time.time())
|
|
@@ -1102,13 +1106,21 @@ class Knowledge:
|
|
|
1102
1106
|
created_at=created_at,
|
|
1103
1107
|
updated_at=updated_at,
|
|
1104
1108
|
)
|
|
1105
|
-
self.contents_db.upsert_knowledge_content(knowledge_row=content_row)
|
|
1109
|
+
if isinstance(self.contents_db, AsyncBaseDb):
|
|
1110
|
+
await self.contents_db.upsert_knowledge_content(knowledge_row=content_row)
|
|
1111
|
+
else:
|
|
1112
|
+
self.contents_db.upsert_knowledge_content(knowledge_row=content_row)
|
|
1106
1113
|
|
|
1107
1114
|
def _update_content(self, content: Content) -> Optional[Dict[str, Any]]:
|
|
1108
1115
|
from agno.vectordb import VectorDb
|
|
1109
1116
|
|
|
1110
1117
|
self.vector_db = cast(VectorDb, self.vector_db)
|
|
1111
1118
|
if self.contents_db:
|
|
1119
|
+
if isinstance(self.contents_db, AsyncBaseDb):
|
|
1120
|
+
raise ValueError(
|
|
1121
|
+
"update_content() is not supported with an async DB. Please use aupdate_content() instead."
|
|
1122
|
+
)
|
|
1123
|
+
|
|
1112
1124
|
if not content.id:
|
|
1113
1125
|
log_warning("Content id is required to update Knowledge content")
|
|
1114
1126
|
return None
|
|
@@ -1156,12 +1168,58 @@ class Knowledge:
|
|
|
1156
1168
|
log_warning("Contents DB not found for knowledge base")
|
|
1157
1169
|
return None
|
|
1158
1170
|
|
|
1171
|
+
async def _aupdate_content(self, content: Content) -> Optional[Dict[str, Any]]:
|
|
1172
|
+
if self.contents_db:
|
|
1173
|
+
if not content.id:
|
|
1174
|
+
log_warning("Content id is required to update Knowledge content")
|
|
1175
|
+
return None
|
|
1176
|
+
|
|
1177
|
+
# TODO: we shouldn't check for content here, we should trust the upsert method to handle conflicts
|
|
1178
|
+
if isinstance(self.contents_db, AsyncBaseDb):
|
|
1179
|
+
content_row = await self.contents_db.get_knowledge_content(content.id)
|
|
1180
|
+
else:
|
|
1181
|
+
content_row = self.contents_db.get_knowledge_content(content.id)
|
|
1182
|
+
if content_row is None:
|
|
1183
|
+
log_warning(f"Content row not found for id: {content.id}, cannot update status")
|
|
1184
|
+
return None
|
|
1185
|
+
|
|
1186
|
+
if content.name is not None:
|
|
1187
|
+
content_row.name = content.name
|
|
1188
|
+
if content.description is not None:
|
|
1189
|
+
content_row.description = content.description
|
|
1190
|
+
if content.metadata is not None:
|
|
1191
|
+
content_row.metadata = content.metadata
|
|
1192
|
+
if content.status is not None:
|
|
1193
|
+
content_row.status = content.status
|
|
1194
|
+
if content.status_message is not None:
|
|
1195
|
+
content_row.status_message = content.status_message if content.status_message else ""
|
|
1196
|
+
if content.external_id is not None:
|
|
1197
|
+
content_row.external_id = content.external_id
|
|
1198
|
+
|
|
1199
|
+
content_row.updated_at = int(time.time())
|
|
1200
|
+
if isinstance(self.contents_db, AsyncBaseDb):
|
|
1201
|
+
await self.contents_db.upsert_knowledge_content(knowledge_row=content_row)
|
|
1202
|
+
else:
|
|
1203
|
+
self.contents_db.upsert_knowledge_content(knowledge_row=content_row)
|
|
1204
|
+
|
|
1205
|
+
if self.vector_db and content.metadata:
|
|
1206
|
+
self.vector_db.update_metadata(content_id=content.id, metadata=content.metadata)
|
|
1207
|
+
|
|
1208
|
+
if content.metadata:
|
|
1209
|
+
self.add_filters(content.metadata)
|
|
1210
|
+
|
|
1211
|
+
return content_row.to_dict()
|
|
1212
|
+
|
|
1213
|
+
else:
|
|
1214
|
+
log_warning(f"Contents DB not found for knowledge base: {self.name}")
|
|
1215
|
+
return None
|
|
1216
|
+
|
|
1159
1217
|
async def _process_lightrag_content(self, content: Content, content_type: KnowledgeContentOrigin) -> None:
|
|
1160
1218
|
from agno.vectordb import VectorDb
|
|
1161
1219
|
|
|
1162
1220
|
self.vector_db = cast(VectorDb, self.vector_db)
|
|
1163
1221
|
|
|
1164
|
-
self._add_to_contents_db(content)
|
|
1222
|
+
await self._add_to_contents_db(content)
|
|
1165
1223
|
if content_type == KnowledgeContentOrigin.PATH:
|
|
1166
1224
|
if content.file_data is None:
|
|
1167
1225
|
log_warning("No file data provided")
|
|
@@ -1192,18 +1250,18 @@ class Knowledge:
|
|
|
1192
1250
|
else:
|
|
1193
1251
|
log_error("Vector database does not support file insertion")
|
|
1194
1252
|
content.status = ContentStatus.FAILED
|
|
1195
|
-
self._update_content(content)
|
|
1253
|
+
await self._aupdate_content(content)
|
|
1196
1254
|
return
|
|
1197
1255
|
content.external_id = result
|
|
1198
1256
|
content.status = ContentStatus.COMPLETED
|
|
1199
|
-
self._update_content(content)
|
|
1257
|
+
await self._aupdate_content(content)
|
|
1200
1258
|
return
|
|
1201
1259
|
|
|
1202
1260
|
except Exception as e:
|
|
1203
1261
|
log_error(f"Error uploading file to LightRAG: {e}")
|
|
1204
1262
|
content.status = ContentStatus.FAILED
|
|
1205
1263
|
content.status_message = f"Could not upload to LightRAG: {str(e)}"
|
|
1206
|
-
self._update_content(content)
|
|
1264
|
+
await self._aupdate_content(content)
|
|
1207
1265
|
return
|
|
1208
1266
|
|
|
1209
1267
|
elif content_type == KnowledgeContentOrigin.URL:
|
|
@@ -1213,7 +1271,7 @@ class Knowledge:
|
|
|
1213
1271
|
if reader is None:
|
|
1214
1272
|
log_error("No URL reader available")
|
|
1215
1273
|
content.status = ContentStatus.FAILED
|
|
1216
|
-
self._update_content(content)
|
|
1274
|
+
await self._aupdate_content(content)
|
|
1217
1275
|
return
|
|
1218
1276
|
|
|
1219
1277
|
reader.chunk = False
|
|
@@ -1225,7 +1283,7 @@ class Knowledge:
|
|
|
1225
1283
|
if not read_documents:
|
|
1226
1284
|
log_error("No documents read from URL")
|
|
1227
1285
|
content.status = ContentStatus.FAILED
|
|
1228
|
-
self._update_content(content)
|
|
1286
|
+
await self._aupdate_content(content)
|
|
1229
1287
|
return
|
|
1230
1288
|
|
|
1231
1289
|
if self.vector_db and hasattr(self.vector_db, "insert_text"):
|
|
@@ -1236,19 +1294,19 @@ class Knowledge:
|
|
|
1236
1294
|
else:
|
|
1237
1295
|
log_error("Vector database does not support text insertion")
|
|
1238
1296
|
content.status = ContentStatus.FAILED
|
|
1239
|
-
self._update_content(content)
|
|
1297
|
+
await self._aupdate_content(content)
|
|
1240
1298
|
return
|
|
1241
1299
|
|
|
1242
1300
|
content.external_id = result
|
|
1243
1301
|
content.status = ContentStatus.COMPLETED
|
|
1244
|
-
self._update_content(content)
|
|
1302
|
+
await self._aupdate_content(content)
|
|
1245
1303
|
return
|
|
1246
1304
|
|
|
1247
1305
|
except Exception as e:
|
|
1248
1306
|
log_error(f"Error uploading file to LightRAG: {e}")
|
|
1249
1307
|
content.status = ContentStatus.FAILED
|
|
1250
1308
|
content.status_message = f"Could not upload to LightRAG: {str(e)}"
|
|
1251
|
-
self._update_content(content)
|
|
1309
|
+
await self._aupdate_content(content)
|
|
1252
1310
|
return
|
|
1253
1311
|
|
|
1254
1312
|
elif content_type == KnowledgeContentOrigin.CONTENT:
|
|
@@ -1269,11 +1327,11 @@ class Knowledge:
|
|
|
1269
1327
|
else:
|
|
1270
1328
|
log_error("Vector database does not support file insertion")
|
|
1271
1329
|
content.status = ContentStatus.FAILED
|
|
1272
|
-
self._update_content(content)
|
|
1330
|
+
await self._aupdate_content(content)
|
|
1273
1331
|
return
|
|
1274
1332
|
content.external_id = result
|
|
1275
1333
|
content.status = ContentStatus.COMPLETED
|
|
1276
|
-
self._update_content(content)
|
|
1334
|
+
await self._aupdate_content(content)
|
|
1277
1335
|
else:
|
|
1278
1336
|
log_warning(f"No file data available for LightRAG upload: {content.name}")
|
|
1279
1337
|
return
|
|
@@ -1284,13 +1342,13 @@ class Knowledge:
|
|
|
1284
1342
|
if content.reader is None:
|
|
1285
1343
|
log_error("No reader available for topic content")
|
|
1286
1344
|
content.status = ContentStatus.FAILED
|
|
1287
|
-
self._update_content(content)
|
|
1345
|
+
await self._aupdate_content(content)
|
|
1288
1346
|
return
|
|
1289
1347
|
|
|
1290
1348
|
if not content.topics:
|
|
1291
1349
|
log_error("No topics available for content")
|
|
1292
1350
|
content.status = ContentStatus.FAILED
|
|
1293
|
-
self._update_content(content)
|
|
1351
|
+
await self._aupdate_content(content)
|
|
1294
1352
|
return
|
|
1295
1353
|
|
|
1296
1354
|
read_documents = content.reader.read(content.topics)
|
|
@@ -1303,24 +1361,35 @@ class Knowledge:
|
|
|
1303
1361
|
else:
|
|
1304
1362
|
log_error("Vector database does not support text insertion")
|
|
1305
1363
|
content.status = ContentStatus.FAILED
|
|
1306
|
-
self._update_content(content)
|
|
1364
|
+
await self._aupdate_content(content)
|
|
1307
1365
|
return
|
|
1308
1366
|
content.external_id = result
|
|
1309
1367
|
content.status = ContentStatus.COMPLETED
|
|
1310
|
-
self._update_content(content)
|
|
1368
|
+
await self._aupdate_content(content)
|
|
1311
1369
|
return
|
|
1312
1370
|
else:
|
|
1313
1371
|
log_warning(f"No documents found for LightRAG upload: {content.name}")
|
|
1314
1372
|
return
|
|
1315
1373
|
|
|
1316
1374
|
def search(
|
|
1317
|
-
self, query: str, max_results: Optional[int] = None, filters: Optional[Dict[str, Any]] = None
|
|
1375
|
+
self,
|
|
1376
|
+
query: str,
|
|
1377
|
+
max_results: Optional[int] = None,
|
|
1378
|
+
filters: Optional[Dict[str, Any]] = None,
|
|
1379
|
+
search_type: Optional[str] = None,
|
|
1318
1380
|
) -> List[Document]:
|
|
1319
1381
|
"""Returns relevant documents matching a query"""
|
|
1320
|
-
|
|
1321
1382
|
from agno.vectordb import VectorDb
|
|
1383
|
+
from agno.vectordb.search import SearchType
|
|
1322
1384
|
|
|
1323
1385
|
self.vector_db = cast(VectorDb, self.vector_db)
|
|
1386
|
+
|
|
1387
|
+
if (
|
|
1388
|
+
hasattr(self.vector_db, "search_type")
|
|
1389
|
+
and isinstance(self.vector_db.search_type, SearchType)
|
|
1390
|
+
and search_type
|
|
1391
|
+
):
|
|
1392
|
+
self.vector_db.search_type = SearchType(search_type)
|
|
1324
1393
|
try:
|
|
1325
1394
|
if self.vector_db is None:
|
|
1326
1395
|
log_warning("No vector db provided")
|
|
@@ -1334,13 +1403,23 @@ class Knowledge:
|
|
|
1334
1403
|
return []
|
|
1335
1404
|
|
|
1336
1405
|
async def async_search(
|
|
1337
|
-
self, query: str, max_results: Optional[int] = None, filters: Optional[Dict[str, Any]] = None
|
|
1406
|
+
self,
|
|
1407
|
+
query: str,
|
|
1408
|
+
max_results: Optional[int] = None,
|
|
1409
|
+
filters: Optional[Dict[str, Any]] = None,
|
|
1410
|
+
search_type: Optional[str] = None,
|
|
1338
1411
|
) -> List[Document]:
|
|
1339
1412
|
"""Returns relevant documents matching a query"""
|
|
1340
|
-
|
|
1341
1413
|
from agno.vectordb import VectorDb
|
|
1414
|
+
from agno.vectordb.search import SearchType
|
|
1342
1415
|
|
|
1343
1416
|
self.vector_db = cast(VectorDb, self.vector_db)
|
|
1417
|
+
if (
|
|
1418
|
+
hasattr(self.vector_db, "search_type")
|
|
1419
|
+
and isinstance(self.vector_db.search_type, SearchType)
|
|
1420
|
+
and search_type
|
|
1421
|
+
):
|
|
1422
|
+
self.vector_db.search_type = SearchType(search_type)
|
|
1344
1423
|
try:
|
|
1345
1424
|
if self.vector_db is None:
|
|
1346
1425
|
log_warning("No vector db provided")
|
|
@@ -1442,10 +1521,46 @@ class Knowledge:
|
|
|
1442
1521
|
def patch_content(self, content: Content) -> Optional[Dict[str, Any]]:
|
|
1443
1522
|
return self._update_content(content)
|
|
1444
1523
|
|
|
1524
|
+
async def apatch_content(self, content: Content) -> Optional[Dict[str, Any]]:
|
|
1525
|
+
return await self._aupdate_content(content)
|
|
1526
|
+
|
|
1445
1527
|
def get_content_by_id(self, content_id: str) -> Optional[Content]:
|
|
1446
1528
|
if self.contents_db is None:
|
|
1447
1529
|
raise ValueError("No contents db provided")
|
|
1530
|
+
|
|
1531
|
+
if isinstance(self.contents_db, AsyncBaseDb):
|
|
1532
|
+
raise ValueError(
|
|
1533
|
+
"get_content_by_id() is not supported for async databases. Please use aget_content_by_id() instead."
|
|
1534
|
+
)
|
|
1535
|
+
|
|
1448
1536
|
content_row = self.contents_db.get_knowledge_content(content_id)
|
|
1537
|
+
|
|
1538
|
+
if content_row is None:
|
|
1539
|
+
return None
|
|
1540
|
+
content = Content(
|
|
1541
|
+
id=content_row.id,
|
|
1542
|
+
name=content_row.name,
|
|
1543
|
+
description=content_row.description,
|
|
1544
|
+
metadata=content_row.metadata,
|
|
1545
|
+
file_type=content_row.type,
|
|
1546
|
+
size=content_row.size,
|
|
1547
|
+
status=ContentStatus(content_row.status) if content_row.status else None,
|
|
1548
|
+
status_message=content_row.status_message,
|
|
1549
|
+
created_at=content_row.created_at,
|
|
1550
|
+
updated_at=content_row.updated_at if content_row.updated_at else content_row.created_at,
|
|
1551
|
+
external_id=content_row.external_id,
|
|
1552
|
+
)
|
|
1553
|
+
return content
|
|
1554
|
+
|
|
1555
|
+
async def aget_content_by_id(self, content_id: str) -> Optional[Content]:
|
|
1556
|
+
if self.contents_db is None:
|
|
1557
|
+
raise ValueError("No contents db provided")
|
|
1558
|
+
|
|
1559
|
+
if isinstance(self.contents_db, AsyncBaseDb):
|
|
1560
|
+
content_row = await self.contents_db.get_knowledge_content(content_id)
|
|
1561
|
+
else:
|
|
1562
|
+
content_row = self.contents_db.get_knowledge_content(content_id)
|
|
1563
|
+
|
|
1449
1564
|
if content_row is None:
|
|
1450
1565
|
return None
|
|
1451
1566
|
content = Content(
|
|
@@ -1472,6 +1587,10 @@ class Knowledge:
|
|
|
1472
1587
|
) -> Tuple[List[Content], int]:
|
|
1473
1588
|
if self.contents_db is None:
|
|
1474
1589
|
raise ValueError("No contents db provided")
|
|
1590
|
+
|
|
1591
|
+
if isinstance(self.contents_db, AsyncBaseDb):
|
|
1592
|
+
raise ValueError("get_content() is not supported for async databases. Please use aget_content() instead.")
|
|
1593
|
+
|
|
1475
1594
|
contents, count = self.contents_db.get_knowledge_contents(
|
|
1476
1595
|
limit=limit, page=page, sort_by=sort_by, sort_order=sort_order
|
|
1477
1596
|
)
|
|
@@ -1495,9 +1614,53 @@ class Knowledge:
|
|
|
1495
1614
|
result.append(content)
|
|
1496
1615
|
return result, count
|
|
1497
1616
|
|
|
1617
|
+
async def aget_content(
|
|
1618
|
+
self,
|
|
1619
|
+
limit: Optional[int] = None,
|
|
1620
|
+
page: Optional[int] = None,
|
|
1621
|
+
sort_by: Optional[str] = None,
|
|
1622
|
+
sort_order: Optional[str] = None,
|
|
1623
|
+
) -> Tuple[List[Content], int]:
|
|
1624
|
+
if self.contents_db is None:
|
|
1625
|
+
raise ValueError("No contents db provided")
|
|
1626
|
+
|
|
1627
|
+
if isinstance(self.contents_db, AsyncBaseDb):
|
|
1628
|
+
contents, count = await self.contents_db.get_knowledge_contents(
|
|
1629
|
+
limit=limit, page=page, sort_by=sort_by, sort_order=sort_order
|
|
1630
|
+
)
|
|
1631
|
+
else:
|
|
1632
|
+
contents, count = self.contents_db.get_knowledge_contents(
|
|
1633
|
+
limit=limit, page=page, sort_by=sort_by, sort_order=sort_order
|
|
1634
|
+
)
|
|
1635
|
+
|
|
1636
|
+
result = []
|
|
1637
|
+
for content_row in contents:
|
|
1638
|
+
# Create Content from database row
|
|
1639
|
+
content = Content(
|
|
1640
|
+
id=content_row.id,
|
|
1641
|
+
name=content_row.name,
|
|
1642
|
+
description=content_row.description,
|
|
1643
|
+
metadata=content_row.metadata,
|
|
1644
|
+
size=content_row.size,
|
|
1645
|
+
file_type=content_row.type,
|
|
1646
|
+
status=ContentStatus(content_row.status) if content_row.status else None,
|
|
1647
|
+
status_message=content_row.status_message,
|
|
1648
|
+
created_at=content_row.created_at,
|
|
1649
|
+
updated_at=content_row.updated_at if content_row.updated_at else content_row.created_at,
|
|
1650
|
+
external_id=content_row.external_id,
|
|
1651
|
+
)
|
|
1652
|
+
result.append(content)
|
|
1653
|
+
return result, count
|
|
1654
|
+
|
|
1498
1655
|
def get_content_status(self, content_id: str) -> Tuple[Optional[ContentStatus], Optional[str]]:
|
|
1499
1656
|
if self.contents_db is None:
|
|
1500
1657
|
raise ValueError("No contents db provided")
|
|
1658
|
+
|
|
1659
|
+
if isinstance(self.contents_db, AsyncBaseDb):
|
|
1660
|
+
raise ValueError(
|
|
1661
|
+
"get_content_status() is not supported for async databases. Please use aget_content_status() instead."
|
|
1662
|
+
)
|
|
1663
|
+
|
|
1501
1664
|
content_row = self.contents_db.get_knowledge_content(content_id)
|
|
1502
1665
|
if content_row is None:
|
|
1503
1666
|
return None, "Content not found"
|
|
@@ -1517,6 +1680,33 @@ class Knowledge:
|
|
|
1517
1680
|
|
|
1518
1681
|
return status, content_row.status_message
|
|
1519
1682
|
|
|
1683
|
+
async def aget_content_status(self, content_id: str) -> Tuple[Optional[ContentStatus], Optional[str]]:
|
|
1684
|
+
if self.contents_db is None:
|
|
1685
|
+
raise ValueError("No contents db provided")
|
|
1686
|
+
|
|
1687
|
+
if isinstance(self.contents_db, AsyncBaseDb):
|
|
1688
|
+
content_row = await self.contents_db.get_knowledge_content(content_id)
|
|
1689
|
+
else:
|
|
1690
|
+
content_row = self.contents_db.get_knowledge_content(content_id)
|
|
1691
|
+
|
|
1692
|
+
if content_row is None:
|
|
1693
|
+
return None, "Content not found"
|
|
1694
|
+
|
|
1695
|
+
# Convert string status to enum, defaulting to PROCESSING if unknown
|
|
1696
|
+
status_str = content_row.status
|
|
1697
|
+
try:
|
|
1698
|
+
status = ContentStatus(status_str.lower()) if status_str else ContentStatus.PROCESSING
|
|
1699
|
+
except ValueError:
|
|
1700
|
+
# Handle legacy or unknown statuses
|
|
1701
|
+
if status_str and "failed" in status_str.lower():
|
|
1702
|
+
status = ContentStatus.FAILED
|
|
1703
|
+
elif status_str and "completed" in status_str.lower():
|
|
1704
|
+
status = ContentStatus.COMPLETED
|
|
1705
|
+
else:
|
|
1706
|
+
status = ContentStatus.PROCESSING
|
|
1707
|
+
|
|
1708
|
+
return status, content_row.status_message
|
|
1709
|
+
|
|
1520
1710
|
def remove_content_by_id(self, content_id: str):
|
|
1521
1711
|
from agno.vectordb import VectorDb
|
|
1522
1712
|
|
|
@@ -1535,12 +1725,36 @@ class Knowledge:
|
|
|
1535
1725
|
if self.contents_db is not None:
|
|
1536
1726
|
self.contents_db.delete_knowledge_content(content_id)
|
|
1537
1727
|
|
|
1728
|
+
async def aremove_content_by_id(self, content_id: str):
|
|
1729
|
+
if self.vector_db is not None:
|
|
1730
|
+
if self.vector_db.__class__.__name__ == "LightRag":
|
|
1731
|
+
# For LightRAG, get the content first to find the external_id
|
|
1732
|
+
content = await self.aget_content_by_id(content_id)
|
|
1733
|
+
if content and content.external_id:
|
|
1734
|
+
self.vector_db.delete_by_external_id(content.external_id) # type: ignore
|
|
1735
|
+
else:
|
|
1736
|
+
log_warning(f"No external_id found for content {content_id}, cannot delete from LightRAG")
|
|
1737
|
+
else:
|
|
1738
|
+
self.vector_db.delete_by_content_id(content_id)
|
|
1739
|
+
|
|
1740
|
+
if self.contents_db is not None:
|
|
1741
|
+
if isinstance(self.contents_db, AsyncBaseDb):
|
|
1742
|
+
await self.contents_db.delete_knowledge_content(content_id)
|
|
1743
|
+
else:
|
|
1744
|
+
self.contents_db.delete_knowledge_content(content_id)
|
|
1745
|
+
|
|
1538
1746
|
def remove_all_content(self):
|
|
1539
1747
|
contents, _ = self.get_content()
|
|
1540
1748
|
for content in contents:
|
|
1541
1749
|
if content.id is not None:
|
|
1542
1750
|
self.remove_content_by_id(content.id)
|
|
1543
1751
|
|
|
1752
|
+
async def aremove_all_content(self):
|
|
1753
|
+
contents, _ = await self.aget_content()
|
|
1754
|
+
for content in contents:
|
|
1755
|
+
if content.id is not None:
|
|
1756
|
+
await self.aremove_content_by_id(content.id)
|
|
1757
|
+
|
|
1544
1758
|
# --- Reader Factory Integration ---
|
|
1545
1759
|
|
|
1546
1760
|
def construct_readers(self):
|
|
agno/knowledge/reader/pdf_reader.py
CHANGED
|
@@ -4,7 +4,8 @@ from pathlib import Path
|
|
|
4
4
|
from typing import IO, Any, List, Optional, Tuple, Union
|
|
5
5
|
from uuid import uuid4
|
|
6
6
|
|
|
7
|
-
from agno.knowledge.chunking.strategy import ChunkingStrategyType
|
|
7
|
+
from agno.knowledge.chunking.document import DocumentChunking
|
|
8
|
+
from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
|
|
8
9
|
from agno.knowledge.document.base import Document
|
|
9
10
|
from agno.knowledge.reader.base import Reader
|
|
10
11
|
from agno.knowledge.types import ContentType
|
|
@@ -183,6 +184,7 @@ class BasePDFReader(Reader):
|
|
|
183
184
|
page_start_numbering_format: Optional[str] = None,
|
|
184
185
|
page_end_numbering_format: Optional[str] = None,
|
|
185
186
|
password: Optional[str] = None,
|
|
187
|
+
chunking_strategy: Optional[ChunkingStrategy] = DocumentChunking(chunk_size=5000),
|
|
186
188
|
**kwargs,
|
|
187
189
|
):
|
|
188
190
|
if page_start_numbering_format is None:
|
|
@@ -195,11 +197,7 @@ class BasePDFReader(Reader):
|
|
|
195
197
|
self.page_end_numbering_format = page_end_numbering_format
|
|
196
198
|
self.password = password
|
|
197
199
|
|
|
198
|
-
|
|
199
|
-
from agno.knowledge.chunking.document import DocumentChunking
|
|
200
|
-
|
|
201
|
-
self.chunking_strategy = DocumentChunking(chunk_size=5000)
|
|
202
|
-
super().__init__(**kwargs)
|
|
200
|
+
super().__init__(chunking_strategy=chunking_strategy, **kwargs)
|
|
203
201
|
|
|
204
202
|
@classmethod
|
|
205
203
|
def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
|
|
agno/knowledge/reader/reader_factory.py
CHANGED
|
@@ -16,8 +16,7 @@ class ReaderFactory:
|
|
|
16
16
|
from agno.knowledge.reader.pdf_reader import PDFReader
|
|
17
17
|
|
|
18
18
|
config: Dict[str, Any] = {
|
|
19
|
-
"
|
|
20
|
-
"chunk_size": 100,
|
|
19
|
+
"name": "PDF Reader",
|
|
21
20
|
"description": "Processes PDF documents with OCR support for images and text extraction",
|
|
22
21
|
}
|
|
23
22
|
config.update(kwargs)
|
|
@@ -201,7 +200,7 @@ class ReaderFactory:
|
|
|
201
200
|
return cls.create_reader("pdf")
|
|
202
201
|
elif extension in [".csv", "text/csv"]:
|
|
203
202
|
return cls.create_reader("csv")
|
|
204
|
-
elif extension in [".docx", ".doc"]:
|
|
203
|
+
elif extension in [".docx", ".doc", "application/vnd.openxmlformats-officedocument.wordprocessingml.document"]:
|
|
205
204
|
return cls.create_reader("docx")
|
|
206
205
|
elif extension == ".json":
|
|
207
206
|
return cls.create_reader("json")
|