agno-2.0.3-py3-none-any.whl → agno-2.0.5-py3-none-any.whl
This diff shows the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions exactly as they appear in their public registries.
- agno/agent/agent.py +229 -164
- agno/db/dynamo/dynamo.py +8 -0
- agno/db/firestore/firestore.py +8 -0
- agno/db/gcs_json/gcs_json_db.py +9 -0
- agno/db/json/json_db.py +8 -0
- agno/db/migrations/v1_to_v2.py +191 -23
- agno/db/mongo/mongo.py +68 -0
- agno/db/mysql/mysql.py +13 -3
- agno/db/mysql/schemas.py +27 -27
- agno/db/postgres/postgres.py +19 -11
- agno/db/redis/redis.py +6 -0
- agno/db/singlestore/schemas.py +1 -1
- agno/db/singlestore/singlestore.py +8 -1
- agno/db/sqlite/sqlite.py +12 -3
- agno/integrations/discord/client.py +1 -0
- agno/knowledge/knowledge.py +92 -66
- agno/knowledge/reader/reader_factory.py +7 -3
- agno/knowledge/reader/web_search_reader.py +12 -6
- agno/models/base.py +2 -2
- agno/models/message.py +109 -0
- agno/models/openai/chat.py +3 -0
- agno/models/openai/responses.py +12 -0
- agno/models/response.py +5 -0
- agno/models/siliconflow/__init__.py +5 -0
- agno/models/siliconflow/siliconflow.py +25 -0
- agno/os/app.py +164 -41
- agno/os/auth.py +24 -14
- agno/os/interfaces/agui/utils.py +98 -134
- agno/os/router.py +128 -55
- agno/os/routers/evals/utils.py +9 -9
- agno/os/routers/health.py +25 -0
- agno/os/routers/home.py +52 -0
- agno/os/routers/knowledge/knowledge.py +11 -11
- agno/os/routers/session/session.py +24 -8
- agno/os/schema.py +29 -2
- agno/os/utils.py +0 -8
- agno/run/agent.py +3 -3
- agno/run/team.py +3 -3
- agno/run/workflow.py +64 -10
- agno/session/team.py +1 -0
- agno/team/team.py +189 -94
- agno/tools/duckduckgo.py +15 -11
- agno/tools/googlesearch.py +1 -1
- agno/tools/mem0.py +11 -17
- agno/tools/memory.py +34 -6
- agno/utils/common.py +90 -1
- agno/utils/streamlit.py +14 -8
- agno/utils/string.py +32 -0
- agno/utils/tools.py +1 -1
- agno/vectordb/chroma/chromadb.py +8 -2
- agno/workflow/step.py +115 -16
- agno/workflow/workflow.py +16 -13
- {agno-2.0.3.dist-info → agno-2.0.5.dist-info}/METADATA +6 -5
- {agno-2.0.3.dist-info → agno-2.0.5.dist-info}/RECORD +57 -54
- agno/knowledge/reader/url_reader.py +0 -128
- {agno-2.0.3.dist-info → agno-2.0.5.dist-info}/WHEEL +0 -0
- {agno-2.0.3.dist-info → agno-2.0.5.dist-info}/licenses/LICENSE +0 -0
- {agno-2.0.3.dist-info → agno-2.0.5.dist-info}/top_level.txt +0 -0
agno/db/postgres/postgres.py
CHANGED
@@ -20,6 +20,7 @@ from agno.db.schemas.knowledge import KnowledgeRow
 from agno.db.schemas.memory import UserMemory
 from agno.session import AgentSession, Session, TeamSession, WorkflowSession
 from agno.utils.log import log_debug, log_error, log_info, log_warning
+from agno.utils.string import generate_id

 try:
     from sqlalchemy import Index, String, UniqueConstraint, func, update

@@ -68,6 +69,21 @@ class PostgresDb(BaseDb):
             ValueError: If neither db_url nor db_engine is provided.
             ValueError: If none of the tables are provided.
         """
+        _engine: Optional[Engine] = db_engine
+        if _engine is None and db_url is not None:
+            _engine = create_engine(db_url)
+        if _engine is None:
+            raise ValueError("One of db_url or db_engine must be provided")
+
+        self.db_url: Optional[str] = db_url
+        self.db_engine: Engine = _engine
+
+        if id is None:
+            base_seed = db_url or str(db_engine.url)  # type: ignore
+            schema_suffix = db_schema if db_schema is not None else "ai"
+            seed = f"{base_seed}#{schema_suffix}"
+            id = generate_id(seed)
+
         super().__init__(
             id=id,
             session_table=session_table,

@@ -77,14 +93,6 @@ class PostgresDb(BaseDb):
             knowledge_table=knowledge_table,
         )

-        _engine: Optional[Engine] = db_engine
-        if _engine is None and db_url is not None:
-            _engine = create_engine(db_url)
-        if _engine is None:
-            raise ValueError("One of db_url or db_engine must be provided")
-
-        self.db_url: Optional[str] = db_url
-        self.db_engine: Engine = _engine
         self.db_schema: str = db_schema if db_schema is not None else "ai"
         self.metadata: MetaData = MetaData()

@@ -1692,17 +1700,17 @@ class PostgresDb(BaseDb):
         if v1_table_type == "agent_sessions":
             for session in sessions:
                 self.upsert_session(session)
-            log_info(f"Migrated {len(sessions)} Agent sessions to table: {self.
+            log_info(f"Migrated {len(sessions)} Agent sessions to table: {self.session_table_name}")

         elif v1_table_type == "team_sessions":
             for session in sessions:
                 self.upsert_session(session)
-            log_info(f"Migrated {len(sessions)} Team sessions to table: {self.
+            log_info(f"Migrated {len(sessions)} Team sessions to table: {self.session_table_name}")

         elif v1_table_type == "workflow_sessions":
             for session in sessions:
                 self.upsert_session(session)
-            log_info(f"Migrated {len(sessions)} Workflow sessions to table: {self.
+            log_info(f"Migrated {len(sessions)} Workflow sessions to table: {self.session_table_name}")

         elif v1_table_type == "memories":
             for memory in memories:
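The change above replaces random database ids with deterministic ones: the id is seeded from the connection URL plus schema, so reconnecting to the same database yields the same id. A minimal sketch of the idea — the real `generate_id` lives in `agno/utils/string.py`, and the md5-to-UUID scheme below is an illustrative assumption, not the shipped implementation:

```python
import hashlib
import uuid

def generate_id_sketch(seed: str) -> str:
    # Hash the seed and render the digest as a UUID, so the same
    # db_url + schema pair always maps to the same database id.
    digest = hashlib.md5(seed.encode("utf-8")).hexdigest()
    return str(uuid.UUID(digest))

seed = "postgresql+psycopg://user:pass@localhost:5432/ai#ai"  # hypothetical db_url + "#" + schema
assert generate_id_sketch(seed) == generate_id_sketch(seed)   # stable across runs
```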
agno/db/redis/redis.py
CHANGED
@@ -23,6 +23,7 @@ from agno.db.schemas.knowledge import KnowledgeRow
 from agno.db.schemas.memory import UserMemory
 from agno.session import AgentSession, Session, TeamSession, WorkflowSession
 from agno.utils.log import log_debug, log_error, log_info
+from agno.utils.string import generate_id

 try:
     from redis import Redis

@@ -67,6 +68,11 @@ class RedisDb(BaseDb):
         Raises:
             ValueError: If neither redis_client nor db_url is provided.
         """
+        if id is None:
+            base_seed = db_url or str(redis_client)
+            seed = f"{base_seed}#{db_prefix}"
+            id = generate_id(seed)
+
         super().__init__(
             id=id,
             session_table=session_table,
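For Redis the seed falls back to `str(redis_client)` when no `db_url` is given; with redis-py, a client's string form embeds its connection details, so identically configured clients seed the same id. A small sketch (the `"agno"` suffix stands in for whatever `db_prefix` is passed):

```python
from redis import Redis

client = Redis(host="localhost", port=6379, db=0)
# str(client) includes host/port/db, so two clients configured identically
# produce the same seed string, and thus the same generated id.
seed = f"{client}#agno"
```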
agno/db/singlestore/schemas.py
CHANGED
@@ -106,7 +106,7 @@ def get_table_schema_definition(table_type: str) -> dict[str, Any]:
         "evals": EVAL_TABLE_SCHEMA,
         "metrics": METRICS_TABLE_SCHEMA,
         "memories": USER_MEMORY_TABLE_SCHEMA,
-        "
+        "knowledge": KNOWLEDGE_TABLE_SCHEMA,
     }
     schema = schemas.get(table_type, {})
agno/db/singlestore/singlestore.py
CHANGED

@@ -21,6 +21,7 @@ from agno.db.singlestore.utils import (
 )
 from agno.session import AgentSession, Session, TeamSession, WorkflowSession
 from agno.utils.log import log_debug, log_error, log_info, log_warning
+from agno.utils.string import generate_id

 try:
     from sqlalchemy import Index, UniqueConstraint, and_, func, update

@@ -69,6 +70,12 @@ class SingleStoreDb(BaseDb):
             ValueError: If neither db_url nor db_engine is provided.
             ValueError: If none of the tables are provided.
         """
+        if id is None:
+            base_seed = db_url or str(db_engine.url) if db_engine else "singlestore"  # type: ignore
+            schema_suffix = db_schema if db_schema is not None else "ai"
+            seed = f"{base_seed}#{schema_suffix}"
+            id = generate_id(seed)
+
         super().__init__(
             id=id,
             session_table=session_table,

@@ -156,10 +163,10 @@ class SingleStoreDb(BaseDb):
         Returns:
             Table: SQLAlchemy Table object
         """
+        table_ref = f"{db_schema}.{table_name}" if db_schema else table_name
         try:
             table_schema = get_table_schema_definition(table_type)

-            table_ref = f"{db_schema}.{table_name}" if db_schema else table_name
             log_debug(f"Creating table {table_ref} with schema: {table_schema}")

             columns: List[Column] = []
agno/db/sqlite/sqlite.py
CHANGED
@@ -21,6 +21,7 @@ from agno.db.sqlite.utils import (
 from agno.db.utils import deserialize_session_json_fields, serialize_session_json_fields
 from agno.session import AgentSession, Session, TeamSession, WorkflowSession
 from agno.utils.log import log_debug, log_error, log_info, log_warning
+from agno.utils.string import generate_id

 try:
     from sqlalchemy import Column, MetaData, Table, and_, func, select, text, update

@@ -43,6 +44,7 @@ class SqliteDb(BaseDb):
         metrics_table: Optional[str] = None,
         eval_table: Optional[str] = None,
         knowledge_table: Optional[str] = None,
+        id: Optional[str] = None,
     ):
         """
         Interface for interacting with a SQLite database.

@@ -62,11 +64,17 @@ class SqliteDb(BaseDb):
             metrics_table (Optional[str]): Name of the table to store metrics.
             eval_table (Optional[str]): Name of the table to store evaluation runs data.
             knowledge_table (Optional[str]): Name of the table to store knowledge documents data.
+            id (Optional[str]): ID of the database.

         Raises:
             ValueError: If none of the tables are provided.
         """
+        if id is None:
+            seed = db_url or db_file or str(db_engine.url) if db_engine else "sqlite:///agno.db"
+            id = generate_id(seed)
+
         super().__init__(
+            id=id,
             session_table=session_table,
             memory_table=memory_table,
             metrics_table=metrics_table,

@@ -1341,6 +1349,7 @@ class SqliteDb(BaseDb):
             "linked_to": knowledge_row.linked_to,
             "access_count": knowledge_row.access_count,
             "status": knowledge_row.status,
+            "status_message": knowledge_row.status_message,
             "created_at": knowledge_row.created_at,
             "updated_at": knowledge_row.updated_at,
             "external_id": knowledge_row.external_id,

@@ -1658,17 +1667,17 @@ class SqliteDb(BaseDb):
         if v1_table_type == "agent_sessions":
             for session in sessions:
                 self.upsert_session(session)
-            log_info(f"Migrated {len(sessions)} Agent sessions to table: {self.
+            log_info(f"Migrated {len(sessions)} Agent sessions to table: {self.session_table_name}")

         elif v1_table_type == "team_sessions":
             for session in sessions:
                 self.upsert_session(session)
-            log_info(f"Migrated {len(sessions)} Team sessions to table: {self.
+            log_info(f"Migrated {len(sessions)} Team sessions to table: {self.session_table_name}")

         elif v1_table_type == "workflow_sessions":
             for session in sessions:
                 self.upsert_session(session)
-            log_info(f"Migrated {len(sessions)} Workflow sessions to table: {self.
+            log_info(f"Migrated {len(sessions)} Workflow sessions to table: {self.session_table_name}")

         elif v1_table_type == "memories":
             for memory in memories:
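`SqliteDb` now also accepts an explicit `id`; when omitted, one is generated from a seed built out of the connection settings. A usage sketch, assuming `SqliteDb` is importable from `agno.db.sqlite` and with illustrative table names:

```python
from agno.db.sqlite import SqliteDb

# Pin the database identity explicitly...
db = SqliteDb(db_file="agno.db", session_table="sessions", id="my-db-id")

# ...or let the constructor derive a deterministic id from its seed.
db_auto = SqliteDb(db_file="agno.db", session_table="sessions")
```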
agno/integrations/discord/client.py
CHANGED

@@ -112,6 +112,7 @@ class DiscordClient:
         # TODO Unhappy with the duplication here but it keeps MyPy from complaining
         additional_context = dedent(f"""
             Discord username: {message_user}
+            Discord userid: {message_user_id}
             Discord url: {message_url}
         """)
         if self.agent:
agno/knowledge/knowledge.py
CHANGED
@@ -9,7 +9,6 @@ from io import BytesIO
 from os.path import basename
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Set, Tuple, Union, cast, overload
-from uuid import uuid4

 from httpx import AsyncClient

@@ -21,6 +20,7 @@ from agno.knowledge.reader import Reader, ReaderFactory
 from agno.knowledge.remote_content.remote_content import GCSContent, RemoteContent, S3Content
 from agno.utils.http import async_fetch_with_retry
 from agno.utils.log import log_debug, log_error, log_info, log_warning
+from agno.utils.string import generate_id
 from agno.vectordb import VectorDb

 ContentDict = Dict[str, Union[str, Dict[str, str]]]

@@ -241,7 +241,6 @@ class Knowledge:
         file_data = FileData(content=text_content, type="Text")

         content = Content(
-            id=str(uuid4()),
             name=name,
             description=description,
             path=path,

@@ -253,6 +252,8 @@ class Knowledge:
             reader=reader,
             auth=auth,
         )
+        content.content_hash = self._build_content_hash(content)
+        content.id = generate_id(content.content_hash)

         await self._load_content(content, upsert, skip_if_exists, include, exclude)

@@ -303,7 +304,7 @@ class Knowledge:
             text_content: Optional text content to add directly
             metadata: Optional metadata dictionary
             topics: Optional list of topics
-
+            remote_content: Optional cloud storage configuration
             reader: Optional custom reader for processing the content
             include: Optional list of file patterns to include
             exclude: Optional list of file patterns to exclude
@@ -329,6 +330,22 @@ class Knowledge:
             )
         )

+    def _should_skip(self, content_hash: str, skip_if_exists: bool) -> bool:
+        """
+        Handle the skip_if_exists logic for content that already exists in the vector database.
+
+        Args:
+            content_hash: The content hash string to check for existence
+            skip_if_exists: Whether to skip if content already exists
+
+        Returns:
+            bool: True if should skip processing, False if should continue
+        """
+        if self.vector_db and self.vector_db.content_hash_exists(content_hash) and skip_if_exists:
+            return True
+
+        return False
+
     async def _load_from_path(
         self,
         content: Content,
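`_should_skip` centralizes a check that was previously inlined at every load site, and (per the hunks below) callers now record the content row first and mark it COMPLETED rather than silently returning. A self-contained sketch of the hash-based dedup; the sha256 digest and the in-memory set are illustrative stand-ins for `_build_content_hash` and `vector_db.content_hash_exists`:

```python
import hashlib

indexed_hashes: set[str] = set()  # stand-in for the vector store's hash index

def build_content_hash(name: str, payload: str) -> str:
    return hashlib.sha256(f"{name}:{payload}".encode("utf-8")).hexdigest()

def should_skip(content_hash: str, skip_if_exists: bool) -> bool:
    # Mirrors Knowledge._should_skip: skip only when the hash is already
    # indexed AND the caller asked for duplicates to be skipped.
    return skip_if_exists and content_hash in indexed_hashes

h = build_content_hash("recipes.pdf", "raw file bytes")
if not should_skip(h, skip_if_exists=True):
    indexed_hashes.add(h)                   # first load: index the content
assert should_skip(h, skip_if_exists=True)  # second load: skipped, marked COMPLETED
```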
@@ -344,18 +361,17 @@ class Knowledge:
         if self._should_include_file(str(path), include, exclude):
             log_info(f"Adding file {path} due to include/exclude filters")

+            self._add_to_contents_db(content)
+            if self._should_skip(content.content_hash, skip_if_exists):  # type: ignore[arg-type]
+                content.status = ContentStatus.COMPLETED
+                self._update_content(content)
+                return
+
             # Handle LightRAG special case - read file and upload directly
             if self.vector_db.__class__.__name__ == "LightRag":
                 await self._process_lightrag_content(content, KnowledgeContentOrigin.PATH)
                 return

-            content.content_hash = self._build_content_hash(content)
-            if self.vector_db and self.vector_db.content_hash_exists(content.content_hash) and skip_if_exists:
-                log_info(f"Content {content.content_hash} already exists, skipping")
-                return
-
-            self._add_to_contents_db(content)
-
             if content.reader:
                 # TODO: We will refactor this to eventually pass authorization to all readers
                 import inspect

@@ -407,15 +423,16 @@ class Knowledge:
                     log_debug(f"Skipping file {file_path} due to include/exclude filters")
                     continue

-                id = str(uuid4())
                 file_content = Content(
-                    id=id,
                     name=content.name,
                     path=str(file_path),
                     metadata=content.metadata,
                     description=content.description,
                     reader=content.reader,
                 )
+                file_content.content_hash = self._build_content_hash(file_content)
+                file_content.id = generate_id(file_content.content_hash)
+
                 await self._load_from_path(file_content, upsert, skip_if_exists, include, exclude)
         else:
             log_warning(f"Invalid path: {path}")

@@ -439,16 +456,16 @@ class Knowledge:
         if not content.url:
             raise ValueError("No url provided")

-
-
+        # 1. Add content to contents database
+        self._add_to_contents_db(content)
+        if self._should_skip(content.content_hash, skip_if_exists):  # type: ignore[arg-type]
+            content.status = ContentStatus.COMPLETED
+            self._update_content(content)
             return

-
-
-        if self.vector_db and self.vector_db.content_hash_exists(content.content_hash) and skip_if_exists:
-            log_info(f"Content {content.content_hash} already exists, skipping")
+        if self.vector_db.__class__.__name__ == "LightRag":
+            await self._process_lightrag_content(content, KnowledgeContentOrigin.URL)
             return
-        self._add_to_contents_db(content)

         # 2. Validate URL
         try:

@@ -466,19 +483,23 @@ class Knowledge:
             self._update_content(content)
             log_warning(f"Invalid URL: {content.url} - {str(e)}")

-        # 3. Fetch and load content
-
-
-
+        # 3. Fetch and load content if file has an extension
+        url_path = Path(parsed_url.path)
+        file_extension = url_path.suffix.lower()
+
+        bytes_content = None
+        if file_extension:
+            async with AsyncClient() as client:
+                response = await async_fetch_with_retry(content.url, client=client)
+                bytes_content = BytesIO(response.content)

         # 4. Select reader
         # If a reader was provided by the user, use it
         reader = content.reader
         name = content.name if content.name else content.url
         # Else select based on file extension
+
         if reader is None:
-            url_path = Path(parsed_url.path)
-            file_extension = url_path.suffix.lower()
             if file_extension == ".csv":
                 name = basename(parsed_url.path) or "data.csv"
                 reader = self.csv_reader

@@ -504,9 +525,15 @@ class Knowledge:
                 if reader.__class__.__name__ == "YouTubeReader":
                     read_documents = reader.read(content.url, name=name)
                 elif "password" in read_signature.parameters and content.auth and content.auth.password:
-
+                    if bytes_content:
+                        read_documents = reader.read(bytes_content, name=name, password=content.auth.password)
+                    else:
+                        read_documents = reader.read(content.url, name=name, password=content.auth.password)
                 else:
-
+                    if bytes_content:
+                        read_documents = reader.read(bytes_content, name=name)
+                    else:
+                        read_documents = reader.read(content.url, name=name)
             except Exception as e:
                 log_error(f"Error reading URL: {content.url} - {str(e)}")
                 content.status = ContentStatus.FAILED
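The URL path above now downloads bytes only when the URL ends in a file extension; extensionless URLs (ordinary web pages) are handed to the reader as a URL instead. A sketch of that dispatch rule using only the standard library:

```python
from pathlib import Path
from urllib.parse import urlparse

def needs_download(url: str) -> bool:
    # True when the URL path carries a file extension (.pdf, .csv, ...),
    # in which case the content is fetched into a BytesIO before reading.
    return bool(Path(urlparse(url).path).suffix.lower())

assert needs_download("https://example.com/reports/q3.pdf")
assert not needs_download("https://example.com/blog/launch-post")
```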
@@ -554,16 +581,15 @@ class Knowledge:

         log_info(f"Adding content from {content.name}")

-
+        self._add_to_contents_db(content)
+        if self._should_skip(content.content_hash, skip_if_exists):  # type: ignore[arg-type]
+            content.status = ContentStatus.COMPLETED
+            self._update_content(content)
             return

-        content.
-
-        log_info(f"Content {content.content_hash} already exists, skipping")
-
+        if content.file_data and self.vector_db.__class__.__name__ == "LightRag":
+            await self._process_lightrag_content(content, KnowledgeContentOrigin.CONTENT)
             return
-        self._add_to_contents_db(content)

         read_documents = []

@@ -612,7 +638,6 @@ class Knowledge:
             reader = self._select_reader(content.file_data.type)
             name = content.name if content.name else f"content_{content.file_data.type}"
             read_documents = reader.read(content_io, name=name)
-
         for read_document in read_documents:
             if content.metadata:
                 read_document.meta_data.update(content.metadata)

@@ -644,9 +669,7 @@ class Knowledge:
             return

         for topic in content.topics:
-            id = str(uuid4())
             content = Content(
-                id=id,
                 name=topic,
                 metadata=content.metadata,
                 reader=content.reader,

@@ -656,30 +679,37 @@ class Knowledge:
                 ),
                 topics=[topic],
             )
+            content.content_hash = self._build_content_hash(content)
+            content.id = generate_id(content.content_hash)
+
+            self._add_to_contents_db(content)
+            if self._should_skip(content.content_hash, skip_if_exists):
+                content.status = ContentStatus.COMPLETED
+                self._update_content(content)
+                return

             if self.vector_db.__class__.__name__ == "LightRag":
                 await self._process_lightrag_content(content, KnowledgeContentOrigin.TOPIC)
                 return

-            content.content_hash = self._build_content_hash(content)
-            if self.vector_db and self.vector_db.content_hash_exists(content.content_hash) and skip_if_exists:
-                log_info(f"Content {content.content_hash} already exists, skipping")
-                continue
-
-            self._add_to_contents_db(content)
             if content.reader is None:
                 log_error(f"No reader available for topic: {topic}")
+                content.status = ContentStatus.FAILED
+                content.status_message = "No reader available for topic"
+                self._update_content(content)
                 continue
+
             read_documents = content.reader.read(topic)
             if len(read_documents) > 0:
                 for read_document in read_documents:
-                    read_document.content_id = id
+                    read_document.content_id = content.id
                     if read_document.content:
                         read_document.size = len(read_document.content.encode("utf-8"))
             else:
                 content.status = ContentStatus.FAILED
                 content.status_message = "No content found for topic"
                 self._update_content(content)
+                continue

             await self._handle_vector_db_insert(content, read_documents, upsert)

@@ -735,11 +765,9 @@ class Knowledge:

         for s3_object in objects_to_read:
             # 2. Setup Content object
-            id = str(uuid4())
             content_name = content.name or ""
             content_name += "_" + (s3_object.name or "")
             content_entry = Content(
-                id=id,
                 name=content_name,
                 description=content.description,
                 status=ContentStatus.PROCESSING,

@@ -748,11 +776,13 @@ class Knowledge:
             )

             # 3. Hash content and add it to the contents database
-            content_hash = self._build_content_hash(content_entry)
-
-                log_info(f"Content {content_hash} already exists, skipping")
-                continue
+            content_entry.content_hash = self._build_content_hash(content_entry)
+            content_entry.id = generate_id(content_entry.content_hash)
             self._add_to_contents_db(content_entry)
+            if self._should_skip(content_entry.content_hash, skip_if_exists):
+                content_entry.status = ContentStatus.COMPLETED
+                self._update_content(content_entry)
+                return

             # 4. Select reader
             reader = content.reader

@@ -818,10 +848,8 @@ class Knowledge:

         for gcs_object in objects_to_read:
             # 2. Setup Content object
-            id = str(uuid4())
             name = (content.name or "content") + "_" + gcs_object.name
             content_entry = Content(
-                id=id,
                 name=name,
                 description=content.description,
                 status=ContentStatus.PROCESSING,

@@ -830,15 +858,15 @@ class Knowledge:
             )

             # 3. Hash content and add it to the contents database
-            content_hash = self._build_content_hash(content_entry)
-
-                log_info(f"Content {content_hash} already exists, skipping")
-                continue
-
-            # 4. Add it to the contents database
+            content_entry.content_hash = self._build_content_hash(content_entry)
+            content_entry.id = generate_id(content_entry.content_hash)
             self._add_to_contents_db(content_entry)
+            if self._should_skip(content_entry.content_hash, skip_if_exists):
+                content_entry.status = ContentStatus.COMPLETED
+                self._update_content(content_entry)
+                return

-            #
+            # 4. Select reader
             reader = content.reader
             if reader is None:
                 if gcs_object.name.endswith(".pdf"):

@@ -866,7 +894,7 @@ class Knowledge:
                 read_document.content_id = content.id
             await self._handle_vector_db_insert(content_entry, read_documents, upsert)

-    async def _handle_vector_db_insert(self, content, read_documents, upsert):
+    async def _handle_vector_db_insert(self, content: Content, read_documents, upsert):
         if not self.vector_db:
             log_error("No vector database configured")
             content.status = ContentStatus.FAILED

@@ -876,7 +904,7 @@ class Knowledge:

         if self.vector_db.upsert_available() and upsert:
             try:
-                await self.vector_db.async_upsert(content.content_hash, read_documents, content.metadata)
+                await self.vector_db.async_upsert(content.content_hash, read_documents, content.metadata)  # type: ignore[arg-type]
             except Exception as e:
                 log_error(f"Error upserting document: {e}")
                 content.status = ContentStatus.FAILED

@@ -886,7 +914,9 @@ class Knowledge:
         else:
             try:
                 await self.vector_db.async_insert(
-                    content.content_hash,
+                    content.content_hash,  # type: ignore[arg-type]
+                    documents=read_documents,
+                    filters=content.metadata,  # type: ignore[arg-type]
                 )
             except Exception as e:
                 log_error(f"Error inserting document: {e}")

@@ -1010,7 +1040,6 @@ class Knowledge:
         content_row.status_message = content.status_message if content.status_message else ""
         if content.external_id is not None:
             content_row.external_id = content.external_id
-
         content_row.updated_at = int(time.time())
         self.contents_db.upsert_knowledge_content(knowledge_row=content_row)

@@ -1161,9 +1190,6 @@ class Knowledge:

         read_documents = content.reader.read(content.topics)
         if len(read_documents) > 0:
-            print("READ DOCUMENTS: ", len(read_documents))
-            print("READ DOCUMENTS: ", read_documents[0])
-
             if self.vector_db and hasattr(self.vector_db, "insert_text"):
                 result = await self.vector_db.insert_text(
                     file_source=content.topics[0],
agno/knowledge/reader/reader_factory.py
CHANGED

@@ -210,8 +210,8 @@ class ReaderFactory:
         if any(domain in url_lower for domain in ["youtube.com", "youtu.be"]):
             return cls.create_reader("youtube")

-        # Default to
-        return cls.create_reader("
+        # Default to website reader
+        return cls.create_reader("website")

     @classmethod
     def get_all_reader_keys(cls) -> List[str]:

@@ -228,7 +228,11 @@ class ReaderFactory:
         reader_keys.append(reader_key)

         # Define priority order for URL readers
-        url_reader_priority = [
+        url_reader_priority = [
+            "website",
+            "firecrawl",
+            "youtube",
+        ]

         # Sort with URL readers in priority order, others alphabetically
         def sort_key(reader_key):
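The diff fills in the priority list but leaves `sort_key` itself unchanged; the intended effect is that URL readers sort first, in list order, with the remaining reader keys alphabetical. An illustrative sketch of such a sort (the helper below is an assumption about the factory's sorting, not its verbatim code):

```python
url_reader_priority = ["website", "firecrawl", "youtube"]

def sort_key(reader_key: str):
    if reader_key in url_reader_priority:
        return (0, url_reader_priority.index(reader_key))
    return (1, reader_key)  # everything else: after the URL readers, alphabetically

print(sorted(["pdf", "youtube", "csv", "website", "firecrawl"], key=sort_key))
# ['website', 'firecrawl', 'youtube', 'csv', 'pdf']
```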
agno/knowledge/reader/web_search_reader.py
CHANGED

@@ -96,7 +96,7 @@ class WebSearchReader(Reader):
             results.append(
                 {
                     "title": result.get("title", ""),
-                    "url": result.get("
+                    "url": result.get("href", ""),
                     "description": result.get("body", ""),
                 }
             )

@@ -136,14 +136,20 @@ class WebSearchReader(Reader):
             self._respect_rate_limits()

             results = []
-
+            # Use the basic search function without unsupported parameters
+            # The googlesearch-python library's search function only accepts basic parameters
+            search_results = search(query)

-
+            # Convert iterator to list and limit results
+            result_list = list(search_results)[: self.max_results]
+
+            for result in result_list:
+                # The search function returns URLs as strings
                 results.append(
                     {
-                        "title":
-                        "url":
-                        "description":
+                        "title": "",  # Google search doesn't provide titles directly
+                        "url": result,
+                        "description": "",  # Google search doesn't provide descriptions directly
                     }
                 )
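The Google fallback now calls `search()` with the query alone and truncates the result iterator itself, since the basic `search()` of googlesearch-python yields plain URL strings with no titles or snippets. A minimal usage sketch of that path, assuming the googlesearch-python package is installed:

```python
from googlesearch import search  # googlesearch-python

max_results = 5
urls = list(search("agno agent framework"))[:max_results]

# Same shape as the dicts built in the diff above: URL only.
results = [{"title": "", "url": u, "description": ""} for u in urls]
```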
agno/models/base.py
CHANGED
@@ -1228,7 +1228,7 @@ class Model(ABC):
                 function_execution_result=function_execution_result,
             )
             yield ModelResponse(
-                content=f"{function_call.get_call_str()} completed in {function_call_timer.elapsed:.4f}s.",
+                content=f"{function_call.get_call_str()} completed in {function_call_timer.elapsed:.4f}s. ",
                 tool_executions=[
                     ToolExecution(
                         tool_call_id=function_call_result.tool_call_id,

@@ -1632,7 +1632,7 @@ class Model(ABC):
                 function_execution_result=function_execution_result,
             )
             yield ModelResponse(
-                content=f"{function_call.get_call_str()} completed in {function_call_timer.elapsed:.4f}s.",
+                content=f"{function_call.get_call_str()} completed in {function_call_timer.elapsed:.4f}s. ",
                 tool_executions=[
                     ToolExecution(
                         tool_call_id=function_call_result.tool_call_id,