agno-2.0.4-py3-none-any.whl → agno-2.0.6-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agno/agent/agent.py +127 -102
- agno/db/dynamo/dynamo.py +9 -7
- agno/db/firestore/firestore.py +7 -4
- agno/db/gcs_json/gcs_json_db.py +6 -4
- agno/db/json/json_db.py +10 -6
- agno/db/migrations/v1_to_v2.py +191 -23
- agno/db/mongo/mongo.py +67 -6
- agno/db/mysql/mysql.py +7 -6
- agno/db/mysql/schemas.py +27 -27
- agno/db/postgres/postgres.py +7 -6
- agno/db/redis/redis.py +3 -3
- agno/db/singlestore/singlestore.py +4 -4
- agno/db/sqlite/sqlite.py +7 -6
- agno/db/utils.py +0 -14
- agno/integrations/discord/client.py +1 -0
- agno/knowledge/embedder/openai.py +19 -11
- agno/knowledge/knowledge.py +11 -10
- agno/knowledge/reader/reader_factory.py +7 -3
- agno/knowledge/reader/web_search_reader.py +12 -6
- agno/knowledge/reader/website_reader.py +33 -16
- agno/media.py +70 -0
- agno/models/aimlapi/aimlapi.py +2 -2
- agno/models/base.py +31 -4
- agno/models/cerebras/cerebras_openai.py +2 -2
- agno/models/deepinfra/deepinfra.py +2 -2
- agno/models/deepseek/deepseek.py +2 -2
- agno/models/fireworks/fireworks.py +2 -2
- agno/models/internlm/internlm.py +2 -2
- agno/models/langdb/langdb.py +4 -4
- agno/models/litellm/litellm_openai.py +2 -2
- agno/models/message.py +135 -0
- agno/models/meta/llama_openai.py +2 -2
- agno/models/nebius/nebius.py +2 -2
- agno/models/nexus/__init__.py +3 -0
- agno/models/nexus/nexus.py +25 -0
- agno/models/nvidia/nvidia.py +2 -2
- agno/models/openai/responses.py +6 -0
- agno/models/openrouter/openrouter.py +2 -2
- agno/models/perplexity/perplexity.py +2 -2
- agno/models/portkey/portkey.py +3 -3
- agno/models/response.py +2 -1
- agno/models/sambanova/sambanova.py +2 -2
- agno/models/together/together.py +2 -2
- agno/models/vercel/v0.py +2 -2
- agno/models/xai/xai.py +2 -2
- agno/os/app.py +162 -42
- agno/os/interfaces/agui/utils.py +98 -134
- agno/os/router.py +3 -1
- agno/os/routers/health.py +0 -1
- agno/os/routers/home.py +52 -0
- agno/os/routers/knowledge/knowledge.py +2 -2
- agno/os/schema.py +21 -0
- agno/os/utils.py +1 -9
- agno/run/agent.py +19 -3
- agno/run/team.py +18 -3
- agno/run/workflow.py +10 -0
- agno/team/team.py +70 -45
- agno/tools/duckduckgo.py +15 -11
- agno/tools/e2b.py +14 -7
- agno/tools/file_generation.py +350 -0
- agno/tools/function.py +2 -0
- agno/tools/googlesearch.py +1 -1
- agno/utils/gemini.py +24 -4
- agno/utils/string.py +32 -0
- agno/utils/tools.py +1 -1
- agno/vectordb/chroma/chromadb.py +66 -25
- agno/vectordb/lancedb/lance_db.py +15 -4
- agno/vectordb/milvus/milvus.py +6 -0
- agno/workflow/step.py +4 -3
- agno/workflow/workflow.py +4 -0
- {agno-2.0.4.dist-info → agno-2.0.6.dist-info}/METADATA +9 -5
- {agno-2.0.4.dist-info → agno-2.0.6.dist-info}/RECORD +75 -72
- agno/knowledge/reader/url_reader.py +0 -128
- {agno-2.0.4.dist-info → agno-2.0.6.dist-info}/WHEEL +0 -0
- {agno-2.0.4.dist-info → agno-2.0.6.dist-info}/licenses/LICENSE +0 -0
- {agno-2.0.4.dist-info → agno-2.0.6.dist-info}/top_level.txt +0 -0
agno/db/redis/redis.py
CHANGED

```diff
@@ -21,9 +21,9 @@ from agno.db.redis.utils import (
 from agno.db.schemas.evals import EvalFilterType, EvalRunRecord, EvalType
 from agno.db.schemas.knowledge import KnowledgeRow
 from agno.db.schemas.memory import UserMemory
-from agno.db.utils import generate_deterministic_id
 from agno.session import AgentSession, Session, TeamSession, WorkflowSession
 from agno.utils.log import log_debug, log_error, log_info
+from agno.utils.string import generate_id

 try:
     from redis import Redis
@@ -71,7 +71,7 @@ class RedisDb(BaseDb):
         if id is None:
             base_seed = db_url or str(redis_client)
             seed = f"{base_seed}#{db_prefix}"
-            id = generate_deterministic_id(seed)
+            id = generate_id(seed)

         super().__init__(
             id=id,
@@ -300,8 +300,8 @@ class RedisDb(BaseDb):

         Args:
             session_id (str): The ID of the session to get.
+            session_type (SessionType): The type of session to get.
             user_id (Optional[str]): The ID of the user to filter by.
-            session_type (Optional[SessionType]): The type of session to filter by.

         Returns:
             Optional[Union[AgentSession, TeamSession, WorkflowSession]]: The session if found, None otherwise.
```
agno/db/singlestore/singlestore.py
CHANGED

```diff
@@ -19,9 +19,9 @@ from agno.db.singlestore.utils import (
     is_table_available,
     is_valid_table,
 )
-from agno.db.utils import generate_deterministic_id
 from agno.session import AgentSession, Session, TeamSession, WorkflowSession
 from agno.utils.log import log_debug, log_error, log_info, log_warning
+from agno.utils.string import generate_id

 try:
     from sqlalchemy import Index, UniqueConstraint, and_, func, update
@@ -74,7 +74,7 @@ class SingleStoreDb(BaseDb):
             base_seed = db_url or str(db_engine.url) if db_engine else "singlestore"  # type: ignore
             schema_suffix = db_schema if db_schema is not None else "ai"
             seed = f"{base_seed}#{schema_suffix}"
-            id = generate_deterministic_id(seed)
+            id = generate_id(seed)

         super().__init__(
             id=id,
@@ -431,8 +431,8 @@ class SingleStoreDb(BaseDb):

         Args:
             session_id (str): ID of the session to read.
+            session_type (SessionType): Type of session to get.
             user_id (Optional[str]): User ID to filter by. Defaults to None.
-            session_type (Optional[SessionType]): Type of session to read. Defaults to None.
             deserialize (Optional[bool]): Whether to serialize the session. Defaults to True.

         Returns:
@@ -496,7 +496,7 @@ class SingleStoreDb(BaseDb):
         Get all sessions in the given table. Can filter by user_id and entity_id.

         Args:
-            session_type (Optional[SessionType]): The type of session to filter by.
+            session_type (Optional[SessionType]): The type of session to filter by.
             user_id (Optional[str]): The ID of the user to filter by.
             component_id (Optional[str]): The ID of the agent / workflow to filter by.
             session_name (Optional[str]): The name of the session to filter by.
```
agno/db/sqlite/sqlite.py
CHANGED

```diff
@@ -18,9 +18,10 @@ from agno.db.sqlite.utils import (
     is_table_available,
     is_valid_table,
 )
-from agno.db.utils import deserialize_session_json_fields, generate_deterministic_id, serialize_session_json_fields
+from agno.db.utils import deserialize_session_json_fields, serialize_session_json_fields
 from agno.session import AgentSession, Session, TeamSession, WorkflowSession
 from agno.utils.log import log_debug, log_error, log_info, log_warning
+from agno.utils.string import generate_id

 try:
     from sqlalchemy import Column, MetaData, Table, and_, func, select, text, update
@@ -70,7 +71,7 @@ class SqliteDb(BaseDb):
         """
         if id is None:
             seed = db_url or db_file or str(db_engine.url) if db_engine else "sqlite:///agno.db"
-            id = generate_deterministic_id(seed)
+            id = generate_id(seed)

         super().__init__(
             id=id,
@@ -332,8 +333,8 @@ class SqliteDb(BaseDb):

         Args:
             session_id (str): ID of the session to read.
+            session_type (SessionType): Type of session to get.
             user_id (Optional[str]): User ID to filter by. Defaults to None.
-            session_type (Optional[SessionType]): Type of session to read. Defaults to None.
             deserialize (Optional[bool]): Whether to serialize the session. Defaults to True.

         Returns:
@@ -1666,17 +1667,17 @@ class SqliteDb(BaseDb):
         if v1_table_type == "agent_sessions":
             for session in sessions:
                 self.upsert_session(session)
-            log_info(f"Migrated {len(sessions)} Agent sessions to table: {self.…}")
+            log_info(f"Migrated {len(sessions)} Agent sessions to table: {self.session_table_name}")

         elif v1_table_type == "team_sessions":
             for session in sessions:
                 self.upsert_session(session)
-            log_info(f"Migrated {len(sessions)} Team sessions to table: {self.…}")
+            log_info(f"Migrated {len(sessions)} Team sessions to table: {self.session_table_name}")

         elif v1_table_type == "workflow_sessions":
             for session in sessions:
                 self.upsert_session(session)
-            log_info(f"Migrated {len(sessions)} Workflow sessions to table: {self.…}")
+            log_info(f"Migrated {len(sessions)} Workflow sessions to table: {self.session_table_name}")

         elif v1_table_type == "memories":
             for memory in memories:
```
agno/db/utils.py
CHANGED

```diff
@@ -1,7 +1,6 @@
 """Logic shared across different database implementations"""

 import json
-import uuid
 from datetime import date, datetime
 from uuid import UUID

@@ -87,16 +86,3 @@ def deserialize_session_json_fields(session: dict) -> dict:
         session["runs"] = json.loads(session["runs"])

     return session
-
-
-def generate_deterministic_id(seed: str) -> str:
-    """
-    Generate a deterministic UUID5 based on a seed string.
-
-    Args:
-        seed (str): The seed string to generate the UUID from.
-
-    Returns:
-        str: A deterministic UUID5 string.
-    """
-    return str(uuid.uuid5(uuid.NAMESPACE_DNS, seed))
```
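Every database backend above makes the same swap: the UUID5 helper deleted from agno/db/utils.py is replaced by `generate_id` from the new agno/utils/string.py (+32 -0 in the file list). The new helper's body is not shown in this diff; a minimal sketch, assuming it keeps the deterministic UUID5 semantics the call sites rely on:

```python
import uuid


def generate_id(seed: str) -> str:
    # Hypothetical reimplementation: derive a stable UUID5 from the seed,
    # so the same connection string always yields the same database ID.
    return str(uuid.uuid5(uuid.NAMESPACE_DNS, seed))


# The same seed produces the same ID across process restarts:
assert generate_id("redis://localhost:6379#agno") == generate_id("redis://localhost:6379#agno")
```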
agno/integrations/discord/client.py
CHANGED

```diff
@@ -112,6 +112,7 @@ class DiscordClient:
         # TODO Unhappy with the duplication here but it keeps MyPy from complaining
         additional_context = dedent(f"""
            Discord username: {message_user}
+           Discord userid: {message_user_id}
            Discord url: {message_url}
        """)
         if self.agent:
```
agno/knowledge/embedder/openai.py
CHANGED

```diff
@@ -78,21 +78,25 @@ class OpenAIEmbedder(Embedder):
         return self.client.embeddings.create(**_request_params)

     def get_embedding(self, text: str) -> List[float]:
-        response: CreateEmbeddingResponse = self.response(text=text)
         try:
+            response: CreateEmbeddingResponse = self.response(text=text)
             return response.data[0].embedding
         except Exception as e:
             logger.warning(e)
             return []

     def get_embedding_and_usage(self, text: str) -> Tuple[List[float], Optional[Dict]]:
-        response: CreateEmbeddingResponse = self.response(text=text)
+        try:
+            response: CreateEmbeddingResponse = self.response(text=text)

-        embedding = response.data[0].embedding
-        usage = response.usage
-        if usage:
-            return embedding, usage.model_dump()
-        return embedding, None
+            embedding = response.data[0].embedding
+            usage = response.usage
+            if usage:
+                return embedding, usage.model_dump()
+            return embedding, None
+        except Exception as e:
+            logger.warning(e)
+            return [], None

     async def async_get_embedding(self, text: str) -> List[float]:
         req: Dict[str, Any] = {
@@ -127,10 +131,14 @@ class OpenAIEmbedder(Embedder):
         if self.request_params:
             req.update(self.request_params)

-        response = await self.aclient.embeddings.create(**req)
-        embedding = response.data[0].embedding
-        usage = response.usage
-        return embedding, usage.model_dump() if usage else None
+        try:
+            response = await self.aclient.embeddings.create(**req)
+            embedding = response.data[0].embedding
+            usage = response.usage
+            return embedding, usage.model_dump() if usage else None
+        except Exception as e:
+            logger.warning(e)
+            return [], None

     def get_embeddings_batch(self, texts: List[str], batch_size: int = 100) -> List[List[float]]:
         """
```
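The embedding entry points now catch API failures and degrade to empty results instead of raising. A short usage sketch of the new behavior (instantiation details elided; assumes a configured embedder):

```python
from agno.knowledge.embedder.openai import OpenAIEmbedder

embedder = OpenAIEmbedder()
embedding, usage = embedder.get_embedding_and_usage("hello world")
if not embedding:
    # On any exception from the OpenAI call the embedder now logs a
    # warning and returns ([], None) rather than propagating the error.
    print("embedding failed; check the warning log")
```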
agno/knowledge/knowledge.py
CHANGED

```diff
@@ -14,13 +14,13 @@ from httpx import AsyncClient

 from agno.db.base import BaseDb
 from agno.db.schemas.knowledge import KnowledgeRow
-from agno.db.utils import generate_deterministic_id
 from agno.knowledge.content import Content, ContentAuth, ContentStatus, FileData
 from agno.knowledge.document import Document
 from agno.knowledge.reader import Reader, ReaderFactory
 from agno.knowledge.remote_content.remote_content import GCSContent, RemoteContent, S3Content
 from agno.utils.http import async_fetch_with_retry
 from agno.utils.log import log_debug, log_error, log_info, log_warning
+from agno.utils.string import generate_id
 from agno.vectordb import VectorDb

 ContentDict = Dict[str, Union[str, Dict[str, str]]]
@@ -74,6 +74,8 @@ class Knowledge:
     async def add_contents_async(self, *args, **kwargs) -> None:
         if args and isinstance(args[0], list):
             arguments = args[0]
+            upsert = kwargs.get("upsert", False)
+            skip_if_exists = kwargs.get("skip_if_exists", False)
             for argument in arguments:
                 await self.add_content_async(
                     name=argument.get("name"),
@@ -85,8 +87,8 @@ class Knowledge:
                     reader=argument.get("reader"),
                     include=argument.get("include"),
                     exclude=argument.get("exclude"),
-                    upsert=argument.get("upsert", …),
-                    skip_if_exists=argument.get("skip_if_exists", …),
+                    upsert=argument.get("upsert", upsert),
+                    skip_if_exists=argument.get("skip_if_exists", skip_if_exists),
                     remote_content=argument.get("remote_content", None),
                 )

@@ -102,7 +104,6 @@ class Knowledge:
         upsert = kwargs.get("upsert", False)
         skip_if_exists = kwargs.get("skip_if_exists", False)
         remote_content = kwargs.get("remote_content", None)
-
         for path in paths:
             await self.add_content_async(
                 name=name,
@@ -253,7 +254,7 @@ class Knowledge:
             auth=auth,
         )
         content.content_hash = self._build_content_hash(content)
-        content.id = generate_deterministic_id(content.content_hash)
+        content.id = generate_id(content.content_hash)

         await self._load_content(content, upsert, skip_if_exists, include, exclude)

@@ -304,7 +305,7 @@ class Knowledge:
             text_content: Optional text content to add directly
             metadata: Optional metadata dictionary
             topics: Optional list of topics
-            …
+            remote_content: Optional cloud storage configuration
             reader: Optional custom reader for processing the content
             include: Optional list of file patterns to include
             exclude: Optional list of file patterns to exclude
@@ -431,7 +432,7 @@ class Knowledge:
                 reader=content.reader,
             )
             file_content.content_hash = self._build_content_hash(file_content)
-            file_content.id = generate_deterministic_id(file_content.content_hash)
+            file_content.id = generate_id(file_content.content_hash)

             await self._load_from_path(file_content, upsert, skip_if_exists, include, exclude)
         else:
@@ -680,7 +681,7 @@ class Knowledge:
             topics=[topic],
         )
         content.content_hash = self._build_content_hash(content)
-        content.id = generate_deterministic_id(content.content_hash)
+        content.id = generate_id(content.content_hash)

         self._add_to_contents_db(content)
         if self._should_skip(content.content_hash, skip_if_exists):
@@ -777,7 +778,7 @@ class Knowledge:

         # 3. Hash content and add it to the contents database
         content_entry.content_hash = self._build_content_hash(content_entry)
-        content_entry.id = generate_deterministic_id(content_entry.content_hash)
+        content_entry.id = generate_id(content_entry.content_hash)
         self._add_to_contents_db(content_entry)
         if self._should_skip(content_entry.content_hash, skip_if_exists):
             content_entry.status = ContentStatus.COMPLETED
@@ -859,7 +860,7 @@ class Knowledge:

         # 3. Hash content and add it to the contents database
         content_entry.content_hash = self._build_content_hash(content_entry)
-        content_entry.id = generate_deterministic_id(content_entry.content_hash)
+        content_entry.id = generate_id(content_entry.content_hash)
         self._add_to_contents_db(content_entry)
         if self._should_skip(content_entry.content_hash, skip_if_exists):
             content_entry.status = ContentStatus.COMPLETED
```
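Note the fix in `add_contents_async`: batch-level `upsert` and `skip_if_exists` kwargs are now read once and used as the per-item defaults, so a flag passed for the whole batch applies to every content dict that does not set its own. A hedged usage sketch (names and paths are illustrative):

```python
# Assumes a configured Knowledge instance; the content dicts are illustrative.
await knowledge.add_contents_async(
    [
        {"name": "guide", "path": "docs/guide.md"},
        {"name": "faq", "path": "docs/faq.md", "upsert": False},  # per-item value still wins
    ],
    upsert=True,  # batch-level default, now propagated to each item
)
```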
agno/knowledge/reader/reader_factory.py
CHANGED

```diff
@@ -210,8 +210,8 @@ class ReaderFactory:
         if any(domain in url_lower for domain in ["youtube.com", "youtu.be"]):
             return cls.create_reader("youtube")

-        # Default to URL reader
-        return cls.create_reader("url")
+        # Default to website reader
+        return cls.create_reader("website")

     @classmethod
     def get_all_reader_keys(cls) -> List[str]:
@@ -228,7 +228,11 @@ class ReaderFactory:
             reader_keys.append(reader_key)

         # Define priority order for URL readers
-        url_reader_priority = […]
+        url_reader_priority = [
+            "website",
+            "firecrawl",
+            "youtube",
+        ]

         # Sort with URL readers in priority order, others alphabetically
         def sort_key(reader_key):
```
agno/knowledge/reader/web_search_reader.py
CHANGED

```diff
@@ -96,7 +96,7 @@ class WebSearchReader(Reader):
             results.append(
                 {
                     "title": result.get("title", ""),
-                    "url": result.get("…", ""),
+                    "url": result.get("href", ""),
                     "description": result.get("body", ""),
                 }
             )
@@ -136,14 +136,20 @@ class WebSearchReader(Reader):
         self._respect_rate_limits()

         results = []
-        …
+        # Use the basic search function without unsupported parameters
+        # The googlesearch-python library's search function only accepts basic parameters
+        search_results = search(query)

-        …
+        # Convert iterator to list and limit results
+        result_list = list(search_results)[: self.max_results]
+
+        for result in result_list:
+            # The search function returns URLs as strings
             results.append(
                 {
-                    "title": …,
-                    "url": …,
-                    "description": …,
+                    "title": "",  # Google search doesn't provide titles directly
+                    "url": result,
+                    "description": "",  # Google search doesn't provide descriptions directly
                 }
             )
```
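The DuckDuckGo fix aligns the mapping with the result shape returned by the duckduckgo_search package, whose hits are dicts keyed `title`, `href`, and `body`. Sketch of the mapping (sample hit is illustrative):

```python
# A DuckDuckGo hit from the duckduckgo_search package uses "href" for the
# link; the 2.0.4 code read a different key here and got an empty string.
raw = {"title": "Agno docs", "href": "https://docs.agno.com", "body": "Build agents..."}
normalized = {
    "title": raw.get("title", ""),
    "url": raw.get("href", ""),
    "description": raw.get("body", ""),
}
```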
agno/knowledge/reader/website_reader.py
CHANGED

```diff
@@ -106,23 +106,35 @@ class WebsiteReader(Reader):
             """
             Check if the tag matches any of the relevant tags or class names
             """
-            if tag…
+            if not isinstance(tag, Tag):
+                return False
+
+            if tag.name in ["article", "main", "section"]:
+                return True
+
+            classes = tag.get("class", [])
+            content_classes = ["content", "main-content", "post-content", "entry-content", "article-body"]
+            if any(cls in content_classes for cls in classes):
                 return True
-            …
+
+            # Check for common content IDs
+            tag_id = tag.get("id", "")
+            if tag_id in ["content", "main", "article"]:
                 return True
+
             return False

-        # …
+        # Try to find main content element
         element = soup.find(match)
         if element:
+            # Remove common unwanted elements from the found content
+            for unwanted in element.find_all(["script", "style", "nav", "header", "footer"]):
+                unwanted.decompose()
             return element.get_text(strip=True, separator=" ")

-        # …
-        …
-        …
-        ):
-            return ""
-
+        # Fallback: get full page content
+        for unwanted in soup.find_all(["script", "style", "nav", "header", "footer"]):
+            unwanted.decompose()
         return soup.get_text(strip=True, separator=" ")

     def crawl(self, url: str, starting_depth: int = 1) -> Dict[str, str]:
@@ -164,7 +176,7 @@ class WebsiteReader(Reader):
             if (
                 current_url in self._visited
                 or not urlparse(current_url).netloc.endswith(primary_domain)
-                or current_depth > self.max_depth
+                or (current_depth > self.max_depth and current_url != url)
                 or num_links >= self.max_links
             ):
                 continue
@@ -174,13 +186,14 @@ class WebsiteReader(Reader):

             try:
                 log_debug(f"Crawling: {current_url}")
+
                 response = (
-                    httpx.get(current_url, timeout=self.timeout, proxy=self.proxy)
+                    httpx.get(current_url, timeout=self.timeout, proxy=self.proxy, follow_redirects=True)
                     if self.proxy
-                    else httpx.get(current_url, timeout=self.timeout)
+                    else httpx.get(current_url, timeout=self.timeout, follow_redirects=True)
                 )
-
                 response.raise_for_status()
+
                 soup = BeautifulSoup(response.content, "html.parser")

                 # Extract main content
@@ -213,9 +226,13 @@ class WebsiteReader(Reader):

             except httpx.HTTPStatusError as e:
                 # Log HTTP status errors but continue crawling other pages
-                …
-                …
-                …
+                # Skip redirect errors (3xx) as they should be handled by follow_redirects
+                if e.response.status_code >= 300 and e.response.status_code < 400:
+                    logger.debug(f"Redirect encountered for {current_url}, skipping: {e}")
+                else:
+                    logger.warning(f"HTTP status error while crawling {current_url}: {e}")
+                # For the initial URL, we should raise the error only if it's not a redirect
+                if current_url == url and not crawler_result and not (300 <= e.response.status_code < 400):
                     raise
             except httpx.RequestError as e:
                 # Log request errors but continue crawling other pages
```
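Two crawler behaviors change together: requests now follow redirects, and the starting URL is exempt from the depth cutoff, so a redirected entry page is still read. A minimal sketch of the httpx behavior being relied on (URL is illustrative):

```python
import httpx

# Without follow_redirects=True, httpx returns the 3xx response itself and
# raise_for_status() treats it as an error; with it, you get the final page.
response = httpx.get("https://example.com/old-path", timeout=10, follow_redirects=True)
response.raise_for_status()
print(response.url)  # final URL after any redirect chain
```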
agno/media.py
CHANGED

```diff
@@ -334,11 +334,16 @@ class Video(BaseModel):


 class File(BaseModel):
+    id: Optional[str] = None
     url: Optional[str] = None
     filepath: Optional[Union[Path, str]] = None
     # Raw bytes content of a file
     content: Optional[Any] = None
     mime_type: Optional[str] = None
+
+    file_type: Optional[str] = None
+    filename: Optional[str] = None
+    size: Optional[int] = None
     # External file object (e.g. GeminiFile, must be a valid object as expected by the model you are using)
     external: Optional[Any] = None
     format: Optional[str] = None  # E.g. `pdf`, `txt`, `csv`, `xml`, etc.
@@ -364,6 +369,7 @@ class File(BaseModel):
     def valid_mime_types(cls) -> List[str]:
         return [
             "application/pdf",
+            "application/json",
             "application/x-javascript",
             "text/javascript",
             "application/x-python",
@@ -377,6 +383,29 @@ class File(BaseModel):
             "text/rtf",
         ]

+    @classmethod
+    def from_base64(
+        cls,
+        base64_content: str,
+        id: Optional[str] = None,
+        mime_type: Optional[str] = None,
+        filename: Optional[str] = None,
+        name: Optional[str] = None,
+        format: Optional[str] = None,
+    ) -> "File":
+        """Create File from base64 encoded content"""
+        import base64
+
+        content_bytes = base64.b64decode(base64_content)
+        return cls(
+            content=content_bytes,
+            id=id,
+            mime_type=mime_type,
+            filename=filename,
+            name=name,
+            format=format,
+        )
+
     @property
     def file_url_content(self) -> Optional[Tuple[bytes, str]]:
         import httpx
@@ -388,3 +417,44 @@ class File(BaseModel):
             return content, mime_type
         else:
             return None
+
+    def _normalise_content(self) -> Optional[Union[str, bytes]]:
+        if self.content is None:
+            return None
+        content_normalised: Union[str, bytes] = self.content
+        if content_normalised and isinstance(content_normalised, bytes):
+            from base64 import b64encode
+
+            try:
+                if self.mime_type and self.mime_type.startswith("text/"):
+                    content_normalised = content_normalised.decode("utf-8")
+                else:
+                    content_normalised = b64encode(content_normalised).decode("utf-8")
+            except UnicodeDecodeError:
+                if isinstance(self.content, bytes):
+                    content_normalised = b64encode(self.content).decode("utf-8")
+            except Exception:
+                try:
+                    if isinstance(self.content, bytes):
+                        content_normalised = b64encode(self.content).decode("utf-8")
+                except Exception:
+                    pass
+        return content_normalised
+
+    def to_dict(self) -> Dict[str, Any]:
+        content_normalised = self._normalise_content()
+
+        response_dict = {
+            "id": self.id,
+            "url": self.url,
+            "filepath": str(self.filepath) if self.filepath else None,
+            "content": content_normalised,
+            "mime_type": self.mime_type,
+            "file_type": self.file_type,
+            "filename": self.filename,
+            "size": self.size,
+            "external": self.external,
+            "format": self.format,
+            "name": self.name,
+        }
+        return {k: v for k, v in response_dict.items() if v is not None}
```
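A quick round-trip of the new File helpers (payload is illustrative): `to_dict` decodes text content back to a UTF-8 string, re-encodes binary content as base64, and drops None-valued fields.

```python
import base64

from agno.media import File

payload = base64.b64encode(b"hello agno").decode("utf-8")
f = File.from_base64(payload, mime_type="text/plain", filename="hello.txt")

d = f.to_dict()
assert d["content"] == "hello agno"  # text/* content is decoded, not re-encoded
assert "url" not in d                # unset (None) fields are filtered out
```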
agno/models/aimlapi/aimlapi.py
CHANGED

```diff
@@ -1,4 +1,4 @@
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from os import getenv
 from typing import Any, Dict, Optional

@@ -24,7 +24,7 @@ class AIMLAPI(OpenAILike):
     name: str = "AIMLAPI"
     provider: str = "AIMLAPI"

-    api_key: Optional[str] = getenv("AIMLAPI_API_KEY")
+    api_key: Optional[str] = field(default_factory=lambda: getenv("AIMLAPI_API_KEY"))
     base_url: str = "https://api.aimlapi.com/v1"
     max_tokens: int = 4096
```
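The same two-line change lands across the other provider wrappers in the file list (cerebras, deepinfra, deepseek, fireworks, internlm, litellm, llama, nebius, nvidia, openrouter, perplexity, sambanova, together, v0, xai — each +2 -2). The reason, sketched with hypothetical classes: a plain dataclass default is evaluated once at import time, freezing whatever the environment variable held when the module loaded, while `default_factory` defers the `getenv` call to instantiation.

```python
import os
from dataclasses import dataclass, field
from typing import Optional


@dataclass
class Frozen:
    # Evaluated once, when the class body executes at import time.
    api_key: Optional[str] = os.getenv("EXAMPLE_API_KEY")


@dataclass
class Deferred:
    # Evaluated per instance, so the environment is re-read each time.
    api_key: Optional[str] = field(default_factory=lambda: os.getenv("EXAMPLE_API_KEY"))


# Assuming EXAMPLE_API_KEY was unset when the classes above were defined:
os.environ["EXAMPLE_API_KEY"] = "set-after-import"
assert Frozen().api_key is None                  # still the import-time value
assert Deferred().api_key == "set-after-import"  # picks up the new value
```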