agno 2.0.4__py3-none-any.whl → 2.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. agno/agent/agent.py +127 -102
  2. agno/db/dynamo/dynamo.py +9 -7
  3. agno/db/firestore/firestore.py +7 -4
  4. agno/db/gcs_json/gcs_json_db.py +6 -4
  5. agno/db/json/json_db.py +10 -6
  6. agno/db/migrations/v1_to_v2.py +191 -23
  7. agno/db/mongo/mongo.py +67 -6
  8. agno/db/mysql/mysql.py +7 -6
  9. agno/db/mysql/schemas.py +27 -27
  10. agno/db/postgres/postgres.py +7 -6
  11. agno/db/redis/redis.py +3 -3
  12. agno/db/singlestore/singlestore.py +4 -4
  13. agno/db/sqlite/sqlite.py +7 -6
  14. agno/db/utils.py +0 -14
  15. agno/integrations/discord/client.py +1 -0
  16. agno/knowledge/embedder/openai.py +19 -11
  17. agno/knowledge/knowledge.py +11 -10
  18. agno/knowledge/reader/reader_factory.py +7 -3
  19. agno/knowledge/reader/web_search_reader.py +12 -6
  20. agno/knowledge/reader/website_reader.py +33 -16
  21. agno/media.py +70 -0
  22. agno/models/aimlapi/aimlapi.py +2 -2
  23. agno/models/base.py +31 -4
  24. agno/models/cerebras/cerebras_openai.py +2 -2
  25. agno/models/deepinfra/deepinfra.py +2 -2
  26. agno/models/deepseek/deepseek.py +2 -2
  27. agno/models/fireworks/fireworks.py +2 -2
  28. agno/models/internlm/internlm.py +2 -2
  29. agno/models/langdb/langdb.py +4 -4
  30. agno/models/litellm/litellm_openai.py +2 -2
  31. agno/models/message.py +135 -0
  32. agno/models/meta/llama_openai.py +2 -2
  33. agno/models/nebius/nebius.py +2 -2
  34. agno/models/nexus/__init__.py +3 -0
  35. agno/models/nexus/nexus.py +25 -0
  36. agno/models/nvidia/nvidia.py +2 -2
  37. agno/models/openai/responses.py +6 -0
  38. agno/models/openrouter/openrouter.py +2 -2
  39. agno/models/perplexity/perplexity.py +2 -2
  40. agno/models/portkey/portkey.py +3 -3
  41. agno/models/response.py +2 -1
  42. agno/models/sambanova/sambanova.py +2 -2
  43. agno/models/together/together.py +2 -2
  44. agno/models/vercel/v0.py +2 -2
  45. agno/models/xai/xai.py +2 -2
  46. agno/os/app.py +162 -42
  47. agno/os/interfaces/agui/utils.py +98 -134
  48. agno/os/router.py +3 -1
  49. agno/os/routers/health.py +0 -1
  50. agno/os/routers/home.py +52 -0
  51. agno/os/routers/knowledge/knowledge.py +2 -2
  52. agno/os/schema.py +21 -0
  53. agno/os/utils.py +1 -9
  54. agno/run/agent.py +19 -3
  55. agno/run/team.py +18 -3
  56. agno/run/workflow.py +10 -0
  57. agno/team/team.py +70 -45
  58. agno/tools/duckduckgo.py +15 -11
  59. agno/tools/e2b.py +14 -7
  60. agno/tools/file_generation.py +350 -0
  61. agno/tools/function.py +2 -0
  62. agno/tools/googlesearch.py +1 -1
  63. agno/utils/gemini.py +24 -4
  64. agno/utils/string.py +32 -0
  65. agno/utils/tools.py +1 -1
  66. agno/vectordb/chroma/chromadb.py +66 -25
  67. agno/vectordb/lancedb/lance_db.py +15 -4
  68. agno/vectordb/milvus/milvus.py +6 -0
  69. agno/workflow/step.py +4 -3
  70. agno/workflow/workflow.py +4 -0
  71. {agno-2.0.4.dist-info → agno-2.0.6.dist-info}/METADATA +9 -5
  72. {agno-2.0.4.dist-info → agno-2.0.6.dist-info}/RECORD +75 -72
  73. agno/knowledge/reader/url_reader.py +0 -128
  74. {agno-2.0.4.dist-info → agno-2.0.6.dist-info}/WHEEL +0 -0
  75. {agno-2.0.4.dist-info → agno-2.0.6.dist-info}/licenses/LICENSE +0 -0
  76. {agno-2.0.4.dist-info → agno-2.0.6.dist-info}/top_level.txt +0 -0
agno/db/redis/redis.py CHANGED
@@ -21,9 +21,9 @@ from agno.db.redis.utils import (
 from agno.db.schemas.evals import EvalFilterType, EvalRunRecord, EvalType
 from agno.db.schemas.knowledge import KnowledgeRow
 from agno.db.schemas.memory import UserMemory
-from agno.db.utils import generate_deterministic_id
 from agno.session import AgentSession, Session, TeamSession, WorkflowSession
 from agno.utils.log import log_debug, log_error, log_info
+from agno.utils.string import generate_id
 
 try:
     from redis import Redis
@@ -71,7 +71,7 @@ class RedisDb(BaseDb):
         if id is None:
             base_seed = db_url or str(redis_client)
             seed = f"{base_seed}#{db_prefix}"
-            id = generate_deterministic_id(seed)
+            id = generate_id(seed)
 
         super().__init__(
             id=id,
@@ -300,8 +300,8 @@ class RedisDb(BaseDb):
 
         Args:
             session_id (str): The ID of the session to get.
+            session_type (SessionType): The type of session to get.
             user_id (Optional[str]): The ID of the user to filter by.
-            session_type (Optional[SessionType]): The type of session to filter by.
 
         Returns:
             Optional[Union[AgentSession, TeamSession, WorkflowSession]]: The session if found, None otherwise.
agno/db/singlestore/singlestore.py CHANGED
@@ -19,9 +19,9 @@ from agno.db.singlestore.utils import (
     is_table_available,
     is_valid_table,
 )
-from agno.db.utils import generate_deterministic_id
 from agno.session import AgentSession, Session, TeamSession, WorkflowSession
 from agno.utils.log import log_debug, log_error, log_info, log_warning
+from agno.utils.string import generate_id
 
 try:
     from sqlalchemy import Index, UniqueConstraint, and_, func, update
@@ -74,7 +74,7 @@ class SingleStoreDb(BaseDb):
             base_seed = db_url or str(db_engine.url) if db_engine else "singlestore"  # type: ignore
             schema_suffix = db_schema if db_schema is not None else "ai"
             seed = f"{base_seed}#{schema_suffix}"
-            id = generate_deterministic_id(seed)
+            id = generate_id(seed)
 
         super().__init__(
             id=id,
@@ -431,8 +431,8 @@ class SingleStoreDb(BaseDb):
 
         Args:
             session_id (str): ID of the session to read.
+            session_type (SessionType): Type of session to get.
             user_id (Optional[str]): User ID to filter by. Defaults to None.
-            session_type (Optional[SessionType]): Type of session to read. Defaults to None.
             deserialize (Optional[bool]): Whether to serialize the session. Defaults to True.
 
         Returns:
@@ -496,7 +496,7 @@ class SingleStoreDb(BaseDb):
         Get all sessions in the given table. Can filter by user_id and entity_id.
 
         Args:
-            session_type (Optional[SessionType]): The type of session to filter by. Defaults to None.
+            session_type (Optional[SessionType]): The type of session to filter by.
             user_id (Optional[str]): The ID of the user to filter by.
             component_id (Optional[str]): The ID of the agent / workflow to filter by.
             session_name (Optional[str]): The name of the session to filter by.
agno/db/sqlite/sqlite.py CHANGED
@@ -18,9 +18,10 @@ from agno.db.sqlite.utils import (
     is_table_available,
     is_valid_table,
 )
-from agno.db.utils import deserialize_session_json_fields, generate_deterministic_id, serialize_session_json_fields
+from agno.db.utils import deserialize_session_json_fields, serialize_session_json_fields
 from agno.session import AgentSession, Session, TeamSession, WorkflowSession
 from agno.utils.log import log_debug, log_error, log_info, log_warning
+from agno.utils.string import generate_id
 
 try:
     from sqlalchemy import Column, MetaData, Table, and_, func, select, text, update
@@ -70,7 +71,7 @@ class SqliteDb(BaseDb):
         """
         if id is None:
             seed = db_url or db_file or str(db_engine.url) if db_engine else "sqlite:///agno.db"
-            id = generate_deterministic_id(seed)
+            id = generate_id(seed)
 
         super().__init__(
             id=id,
@@ -332,8 +333,8 @@ class SqliteDb(BaseDb):
 
         Args:
             session_id (str): ID of the session to read.
+            session_type (SessionType): Type of session to get.
             user_id (Optional[str]): User ID to filter by. Defaults to None.
-            session_type (Optional[SessionType]): Type of session to read. Defaults to None.
             deserialize (Optional[bool]): Whether to serialize the session. Defaults to True.
 
         Returns:
@@ -1666,17 +1667,17 @@ class SqliteDb(BaseDb):
         if v1_table_type == "agent_sessions":
             for session in sessions:
                 self.upsert_session(session)
-            log_info(f"Migrated {len(sessions)} Agent sessions to table: {self.session_table}")
+            log_info(f"Migrated {len(sessions)} Agent sessions to table: {self.session_table_name}")
 
         elif v1_table_type == "team_sessions":
             for session in sessions:
                 self.upsert_session(session)
-            log_info(f"Migrated {len(sessions)} Team sessions to table: {self.session_table}")
+            log_info(f"Migrated {len(sessions)} Team sessions to table: {self.session_table_name}")
 
         elif v1_table_type == "workflow_sessions":
            for session in sessions:
                 self.upsert_session(session)
-            log_info(f"Migrated {len(sessions)} Workflow sessions to table: {self.session_table}")
+            log_info(f"Migrated {len(sessions)} Workflow sessions to table: {self.session_table_name}")
 
         elif v1_table_type == "memories":
             for memory in memories:
agno/db/utils.py CHANGED
@@ -1,7 +1,6 @@
 """Logic shared across different database implementations"""
 
 import json
-import uuid
 from datetime import date, datetime
 from uuid import UUID
 
@@ -87,16 +86,3 @@ def deserialize_session_json_fields(session: dict) -> dict:
         session["runs"] = json.loads(session["runs"])
 
     return session
-
-
-def generate_deterministic_id(seed: str) -> str:
-    """
-    Generate a deterministic UUID5 based on a seed string.
-
-    Args:
-        seed (str): The seed string to generate the UUID from.
-
-    Returns:
-        str: A deterministic UUID5 string.
-    """
-    return str(uuid.uuid5(uuid.NAMESPACE_DNS, seed))
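Note: the removed generate_deterministic_id helper shown above is superseded by agno.utils.string.generate_id, which the database backends and Knowledge now import instead (agno/utils/string.py grows by 32 lines in this release). For reference, a minimal sketch of the UUID5 scheme the removed helper implemented; the seed string and assertion are illustrative:

    import uuid

    def generate_deterministic_id(seed: str) -> str:
        # Same seed -> same UUID5, so a database configured with identical
        # connection settings keeps a stable id across restarts.
        return str(uuid.uuid5(uuid.NAMESPACE_DNS, seed))

    assert generate_deterministic_id("redis://localhost#agno") == generate_deterministic_id("redis://localhost#agno")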
agno/integrations/discord/client.py CHANGED
@@ -112,6 +112,7 @@ class DiscordClient:
         # TODO Unhappy with the duplication here but it keeps MyPy from complaining
         additional_context = dedent(f"""
             Discord username: {message_user}
+            Discord userid: {message_user_id}
             Discord url: {message_url}
         """)
         if self.agent:
agno/knowledge/embedder/openai.py CHANGED
@@ -78,21 +78,25 @@ class OpenAIEmbedder(Embedder):
         return self.client.embeddings.create(**_request_params)
 
     def get_embedding(self, text: str) -> List[float]:
-        response: CreateEmbeddingResponse = self.response(text=text)
         try:
+            response: CreateEmbeddingResponse = self.response(text=text)
             return response.data[0].embedding
         except Exception as e:
             logger.warning(e)
             return []
 
     def get_embedding_and_usage(self, text: str) -> Tuple[List[float], Optional[Dict]]:
-        response: CreateEmbeddingResponse = self.response(text=text)
+        try:
+            response: CreateEmbeddingResponse = self.response(text=text)
 
-        embedding = response.data[0].embedding
-        usage = response.usage
-        if usage:
-            return embedding, usage.model_dump()
-        return embedding, None
+            embedding = response.data[0].embedding
+            usage = response.usage
+            if usage:
+                return embedding, usage.model_dump()
+            return embedding, None
+        except Exception as e:
+            logger.warning(e)
+            return [], None
 
     async def async_get_embedding(self, text: str) -> List[float]:
         req: Dict[str, Any] = {
@@ -127,10 +131,14 @@ class OpenAIEmbedder(Embedder):
         if self.request_params:
             req.update(self.request_params)
 
-        response = await self.aclient.embeddings.create(**req)
-        embedding = response.data[0].embedding
-        usage = response.usage
-        return embedding, usage.model_dump() if usage else None
+        try:
+            response = await self.aclient.embeddings.create(**req)
+            embedding = response.data[0].embedding
+            usage = response.usage
+            return embedding, usage.model_dump() if usage else None
+        except Exception as e:
+            logger.warning(e)
+            return [], None
 
     def get_embeddings_batch(self, texts: List[str], batch_size: int = 100) -> List[List[float]]:
         """
agno/knowledge/knowledge.py CHANGED
@@ -14,13 +14,13 @@ from httpx import AsyncClient
 
 from agno.db.base import BaseDb
 from agno.db.schemas.knowledge import KnowledgeRow
-from agno.db.utils import generate_deterministic_id
 from agno.knowledge.content import Content, ContentAuth, ContentStatus, FileData
 from agno.knowledge.document import Document
 from agno.knowledge.reader import Reader, ReaderFactory
 from agno.knowledge.remote_content.remote_content import GCSContent, RemoteContent, S3Content
 from agno.utils.http import async_fetch_with_retry
 from agno.utils.log import log_debug, log_error, log_info, log_warning
+from agno.utils.string import generate_id
 from agno.vectordb import VectorDb
 
 ContentDict = Dict[str, Union[str, Dict[str, str]]]
@@ -74,6 +74,8 @@ class Knowledge:
     async def add_contents_async(self, *args, **kwargs) -> None:
         if args and isinstance(args[0], list):
             arguments = args[0]
+            upsert = kwargs.get("upsert", False)
+            skip_if_exists = kwargs.get("skip_if_exists", False)
             for argument in arguments:
                 await self.add_content_async(
                     name=argument.get("name"),
@@ -85,8 +87,8 @@ class Knowledge:
                     reader=argument.get("reader"),
                     include=argument.get("include"),
                     exclude=argument.get("exclude"),
-                    upsert=argument.get("upsert", False),
-                    skip_if_exists=argument.get("skip_if_exists", False),
+                    upsert=argument.get("upsert", upsert),
+                    skip_if_exists=argument.get("skip_if_exists", skip_if_exists),
                     remote_content=argument.get("remote_content", None),
                 )
 
@@ -102,7 +104,6 @@ class Knowledge:
         upsert = kwargs.get("upsert", False)
         skip_if_exists = kwargs.get("skip_if_exists", False)
         remote_content = kwargs.get("remote_content", None)
-
         for path in paths:
             await self.add_content_async(
                 name=name,
@@ -253,7 +254,7 @@ class Knowledge:
             auth=auth,
         )
         content.content_hash = self._build_content_hash(content)
-        content.id = generate_deterministic_id(content.content_hash)
+        content.id = generate_id(content.content_hash)
 
         await self._load_content(content, upsert, skip_if_exists, include, exclude)
 
@@ -304,7 +305,7 @@ class Knowledge:
             text_content: Optional text content to add directly
             metadata: Optional metadata dictionary
             topics: Optional list of topics
-            config: Optional cloud storage configuration
+            remote_content: Optional cloud storage configuration
             reader: Optional custom reader for processing the content
             include: Optional list of file patterns to include
             exclude: Optional list of file patterns to exclude
@@ -431,7 +432,7 @@ class Knowledge:
                 reader=content.reader,
             )
             file_content.content_hash = self._build_content_hash(file_content)
-            file_content.id = generate_deterministic_id(file_content.content_hash)
+            file_content.id = generate_id(file_content.content_hash)
 
             await self._load_from_path(file_content, upsert, skip_if_exists, include, exclude)
         else:
@@ -680,7 +681,7 @@ class Knowledge:
             topics=[topic],
         )
         content.content_hash = self._build_content_hash(content)
-        content.id = generate_deterministic_id(content.content_hash)
+        content.id = generate_id(content.content_hash)
 
         self._add_to_contents_db(content)
         if self._should_skip(content.content_hash, skip_if_exists):
@@ -777,7 +778,7 @@ class Knowledge:
 
         # 3. Hash content and add it to the contents database
         content_entry.content_hash = self._build_content_hash(content_entry)
-        content_entry.id = generate_deterministic_id(content_entry.content_hash)
+        content_entry.id = generate_id(content_entry.content_hash)
         self._add_to_contents_db(content_entry)
         if self._should_skip(content_entry.content_hash, skip_if_exists):
             content_entry.status = ContentStatus.COMPLETED
@@ -859,7 +860,7 @@ class Knowledge:
 
         # 3. Hash content and add it to the contents database
         content_entry.content_hash = self._build_content_hash(content_entry)
-        content_entry.id = generate_deterministic_id(content_entry.content_hash)
+        content_entry.id = generate_id(content_entry.content_hash)
         self._add_to_contents_db(content_entry)
         if self._should_skip(content_entry.content_hash, skip_if_exists):
             content_entry.status = ContentStatus.COMPLETED
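Note: add_contents_async previously ignored top-level upsert/skip_if_exists kwargs when given a list of content dicts; they now act as defaults that each item can still override. A hypothetical call (inside an async function) illustrating the new behavior; the "path" key is an assumption based on add_content_async's parameters:

    await knowledge.add_contents_async(
        [
            {"name": "guide", "path": "docs/guide.md"},
            {"name": "faq", "path": "docs/faq.md", "upsert": False},  # per-item value still wins
        ],
        upsert=True,           # now used as the default for every item in the list
        skip_if_exists=True,
    )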
agno/knowledge/reader/reader_factory.py CHANGED
@@ -210,8 +210,8 @@ class ReaderFactory:
         if any(domain in url_lower for domain in ["youtube.com", "youtu.be"]):
             return cls.create_reader("youtube")
 
-        # Default to URL reader
-        return cls.create_reader("url")
+        # Default to website reader
+        return cls.create_reader("website")
 
     @classmethod
     def get_all_reader_keys(cls) -> List[str]:
@@ -228,7 +228,11 @@ class ReaderFactory:
             reader_keys.append(reader_key)
 
         # Define priority order for URL readers
-        url_reader_priority = ["url", "website", "firecrawl", "pdf_url", "csv_url", "youtube", "web_search"]
+        url_reader_priority = [
+            "website",
+            "firecrawl",
+            "youtube",
+        ]
 
         # Sort with URL readers in priority order, others alphabetically
         def sort_key(reader_key):
agno/knowledge/reader/web_search_reader.py CHANGED
@@ -96,7 +96,7 @@ class WebSearchReader(Reader):
             results.append(
                 {
                     "title": result.get("title", ""),
-                    "url": result.get("link", ""),
+                    "url": result.get("href", ""),
                     "description": result.get("body", ""),
                 }
             )
@@ -136,14 +136,20 @@ class WebSearchReader(Reader):
         self._respect_rate_limits()
 
         results = []
-        search_results = search(query, num_results=self.max_results, stop=self.max_results)
+        # Use the basic search function without unsupported parameters
+        # The googlesearch-python library's search function only accepts basic parameters
+        search_results = search(query)
 
-        for result in search_results:
+        # Convert iterator to list and limit results
+        result_list = list(search_results)[: self.max_results]
+
+        for result in result_list:
+            # The search function returns URLs as strings
             results.append(
                 {
-                    "title": getattr(result, "title", ""),
-                    "url": getattr(result, "url", ""),
-                    "description": getattr(result, "description", ""),
+                    "title": "",  # Google search doesn't provide titles directly
+                    "url": result,
+                    "description": "",  # Google search doesn't provide descriptions directly
                 }
             )
 
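Note: the Google search path now relies on the default behavior of the googlesearch-python package, whose search() yields plain URL strings, which is why title and description are left empty above. A minimal sketch of that underlying behavior; the query string is illustrative:

    from googlesearch import search  # googlesearch-python package

    # By default search() returns an iterator of result URLs as plain strings.
    urls = list(search("agno agent framework"))[:5]
    for url in urls:
        print(url)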
agno/knowledge/reader/website_reader.py CHANGED
@@ -106,23 +106,35 @@ class WebsiteReader(Reader):
            """
            Check if the tag matches any of the relevant tags or class names
            """
-           if tag.name in ["article", "main"]:
+           if not isinstance(tag, Tag):
+               return False
+
+           if tag.name in ["article", "main", "section"]:
+               return True
+
+           classes = tag.get("class", [])
+           content_classes = ["content", "main-content", "post-content", "entry-content", "article-body"]
+           if any(cls in content_classes for cls in classes):
                return True
-           if any(cls in ["content", "main-content", "post-content"] for cls in tag.get("class", [])):  # type: ignore
+
+           # Check for common content IDs
+           tag_id = tag.get("id", "")
+           if tag_id in ["content", "main", "article"]:
                return True
+
            return False
 
-       # Use a single call to 'find' with a custom function to match tags or classes
+       # Try to find main content element
        element = soup.find(match)
        if element:
+           # Remove common unwanted elements from the found content
+           for unwanted in element.find_all(["script", "style", "nav", "header", "footer"]):
+               unwanted.decompose()
            return element.get_text(strip=True, separator=" ")
 
-       # If we only have a div without specific content classes, return empty string
-       if soup.find("div") and not any(
-           soup.find(class_=class_name) for class_name in ["content", "main-content", "post-content"]
-       ):
-           return ""
-
+       # Fallback: get full page content
+       for unwanted in soup.find_all(["script", "style", "nav", "header", "footer"]):
+           unwanted.decompose()
        return soup.get_text(strip=True, separator=" ")
 
    def crawl(self, url: str, starting_depth: int = 1) -> Dict[str, str]:
@@ -164,7 +176,7 @@ class WebsiteReader(Reader):
            if (
                current_url in self._visited
                or not urlparse(current_url).netloc.endswith(primary_domain)
-               or current_depth > self.max_depth
+               or (current_depth > self.max_depth and current_url != url)
                or num_links >= self.max_links
            ):
                continue
@@ -174,13 +186,14 @@ class WebsiteReader(Reader):
 
            try:
                log_debug(f"Crawling: {current_url}")
+
                response = (
-                   httpx.get(current_url, timeout=self.timeout, proxy=self.proxy)
+                   httpx.get(current_url, timeout=self.timeout, proxy=self.proxy, follow_redirects=True)
                    if self.proxy
-                   else httpx.get(current_url, timeout=self.timeout)
+                   else httpx.get(current_url, timeout=self.timeout, follow_redirects=True)
                )
-
                response.raise_for_status()
+
                soup = BeautifulSoup(response.content, "html.parser")
 
                # Extract main content
@@ -213,9 +226,13 @@ class WebsiteReader(Reader):
 
            except httpx.HTTPStatusError as e:
                # Log HTTP status errors but continue crawling other pages
-               logger.warning(f"HTTP status error while crawling {current_url}: {e}")
-               # For the initial URL, we should raise the error
-               if current_url == url and not crawler_result:
+               # Skip redirect errors (3xx) as they should be handled by follow_redirects
+               if e.response.status_code >= 300 and e.response.status_code < 400:
+                   logger.debug(f"Redirect encountered for {current_url}, skipping: {e}")
+               else:
+                   logger.warning(f"HTTP status error while crawling {current_url}: {e}")
+               # For the initial URL, we should raise the error only if it's not a redirect
+               if current_url == url and not crawler_result and not (300 <= e.response.status_code < 400):
                    raise
            except httpx.RequestError as e:
                # Log request errors but continue crawling other pages
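Note: a hedged usage sketch of the crawler after these changes. The constructor keywords are assumptions inferred from the max_depth/max_links/timeout attributes referenced above; crawl()'s signature appears verbatim in the diff:

    from agno.knowledge.reader.website_reader import WebsiteReader

    reader = WebsiteReader(max_depth=2, max_links=10)  # keyword names assumed
    pages = reader.crawl("https://docs.agno.com")  # 3xx redirects are now followed
    for url, text in pages.items():
        print(url, len(text))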
agno/media.py CHANGED
@@ -334,11 +334,16 @@ class Video(BaseModel):
 
 
 class File(BaseModel):
+    id: Optional[str] = None
     url: Optional[str] = None
     filepath: Optional[Union[Path, str]] = None
     # Raw bytes content of a file
     content: Optional[Any] = None
     mime_type: Optional[str] = None
+
+    file_type: Optional[str] = None
+    filename: Optional[str] = None
+    size: Optional[int] = None
     # External file object (e.g. GeminiFile, must be a valid object as expected by the model you are using)
     external: Optional[Any] = None
     format: Optional[str] = None  # E.g. `pdf`, `txt`, `csv`, `xml`, etc.
@@ -364,6 +369,7 @@ class File(BaseModel):
     def valid_mime_types(cls) -> List[str]:
         return [
             "application/pdf",
+            "application/json",
             "application/x-javascript",
             "text/javascript",
             "application/x-python",
@@ -377,6 +383,29 @@ class File(BaseModel):
             "text/rtf",
         ]
 
+    @classmethod
+    def from_base64(
+        cls,
+        base64_content: str,
+        id: Optional[str] = None,
+        mime_type: Optional[str] = None,
+        filename: Optional[str] = None,
+        name: Optional[str] = None,
+        format: Optional[str] = None,
+    ) -> "File":
+        """Create File from base64 encoded content"""
+        import base64
+
+        content_bytes = base64.b64decode(base64_content)
+        return cls(
+            content=content_bytes,
+            id=id,
+            mime_type=mime_type,
+            filename=filename,
+            name=name,
+            format=format,
+        )
+
     @property
     def file_url_content(self) -> Optional[Tuple[bytes, str]]:
         import httpx
@@ -388,3 +417,44 @@ class File(BaseModel):
             return content, mime_type
         else:
             return None
+
+    def _normalise_content(self) -> Optional[Union[str, bytes]]:
+        if self.content is None:
+            return None
+        content_normalised: Union[str, bytes] = self.content
+        if content_normalised and isinstance(content_normalised, bytes):
+            from base64 import b64encode
+
+            try:
+                if self.mime_type and self.mime_type.startswith("text/"):
+                    content_normalised = content_normalised.decode("utf-8")
+                else:
+                    content_normalised = b64encode(content_normalised).decode("utf-8")
+            except UnicodeDecodeError:
+                if isinstance(self.content, bytes):
+                    content_normalised = b64encode(self.content).decode("utf-8")
+            except Exception:
+                try:
+                    if isinstance(self.content, bytes):
+                        content_normalised = b64encode(self.content).decode("utf-8")
+                except Exception:
+                    pass
+        return content_normalised
+
+    def to_dict(self) -> Dict[str, Any]:
+        content_normalised = self._normalise_content()
+
+        response_dict = {
+            "id": self.id,
+            "url": self.url,
+            "filepath": str(self.filepath) if self.filepath else None,
+            "content": content_normalised,
+            "mime_type": self.mime_type,
+            "file_type": self.file_type,
+            "filename": self.filename,
+            "size": self.size,
+            "external": self.external,
+            "format": self.format,
+            "name": self.name,
+        }
+        return {k: v for k, v in response_dict.items() if v is not None}
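Note: a round trip through the new File helpers above, as a sketch. from_base64() decodes into raw bytes, and to_dict() re-encodes non-text content as base64 (text/* mime types are decoded to UTF-8 strings instead); the sample payload is illustrative:

    import base64
    from agno.media import File

    encoded = base64.b64encode(b'{"hello": "world"}').decode("utf-8")
    f = File.from_base64(encoded, mime_type="application/json", filename="hello.json")
    assert isinstance(f.content, bytes)
    # "application/json" is not text/*, so to_dict() re-encodes content as base64:
    print(f.to_dict()["content"])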
agno/models/aimlapi/aimlapi.py CHANGED
@@ -1,4 +1,4 @@
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from os import getenv
 from typing import Any, Dict, Optional
 
@@ -24,7 +24,7 @@ class AIMLAPI(OpenAILike):
     name: str = "AIMLAPI"
     provider: str = "AIMLAPI"
 
-    api_key: Optional[str] = getenv("AIMLAPI_API_KEY")
+    api_key: Optional[str] = field(default_factory=lambda: getenv("AIMLAPI_API_KEY"))
     base_url: str = "https://api.aimlapi.com/v1"
     max_tokens: int = 4096
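Note: the api_key change here (the same two-line fix repeated across the other provider models in this release) corrects when the default is evaluated in a dataclass field. A plain `= getenv(...)` default is evaluated once at import time, while default_factory runs at every instantiation; a minimal sketch with a hypothetical config class:

    import os
    from dataclasses import dataclass, field
    from typing import Optional

    @dataclass
    class ModelConfig:  # hypothetical stand-in for the provider classes above
        api_key: Optional[str] = field(default_factory=lambda: os.getenv("AIMLAPI_API_KEY"))

    os.environ["AIMLAPI_API_KEY"] = "set-after-import"
    print(ModelConfig().api_key)  # picks up the value set after import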