agno 2.2.0__py3-none-any.whl → 2.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. agno/agent/agent.py +751 -575
  2. agno/culture/manager.py +22 -24
  3. agno/db/async_postgres/__init__.py +1 -1
  4. agno/db/dynamo/dynamo.py +0 -2
  5. agno/db/firestore/firestore.py +0 -2
  6. agno/db/gcs_json/gcs_json_db.py +0 -4
  7. agno/db/gcs_json/utils.py +0 -24
  8. agno/db/in_memory/in_memory_db.py +0 -3
  9. agno/db/json/json_db.py +4 -10
  10. agno/db/json/utils.py +0 -24
  11. agno/db/mongo/mongo.py +0 -2
  12. agno/db/mysql/mysql.py +0 -3
  13. agno/db/postgres/__init__.py +1 -1
  14. agno/db/{async_postgres → postgres}/async_postgres.py +19 -22
  15. agno/db/postgres/postgres.py +7 -10
  16. agno/db/postgres/utils.py +106 -2
  17. agno/db/redis/redis.py +0 -2
  18. agno/db/singlestore/singlestore.py +0 -3
  19. agno/db/sqlite/__init__.py +2 -1
  20. agno/db/sqlite/async_sqlite.py +2269 -0
  21. agno/db/sqlite/sqlite.py +0 -2
  22. agno/db/sqlite/utils.py +96 -0
  23. agno/db/surrealdb/surrealdb.py +0 -6
  24. agno/knowledge/knowledge.py +14 -3
  25. agno/knowledge/reader/pptx_reader.py +101 -0
  26. agno/knowledge/reader/reader_factory.py +30 -0
  27. agno/knowledge/reader/tavily_reader.py +194 -0
  28. agno/knowledge/types.py +1 -0
  29. agno/memory/manager.py +28 -25
  30. agno/models/anthropic/claude.py +63 -6
  31. agno/models/base.py +255 -36
  32. agno/models/response.py +69 -0
  33. agno/os/router.py +7 -5
  34. agno/os/routers/memory/memory.py +2 -1
  35. agno/os/routers/memory/schemas.py +5 -2
  36. agno/os/schema.py +26 -20
  37. agno/os/utils.py +9 -2
  38. agno/run/agent.py +28 -30
  39. agno/run/base.py +17 -1
  40. agno/run/team.py +28 -29
  41. agno/run/workflow.py +32 -17
  42. agno/session/agent.py +3 -0
  43. agno/session/summary.py +4 -1
  44. agno/session/team.py +1 -1
  45. agno/team/team.py +620 -374
  46. agno/tools/dalle.py +2 -4
  47. agno/tools/eleven_labs.py +23 -25
  48. agno/tools/function.py +40 -0
  49. agno/tools/mcp/__init__.py +10 -0
  50. agno/tools/mcp/mcp.py +324 -0
  51. agno/tools/mcp/multi_mcp.py +347 -0
  52. agno/tools/mcp/params.py +24 -0
  53. agno/tools/slack.py +18 -3
  54. agno/tools/tavily.py +146 -0
  55. agno/utils/agent.py +366 -1
  56. agno/utils/mcp.py +92 -2
  57. agno/utils/media.py +166 -1
  58. agno/utils/message.py +60 -0
  59. agno/utils/print_response/workflow.py +17 -1
  60. agno/utils/team.py +89 -1
  61. agno/workflow/step.py +0 -1
  62. agno/workflow/types.py +10 -15
  63. agno/workflow/workflow.py +86 -1
  64. {agno-2.2.0.dist-info → agno-2.2.2.dist-info}/METADATA +31 -25
  65. {agno-2.2.0.dist-info → agno-2.2.2.dist-info}/RECORD +68 -64
  66. agno/db/async_postgres/schemas.py +0 -139
  67. agno/db/async_postgres/utils.py +0 -347
  68. agno/tools/mcp.py +0 -679
  69. {agno-2.2.0.dist-info → agno-2.2.2.dist-info}/WHEEL +0 -0
  70. {agno-2.2.0.dist-info → agno-2.2.2.dist-info}/licenses/LICENSE +0 -0
  71. {agno-2.2.0.dist-info → agno-2.2.2.dist-info}/top_level.txt +0 -0
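One structural change in the list above is worth a note: the monolithic agno/tools/mcp.py was removed (-679 lines) and replaced by the agno/tools/mcp/ package (mcp.py, multi_mcp.py, params.py, plus a new __init__.py). A minimal import sketch, assuming the package __init__.py re-exports the public tool classes so existing import paths keep working (the class names below are inferred from the module names and are not confirmed by this diff):

    # Hypothetical imports; assumes agno/tools/mcp/__init__.py re-exports these names
    from agno.tools.mcp import MCPTools       # defined in agno/tools/mcp/mcp.py
    from agno.tools.mcp import MultiMCPTools  # defined in agno/tools/mcp/multi_mcp.py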
agno/db/sqlite/sqlite.py CHANGED
@@ -374,8 +374,6 @@ class SqliteDb(BaseDb):
             # Filtering
             if user_id is not None:
                 stmt = stmt.where(table.c.user_id == user_id)
-            if session_type is not None:
-                stmt = stmt.where(table.c.session_type == session_type)
 
             result = sess.execute(stmt).fetchone()
             if result is None:
agno/db/sqlite/utils.py CHANGED
@@ -4,6 +4,8 @@ from datetime import date, datetime, timedelta, timezone
 from typing import Any, Dict, List, Optional
 from uuid import uuid4
 
+from sqlalchemy.ext.asyncio import AsyncEngine, AsyncSession
+
 from agno.db.schemas.culture import CulturalKnowledge
 from agno.db.sqlite.schemas import get_table_schema_definition
 from agno.utils.log import log_debug, log_error, log_warning
@@ -50,6 +52,7 @@ def is_table_available(session: Session, table_name: str, db_schema: Optional[st
     """
     Check if a table with the given name exists.
     Note: db_schema parameter is ignored in SQLite but kept for API compatibility.
+
     Returns:
         bool: True if the table exists, False otherwise.
     """
@@ -65,6 +68,25 @@ def is_table_available(session: Session, table_name: str, db_schema: Optional[st
         return False
 
 
+async def ais_table_available(session: AsyncSession, table_name: str, db_schema: Optional[str] = None) -> bool:
+    """
+    Check if a table with the given name exists.
+    Note: db_schema parameter is ignored in SQLite but kept for API compatibility.
+
+    Returns:
+        bool: True if the table exists, False otherwise.
+    """
+    try:
+        exists_query = text("SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = :table")
+        exists = (await session.execute(exists_query, {"table": table_name})).scalar() is not None
+        if not exists:
+            log_debug(f"Table {table_name} {'exists' if exists else 'does not exist'}")
+        return exists
+    except Exception as e:
+        log_error(f"Error checking if table exists: {e}")
+        return False
+
+
 def is_valid_table(db_engine: Engine, table_name: str, table_type: str, db_schema: Optional[str] = None) -> bool:
     """
     Check if the existing table has the expected column names.
@@ -98,6 +120,47 @@ def is_valid_table(db_engine: Engine, table_name: str, table_type: str, db_schem
         return False
 
 
+async def ais_valid_table(
+    db_engine: AsyncEngine, table_name: str, table_type: str, db_schema: Optional[str] = None
+) -> bool:
+    """
+    Check if the existing table has the expected column names.
+    Note: db_schema parameter is ignored in SQLite but kept for API compatibility.
+    Args:
+        db_engine (Engine): Database engine
+        table_name (str): Name of the table to validate
+        table_type (str): Type of table to get expected schema
+        db_schema (Optional[str]): Database schema name (ignored in SQLite)
+    Returns:
+        bool: True if table has all expected columns, False otherwise
+    """
+    try:
+        expected_table_schema = get_table_schema_definition(table_type)
+        expected_columns = {col_name for col_name in expected_table_schema.keys() if not col_name.startswith("_")}
+
+        # Get existing columns from the async engine
+        async with db_engine.connect() as conn:
+            existing_columns = await conn.run_sync(_get_table_columns, table_name)
+
+        missing_columns = expected_columns - existing_columns
+        if missing_columns:
+            log_warning(f"Missing columns {missing_columns} in table {table_name}")
+            return False
+
+        return True
+
+    except Exception as e:
+        log_error(f"Error validating table schema for {table_name}: {e}")
+        return False
+
+
+def _get_table_columns(conn, table_name: str) -> set[str]:
+    """Helper function to get table columns using sync inspector."""
+    inspector = inspect(conn)
+    columns_info = inspector.get_columns(table_name)
+    return {col["name"] for col in columns_info}
+
+
 # -- Metrics util methods --
 
 
@@ -134,6 +197,39 @@ def bulk_upsert_metrics(session: Session, table: Table, metrics_records: list[di
     return results  # type: ignore
 
 
+async def abulk_upsert_metrics(session: AsyncSession, table: Table, metrics_records: list[dict]) -> list[dict]:
+    """Bulk upsert metrics into the database.
+
+    Args:
+        table (Table): The table to upsert into.
+        metrics_records (list[dict]): The metrics records to upsert.
+
+    Returns:
+        list[dict]: The upserted metrics records.
+    """
+    if not metrics_records:
+        return []
+
+    results = []
+    stmt = sqlite.insert(table)
+
+    # Columns to update in case of conflict
+    update_columns = {
+        col.name: stmt.excluded[col.name]
+        for col in table.columns
+        if col.name not in ["id", "date", "created_at", "aggregation_period"]
+    }
+
+    stmt = stmt.on_conflict_do_update(index_elements=["date", "aggregation_period"], set_=update_columns).returning(  # type: ignore
+        table
+    )
+    result = await session.execute(stmt, metrics_records)
+    results = [dict(row._mapping) for row in result.fetchall()]
+    await session.commit()
+
+    return results  # type: ignore
+
+
 def calculate_date_metrics(date_to_process: date, sessions_data: dict) -> dict:
     """Calculate metrics for the given single date.
 
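The new async helpers above mirror their sync counterparts but take SQLAlchemy's AsyncSession / AsyncEngine. A minimal usage sketch, assuming the aiosqlite driver is installed; the table name "agno_sessions" and table type "sessions" are hypothetical values for illustration:

    import asyncio
    from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine

    from agno.db.sqlite.utils import ais_table_available, ais_valid_table

    async def main() -> None:
        # aiosqlite provides the async SQLite dialect for SQLAlchemy
        engine = create_async_engine("sqlite+aiosqlite:///agno.db")
        async with AsyncSession(engine) as session:
            if await ais_table_available(session, "agno_sessions"):
                ok = await ais_valid_table(engine, "agno_sessions", table_type="sessions")
                print(f"schema valid: {ok}")
        await engine.dispose()

    asyncio.run(main())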
agno/db/surrealdb/surrealdb.py CHANGED
@@ -238,12 +238,6 @@ class SurrealDb(BaseDb):
         where = WhereClause()
         if user_id is not None:
             where = where.and_("user_id", user_id)
-        if session_type == SessionType.AGENT:
-            where = where.and_("agent", None, "!=")
-        elif session_type == SessionType.TEAM:
-            where = where.and_("team", None, "!=")
-        elif session_type == SessionType.WORKFLOW:
-            where = where.and_("workflow", None, "!=")
         where_clause, where_vars = where.build()
         query = dedent(f"""
             SELECT *
agno/knowledge/knowledge.py CHANGED
@@ -501,7 +501,7 @@ class Knowledge:
             await self._add_to_contents_db(content)
         if self._should_skip(content.content_hash, skip_if_exists):  # type: ignore[arg-type]
             content.status = ContentStatus.COMPLETED
-            self._update_content(content)
+            await self._aupdate_content(content)
             return
 
         if self.vector_db.__class__.__name__ == "LightRag":
@@ -547,6 +547,8 @@ class Knowledge:
             reader = self.pdf_reader
         elif file_extension == ".docx":
             reader = self.docx_reader
+        elif file_extension == ".pptx":
+            reader = self.pptx_reader
         elif file_extension == ".json":
             reader = self.json_reader
         elif file_extension == ".markdown":
@@ -723,7 +725,7 @@ class Knowledge:
             await self._add_to_contents_db(content)
         if self._should_skip(content.content_hash, skip_if_exists):
             content.status = ContentStatus.COMPLETED
-            self._update_content(content)
+            await self._aupdate_content(content)
             return
 
         if self.vector_db.__class__.__name__ == "LightRag":
@@ -739,7 +741,7 @@ class Knowledge:
                 log_error(f"No reader available for topic: {topic}")
                 content.status = ContentStatus.FAILED
                 content.status_message = "No reader available for topic"
-                self._update_content(content)
+                await self._aupdate_content(content)
                 continue
 
             read_documents = content.reader.read(topic)
@@ -835,6 +837,8 @@ class Knowledge:
             reader = self.csv_reader
         elif s3_object.uri.endswith(".docx"):
             reader = self.docx_reader
+        elif s3_object.uri.endswith(".pptx"):
+            reader = self.pptx_reader
         elif s3_object.uri.endswith(".json"):
             reader = self.json_reader
         elif s3_object.uri.endswith(".markdown"):
@@ -917,6 +921,8 @@ class Knowledge:
             reader = self.csv_reader
         elif gcs_object.name.endswith(".docx"):
             reader = self.docx_reader
+        elif gcs_object.name.endswith(".pptx"):
+            reader = self.pptx_reader
         elif gcs_object.name.endswith(".json"):
             reader = self.json_reader
         elif gcs_object.name.endswith(".markdown"):
@@ -1893,6 +1899,11 @@
         """Docx reader - lazy loaded via factory."""
         return self._get_reader("docx")
 
+    @property
+    def pptx_reader(self) -> Optional[Reader]:
+        """PPTX reader - lazy loaded via factory."""
+        return self._get_reader("pptx")
+
     @property
     def json_reader(self) -> Optional[Reader]:
         """JSON reader - lazy loaded via factory."""
agno/knowledge/reader/pptx_reader.py ADDED
@@ -0,0 +1,101 @@
+import asyncio
+from pathlib import Path
+from typing import IO, Any, List, Optional, Union
+from uuid import uuid4
+
+from agno.knowledge.chunking.document import DocumentChunking
+from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
+from agno.knowledge.document.base import Document
+from agno.knowledge.reader.base import Reader
+from agno.knowledge.types import ContentType
+from agno.utils.log import log_info, logger
+
+try:
+    from pptx import Presentation  # type: ignore
+except ImportError:
+    raise ImportError("The `python-pptx` package is not installed. Please install it via `pip install python-pptx`.")
+
+
+class PPTXReader(Reader):
+    """Reader for PPTX files"""
+
+    def __init__(self, chunking_strategy: Optional[ChunkingStrategy] = DocumentChunking(), **kwargs):
+        super().__init__(chunking_strategy=chunking_strategy, **kwargs)
+
+    @classmethod
+    def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
+        """Get the list of supported chunking strategies for PPTX readers."""
+        return [
+            ChunkingStrategyType.DOCUMENT_CHUNKER,
+            ChunkingStrategyType.FIXED_SIZE_CHUNKER,
+            ChunkingStrategyType.SEMANTIC_CHUNKER,
+            ChunkingStrategyType.AGENTIC_CHUNKER,
+            ChunkingStrategyType.RECURSIVE_CHUNKER,
+        ]
+
+    @classmethod
+    def get_supported_content_types(self) -> List[ContentType]:
+        return [ContentType.PPTX]
+
+    def read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
+        """Read a pptx file and return a list of documents"""
+        try:
+            if isinstance(file, Path):
+                if not file.exists():
+                    raise FileNotFoundError(f"Could not find file: {file}")
+                log_info(f"Reading: {file}")
+                presentation = Presentation(str(file))
+                doc_name = name or file.stem
+            else:
+                log_info(f"Reading uploaded file: {getattr(file, 'name', 'pptx_file')}")
+                presentation = Presentation(file)
+                doc_name = name or (
+                    getattr(file, "name", "pptx_file").split(".")[0] if hasattr(file, "name") else "pptx_file"
+                )
+
+            # Extract text from all slides
+            slide_texts = []
+            for slide_number, slide in enumerate(presentation.slides, 1):
+                slide_text = f"Slide {slide_number}:\n"
+
+                # Extract text from shapes that contain text
+                text_content = []
+                for shape in slide.shapes:
+                    if hasattr(shape, "text") and shape.text.strip():
+                        text_content.append(shape.text.strip())
+
+                if text_content:
+                    slide_text += "\n".join(text_content)
+                else:
+                    slide_text += "(No text content)"
+
+                slide_texts.append(slide_text)
+
+            doc_content = "\n\n".join(slide_texts)
+
+            documents = [
+                Document(
+                    name=doc_name,
+                    id=str(uuid4()),
+                    content=doc_content,
+                )
+            ]
+
+            if self.chunk:
+                chunked_documents = []
+                for document in documents:
+                    chunked_documents.extend(self.chunk_document(document))
+                return chunked_documents
+            return documents
+
+        except Exception as e:
+            logger.error(f"Error reading file: {e}")
+            return []
+
+    async def async_read(self, file: Union[Path, IO[Any]], name: Optional[str] = None) -> List[Document]:
+        """Asynchronously read a pptx file and return a list of documents"""
+        try:
+            return await asyncio.to_thread(self.read, file, name)
+        except Exception as e:
+            logger.error(f"Error reading file asynchronously: {e}")
+            return []
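Based on the read/async_read signatures above, a short usage sketch (the file path is illustrative; assumes python-pptx is installed):

    from pathlib import Path

    from agno.knowledge.reader.pptx_reader import PPTXReader

    reader = PPTXReader()
    # Chunked by default: self.chunk is honored, with DocumentChunking as the default strategy
    documents = reader.read(Path("quarterly_review.pptx"))
    for doc in documents:
        print(doc.name, len(doc.content))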
agno/knowledge/reader/reader_factory.py CHANGED
@@ -58,6 +58,18 @@ class ReaderFactory:
         config.update(kwargs)
         return DocxReader(**config)
 
+    @classmethod
+    def _get_pptx_reader(cls, **kwargs) -> Reader:
+        """Get PPTX reader instance."""
+        from agno.knowledge.reader.pptx_reader import PPTXReader
+
+        config: Dict[str, Any] = {
+            "name": "PPTX Reader",
+            "description": "Extracts text content from Microsoft PowerPoint presentations (.pptx format)",
+        }
+        config.update(kwargs)
+        return PPTXReader(**config)
+
     @classmethod
     def _get_json_reader(cls, **kwargs) -> Reader:
         """Get JSON reader instance."""
@@ -120,6 +132,21 @@
         config.update(kwargs)
         return FirecrawlReader(**config)
 
+    @classmethod
+    def _get_tavily_reader(cls, **kwargs) -> Reader:
+        """Get Tavily reader instance."""
+        from agno.knowledge.reader.tavily_reader import TavilyReader
+
+        config: Dict[str, Any] = {
+            "api_key": kwargs.get("api_key") or os.getenv("TAVILY_API_KEY"),
+            "extract_format": "markdown",
+            "extract_depth": "basic",
+            "name": "Tavily Reader",
+            "description": "Extracts content from URLs using Tavily's Extract API with markdown or text output",
+        }
+        config.update(kwargs)
+        return TavilyReader(**config)
+
     @classmethod
     def _get_youtube_reader(cls, **kwargs) -> Reader:
         """Get YouTube reader instance."""
@@ -202,6 +229,8 @@
             return cls.create_reader("csv")
         elif extension in [".docx", ".doc", "application/vnd.openxmlformats-officedocument.wordprocessingml.document"]:
             return cls.create_reader("docx")
+        elif extension == ".pptx":
+            return cls.create_reader("pptx")
         elif extension == ".json":
             return cls.create_reader("json")
         elif extension in [".md", ".markdown"]:
@@ -242,6 +271,7 @@
         url_reader_priority = [
             "website",
             "firecrawl",
+            "tavily",
             "youtube",
         ]
 
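With the two registrations above, both readers are reachable through the factory. A brief sketch, assuming create_reader dispatches on the same keys used internally ("pptx", as called in the extension dispatch above, and "tavily" by the _get_tavily_reader naming convention):

    from agno.knowledge.reader.reader_factory import ReaderFactory

    pptx_reader = ReaderFactory.create_reader("pptx")
    # Extra kwargs are merged into the reader config, so defaults can be overridden
    tavily_reader = ReaderFactory.create_reader("tavily", extract_depth="advanced")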
agno/knowledge/reader/tavily_reader.py ADDED
@@ -0,0 +1,194 @@
+import asyncio
+from dataclasses import dataclass
+from typing import Dict, List, Literal, Optional
+
+from agno.knowledge.chunking.semantic import SemanticChunking
+from agno.knowledge.chunking.strategy import ChunkingStrategy, ChunkingStrategyType
+from agno.knowledge.document.base import Document
+from agno.knowledge.reader.base import Reader
+from agno.knowledge.types import ContentType
+from agno.utils.log import log_debug, logger
+
+try:
+    from tavily import TavilyClient  # type: ignore[attr-defined]
+except ImportError:
+    raise ImportError(
+        "The `tavily-python` package is not installed. Please install it via `pip install tavily-python`."
+    )
+
+
+@dataclass
+class TavilyReader(Reader):
+    api_key: Optional[str] = None
+    params: Optional[Dict] = None
+    extract_format: Literal["markdown", "text"] = "markdown"
+    extract_depth: Literal["basic", "advanced"] = "basic"
+
+    def __init__(
+        self,
+        api_key: Optional[str] = None,
+        params: Optional[Dict] = None,
+        extract_format: Literal["markdown", "text"] = "markdown",
+        extract_depth: Literal["basic", "advanced"] = "basic",
+        chunk: bool = True,
+        chunk_size: int = 5000,
+        chunking_strategy: Optional[ChunkingStrategy] = SemanticChunking(),
+        name: Optional[str] = None,
+        description: Optional[str] = None,
+    ) -> None:
+        """
+        Initialize TavilyReader for extracting content from URLs using Tavily's Extract API.
+
+        Args:
+            api_key: Tavily API key (or use TAVILY_API_KEY env var)
+            params: Additional parameters to pass to the extract API
+            extract_format: Output format - "markdown" or "text"
+            extract_depth: Extraction depth - "basic" (1 credit/5 URLs) or "advanced" (2 credits/5 URLs)
+            chunk: Whether to chunk the extracted content
+            chunk_size: Size of chunks when chunking is enabled
+            chunking_strategy: Strategy to use for chunking
+            name: Name of the reader
+            description: Description of the reader
+        """
+        # Initialize base Reader (handles chunk_size / strategy)
+        super().__init__(
+            chunk=chunk, chunk_size=chunk_size, chunking_strategy=chunking_strategy, name=name, description=description
+        )
+
+        # Tavily-specific attributes
+        self.api_key = api_key
+        self.params = params or {}
+        self.extract_format = extract_format
+        self.extract_depth = extract_depth
+
+    @classmethod
+    def get_supported_chunking_strategies(self) -> List[ChunkingStrategyType]:
+        """Get the list of supported chunking strategies for Tavily readers."""
+        return [
+            ChunkingStrategyType.SEMANTIC_CHUNKER,
+            ChunkingStrategyType.FIXED_SIZE_CHUNKER,
+            ChunkingStrategyType.AGENTIC_CHUNKER,
+            ChunkingStrategyType.DOCUMENT_CHUNKER,
+            ChunkingStrategyType.RECURSIVE_CHUNKER,
+        ]
+
+    @classmethod
+    def get_supported_content_types(self) -> List[ContentType]:
+        return [ContentType.URL]
+
+    def _extract(self, url: str, name: Optional[str] = None) -> List[Document]:
+        """
+        Internal method to extract content from a URL using Tavily's Extract API.
+
+        Args:
+            url: The URL to extract content from
+            name: Optional name for the document (defaults to URL)
+
+        Returns:
+            A list of documents containing the extracted content
+        """
+        log_debug(f"Extracting content from: {url}")
+
+        client = TavilyClient(api_key=self.api_key)
+
+        # Prepare extract parameters
+        extract_params = {
+            "urls": [url],
+            "depth": self.extract_depth,
+        }
+
+        # Add optional params if provided
+        if self.params:
+            extract_params.update(self.params)
+
+        try:
+            # Call Tavily Extract API
+            response = client.extract(**extract_params)
+
+            # Extract content from response
+            if not response or "results" not in response:
+                logger.warning(f"No results received for URL: {url}")
+                return [Document(name=name or url, id=url, content="")]
+
+            results = response.get("results", [])
+            if not results:
+                logger.warning(f"Empty results for URL: {url}")
+                return [Document(name=name or url, id=url, content="")]
+
+            # Get the first result (since we're extracting a single URL)
+            result = results[0]
+
+            # Check if extraction failed
+            if "failed_reason" in result:
+                logger.warning(f"Extraction failed for {url}: {result['failed_reason']}")
+                return [Document(name=name or url, id=url, content="")]
+
+            # Get raw content
+            content = result.get("raw_content", "")
+
+            if content is None:
+                content = ""
+                logger.warning(f"No content received for URL: {url}")
+
+            # Debug logging
+            log_debug(f"Received content type: {type(content)}")
+            log_debug(f"Content length: {len(content) if content else 0}")
+
+            # Create documents
+            documents = []
+            if self.chunk and content:
+                documents.extend(self.chunk_document(Document(name=name or url, id=url, content=content)))
+            else:
+                documents.append(Document(name=name or url, id=url, content=content))
+
+            return documents
+
+        except Exception as e:
+            logger.error(f"Error extracting content from {url}: {e}")
+            return [Document(name=name or url, id=url, content="")]
+
+    async def _async_extract(self, url: str, name: Optional[str] = None) -> List[Document]:
+        """
+        Internal async method to extract content from a URL.
+
+        Args:
+            url: The URL to extract content from
+            name: Optional name for the document
+
+        Returns:
+            A list of documents containing the extracted content
+        """
+        log_debug(f"Async extracting content from: {url}")
+
+        # Use asyncio.to_thread to run the synchronous extract in a thread
+        return await asyncio.to_thread(self._extract, url, name)
+
+    def read(self, url: str, name: Optional[str] = None) -> List[Document]:
+        """
+        Reads content from a URL using Tavily Extract API.
+
+        This is the public API method that users should call.
+
+        Args:
+            url: The URL to extract content from
+            name: Optional name for the document
+
+        Returns:
+            A list of documents containing the extracted content
+        """
+        return self._extract(url, name)
+
+    async def async_read(self, url: str, name: Optional[str] = None) -> List[Document]:
+        """
+        Asynchronously reads content from a URL using Tavily Extract API.
+
+        This is the public API method that users should call for async operations.
+
+        Args:
+            url: The URL to extract content from
+            name: Optional name for the document
+
+        Returns:
+            A list of documents containing the extracted content
+        """
+        return await self._async_extract(url, name)
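A short usage sketch for the reader defined above; the API key and URL are placeholders, and TAVILY_API_KEY from the environment also works via the factory defaults:

    from agno.knowledge.reader.tavily_reader import TavilyReader

    # extract_depth="advanced" costs more credits but retrieves richer content
    reader = TavilyReader(api_key="tvly-...", extract_format="markdown", extract_depth="basic")
    documents = reader.read("https://example.com/article", name="example-article")
    for doc in documents:
        print(doc.id, len(doc.content))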
agno/knowledge/types.py CHANGED
@@ -20,6 +20,7 @@ class ContentType(str, Enum):
     MARKDOWN = ".md"
     DOCX = ".docx"
     DOC = ".doc"
+    PPTX = ".pptx"
     JSON = ".json"
 
     # Spreadsheet file extensions
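Since ContentType subclasses str, the new member compares directly with raw extension strings; for illustration:

    from agno.knowledge.types import ContentType

    assert ContentType.PPTX == ".pptx"  # str-mixin enum members equal their values
    assert ContentType.PPTX.value == ".pptx"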