agno 2.2.1__py3-none-any.whl → 2.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. agno/agent/agent.py +735 -574
  2. agno/culture/manager.py +22 -24
  3. agno/db/async_postgres/__init__.py +1 -1
  4. agno/db/dynamo/dynamo.py +0 -2
  5. agno/db/firestore/firestore.py +0 -2
  6. agno/db/gcs_json/gcs_json_db.py +0 -4
  7. agno/db/gcs_json/utils.py +0 -24
  8. agno/db/in_memory/in_memory_db.py +0 -3
  9. agno/db/json/json_db.py +4 -10
  10. agno/db/json/utils.py +0 -24
  11. agno/db/mongo/__init__.py +15 -1
  12. agno/db/mongo/async_mongo.py +1999 -0
  13. agno/db/mongo/mongo.py +0 -2
  14. agno/db/mysql/mysql.py +0 -3
  15. agno/db/postgres/__init__.py +1 -1
  16. agno/db/{async_postgres → postgres}/async_postgres.py +19 -22
  17. agno/db/postgres/postgres.py +7 -10
  18. agno/db/postgres/utils.py +106 -2
  19. agno/db/redis/redis.py +0 -2
  20. agno/db/singlestore/singlestore.py +0 -3
  21. agno/db/sqlite/__init__.py +2 -1
  22. agno/db/sqlite/async_sqlite.py +2269 -0
  23. agno/db/sqlite/sqlite.py +0 -2
  24. agno/db/sqlite/utils.py +96 -0
  25. agno/db/surrealdb/surrealdb.py +0 -6
  26. agno/knowledge/knowledge.py +3 -3
  27. agno/knowledge/reader/reader_factory.py +16 -0
  28. agno/knowledge/reader/tavily_reader.py +194 -0
  29. agno/memory/manager.py +28 -25
  30. agno/models/anthropic/claude.py +63 -6
  31. agno/models/base.py +251 -32
  32. agno/models/response.py +69 -0
  33. agno/os/router.py +7 -5
  34. agno/os/routers/memory/memory.py +2 -1
  35. agno/os/routers/memory/schemas.py +5 -2
  36. agno/os/schema.py +25 -20
  37. agno/os/utils.py +9 -2
  38. agno/run/agent.py +23 -30
  39. agno/run/base.py +17 -1
  40. agno/run/team.py +23 -29
  41. agno/run/workflow.py +17 -12
  42. agno/session/agent.py +3 -0
  43. agno/session/summary.py +4 -1
  44. agno/session/team.py +1 -1
  45. agno/team/team.py +599 -367
  46. agno/tools/dalle.py +2 -4
  47. agno/tools/eleven_labs.py +23 -25
  48. agno/tools/function.py +40 -0
  49. agno/tools/mcp/__init__.py +10 -0
  50. agno/tools/mcp/mcp.py +324 -0
  51. agno/tools/mcp/multi_mcp.py +347 -0
  52. agno/tools/mcp/params.py +24 -0
  53. agno/tools/slack.py +18 -3
  54. agno/tools/tavily.py +146 -0
  55. agno/utils/agent.py +366 -1
  56. agno/utils/mcp.py +92 -2
  57. agno/utils/media.py +166 -1
  58. agno/utils/print_response/workflow.py +17 -1
  59. agno/utils/team.py +89 -1
  60. agno/workflow/step.py +0 -1
  61. agno/workflow/types.py +10 -15
  62. {agno-2.2.1.dist-info → agno-2.2.3.dist-info}/METADATA +28 -25
  63. {agno-2.2.1.dist-info → agno-2.2.3.dist-info}/RECORD +66 -62
  64. agno/db/async_postgres/schemas.py +0 -139
  65. agno/db/async_postgres/utils.py +0 -347
  66. agno/tools/mcp.py +0 -679
  67. {agno-2.2.1.dist-info → agno-2.2.3.dist-info}/WHEEL +0 -0
  68. {agno-2.2.1.dist-info → agno-2.2.3.dist-info}/licenses/LICENSE +0 -0
  69. {agno-2.2.1.dist-info → agno-2.2.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,347 @@
1
+ import weakref
2
+ from contextlib import AsyncExitStack
3
+ from dataclasses import asdict
4
+ from datetime import timedelta
5
+ from types import TracebackType
6
+ from typing import List, Literal, Optional, Union
7
+
8
+ from agno.tools import Toolkit
9
+ from agno.tools.function import Function
10
+ from agno.tools.mcp.params import SSEClientParams, StreamableHTTPClientParams
11
+ from agno.utils.log import log_debug, log_error, log_info, log_warning
12
+ from agno.utils.mcp import get_entrypoint_for_tool, prepare_command
13
+
14
+ try:
15
+ from mcp import ClientSession, StdioServerParameters
16
+ from mcp.client.sse import sse_client
17
+ from mcp.client.stdio import get_default_environment, stdio_client
18
+ from mcp.client.streamable_http import streamablehttp_client
19
+ except (ImportError, ModuleNotFoundError):
20
+ raise ImportError("`mcp` not installed. Please install using `pip install mcp`")
21
+
22
+
23
+ class MultiMCPTools(Toolkit):
24
+ """
25
+ A toolkit for integrating multiple Model Context Protocol (MCP) servers with Agno agents.
26
+ This allows agents to access tools, resources, and prompts exposed by MCP servers.
27
+
28
+ Can be used in three ways:
29
+ 1. Direct initialization with a ClientSession
30
+ 2. As an async context manager with StdioServerParameters
31
+ 3. As an async context manager with SSE or Streamable HTTP endpoints
32
+ """
33
+
34
def __init__(
    self,
    commands: Optional[List[str]] = None,
    urls: Optional[List[str]] = None,
    urls_transports: Optional[List[Literal["sse", "streamable-http"]]] = None,
    *,
    env: Optional[dict[str, str]] = None,
    server_params_list: Optional[
        list[Union[SSEClientParams, StdioServerParameters, StreamableHTTPClientParams]]
    ] = None,
    timeout_seconds: int = 10,
    client=None,
    include_tools: Optional[list[str]] = None,
    exclude_tools: Optional[list[str]] = None,
    refresh_connection: bool = False,
    allow_partial_failure: bool = False,
    **kwargs,
):
    """
    Initialize the MCP toolkit.

    This only records configuration and turns ``commands`` / ``urls`` into
    server parameter objects; sessions are created later by ``_connect()``.

    Args:
        commands: List of commands to run to start the servers. Should be used in conjunction with env.
        urls: List of URLs for SSE and/or Streamable HTTP endpoints.
        urls_transports: List of transports to use for the given URLs. Must have the same length as urls.
            When omitted, every URL uses Streamable HTTP.
        env: The environment variables to pass to the servers. Merged on top of the default
            environment. Should be used in conjunction with commands.
        server_params_list: List of StdioServerParameters or SSEClientParams or StreamableHTTPClientParams
            for creating new sessions. The list is copied, so the caller's list is never modified.
        timeout_seconds: Timeout in seconds for managing timeouts for Client Session if Agent or Tool doesn't respond.
        client: The underlying MCP client (optional, used to prevent garbage collection).
        include_tools: Optional list of tool names to include (if None, includes all).
        exclude_tools: Optional list of tool names to exclude (if None, excludes none).
        refresh_connection: If True, the connection and tools will be refreshed on each run.
        allow_partial_failure: If True, allows toolkit to initialize even if some MCP servers fail to connect.
            If False, any failure will raise an exception.
        **kwargs: Forwarded to the Toolkit constructor.

    Raises:
        ValueError: If urls and urls_transports differ in length, or if none of
            server_params_list, commands or urls is provided.
    """
    super().__init__(name="MultiMCPTools", **kwargs)

    if urls_transports is not None:
        if "sse" in urls_transports:
            log_info("SSE as a standalone transport is deprecated. Please use Streamable HTTP instead.")

    if urls is not None:
        if urls_transports is None:
            log_warning(
                "The default transport 'streamable-http' will be used. You can explicitly set the transports by providing the urls_transports parameter."
            )
        else:
            if len(urls) != len(urls_transports):
                raise ValueError("urls and urls_transports must be of the same length")

    # Set these after `__init__` to bypass the `_check_tools_filters`
    # because tools are not available until `initialize()` is called.
    self.include_tools = include_tools
    self.exclude_tools = exclude_tools
    self.refresh_connection = refresh_connection

    if server_params_list is None and commands is None and urls is None:
        raise ValueError("Either server_params_list or commands or urls must be provided")

    # Copy the caller's list: entries derived from `commands` and `urls` are
    # appended below and must not leak back into the argument that was passed in.
    self.server_params_list: List[Union[SSEClientParams, StdioServerParameters, StreamableHTTPClientParams]] = (
        list(server_params_list) if server_params_list else []
    )
    self.timeout_seconds = timeout_seconds
    self.commands: Optional[List[str]] = commands
    self.urls: Optional[List[str]] = urls

    # Merge provided env with system env (explicit values win).
    if env is not None:
        env = {
            **get_default_environment(),
            **env,
        }
    else:
        env = get_default_environment()

    if commands is not None:
        for command in commands:
            parts = prepare_command(command)
            cmd = parts[0]
            arguments = parts[1:] if len(parts) > 1 else []
            self.server_params_list.append(StdioServerParameters(command=cmd, args=arguments, env=env))

    if urls is not None:
        if urls_transports is not None:
            for url, transport in zip(urls, urls_transports):
                if transport == "streamable-http":
                    self.server_params_list.append(StreamableHTTPClientParams(url=url))
                else:
                    self.server_params_list.append(SSEClientParams(url=url))
        else:
            for url in urls:
                self.server_params_list.append(StreamableHTTPClientParams(url=url))

    # Owns every transport and session context opened by `_connect()`.
    self._async_exit_stack = AsyncExitStack()

    self._client = client

    self._initialized = False
    self._connection_task = None
    self._successful_connections = 0
    self._sessions: list[ClientSession] = []

    self.allow_partial_failure = allow_partial_failure

    def cleanup():
        """Cancel active connections"""
        if self._connection_task and not self._connection_task.done():
            self._connection_task.cancel()

    # Setup cleanup logic before the instance is garbage collected.
    # NOTE(review): `cleanup` closes over `self`, so the finalizer itself keeps a
    # strong reference to the instance; per the `weakref.finalize` docs this
    # prevents collection until interpreter exit. Left unchanged - confirm intent.
    self._cleanup_finalizer = weakref.finalize(self, cleanup)
143
+
144
@property
def initialized(self) -> bool:
    """Read-only view of the internal ``_initialized`` flag."""
    return self._initialized
147
+
148
async def is_alive(self) -> bool:
    """Ping every tracked session in order.

    Returns:
        True when all pings complete (also when there are no sessions),
        False as soon as any ping raises.
    """
    try:
        for active_session in self._sessions:
            await active_session.send_ping()
    except BaseException:
        # BaseException already covers RuntimeError; any failure means "not alive".
        return False
    return True
155
+
156
async def connect(self, force: bool = False):
    """Connect to the configured MCP servers unless already initialized.

    Args:
        force: When True, the tracked sessions, the success counter, the
            initialized flag and the connection task are reset first so that
            a new connection attempt is made.

    Connection errors are logged, not raised.
    """
    if force:
        # Drop the bookkeeping of the previous connection to force a new one.
        self._connection_task = None
        self._initialized = False
        self._successful_connections = 0
        self._sessions = []

    if not self._initialized:
        try:
            await self._connect()
        except BaseException as exc:
            log_error(f"Failed to connect to {str(self)}: {exc}")
173
+
174
@classmethod
async def create_and_connect(
    cls,
    commands: Optional[List[str]] = None,
    urls: Optional[List[str]] = None,
    urls_transports: Optional[List[Literal["sse", "streamable-http"]]] = None,
    *,
    env: Optional[dict[str, str]] = None,
    server_params_list: Optional[
        List[Union[SSEClientParams, StdioServerParameters, StreamableHTTPClientParams]]
    ] = None,
    timeout_seconds: int = 5,
    client=None,
    include_tools: Optional[list[str]] = None,
    exclude_tools: Optional[list[str]] = None,
    refresh_connection: bool = False,
    **kwargs,
) -> "MultiMCPTools":
    """Build an instance from the given arguments and connect it before returning.

    All named arguments, plus any extra keyword arguments, are forwarded
    unchanged to the constructor. Unlike ``connect()``, errors raised by
    ``_connect()`` propagate to the caller.

    Returns:
        The newly created instance, after ``_connect()`` has completed.
    """
    constructor_args = {
        "commands": commands,
        "urls": urls,
        "urls_transports": urls_transports,
        "env": env,
        "server_params_list": server_params_list,
        "timeout_seconds": timeout_seconds,
        "client": client,
        "include_tools": include_tools,
        "exclude_tools": exclude_tools,
        "refresh_connection": refresh_connection,
    }
    toolkit = cls(**constructor_args, **kwargs)

    await toolkit._connect()
    return toolkit
209
+
210
async def _connect(self) -> None:
    """Connects to the MCP servers and initializes the tools.

    For each entry in ``server_params_list`` a transport (stdio, SSE or
    Streamable HTTP) is opened on the shared exit stack, a ``ClientSession``
    is created on top of it and handed to ``initialize()``. Entries of any
    other type are skipped. ``timeout_seconds`` is applied as the session
    read timeout for every transport, not just stdio.

    Raises:
        ValueError: On the first failing server when ``allow_partial_failure``
            is False, or when every server failed.
    """
    if self._initialized:
        return

    server_connection_errors = []
    read_timeout = timedelta(seconds=self.timeout_seconds)

    for server_params in self.server_params_list:
        try:
            # Pick the transport matching the parameter type.
            if isinstance(server_params, StdioServerParameters):
                transport_context = stdio_client(server_params)
            elif isinstance(server_params, SSEClientParams):
                transport_context = sse_client(**asdict(server_params))
            elif isinstance(server_params, StreamableHTTPClientParams):
                transport_context = streamablehttp_client(**asdict(server_params))
            else:
                # Unknown parameter types are ignored.
                continue

            transport = await self._async_exit_stack.enter_async_context(transport_context)
            # Streamable HTTP yields extra items after the two streams; only
            # the first two are needed for every transport.
            read, write = transport[0:2]
            session = await self._async_exit_stack.enter_async_context(
                ClientSession(read, write, read_timeout_seconds=read_timeout)
            )
            await self.initialize(session)
            self._successful_connections += 1

        except Exception as e:
            if not self.allow_partial_failure:
                # Keep the original exception attached as the explicit cause.
                raise ValueError(f"MCP connection failed: {e}") from e

            log_error(f"Failed to initialize MCP server with params {server_params}: {e}")
            server_connection_errors.append(str(e))
            continue

    if self._successful_connections > 0:
        await self.build_tools()

    if self._successful_connections == 0 and server_connection_errors:
        raise ValueError(f"All MCP connections failed: {server_connection_errors}")

    if not self._initialized and self._successful_connections > 0:
        self._initialized = True
265
+
266
async def close(self) -> None:
    """Close the MCP connections and clean up resources.

    Does nothing when the toolkit is not initialized. Otherwise the exit
    stack is closed and the session list and success counter are reset.
    A failure while closing is logged instead of raised, and the toolkit is
    marked as not initialized either way.
    """
    if self._initialized:
        try:
            await self._async_exit_stack.aclose()
            self._sessions = []
            self._successful_connections = 0
        except BaseException as exc:
            log_error(f"Failed to close MCP connections: {exc}")

        self._initialized = False
280
+
281
async def __aenter__(self) -> "MultiMCPTools":
    """Enter the async context manager.

    Attempts to connect; a connection failure is logged rather than raised,
    so the toolkit is returned in either case.
    """
    try:
        await self._connect()
    except BaseException as exc:
        log_error(f"Failed to connect to {str(self)}: {exc}")
    return self
288
+
289
async def __aexit__(
    self,
    exc_type: Union[type[BaseException], None],
    exc_val: Union[BaseException, None],
    exc_tb: Union[TracebackType, None],
):
    """Exit the async context manager.

    Closes every context held by the exit stack and resets the connection
    bookkeeping. The session list is cleared as well, as ``close()`` does,
    so a later reconnect does not make ``build_tools()`` iterate sessions
    whose contexts have already been closed. Exceptions raised while closing
    still propagate; the state is reset regardless.
    """
    try:
        await self._async_exit_stack.aclose()
    finally:
        self._sessions = []
        self._initialized = False
        self._successful_connections = 0
299
+
300
async def build_tools(self) -> None:
    """Register the tools exposed by every tracked session as toolkit functions.

    A tool is skipped when it is named in ``exclude_tools``, or when
    ``include_tools`` is set and does not name it. Each remaining tool is
    wrapped in a ``Function`` and stored in ``self.functions`` under its
    name. A registration failure is logged and re-raised.
    """
    for mcp_session in self._sessions:
        # Ask the MCP server which tools it offers.
        listing = await mcp_session.list_tools()

        # Apply the exclude list first, then the include list.
        selected_tools = [
            candidate
            for candidate in listing.tools
            if not (self.exclude_tools and candidate.name in self.exclude_tools)
            and (self.include_tools is None or candidate.name in self.include_tools)
        ]

        for tool in selected_tools:
            try:
                f = Function(
                    name=tool.name,
                    description=tool.description,
                    parameters=tool.inputSchema,
                    entrypoint=get_entrypoint_for_tool(tool, mcp_session),
                    # Set skip_entrypoint_processing to True to avoid processing the entrypoint
                    skip_entrypoint_processing=True,
                )

                self.functions[f.name] = f
                log_debug(f"Function: {f.name} registered with {self.name}")
            except Exception as e:
                log_error(f"Failed to register tool {tool.name}: {e}")
                raise
335
+
336
async def initialize(self, session: ClientSession) -> None:
    """Initialize one MCP session and start tracking it.

    Args:
        session: The client session to initialize.

    On success the session is appended to the tracked sessions and the
    toolkit is flagged as initialized. Any error is logged and re-raised.
    """
    try:
        await session.initialize()

        self._sessions.append(session)
        self._initialized = True
    except Exception as exc:
        log_error(f"Failed to get MCP tools: {exc}")
        raise
@@ -0,0 +1,24 @@
1
+ from dataclasses import dataclass
2
+ from datetime import timedelta
3
+ from typing import Any, Dict, Optional
4
+
5
+
6
@dataclass
class SSEClientParams:
    """Connection settings for an MCP server reached over SSE."""

    # Endpoint to connect to (the only required field).
    url: str
    # Optional extra headers.
    headers: Optional[Dict[str, Any]] = None
    # Plain numbers here - presumably seconds; confirm against the SSE client.
    timeout: Optional[float] = 5
    sse_read_timeout: Optional[float] = 300
14
+
15
+
16
@dataclass
class StreamableHTTPClientParams:
    """Connection settings for an MCP server reached over Streamable HTTP."""

    # Endpoint to connect to (the only required field).
    url: str
    # Optional extra headers.
    headers: Optional[Dict[str, Any]] = None
    # Timeouts are timedelta values: 30 seconds and 5 minutes by default.
    timeout: Optional[timedelta] = timedelta(seconds=30)
    sse_read_timeout: Optional[timedelta] = timedelta(minutes=5)
    # Left unset (None) by default.
    terminate_on_close: Optional[bool] = None
agno/tools/slack.py CHANGED
@@ -16,6 +16,7 @@ class SlackTools(Toolkit):
16
16
  def __init__(
17
17
  self,
18
18
  token: Optional[str] = None,
19
+ markdown: bool = True,
19
20
  enable_send_message: bool = True,
20
21
  enable_send_message_thread: bool = True,
21
22
  enable_list_channels: bool = True,
@@ -23,10 +24,22 @@ class SlackTools(Toolkit):
23
24
  all: bool = False,
24
25
  **kwargs,
25
26
  ):
27
+ """
28
+ Initialize the SlackTools class.
29
+ Args:
30
+ token: The Slack API token. Defaults to the SLACK_TOKEN environment variable.
31
+ markdown: Whether to enable Slack markdown formatting. Defaults to True.
32
+ enable_send_message: Whether to enable the send_message tool. Defaults to True.
33
+ enable_send_message_thread: Whether to enable the send_message_thread tool. Defaults to True.
34
+ enable_list_channels: Whether to enable the list_channels tool. Defaults to True.
35
+ enable_get_channel_history: Whether to enable the get_channel_history tool. Defaults to True.
36
+ all: Whether to enable all tools. Defaults to False.
37
+ """
26
38
  self.token: Optional[str] = token or getenv("SLACK_TOKEN")
27
39
  if self.token is None or self.token == "":
28
40
  raise ValueError("SLACK_TOKEN is not set")
29
41
  self.client = WebClient(token=self.token)
42
+ self.markdown = markdown
30
43
 
31
44
  tools: List[Any] = []
32
45
  if enable_send_message or all:
@@ -52,7 +65,7 @@ class SlackTools(Toolkit):
52
65
  str: A JSON string containing the response from the Slack API.
53
66
  """
54
67
  try:
55
- response = self.client.chat_postMessage(channel=channel, text=text)
68
+ response = self.client.chat_postMessage(channel=channel, text=text, mrkdwn=self.markdown)
56
69
  return json.dumps(response.data)
57
70
  except SlackApiError as e:
58
71
  logger.error(f"Error sending message: {e}")
@@ -65,13 +78,15 @@ class SlackTools(Toolkit):
65
78
  Args:
66
79
  channel (str): The channel ID or name to send the message to.
67
80
  text (str): The text of the message to send.
68
- thread_ts (ts): The thread to reply to
81
+ thread_ts (ts): The thread to reply to.
69
82
 
70
83
  Returns:
71
84
  str: A JSON string containing the response from the Slack API.
72
85
  """
73
86
  try:
74
- response = self.client.chat_postMessage(channel=channel, text=text, thread_ts=thread_ts)
87
+ response = self.client.chat_postMessage(
88
+ channel=channel, text=text, thread_ts=thread_ts, mrkdwn=self.markdown
89
+ )
75
90
  return json.dumps(response.data)
76
91
  except SlackApiError as e:
77
92
  logger.error(f"Error sending message: {e}")
agno/tools/tavily.py CHANGED
@@ -17,21 +17,51 @@ class TavilyTools(Toolkit):
17
17
  api_key: Optional[str] = None,
18
18
  enable_search: bool = True,
19
19
  enable_search_context: bool = False,
20
+ enable_extract: bool = False,
20
21
  all: bool = False,
21
22
  max_tokens: int = 6000,
22
23
  include_answer: bool = True,
23
24
  search_depth: Literal["basic", "advanced"] = "advanced",
25
+ extract_depth: Literal["basic", "advanced"] = "basic",
26
+ include_images: bool = False,
27
+ include_favicon: bool = False,
28
+ extract_timeout: Optional[int] = None,
29
+ extract_format: Literal["markdown", "text"] = "markdown",
24
30
  format: Literal["json", "markdown"] = "markdown",
25
31
  **kwargs,
26
32
  ):
33
+ """Initialize TavilyTools with search and extract capabilities.
34
+
35
+ Args:
36
+ api_key: Tavily API key. If not provided, will use TAVILY_API_KEY env var.
37
+ enable_search: Enable web search functionality. Defaults to True.
38
+ enable_search_context: Use search context mode instead of regular search. Defaults to False.
39
+ enable_extract: Enable URL content extraction functionality. Defaults to False.
40
+ all: Enable all available tools. Defaults to False.
41
+ max_tokens: Maximum tokens for search results. Defaults to 6000.
42
+ include_answer: Include AI-generated answer in search results. Defaults to True.
43
+ search_depth: Search depth level - basic (1 credit) or advanced (2 credits). Defaults to "advanced".
44
+ extract_depth: Extract depth level - basic (1 credit/5 URLs) or advanced (2 credits/5 URLs). Defaults to "basic".
45
+ include_images: Include images in extracted content. Defaults to False.
46
+ include_favicon: Include favicon in extracted content. Defaults to False.
47
+ extract_timeout: Timeout in seconds for extraction requests. Defaults to None.
48
+ extract_format: Output format for extracted content - markdown or text. Defaults to "markdown".
49
+ format: Output format for search results - json or markdown. Defaults to "markdown".
50
+ **kwargs: Additional arguments passed to Toolkit.
51
+ """
27
52
  self.api_key = api_key or getenv("TAVILY_API_KEY")
28
53
  if not self.api_key:
29
54
  logger.error("TAVILY_API_KEY not provided")
30
55
 
31
56
  self.client: TavilyClient = TavilyClient(api_key=self.api_key)
32
57
  self.search_depth: Literal["basic", "advanced"] = search_depth
58
+ self.extract_depth: Literal["basic", "advanced"] = extract_depth
33
59
  self.max_tokens: int = max_tokens
34
60
  self.include_answer: bool = include_answer
61
+ self.include_images: bool = include_images
62
+ self.include_favicon: bool = include_favicon
63
+ self.extract_timeout: Optional[int] = extract_timeout
64
+ self.extract_format: Literal["markdown", "text"] = extract_format
35
65
  self.format: Literal["json", "markdown"] = format
36
66
 
37
67
  tools: List[Any] = []
@@ -42,6 +72,9 @@ class TavilyTools(Toolkit):
42
72
  else:
43
73
  tools.append(self.web_search_using_tavily)
44
74
 
75
+ if enable_extract or all:
76
+ tools.append(self.extract_url_content)
77
+
45
78
  super().__init__(name="tavily_tools", tools=tools, **kwargs)
46
79
 
47
80
  def web_search_using_tavily(self, query: str, max_results: int = 5) -> str:
@@ -106,3 +139,116 @@ class TavilyTools(Toolkit):
106
139
  return self.client.get_search_context(
107
140
  query=query, search_depth=self.search_depth, max_tokens=self.max_tokens, include_answer=self.include_answer
108
141
  )
142
+
143
def extract_url_content(self, urls: str) -> str:
    """Extract content from one or more URLs using Tavily's Extract API.
    This function retrieves the main content from web pages in markdown or text format.

    Args:
        urls (str): Single URL or multiple comma-separated URLs to extract content from.
            Example: "https://example.com" or "https://example.com,https://another.com"

    Returns:
        str: Extracted content in the specified format (markdown or text).
            For multiple URLs, returns combined content with URL headers.
            Failed extractions are noted in the output.
            Errors are returned as a message string, not raised.
    """
    # Parse URLs - handle both single and comma-separated multiple URLs,
    # dropping blank entries.
    url_list = [candidate.strip() for candidate in urls.split(",") if candidate.strip()]

    if not url_list:
        return "Error: No valid URLs provided."

    try:
        # The Tavily extract parameter is `extract_depth`; a `depth` key is not
        # a recognised option, so the configured depth would not take effect.
        extract_params: Dict[str, Any] = {
            "urls": url_list,
            "extract_depth": self.extract_depth,
        }

        # Optional parameters are only sent when explicitly enabled/configured.
        if self.include_images:
            extract_params["include_images"] = True
        if self.include_favicon:
            extract_params["include_favicon"] = True
        if self.extract_timeout is not None:
            extract_params["timeout"] = self.extract_timeout

        # NOTE(review): `extract_format` only selects the wrapper layout below; it
        # is not forwarded to the API - confirm whether that is intended.
        response = self.client.extract(**extract_params)

        # A missing response, a missing "results" key and an empty list all
        # mean nothing was extracted.
        results = response.get("results") if response else None
        if not results:
            return "Error: No content could be extracted from the provided URL(s)."

        if self.extract_format == "markdown":
            return self._format_extract_markdown(results)
        if self.extract_format == "text":
            return self._format_extract_text(results)
        # Fallback to JSON if format is unrecognized
        return json.dumps(results, indent=2)

    except Exception as e:
        logger.error(f"Error extracting content from URLs: {e}")
        return f"Error extracting content: {str(e)}"
200
+
201
def _format_extract_markdown(self, results: List[Dict[str, Any]]) -> str:
    """Render extraction results as one markdown section per URL.

    Args:
        results: List of extraction result dictionaries from Tavily API.

    Returns:
        str: The concatenated sections, or a fallback message when there are none.
    """
    sections = []

    for entry in results:
        url = entry.get("url", "Unknown URL")
        failed_reason = entry.get("failed_reason")
        raw_content = entry.get("raw_content", "")

        # A reported failure wins over any content; empty content gets a placeholder.
        if failed_reason:
            section = f"## {url}\n\n **Extraction Failed**: {failed_reason}\n\n"
        elif raw_content:
            section = f"## {url}\n\n{raw_content}\n\n"
        else:
            section = f"## {url}\n\n*No content available*\n\n"
        sections.append(section)

    if not sections:
        return "No content extracted."
    return "".join(sections)
225
+
226
def _format_extract_text(self, results: List[Dict[str, Any]]) -> str:
    """Render extraction results as plain text, one block per URL.

    Args:
        results: List of extraction result dictionaries from Tavily API.

    Returns:
        str: Newline-joined blocks (URL line, 80-dash rule, body, blank
            separator), or a fallback message when there are none.
    """
    lines = []

    for entry in results:
        url = entry.get("url", "Unknown URL")
        failed_reason = entry.get("failed_reason")
        raw_content = entry.get("raw_content", "")

        # A reported failure wins over any content; empty content gets a placeholder.
        if failed_reason:
            body = f"EXTRACTION FAILED: {failed_reason}"
        elif raw_content:
            body = raw_content
        else:
            body = "No content available"

        lines.extend([f"URL: {url}", "-" * 80, body, "\n"])

    if not lines:
        return "No content extracted."
    return "\n".join(lines)