agno 1.7.11__py3-none-any.whl → 1.8.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
- agno/agent/agent.py +13 -0
- agno/app/agui/utils.py +1 -1
- agno/app/fastapi/async_router.py +13 -10
- agno/embedder/google.py +17 -5
- agno/knowledge/gcs/pdf.py +105 -1
- agno/media.py +24 -3
- agno/models/google/gemini.py +71 -19
- agno/models/openai/chat.py +5 -1
- agno/models/openai/responses.py +26 -9
- agno/reasoning/default.py +7 -1
- agno/reasoning/helpers.py +7 -1
- agno/storage/dynamodb.py +18 -22
- agno/team/team.py +165 -65
- agno/tools/bravesearch.py +7 -7
- agno/tools/calculator.py +8 -8
- agno/tools/discord.py +11 -11
- agno/tools/duckduckgo.py +8 -16
- agno/tools/github.py +26 -14
- agno/tools/memori.py +387 -0
- agno/tools/scrapegraph.py +65 -0
- agno/tools/youtube.py +12 -11
- agno/vectordb/pgvector/pgvector.py +23 -39
- agno/workflow/v2/step.py +4 -0
- agno/workflow/v2/types.py +11 -1
- agno/workflow/v2/workflow.py +54 -1
- {agno-1.7.11.dist-info → agno-1.8.0.dist-info}/METADATA +7 -4
- {agno-1.7.11.dist-info → agno-1.8.0.dist-info}/RECORD +31 -30
- {agno-1.7.11.dist-info → agno-1.8.0.dist-info}/WHEEL +0 -0
- {agno-1.7.11.dist-info → agno-1.8.0.dist-info}/entry_points.txt +0 -0
- {agno-1.7.11.dist-info → agno-1.8.0.dist-info}/licenses/LICENSE +0 -0
- {agno-1.7.11.dist-info → agno-1.8.0.dist-info}/top_level.txt +0 -0
agno/tools/github.py
CHANGED

@@ -1698,20 +1698,32 @@ class GithubTools(Toolkit):
         log_debug(f"Final search query: {search_query}")
         code_results = self.g.search_code(search_query)

-        … (14 removed lines; their content is not rendered in the diff source)
+        results: list[dict] = []
+        limit = 60
+        max_pages = 2  # GitHub returns 30 items per page, so 2 pages covers our limit
+        page_index = 0
+
+        while len(results) < limit and page_index < max_pages:
+            # Fetch one page of results from GitHub API
+            page_items = code_results.get_page(page_index)
+
+            # Stop if no more results available
+            if not page_items:
+                break
+
+            # Process each code result in the current page
+            for code in page_items:
+                code_info = {
+                    "repository": code.repository.full_name,
+                    "path": code.path,
+                    "name": code.name,
+                    "sha": code.sha,
+                    "html_url": code.html_url,
+                    "git_url": code.git_url,
+                    "score": code.score,
+                }
+                results.append(code_info)
+            page_index += 1

         # Return search results
         return json.dumps(
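The new loop replaces unbounded iteration over the search results with page-capped collection through PyGithub's `PaginatedList`. A minimal standalone sketch of the same pattern, assuming an authenticated PyGithub client (the token placeholder, query, and selected fields are illustrative):

```python
from github import Github

# Code search requires authentication; the token here is a placeholder
g = Github("<your-access-token>")
code_results = g.search_code("Toolkit repo:agno-agi/agno")

limit = 60
max_pages = 2  # GitHub's code search returns 30 items per page
results: list[dict] = []
page_index = 0

while len(results) < limit and page_index < max_pages:
    page_items = code_results.get_page(page_index)  # fetch one page lazily
    if not page_items:
        break  # ran out of results before hitting the cap
    for code in page_items:
        results.append({"repository": code.repository.full_name, "path": code.path})
    page_index += 1

print(f"Collected {len(results)} results")
```

Capping at two pages bounds both latency and rate-limit consumption, at the cost of never returning more than 60 items.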
agno/tools/memori.py
ADDED

@@ -0,0 +1,387 @@
+import json
+from typing import Any, Dict, Optional
+
+from agno.agent import Agent
+from agno.tools.toolkit import Toolkit
+from agno.utils.log import log_debug, log_error, log_info, log_warning
+
+try:
+    from memori import Memori, create_memory_tool
+except ImportError:
+    raise ImportError("`memorisdk` package not found. Please install it with `pip install memorisdk`")
+
+
+class MemoriTools(Toolkit):
+    """
+    Memori ToolKit for Agno Agents and Teams, providing persistent memory capabilities.
+
+    This toolkit integrates Memori's memory system with Agno, allowing Agents and Teams to:
+    - Store and retrieve conversation history
+    - Search through past interactions
+    - Maintain user preferences and context
+    - Build long-term memory across sessions
+
+    Requirements:
+    - pip install memorisdk
+    - Database connection string (SQLite, PostgreSQL, etc.)
+
+    Example:
+        ```python
+        from agno.tools.memori import MemoriTools
+
+        # Initialize with SQLite (default)
+        memori_tools = MemoriTools(
+            database_connect="sqlite:///agent_memory.db",
+            namespace="my_agent",
+            auto_ingest=True  # Automatically ingest conversations
+        )
+
+        # Add to agent
+        agent = Agent(
+            model=OpenAIChat(),
+            tools=[memori_tools],
+            description="An AI assistant with persistent memory"
+        )
+        ```
+    """
+
+    def __init__(
+        self,
+        database_connect: Optional[str] = None,
+        namespace: Optional[str] = None,
+        conscious_ingest: bool = True,
+        auto_ingest: bool = True,
+        verbose: bool = False,
+        config: Optional[Dict[str, Any]] = None,
+        auto_enable: bool = True,
+        **kwargs,
+    ):
+        """
+        Initialize Memori toolkit.
+
+        Args:
+            database_connect: Database connection string (e.g., "sqlite:///memory.db")
+            namespace: Namespace for organizing memories (e.g., "agent_v1", "user_session")
+            conscious_ingest: Whether to use conscious memory ingestion
+            auto_ingest: Whether to automatically ingest conversations into memory
+            verbose: Enable verbose logging from Memori
+            config: Additional Memori configuration
+            auto_enable: Automatically enable the memory system on initialization
+            **kwargs: Additional arguments passed to Toolkit base class
+        """
+        super().__init__(
+            name="memori_tools",
+            tools=[
+                self.search_memory,
+                self.record_conversation,
+                self.get_memory_stats,
+            ],
+            **kwargs,
+        )
+
+        # Set default database connection if not provided
+        if not database_connect:
+            sqlite_db = "sqlite:///agno_memori_memory.db"
+            log_info(f"No database connection provided, using default SQLite database at {sqlite_db}")
+            database_connect = sqlite_db
+
+        self.database_connect = database_connect
+        self.namespace = namespace or "agno_default"
+        self.conscious_ingest = conscious_ingest
+        self.auto_ingest = auto_ingest
+        self.verbose = verbose
+        self.config = config or {}
+
+        try:
+            # Initialize Memori memory system
+            log_debug(f"Initializing Memori with database: {self.database_connect}")
+            self.memory_system = Memori(
+                database_connect=self.database_connect,
+                conscious_ingest=self.conscious_ingest,
+                auto_ingest=self.auto_ingest,
+                verbose=self.verbose,
+                namespace=self.namespace,
+                **self.config,
+            )
+
+            # Enable the memory system if auto_enable is True
+            if auto_enable:
+                self.memory_system.enable()
+                log_debug("Memori memory system enabled")
+
+            # Create the memory tool for internal use
+            self._memory_tool = create_memory_tool(self.memory_system)
+
+        except Exception as e:
+            log_error(f"Failed to initialize Memori: {e}")
+            raise ConnectionError("Failed to initialize Memori memory system") from e
+
+    def search_memory(
+        self,
+        agent: Agent,
+        query: str,
+        limit: Optional[int] = None,
+    ) -> str:
+        """
+        Search the Agent's memory for past conversations and information.
+
+        This performs semantic search across all stored memories to find
+        relevant information based on the provided query.
+
+        Args:
+            query: What to search for in memory (e.g., "past conversations about AI", "user preferences")
+            limit: Maximum number of results to return (optional)
+
+        Returns:
+            str: JSON-encoded search results or error message
+
+        Example:
+            search_memory("user's favorite programming languages")
+            search_memory("previous discussions about machine learning")
+        """
+        try:
+            if not query.strip():
+                return json.dumps({"error": "Please provide a search query"})
+
+            log_debug(f"Searching memory for: {query}")
+
+            # Execute search using Memori's memory tool
+            result = self._memory_tool.execute(query=query.strip())
+
+            if result:
+                # If limit is specified, truncate results
+                if limit and isinstance(result, list):
+                    result = result[:limit]
+
+                return json.dumps(
+                    {
+                        "success": True,
+                        "query": query,
+                        "results": result,
+                        "count": len(result) if isinstance(result, list) else 1,
+                    }
+                )
+            else:
+                return json.dumps(
+                    {
+                        "success": True,
+                        "query": query,
+                        "results": [],
+                        "count": 0,
+                        "message": "No relevant memories found",
+                    }
+                )
+
+        except Exception as e:
+            log_error(f"Error searching memory: {e}")
+            return json.dumps({"success": False, "error": f"Memory search error: {str(e)}"})
+
+    def record_conversation(self, agent: Agent, content: str) -> str:
+        """
+        Add important information or facts to memory.
+
+        Use this tool to store important information, user preferences, facts, or context that should be remembered
+        for future conversations.
+
+        Args:
+            content: The information/facts to store in memory
+
+        Returns:
+            str: Success message or error details
+
+        Example:
+            record_conversation("User prefers Python over JavaScript")
+            record_conversation("User is working on an e-commerce project using Django")
+            record_conversation("User's name is John and they live in NYC")
+        """
+        try:
+            if not content.strip():
+                return json.dumps({"success": False, "error": "Content cannot be empty"})
+
+            log_debug(f"Adding conversation: {content}")
+
+            # Extract the actual AI response from the agent's conversation history
+            ai_output = "I've noted this information and will remember it."
+
+            self.memory_system.record_conversation(user_input=content, ai_output=str(ai_output))
+            return json.dumps(
+                {
+                    "success": True,
+                    "message": "Memory added successfully via conversation recording",
+                    "content_length": len(content),
+                }
+            )
+
+        except Exception as e:
+            log_error(f"Error adding memory: {e}")
+            return json.dumps({"success": False, "error": f"Failed to add memory: {str(e)}"})
+
+    def get_memory_stats(
+        self,
+        agent: Agent,
+    ) -> str:
+        """
+        Get statistics about the memory system.
+
+        Returns information about the current state of the memory system,
+        including total memories, memory distribution by retention type
+        (short-term vs long-term), and system configuration.
+
+        Returns:
+            str: JSON-encoded memory statistics
+
+        Example:
+            Returns statistics like:
+            {
+                "success": true,
+                "total_memories": 42,
+                "memories_by_retention": {
+                    "short_term": 5,
+                    "long_term": 37
+                },
+                "namespace": "my_agent",
+                "conscious_ingest": true,
+                "auto_ingest": true,
+                "memory_system_enabled": true
+            }
+        """
+        try:
+            log_debug("Retrieving memory statistics")
+
+            # Base stats about the system configuration
+            stats = {
+                "success": True,
+                "namespace": self.namespace,
+                "database_connect": self.database_connect,
+                "conscious_ingest": self.conscious_ingest,
+                "auto_ingest": self.auto_ingest,
+                "verbose": self.verbose,
+                "memory_system_enabled": hasattr(self.memory_system, "_enabled") and self.memory_system._enabled,
+            }
+
+            # Get Memori's built-in memory statistics
+            try:
+                if hasattr(self.memory_system, "get_memory_stats"):
+                    # Use the get_memory_stats method as shown in the example
+                    memori_stats = self.memory_system.get_memory_stats()
+
+                    # Add the Memori-specific stats to our response
+                    if isinstance(memori_stats, dict):
+                        # Include total memories
+                        if "total_memories" in memori_stats:
+                            stats["total_memories"] = memori_stats["total_memories"]
+
+                        # Include memory distribution by retention type
+                        if "memories_by_retention" in memori_stats:
+                            stats["memories_by_retention"] = memori_stats["memories_by_retention"]
+
+                            # Also add individual counts for convenience
+                            retention_info = memori_stats["memories_by_retention"]
+                            stats["short_term_memories"] = retention_info.get("short_term", 0)
+                            stats["long_term_memories"] = retention_info.get("long_term", 0)
+
+                        # Include any other available stats
+                        for key, value in memori_stats.items():
+                            if key not in stats:
+                                stats[key] = value
+
+                    log_debug(
+                        f"Retrieved memory stats: total={stats.get('total_memories', 0)}, "
+                        f"short_term={stats.get('short_term_memories', 0)}, "
+                        f"long_term={stats.get('long_term_memories', 0)}"
+                    )
+
+                else:
+                    log_debug("get_memory_stats method not available, providing basic stats only")
+                    stats["total_memories"] = 0
+                    stats["memories_by_retention"] = {"short_term": 0, "long_term": 0}
+                    stats["short_term_memories"] = 0
+                    stats["long_term_memories"] = 0
+
+            except Exception as e:
+                log_debug(f"Could not retrieve detailed memory stats: {e}")
+                # Provide basic stats if detailed stats fail
+                stats["total_memories"] = 0
+                stats["memories_by_retention"] = {"short_term": 0, "long_term": 0}
+                stats["short_term_memories"] = 0
+                stats["long_term_memories"] = 0
+                stats["stats_warning"] = "Detailed memory statistics not available"
+
+            return json.dumps(stats)
+
+        except Exception as e:
+            log_error(f"Error getting memory stats: {e}")
+            return json.dumps({"success": False, "error": f"Failed to get memory statistics: {str(e)}"})
+
+    def enable_memory_system(self) -> bool:
+        """Enable the Memori memory system."""
+        try:
+            self.memory_system.enable()
+            log_debug("Memori memory system enabled")
+            return True
+        except Exception as e:
+            log_error(f"Failed to enable memory system: {e}")
+            return False
+
+    def disable_memory_system(self) -> bool:
+        """Disable the Memori memory system."""
+        try:
+            if hasattr(self.memory_system, "disable"):
+                self.memory_system.disable()
+                log_debug("Memori memory system disabled")
+                return True
+            else:
+                log_warning("Memory system disable method not available")
+                return False
+        except Exception as e:
+            log_error(f"Failed to disable memory system: {e}")
+            return False
+
+
+def create_memori_search_tool(memori_toolkit: MemoriTools):
+    """
+    Create a standalone memory search function for use with Agno agents.
+
+    This is a convenience function that creates a memory search tool similar
+    to the pattern shown in the Memori example code.
+
+    Args:
+        memori_toolkit: An initialized MemoriTools instance
+
+    Returns:
+        Callable: A memory search function that can be used as an agent tool
+
+    Example:
+        ```python
+        memori_tools = MemoriTools(database_connect="sqlite:///memory.db")
+        search_tool = create_memori_search_tool(memori_tools)
+
+        agent = Agent(
+            model=OpenAIChat(),
+            tools=[search_tool],
+            description="Agent with memory search capability"
+        )
+        ```
+    """
+
+    def search_memory(query: str) -> str:
+        """
+        Search the agent's memory for past conversations and information.
+
+        Args:
+            query: What to search for in memory
+
+        Returns:
+            str: Search results or error message
+        """
+        try:
+            if not query.strip():
+                return "Please provide a search query"
+
+            result = memori_toolkit._memory_tool.execute(query=query.strip())
+            return str(result) if result else "No relevant memories found"
+
+        except Exception as e:
+            return f"Memory search error: {str(e)}"
+
+    return search_memory
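A minimal end-to-end sketch of the new toolkit, assuming the OpenAI model class and an `OPENAI_API_KEY` in the environment; the namespace and prompts are illustrative, and the toolkit falls back to a local SQLite file when no connection string is given:

```python
from agno.agent import Agent
from agno.models.openai import OpenAIChat
from agno.tools.memori import MemoriTools

# Persistent memory backed by SQLite; swap in a PostgreSQL URL for production
memori_tools = MemoriTools(
    database_connect="sqlite:///agent_memory.db",
    namespace="support_bot",
)

agent = Agent(
    model=OpenAIChat(id="gpt-4o"),
    tools=[memori_tools],
    description="An assistant that remembers user preferences across sessions",
)

agent.print_response("Remember that I prefer Python over JavaScript.")
agent.print_response("Which language do I prefer?")  # answered from memory
```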
agno/tools/scrapegraph.py
CHANGED

@@ -22,6 +22,7 @@ class ScrapeGraphTools(Toolkit):
         markdownify: bool = False,
         crawl: bool = False,
         searchscraper: bool = False,
+        agentic_crawler: bool = False,
         **kwargs,
     ):
         self.api_key: Optional[str] = api_key or os.getenv("SGAI_API_KEY")

@@ -41,6 +42,8 @@ class ScrapeGraphTools(Toolkit):
             tools.append(self.crawl)
         if searchscraper:
             tools.append(self.searchscraper)
+        if agentic_crawler:
+            tools.append(self.agentic_crawler)

         super().__init__(name="scrapegraph_tools", tools=tools, **kwargs)

@@ -110,6 +113,68 @@ class ScrapeGraphTools(Toolkit):
         except Exception as e:
             return json.dumps({"error": str(e)})

+    def agentic_crawler(
+        self,
+        url: str,
+        steps: List[str],
+        use_session: bool = True,
+        user_prompt: Optional[str] = None,
+        output_schema: Optional[dict] = None,
+        ai_extraction: bool = False,
+    ) -> str:
+        """Perform agentic crawling with automated browser actions and optional AI extraction.
+
+        This tool can:
+        1. Navigate to a website
+        2. Perform a series of automated actions (like filling forms, clicking buttons)
+        3. Extract the resulting HTML content as markdown
+        4. Optionally use AI to extract structured data
+
+        Args:
+            url (str): The URL to scrape
+            steps (List[str]): List of steps to perform on the webpage (e.g., ["Type email in input box", "click login"])
+            use_session (bool): Whether to use session for the scraping (default: True)
+            user_prompt (Optional[str]): Prompt for AI extraction (only used when ai_extraction=True)
+            output_schema (Optional[dict]): Schema for structured data extraction (only used when ai_extraction=True)
+            ai_extraction (bool): Whether to use AI for data extraction from the scraped content (default: False)
+
+        Returns:
+            JSON string containing the scraping results, including request_id, status, and extracted data
+        """
+        try:
+            # Validate required parameters for AI extraction
+            if ai_extraction and not user_prompt:
+                return json.dumps({"error": "user_prompt is required when ai_extraction=True"})
+
+            # Validate URL format
+            if not url.strip():
+                return json.dumps({"error": "URL cannot be empty"})
+            if not (url.startswith("http://") or url.startswith("https://")):
+                return json.dumps({"error": "Invalid URL - must start with http:// or https://"})
+
+            # Validate steps
+            if not steps:
+                return json.dumps({"error": "Steps cannot be empty"})
+            if any(not step.strip() for step in steps):
+                return json.dumps({"error": "All steps must contain valid instructions"})
+
+            # Prepare parameters for the API call
+            params = {"url": url, "steps": steps, "use_session": use_session, "ai_extraction": ai_extraction}
+
+            # Add optional parameters only if they are provided
+            if user_prompt:
+                params["user_prompt"] = user_prompt
+            if output_schema:
+                params["output_schema"] = output_schema
+
+            # Call the agentic scraper API
+            response = self.client.agenticscraper(**params)
+
+            return json.dumps(response, indent=2)
+
+        except Exception as e:
+            return json.dumps({"error": str(e)})
+
     def searchscraper(self, prompt: str) -> str:
         """Search the web and extract information from the web.
         Args:
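A hedged invocation sketch for the new `agentic_crawler` tool; the URL, steps, prompt, and schema below are placeholders, and `SGAI_API_KEY` is assumed to be set in the environment:

```python
from agno.tools.scrapegraph import ScrapeGraphTools

tools = ScrapeGraphTools(agentic_crawler=True)

result = tools.agentic_crawler(
    url="https://example.com/login",
    steps=[
        "Type user@example.com in the email input",
        "Click the login button",
    ],
    user_prompt="Extract the dashboard heading",  # required because ai_extraction=True
    output_schema={"heading": "string"},          # optional structured-output hint
    ai_extraction=True,
)
print(result)  # JSON string with request_id, status, and extracted data
```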
agno/tools/youtube.py
CHANGED

@@ -126,18 +126,19 @@ class YouTubeTools(Toolkit):
             return "Error getting video ID from URL, please provide a valid YouTube url"

         try:
-            … (10 removed lines; content not rendered in the diff source)
-            return "No captions found for video"
+            ytt_api = YouTubeTranscriptApi()
+            captions_data = ytt_api.fetch(video_id)
+
+            # log_info(f"Captions for video {video_id}: {captions_data}")
+
+            transcript_text = ""
+
+            for segment in captions_data:
+                transcript_text += f"{segment.text} "
+
+            return transcript_text.strip() if transcript_text else "No captions found for video"
         except Exception as e:
+            # log_info(f"Error getting captions for video {video_id}: {e}")
             return f"Error getting captions for video: {e}"

     def get_video_timestamps(self, url: str) -> str:
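The rewrite targets the instance-based API introduced in youtube-transcript-api 1.0, where `fetch()` on a `YouTubeTranscriptApi` instance replaces the old class-level helpers. A minimal sketch of that API, with a placeholder video ID:

```python
from youtube_transcript_api import YouTubeTranscriptApi

ytt_api = YouTubeTranscriptApi()
fetched = ytt_api.fetch("dQw4w9WgXcQ")  # placeholder video ID

# Each snippet exposes .text, .start, and .duration
transcript = " ".join(snippet.text for snippet in fetched)
print(transcript[:200])
```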
agno/vectordb/pgvector/pgvector.py
CHANGED

@@ -310,26 +310,7 @@ class PgVector(VectorDb):
                 batch_records = []
                 for doc in batch_docs:
                     try:
-
-                        cleaned_content = self._clean_content(doc.content)
-                        content_hash = safe_content_hash(doc.content)
-                        _id = doc.id or content_hash
-
-                        meta_data = doc.meta_data or {}
-                        if filters:
-                            meta_data.update(filters)
-
-                        record = {
-                            "id": _id,
-                            "name": doc.name,
-                            "meta_data": doc.meta_data,
-                            "filters": filters,
-                            "content": cleaned_content,
-                            "embedding": doc.embedding,
-                            "usage": doc.usage,
-                            "content_hash": content_hash,
-                        }
-                        batch_records.append(record)
+                        batch_records.append(self._get_document_record(doc, filters))
                     except Exception as e:
                         logger.error(f"Error processing document '{doc.name}': {e}")

@@ -383,25 +364,7 @@ class PgVector(VectorDb):
                 batch_records = []
                 for doc in batch_docs:
                     try:
-
-                        cleaned_content = self._clean_content(doc.content)
-                        content_hash = safe_content_hash(doc.content)
-
-                        meta_data = doc.meta_data or {}
-                        if filters:
-                            meta_data.update(filters)
-
-                        record = {
-                            "id": content_hash,  # use content_hash as a reproducible id to avoid duplicates while upsert
-                            "name": doc.name,
-                            "meta_data": doc.meta_data,
-                            "filters": filters,
-                            "content": cleaned_content,
-                            "embedding": doc.embedding,
-                            "usage": doc.usage,
-                            "content_hash": content_hash,
-                        }
-                        batch_records.append(record)
+                        batch_records.append(self._get_document_record(doc, filters))
                     except Exception as e:
                         logger.error(f"Error processing document '{doc.name}': {e}")

@@ -430,6 +393,27 @@ class PgVector(VectorDb):
             logger.error(f"Error upserting documents: {e}")
             raise

+    def _get_document_record(self, doc: Document, filters: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
+        doc.embed(embedder=self.embedder)
+        cleaned_content = self._clean_content(doc.content)
+        content_hash = safe_content_hash(doc.content)
+        _id = doc.id or content_hash
+
+        meta_data = doc.meta_data or {}
+        if filters:
+            meta_data.update(filters)
+
+        return {
+            "id": _id,
+            "name": doc.name,
+            "meta_data": meta_data,
+            "filters": filters,
+            "content": cleaned_content,
+            "embedding": doc.embedding,
+            "usage": doc.usage,
+            "content_hash": content_hash,
+        }
+
     async def async_upsert(self, documents: List[Document], filters: Optional[Dict[str, Any]] = None) -> None:
         """Upsert documents asynchronously by running in a thread."""
         await asyncio.to_thread(self.upsert, documents, filters)
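Beyond deduplication, the extracted helper changes behavior in two small ways: it embeds the document (`doc.embed(embedder=self.embedder)`) before building the record, and it stores the filter-merged `meta_data` instead of the raw `doc.meta_data`, which the old inline blocks computed but never used. A usage sketch, assuming a running Postgres instance with the pgvector extension (table name and URL are placeholders):

```python
from agno.document import Document
from agno.vectordb.pgvector import PgVector

vector_db = PgVector(
    table_name="docs",
    db_url="postgresql+psycopg://ai:ai@localhost:5532/ai",
)
vector_db.create()  # create the table if it does not exist

# Both insert() and upsert() now route each Document through
# _get_document_record(): filters end up merged into meta_data,
# and the id falls back to the content hash when doc.id is None.
vector_db.upsert(
    [Document(content="PgVector stores embeddings in Postgres")],
    filters={"source": "docs"},
)
```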
agno/workflow/v2/step.py
CHANGED

@@ -239,6 +239,7 @@ class Step:
             images=images,
             videos=videos,
             audio=audios,
+            files=step_input.files,
             session_id=session_id,
             user_id=user_id,
         )

@@ -363,6 +364,7 @@ class Step:
             images=images,
             videos=videos,
             audio=audios,
+            files=step_input.files,
             session_id=session_id,
             user_id=user_id,
             stream=True,

@@ -514,6 +516,7 @@ class Step:
             images=images,
             videos=videos,
             audio=audios,
+            files=step_input.files,
             session_id=session_id,
             user_id=user_id,
         )

@@ -656,6 +659,7 @@ class Step:
             images=images,
             videos=videos,
             audio=audios,
+            files=step_input.files,
             session_id=session_id,
             user_id=user_id,
             stream=True,