slide-lye 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lye/__init__.py +132 -0
- lye/audio.py +268 -0
- lye/browser.py +232 -0
- lye/command_line.py +220 -0
- lye/files.py +495 -0
- lye/image.py +253 -0
- lye/notion.py +772 -0
- lye/slack.py +483 -0
- lye/utils/__init__.py +7 -0
- lye/utils/files.py +90 -0
- lye/utils/logging.py +58 -0
- lye/web.py +231 -0
- slide_lye-0.2.2.dist-info/METADATA +137 -0
- slide_lye-0.2.2.dist-info/RECORD +15 -0
- slide_lye-0.2.2.dist-info/WHEEL +4 -0
lye/__init__.py
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Lye - Tools package for Tyler
|
|
3
|
+
"""
|
|
4
|
+
__version__ = "0.2.2"
|
|
5
|
+
|
|
6
|
+
import importlib
|
|
7
|
+
import sys
|
|
8
|
+
import os
|
|
9
|
+
import glob
|
|
10
|
+
from typing import Dict, List
|
|
11
|
+
from lye.utils.logging import get_logger
|
|
12
|
+
|
|
13
|
+
# Get configured logger
|
|
14
|
+
logger = get_logger(__name__)
|
|
15
|
+
|
|
16
|
+
# Initialize empty tool lists for each module
WEB_TOOLS = []
SLACK_TOOLS = []
COMMAND_LINE_TOOLS = []
NOTION_TOOLS = []
IMAGE_TOOLS = []
AUDIO_TOOLS = []
FILES_TOOLS = []
BROWSER_TOOLS = []

# Combined tools list
TOOLS = []

# Map of module names to their tools for dynamic loading.
# Insertion order doubles as the load order, which fixes the order of TOOLS.
TOOL_MODULES: Dict[str, List] = {
    'web': WEB_TOOLS,
    'slack': SLACK_TOOLS,
    'command_line': COMMAND_LINE_TOOLS,
    'notion': NOTION_TOOLS,
    'image': IMAGE_TOOLS,
    'audio': AUDIO_TOOLS,
    'files': FILES_TOOLS,
    'browser': BROWSER_TOOLS
}

# Import every tool module on its own so that one module with a missing
# optional dependency does not prevent the remaining modules from loading.
_loaded_modules = []
for _module_name, _module_tool_list in TOOL_MODULES.items():
    try:
        _module = importlib.import_module(f".{_module_name}", __name__)
    except ImportError as e:
        print(f"Warning: Some tool modules could not be imported: {e}")
        continue

    # Expose the submodule as a package attribute (e.g. ``lye.web``).
    globals()[_module_name] = _module
    _loaded_modules.append(_module_name)

    # Get the module's tools and maintain both individual and combined lists
    try:
        module_tools = getattr(_module, "TOOLS", [])
        _module_tool_list.extend(module_tools)
        TOOLS.extend(module_tools)
    except Exception as e:
        print(f"Warning: Could not load {_module_name.replace('_', ' ')} tools: {e}")

__all__ = [
    # Module-level tool lists
    'TOOLS',
    'WEB_TOOLS',
    'FILES_TOOLS',
    'COMMAND_LINE_TOOLS',
    'AUDIO_TOOLS',
    'IMAGE_TOOLS',
    'BROWSER_TOOLS',
    'SLACK_TOOLS',
    'NOTION_TOOLS',
    # Module namespaces for cleaner imports. Only modules that actually
    # imported are listed; naming a missing attribute here would make
    # ``from lye import *`` raise AttributeError.
    *(
        _name
        for _name in (
            'web',
            'files',
            'command_line',
            'audio',
            'image',
            'browser',
            'slack',
            'notion',
        )
        if _name in _loaded_modules
    ),
]
|
lye/audio.py
ADDED
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import weave
|
|
3
|
+
import base64
|
|
4
|
+
from typing import Dict, List, Optional, Any, Tuple
|
|
5
|
+
from litellm import speech, transcription
|
|
6
|
+
import uuid
|
|
7
|
+
import tempfile
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
@weave.op(name="text-to-speech")
async def text_to_speech(*,
    input: str,
    voice: str = "alloy",
    model: str = "tts-1",
    response_format: str = "mp3",
    speed: float = 1.0
) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]:
    """
    Convert text to speech using LiteLLM's speech API.

    Args:
        input (str): The text to convert to speech (max 4096 characters)
        voice (str, optional): The voice to use. Defaults to "alloy"
        model (str, optional): The model to use, optionally with a provider
            prefix such as "openai/tts-1". Defaults to "tts-1"
        response_format (str, optional): The format of the audio file. Defaults to "mp3"
        speed (float, optional): The speed of the generated audio. Defaults to 1.0

    Returns:
        Tuple[Dict[str, Any], List[Dict[str, Any]]]: Tuple containing:
            - Dict with success status and, on success, a short description
              (on failure, an "error" message instead)
            - List of file dictionaries holding the raw audio bytes plus
              filename, mime type and generation attributes; empty on failure

    Errors are never raised to the caller; any exception is reported through
    the returned dict.
    """
    # Tracked outside the try so the finally clause can always clean it up.
    temp_path = None
    try:
        # Validate voice
        valid_voices = ["alloy", "echo", "fable", "onyx", "nova", "shimmer"]
        if voice not in valid_voices:
            return (
                {
                    "success": False,
                    "error": f"Voice {voice} not supported. Choose from: {valid_voices}"
                },
                []  # Empty files list for error case
            )

        # Validate model. A suffix match is used so that provider prefixes
        # like "openai/tts-1" are accepted.
        valid_models = ["tts-1", "tts-1-hd"]
        if not model.endswith(tuple(valid_models)):
            return (
                {
                    "success": False,
                    "error": f"Model {model} not supported. Choose from: {valid_models}"
                },
                []  # Empty files list for error case
            )

        # Create a temporary file to store the audio. delete=False because the
        # file is reopened by path below; it is removed in the finally clause.
        with tempfile.NamedTemporaryFile(delete=False, suffix=f".{response_format}") as temp_file:
            temp_path = temp_file.name

        # Generate speech
        response = speech(
            model=model,
            voice=voice,
            input=input,
            response_format=response_format,
            speed=speed
        )

        # Stream to file
        response.stream_to_file(temp_path)

        # Read the file content
        with open(temp_path, "rb") as f:
            audio_bytes = f.read()

        # Create a unique filename
        timestamp = uuid.uuid4().hex
        filename = f"speech_{timestamp}.{response_format}"

        # Determine mime type based on response_format
        mime_type_map = {
            "mp3": "audio/mpeg",
            "opus": "audio/opus",
            "aac": "audio/aac",
            "flac": "audio/flac"
        }
        mime_type = mime_type_map.get(response_format, f"audio/{response_format}")

        # Create a short description
        description = f"Speech generated from text: '{input[:50]}{'...' if len(input) > 50 else ''}'"

        # Return tuple with content dict and files list
        return (
            {
                "success": True,
                "description": description,
            },
            [{
                "content": audio_bytes,  # Return raw bytes instead of base64 string
                "filename": filename,
                "mime_type": mime_type,
                "description": description,
                "attributes": {
                    "voice": voice,
                    "model": model,
                    "format": response_format,
                    "speed": speed,
                    "text_length": len(input)
                }
            }]
        )

    except Exception as e:
        return (
            {
                "success": False,
                "error": str(e)
            },
            []  # Empty files list for error case
        )
    finally:
        # Clean up the temporary file on every path, including failures of
        # the speech request, so repeated errors do not litter the temp dir.
        if temp_path is not None:
            try:
                os.unlink(temp_path)
            except OSError:
                # Best-effort cleanup: a leftover temp file must not turn a
                # successful generation into a failure.
                pass
|
|
127
|
+
|
|
128
|
+
@weave.op(name="speech-to-text")
async def speech_to_text(*,
    file_url: str,
    language: str = None,
    prompt: str = None
) -> Dict[str, Any]:
    """
    Transcribe an audio file to text through LiteLLM's transcription API.

    Args:
        file_url: Full path to the audio file; it is opened from the local
            filesystem.
        language: Optional ISO-639-1 language code. Omitted from the request
            when empty, leaving detection to the model.
        prompt: Optional text to guide the model's style or continue a
            previous audio segment. Omitted from the request when empty.

    Returns:
        Dict[str, Any]: On success, ``success=True`` with the transcribed
        ``text`` and a ``details`` dict (model, language, file_url). On any
        failure, ``success=False`` with the exception message under ``error``.
    """
    try:
        # The file_url is used as-is as a filesystem path.
        audio_path = Path(file_url)
        if not audio_path.exists():
            raise FileNotFoundError(f"Audio file not found at {audio_path}")

        # Forward only the optional parameters that were actually supplied.
        extra_kwargs = {
            key: value
            for key, value in (("language", language), ("prompt", prompt))
            if value
        }

        with open(audio_path, "rb") as audio_file:
            response = transcription(
                model="whisper-1",
                file=audio_file,
                **extra_kwargs
            )

        # The response may be dict-like or an object exposing ``.text``;
        # fall back to its string form otherwise.
        if isinstance(response, dict) and "text" in response:
            transcript = response["text"]
        elif hasattr(response, "text"):
            transcript = response.text
        else:
            transcript = str(response)

        details = {
            "model": "whisper-1",
            "language": language,
            "file_url": file_url
        }
        return {
            "success": True,
            "text": transcript,
            "details": details
        }

    except Exception as e:
        return {
            "success": False,
            "error": str(e)
        }
|
|
190
|
+
|
|
191
|
+
# Define the tools list in the same format as other tool modules
|
|
192
|
+
# Each entry pairs a function-calling schema ("definition") with the
# coroutine that implements it ("implementation"). The schema's parameter
# names mirror the keyword-only arguments of the implementation.
TOOLS = [
    {
        # Text-to-speech tool; enums and defaults match the validation and
        # default arguments of text_to_speech.
        "definition": {
            "type": "function",
            "function": {
                "name": "audio-text_to_speech",
                "description": "Convert text to natural-sounding speech using OpenAI's TTS API",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "input": {
                            "type": "string",
                            "description": "The text to convert to speech (max 4096 characters)"
                        },
                        "voice": {
                            "type": "string",
                            "description": "The voice to use for the speech",
                            "enum": ["alloy", "echo", "fable", "onyx", "nova", "shimmer"],
                            "default": "alloy"
                        },
                        "model": {
                            "type": "string",
                            "description": "The model to use for speech generation",
                            "enum": ["tts-1", "tts-1-hd"],
                            "default": "tts-1"
                        },
                        "response_format": {
                            "type": "string",
                            "description": "The format of the audio file",
                            "enum": ["mp3", "opus", "aac", "flac"],
                            "default": "mp3"
                        },
                        "speed": {
                            "type": "number",
                            "description": "The speed of the generated audio (0.25 to 4.0)",
                            "minimum": 0.25,
                            "maximum": 4.0,
                            "default": 1.0
                        }
                    },
                    "required": ["input"]
                }
            }
        },
        "implementation": text_to_speech
    },
    {
        # Speech-to-text tool.
        # NOTE(review): speech_to_text wraps file_url in pathlib.Path and
        # checks exists(), so only local paths work despite the
        # "URL or path" wording below.
        "definition": {
            "type": "function",
            "function": {
                "name": "audio-speech_to_text",
                "description": "Transcribe audio to text using OpenAI's Whisper API",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "file_url": {
                            "type": "string",
                            "description": "URL or path to the audio file"
                        },
                        "language": {
                            "type": "string",
                            "description": "The language of the audio (ISO-639-1 format). If not specified, the model will auto-detect.",
                            "default": None
                        },
                        "prompt": {
                            "type": "string",
                            "description": "Optional text to guide the model's style or continue a previous audio segment",
                            "default": None
                        }
                    },
                    "required": ["file_url"]
                }
            }
        },
        "implementation": speech_to_text
    }
]
|
lye/browser.py
ADDED
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Browser automation tool using browser-use.
|
|
3
|
+
|
|
4
|
+
This tool allows Tyler to control a web browser to perform various tasks.
|
|
5
|
+
"""
|
|
6
|
+
import asyncio
|
|
7
|
+
import json
|
|
8
|
+
import weave
|
|
9
|
+
from typing import Dict, Any, Optional, List, Tuple
|
|
10
|
+
from browser_use import Agent as BrowserAgent, Browser, BrowserConfig
|
|
11
|
+
from browser_use.browser.context import BrowserContextConfig
|
|
12
|
+
from langchain_openai import ChatOpenAI
|
|
13
|
+
import os
|
|
14
|
+
from dotenv import load_dotenv
|
|
15
|
+
|
|
16
|
+
# Load environment variables
|
|
17
|
+
load_dotenv()
|
|
18
|
+
|
|
19
|
+
@weave.op(name="browser-automate")
async def browser_automate(*,
    task: str,
    model: str = "gpt-4.1",
    headless: bool = False,  # Default to non-headless mode so users can see the browser
    timeout: int = 300) -> Dict[str, Any]:
    """
    Automate browser tasks using browser-use.

    Args:
        task (str): The task to perform in natural language (e.g., "Go to google.com and search for Browser Use")
        model (str): The model to use for the browser agent (default: "gpt-4.1")
        headless (bool): Whether to run the browser in headless mode (default: False)
        timeout (int): Maximum time in seconds to run the task (default: 300).
            When exceeded, the agent run is cancelled and a failure is returned.

    Returns:
        Dict[str, Any]: On success, ``success=True`` with a ``summary`` and the
        stringified ``result``. On failure or timeout, ``success=False`` with
        an ``error`` message. Exceptions are never raised to the caller.
    """
    try:
        # Initialize the LLM
        llm = ChatOpenAI(model=model)

        # Create context config with settings to improve visibility
        context_config = BrowserContextConfig(
            highlight_elements=True,  # Highlight interactive elements with colorful bounding boxes
            wait_for_network_idle_page_load_time=3.0,  # Wait longer for page loads to ensure content is visible
            browser_window_size={'width': 1280, 'height': 900}  # Set a good window size for visibility
        )

        # Configure the browser
        # NOTE(review): disable_security=True relaxes browser protections
        # while visiting arbitrary sites chosen by the task; confirm this
        # trade-off is intended.
        browser_config = BrowserConfig(
            headless=headless,
            disable_security=True,  # Helps with cross-site iFrames and other functionality
            new_context_config=context_config  # Apply our context configuration
        )

        browser = Browser(config=browser_config)
        try:
            # Initialize the browser agent
            agent = BrowserAgent(
                task=task,
                llm=llm,
                browser=browser
            )

            # Run the browser agent, enforcing the caller-supplied time limit
            result = await asyncio.wait_for(agent.run(), timeout=timeout)
        finally:
            # Always close the browser, even when the run fails or times out,
            # so no browser process is left behind.
            await browser.close()

        # Extract useful information from the result
        summary = "Task completed successfully"
        if hasattr(result, 'all_results') and result.all_results:
            summary = "Actions performed: " + ", ".join([str(r) for r in result.all_results])
        elif hasattr(result, 'output') and result.output:
            summary = result.output

        # Return the result with more detailed information
        return {
            "success": True,
            "summary": summary,
            "result": str(result)
        }
    except asyncio.TimeoutError:
        # str() of a timeout error is empty, so report it explicitly.
        return {
            "success": False,
            "error": f"Browser task timed out after {timeout} seconds"
        }
    except Exception as e:
        return {
            "success": False,
            "error": str(e)
        }
|
|
89
|
+
|
|
90
|
+
@weave.op(name="browser-screenshot")
async def browser_screenshot(*,
    url: str,
    wait_time: int = 3,
    full_page: bool = True) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]:
    """
    Take a screenshot of a webpage.

    Args:
        url (str): The URL to take a screenshot of
        wait_time (int): Time to wait in seconds after page load before taking screenshot (default: 3)
        full_page (bool): Whether to capture the full page or just the viewport (default: True)

    Returns:
        Tuple[Dict[str, Any], List[Dict[str, Any]]]: Tuple containing:
            - Dict with success status and metadata (``url``, ``full_page``),
              or ``success=False`` with an ``error`` message
            - List with one file dictionary (filename, base64 screenshot
              content, mime type); empty on failure

    Exceptions are never raised to the caller.
    """
    try:
        # Configure the browser context for screenshots
        context_config = BrowserContextConfig(
            wait_for_network_idle_page_load_time=wait_time,  # Use the wait_time parameter
            browser_window_size={'width': 1280, 'height': 900}  # Set a good window size for screenshots
        )

        # Configure the browser in headless mode for screenshots
        browser_config = BrowserConfig(
            headless=True,
            new_context_config=context_config
        )

        browser = Browser(config=browser_config)
        try:
            # Create a simple task to navigate to the URL and take a screenshot
            task = f"Go to {url} and take a {'full page' if full_page else 'viewport'} screenshot"

            # Initialize the browser agent
            llm = ChatOpenAI(model="gpt-4.1")

            agent = BrowserAgent(
                task=task,
                llm=llm,
                browser=browser
            )

            # Run the browser agent
            result = await agent.run()
        finally:
            # Always close the browser, even when the run fails.
            await browser.close()

        # Extract the most recent screenshot recorded during the run.
        # NOTE(review): relies on the browser-use run history exposing a
        # screenshots() accessor that returns base64 strings — confirm
        # against the pinned browser-use version.
        get_screenshots = getattr(result, "screenshots", None)
        captured = get_screenshots() if callable(get_screenshots) else []
        captured = [shot for shot in (captured or []) if shot]

        if not captured:
            # Report the failure instead of returning fake image content.
            return (
                {
                    "success": False,
                    "error": f"No screenshot was captured for {url}"
                },
                []
            )

        return (
            {
                "success": True,
                "url": url,
                "full_page": full_page
            },
            [
                {
                    "filename": f"screenshot_{url.replace('://', '_').replace('/', '_')}.png",
                    "content": captured[-1],
                    "mime_type": "image/png"
                }
            ]
        )
    except Exception as e:
        return (
            {
                "success": False,
                "error": str(e)
            },
            []
        )
|
|
169
|
+
|
|
170
|
+
# Define the tools to be exported
|
|
171
|
+
# Each entry pairs a function-calling schema ("definition") with the
# coroutine that implements it ("implementation"). Property names mirror the
# keyword-only arguments of the implementation; defaults are stated only in
# the descriptions and come from the function signatures.
TOOLS = [
    {
        # General-purpose browser automation driven by a natural-language task.
        "definition": {
            "type": "function",
            "function": {
                "name": "browser-automate",
                "description": "Automate browser tasks using browser-use. This tool allows you to control a web browser to perform various tasks like navigating to websites, filling forms, clicking buttons, and more.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "task": {
                            "type": "string",
                            "description": "The task to perform in natural language (e.g., 'Go to google.com and search for Browser Use')"
                        },
                        "model": {
                            "type": "string",
                            "description": "The model to use for the browser agent (default: 'gpt-4.1')"
                        },
                        "headless": {
                            "type": "boolean",
                            "description": "Whether to run the browser in headless mode (default: False)"
                        },
                        "timeout": {
                            "type": "integer",
                            "description": "Maximum time in seconds to run the task (default: 300)"
                        }
                    },
                    "required": ["task"]
                }
            }
        },
        "implementation": browser_automate
    },
    {
        # Screenshot of a single URL.
        "definition": {
            "type": "function",
            "function": {
                "name": "browser-screenshot",
                "description": "Take a screenshot of a webpage using browser-use.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "url": {
                            "type": "string",
                            "description": "The URL to take a screenshot of"
                        },
                        "wait_time": {
                            "type": "integer",
                            "description": "Time to wait in seconds after page load before taking screenshot (default: 3)"
                        },
                        "full_page": {
                            "type": "boolean",
                            "description": "Whether to capture the full page or just the viewport (default: True)"
                        }
                    },
                    "required": ["url"]
                }
            }
        },
        "implementation": browser_screenshot
    }
]
|