slide-lye 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lye/__init__.py ADDED
@@ -0,0 +1,132 @@
"""
Lye - Tools package for Tyler

Importing this package imports each tool submodule and collects the
``TOOLS`` list that each one exposes, both into a per-module list
(``WEB_TOOLS``, ``SLACK_TOOLS``, ...) and into the combined ``TOOLS`` list.
"""
__version__ = "0.2.2"

import importlib
import sys
import os
import glob
from typing import Dict, List
from lye.utils.logging import get_logger

# Get configured logger
# NOTE(review): assumes get_logger returns a logging.Logger-compatible
# object (i.e. one that provides .warning) -- confirm against lye.utils.logging.
logger = get_logger(__name__)

# Initialize empty tool lists for each module
WEB_TOOLS = []
SLACK_TOOLS = []
COMMAND_LINE_TOOLS = []
NOTION_TOOLS = []
IMAGE_TOOLS = []
AUDIO_TOOLS = []
FILES_TOOLS = []
BROWSER_TOOLS = []

# Combined tools list
TOOLS = []

# Map of module names to their tools for dynamic loading.
# The insertion order is also the order in which modules are imported and
# in which their tools are appended to the combined TOOLS list.
TOOL_MODULES: Dict[str, List] = {
    'web': WEB_TOOLS,
    'slack': SLACK_TOOLS,
    'command_line': COMMAND_LINE_TOOLS,
    'notion': NOTION_TOOLS,
    'image': IMAGE_TOOLS,
    'audio': AUDIO_TOOLS,
    'files': FILES_TOOLS,
    'browser': BROWSER_TOOLS
}

# Import every tool module independently. A single shared try block would
# abort at the first ImportError and leave every later module unimported,
# so each module gets its own try/except here.
for _module_name, _module_tool_list in TOOL_MODULES.items():
    try:
        _module = importlib.import_module(f".{_module_name}", __name__)
    except ImportError as e:
        logger.warning(
            "Tool module %r could not be imported: %s", _module_name, e
        )
        continue

    # Expose the submodule as a package attribute, as `from . import <name>`
    # would, so that `lye.web`, `lye.slack`, ... keep working.
    globals()[_module_name] = _module

    # Get the tool list from the module and maintain both the individual
    # and the combined lists.
    try:
        # Materialize once so both lists receive exactly the same items.
        _loaded_tools = list(getattr(_module, "TOOLS", []))
        _module_tool_list.extend(_loaded_tools)
        TOOLS.extend(_loaded_tools)
    except Exception as e:
        logger.warning(
            "Could not load %s tools: %s", _module_name.replace("_", " "), e
        )

__all__ = [
    # Module-level tool lists
    'TOOLS',
    'WEB_TOOLS',
    'FILES_TOOLS',
    'COMMAND_LINE_TOOLS',
    'AUDIO_TOOLS',
    'IMAGE_TOOLS',
    'BROWSER_TOOLS',
    'SLACK_TOOLS',
    'NOTION_TOOLS',
    # Module namespaces for cleaner imports
    'web',
    'files',
    'command_line',
    'audio',
    'image',
    'browser',
    'slack',
    'notion',
]
lye/audio.py ADDED
@@ -0,0 +1,268 @@
1
+ import os
2
+ import weave
3
+ import base64
4
+ from typing import Dict, List, Optional, Any, Tuple
5
+ from litellm import speech, transcription
6
+ import uuid
7
+ import tempfile
8
+ from pathlib import Path
9
+
@weave.op(name="text-to-speech")
async def text_to_speech(*,
    input: str,
    voice: str = "alloy",
    model: str = "tts-1",
    response_format: str = "mp3",
    speed: float = 1.0
) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]:
    """
    Convert text to speech using LiteLLM's speech API.

    Args:
        input (str): The text to convert to speech (max 4096 characters)
        voice (str, optional): The voice to use. Defaults to "alloy"
        model (str, optional): The model to use. Defaults to "tts-1"
        response_format (str, optional): The format of the audio file. Defaults to "mp3"
        speed (float, optional): The speed of the generated audio. Defaults to 1.0

    Returns:
        Tuple[Dict[str, Any], List[Dict[str, Any]]]: Tuple containing:
            - Dict with success status and either a description or an error
            - List with one file dictionary holding the raw audio bytes and
              metadata; empty when the call fails

    Any exception raised while validating or generating is caught and
    reported through the returned dict ("success": False) instead of
    propagating to the caller.
    """
    try:
        # Validate voice
        valid_voices = ["alloy", "echo", "fable", "onyx", "nova", "shimmer"]
        if voice not in valid_voices:
            return (
                {
                    "success": False,
                    "error": f"Voice {voice} not supported. Choose from: {valid_voices}"
                },
                []  # Empty files list for error case
            )

        # Validate model. A suffix match accepts the bare names as well as
        # provider-prefixed ones like "openai/tts-1". Any name whose last
        # path segment is a valid model necessarily ends with it, so no
        # separate split('/') check is needed.
        valid_models = ["tts-1", "tts-1-hd"]
        if not model.endswith(tuple(valid_models)):
            return (
                {
                    "success": False,
                    "error": f"Model {model} not supported. Choose from: {valid_models}"
                },
                []  # Empty files list for error case
            )

        # Create a temporary file to store the audio. delete=False keeps the
        # file on disk after the handle is closed so it can be written by
        # stream_to_file and re-opened for reading below.
        with tempfile.NamedTemporaryFile(delete=False, suffix=f".{response_format}") as temp_file:
            temp_path = temp_file.name

        try:
            # Generate speech
            response = speech(
                model=model,
                voice=voice,
                input=input,
                response_format=response_format,
                speed=speed
            )

            # Stream to file
            response.stream_to_file(temp_path)

            # Read the file content
            with open(temp_path, "rb") as f:
                audio_bytes = f.read()
        finally:
            # Always remove the temporary file, including when generation
            # or reading fails, so failed calls do not leak files.
            os.unlink(temp_path)

        # Create a unique filename
        unique_id = uuid.uuid4().hex
        filename = f"speech_{unique_id}.{response_format}"

        # Determine mime type based on response_format
        mime_type_map = {
            "mp3": "audio/mpeg",
            "opus": "audio/opus",
            "aac": "audio/aac",
            "flac": "audio/flac"
        }
        mime_type = mime_type_map.get(response_format, f"audio/{response_format}")

        # Create a short description
        description = f"Speech generated from text: '{input[:50]}{'...' if len(input) > 50 else ''}'"

        # Return tuple with content dict and files list
        return (
            {
                "success": True,
                "description": description,
            },
            [{
                "content": audio_bytes,  # Raw bytes, not a base64 string
                "filename": filename,
                "mime_type": mime_type,
                "description": description,
                "attributes": {
                    "voice": voice,
                    "model": model,
                    "format": response_format,
                    "speed": speed,
                    "text_length": len(input)
                }
            }]
        )

    except Exception as e:
        return (
            {
                "success": False,
                "error": str(e)
            },
            []  # Empty files list for error case
        )
127
+
@weave.op(name="speech-to-text")
async def speech_to_text(*,
    file_url: str,
    language: str = None,
    prompt: str = None
) -> Dict[str, Any]:
    """
    Transcribe an audio file to text using LiteLLM's transcription API.

    Args:
        file_url: Path to the audio file; it is opened directly from the
            filesystem
        language: Optional language code in ISO-639-1 format. Only forwarded
            to the API when truthy.
        prompt: Optional text to guide the model's style or continue a
            previous audio segment. Only forwarded to the API when truthy.

    Returns:
        Dict[str, Any]: On success, "success": True together with the
        transcribed "text" and a "details" dict; on any exception,
        "success": False together with the "error" message.
    """
    try:
        # The file_url is used as-is as a filesystem path
        audio_path = Path(file_url)
        if not audio_path.exists():
            raise FileNotFoundError(f"Audio file not found at {audio_path}")

        # Forward only the optional parameters that were actually supplied
        extra_kwargs = {
            key: value
            for key, value in (("language", language), ("prompt", prompt))
            if value
        }

        # Transcribe while the file handle is open
        with open(audio_path, "rb") as audio_file:
            response = transcription(
                model="whisper-1",
                file=audio_file,
                **extra_kwargs
            )

        # The response may be a mapping or an object; fall back to its
        # string form when neither exposes a "text" field.
        is_text_mapping = isinstance(response, dict) and "text" in response
        if is_text_mapping:
            transcribed = response["text"]
        elif hasattr(response, "text"):
            transcribed = response.text
        else:
            transcribed = str(response)

        details = {
            "model": "whisper-1",
            "language": language,
            "file_url": file_url
        }
        return {
            "success": True,
            "text": transcribed,
            "details": details
        }

    except Exception as error:
        return {
            "success": False,
            "error": str(error)
        }
190
+
# Tool registry for this module, in the same format as other tool modules:
# each entry pairs a function-calling "definition" with its "implementation".

# JSON-schema parameters accepted by the text-to-speech tool
_TEXT_TO_SPEECH_PARAMETERS = {
    "type": "object",
    "properties": {
        "input": {
            "type": "string",
            "description": "The text to convert to speech (max 4096 characters)"
        },
        "voice": {
            "type": "string",
            "description": "The voice to use for the speech",
            "enum": ["alloy", "echo", "fable", "onyx", "nova", "shimmer"],
            "default": "alloy"
        },
        "model": {
            "type": "string",
            "description": "The model to use for speech generation",
            "enum": ["tts-1", "tts-1-hd"],
            "default": "tts-1"
        },
        "response_format": {
            "type": "string",
            "description": "The format of the audio file",
            "enum": ["mp3", "opus", "aac", "flac"],
            "default": "mp3"
        },
        "speed": {
            "type": "number",
            "description": "The speed of the generated audio (0.25 to 4.0)",
            "minimum": 0.25,
            "maximum": 4.0,
            "default": 1.0
        }
    },
    "required": ["input"]
}

# JSON-schema parameters accepted by the speech-to-text tool
_SPEECH_TO_TEXT_PARAMETERS = {
    "type": "object",
    "properties": {
        "file_url": {
            "type": "string",
            "description": "URL or path to the audio file"
        },
        "language": {
            "type": "string",
            "description": "The language of the audio (ISO-639-1 format). If not specified, the model will auto-detect.",
            "default": None
        },
        "prompt": {
            "type": "string",
            "description": "Optional text to guide the model's style or continue a previous audio segment",
            "default": None
        }
    },
    "required": ["file_url"]
}

TOOLS = [
    {
        "definition": {
            "type": "function",
            "function": {
                "name": "audio-text_to_speech",
                "description": "Convert text to natural-sounding speech using OpenAI's TTS API",
                "parameters": _TEXT_TO_SPEECH_PARAMETERS
            }
        },
        "implementation": text_to_speech
    },
    {
        "definition": {
            "type": "function",
            "function": {
                "name": "audio-speech_to_text",
                "description": "Transcribe audio to text using OpenAI's Whisper API",
                "parameters": _SPEECH_TO_TEXT_PARAMETERS
            }
        },
        "implementation": speech_to_text
    }
]
lye/browser.py ADDED
@@ -0,0 +1,232 @@
1
+ """
2
+ Browser automation tool using browser-use.
3
+
4
+ This tool allows Tyler to control a web browser to perform various tasks.
5
+ """
6
+ import asyncio
7
+ import json
8
+ import weave
9
+ from typing import Dict, Any, Optional, List, Tuple
10
+ from browser_use import Agent as BrowserAgent, Browser, BrowserConfig
11
+ from browser_use.browser.context import BrowserContextConfig
12
+ from langchain_openai import ChatOpenAI
13
+ import os
14
+ from dotenv import load_dotenv
15
+
16
+ # Load environment variables
17
+ load_dotenv()
18
+
@weave.op(name="browser-automate")
async def browser_automate(*,
    task: str,
    model: str = "gpt-4.1",
    headless: bool = False,  # Default to non-headless mode so users can see the browser
    timeout: int = 300) -> Dict[str, Any]:
    """
    Automate browser tasks using browser-use.

    Args:
        task (str): The task to perform in natural language (e.g., "Go to google.com and search for Browser Use")
        model (str): The model to use for the browser agent (default: "gpt-4.1")
        headless (bool): Whether to run the browser in headless mode (default: False)
        timeout (int): Maximum time in seconds to run the task (default: 300).
            The agent run is cancelled and an error result is returned when
            the limit is exceeded.

    Returns:
        Dict[str, Any]: On success, "success": True with a "summary" and the
        stringified "result"; on timeout or any other exception,
        "success": False with an "error" message.
    """
    try:
        # Initialize the LLM
        llm = ChatOpenAI(model=model)

        # Configure the browser with supported parameters
        # Create context config with settings to improve visibility
        context_config = BrowserContextConfig(
            highlight_elements=True,  # Highlight interactive elements with colorful bounding boxes
            wait_for_network_idle_page_load_time=3.0,  # Wait longer for page loads to ensure content is visible
            browser_window_size={'width': 1280, 'height': 900}  # Set a good window size for visibility
        )

        # Configure the browser
        browser_config = BrowserConfig(
            headless=headless,
            disable_security=True,  # Helps with cross-site iFrames and other functionality
            new_context_config=context_config  # Apply our context configuration
        )

        browser = Browser(config=browser_config)

        try:
            # Initialize the browser agent
            agent = BrowserAgent(
                task=task,
                llm=llm,
                browser=browser
            )

            # Run the browser agent, enforcing the documented time limit
            result = await asyncio.wait_for(agent.run(), timeout=timeout)

            # Extract useful information from the result
            summary = "Task completed successfully"
            if hasattr(result, 'all_results') and result.all_results:
                summary = "Actions performed: " + ", ".join([str(r) for r in result.all_results])
            elif hasattr(result, 'output') and result.output:
                summary = result.output
        finally:
            # Close the browser on every path (success, failure, timeout)
            # so a failed run does not leave a browser process behind.
            await browser.close()

        # Return the result with more detailed information
        return {
            "success": True,
            "summary": summary,
            "result": str(result)
        }
    except asyncio.TimeoutError:
        # str() of a timeout error is empty, so report an explicit message.
        return {
            "success": False,
            "error": f"Browser task timed out after {timeout} seconds"
        }
    except Exception as e:
        return {
            "success": False,
            "error": str(e)
        }
89
+
@weave.op(name="browser-screenshot")
async def browser_screenshot(*,
    url: str,
    wait_time: int = 3,
    full_page: bool = True) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]:
    """
    Drive a headless browser agent to a webpage and report a screenshot entry.

    NOTE(review): the returned file entry currently carries a placeholder
    string as its "content"; no image data is extracted from the agent
    result. Callers must not treat the content as a real PNG until the
    extraction is implemented.

    Args:
        url (str): The URL to take a screenshot of
        wait_time (int): Passed to the browser context as the network-idle
            page-load wait time, in seconds (default: 3)
        full_page (bool): Whether the agent is asked for a full page or a
            viewport screenshot (default: True)

    Returns:
        Tuple[Dict[str, Any], List[Dict[str, Any]]]: Tuple containing:
            - Dict with success status and metadata (or an error message)
            - List of file dictionaries; empty when the call fails
    """
    try:
        # Initialize the LLM used by the browser agent
        llm = ChatOpenAI(model="gpt-4.1")

        # Configure the browser context for screenshots
        context_config = BrowserContextConfig(
            wait_for_network_idle_page_load_time=wait_time,  # Use the wait_time parameter
            browser_window_size={'width': 1280, 'height': 900}  # Set a good window size for screenshots
        )

        # Configure the browser in headless mode for screenshots
        browser_config = BrowserConfig(
            headless=True,
            new_context_config=context_config
        )

        browser = Browser(config=browser_config)

        try:
            # Create a simple task to navigate to the URL and take a screenshot
            task = f"Go to {url} and take a {'full page' if full_page else 'viewport'} screenshot"

            # Initialize the browser agent
            agent = BrowserAgent(
                task=task,
                llm=llm,
                browser=browser
            )

            # Run the browser agent
            result = await agent.run()
        finally:
            # Close the browser on every path so that a failing agent run
            # does not leave a browser process behind.
            await browser.close()

        # TODO: extract the screenshot from the browser-use `result` and
        # convert it to the expected format. Until then a placeholder is
        # returned and `result` is intentionally unused.
        return (
            {
                "success": True,
                "url": url,
                "full_page": full_page
            },
            [
                {
                    "filename": f"screenshot_{url.replace('://', '_').replace('/', '_')}.png",
                    "content": "base64_encoded_content_would_go_here",
                    "mime_type": "image/png"
                }
            ]
        )
    except Exception as e:
        return (
            {
                "success": False,
                "error": str(e)
            },
            []
        )
169
+
# Tools exported by this module: each entry pairs a function-calling
# "definition" with the coroutine that implements it.

# JSON-schema parameters accepted by the browser-automate tool
_BROWSER_AUTOMATE_PARAMETERS = {
    "type": "object",
    "properties": {
        "task": {
            "type": "string",
            "description": "The task to perform in natural language (e.g., 'Go to google.com and search for Browser Use')"
        },
        "model": {
            "type": "string",
            "description": "The model to use for the browser agent (default: 'gpt-4.1')"
        },
        "headless": {
            "type": "boolean",
            "description": "Whether to run the browser in headless mode (default: False)"
        },
        "timeout": {
            "type": "integer",
            "description": "Maximum time in seconds to run the task (default: 300)"
        }
    },
    "required": ["task"]
}

# JSON-schema parameters accepted by the browser-screenshot tool
_BROWSER_SCREENSHOT_PARAMETERS = {
    "type": "object",
    "properties": {
        "url": {
            "type": "string",
            "description": "The URL to take a screenshot of"
        },
        "wait_time": {
            "type": "integer",
            "description": "Time to wait in seconds after page load before taking screenshot (default: 3)"
        },
        "full_page": {
            "type": "boolean",
            "description": "Whether to capture the full page or just the viewport (default: True)"
        }
    },
    "required": ["url"]
}

TOOLS = [
    {
        "definition": {
            "type": "function",
            "function": {
                "name": "browser-automate",
                "description": "Automate browser tasks using browser-use. This tool allows you to control a web browser to perform various tasks like navigating to websites, filling forms, clicking buttons, and more.",
                "parameters": _BROWSER_AUTOMATE_PARAMETERS
            }
        },
        "implementation": browser_automate
    },
    {
        "definition": {
            "type": "function",
            "function": {
                "name": "browser-screenshot",
                "description": "Take a screenshot of a webpage using browser-use.",
                "parameters": _BROWSER_SCREENSHOT_PARAMETERS
            }
        },
        "implementation": browser_screenshot
    }
]