semantio 0.0.5__tar.gz → 0.0.6__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (52) hide show
  1. {semantio-0.0.5 → semantio-0.0.6}/PKG-INFO +1 -1
  2. semantio-0.0.6/semantio/__init__.py +4 -0
  3. semantio-0.0.6/semantio/tools/web_browser.py +439 -0
  4. {semantio-0.0.5 → semantio-0.0.6}/semantio.egg-info/PKG-INFO +1 -1
  5. {semantio-0.0.5 → semantio-0.0.6}/setup.py +1 -1
  6. semantio-0.0.5/semantio/tools/__init__.py +0 -0
  7. semantio-0.0.5/semantio/tools/web_browser.py +0 -271
  8. {semantio-0.0.5 → semantio-0.0.6}/LICENSE +0 -0
  9. {semantio-0.0.5 → semantio-0.0.6}/README.md +0 -0
  10. {semantio-0.0.5 → semantio-0.0.6}/semantio/agent.py +0 -0
  11. {semantio-0.0.5/semantio → semantio-0.0.6/semantio/api}/__init__.py +0 -0
  12. {semantio-0.0.5 → semantio-0.0.6}/semantio/api/api_generator.py +0 -0
  13. {semantio-0.0.5 → semantio-0.0.6}/semantio/api/fastapi_app.py +0 -0
  14. {semantio-0.0.5/semantio/api → semantio-0.0.6/semantio/cli}/__init__.py +0 -0
  15. {semantio-0.0.5 → semantio-0.0.6}/semantio/cli/main.py +0 -0
  16. {semantio-0.0.5 → semantio-0.0.6}/semantio/knowledge_base/__init__.py +0 -0
  17. {semantio-0.0.5 → semantio-0.0.6}/semantio/knowledge_base/document_loader.py +0 -0
  18. {semantio-0.0.5 → semantio-0.0.6}/semantio/knowledge_base/retriever.py +0 -0
  19. {semantio-0.0.5 → semantio-0.0.6}/semantio/knowledge_base/vector_store.py +0 -0
  20. {semantio-0.0.5 → semantio-0.0.6}/semantio/llm/__init__.py +0 -0
  21. {semantio-0.0.5 → semantio-0.0.6}/semantio/llm/anthropic.py +0 -0
  22. {semantio-0.0.5 → semantio-0.0.6}/semantio/llm/base_llm.py +0 -0
  23. {semantio-0.0.5 → semantio-0.0.6}/semantio/llm/deepseek.py +0 -0
  24. {semantio-0.0.5 → semantio-0.0.6}/semantio/llm/gemini.py +0 -0
  25. {semantio-0.0.5 → semantio-0.0.6}/semantio/llm/groq.py +0 -0
  26. {semantio-0.0.5 → semantio-0.0.6}/semantio/llm/mistral.py +0 -0
  27. {semantio-0.0.5 → semantio-0.0.6}/semantio/llm/openai.py +0 -0
  28. {semantio-0.0.5 → semantio-0.0.6}/semantio/memory.py +0 -0
  29. {semantio-0.0.5 → semantio-0.0.6}/semantio/models.py +0 -0
  30. {semantio-0.0.5 → semantio-0.0.6}/semantio/rag.py +0 -0
  31. {semantio-0.0.5 → semantio-0.0.6}/semantio/storage/__init__.py +0 -0
  32. {semantio-0.0.5 → semantio-0.0.6}/semantio/storage/base_storage.py +0 -0
  33. {semantio-0.0.5 → semantio-0.0.6}/semantio/storage/cloud_storage.py +0 -0
  34. {semantio-0.0.5 → semantio-0.0.6}/semantio/storage/in_memory_storage.py +0 -0
  35. {semantio-0.0.5 → semantio-0.0.6}/semantio/storage/local_storage.py +0 -0
  36. {semantio-0.0.5/semantio/cli → semantio-0.0.6/semantio/tools}/__init__.py +0 -0
  37. {semantio-0.0.5 → semantio-0.0.6}/semantio/tools/base_tool.py +0 -0
  38. {semantio-0.0.5 → semantio-0.0.6}/semantio/tools/crypto.py +0 -0
  39. {semantio-0.0.5 → semantio-0.0.6}/semantio/tools/duckduckgo.py +0 -0
  40. {semantio-0.0.5 → semantio-0.0.6}/semantio/tools/stocks.py +0 -0
  41. {semantio-0.0.5 → semantio-0.0.6}/semantio/utils/__init__.py +0 -0
  42. {semantio-0.0.5 → semantio-0.0.6}/semantio/utils/config.py +0 -0
  43. {semantio-0.0.5 → semantio-0.0.6}/semantio/utils/date_utils.py +0 -0
  44. {semantio-0.0.5 → semantio-0.0.6}/semantio/utils/file_utils.py +0 -0
  45. {semantio-0.0.5 → semantio-0.0.6}/semantio/utils/logger.py +0 -0
  46. {semantio-0.0.5 → semantio-0.0.6}/semantio/utils/validation_utils.py +0 -0
  47. {semantio-0.0.5 → semantio-0.0.6}/semantio.egg-info/SOURCES.txt +0 -0
  48. {semantio-0.0.5 → semantio-0.0.6}/semantio.egg-info/dependency_links.txt +0 -0
  49. {semantio-0.0.5 → semantio-0.0.6}/semantio.egg-info/entry_points.txt +0 -0
  50. {semantio-0.0.5 → semantio-0.0.6}/semantio.egg-info/requires.txt +0 -0
  51. {semantio-0.0.5 → semantio-0.0.6}/semantio.egg-info/top_level.txt +0 -0
  52. {semantio-0.0.5 → semantio-0.0.6}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: semantio
3
- Version: 0.0.5
3
+ Version: 0.0.6
4
4
  Summary: A powerful SDK for building AI agents
5
5
  Home-page: https://github.com/Syenah/semantio
6
6
  Author: Rakesh
@@ -0,0 +1,4 @@
1
+ import warnings
2
+
3
+ # Suppress all warnings globally.
+ # NOTE: simplefilter("ignore") installs a process-wide filter as soon as the
+ # package is imported, so warnings from the host application and from other
+ # libraries are hidden as well, not only those raised by semantio.
4
+ warnings.simplefilter("ignore")
@@ -0,0 +1,439 @@
1
+ # web_browser.py
2
+ from typing import Dict, Any, List, Optional, Callable
3
+ from pydantic import Field, BaseModel
4
+ from selenium import webdriver
5
+ from selenium.webdriver.common.by import By
6
+ from selenium.webdriver.common.action_chains import ActionChains
7
+ from selenium.webdriver.remote.webelement import WebElement
8
+ from selenium.webdriver.support.ui import WebDriverWait
9
+ from selenium.webdriver.support import expected_conditions as EC
10
+ from selenium.webdriver.chrome.options import Options
11
+ from selenium.webdriver.chrome.service import Service
12
+ from webdriver_manager.chrome import ChromeDriverManager
13
+ from bs4 import BeautifulSoup
14
+ import json
15
+ import time
16
+ import re
17
+ import logging
18
+ import os
19
+ import difflib
20
+ from .base_tool import BaseTool
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
class BrowserPlan(BaseModel):
    """Container for the ordered automation steps returned by plan parsing."""

    # Required field (no default): one dict per step to execute.
    tasks: List[Dict[str, Any]] = Field(..., description="List of automation tasks to execute")
29
+
30
class WebBrowserTool(BaseTool):
    # Tool identity, declared as pydantic fields.
    name: str = Field(default="WebBrowser", description="Name of the tool")
    description: str = Field(
        default="Highly advanced universal web automation tool with advanced element identification, AJAX waiting, modal dismissal, multi-tab support, and custom JS injection.",
        description="Tool description",
    )

    # Seconds passed to each WebDriverWait created by the handlers.
    default_timeout: int = 15
    # Upper bound on attempts per task in the retry loop.
    max_retries: int = 3
39
+
40
def execute(self, input: Dict[str, Any]) -> Dict[str, Any]:
    """Plan and run a browser automation workflow.

    Args:
        input: Request options. Keys read here: ``query`` (text handed to the
            planner, default ""), ``headless`` (default False), ``timeout``
            (seconds for each explicit wait) and ``max_retries``. The last
            two override the instance defaults for this call only.

    Returns:
        ``{"status": "success", "results": [...], "total_time": float,
        "all_succeeded": bool}`` when the workflow ran to the end or stopped
        at a failed task (inspect ``all_succeeded`` / each result's
        ``success``), or ``{"status": "error", "message": str}`` when an
        exception escaped, e.g. the plan contained no valid tasks.
    """
    driver = None
    overall_start = time.time()
    # Remember the configured defaults so per-call overrides do not leak into
    # later calls on the same tool instance.
    saved_timeout, saved_retries = self.default_timeout, self.max_retries
    try:
        headless = input.get("headless", False)
        self.default_timeout = int(input.get("timeout", self.default_timeout))
        self.max_retries = int(input.get("max_retries", self.max_retries))
        driver = self._init_browser(headless)
        results = []

        # No page is loaded yet, so the planner receives an empty current URL.
        plan = self._generate_plan(input.get('query', ''), "")
        if not plan.tasks:
            raise ValueError("No valid tasks in the generated plan.")

        # Dynamic mapping: action name to handler function.
        # Fallbacks use `or` rather than a .get() default because plan
        # parsing always stores a "value" key (possibly the empty string).
        action_map: Dict[str, Callable[[webdriver.Chrome, Dict[str, Any]], Dict[str, Any]]] = {
            "navigate": lambda d, task: self._handle_navigation(d, task.get("value", "")),
            "click": lambda d, task: self._handle_click(d, task.get("selector", "")),
            "type": lambda d, task: self._handle_typing(d, task.get("selector", ""), task.get("value", ""), task),
            "wait": lambda d, task: self._handle_wait(task.get("value", "")),
            "wait_for_ajax": lambda d, task: self._handle_wait_for_ajax(d, task.get("value") or "30"),
            "scroll": lambda d, task: self._handle_scroll(d, task.get("selector", "")),
            "hover": lambda d, task: self._handle_hover(d, task.get("selector", "")),
            "screenshot": lambda d, task: self._handle_screenshot(d, task.get("value") or "screenshot.png"),
            "switch_tab": lambda d, task: self._handle_switch_tab(d, task.get("value") or "0"),
            "execute_script": lambda d, task: self._handle_execute_script(d, task.get("value", "")),
            "drag_and_drop": lambda d, task: self._handle_drag_and_drop(d, task.get("selector", ""), task.get("value", "")),
        }

        for task in plan.tasks:
            # Before each action, dismiss modals/overlays.
            self._dismiss_unwanted_modals(driver)
            # str() keeps a non-string action (e.g. null) from raising here;
            # it is then reported as an unsupported action instead.
            action = str(task.get("action", "")).lower()
            logger.info(f"Executing task: {task.get('description', action)}")
            start_time = time.time()
            handler = action_map.get(action)
            if not handler:
                results.append({
                    "action": action,
                    "success": False,
                    "message": f"Unsupported action: {action}"
                })
                continue

            result = self._execute_with_retries(driver, task, handler)
            elapsed = time.time() - start_time
            result["elapsed"] = elapsed
            logger.info(f"Action '{action}' completed in {elapsed:.2f} seconds.")
            results.append(result)

            if not result.get('success', False):
                logger.error(f"Task failed: {result.get('message')}")
                self._capture_failure_screenshot(driver, action)
                break

        overall_elapsed = time.time() - overall_start
        logger.info(f"Total execution time: {overall_elapsed:.2f} seconds.")
        return {
            "status": "success",
            "results": results,
            "total_time": overall_elapsed,
            # Additive key: "status" stays "success" for existing callers,
            # while failures are still visible at the top level.
            "all_succeeded": all(r.get("success", False) for r in results),
        }

    except Exception as e:
        logger.exception("Execution error:")
        return {"status": "error", "message": str(e)}
    finally:
        self.default_timeout, self.max_retries = saved_timeout, saved_retries
        if driver:
            driver.quit()
109
+
110
def _init_browser(self, headless: bool) -> webdriver.Chrome:
    """Create a Chrome WebDriver; add the new headless mode flag when ``headless`` is true."""
    chrome_args = ["--start-maximized", "--disable-blink-features=AutomationControlled"]
    if headless:
        chrome_args.append("--headless=new")

    opts = Options()
    for arg in chrome_args:
        opts.add_argument(arg)
    opts.add_experimental_option("excludeSwitches", ["enable-automation"])

    # The chromedriver binary path is resolved by webdriver_manager.
    driver_path = ChromeDriverManager().install()
    return webdriver.Chrome(service=Service(driver_path), options=opts)
122
+
123
+ def _generate_plan(self, query: str, current_url: str) -> BrowserPlan:
124
+ """Ask the LLM for a browser automation plan and parse its reply.
+
+ The prompt embeds ``query`` and ``current_url`` (or the text
+ 'No page loaded yet' when ``current_url`` is empty), lists the supported
+ action names and states the JSON shape the reply should follow.
+
+ Returns the BrowserPlan produced by ``_parse_plan`` from the LLM reply.
+ """
125
+ prompt = f"""Generate browser automation plan for: {query}
126
+
127
+ Current URL: {current_url or 'No page loaded yet'}
128
+
129
+ Required JSON format:
130
+ {{
131
+ "tasks": [
132
+ {{
133
+ "action": "navigate|click|type|wait|wait_for_ajax|scroll|hover|screenshot|switch_tab|execute_script|drag_and_drop",
134
+ "selector": "CSS selector (optional)",
135
+ "value": "input text/URL/seconds/filename/target-selector",
136
+ "description": "action purpose"
137
+ }}
138
+ ]
139
+ }}
140
+
141
+ Guidelines:
142
+ 1. Prefer IDs in selectors (#element-id) and semantic attributes.
143
+ 2. Include wait steps after navigation and wait for AJAX where applicable.
144
+ 3. Dismiss any modals/pop-ups that are not part of the task.
145
+ 4. For drag_and_drop, use source selector in 'selector' and target selector in 'value'.
146
+ 5. For execute_script, 'value' should contain valid JavaScript.
147
+ 6. For switch_tab, 'value' should be an index or keyword 'new'.
148
+ """
149
+ # NOTE(review): self.llm is not defined in this file; presumably it is
+ # supplied by BaseTool or the owning agent - confirm it is set before use.
+ response = self.llm.generate(prompt=prompt)
150
+ return self._parse_plan(response)
151
+
152
def _parse_plan(self, response: str) -> BrowserPlan:
    """Extract and validate the task list from an LLM reply.

    A fenced ```json block is preferred; otherwise the widest ``{...}`` span
    in the text is parsed. Entries that are not dicts or that lack ``action``
    or ``description`` are skipped with a warning.

    Args:
        response: Raw text returned by the LLM.

    Returns:
        A BrowserPlan with the validated tasks, or one with an empty task
        list when the reply cannot be parsed or has the wrong shape.
    """

    def _as_text(raw: Any) -> Any:
        # Handlers are written for string arguments: map a JSON null to ""
        # and stringify other scalars (e.g. a numeric wait time).
        if raw is None:
            return ""
        return raw if isinstance(raw, str) else str(raw)

    try:
        fenced = re.search(r'```json\n?(.+?)\n?```', response, re.DOTALL)
        if fenced:
            plan_data = json.loads(fenced.group(1).strip())
        else:
            bare = re.search(r'\{.*\}', response, re.DOTALL)
            if not bare:
                raise ValueError("No JSON object found in the response.")
            plan_data = json.loads(bare.group())

        # The JSON may be valid yet have the wrong shape (a top-level list,
        # "tasks": null, ...); reject it explicitly instead of crashing below.
        if not isinstance(plan_data, dict):
            raise ValueError("Plan JSON must be an object with a 'tasks' list.")
        raw_tasks = plan_data.get("tasks") or []
        if not isinstance(raw_tasks, list):
            raise ValueError("'tasks' must be a list.")

        validated_tasks = []
        for task in raw_tasks:
            if not isinstance(task, dict) or not all(key in task for key in ("action", "description")):
                logger.warning(f"Skipping task due to missing keys: {task}")
                continue
            validated_tasks.append({
                "action": task["action"],
                "selector": _as_text(task.get("selector", "")),
                "value": _as_text(task.get("value", "")),
                "description": task["description"]
            })
        return BrowserPlan(tasks=validated_tasks)
    except (json.JSONDecodeError, AttributeError, TypeError, ValueError) as e:
        # TypeError covers a non-string ``response`` passed to re.search.
        logger.error(f"Plan parsing failed: {e}")
        return BrowserPlan(tasks=[])
178
+
179
def _execute_with_retries(self, driver: webdriver.Chrome, task: Dict[str, Any],
                          handler: Callable[[webdriver.Chrome, Dict[str, Any]], Dict[str, Any]]) -> Dict[str, Any]:
    """Run ``handler`` for ``task`` until it succeeds or attempts run out.

    The task is tried at most ``self.max_retries`` times, and always at least
    once. The pause between attempts grows linearly (1s, 2s, ...); there is
    no pause after the last attempt.

    Returns:
        The first successful result dict, or the result of the final failed
        attempt.
    """
    # Guarantee one attempt so a result dict with "success"/"message" always
    # exists, even when max_retries was configured as 0 or negative.
    max_attempts = max(1, self.max_retries)
    result: Dict[str, Any] = {}
    for attempt in range(1, max_attempts + 1):
        result = self._execute_safe_task(driver, task, handler)
        if result.get("success", False):
            return result
        if attempt < max_attempts:
            # Only announce and wait when another attempt will follow.
            logger.info(f"Retrying task '{task.get('action')}' (attempt {attempt + 1}/{max_attempts})")
            time.sleep(1 * attempt)
    return result
192
+
193
def _execute_safe_task(self, driver: webdriver.Chrome, task: Dict[str, Any],
                       handler: Callable[[webdriver.Chrome, Dict[str, Any]], Dict[str, Any]]) -> Dict[str, Any]:
    """Invoke ``handler(driver, task)``, converting any exception into a failure result."""
    try:
        outcome = handler(driver, task)
    except Exception as exc:
        action_name = task.get("action", "unknown")
        logger.exception(f"Error executing task '{action_name}':")
        outcome = {"action": action_name, "success": False, "message": f"Critical error: {str(exc)}"}
    return outcome
202
+
203
def _dismiss_unwanted_modals(self, driver: webdriver.Chrome):
    """Best-effort removal of visible modals, overlays and pop-ups.

    For every displayed element matching a known modal selector, the first
    displayed close button inside it is clicked; when none can be clicked
    the element is removed from the DOM via JavaScript. Errors are logged at
    debug level and never raised, and a failure on one element (for example
    one that went stale after another modal was closed) does not stop the
    remaining elements from being processed.
    """
    modal_selectors = (".modal", ".popup", '[role="dialog"]', ".overlay", ".lightbox")
    close_selectors = (".close", ".btn-close", "[aria-label='Close']", "[data-dismiss='modal']")

    for selector in modal_selectors:
        try:
            elements = driver.find_elements(By.CSS_SELECTOR, selector)
        except Exception as e:
            logger.debug(f"Modal dismissal error: {e}")
            continue

        for modal in elements:
            try:
                if not modal.is_displayed():
                    continue
                dismissed = False
                for close_sel in close_selectors:
                    try:
                        close_button = modal.find_element(By.CSS_SELECTOR, close_sel)
                        if close_button.is_displayed():
                            close_button.click()
                            dismissed = True
                            logger.info(f"Dismissed modal using selector {close_sel}")
                            # Give the page a moment to react to the close click.
                            time.sleep(1)
                            break
                    except Exception:
                        continue
                if not dismissed:
                    # No usable close button: delete the node from the DOM.
                    driver.execute_script("arguments[0].remove();", modal)
                    logger.info(f"Removed overlay/modal with selector {selector}")
            except Exception as e:
                logger.debug(f"Modal dismissal error: {e}")
233
+
234
def _advanced_find_element(self, driver: webdriver.Chrome, keyword: str) -> Optional[WebElement]:
    """Fuzzy fallback lookup for an element related to ``keyword``.

    Every input, textarea, button, a and div element is scored by the
    difflib similarity between ``keyword`` and the element's id, name,
    placeholder, aria-label and visible text joined by spaces (compared
    case-insensitively).

    Returns:
        The highest-scoring element when its similarity exceeds 0.5,
        otherwise None.
    """
    # Function-scope import: selenium is already a dependency of this module.
    from selenium.common.exceptions import WebDriverException

    needle = keyword.lower()
    candidates = driver.find_elements(By.CSS_SELECTOR, "input, textarea, button, a, div")
    best_match = None
    best_ratio = 0.0
    for candidate in candidates:
        try:
            combined_text = " ".join([
                candidate.get_attribute("id") or "",
                candidate.get_attribute("name") or "",
                candidate.get_attribute("placeholder") or "",
                candidate.get_attribute("aria-label") or "",
                candidate.text or "",
            ])
        except WebDriverException as e:
            # An element can go stale while the page mutates; skip it rather
            # than abandoning the whole scan.
            logger.debug(f"Skipping candidate during fallback search: {e}")
            continue
        ratio = difflib.SequenceMatcher(None, combined_text.lower(), needle).ratio()
        if ratio > best_ratio:
            best_ratio = ratio
            best_match = candidate
    if best_ratio > 0.5:
        logger.info(f"Advanced fallback detected element with similarity {best_ratio:.2f} for keyword '{keyword}'")
        return best_match
    return None
258
+
259
def _handle_navigation(self, driver: webdriver.Chrome, url: str) -> Dict[str, Any]:
    """Load ``url`` and wait for the page's <body> element to be present.

    Surrounding whitespace is stripped, and ``https://`` is prepended when
    the URL does not already start with http:// or https:// (checked
    case-insensitively). An empty URL is rejected without touching the
    browser.

    Returns:
        A result dict with ``action``, ``success`` and ``message``.
    """
    url = (url or "").strip()
    if not url:
        logger.error("Navigation failed: no URL provided")
        return {"action": "navigate", "success": False, "message": "Navigation failed: no URL provided"}
    # Compare on a lowercased copy so "HTTP://host" is not double-prefixed.
    if not url.lower().startswith(("http://", "https://")):
        url = f"https://{url}"
    try:
        driver.get(url)
        WebDriverWait(driver, self.default_timeout).until(EC.presence_of_element_located((By.TAG_NAME, "body")))
        return {"action": "navigate", "success": True, "message": f"Navigated to {url}"}
    except Exception as e:
        logger.error(f"Navigation to {url} failed: {e}")
        return {"action": "navigate", "success": False, "message": f"Navigation failed: {str(e)}"}
270
+
271
def _handle_click(self, driver: webdriver.Chrome, selector: str) -> Dict[str, Any]:
    """Click the element matching ``selector``, using a JavaScript click if the native click raises."""
    locator = (By.CSS_SELECTOR, selector)
    try:
        target = WebDriverWait(driver, self.default_timeout).until(
            EC.element_to_be_clickable(locator)
        )
        # Centre the element in the viewport before clicking.
        driver.execute_script("arguments[0].scrollIntoView({behavior: 'smooth', block: 'center'});", target)
        try:
            target.click()
        except Exception:
            driver.execute_script("arguments[0].click();", target)
    except Exception as exc:
        logger.error(f"Click action failed on selector {selector}: {exc}")
        return {"action": "click", "success": False, "message": f"Click failed: {str(exc)}"}
    return {"action": "click", "success": True, "message": f"Clicked element: {selector}"}
286
+
287
def _handle_typing(self, driver: webdriver.Chrome, selector: str, text: str, task: Dict[str, Any]) -> Dict[str, Any]:
    """
    Clear the element matching ``selector`` and type ``text`` into it.

    When the selector cannot be located and either the task description or
    the selector mentions "search", the fuzzy element finder is tried with
    the keyword "search" before giving up.
    """
    try:
        field = WebDriverWait(driver, self.default_timeout).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, selector))
        )
    except Exception as lookup_error:
        looks_like_search = (
            "search" in task.get("description", "").lower()
            or "search" in selector.lower()
        )
        if not looks_like_search:
            return {"action": "type", "success": False, "message": f"Typing failed: {str(lookup_error)}"}
        logger.info("Primary selector failed; using advanced fallback for element detection.")
        field = self._advanced_find_element(driver, "search")
        if not field:
            return {"action": "type", "success": False, "message": f"Typing failed: No search-like element found; error: {str(lookup_error)}"}

    try:
        field.clear()
        field.send_keys(text)
    except Exception as typing_error:
        logger.error(f"Typing action failed: {typing_error}")
        return {"action": "type", "success": False, "message": f"Typing failed: {str(typing_error)}"}
    return {"action": "type", "success": True, "message": f"Typed '{text}' into element."}
312
+
313
def _handle_wait(self, seconds: str) -> Dict[str, Any]:
    """Sleep for ``seconds`` (anything ``float()`` accepts).

    Returns:
        A success result after sleeping, or a failure result with the
        message "Invalid wait time" when the value cannot be converted or is
        not a usable sleep length.
    """
    try:
        wait_time = float(seconds)
        logger.info(f"Waiting for {wait_time} seconds")
        time.sleep(wait_time)
        return {"action": "wait", "success": True, "message": f"Waited {wait_time} seconds"}
    except (TypeError, ValueError, OverflowError):
        # TypeError: value such as None; ValueError: non-numeric text or a
        # negative/NaN sleep length; OverflowError: sleep length too large.
        logger.error(f"Invalid wait time provided: {seconds}")
        return {"action": "wait", "success": False, "message": "Invalid wait time"}
323
+
324
def _handle_wait_for_ajax(self, driver: webdriver.Chrome, seconds: str) -> Dict[str, Any]:
    """
    Poll until jQuery reports no active requests, or until the timeout.

    Only jQuery-driven requests are observable here: when the page has no
    ``window.jQuery`` the check passes immediately, and fetch/XHR traffic
    issued outside jQuery is not tracked.

    Args:
        seconds: Timeout in seconds; fractional values are accepted and an
            empty value falls back to 30.

    Returns:
        A success result in both the idle and the timed-out case (the message
        says which), so a busy page does not abort the workflow; a failure
        result only when the timeout is invalid or the script call raises.
    """
    try:
        raw = "" if seconds is None else str(seconds).strip()
        timeout = float(raw) if raw else 30.0
        logger.info(f"Waiting for AJAX/network activity for up to {timeout:g} seconds.")
        deadline = time.monotonic() + timeout
        while True:
            ajax_complete = driver.execute_script(
                "return window.jQuery ? jQuery.active === 0 : true;"
            )
            if ajax_complete:
                return {"action": "wait_for_ajax", "success": True, "message": "AJAX/network activity subsided."}
            if time.monotonic() >= deadline:
                break
            time.sleep(0.5)
        logger.info(f"AJAX wait timed out after {timeout:g} seconds; continuing.")
        return {
            "action": "wait_for_ajax",
            "success": True,
            "message": f"Timed out after {timeout:g} seconds waiting for AJAX/network activity; continuing.",
        }
    except Exception as e:
        logger.error(f"Wait for AJAX failed: {e}")
        return {"action": "wait_for_ajax", "success": False, "message": f"Wait for AJAX failed: {str(e)}"}
345
+
346
def _handle_scroll(self, driver: webdriver.Chrome, selector: str) -> Dict[str, Any]:
    """Scroll the element matching ``selector`` into view, or to the page bottom when no selector is given."""
    try:
        if not selector:
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            destination = "page bottom"
        else:
            anchor = WebDriverWait(driver, self.default_timeout).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, selector))
            )
            driver.execute_script("arguments[0].scrollIntoView({behavior: 'smooth', block: 'center'});", anchor)
            destination = selector
    except Exception as exc:
        logger.error(f"Scroll action failed on selector {selector}: {exc}")
        return {"action": "scroll", "success": False, "message": f"Scroll failed: {str(exc)}"}
    return {"action": "scroll", "success": True, "message": f"Scrolled to {destination}"}
362
+
363
def _handle_hover(self, driver: webdriver.Chrome, selector: str) -> Dict[str, Any]:
    """Move the mouse pointer over the visible element matching ``selector``."""
    locator = (By.CSS_SELECTOR, selector)
    try:
        hover_target = WebDriverWait(driver, self.default_timeout).until(
            EC.visibility_of_element_located(locator)
        )
        pointer = ActionChains(driver)
        pointer.move_to_element(hover_target).perform()
    except Exception as exc:
        logger.error(f"Hover action failed on selector {selector}: {exc}")
        return {"action": "hover", "success": False, "message": f"Hover failed: {str(exc)}"}
    return {"action": "hover", "success": True, "message": f"Hovered over {selector}"}
374
+
375
def _handle_screenshot(self, driver: webdriver.Chrome, filename: str) -> Dict[str, Any]:
    """Save a screenshot of the current browser window to ``filename``.

    Returns:
        A success result when the file was written; a failure result when the
        driver raises or reports that the file could not be written.
    """
    try:
        saved = driver.save_screenshot(filename)
    except Exception as e:
        logger.error(f"Screenshot capture failed: {e}")
        return {"action": "screenshot", "success": False, "message": f"Screenshot failed: {str(e)}"}
    # Selenium signals a file-write problem by returning False rather than
    # raising, so the return value has to be checked explicitly.
    if saved is False:
        logger.error(f"Screenshot capture failed: could not write {filename}")
        return {"action": "screenshot", "success": False, "message": f"Screenshot failed: could not write {filename}"}
    return {"action": "screenshot", "success": True, "message": f"Screenshot saved as {filename}"}
383
+
384
def _handle_switch_tab(self, driver: webdriver.Chrome, value: str) -> Dict[str, Any]:
    """
    Switch the driver to another tab/window.

    Args:
        value: Either a zero-based index into ``driver.window_handles`` or
            the keyword 'new' (case-insensitive), which selects the last
            handle in that list.

    Returns:
        A result dict; failure when the index is negative, beyond the open
        tabs, not an integer, or when the switch itself raises.
    """
    try:
        handles = driver.window_handles
        # str() tolerates a numeric value arriving from the plan JSON.
        requested = str(value).strip()
        if requested.lower() == "new":
            target_handle = handles[-1]
        else:
            idx = int(requested)
            # Reject negatives explicitly: Python indexing would otherwise
            # wrap around and silently pick a tab from the end.
            if not 0 <= idx < len(handles):
                return {"action": "switch_tab", "success": False, "message": f"Tab index {value} out of range"}
            target_handle = handles[idx]
        driver.switch_to.window(target_handle)
        return {"action": "switch_tab", "success": True, "message": f"Switched to tab {value}"}
    except Exception as e:
        logger.error(f"Switch tab failed: {e}")
        return {"action": "switch_tab", "success": False, "message": f"Switch tab failed: {str(e)}"}
403
+
404
def _handle_execute_script(self, driver: webdriver.Chrome, script: str) -> Dict[str, Any]:
    """
    Run ``script`` in the page via the driver and report its return value.

    The script text is executed as given; whatever the driver returns is
    placed under the ``result`` key of the success dict.
    """
    try:
        script_result = driver.execute_script(script)
    except Exception as exc:
        logger.error(f"Execute script failed: {exc}")
        return {"action": "execute_script", "success": False, "message": f"Script execution failed: {str(exc)}"}
    return {"action": "execute_script", "success": True, "message": "Script executed successfully", "result": script_result}
414
+
415
def _handle_drag_and_drop(self, driver: webdriver.Chrome, source_selector: str, target_selector: str) -> Dict[str, Any]:
    """
    Drag the element matching ``source_selector`` onto the one matching ``target_selector``.
    """
    try:
        waiter = WebDriverWait(driver, self.default_timeout)
        # Locate the source first, then the target; each lookup gets the full timeout.
        drag_from = waiter.until(EC.presence_of_element_located((By.CSS_SELECTOR, source_selector)))
        drop_onto = waiter.until(EC.presence_of_element_located((By.CSS_SELECTOR, target_selector)))
        ActionChains(driver).drag_and_drop(drag_from, drop_onto).perform()
    except Exception as exc:
        logger.error(f"Drag and drop failed from {source_selector} to {target_selector}: {exc}")
        return {"action": "drag_and_drop", "success": False, "message": f"Drag and drop failed: {str(exc)}"}
    return {"action": "drag_and_drop", "success": True, "message": f"Dragged element from {source_selector} to {target_selector}"}
431
+
432
def _capture_failure_screenshot(self, driver: webdriver.Chrome, action: str):
    """Best-effort debug screenshot after a failed task.

    The file is named ``failure_<action>_<unix-seconds>.png`` and is written
    relative to the current working directory. Problems are logged, never
    raised.
    """
    filename = f"failure_{action}_{int(time.time())}.png"
    try:
        saved = driver.save_screenshot(filename)
    except Exception as e:
        logger.error(f"Failed to capture screenshot: {e}")
        return
    # save_screenshot reports a file-write problem by returning False.
    if saved is False:
        logger.error(f"Failed to capture screenshot: could not write {filename}")
        return
    logger.info(f"Failure screenshot captured: {filename}")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: semantio
3
- Version: 0.0.5
3
+ Version: 0.0.6
4
4
  Summary: A powerful SDK for building AI agents
5
5
  Home-page: https://github.com/Syenah/semantio
6
6
  Author: Rakesh
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
2
2
 
3
3
  setup(
4
4
  name="semantio",
5
- version="0.0.5",
5
+ version="0.0.6",
6
6
  description="A powerful SDK for building AI agents",
7
7
  long_description=open("README.md").read(),
8
8
  long_description_content_type="text/markdown",
File without changes
@@ -1,271 +0,0 @@
1
- # web_browser.py
2
- from typing import Dict, Any, List, Optional
3
- from pydantic import Field, BaseModel
4
- from selenium import webdriver
5
- from selenium.webdriver.common.by import By
6
- from selenium.webdriver.support.ui import WebDriverWait
7
- from selenium.webdriver.support import expected_conditions as EC
8
- from selenium.webdriver.chrome.options import Options
9
- from selenium.webdriver.chrome.service import Service
10
- from webdriver_manager.chrome import ChromeDriverManager
11
- from bs4 import BeautifulSoup
12
- import json
13
- import time
14
- import re
15
- import logging
16
- from .base_tool import BaseTool
17
-
18
- logger = logging.getLogger(__name__)
19
-
20
- class BrowserPlan(BaseModel):
21
- tasks: List[Dict[str, Any]] = Field(
22
- ...,
23
- description="List of automation tasks to execute"
24
- )
25
-
26
- class WebBrowserTool(BaseTool):
27
- name: str = Field("WebBrowser", description="Name of the tool")
28
- description: str = Field(
29
- "Universal web automation tool for dynamic website interactions",
30
- description="Tool description"
31
- )
32
-
33
- def execute(self, input: Dict[str, Any]) -> Dict[str, Any]:
34
- """Execute dynamic web automation workflow"""
35
- driver = None
36
- try:
37
- driver = self._init_browser(input.get("headless", False))
38
- results = []
39
- current_url = ""
40
-
41
- # Generate initial plan
42
- plan = self._generate_plan(input['query'], current_url)
43
-
44
- for task in plan.tasks:
45
- result = self._execute_safe_task(driver, task)
46
- results.append(result)
47
-
48
- if not result['success']:
49
- break
50
-
51
- # Update context for next tasks
52
- current_url = driver.current_url
53
-
54
- return {"status": "success", "results": results}
55
-
56
- except Exception as e:
57
- return {"status": "error", "message": str(e)}
58
- finally:
59
- if driver:
60
- driver.quit()
61
-
62
- def _init_browser(self, headless: bool) -> webdriver.Chrome:
63
- """Initialize browser with advanced options"""
64
- options = Options()
65
- options.add_argument("--start-maximized")
66
- options.add_argument("--disable-blink-features=AutomationControlled")
67
- options.add_experimental_option("excludeSwitches", ["enable-automation"])
68
-
69
- if headless:
70
- options.add_argument("--headless=new")
71
-
72
- return webdriver.Chrome(
73
- service=Service(ChromeDriverManager().install()),
74
- options=options
75
- )
76
-
77
- def _generate_plan(self, query: str, current_url: str) -> BrowserPlan:
78
- """Generate adaptive execution plan using LLM"""
79
- prompt = f"""Generate browser automation plan for: {query}
80
-
81
- Current URL: {current_url or 'No page loaded yet'}
82
-
83
- Required JSON format:
84
- {{
85
- "tasks": [
86
- {{
87
- "action": "navigate|click|type|wait|scroll",
88
- "selector": "CSS selector (optional)",
89
- "value": "input text/URL/seconds",
90
- "description": "action purpose"
91
- }}
92
- ]
93
- }}
94
-
95
- Guidelines:
96
- 1. Prefer IDs in selectors (#element-id)
97
- 2. Use semantic attributes (aria-label, name)
98
- 3. Include wait steps after navigation
99
- 4. Prioritize visible elements
100
- 5. Add scroll steps for hidden elements
101
- """
102
-
103
- response = self.llm.generate(prompt=prompt)
104
- return self._parse_plan(response)
105
-
106
- def _parse_plan(self, response: str) -> BrowserPlan:
107
- """Robust JSON parsing with multiple fallback strategies"""
108
- try:
109
- # Try extracting JSON from markdown code block
110
- json_match = re.search(r'```json\n?(.+?)\n?```', response, re.DOTALL)
111
- if json_match:
112
- plan_data = json.loads(json_match.group(1).strip())
113
- else:
114
- # Fallback to extract first JSON object
115
- json_str = re.search(r'\{.*\}', response, re.DOTALL).group()
116
- plan_data = json.loads(json_str)
117
-
118
- # Validate tasks structure
119
- validated_tasks = []
120
- for task in plan_data.get("tasks", []):
121
- if not all(key in task for key in ["action", "description"]):
122
- continue
123
- validated_tasks.append({
124
- "action": task["action"],
125
- "selector": task.get("selector", ""),
126
- "value": task.get("value", ""),
127
- "description": task["description"]
128
- })
129
-
130
- return BrowserPlan(tasks=validated_tasks)
131
-
132
- except (json.JSONDecodeError, AttributeError) as e:
133
- logger.error(f"Plan parsing failed: {e}")
134
- return BrowserPlan(tasks=[])
135
-
136
- def _execute_safe_task(self, driver, task: Dict) -> Dict[str, Any]:
137
- """Execute task with comprehensive error handling"""
138
- try:
139
- action = task["action"].lower()
140
- selector = task.get("selector", "")
141
- value = task.get("value", "")
142
-
143
- if action == "navigate":
144
- return self._handle_navigation(driver, value)
145
-
146
- elif action == "click":
147
- return self._handle_click(driver, selector)
148
-
149
- elif action == "type":
150
- return self._handle_typing(driver, selector, value)
151
-
152
- elif action == "wait":
153
- return self._handle_wait(value)
154
-
155
- elif action == "scroll":
156
- return self._handle_scroll(driver, selector)
157
-
158
- return {
159
- "action": action,
160
- "success": False,
161
- "message": f"Unsupported action: {action}"
162
- }
163
-
164
- except Exception as e:
165
- return {
166
- "action": action,
167
- "success": False,
168
- "message": f"Critical error: {str(e)}"
169
- }
170
-
171
- def _handle_navigation(self, driver, url: str) -> Dict[str, Any]:
172
- """Smart navigation handler"""
173
- if not url.startswith(("http://", "https://")):
174
- url = f"https://{url}"
175
-
176
- try:
177
- driver.get(url)
178
- WebDriverWait(driver, 15).until(
179
- EC.presence_of_element_located((By.TAG_NAME, "body"))
180
- )
181
- return {
182
- "action": "navigate",
183
- "success": True,
184
- "message": f"Navigated to {url}"
185
- }
186
- except Exception as e:
187
- return {
188
- "action": "navigate",
189
- "success": False,
190
- "message": f"Navigation failed: {str(e)}"
191
- }
192
-
193
- def _handle_click(self, driver, selector: str) -> Dict[str, Any]:
194
- """Dynamic click handler"""
195
- try:
196
- element = WebDriverWait(driver, 15).until(
197
- EC.element_to_be_clickable((By.CSS_SELECTOR, selector))
198
- )
199
- driver.execute_script("arguments[0].scrollIntoView({behavior: 'smooth'});", element)
200
- element.click()
201
- return {
202
- "action": "click",
203
- "success": True,
204
- "message": f"Clicked element: {selector}"
205
- }
206
- except Exception as e:
207
- return {
208
- "action": "click",
209
- "success": False,
210
- "message": f"Click failed: {str(e)}"
211
- }
212
-
213
- def _handle_typing(self, driver, selector: str, text: str) -> Dict[str, Any]:
214
- """Universal typing handler"""
215
- try:
216
- element = WebDriverWait(driver, 15).until(
217
- EC.presence_of_element_located((By.CSS_SELECTOR, selector))
218
- )
219
- element.clear()
220
- element.send_keys(text)
221
- return {
222
- "action": "type",
223
- "success": True,
224
- "message": f"Typed '{text}' into {selector}"
225
- }
226
- except Exception as e:
227
- return {
228
- "action": "type",
229
- "success": False,
230
- "message": f"Typing failed: {str(e)}"
231
- }
232
-
233
- def _handle_wait(self, seconds: str) -> Dict[str, Any]:
234
- """Configurable wait handler"""
235
- try:
236
- wait_time = float(seconds)
237
- time.sleep(wait_time)
238
- return {
239
- "action": "wait",
240
- "success": True,
241
- "message": f"Waited {wait_time} seconds"
242
- }
243
- except ValueError:
244
- return {
245
- "action": "wait",
246
- "success": False,
247
- "message": "Invalid wait time"
248
- }
249
-
250
- def _handle_scroll(self, driver, selector: str) -> Dict[str, Any]:
251
- """Smart scroll handler"""
252
- try:
253
- if selector:
254
- element = WebDriverWait(driver, 15).until(
255
- EC.presence_of_element_located((By.CSS_SELECTOR, selector))
256
- )
257
- driver.execute_script("arguments[0].scrollIntoView({behavior: 'smooth'});", element)
258
- else:
259
- driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
260
-
261
- return {
262
- "action": "scroll",
263
- "success": True,
264
- "message": f"Scrolled to {selector or 'page bottom'}"
265
- }
266
- except Exception as e:
267
- return {
268
- "action": "scroll",
269
- "success": False,
270
- "message": f"Scroll failed: {str(e)}"
271
- }
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes