semantio 0.0.5__py3-none-any.whl → 0.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
semantio/__init__.py CHANGED
@@ -0,0 +1,4 @@
1
+ import warnings
2
+
3
+ # Suppress all warnings globally
4
+ warnings.simplefilter("ignore")
@@ -1,8 +1,10 @@
1
1
  # web_browser.py
2
- from typing import Dict, Any, List, Optional
2
+ from typing import Dict, Any, List, Optional, Callable
3
3
  from pydantic import Field, BaseModel
4
4
  from selenium import webdriver
5
5
  from selenium.webdriver.common.by import By
6
+ from selenium.webdriver.common.action_chains import ActionChains
7
+ from selenium.webdriver.remote.webelement import WebElement
6
8
  from selenium.webdriver.support.ui import WebDriverWait
7
9
  from selenium.webdriver.support import expected_conditions as EC
8
10
  from selenium.webdriver.chrome.options import Options
@@ -13,6 +15,8 @@ import json
13
15
  import time
14
16
  import re
15
17
  import logging
18
+ import os
19
+ import difflib
16
20
  from .base_tool import BaseTool
17
21
 
18
22
  logger = logging.getLogger(__name__)
@@ -26,99 +30,140 @@ class BrowserPlan(BaseModel):
26
30
  class WebBrowserTool(BaseTool):
27
31
  name: str = Field("WebBrowser", description="Name of the tool")
28
32
  description: str = Field(
29
- "Universal web automation tool for dynamic website interactions",
33
+ "Highly advanced universal web automation tool with advanced element identification, AJAX waiting, modal dismissal, multi-tab support, and custom JS injection.",
30
34
  description="Tool description"
31
35
  )
32
36
 
37
+ default_timeout: int = 15 # Default wait timeout in seconds
38
+ max_retries: int = 3 # Increased maximum retries for any task
39
+
33
40
  def execute(self, input: Dict[str, Any]) -> Dict[str, Any]:
34
- """Execute dynamic web automation workflow"""
41
+ """Execute an advanced dynamic web automation workflow."""
35
42
  driver = None
43
+ overall_start = time.time()
36
44
  try:
37
- driver = self._init_browser(input.get("headless", False))
45
+ headless = input.get("headless", False)
46
+ self.default_timeout = int(input.get("timeout", self.default_timeout))
47
+ self.max_retries = int(input.get("max_retries", self.max_retries))
48
+ driver = self._init_browser(headless)
38
49
  results = []
39
50
  current_url = ""
40
51
 
41
- # Generate initial plan
42
- plan = self._generate_plan(input['query'], current_url)
52
+ plan = self._generate_plan(input.get('query', ''), current_url)
53
+ if not plan.tasks:
54
+ raise ValueError("No valid tasks in the generated plan.")
55
+
56
+ # Dynamic mapping: action name to handler function.
57
+ action_map: Dict[str, Callable[[webdriver.Chrome, Dict[str, Any]], Dict[str, Any]]] = {
58
+ "navigate": lambda d, task: self._handle_navigation(d, task.get("value", "")),
59
+ "click": lambda d, task: self._handle_click(d, task.get("selector", "")),
60
+ "type": lambda d, task: self._handle_typing(d, task.get("selector", ""), task.get("value", ""), task),
61
+ "wait": lambda d, task: self._handle_wait(task.get("value", "")),
62
+ "wait_for_ajax": lambda d, task: self._handle_wait_for_ajax(d, task.get("value", "30")),
63
+ "scroll": lambda d, task: self._handle_scroll(d, task.get("selector", "")),
64
+ "hover": lambda d, task: self._handle_hover(d, task.get("selector", "")),
65
+ "screenshot": lambda d, task: self._handle_screenshot(d, task.get("value", "screenshot.png")),
66
+ "switch_tab": lambda d, task: self._handle_switch_tab(d, task.get("value", "0")),
67
+ "execute_script": lambda d, task: self._handle_execute_script(d, task.get("value", "")),
68
+ "drag_and_drop": lambda d, task: self._handle_drag_and_drop(d, task.get("selector", ""), task.get("value", "")),
69
+ }
43
70
 
44
71
  for task in plan.tasks:
45
- result = self._execute_safe_task(driver, task)
72
+ # Before each action, dismiss modals/overlays.
73
+ self._dismiss_unwanted_modals(driver)
74
+ action = task.get("action", "").lower()
75
+ logger.info(f"Executing task: {task.get('description', action)}")
76
+ start_time = time.time()
77
+ handler = action_map.get(action)
78
+ if not handler:
79
+ results.append({
80
+ "action": action,
81
+ "success": False,
82
+ "message": f"Unsupported action: {action}"
83
+ })
84
+ continue
85
+
86
+ result = self._execute_with_retries(driver, task, handler)
87
+ elapsed = time.time() - start_time
88
+ result["elapsed"] = elapsed
89
+ logger.info(f"Action '{action}' completed in {elapsed:.2f} seconds.")
46
90
  results.append(result)
47
-
48
- if not result['success']:
91
+
92
+ if not result.get('success', False):
93
+ logger.error(f"Task failed: {result.get('message')}")
94
+ self._capture_failure_screenshot(driver, action)
49
95
  break
50
-
51
- # Update context for next tasks
96
+
52
97
  current_url = driver.current_url
53
98
 
54
- return {"status": "success", "results": results}
99
+ overall_elapsed = time.time() - overall_start
100
+ logger.info(f"Total execution time: {overall_elapsed:.2f} seconds.")
101
+ return {"status": "success", "results": results, "total_time": overall_elapsed}
55
102
 
56
103
  except Exception as e:
104
+ logger.exception("Execution error:")
57
105
  return {"status": "error", "message": str(e)}
58
106
  finally:
59
107
  if driver:
60
108
  driver.quit()
61
109
 
62
110
  def _init_browser(self, headless: bool) -> webdriver.Chrome:
63
- """Initialize browser with advanced options"""
111
+ """Initialize browser with advanced options."""
64
112
  options = Options()
65
113
  options.add_argument("--start-maximized")
66
114
  options.add_argument("--disable-blink-features=AutomationControlled")
67
115
  options.add_experimental_option("excludeSwitches", ["enable-automation"])
68
-
69
116
  if headless:
70
117
  options.add_argument("--headless=new")
71
-
72
118
  return webdriver.Chrome(
73
119
  service=Service(ChromeDriverManager().install()),
74
120
  options=options
75
121
  )
76
122
 
77
123
  def _generate_plan(self, query: str, current_url: str) -> BrowserPlan:
78
- """Generate adaptive execution plan using LLM"""
124
+ """Generate an adaptive execution plan using an LLM or other dynamic planner."""
79
125
  prompt = f"""Generate browser automation plan for: {query}
80
-
81
- Current URL: {current_url or 'No page loaded yet'}
82
-
83
- Required JSON format:
126
+
127
+ Current URL: {current_url or 'No page loaded yet'}
128
+
129
+ Required JSON format:
130
+ {{
131
+ "tasks": [
84
132
  {{
85
- "tasks": [
86
- {{
87
- "action": "navigate|click|type|wait|scroll",
88
- "selector": "CSS selector (optional)",
89
- "value": "input text/URL/seconds",
90
- "description": "action purpose"
91
- }}
92
- ]
133
+ "action": "navigate|click|type|wait|wait_for_ajax|scroll|hover|screenshot|switch_tab|execute_script|drag_and_drop",
134
+ "selector": "CSS selector (optional)",
135
+ "value": "input text/URL/seconds/filename/target-selector",
136
+ "description": "action purpose"
93
137
  }}
94
-
95
- Guidelines:
96
- 1. Prefer IDs in selectors (#element-id)
97
- 2. Use semantic attributes (aria-label, name)
98
- 3. Include wait steps after navigation
99
- 4. Prioritize visible elements
100
- 5. Add scroll steps for hidden elements
101
- """
102
-
138
+ ]
139
+ }}
140
+
141
+ Guidelines:
142
+ 1. Prefer IDs in selectors (#element-id) and semantic attributes.
143
+ 2. Include wait steps after navigation and wait for AJAX where applicable.
144
+ 3. Dismiss any modals/pop-ups that are not part of the task.
145
+ 4. For drag_and_drop, use source selector in 'selector' and target selector in 'value'.
146
+ 5. For execute_script, 'value' should contain valid JavaScript.
147
+ 6. For switch_tab, 'value' should be an index or keyword 'new'.
148
+ """
103
149
  response = self.llm.generate(prompt=prompt)
104
150
  return self._parse_plan(response)
105
151
 
106
152
  def _parse_plan(self, response: str) -> BrowserPlan:
107
- """Robust JSON parsing with multiple fallback strategies"""
153
+ """Robust JSON parsing with multiple fallback strategies."""
108
154
  try:
109
- # Try extracting JSON from markdown code block
110
155
  json_match = re.search(r'```json\n?(.+?)\n?```', response, re.DOTALL)
111
156
  if json_match:
112
157
  plan_data = json.loads(json_match.group(1).strip())
113
158
  else:
114
- # Fallback to extract first JSON object
115
- json_str = re.search(r'\{.*\}', response, re.DOTALL).group()
116
- plan_data = json.loads(json_str)
117
-
118
- # Validate tasks structure
159
+ json_str_match = re.search(r'\{.*\}', response, re.DOTALL)
160
+ if not json_str_match:
161
+ raise ValueError("No JSON object found in the response.")
162
+ plan_data = json.loads(json_str_match.group())
119
163
  validated_tasks = []
120
164
  for task in plan_data.get("tasks", []):
121
165
  if not all(key in task for key in ["action", "description"]):
166
+ logger.warning(f"Skipping task due to missing keys: {task}")
122
167
  continue
123
168
  validated_tasks.append({
124
169
  "action": task["action"],
@@ -126,146 +171,269 @@ class WebBrowserTool(BaseTool):
126
171
  "value": task.get("value", ""),
127
172
  "description": task["description"]
128
173
  })
129
-
130
174
  return BrowserPlan(tasks=validated_tasks)
131
-
132
- except (json.JSONDecodeError, AttributeError) as e:
175
+ except (json.JSONDecodeError, AttributeError, ValueError) as e:
133
176
  logger.error(f"Plan parsing failed: {e}")
134
177
  return BrowserPlan(tasks=[])
135
178
 
136
- def _execute_safe_task(self, driver, task: Dict) -> Dict[str, Any]:
137
- """Execute task with comprehensive error handling"""
179
+ def _execute_with_retries(self, driver: webdriver.Chrome, task: Dict[str, Any],
180
+ handler: Callable[[webdriver.Chrome, Dict[str, Any]], Dict[str, Any]]) -> Dict[str, Any]:
181
+ """Execute a task with retry logic and exponential backoff."""
182
+ attempts = 0
183
+ result = {}
184
+ while attempts < self.max_retries:
185
+ result = self._execute_safe_task(driver, task, handler)
186
+ if result.get("success", False):
187
+ return result
188
+ attempts += 1
189
+ logger.info(f"Retrying task '{task.get('action')}' (attempt {attempts + 1}/{self.max_retries})")
190
+ time.sleep(1 * attempts)
191
+ return result
192
+
193
+ def _execute_safe_task(self, driver: webdriver.Chrome, task: Dict[str, Any],
194
+ handler: Callable[[webdriver.Chrome, Dict[str, Any]], Dict[str, Any]]) -> Dict[str, Any]:
195
+ """Execute a task with comprehensive error handling."""
138
196
  try:
139
- action = task["action"].lower()
140
- selector = task.get("selector", "")
141
- value = task.get("value", "")
142
-
143
- if action == "navigate":
144
- return self._handle_navigation(driver, value)
145
-
146
- elif action == "click":
147
- return self._handle_click(driver, selector)
148
-
149
- elif action == "type":
150
- return self._handle_typing(driver, selector, value)
151
-
152
- elif action == "wait":
153
- return self._handle_wait(value)
154
-
155
- elif action == "scroll":
156
- return self._handle_scroll(driver, selector)
157
-
158
- return {
159
- "action": action,
160
- "success": False,
161
- "message": f"Unsupported action: {action}"
162
- }
163
-
197
+ return handler(driver, task)
164
198
  except Exception as e:
165
- return {
166
- "action": action,
167
- "success": False,
168
- "message": f"Critical error: {str(e)}"
169
- }
199
+ action = task.get("action", "unknown")
200
+ logger.exception(f"Error executing task '{action}':")
201
+ return {"action": action, "success": False, "message": f"Critical error: {str(e)}"}
170
202
 
171
- def _handle_navigation(self, driver, url: str) -> Dict[str, Any]:
172
- """Smart navigation handler"""
203
+ def _dismiss_unwanted_modals(self, driver: webdriver.Chrome):
204
+ """
205
+ Dismiss or remove unwanted modals, overlays, or pop-ups.
206
+ First attempts to click a close button; if not available, removes the element via JS.
207
+ """
208
+ try:
209
+ modal_selectors = [".modal", ".popup", '[role="dialog"]', ".overlay", ".lightbox"]
210
+ for selector in modal_selectors:
211
+ elements = driver.find_elements(By.CSS_SELECTOR, selector)
212
+ for modal in elements:
213
+ if modal.is_displayed():
214
+ close_selectors = [".close", ".btn-close", "[aria-label='Close']", "[data-dismiss='modal']"]
215
+ dismissed = False
216
+ for close_sel in close_selectors:
217
+ try:
218
+ close_button = modal.find_element(By.CSS_SELECTOR, close_sel)
219
+ if close_button.is_displayed():
220
+ close_button.click()
221
+ dismissed = True
222
+ logger.info(f"Dismissed modal using selector {close_sel}")
223
+ time.sleep(1)
224
+ break
225
+ except Exception:
226
+ continue
227
+ if not dismissed:
228
+ # Remove overlay by setting display to none
229
+ driver.execute_script("arguments[0].remove();", modal)
230
+ logger.info(f"Removed overlay/modal with selector {selector}")
231
+ except Exception as e:
232
+ logger.debug(f"Modal dismissal error: {e}")
233
+
234
+ def _advanced_find_element(self, driver: webdriver.Chrome, keyword: str) -> Optional[WebElement]:
235
+ """
236
+ Advanced fallback for finding an element.
237
+ Searches across multiple attributes and inner text using fuzzy matching.
238
+ """
239
+ candidates = driver.find_elements(By.CSS_SELECTOR, "input, textarea, button, a, div")
240
+ best_match = None
241
+ best_ratio = 0.0
242
+ for candidate in candidates:
243
+ combined_text = " ".join([
244
+ candidate.get_attribute("id") or "",
245
+ candidate.get_attribute("name") or "",
246
+ candidate.get_attribute("placeholder") or "",
247
+ candidate.get_attribute("aria-label") or "",
248
+ candidate.text or "",
249
+ ])
250
+ ratio = difflib.SequenceMatcher(None, combined_text.lower(), keyword.lower()).ratio()
251
+ if ratio > best_ratio:
252
+ best_ratio = ratio
253
+ best_match = candidate
254
+ if best_ratio > 0.5:
255
+ logger.info(f"Advanced fallback detected element with similarity {best_ratio:.2f} for keyword '{keyword}'")
256
+ return best_match
257
+ return None
258
+
259
+ def _handle_navigation(self, driver: webdriver.Chrome, url: str) -> Dict[str, Any]:
260
+ """Handle navigation with URL correction."""
173
261
  if not url.startswith(("http://", "https://")):
174
262
  url = f"https://{url}"
175
-
176
263
  try:
177
264
  driver.get(url)
178
- WebDriverWait(driver, 15).until(
179
- EC.presence_of_element_located((By.TAG_NAME, "body"))
180
- )
181
- return {
182
- "action": "navigate",
183
- "success": True,
184
- "message": f"Navigated to {url}"
185
- }
265
+ WebDriverWait(driver, self.default_timeout).until(EC.presence_of_element_located((By.TAG_NAME, "body")))
266
+ return {"action": "navigate", "success": True, "message": f"Navigated to {url}"}
186
267
  except Exception as e:
187
- return {
188
- "action": "navigate",
189
- "success": False,
190
- "message": f"Navigation failed: {str(e)}"
191
- }
268
+ logger.error(f"Navigation to {url} failed: {e}")
269
+ return {"action": "navigate", "success": False, "message": f"Navigation failed: {str(e)}"}
192
270
 
193
- def _handle_click(self, driver, selector: str) -> Dict[str, Any]:
194
- """Dynamic click handler"""
271
+ def _handle_click(self, driver: webdriver.Chrome, selector: str) -> Dict[str, Any]:
272
+ """Handle click actions with fallback using JS if needed."""
195
273
  try:
196
- element = WebDriverWait(driver, 15).until(
274
+ element = WebDriverWait(driver, self.default_timeout).until(
197
275
  EC.element_to_be_clickable((By.CSS_SELECTOR, selector))
198
276
  )
199
- driver.execute_script("arguments[0].scrollIntoView({behavior: 'smooth'});", element)
200
- element.click()
201
- return {
202
- "action": "click",
203
- "success": True,
204
- "message": f"Clicked element: {selector}"
205
- }
277
+ driver.execute_script("arguments[0].scrollIntoView({behavior: 'smooth', block: 'center'});", element)
278
+ try:
279
+ element.click()
280
+ except Exception:
281
+ driver.execute_script("arguments[0].click();", element)
282
+ return {"action": "click", "success": True, "message": f"Clicked element: {selector}"}
206
283
  except Exception as e:
207
- return {
208
- "action": "click",
209
- "success": False,
210
- "message": f"Click failed: {str(e)}"
211
- }
284
+ logger.error(f"Click action failed on selector {selector}: {e}")
285
+ return {"action": "click", "success": False, "message": f"Click failed: {str(e)}"}
212
286
 
213
- def _handle_typing(self, driver, selector: str, text: str) -> Dict[str, Any]:
214
- """Universal typing handler"""
287
+ def _handle_typing(self, driver: webdriver.Chrome, selector: str, text: str, task: Dict[str, Any]) -> Dict[str, Any]:
288
+ """
289
+ Handle typing into an element.
290
+ If the primary selector fails, attempt advanced fallback detection.
291
+ """
215
292
  try:
216
- element = WebDriverWait(driver, 15).until(
293
+ element = WebDriverWait(driver, self.default_timeout).until(
217
294
  EC.presence_of_element_located((By.CSS_SELECTOR, selector))
218
295
  )
296
+ except Exception as e:
297
+ # If the task seems to involve search or similar text, use advanced fallback.
298
+ if "search" in task.get("description", "").lower() or "search" in selector.lower():
299
+ logger.info("Primary selector failed; using advanced fallback for element detection.")
300
+ element = self._advanced_find_element(driver, "search")
301
+ if not element:
302
+ return {"action": "type", "success": False, "message": f"Typing failed: No search-like element found; error: {str(e)}"}
303
+ else:
304
+ return {"action": "type", "success": False, "message": f"Typing failed: {str(e)}"}
305
+ try:
219
306
  element.clear()
220
307
  element.send_keys(text)
221
- return {
222
- "action": "type",
223
- "success": True,
224
- "message": f"Typed '{text}' into {selector}"
225
- }
308
+ return {"action": "type", "success": True, "message": f"Typed '{text}' into element."}
226
309
  except Exception as e:
227
- return {
228
- "action": "type",
229
- "success": False,
230
- "message": f"Typing failed: {str(e)}"
231
- }
310
+ logger.error(f"Typing action failed: {e}")
311
+ return {"action": "type", "success": False, "message": f"Typing failed: {str(e)}"}
232
312
 
233
313
  def _handle_wait(self, seconds: str) -> Dict[str, Any]:
234
- """Configurable wait handler"""
314
+ """Handle a simple wait."""
235
315
  try:
236
316
  wait_time = float(seconds)
317
+ logger.info(f"Waiting for {wait_time} seconds")
237
318
  time.sleep(wait_time)
238
- return {
239
- "action": "wait",
240
- "success": True,
241
- "message": f"Waited {wait_time} seconds"
242
- }
243
- except ValueError:
244
- return {
245
- "action": "wait",
246
- "success": False,
247
- "message": "Invalid wait time"
248
- }
319
+ return {"action": "wait", "success": True, "message": f"Waited {wait_time} seconds"}
320
+ except ValueError as e:
321
+ logger.error(f"Invalid wait time provided: {seconds}")
322
+ return {"action": "wait", "success": False, "message": "Invalid wait time"}
323
+
324
+ def _handle_wait_for_ajax(self, driver: webdriver.Chrome, seconds: str) -> Dict[str, Any]:
325
+ """
326
+ Wait until AJAX/network activity has subsided.
327
+ This implementation first checks for jQuery, then falls back to a generic check.
328
+ """
329
+ try:
330
+ timeout = int(seconds)
331
+ logger.info(f"Waiting for AJAX/network activity for up to {timeout} seconds.")
332
+ end_time = time.time() + timeout
333
+ while time.time() < end_time:
334
+ ajax_complete = driver.execute_script("""
335
+ return (window.jQuery ? jQuery.active === 0 : true) &&
336
+ (typeof window.fetch === 'function' ? true : true);
337
+ """)
338
+ if ajax_complete:
339
+ break
340
+ time.sleep(0.5)
341
+ return {"action": "wait_for_ajax", "success": True, "message": "AJAX/network activity subsided."}
342
+ except Exception as e:
343
+ logger.error(f"Wait for AJAX failed: {e}")
344
+ return {"action": "wait_for_ajax", "success": False, "message": f"Wait for AJAX failed: {str(e)}"}
249
345
 
250
- def _handle_scroll(self, driver, selector: str) -> Dict[str, Any]:
251
- """Smart scroll handler"""
346
+ def _handle_scroll(self, driver: webdriver.Chrome, selector: str) -> Dict[str, Any]:
347
+ """Handle scrolling to a specific element or page bottom."""
252
348
  try:
253
349
  if selector:
254
- element = WebDriverWait(driver, 15).until(
350
+ element = WebDriverWait(driver, self.default_timeout).until(
255
351
  EC.presence_of_element_located((By.CSS_SELECTOR, selector))
256
352
  )
257
- driver.execute_script("arguments[0].scrollIntoView({behavior: 'smooth'});", element)
353
+ driver.execute_script("arguments[0].scrollIntoView({behavior: 'smooth', block: 'center'});", element)
354
+ scroll_target = selector
258
355
  else:
259
356
  driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
260
-
261
- return {
262
- "action": "scroll",
263
- "success": True,
264
- "message": f"Scrolled to {selector or 'page bottom'}"
265
- }
357
+ scroll_target = "page bottom"
358
+ return {"action": "scroll", "success": True, "message": f"Scrolled to {scroll_target}"}
359
+ except Exception as e:
360
+ logger.error(f"Scroll action failed on selector {selector}: {e}")
361
+ return {"action": "scroll", "success": False, "message": f"Scroll failed: {str(e)}"}
362
+
363
+ def _handle_hover(self, driver: webdriver.Chrome, selector: str) -> Dict[str, Any]:
364
+ """Handle mouse hover action."""
365
+ try:
366
+ element = WebDriverWait(driver, self.default_timeout).until(
367
+ EC.visibility_of_element_located((By.CSS_SELECTOR, selector))
368
+ )
369
+ ActionChains(driver).move_to_element(element).perform()
370
+ return {"action": "hover", "success": True, "message": f"Hovered over {selector}"}
371
+ except Exception as e:
372
+ logger.error(f"Hover action failed on selector {selector}: {e}")
373
+ return {"action": "hover", "success": False, "message": f"Hover failed: {str(e)}"}
374
+
375
+ def _handle_screenshot(self, driver: webdriver.Chrome, filename: str) -> Dict[str, Any]:
376
+ """Capture a screenshot of the current browser state."""
377
+ try:
378
+ driver.save_screenshot(filename)
379
+ return {"action": "screenshot", "success": True, "message": f"Screenshot saved as {filename}"}
380
+ except Exception as e:
381
+ logger.error(f"Screenshot capture failed: {e}")
382
+ return {"action": "screenshot", "success": False, "message": f"Screenshot failed: {str(e)}"}
383
+
384
+ def _handle_switch_tab(self, driver: webdriver.Chrome, value: str) -> Dict[str, Any]:
385
+ """
386
+ Switch between tabs. 'value' can be an index or the keyword 'new'.
387
+ """
388
+ try:
389
+ handles = driver.window_handles
390
+ if value.lower() == "new":
391
+ target_handle = handles[-1]
392
+ else:
393
+ idx = int(value)
394
+ if idx < len(handles):
395
+ target_handle = handles[idx]
396
+ else:
397
+ return {"action": "switch_tab", "success": False, "message": f"Tab index {value} out of range"}
398
+ driver.switch_to.window(target_handle)
399
+ return {"action": "switch_tab", "success": True, "message": f"Switched to tab {value}"}
400
+ except Exception as e:
401
+ logger.error(f"Switch tab failed: {e}")
402
+ return {"action": "switch_tab", "success": False, "message": f"Switch tab failed: {str(e)}"}
403
+
404
+ def _handle_execute_script(self, driver: webdriver.Chrome, script: str) -> Dict[str, Any]:
405
+ """
406
+ Execute arbitrary JavaScript code.
407
+ """
408
+ try:
409
+ result = driver.execute_script(script)
410
+ return {"action": "execute_script", "success": True, "message": "Script executed successfully", "result": result}
411
+ except Exception as e:
412
+ logger.error(f"Execute script failed: {e}")
413
+ return {"action": "execute_script", "success": False, "message": f"Script execution failed: {str(e)}"}
414
+
415
+ def _handle_drag_and_drop(self, driver: webdriver.Chrome, source_selector: str, target_selector: str) -> Dict[str, Any]:
416
+ """
417
+ Simulate a drag-and-drop operation.
418
+ """
419
+ try:
420
+ source = WebDriverWait(driver, self.default_timeout).until(
421
+ EC.presence_of_element_located((By.CSS_SELECTOR, source_selector))
422
+ )
423
+ target = WebDriverWait(driver, self.default_timeout).until(
424
+ EC.presence_of_element_located((By.CSS_SELECTOR, target_selector))
425
+ )
426
+ ActionChains(driver).drag_and_drop(source, target).perform()
427
+ return {"action": "drag_and_drop", "success": True, "message": f"Dragged element from {source_selector} to {target_selector}"}
428
+ except Exception as e:
429
+ logger.error(f"Drag and drop failed from {source_selector} to {target_selector}: {e}")
430
+ return {"action": "drag_and_drop", "success": False, "message": f"Drag and drop failed: {str(e)}"}
431
+
432
+ def _capture_failure_screenshot(self, driver: webdriver.Chrome, action: str):
433
+ """Capture a screenshot for debugging when an error occurs."""
434
+ filename = f"failure_{action}_{int(time.time())}.png"
435
+ try:
436
+ driver.save_screenshot(filename)
437
+ logger.info(f"Failure screenshot captured: {filename}")
266
438
  except Exception as e:
267
- return {
268
- "action": "scroll",
269
- "success": False,
270
- "message": f"Scroll failed: {str(e)}"
271
- }
439
+ logger.error(f"Failed to capture screenshot: {e}")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: semantio
3
- Version: 0.0.5
3
+ Version: 0.0.6
4
4
  Summary: A powerful SDK for building AI agents
5
5
  Home-page: https://github.com/Syenah/semantio
6
6
  Author: Rakesh
@@ -1,4 +1,4 @@
1
- semantio/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
1
+ semantio/__init__.py,sha256=RIeSI07dGyWBK-STKIk4IeB4bkn_3-QEKQklzSvR7hQ,82
2
2
  semantio/agent.py,sha256=uPFz1WP2eb-z-tryQOX8necS8_tv4Il6qxNmZux9hNk,31709
3
3
  semantio/memory.py,sha256=en9n3UySnj4rA0x3uR1sEdEzA7EkboQNbEHQ5KuEehw,2115
4
4
  semantio/models.py,sha256=7hmP-F_aSU8WvsG3NGeC_hep-rUbiSbjUFMDVbpKxQE,289
@@ -30,16 +30,16 @@ semantio/tools/base_tool.py,sha256=xBNSa_8a8WmA4BGRLG2dE7wj9GnBcZo7-P2SyD86GvY,5
30
30
  semantio/tools/crypto.py,sha256=mut1ztvpPcUUP3b563dh_FmKtP68KmNis3Qm8WENj8w,5559
31
31
  semantio/tools/duckduckgo.py,sha256=6mGn0js0cIsVxQlAgB8AYNLP05H8WmJKnSVosiO9iH0,5034
32
32
  semantio/tools/stocks.py,sha256=BVuK61O9OmWQjj0YdiCJY6TzpiFJ_An1UJB2RkDfX2k,5393
33
- semantio/tools/web_browser.py,sha256=wqr5pj2GybkK9IHDb8C1BipS8ujV2l36WlwA8ZbKd88,9711
33
+ semantio/tools/web_browser.py,sha256=ZjE-nrRtUEUoh6LNutMjdn9_bTlnVGU2PM2w6orjUWA,22172
34
34
  semantio/utils/__init__.py,sha256=Lx4X4iJpRhZzRmpQb80XXh5Ve8ZMOkadWAxXSmHpO_8,244
35
35
  semantio/utils/config.py,sha256=ZTwUTqxjW3-w94zoU7GzivWyJe0JJGvBfuB4RUOuEs8,1198
36
36
  semantio/utils/date_utils.py,sha256=x3oqRGv6ee_KCJ0LvCqqZh_FSgS6YGOHBwZQS4TJetY,1471
37
37
  semantio/utils/file_utils.py,sha256=b_cMuJINEGk9ikNuNHSn9lsmICWwvtnCDZ03ndH_S2I,1779
38
38
  semantio/utils/logger.py,sha256=TmGbP8BRjLMWjXi2GWzZ0RIXt70x9qX3FuIqghCNlwM,510
39
39
  semantio/utils/validation_utils.py,sha256=iwoxEb4Q5ILqV6tbesMjPWPCCoL3AmPLejGUy6q8YvQ,1284
40
- semantio-0.0.5.dist-info/LICENSE,sha256=mziLlfb9hZ8HKxm9V6BiHpmgJvmcDvswu1QBlDB-6vU,1074
41
- semantio-0.0.5.dist-info/METADATA,sha256=PtDbsZ-tWXbte0RR40K5O_OklMKZiUsb-3dxGlmjklQ,6913
42
- semantio-0.0.5.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
43
- semantio-0.0.5.dist-info/entry_points.txt,sha256=zbPgevSLwcLpdRHqI_atE8EOt8lK2vRF1AoDflDTo18,53
44
- semantio-0.0.5.dist-info/top_level.txt,sha256=Yte_6mb-bh-I_lQwMjk1GijZkxPoX4Zmp3kBftC1ZlA,9
45
- semantio-0.0.5.dist-info/RECORD,,
40
+ semantio-0.0.6.dist-info/LICENSE,sha256=mziLlfb9hZ8HKxm9V6BiHpmgJvmcDvswu1QBlDB-6vU,1074
41
+ semantio-0.0.6.dist-info/METADATA,sha256=SeDpiywG59La7HcAQxZHxk8uXLFoyEv3otMFYkJl5W4,6913
42
+ semantio-0.0.6.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
43
+ semantio-0.0.6.dist-info/entry_points.txt,sha256=zbPgevSLwcLpdRHqI_atE8EOt8lK2vRF1AoDflDTo18,53
44
+ semantio-0.0.6.dist-info/top_level.txt,sha256=Yte_6mb-bh-I_lQwMjk1GijZkxPoX4Zmp3kBftC1ZlA,9
45
+ semantio-0.0.6.dist-info/RECORD,,