semantio 0.0.5__py3-none-any.whl → 0.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
semantio/__init__.py CHANGED
@@ -0,0 +1,4 @@
1
+ import warnings
2
+
3
+ # Suppress all warnings globally
4
+ warnings.simplefilter("ignore")
@@ -1,20 +1,11 @@
1
1
  # web_browser.py
2
- from typing import Dict, Any, List, Optional
2
+ from typing import Dict, Any, List, Optional, Callable
3
3
  from pydantic import Field, BaseModel
4
- from selenium import webdriver
5
- from selenium.webdriver.common.by import By
6
- from selenium.webdriver.support.ui import WebDriverWait
7
- from selenium.webdriver.support import expected_conditions as EC
8
- from selenium.webdriver.chrome.options import Options
9
- from selenium.webdriver.chrome.service import Service
10
- from webdriver_manager.chrome import ChromeDriverManager
11
- from bs4 import BeautifulSoup
12
- import json
13
- import time
14
- import re
15
- import logging
4
+ from playwright.sync_api import sync_playwright, Page, TimeoutError as PlaywrightTimeoutError
5
+ import json, time, re, logging, os, difflib
16
6
  from .base_tool import BaseTool
17
7
 
8
+ # Global logger
18
9
  logger = logging.getLogger(__name__)
19
10
 
20
11
  class BrowserPlan(BaseModel):
@@ -26,99 +17,135 @@ class BrowserPlan(BaseModel):
26
17
  class WebBrowserTool(BaseTool):
27
18
  name: str = Field("WebBrowser", description="Name of the tool")
28
19
  description: str = Field(
29
- "Universal web automation tool for dynamic website interactions",
20
+ "Universal web automation tool with advanced element identification (DOM and image fallback), modal analysis, AJAX waiting, multi-tab support, and custom JS injection.",
30
21
  description="Tool description"
31
22
  )
32
-
23
+ default_timeout: int = 15000 # 15 seconds in milliseconds
24
+ max_retries: int = 3
25
+
26
+ def __init__(self, *args, **kwargs):
27
+ super().__init__(*args, **kwargs)
28
+ # Bypass Pydantic's restrictions for extra attributes.
29
+ object.__setattr__(self, "logger", logging.getLogger(__name__))
30
+
33
31
  def execute(self, input: Dict[str, Any]) -> Dict[str, Any]:
34
- """Execute dynamic web automation workflow"""
35
- driver = None
32
+ """
33
+ Execute the browser automation workflow.
34
+ Maintains a context string of executed tasks and passes it to fallback routines.
35
+ DOES NOT close the browser after successful execution.
36
+ """
37
+ overall_start = time.time()
38
+ results = [] # to hold summaries of executed tasks (for context)
39
+ current_url = ""
36
40
  try:
37
- driver = self._init_browser(input.get("headless", False))
38
- results = []
39
- current_url = ""
41
+ headless = input.get("headless", False)
42
+ self.default_timeout = int(input.get("timeout", 15)) * 1000
43
+ self.max_retries = int(input.get("max_retries", self.max_retries))
44
+ plan = self._generate_plan(input.get("query", ""), current_url)
45
+ if not plan.tasks:
46
+ raise ValueError("No valid tasks in the generated plan.")
47
+
48
+ # Start Playwright without a "with" block so we can leave the browser open.
49
+ p = sync_playwright().start()
50
+ browser = p.chromium.launch(headless=headless)
51
+ context = browser.new_context()
52
+ page = context.new_page()
40
53
 
41
- # Generate initial plan
42
- plan = self._generate_plan(input['query'], current_url)
54
+ # Map actions to handlers.
55
+ action_map: Dict[str, Callable[[Page, Dict[str, Any]], Dict[str, Any]]] = {
56
+ "navigate": lambda p, task: self._handle_navigation(p, task.get("value", "")),
57
+ "click": lambda p, task: self._handle_click(p, task.get("selector", "")),
58
+ "type": lambda p, task: self._handle_typing(p, task.get("selector", ""), task.get("value", ""), task),
59
+ "wait": lambda p, task: self._handle_wait(task.get("value", "")),
60
+ "wait_for_ajax": lambda p, task: self._handle_wait_for_ajax(p, task.get("value", "")),
61
+ "scroll": lambda p, task: self._handle_scroll(p, task.get("selector", "")),
62
+ "hover": lambda p, task: self._handle_hover(p, task.get("selector", "")),
63
+ "screenshot": lambda p, task: self._handle_screenshot(p, task.get("value", "screenshot.png")),
64
+ "switch_tab": lambda p, task: self._handle_switch_tab(context, task.get("value", "0")),
65
+ "execute_script": lambda p, task: self._handle_execute_script(p, task.get("value", "")),
66
+ "drag_and_drop": lambda p, task: self._handle_drag_and_drop(p, task.get("selector", ""), task.get("value", "")),
67
+ }
43
68
 
44
69
  for task in plan.tasks:
45
- result = self._execute_safe_task(driver, task)
70
+ self._dismiss_unwanted_modals(page, task_context=task.get("description", ""))
71
+ action = task.get("action", "").lower()
72
+ self.logger.info(f"Executing task: {task.get('description', action)}")
73
+ start_time = time.time()
74
+
75
+ # Build a context string from previously executed tasks.
76
+ executed_context = "\n".join([f"{r['action']}: {r['message']}" for r in results])
77
+ handler = action_map.get(action)
78
+ if not handler:
79
+ results.append({
80
+ "action": action,
81
+ "success": False,
82
+ "message": f"Unsupported action: {action}"
83
+ })
84
+ continue
85
+
86
+ result = self._execute_with_retries(page, task, handler, executed_context)
87
+ elapsed = time.time() - start_time
88
+ result["elapsed"] = elapsed
89
+ self.logger.info(f"Action '{action}' completed in {elapsed:.2f} seconds.")
46
90
  results.append(result)
47
-
48
- if not result['success']:
91
+
92
+ if not result.get("success", False):
93
+ self.logger.error(f"Task failed: {result.get('message')}")
94
+ self._capture_failure_screenshot(page, action)
49
95
  break
50
-
51
- # Update context for next tasks
52
- current_url = driver.current_url
53
96
 
54
- return {"status": "success", "results": results}
55
-
97
+ current_url = page.url
98
+
99
+ overall_elapsed = time.time() - overall_start
100
+ self.logger.info(f"Total execution time: {overall_elapsed:.2f} seconds.")
101
+ # Do not close the browser.
102
+ return {"status": "success", "results": results, "total_time": overall_elapsed}
56
103
  except Exception as e:
104
+ self.logger.exception("Execution error:")
57
105
  return {"status": "error", "message": str(e)}
58
- finally:
59
- if driver:
60
- driver.quit()
61
-
62
- def _init_browser(self, headless: bool) -> webdriver.Chrome:
63
- """Initialize browser with advanced options"""
64
- options = Options()
65
- options.add_argument("--start-maximized")
66
- options.add_argument("--disable-blink-features=AutomationControlled")
67
- options.add_experimental_option("excludeSwitches", ["enable-automation"])
68
-
69
- if headless:
70
- options.add_argument("--headless=new")
71
-
72
- return webdriver.Chrome(
73
- service=Service(ChromeDriverManager().install()),
74
- options=options
75
- )
76
106
 
77
107
  def _generate_plan(self, query: str, current_url: str) -> BrowserPlan:
78
- """Generate adaptive execution plan using LLM"""
79
108
  prompt = f"""Generate browser automation plan for: {query}
80
-
81
- Current URL: {current_url or 'No page loaded yet'}
82
-
83
- Required JSON format:
109
+
110
+ Current URL: {current_url or 'No page loaded yet'}
111
+
112
+ Required JSON format:
113
+ {{
114
+ "tasks": [
84
115
  {{
85
- "tasks": [
86
- {{
87
- "action": "navigate|click|type|wait|scroll",
88
- "selector": "CSS selector (optional)",
89
- "value": "input text/URL/seconds",
90
- "description": "action purpose"
91
- }}
92
- ]
116
+ "action": "navigate|click|type|wait|wait_for_ajax|scroll|hover|screenshot|switch_tab|execute_script|drag_and_drop",
117
+ "selector": "CSS selector (optional)",
118
+ "value": "input text/URL/seconds/filename/target-selector",
119
+ "description": "action purpose"
93
120
  }}
94
-
95
- Guidelines:
96
- 1. Prefer IDs in selectors (#element-id)
97
- 2. Use semantic attributes (aria-label, name)
98
- 3. Include wait steps after navigation
99
- 4. Prioritize visible elements
100
- 5. Add scroll steps for hidden elements
101
- """
102
-
121
+ ]
122
+ }}
123
+
124
+ Guidelines:
125
+ 1. Prefer IDs in selectors (#element-id) and semantic attributes.
126
+ 2. Include wait steps after navigation and wait for AJAX where applicable.
127
+ 3. Dismiss any modals/pop-ups that are not part of the task.
128
+ 4. For drag_and_drop, use source selector in 'selector' and target selector in 'value'.
129
+ 5. For execute_script, 'value' should contain valid JavaScript.
130
+ 6. For switch_tab, 'value' should be an index or keyword 'new'.
131
+ """
103
132
  response = self.llm.generate(prompt=prompt)
104
133
  return self._parse_plan(response)
105
134
 
106
135
  def _parse_plan(self, response: str) -> BrowserPlan:
107
- """Robust JSON parsing with multiple fallback strategies"""
108
136
  try:
109
- # Try extracting JSON from markdown code block
110
137
  json_match = re.search(r'```json\n?(.+?)\n?```', response, re.DOTALL)
111
138
  if json_match:
112
139
  plan_data = json.loads(json_match.group(1).strip())
113
140
  else:
114
- # Fallback to extract first JSON object
115
- json_str = re.search(r'\{.*\}', response, re.DOTALL).group()
116
- plan_data = json.loads(json_str)
117
-
118
- # Validate tasks structure
141
+ json_str_match = re.search(r'\{.*\}', response, re.DOTALL)
142
+ if not json_str_match:
143
+ raise ValueError("No JSON object found in the response.")
144
+ plan_data = json.loads(json_str_match.group())
119
145
  validated_tasks = []
120
146
  for task in plan_data.get("tasks", []):
121
147
  if not all(key in task for key in ["action", "description"]):
148
+ self.logger.warning(f"Skipping task due to missing keys: {task}")
122
149
  continue
123
150
  validated_tasks.append({
124
151
  "action": task["action"],
@@ -126,146 +153,393 @@ class WebBrowserTool(BaseTool):
126
153
  "value": task.get("value", ""),
127
154
  "description": task["description"]
128
155
  })
129
-
130
156
  return BrowserPlan(tasks=validated_tasks)
131
-
132
- except (json.JSONDecodeError, AttributeError) as e:
133
- logger.error(f"Plan parsing failed: {e}")
157
+ except (json.JSONDecodeError, AttributeError, ValueError) as e:
158
+ self.logger.error(f"Plan parsing failed: {e}")
134
159
  return BrowserPlan(tasks=[])
135
160
 
136
- def _execute_safe_task(self, driver, task: Dict) -> Dict[str, Any]:
137
- """Execute task with comprehensive error handling"""
161
def _execute_with_retries(self, page: Page, task: Dict[str, Any],
                          handler: Callable[[Page, Dict[str, Any]], Dict[str, Any]],
                          executed_context: str = "") -> Dict[str, Any]:
    """Run one plan task, retrying on failure, then try the image-based fallback.

    Args:
        page: Page object handed through to the handler and the fallback.
        task: Plan task dict; only its "action" key is read here.
        handler: Callable invoked (via ``_execute_safe_task``) as ``handler(page, task)``.
        executed_context: Summary of previously executed tasks, forwarded
            unchanged to ``_fallback_with_image_llm``.

    Returns:
        The first successful result dict, or the last failed result. For
        "click" and "type" actions that exhausted every attempt, the result of
        ``_fallback_with_image_llm`` is returned instead.
    """
    # Always make at least one attempt so callers never receive an empty dict
    # (a result without "action"/"message" keys) when max_retries is 0.
    max_attempts = max(1, int(self.max_retries))
    result: Dict[str, Any] = {}
    for attempt in range(1, max_attempts + 1):
        result = self._execute_safe_task(page, task, handler)
        if result.get("success", False):
            return result
        # Back off only when another attempt will actually follow; sleeping
        # (and logging "Retrying") after the final failure is wasted time.
        if attempt < max_attempts:
            self.logger.info(f"Retrying task '{task.get('action')}' (attempt {attempt + 1}/{max_attempts})")
            time.sleep(1 * attempt)
    # Compare case-insensitively so "Click"/"TYPE" also reach the fallback.
    if str(task.get("action", "")).lower() in ("click", "type"):
        self.logger.info("HTML-based automation failed. Using fallback with image-based LLM.")
        result = self._fallback_with_image_llm(page, task, executed_context)
    return result
179
+
180
+ def _execute_safe_task(self, page: Page, task: Dict[str, Any],
181
+ handler: Callable[[Page, Dict[str, Any]], Dict[str, Any]]) -> Dict[str, Any]:
138
182
  try:
139
- action = task["action"].lower()
140
- selector = task.get("selector", "")
141
- value = task.get("value", "")
142
-
143
- if action == "navigate":
144
- return self._handle_navigation(driver, value)
145
-
146
- elif action == "click":
147
- return self._handle_click(driver, selector)
148
-
149
- elif action == "type":
150
- return self._handle_typing(driver, selector, value)
151
-
152
- elif action == "wait":
153
- return self._handle_wait(value)
154
-
155
- elif action == "scroll":
156
- return self._handle_scroll(driver, selector)
157
-
158
- return {
159
- "action": action,
160
- "success": False,
161
- "message": f"Unsupported action: {action}"
162
- }
163
-
183
+ return handler(page, task)
164
184
  except Exception as e:
165
- return {
166
- "action": action,
167
- "success": False,
168
- "message": f"Critical error: {str(e)}"
169
- }
185
+ action = task.get("action", "unknown")
186
+ self.logger.exception(f"Error executing task '{action}':")
187
+ return {"action": action, "success": False, "message": f"Critical error: {str(e)}"}
188
+
189
def _dismiss_unwanted_modals(self, page: Page, task_context: str = ""):
    """Scan the page for visible modal-like elements and hand each to ``_handle_modal``.

    This is a best-effort pre-task hook: a failing DOM query or visibility
    check is logged and skipped instead of being raised, so a problem while
    scanning never prevents the task itself from running.

    Args:
        page: Page to scan.
        task_context: Description of the upcoming task, forwarded to
            ``_handle_modal`` unchanged.
    """
    modal_selectors = [".modal", ".popup", '[role="dialog"]', ".overlay", ".lightbox"]
    for selector in modal_selectors:
        try:
            elements = page.query_selector_all(selector)
        except Exception as e:
            self.logger.error(f"Modal scan failed for selector {selector}: {e}")
            continue
        for modal in elements:
            try:
                visible = modal.is_visible()
            except Exception as e:
                # The handle may have gone stale (e.g. an earlier modal was
                # removed together with its children) -- skip it.
                self.logger.error(f"Modal visibility check failed for selector {selector}: {e}")
                continue
            if visible:
                self._handle_modal(page, modal, task_context)
196
+
197
def _handle_modal(self, page: Page, modal_element, task_context: str):
    """Ask the LLM whether a visible modal should be dismissed, and dismiss it if so.

    A screenshot of the modal is sent to ``self.llm.generate_from_image``
    together with the task context. When the reply's JSON has
    ``"action": "dismiss"``, the first visible close button inside the modal is
    clicked; if none is visible the modal element is removed from the DOM.
    Any error is logged and swallowed -- modal handling is best-effort.

    Args:
        page: Page that owns the modal; used to remove the element.
        modal_element: Element handle of the modal.
        task_context: Description of the current task, embedded in the prompt.
    """
    try:
        modal_screenshot = modal_element.screenshot()
        prompt = (
            f"A modal is displayed on the page. The content is visible in the attached image. "
            f"The current task context is: \"{task_context}\". "
            "Based on the content of the modal and the task context, decide whether to dismiss the modal. "
            "Return a JSON response in the format: { \"action\": \"dismiss\" } to dismiss or { \"action\": \"ignore\" } to leave it. "
            "Return only the JSON."
        )
        response_text = self.llm.generate_from_image(prompt, image_bytes=modal_screenshot)
        self.logger.info(f"LLM response for modal analysis: {response_text}")
        # Prefer a fenced ```json block; otherwise take the first {...} object
        # so a reply with prose around the JSON still parses.
        fenced = re.search(r'```json\n?(.+?)\n?```', response_text, re.DOTALL)
        if fenced:
            json_text = fenced.group(1).strip()
        else:
            bare = re.search(r'\{.*\}', response_text, re.DOTALL)
            json_text = bare.group() if bare else response_text.strip()
        decision = json.loads(json_text)
        if decision.get("action") == "dismiss":
            close_buttons = modal_element.query_selector_all(".close, .btn-close, [aria-label='Close'], [data-dismiss='modal']")
            for btn in close_buttons:
                if btn.is_visible():
                    btn.click()
                    self.logger.info("Modal dismissed using a close button.")
                    return
            # No usable close button: remove the modal node itself.
            page.evaluate("(modal) => modal.remove()", modal_element)
            self.logger.info("Modal dismissed by removal.")
        else:
            self.logger.info("Modal left intact according to LLM analysis.")
    except Exception as e:
        self.logger.error(f"Modal handling error: {e}")
225
+
226
def _advanced_find_element(self, page: Page, keyword: str):
    """Return the visible element whose identifying text best matches ``keyword``.

    Every ``input, textarea, button, a, div`` element is scored by the
    ``difflib.SequenceMatcher`` ratio between ``keyword`` and the element's id,
    name, placeholder, aria-label and inner text joined by spaces
    (case-insensitive). The best-scoring element is returned only when its
    ratio exceeds 0.5.

    Args:
        page: Page to search.
        keyword: Text to look for; an empty/None keyword matches nothing.

    Returns:
        The best-matching element handle, or None when nothing scores above
        0.5 or the page cannot be queried.
    """
    try:
        candidates = page.query_selector_all("input, textarea, button, a, div")
    except Exception as e:
        self.logger.error(f"Advanced find element error: {e}")
        return None

    needle = (keyword or "").lower()
    best_match = None
    best_ratio = 0.0
    for candidate in candidates:
        # Score each candidate in its own try block: one stale or otherwise
        # failing element must not abort the whole search.
        try:
            # A hidden element cannot be filled or clicked by the caller, so
            # it is never a useful match.
            if not candidate.is_visible():
                continue
            attrs = page.evaluate(
                """(el) => {
                    return {
                        id: el.id,
                        name: el.getAttribute('name'),
                        placeholder: el.getAttribute('placeholder'),
                        aria: el.getAttribute('aria-label'),
                        text: el.innerText
                    };
                }""",
                candidate,
            )
            combined_text = " ".join(
                filter(None, [
                    attrs.get("id"),
                    attrs.get("name"),
                    attrs.get("placeholder"),
                    attrs.get("aria"),
                    attrs.get("text"),
                ])
            )
        except Exception as e:
            self.logger.error(f"Advanced find element error: {e}")
            continue
        ratio = difflib.SequenceMatcher(None, combined_text.lower(), needle).ratio()
        if ratio > best_ratio:
            best_ratio = ratio
            best_match = candidate
    if best_ratio > 0.5:
        self.logger.info(f"Advanced fallback detected element with similarity {best_ratio:.2f} for keyword '{keyword}'")
        return best_match
    return None
264
+
265
def _annotate_page_with_numbers(self, page: Page, query: str = "button, a, input, [onclick]"):
    """Overlay a red numbered box on every rendered element matching ``query``.

    Overlays left by a previous call are removed first. Numbering starts at 1
    and follows document order; elements with an empty bounding box or
    ``visibility: hidden`` are skipped and do not consume a number.

    Args:
        page: Page to annotate.
        query: CSS selector of the elements to number.
    """
    # The selector is passed as an evaluate() argument rather than spliced into
    # the script text: a selector containing a quote (e.g. [aria-label='Close'])
    # would otherwise terminate the JS string literal and break the script.
    script = """
    (query) => {
        document.querySelectorAll('.automation-annotation-overlay').forEach(el => el.remove());
        const elements = document.querySelectorAll(query);
        let counter = 1;
        elements.forEach(el => {
            const rect = el.getBoundingClientRect();
            if (rect.width === 0 || rect.height === 0) return;
            // NOTE(review): intended to mirror the is_visible() filter used when
            // numbers are resolved back to elements -- confirm against Playwright's
            // definition of "visible".
            if (window.getComputedStyle(el).visibility === 'hidden') return;
            const overlay = document.createElement('div');
            overlay.classList.add('automation-annotation-overlay');
            overlay.style.position = 'absolute';
            overlay.style.left = (rect.left + window.scrollX) + 'px';
            overlay.style.top = (rect.top + window.scrollY) + 'px';
            overlay.style.width = rect.width + 'px';
            overlay.style.height = rect.height + 'px';
            overlay.style.border = '2px solid red';
            overlay.style.zIndex = 9999;
            overlay.style.pointerEvents = 'none';
            overlay.textContent = counter;
            overlay.style.fontSize = '16px';
            overlay.style.fontWeight = 'bold';
            overlay.style.color = 'red';
            overlay.style.backgroundColor = 'rgba(255, 255, 255, 0.7)';
            document.body.appendChild(overlay);
            counter += 1;
        });
    }
    """
    page.evaluate(script, query)
295
+
296
def _click_element_by_number(self, page: Page, number: int) -> Dict[str, Any]:
    """Click the ``number``-th (1-based) visible ``button, a, input, [onclick]`` element.

    Args:
        page: Page to search.
        number: 1-based position among the visible candidates. Values that can
            be converted with ``int()`` (e.g. "3") are accepted.

    Returns:
        A result dict with "action", "success" and "message" keys. Invalid or
        out-of-range numbers and click errors yield ``success: False`` rather
        than raising.
    """
    try:
        index = int(number) - 1
    except (TypeError, ValueError):
        return {"action": "click", "success": False, "message": f"Invalid element number: {number!r}"}
    # Reject 0 and negatives explicitly: a negative index would otherwise
    # silently select an element counted from the END of the list.
    if index < 0:
        return {"action": "click", "success": False, "message": f"Element number {number} not found."}
    candidates = [el for el in page.query_selector_all("button, a, input, [onclick]") if el.is_visible()]
    if index >= len(candidates):
        return {"action": "click", "success": False, "message": f"Element number {number} not found."}
    candidate = candidates[index]
    try:
        # Scrolling can fail too (e.g. the element detached), so it belongs
        # inside the same guarded section as the click.
        candidate.scroll_into_view_if_needed()
        candidate.click()
        return {"action": "click", "success": True, "message": f"Clicked element number {number}"}
    except Exception as e:
        return {"action": "click", "success": False, "message": f"Click failed: {str(e)}"}
309
+
310
def _fallback_with_image_llm(self, page: Page, task: Dict[str, Any], executed_context: str = "") -> Dict[str, Any]:
    """Recover a failed click/type task by asking the LLM to read an annotated screenshot.

    The page is annotated with numbered overlays, a PNG screenshot is sent to
    ``self.llm.generate_from_image`` together with ``executed_context`` and the
    task description, and the reply is expected to be a JSON array of objects::

        {"action": "click" or "type", "element_number": <number>, "text": <text>}

    Each returned task is executed in order. For "type" tasks the text from the
    original task always wins over a different text returned by the LLM.

    Args:
        page: Page to annotate, screenshot and act on.
        task: The original plan task ("action", "value" and "description" are read).
        executed_context: Summary of the tasks executed so far, embedded in the prompt.

    Returns:
        A result dict whose "success" is True when at least one fallback task
        succeeded and whose "message" joins the individual messages with "; ".
        Unparseable replies and malformed tasks produce a failed result
        instead of raising.
    """
    requested_action = task.get("action")
    original_text = str(task.get("value") or "").strip()
    query = "input, textarea" if requested_action == "type" else "button, a, input, [onclick]"
    self._annotate_page_with_numbers(page, query=query)
    time.sleep(1)  # give the overlays a moment to render before the screenshot
    screenshot_bytes = page.screenshot(type="png")
    extra = ""
    if requested_action == "type":
        extra = f"\nThe exact text to be entered is: \"{original_text}\"."
    prompt = (
        f"Tasks executed so far:\n{executed_context}\n\n"
        f"The following task remains: {task.get('description', '')}.{extra}\n"
        "I have annotated the page with numbered overlays using the appropriate query. "
        "Based on the attached screenshot, generate a JSON array of tasks that need to be performed next. "
        "Each task should be a JSON object with the format:\n"
        "[\n"
        " {\n"
        " \"action\": \"click\" or \"type\",\n"
        " \"element_number\": <number>,\n"
        " \"text\": <if action is 'type', the text to type; otherwise an empty string>\n"
        " },\n"
        " ...\n"
        "]\n"
        "Return only the JSON array."
    )
    response_text = self.llm.generate_from_image(prompt, image_bytes=screenshot_bytes)
    self.logger.info(f"LLM response for fallback: {response_text}")

    # Try the raw reply first, then the contents of a fenced ```json block.
    # Both parses are guarded so a malformed fenced block cannot raise.
    payloads = [response_text.strip()]
    fenced = re.search(r'```json\n?(.+?)\n?```', response_text, re.DOTALL)
    if fenced:
        payloads.append(fenced.group(1).strip())
    parsed = False
    fallback_tasks = None
    parse_error = None
    for payload in payloads:
        try:
            fallback_tasks = json.loads(payload)
            parsed = True
            break
        except ValueError as e:
            if parse_error is None:
                parse_error = e
    if not parsed:
        return {"action": requested_action, "success": False, "message": f"Fallback failed to parse JSON: {str(parse_error)}"}
    if not isinstance(fallback_tasks, list):
        fallback_tasks = [fallback_tasks]

    fallback_results = []
    for fb_task in fallback_tasks:
        action = fb_task.get("action") if isinstance(fb_task, dict) else None
        if action not in ("click", "type"):
            fallback_results.append({"action": requested_action, "success": False, "message": "Invalid fallback action."})
            continue

        # The model may return null, a string, or 0 for the element number;
        # normalise here so neither branch below does arithmetic on bad input.
        raw_number = fb_task.get("element_number")
        try:
            element_number = int(raw_number)
        except (TypeError, ValueError):
            element_number = 0
        if element_number < 1:
            fallback_results.append({"action": action, "success": False, "message": f"Element number {raw_number} not found."})
            continue

        if action == "click":
            self.logger.info(f"LLM indicated fallback click on element number {element_number}.")
            try:
                res = self._click_element_by_number(page, element_number)
            except Exception as ex:
                res = {"action": "click", "success": False, "message": f"Click failed: {str(ex)}"}
        else:
            returned_text = str(fb_task.get("text") or "").strip()
            if returned_text.lower() != original_text.lower():
                self.logger.info("Overriding LLM-provided text with original input text.")
                text = original_text
            else:
                text = returned_text
            try:
                candidates = [el for el in page.query_selector_all("input, textarea") if el.is_visible()]
                if element_number - 1 < len(candidates):
                    candidate = candidates[element_number - 1]
                    candidate.scroll_into_view_if_needed()
                    candidate.fill(text, timeout=self.default_timeout)
                    res = {"action": "type", "success": True, "message": f"Typed '{text}' into element number {element_number}"}
                else:
                    res = {"action": "type", "success": False, "message": f"Element number {element_number} not found."}
            except Exception as ex:
                res = {"action": "type", "success": False, "message": f"Typing failed on fallback element: {str(ex)}"}
        fallback_results.append(res)

    # NOTE: one successful step is enough for overall success (existing contract).
    overall_success = any(r.get("success", False) for r in fallback_results)
    overall_message = "; ".join([r.get("message", "") for r in fallback_results])
    return {"action": requested_action, "success": overall_success, "message": overall_message}
170
396
 
171
- def _handle_navigation(self, driver, url: str) -> Dict[str, Any]:
172
- """Smart navigation handler"""
397
+ def _handle_navigation(self, page: Page, url: str) -> Dict[str, Any]:
173
398
  if not url.startswith(("http://", "https://")):
174
399
  url = f"https://{url}"
175
-
176
400
  try:
177
- driver.get(url)
178
- WebDriverWait(driver, 15).until(
179
- EC.presence_of_element_located((By.TAG_NAME, "body"))
180
- )
181
- return {
182
- "action": "navigate",
183
- "success": True,
184
- "message": f"Navigated to {url}"
185
- }
401
+ page.goto(url, timeout=self.default_timeout)
402
+ page.wait_for_selector("body", timeout=self.default_timeout)
403
+ return {"action": "navigate", "success": True, "message": f"Navigated to {url}"}
404
+ except PlaywrightTimeoutError as e:
405
+ self.logger.error(f"Navigation to {url} timed out: {e}")
406
+ return {"action": "navigate", "success": False, "message": f"Navigation timed out: {str(e)}"}
186
407
  except Exception as e:
187
- return {
188
- "action": "navigate",
189
- "success": False,
190
- "message": f"Navigation failed: {str(e)}"
191
- }
408
+ self.logger.error(f"Navigation to {url} failed: {e}")
409
+ return {"action": "navigate", "success": False, "message": f"Navigation failed: {str(e)}"}
192
410
 
193
- def _handle_click(self, driver, selector: str) -> Dict[str, Any]:
194
- """Dynamic click handler"""
411
+ def _handle_click(self, page: Page, selector: str) -> Dict[str, Any]:
195
412
  try:
196
- element = WebDriverWait(driver, 15).until(
197
- EC.element_to_be_clickable((By.CSS_SELECTOR, selector))
198
- )
199
- driver.execute_script("arguments[0].scrollIntoView({behavior: 'smooth'});", element)
200
- element.click()
201
- return {
202
- "action": "click",
203
- "success": True,
204
- "message": f"Clicked element: {selector}"
205
- }
413
+ page.wait_for_selector(selector, state="visible", timeout=self.default_timeout)
414
+ page.click(selector, timeout=self.default_timeout)
415
+ return {"action": "click", "success": True, "message": f"Clicked element: {selector}"}
416
+ except PlaywrightTimeoutError as e:
417
+ self.logger.error(f"Click action timed out on selector {selector}: {e}")
418
+ return {"action": "click", "success": False, "message": f"Click timed out: {str(e)}"}
206
419
  except Exception as e:
207
- return {
208
- "action": "click",
209
- "success": False,
210
- "message": f"Click failed: {str(e)}"
211
- }
420
+ self.logger.error(f"Click action failed on selector {selector}: {e}")
421
+ return {"action": "click", "success": False, "message": f"Click failed: {str(e)}"}
212
422
 
213
- def _handle_typing(self, driver, selector: str, text: str) -> Dict[str, Any]:
214
- """Universal typing handler"""
423
+ def _handle_typing(self, page: Page, selector: str, text: str, task: Dict[str, Any]) -> Dict[str, Any]:
215
424
  try:
216
- element = WebDriverWait(driver, 15).until(
217
- EC.presence_of_element_located((By.CSS_SELECTOR, selector))
218
- )
219
- element.clear()
220
- element.send_keys(text)
221
- return {
222
- "action": "type",
223
- "success": True,
224
- "message": f"Typed '{text}' into {selector}"
225
- }
425
+ page.wait_for_selector(selector, state="attached", timeout=self.default_timeout)
426
+ page.fill(selector, text, timeout=self.default_timeout)
427
+ return {"action": "type", "success": True, "message": f"Typed '{text}' into element."}
428
+ except PlaywrightTimeoutError as e:
429
+ self.logger.info("Primary selector failed; using advanced fallback for element detection.")
430
+ element = self._advanced_find_element(page, "search")
431
+ if not element:
432
+ return {"action": "type", "success": False, "message": f"Typing failed: No search-like element found; error: {str(e)}"}
433
+ try:
434
+ element.fill(text, timeout=self.default_timeout)
435
+ return {"action": "type", "success": True, "message": f"Typed '{text}' into fallback element."}
436
+ except Exception as ex:
437
+ return {"action": "type", "success": False, "message": f"Typing failed on fallback element: {str(ex)}"}
226
438
  except Exception as e:
227
- return {
228
- "action": "type",
229
- "success": False,
230
- "message": f"Typing failed: {str(e)}"
231
- }
439
+ self.logger.error(f"Typing action failed: {e}")
440
+ return {"action": "type", "success": False, "message": f"Typing failed: {str(e)}"}
232
441
 
233
442
  def _handle_wait(self, seconds: str) -> Dict[str, Any]:
234
- """Configurable wait handler"""
235
443
  try:
236
444
  wait_time = float(seconds)
445
+ self.logger.info(f"Waiting for {wait_time} seconds")
237
446
  time.sleep(wait_time)
238
- return {
239
- "action": "wait",
240
- "success": True,
241
- "message": f"Waited {wait_time} seconds"
242
- }
243
- except ValueError:
244
- return {
245
- "action": "wait",
246
- "success": False,
247
- "message": "Invalid wait time"
248
- }
447
+ return {"action": "wait", "success": True, "message": f"Waited {wait_time} seconds"}
448
+ except ValueError as e:
449
+ self.logger.error(f"Invalid wait time provided: {seconds}")
450
+ return {"action": "wait", "success": False, "message": "Invalid wait time"}
249
451
 
250
- def _handle_scroll(self, driver, selector: str) -> Dict[str, Any]:
251
- """Smart scroll handler"""
452
def _handle_wait_for_ajax(self, page: Page, seconds: str) -> Dict[str, Any]:
    """Poll the page until jQuery reports no active requests, or a timeout elapses.

    Only ``jQuery.active`` is inspected; pages without jQuery are treated as
    idle immediately. Requests made through ``fetch`` or raw XHR are not
    tracked by this check.

    Args:
        page: Page to poll.
        seconds: Maximum time to wait, in seconds. Numbers and numeric strings
            (including fractions such as "2.5") are accepted; an empty value
            means 30 seconds.

    Returns:
        A result dict. Timing out is still reported as ``success: True`` (the
        wait is best-effort and must not stop the plan), but the message says
        that the timeout was hit. ``success: False`` is returned only when the
        timeout value is invalid or the page evaluation raises.
    """
    try:
        raw = "" if seconds is None else str(seconds).strip()
        timeout_seconds = float(raw) if raw else 30
        self.logger.info(f"Waiting for AJAX/network activity for up to {timeout_seconds} seconds.")
        deadline = time.monotonic() + timeout_seconds
        while True:
            ajax_complete = page.evaluate("""
                () => {
                    return window.jQuery ? jQuery.active === 0 : true;
                }
            """)
            if ajax_complete:
                return {"action": "wait_for_ajax", "success": True, "message": "AJAX/network activity subsided."}
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                break
            # Never sleep past the deadline.
            time.sleep(min(0.5, remaining))
        return {
            "action": "wait_for_ajax",
            "success": True,
            "message": f"Timed out after {timeout_seconds} seconds waiting for AJAX/network activity; continuing.",
        }
    except Exception as e:
        self.logger.error(f"Wait for AJAX failed: {e}")
        return {"action": "wait_for_ajax", "success": False, "message": f"Wait for AJAX failed: {str(e)}"}
471
+
472
+ def _handle_scroll(self, page: Page, selector: str) -> Dict[str, Any]:
252
473
  try:
253
474
  if selector:
254
- element = WebDriverWait(driver, 15).until(
255
- EC.presence_of_element_located((By.CSS_SELECTOR, selector))
256
- )
257
- driver.execute_script("arguments[0].scrollIntoView({behavior: 'smooth'});", element)
475
+ page.wait_for_selector(selector, timeout=self.default_timeout)
476
+ page.eval_on_selector(selector, "el => el.scrollIntoView({behavior: 'smooth', block: 'center'})")
477
+ scroll_target = selector
258
478
  else:
259
- driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
260
-
261
- return {
262
- "action": "scroll",
263
- "success": True,
264
- "message": f"Scrolled to {selector or 'page bottom'}"
265
- }
479
+ page.evaluate("window.scrollTo(0, document.body.scrollHeight);")
480
+ scroll_target = "page bottom"
481
+ return {"action": "scroll", "success": True, "message": f"Scrolled to {scroll_target}"}
482
+ except Exception as e:
483
+ self.logger.error(f"Scroll action failed on selector {selector}: {e}")
484
+ return {"action": "scroll", "success": False, "message": f"Scroll failed: {str(e)}"}
485
+
486
def _handle_hover(self, page: Page, selector: str) -> Dict[str, Any]:
    """Wait for ``selector`` to become visible, then hover the pointer over it.

    Args:
        page: Page to act on.
        selector: Selector of the element to hover.

    Returns:
        A result dict with "action", "success" and "message" keys; failures
        (including an empty selector) are reported, never raised.
    """
    # Plan tasks may omit the selector; report that directly instead of
    # handing an empty selector to the browser.
    if not selector:
        return {"action": "hover", "success": False, "message": "Hover failed: no selector provided"}
    try:
        page.wait_for_selector(selector, state="visible", timeout=self.default_timeout)
        page.hover(selector, timeout=self.default_timeout)
        return {"action": "hover", "success": True, "message": f"Hovered over {selector}"}
    except Exception as e:
        self.logger.error(f"Hover action failed on selector {selector}: {e}")
        return {"action": "hover", "success": False, "message": f"Hover failed: {str(e)}"}
494
+
495
def _handle_screenshot(self, page: Page, filename: str) -> Dict[str, Any]:
    """Save a screenshot of ``page`` to ``filename``.

    Args:
        page: Page to capture.
        filename: Destination path. An empty or whitespace-only value falls
            back to "screenshot.png".

    Returns:
        A result dict with "action", "success" and "message" keys; errors are
        reported, never raised.
    """
    # Parsed plan tasks always carry a "value" key (an empty string when the
    # model omitted it), so a dict-level default on the caller's side never
    # applies -- the default has to be enforced here.
    filename = (filename or "").strip() or "screenshot.png"
    try:
        page.screenshot(path=filename)
        return {"action": "screenshot", "success": True, "message": f"Screenshot saved as {filename}"}
    except Exception as e:
        self.logger.error(f"Screenshot capture failed: {e}")
        return {"action": "screenshot", "success": False, "message": f"Screenshot failed: {str(e)}"}
502
+
503
def _handle_switch_tab(self, context, value: str) -> Dict[str, Any]:
    """Bring one of the context's open tabs to the front.

    Args:
        context: Browser context whose ``pages`` list is consulted.
        value: Zero-based tab index, or the keyword "new" for the last page in
            ``context.pages``. An empty value means index 0.

    Returns:
        A result dict with "action", "success" and "message" keys; errors are
        reported, never raised.

    NOTE(review): this only activates the selected tab. The handler returns a
    result dict, so it has no way to hand the selected page back to its
    caller -- confirm whether subsequent actions are expected to target it.
    """
    try:
        pages = context.pages
        if not pages:
            return {"action": "switch_tab", "success": False, "message": "Switch tab failed: no open tabs"}
        choice = str(value).strip() or "0"
        if choice.lower() == "new":
            target_page = pages[-1]
        else:
            idx = int(choice)
            # Reject negatives: they would silently count from the end.
            if 0 <= idx < len(pages):
                target_page = pages[idx]
            else:
                return {"action": "switch_tab", "success": False, "message": f"Tab index {value} out of range"}
        # Actually perform the switch; selecting the page alone does nothing.
        target_page.bring_to_front()
        return {"action": "switch_tab", "success": True, "message": f"Switched to tab {value}"}
    except Exception as e:
        self.logger.error(f"Switch tab failed: {e}")
        return {"action": "switch_tab", "success": False, "message": f"Switch tab failed: {str(e)}"}
518
+
519
def _handle_execute_script(self, page: Page, script: str) -> Dict[str, Any]:
    """Evaluate ``script`` in the page and report its result.

    Args:
        page: Page in which the script is evaluated.
        script: JavaScript source passed to ``page.evaluate``.

    Returns:
        On success, a result dict that additionally carries the evaluation
        result under "result". An empty script or an evaluation error yields
        ``success: False``.
    """
    # SECURITY NOTE(review): the script text comes from the generated plan and
    # is evaluated verbatim in the page. Confirm that running model-generated
    # JavaScript against the target sites is acceptable.
    if not script or not script.strip():
        return {"action": "execute_script", "success": False, "message": "Script execution failed: no script provided"}
    try:
        result = page.evaluate(script)
        return {"action": "execute_script", "success": True, "message": "Script executed successfully", "result": result}
    except Exception as e:
        self.logger.error(f"Execute script failed: {e}")
        return {"action": "execute_script", "success": False, "message": f"Script execution failed: {str(e)}"}
526
+
527
def _handle_drag_and_drop(self, page: Page, source_selector: str, target_selector: str) -> Dict[str, Any]:
    """Drag the element matching ``source_selector`` onto ``target_selector``.

    Both selectors are waited for before the drag starts.

    Args:
        page: Page to act on.
        source_selector: Selector of the element to drag.
        target_selector: Selector of the drop target.

    Returns:
        A result dict with "action", "success" and "message" keys; failures
        (including a missing selector) are reported, never raised.
    """
    # Both ends are required; a plan task that omits one cannot be executed.
    if not source_selector or not target_selector:
        return {
            "action": "drag_and_drop",
            "success": False,
            "message": "Drag and drop failed: both source and target selectors are required",
        }
    try:
        page.wait_for_selector(source_selector, timeout=self.default_timeout)
        page.wait_for_selector(target_selector, timeout=self.default_timeout)
        source = page.locator(source_selector)
        target = page.locator(target_selector)
        source.drag_to(target, timeout=self.default_timeout)
        return {"action": "drag_and_drop", "success": True, "message": f"Dragged element from {source_selector} to {target_selector}"}
    except Exception as e:
        self.logger.error(f"Drag and drop failed from {source_selector} to {target_selector}: {e}")
        return {"action": "drag_and_drop", "success": False, "message": f"Drag and drop failed: {str(e)}"}
538
+
539
+ def _capture_failure_screenshot(self, page: Page, action: str):
540
+ filename = f"failure_{action}_{int(time.time())}.png"
541
+ try:
542
+ page.screenshot(path=filename)
543
+ self.logger.info(f"Failure screenshot captured: {filename}")
266
544
  except Exception as e:
267
- return {
268
- "action": "scroll",
269
- "success": False,
270
- "message": f"Scroll failed: {str(e)}"
271
- }
545
+ self.logger.error(f"Failed to capture screenshot: {e}")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: semantio
3
- Version: 0.0.5
3
+ Version: 0.0.7
4
4
  Summary: A powerful SDK for building AI agents
5
5
  Home-page: https://github.com/Syenah/semantio
6
6
  Author: Rakesh
@@ -33,7 +33,6 @@ Requires-Dist: sentence-transformers
33
33
  Requires-Dist: fuzzywuzzy
34
34
  Requires-Dist: duckduckgo-search
35
35
  Requires-Dist: yfinance
36
- Requires-Dist: selenium
37
36
  Requires-Dist: beautifulsoup4
38
37
  Requires-Dist: webdriver-manager
39
38
  Requires-Dist: validators
@@ -1,4 +1,4 @@
1
- semantio/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
1
+ semantio/__init__.py,sha256=RIeSI07dGyWBK-STKIk4IeB4bkn_3-QEKQklzSvR7hQ,82
2
2
  semantio/agent.py,sha256=uPFz1WP2eb-z-tryQOX8necS8_tv4Il6qxNmZux9hNk,31709
3
3
  semantio/memory.py,sha256=en9n3UySnj4rA0x3uR1sEdEzA7EkboQNbEHQ5KuEehw,2115
4
4
  semantio/models.py,sha256=7hmP-F_aSU8WvsG3NGeC_hep-rUbiSbjUFMDVbpKxQE,289
@@ -30,16 +30,16 @@ semantio/tools/base_tool.py,sha256=xBNSa_8a8WmA4BGRLG2dE7wj9GnBcZo7-P2SyD86GvY,5
30
30
  semantio/tools/crypto.py,sha256=mut1ztvpPcUUP3b563dh_FmKtP68KmNis3Qm8WENj8w,5559
31
31
  semantio/tools/duckduckgo.py,sha256=6mGn0js0cIsVxQlAgB8AYNLP05H8WmJKnSVosiO9iH0,5034
32
32
  semantio/tools/stocks.py,sha256=BVuK61O9OmWQjj0YdiCJY6TzpiFJ_An1UJB2RkDfX2k,5393
33
- semantio/tools/web_browser.py,sha256=wqr5pj2GybkK9IHDb8C1BipS8ujV2l36WlwA8ZbKd88,9711
33
+ semantio/tools/web_browser.py,sha256=8-_SXvu3CRnIwKBlcmNe7-9DOd4y7OC7T24RB0xKMnI,28911
34
34
  semantio/utils/__init__.py,sha256=Lx4X4iJpRhZzRmpQb80XXh5Ve8ZMOkadWAxXSmHpO_8,244
35
35
  semantio/utils/config.py,sha256=ZTwUTqxjW3-w94zoU7GzivWyJe0JJGvBfuB4RUOuEs8,1198
36
36
  semantio/utils/date_utils.py,sha256=x3oqRGv6ee_KCJ0LvCqqZh_FSgS6YGOHBwZQS4TJetY,1471
37
37
  semantio/utils/file_utils.py,sha256=b_cMuJINEGk9ikNuNHSn9lsmICWwvtnCDZ03ndH_S2I,1779
38
38
  semantio/utils/logger.py,sha256=TmGbP8BRjLMWjXi2GWzZ0RIXt70x9qX3FuIqghCNlwM,510
39
39
  semantio/utils/validation_utils.py,sha256=iwoxEb4Q5ILqV6tbesMjPWPCCoL3AmPLejGUy6q8YvQ,1284
40
- semantio-0.0.5.dist-info/LICENSE,sha256=mziLlfb9hZ8HKxm9V6BiHpmgJvmcDvswu1QBlDB-6vU,1074
41
- semantio-0.0.5.dist-info/METADATA,sha256=PtDbsZ-tWXbte0RR40K5O_OklMKZiUsb-3dxGlmjklQ,6913
42
- semantio-0.0.5.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
43
- semantio-0.0.5.dist-info/entry_points.txt,sha256=zbPgevSLwcLpdRHqI_atE8EOt8lK2vRF1AoDflDTo18,53
44
- semantio-0.0.5.dist-info/top_level.txt,sha256=Yte_6mb-bh-I_lQwMjk1GijZkxPoX4Zmp3kBftC1ZlA,9
45
- semantio-0.0.5.dist-info/RECORD,,
40
+ semantio-0.0.7.dist-info/LICENSE,sha256=mziLlfb9hZ8HKxm9V6BiHpmgJvmcDvswu1QBlDB-6vU,1074
41
+ semantio-0.0.7.dist-info/METADATA,sha256=QQRzinLKReosRRthYf1bei5FDAaOPHaG4bG5gdJnMFc,6889
42
+ semantio-0.0.7.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
43
+ semantio-0.0.7.dist-info/entry_points.txt,sha256=zbPgevSLwcLpdRHqI_atE8EOt8lK2vRF1AoDflDTo18,53
44
+ semantio-0.0.7.dist-info/top_level.txt,sha256=Yte_6mb-bh-I_lQwMjk1GijZkxPoX4Zmp3kBftC1ZlA,9
45
+ semantio-0.0.7.dist-info/RECORD,,