semantio 0.0.6__tar.gz → 0.0.7__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (51) hide show
  1. {semantio-0.0.6 → semantio-0.0.7}/PKG-INFO +1 -1
  2. semantio-0.0.7/semantio/tools/web_browser.py +545 -0
  3. {semantio-0.0.6 → semantio-0.0.7}/semantio.egg-info/PKG-INFO +1 -1
  4. {semantio-0.0.6 → semantio-0.0.7}/semantio.egg-info/requires.txt +0 -1
  5. {semantio-0.0.6 → semantio-0.0.7}/setup.py +1 -2
  6. semantio-0.0.6/semantio/tools/web_browser.py +0 -439
  7. {semantio-0.0.6 → semantio-0.0.7}/LICENSE +0 -0
  8. {semantio-0.0.6 → semantio-0.0.7}/README.md +0 -0
  9. {semantio-0.0.6 → semantio-0.0.7}/semantio/__init__.py +0 -0
  10. {semantio-0.0.6 → semantio-0.0.7}/semantio/agent.py +0 -0
  11. {semantio-0.0.6 → semantio-0.0.7}/semantio/api/__init__.py +0 -0
  12. {semantio-0.0.6 → semantio-0.0.7}/semantio/api/api_generator.py +0 -0
  13. {semantio-0.0.6 → semantio-0.0.7}/semantio/api/fastapi_app.py +0 -0
  14. {semantio-0.0.6 → semantio-0.0.7}/semantio/cli/__init__.py +0 -0
  15. {semantio-0.0.6 → semantio-0.0.7}/semantio/cli/main.py +0 -0
  16. {semantio-0.0.6 → semantio-0.0.7}/semantio/knowledge_base/__init__.py +0 -0
  17. {semantio-0.0.6 → semantio-0.0.7}/semantio/knowledge_base/document_loader.py +0 -0
  18. {semantio-0.0.6 → semantio-0.0.7}/semantio/knowledge_base/retriever.py +0 -0
  19. {semantio-0.0.6 → semantio-0.0.7}/semantio/knowledge_base/vector_store.py +0 -0
  20. {semantio-0.0.6 → semantio-0.0.7}/semantio/llm/__init__.py +0 -0
  21. {semantio-0.0.6 → semantio-0.0.7}/semantio/llm/anthropic.py +0 -0
  22. {semantio-0.0.6 → semantio-0.0.7}/semantio/llm/base_llm.py +0 -0
  23. {semantio-0.0.6 → semantio-0.0.7}/semantio/llm/deepseek.py +0 -0
  24. {semantio-0.0.6 → semantio-0.0.7}/semantio/llm/gemini.py +0 -0
  25. {semantio-0.0.6 → semantio-0.0.7}/semantio/llm/groq.py +0 -0
  26. {semantio-0.0.6 → semantio-0.0.7}/semantio/llm/mistral.py +0 -0
  27. {semantio-0.0.6 → semantio-0.0.7}/semantio/llm/openai.py +0 -0
  28. {semantio-0.0.6 → semantio-0.0.7}/semantio/memory.py +0 -0
  29. {semantio-0.0.6 → semantio-0.0.7}/semantio/models.py +0 -0
  30. {semantio-0.0.6 → semantio-0.0.7}/semantio/rag.py +0 -0
  31. {semantio-0.0.6 → semantio-0.0.7}/semantio/storage/__init__.py +0 -0
  32. {semantio-0.0.6 → semantio-0.0.7}/semantio/storage/base_storage.py +0 -0
  33. {semantio-0.0.6 → semantio-0.0.7}/semantio/storage/cloud_storage.py +0 -0
  34. {semantio-0.0.6 → semantio-0.0.7}/semantio/storage/in_memory_storage.py +0 -0
  35. {semantio-0.0.6 → semantio-0.0.7}/semantio/storage/local_storage.py +0 -0
  36. {semantio-0.0.6 → semantio-0.0.7}/semantio/tools/__init__.py +0 -0
  37. {semantio-0.0.6 → semantio-0.0.7}/semantio/tools/base_tool.py +0 -0
  38. {semantio-0.0.6 → semantio-0.0.7}/semantio/tools/crypto.py +0 -0
  39. {semantio-0.0.6 → semantio-0.0.7}/semantio/tools/duckduckgo.py +0 -0
  40. {semantio-0.0.6 → semantio-0.0.7}/semantio/tools/stocks.py +0 -0
  41. {semantio-0.0.6 → semantio-0.0.7}/semantio/utils/__init__.py +0 -0
  42. {semantio-0.0.6 → semantio-0.0.7}/semantio/utils/config.py +0 -0
  43. {semantio-0.0.6 → semantio-0.0.7}/semantio/utils/date_utils.py +0 -0
  44. {semantio-0.0.6 → semantio-0.0.7}/semantio/utils/file_utils.py +0 -0
  45. {semantio-0.0.6 → semantio-0.0.7}/semantio/utils/logger.py +0 -0
  46. {semantio-0.0.6 → semantio-0.0.7}/semantio/utils/validation_utils.py +0 -0
  47. {semantio-0.0.6 → semantio-0.0.7}/semantio.egg-info/SOURCES.txt +0 -0
  48. {semantio-0.0.6 → semantio-0.0.7}/semantio.egg-info/dependency_links.txt +0 -0
  49. {semantio-0.0.6 → semantio-0.0.7}/semantio.egg-info/entry_points.txt +0 -0
  50. {semantio-0.0.6 → semantio-0.0.7}/semantio.egg-info/top_level.txt +0 -0
  51. {semantio-0.0.6 → semantio-0.0.7}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: semantio
3
- Version: 0.0.6
3
+ Version: 0.0.7
4
4
  Summary: A powerful SDK for building AI agents
5
5
  Home-page: https://github.com/Syenah/semantio
6
6
  Author: Rakesh
@@ -0,0 +1,545 @@
1
+ # web_browser.py
2
+ from typing import Dict, Any, List, Optional, Callable
3
+ from pydantic import Field, BaseModel
4
+ from playwright.sync_api import sync_playwright, Page, TimeoutError as PlaywrightTimeoutError
5
+ import json, time, re, logging, os, difflib
6
+ from .base_tool import BaseTool
7
+
8
+ # Global logger
9
+ logger = logging.getLogger(__name__)
10
+
11
class BrowserPlan(BaseModel):
    """Validated container for the ordered automation tasks of one plan."""

    # Each task is a plain dict; the tool reads its "action", "selector",
    # "value" and "description" keys.
    tasks: List[Dict[str, Any]] = Field(..., description="List of automation tasks to execute")
16
+
17
+ class WebBrowserTool(BaseTool):
18
+ name: str = Field("WebBrowser", description="Name of the tool")
19
+ description: str = Field(
20
+ "Universal web automation tool with advanced element identification (DOM and image fallback), modal analysis, AJAX waiting, multi-tab support, and custom JS injection.",
21
+ description="Tool description"
22
+ )
23
+ default_timeout: int = 15000 # 15 seconds in milliseconds
24
+ max_retries: int = 3
25
+
26
def __init__(self, *args, **kwargs):
    """Initialise the tool and attach a logger named after this module.

    ``logger`` is not a declared model field, so it is attached with
    ``object.__setattr__`` to bypass Pydantic's restrictions on extra
    attributes.
    """
    super().__init__(*args, **kwargs)
    tool_logger = logging.getLogger(__name__)
    object.__setattr__(self, "logger", tool_logger)
30
+
31
def execute(self, input: Dict[str, Any]) -> Dict[str, Any]:
    """
    Execute the browser automation workflow.

    Asks the LLM for a task plan for ``input["query"]``, launches Chromium and
    runs the tasks in order, stopping at the first task that fails after its
    retries. Maintains a context string of executed tasks and passes it to
    fallback routines.

    DOES NOT close the browser once the plan has run. The browser is only torn
    down when an unexpected exception aborts the run, so it is not leaked with
    no handle left to close it.

    Args:
        input: Options dict. Keys read here: ``query``, ``headless``
            (default False), ``timeout`` in seconds (default 15) and
            ``max_retries`` (default: the current ``self.max_retries``).

    Returns:
        ``{"status": "success", "results": [...], "total_time": float}`` when
        every task succeeded. Otherwise ``{"status": "error", "message": str}``,
        which additionally carries ``results`` and ``total_time`` when the
        failure came from a task rather than from an exception.
    """
    overall_start = time.time()
    results = []  # summaries of executed tasks (also the fallback context)
    playwright = None
    browser = None
    try:
        headless = input.get("headless", False)
        self.default_timeout = int(input.get("timeout", 15)) * 1000
        self.max_retries = int(input.get("max_retries", self.max_retries))
        # No page is loaded yet, so the planner gets an empty current URL.
        plan = self._generate_plan(input.get("query", ""), "")
        if not plan.tasks:
            raise ValueError("No valid tasks in the generated plan.")

        # Start Playwright without a "with" block so we can leave the browser open.
        playwright = sync_playwright().start()
        browser = playwright.chromium.launch(headless=headless)
        context = browser.new_context()
        page = context.new_page()

        # Map actions to handlers.
        action_map: Dict[str, Callable[[Page, Dict[str, Any]], Dict[str, Any]]] = {
            "navigate": lambda pg, task: self._handle_navigation(pg, task.get("value", "")),
            "click": lambda pg, task: self._handle_click(pg, task.get("selector", "")),
            "type": lambda pg, task: self._handle_typing(pg, task.get("selector", ""), task.get("value", ""), task),
            "wait": lambda pg, task: self._handle_wait(task.get("value", "")),
            "wait_for_ajax": lambda pg, task: self._handle_wait_for_ajax(pg, task.get("value", "")),
            "scroll": lambda pg, task: self._handle_scroll(pg, task.get("selector", "")),
            "hover": lambda pg, task: self._handle_hover(pg, task.get("selector", "")),
            "screenshot": lambda pg, task: self._handle_screenshot(pg, task.get("value", "screenshot.png")),
            "switch_tab": lambda pg, task: self._handle_switch_tab(context, task.get("value", "0")),
            "execute_script": lambda pg, task: self._handle_execute_script(pg, task.get("value", "")),
            "drag_and_drop": lambda pg, task: self._handle_drag_and_drop(pg, task.get("selector", ""), task.get("value", "")),
        }

        for task in plan.tasks:
            self._dismiss_unwanted_modals(page, task_context=task.get("description", ""))
            action = task.get("action", "").lower()
            self.logger.info(f"Executing task: {task.get('description', action)}")
            start_time = time.time()

            # Build a context string from previously executed tasks.
            executed_context = "\n".join([f"{r['action']}: {r['message']}" for r in results])
            handler = action_map.get(action)
            if not handler:
                results.append({
                    "action": action,
                    "success": False,
                    "message": f"Unsupported action: {action}"
                })
                continue

            result = self._execute_with_retries(page, task, handler, executed_context)
            elapsed = time.time() - start_time
            result["elapsed"] = elapsed
            self.logger.info(f"Action '{action}' completed in {elapsed:.2f} seconds.")
            results.append(result)

            if not result.get("success", False):
                self.logger.error(f"Task failed: {result.get('message')}")
                self._capture_failure_screenshot(page, action)
                break

            if action == "switch_tab":
                # The handler only receives the context, so the page that later
                # tasks act on has to be re-pointed here.
                tab_value = str(task.get("value", "0")).strip().lower()
                try:
                    open_pages = context.pages
                    page = open_pages[-1] if tab_value == "new" else open_pages[int(tab_value)]
                    page.bring_to_front()
                except (IndexError, ValueError):
                    self.logger.warning(f"Could not resolve tab '{tab_value}'; staying on the current page.")

        overall_elapsed = time.time() - overall_start
        self.logger.info(f"Total execution time: {overall_elapsed:.2f} seconds.")
        # Do not close the browser.
        failures = [r for r in results if not r.get("success", False)]
        if failures:
            # Reporting "success" for a plan with a failed task would mislead the caller.
            return {
                "status": "error",
                "message": failures[0].get("message") or "Task failed",
                "results": results,
                "total_time": overall_elapsed,
            }
        return {"status": "success", "results": results, "total_time": overall_elapsed}
    except Exception as e:
        self.logger.exception("Execution error:")
        # Nothing returned from here references the browser, so release it
        # (best effort) instead of leaking the process.
        for resource, closer_name in ((browser, "close"), (playwright, "stop")):
            if resource is None:
                continue
            try:
                getattr(resource, closer_name)()
            except Exception:
                self.logger.warning("Browser cleanup failed.", exc_info=True)
        return {"status": "error", "message": str(e)}
106
+
107
def _generate_plan(self, query: str, current_url: str) -> BrowserPlan:
    """Ask the LLM for a JSON automation plan for *query* and parse it.

    Args:
        query: Text interpolated into the prompt as the automation goal.
        current_url: URL interpolated into the prompt; when empty the prompt
            says 'No page loaded yet'.

    Returns:
        Whatever ``self._parse_plan`` returns for the LLM's reply.
    """
    # Doubled braces ({{ / }}) are literal braces inside the f-string; only
    # {query} and {current_url ...} are substituted.
    # NOTE(review): the prompt's original leading whitespace is not recoverable
    # from this diff rendering; the lines are reproduced as shown.
    prompt = f"""Generate browser automation plan for: {query}

Current URL: {current_url or 'No page loaded yet'}

Required JSON format:
{{
"tasks": [
{{
"action": "navigate|click|type|wait|wait_for_ajax|scroll|hover|screenshot|switch_tab|execute_script|drag_and_drop",
"selector": "CSS selector (optional)",
"value": "input text/URL/seconds/filename/target-selector",
"description": "action purpose"
}}
]
}}

Guidelines:
1. Prefer IDs in selectors (#element-id) and semantic attributes.
2. Include wait steps after navigation and wait for AJAX where applicable.
3. Dismiss any modals/pop-ups that are not part of the task.
4. For drag_and_drop, use source selector in 'selector' and target selector in 'value'.
5. For execute_script, 'value' should contain valid JavaScript.
6. For switch_tab, 'value' should be an index or keyword 'new'.
"""
    response = self.llm.generate(prompt=prompt)
    return self._parse_plan(response)
134
+
135
def _parse_plan(self, response: str) -> BrowserPlan:
    """Extract and validate the task list from the LLM's raw reply.

    The JSON is taken from a ```json fenced block when present, otherwise from
    the outermost ``{...}`` span of the reply. Tasks that are not objects or
    that lack ``action``/``description`` are skipped with a warning.

    Every kept field is normalised to a string (``None`` becomes ``""``)
    because the handlers call string methods on ``selector`` and ``value``,
    and LLM output frequently contains ``null`` or bare numbers there.

    Args:
        response: Raw LLM reply.

    Returns:
        A ``BrowserPlan``; its task list is empty when nothing could be parsed.
    """
    def as_text(field) -> str:
        # JSON null -> "", everything else -> its string form (2 -> "2").
        return "" if field is None else str(field)

    try:
        if not isinstance(response, str):
            raise ValueError("LLM response is not text.")
        json_match = re.search(r'```json\n?(.+?)\n?```', response, re.DOTALL)
        if json_match:
            plan_data = json.loads(json_match.group(1).strip())
        else:
            json_str_match = re.search(r'\{.*\}', response, re.DOTALL)
            if not json_str_match:
                raise ValueError("No JSON object found in the response.")
            plan_data = json.loads(json_str_match.group())
        if not isinstance(plan_data, dict):
            raise ValueError("Plan JSON must be an object with a 'tasks' list.")
        raw_tasks = plan_data.get("tasks", [])
        if not isinstance(raw_tasks, list):
            raise ValueError("'tasks' must be a list.")

        validated_tasks = []
        for task in raw_tasks:
            # A non-dict entry would otherwise raise TypeError on task["action"].
            if not isinstance(task, dict) or not all(key in task for key in ("action", "description")):
                self.logger.warning(f"Skipping task due to missing keys: {task}")
                continue
            validated_tasks.append({
                "action": as_text(task["action"]),
                "selector": as_text(task.get("selector", "")),
                "value": as_text(task.get("value", "")),
                "description": as_text(task["description"]),
            })
        return BrowserPlan(tasks=validated_tasks)
    except (json.JSONDecodeError, AttributeError, ValueError) as e:
        self.logger.error(f"Plan parsing failed: {e}")
        return BrowserPlan(tasks=[])
160
+
161
+ def _execute_with_retries(self, page: Page, task: Dict[str, Any],
162
+ handler: Callable[[Page, Dict[str, Any]], Dict[str, Any]],
163
+ executed_context: str = "") -> Dict[str, Any]:
164
+ """Execute a task with retry logic. If it fails, pass the executed_context to the fallback prompt.
165
+ The fallback now returns a JSON array of tasks, which are executed sequentially."""
166
+ attempts = 0
167
+ result = {}
168
+ while attempts < self.max_retries:
169
+ result = self._execute_safe_task(page, task, handler)
170
+ if result.get("success", False):
171
+ return result
172
+ attempts += 1
173
+ self.logger.info(f"Retrying task '{task.get('action')}' (attempt {attempts + 1}/{self.max_retries})")
174
+ time.sleep(1 * attempts)
175
+ if task.get("action") in ["click", "type"]:
176
+ self.logger.info("HTML-based automation failed. Using fallback with image-based LLM.")
177
+ result = self._fallback_with_image_llm(page, task, executed_context)
178
+ return result
179
+
180
+ def _execute_safe_task(self, page: Page, task: Dict[str, Any],
181
+ handler: Callable[[Page, Dict[str, Any]], Dict[str, Any]]) -> Dict[str, Any]:
182
+ try:
183
+ return handler(page, task)
184
+ except Exception as e:
185
+ action = task.get("action", "unknown")
186
+ self.logger.exception(f"Error executing task '{action}':")
187
+ return {"action": action, "success": False, "message": f"Critical error: {str(e)}"}
188
+
189
+ def _dismiss_unwanted_modals(self, page: Page, task_context: str = ""):
190
+ modal_selectors = [".modal", ".popup", '[role="dialog"]', ".overlay", ".lightbox"]
191
+ for selector in modal_selectors:
192
+ elements = page.query_selector_all(selector)
193
+ for modal in elements:
194
+ if modal.is_visible():
195
+ self._handle_modal(page, modal, task_context)
196
+
197
+ def _handle_modal(self, page: Page, modal_element, task_context: str):
198
+ try:
199
+ modal_screenshot = modal_element.screenshot()
200
+ prompt = (
201
+ f"A modal is displayed on the page. The content is visible in the attached image. "
202
+ f"The current task context is: \"{task_context}\". "
203
+ "Based on the content of the modal and the task context, decide whether to dismiss the modal. "
204
+ "Return a JSON response in the format: { \"action\": \"dismiss\" } to dismiss or { \"action\": \"ignore\" } to leave it. "
205
+ "Return only the JSON."
206
+ )
207
+ response_text = self.llm.generate_from_image(prompt, image_bytes=modal_screenshot)
208
+ self.logger.info(f"LLM response for modal analysis: {response_text}")
209
+ json_match = re.search(r'```json\n?(.+?)\n?```', response_text, re.DOTALL)
210
+ json_text = json_match.group(1).strip() if json_match else response_text.strip()
211
+ decision = json.loads(json_text)
212
+ if decision.get("action") == "dismiss":
213
+ close_buttons = modal_element.query_selector_all(".close, .btn-close, [aria-label='Close'], [data-dismiss='modal']")
214
+ for btn in close_buttons:
215
+ if btn.is_visible():
216
+ btn.click()
217
+ self.logger.info("Modal dismissed using a close button.")
218
+ return
219
+ page.evaluate("(modal) => modal.remove()", modal_element)
220
+ self.logger.info("Modal dismissed by removal.")
221
+ else:
222
+ self.logger.info("Modal left intact according to LLM analysis.")
223
+ except Exception as e:
224
+ self.logger.error(f"Modal handling error: {e}")
225
+
226
+ def _advanced_find_element(self, page: Page, keyword: str):
227
+ try:
228
+ candidates = page.query_selector_all("input, textarea, button, a, div")
229
+ best_match = None
230
+ best_ratio = 0.0
231
+ for candidate in candidates:
232
+ attrs = page.evaluate(
233
+ """(el) => {
234
+ return {
235
+ id: el.id,
236
+ name: el.getAttribute('name'),
237
+ placeholder: el.getAttribute('placeholder'),
238
+ aria: el.getAttribute('aria-label'),
239
+ text: el.innerText
240
+ };
241
+ }""",
242
+ candidate,
243
+ )
244
+ combined_text = " ".join(
245
+ filter(None, [
246
+ attrs.get("id"),
247
+ attrs.get("name"),
248
+ attrs.get("placeholder"),
249
+ attrs.get("aria"),
250
+ attrs.get("text"),
251
+ ])
252
+ )
253
+ ratio = difflib.SequenceMatcher(None, combined_text.lower(), keyword.lower()).ratio()
254
+ if ratio > best_ratio:
255
+ best_ratio = ratio
256
+ best_match = candidate
257
+ if best_ratio > 0.5:
258
+ self.logger.info(f"Advanced fallback detected element with similarity {best_ratio:.2f} for keyword '{keyword}'")
259
+ return best_match
260
+ return None
261
+ except Exception as e:
262
+ self.logger.error(f"Advanced find element error: {e}")
263
+ return None
264
+
265
+ def _annotate_page_with_numbers(self, page: Page, query: str = "button, a, input, [onclick]"):
266
+ script = f"""
267
+ (() => {{
268
+ document.querySelectorAll('.automation-annotation-overlay').forEach(el => el.remove());
269
+ const elements = document.querySelectorAll('{query}');
270
+ let counter = 1;
271
+ elements.forEach(el => {{
272
+ const rect = el.getBoundingClientRect();
273
+ if (rect.width === 0 || rect.height === 0) return;
274
+ const overlay = document.createElement('div');
275
+ overlay.classList.add('automation-annotation-overlay');
276
+ overlay.style.position = 'absolute';
277
+ overlay.style.left = (rect.left + window.scrollX) + 'px';
278
+ overlay.style.top = (rect.top + window.scrollY) + 'px';
279
+ overlay.style.width = rect.width + 'px';
280
+ overlay.style.height = rect.height + 'px';
281
+ overlay.style.border = '2px solid red';
282
+ overlay.style.zIndex = 9999;
283
+ overlay.style.pointerEvents = 'none';
284
+ overlay.textContent = counter;
285
+ overlay.style.fontSize = '16px';
286
+ overlay.style.fontWeight = 'bold';
287
+ overlay.style.color = 'red';
288
+ overlay.style.backgroundColor = 'rgba(255, 255, 255, 0.7)';
289
+ document.body.appendChild(overlay);
290
+ counter += 1;
291
+ }});
292
+ }})();
293
+ """
294
+ page.evaluate(script)
295
+
296
+ def _click_element_by_number(self, page: Page, number: int) -> Dict[str, Any]:
297
+ candidates = [el for el in page.query_selector_all("button, a, input, [onclick]") if el.is_visible()]
298
+ index = number - 1
299
+ if index < len(candidates):
300
+ candidate = candidates[index]
301
+ candidate.scroll_into_view_if_needed()
302
+ try:
303
+ candidate.click()
304
+ return {"action": "click", "success": True, "message": f"Clicked element number {number}"}
305
+ except Exception as e:
306
+ return {"action": "click", "success": False, "message": f"Click failed: {str(e)}"}
307
+ else:
308
+ return {"action": "click", "success": False, "message": f"Element number {number} not found."}
309
+
310
def _fallback_with_image_llm(self, page: Page, task: Dict[str, Any], executed_context: str = "") -> Dict[str, Any]:
    """
    Fallback method: Annotate the page, capture a screenshot, and ask the LLM (via image analysis)
    to generate a JSON array of tasks for the next steps.
    Each fallback task is an object:
    {
    "action": "click" or "type",
    "element_number": <number>,
    "text": <if action is 'type', the text to type; otherwise an empty string>
    }
    The prompt includes the executed_context.

    For ``type`` tasks the text typed is always the original task's ``value``;
    text returned by the LLM is never typed, so its casing or wording cannot
    alter the input. Malformed fallback tasks (non-objects, missing or
    non-positive element numbers) produce failure entries instead of raising.
    The numbered overlays are removed again once the screenshot is taken.

    Returns:
        A result dict for the original action. ``success`` is True when at
        least one fallback task succeeded; ``message`` joins the individual
        messages.
    """
    original_action = task.get("action")
    original_text = str(task.get("value") or "").strip()
    query = "input, textarea" if original_action == "type" else "button, a, input, [onclick]"
    self._annotate_page_with_numbers(page, query=query)
    time.sleep(1)
    screenshot_bytes = page.screenshot(type="png")
    try:
        # The overlays are only needed in the screenshot; leaving them behind
        # would pollute later screenshots and the visible page.
        page.evaluate("document.querySelectorAll('.automation-annotation-overlay').forEach(el => el.remove())")
    except Exception as e:
        self.logger.warning(f"Could not remove annotation overlays: {e}")

    extra = ""
    if original_action == "type":
        extra = f"\nThe exact text to be entered is: \"{original_text}\"."
    prompt = (
        f"Tasks executed so far:\n{executed_context}\n\n"
        f"The following task remains: {task.get('description', '')}.{extra}\n"
        "I have annotated the page with numbered overlays using the appropriate query. "
        "Based on the attached screenshot, generate a JSON array of tasks that need to be performed next. "
        "Each task should be a JSON object with the format:\n"
        "[\n"
        " {\n"
        " \"action\": \"click\" or \"type\",\n"
        " \"element_number\": <number>,\n"
        " \"text\": <if action is 'type', the text to type; otherwise an empty string>\n"
        " },\n"
        " ...\n"
        "]\n"
        "Return only the JSON array."
    )
    response_text = self.llm.generate_from_image(prompt, image_bytes=screenshot_bytes)
    self.logger.info(f"LLM response for fallback: {response_text}")
    try:
        fallback_tasks = self._parse_fallback_tasks(response_text)
    except (AttributeError, TypeError, ValueError) as e:
        return {"action": original_action, "success": False, "message": f"Fallback failed to parse JSON: {str(e)}"}
    if not fallback_tasks:
        return {"action": original_action, "success": False, "message": "Fallback produced no tasks."}

    fallback_results = []
    for fb_task in fallback_tasks:
        action = fb_task.get("action")
        raw_number = fb_task.get("element_number")
        try:
            element_number = int(raw_number)
        except (TypeError, ValueError):
            element_number = 0
        if action not in ("click", "type"):
            res = {"action": original_action, "success": False, "message": "Invalid fallback action."}
        elif element_number < 1:
            res = {"action": action, "success": False, "message": f"Element number {raw_number} not found."}
        elif action == "click":
            self.logger.info(f"LLM indicated fallback click on element number {element_number}.")
            res = self._click_element_by_number(page, element_number)
        else:
            returned_text = str(fb_task.get("text") or "").strip()
            if returned_text != original_text:
                self.logger.info("Overriding LLM-provided text with original input text.")
            res = self._type_into_element_by_number(page, element_number, original_text)
        fallback_results.append(res)

    overall_success = any(r.get("success", False) for r in fallback_results)
    overall_message = "; ".join([r.get("message", "") for r in fallback_results])
    return {"action": original_action, "success": overall_success, "message": overall_message}

def _parse_fallback_tasks(self, response_text: str) -> List[Dict[str, Any]]:
    """Parse the fallback reply into a list of task dicts (non-dict items are dropped)."""
    try:
        parsed = json.loads(response_text.strip())
    except ValueError:
        # Not bare JSON: look for a fenced ```json block before giving up.
        fenced = re.search(r'```json\n?(.+?)\n?```', response_text, re.DOTALL)
        if not fenced:
            raise
        parsed = json.loads(fenced.group(1).strip())
    if not isinstance(parsed, list):
        parsed = [parsed]
    return [item for item in parsed if isinstance(item, dict)]

def _type_into_element_by_number(self, page: Page, element_number: int, text: str) -> Dict[str, Any]:
    """Fill *text* into the *element_number*-th (1-based) visible ``input, textarea`` element."""
    candidates = [el for el in page.query_selector_all("input, textarea") if el.is_visible()]
    if not 1 <= element_number <= len(candidates):
        return {"action": "type", "success": False, "message": f"Element number {element_number} not found."}
    candidate = candidates[element_number - 1]
    try:
        candidate.scroll_into_view_if_needed()
        candidate.fill(text, timeout=self.default_timeout)
        return {"action": "type", "success": True, "message": f"Typed '{text}' into element number {element_number}"}
    except Exception as ex:
        return {"action": "type", "success": False, "message": f"Typing failed on fallback element: {str(ex)}"}
396
+
397
+ def _handle_navigation(self, page: Page, url: str) -> Dict[str, Any]:
398
+ if not url.startswith(("http://", "https://")):
399
+ url = f"https://{url}"
400
+ try:
401
+ page.goto(url, timeout=self.default_timeout)
402
+ page.wait_for_selector("body", timeout=self.default_timeout)
403
+ return {"action": "navigate", "success": True, "message": f"Navigated to {url}"}
404
+ except PlaywrightTimeoutError as e:
405
+ self.logger.error(f"Navigation to {url} timed out: {e}")
406
+ return {"action": "navigate", "success": False, "message": f"Navigation timed out: {str(e)}"}
407
+ except Exception as e:
408
+ self.logger.error(f"Navigation to {url} failed: {e}")
409
+ return {"action": "navigate", "success": False, "message": f"Navigation failed: {str(e)}"}
410
+
411
+ def _handle_click(self, page: Page, selector: str) -> Dict[str, Any]:
412
+ try:
413
+ page.wait_for_selector(selector, state="visible", timeout=self.default_timeout)
414
+ page.click(selector, timeout=self.default_timeout)
415
+ return {"action": "click", "success": True, "message": f"Clicked element: {selector}"}
416
+ except PlaywrightTimeoutError as e:
417
+ self.logger.error(f"Click action timed out on selector {selector}: {e}")
418
+ return {"action": "click", "success": False, "message": f"Click timed out: {str(e)}"}
419
+ except Exception as e:
420
+ self.logger.error(f"Click action failed on selector {selector}: {e}")
421
+ return {"action": "click", "success": False, "message": f"Click failed: {str(e)}"}
422
+
423
+ def _handle_typing(self, page: Page, selector: str, text: str, task: Dict[str, Any]) -> Dict[str, Any]:
424
+ try:
425
+ page.wait_for_selector(selector, state="attached", timeout=self.default_timeout)
426
+ page.fill(selector, text, timeout=self.default_timeout)
427
+ return {"action": "type", "success": True, "message": f"Typed '{text}' into element."}
428
+ except PlaywrightTimeoutError as e:
429
+ self.logger.info("Primary selector failed; using advanced fallback for element detection.")
430
+ element = self._advanced_find_element(page, "search")
431
+ if not element:
432
+ return {"action": "type", "success": False, "message": f"Typing failed: No search-like element found; error: {str(e)}"}
433
+ try:
434
+ element.fill(text, timeout=self.default_timeout)
435
+ return {"action": "type", "success": True, "message": f"Typed '{text}' into fallback element."}
436
+ except Exception as ex:
437
+ return {"action": "type", "success": False, "message": f"Typing failed on fallback element: {str(ex)}"}
438
+ except Exception as e:
439
+ self.logger.error(f"Typing action failed: {e}")
440
+ return {"action": "type", "success": False, "message": f"Typing failed: {str(e)}"}
441
+
442
+ def _handle_wait(self, seconds: str) -> Dict[str, Any]:
443
+ try:
444
+ wait_time = float(seconds)
445
+ self.logger.info(f"Waiting for {wait_time} seconds")
446
+ time.sleep(wait_time)
447
+ return {"action": "wait", "success": True, "message": f"Waited {wait_time} seconds"}
448
+ except ValueError as e:
449
+ self.logger.error(f"Invalid wait time provided: {seconds}")
450
+ return {"action": "wait", "success": False, "message": "Invalid wait time"}
451
+
452
+ def _handle_wait_for_ajax(self, page: Page, seconds: str) -> Dict[str, Any]:
453
+ try:
454
+ timeout_seconds = int(seconds) if seconds.strip() != "" else 30
455
+ self.logger.info(f"Waiting for AJAX/network activity for up to {timeout_seconds} seconds.")
456
+ end_time = time.time() + timeout_seconds
457
+ while time.time() < end_time:
458
+ ajax_complete = page.evaluate("""
459
+ () => {
460
+ return (window.jQuery ? jQuery.active === 0 : true) &&
461
+ (typeof window.fetch === 'function' ? true : true);
462
+ }
463
+ """)
464
+ if ajax_complete:
465
+ break
466
+ time.sleep(0.5)
467
+ return {"action": "wait_for_ajax", "success": True, "message": "AJAX/network activity subsided."}
468
+ except Exception as e:
469
+ self.logger.error(f"Wait for AJAX failed: {e}")
470
+ return {"action": "wait_for_ajax", "success": False, "message": f"Wait for AJAX failed: {str(e)}"}
471
+
472
+ def _handle_scroll(self, page: Page, selector: str) -> Dict[str, Any]:
473
+ try:
474
+ if selector:
475
+ page.wait_for_selector(selector, timeout=self.default_timeout)
476
+ page.eval_on_selector(selector, "el => el.scrollIntoView({behavior: 'smooth', block: 'center'})")
477
+ scroll_target = selector
478
+ else:
479
+ page.evaluate("window.scrollTo(0, document.body.scrollHeight);")
480
+ scroll_target = "page bottom"
481
+ return {"action": "scroll", "success": True, "message": f"Scrolled to {scroll_target}"}
482
+ except Exception as e:
483
+ self.logger.error(f"Scroll action failed on selector {selector}: {e}")
484
+ return {"action": "scroll", "success": False, "message": f"Scroll failed: {str(e)}"}
485
+
486
+ def _handle_hover(self, page: Page, selector: str) -> Dict[str, Any]:
487
+ try:
488
+ page.wait_for_selector(selector, state="visible", timeout=self.default_timeout)
489
+ page.hover(selector, timeout=self.default_timeout)
490
+ return {"action": "hover", "success": True, "message": f"Hovered over {selector}"}
491
+ except Exception as e:
492
+ self.logger.error(f"Hover action failed on selector {selector}: {e}")
493
+ return {"action": "hover", "success": False, "message": f"Hover failed: {str(e)}"}
494
+
495
+ def _handle_screenshot(self, page: Page, filename: str) -> Dict[str, Any]:
496
+ try:
497
+ page.screenshot(path=filename)
498
+ return {"action": "screenshot", "success": True, "message": f"Screenshot saved as {filename}"}
499
+ except Exception as e:
500
+ self.logger.error(f"Screenshot capture failed: {e}")
501
+ return {"action": "screenshot", "success": False, "message": f"Screenshot failed: {str(e)}"}
502
+
503
+ def _handle_switch_tab(self, context, value: str) -> Dict[str, Any]:
504
+ try:
505
+ pages = context.pages
506
+ if value.lower() == "new":
507
+ target_page = pages[-1]
508
+ else:
509
+ idx = int(value)
510
+ if idx < len(pages):
511
+ target_page = pages[idx]
512
+ else:
513
+ return {"action": "switch_tab", "success": False, "message": f"Tab index {value} out of range"}
514
+ return {"action": "switch_tab", "success": True, "message": f"Switched to tab {value}"}
515
+ except Exception as e:
516
+ self.logger.error(f"Switch tab failed: {e}")
517
+ return {"action": "switch_tab", "success": False, "message": f"Switch tab failed: {str(e)}"}
518
+
519
+ def _handle_execute_script(self, page: Page, script: str) -> Dict[str, Any]:
520
+ try:
521
+ result = page.evaluate(script)
522
+ return {"action": "execute_script", "success": True, "message": "Script executed successfully", "result": result}
523
+ except Exception as e:
524
+ self.logger.error(f"Execute script failed: {e}")
525
+ return {"action": "execute_script", "success": False, "message": f"Script execution failed: {str(e)}"}
526
+
527
+ def _handle_drag_and_drop(self, page: Page, source_selector: str, target_selector: str) -> Dict[str, Any]:
528
+ try:
529
+ page.wait_for_selector(source_selector, timeout=self.default_timeout)
530
+ page.wait_for_selector(target_selector, timeout=self.default_timeout)
531
+ source = page.locator(source_selector)
532
+ target = page.locator(target_selector)
533
+ source.drag_to(target, timeout=self.default_timeout)
534
+ return {"action": "drag_and_drop", "success": True, "message": f"Dragged element from {source_selector} to {target_selector}"}
535
+ except Exception as e:
536
+ self.logger.error(f"Drag and drop failed from {source_selector} to {target_selector}: {e}")
537
+ return {"action": "drag_and_drop", "success": False, "message": f"Drag and drop failed: {str(e)}"}
538
+
539
+ def _capture_failure_screenshot(self, page: Page, action: str):
540
+ filename = f"failure_{action}_{int(time.time())}.png"
541
+ try:
542
+ page.screenshot(path=filename)
543
+ self.logger.info(f"Failure screenshot captured: {filename}")
544
+ except Exception as e:
545
+ self.logger.error(f"Failed to capture screenshot: {e}")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: semantio
3
- Version: 0.0.6
3
+ Version: 0.0.7
4
4
  Summary: A powerful SDK for building AI agents
5
5
  Home-page: https://github.com/Syenah/semantio
6
6
  Author: Rakesh
@@ -15,7 +15,6 @@ sentence-transformers
15
15
  fuzzywuzzy
16
16
  duckduckgo-search
17
17
  yfinance
18
- selenium
19
18
  beautifulsoup4
20
19
  webdriver-manager
21
20
  validators
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
2
2
 
3
3
  setup(
4
4
  name="semantio",
5
- version="0.0.6",
5
+ version="0.0.7",
6
6
  description="A powerful SDK for building AI agents",
7
7
  long_description=open("README.md").read(),
8
8
  long_description_content_type="text/markdown",
@@ -28,7 +28,6 @@ setup(
28
28
  "fuzzywuzzy", # For fuzzy string matching
29
29
  "duckduckgo-search", # For DuckDuckGo search
30
30
  "yfinance", # For stock/crypto prices
31
- "selenium", # For web scraping
32
31
  "beautifulsoup4", # For HTML parsing
33
32
  "webdriver-manager", # For browser automation
34
33
  "validators", # For URL validation
@@ -1,439 +0,0 @@
1
- # web_browser.py
2
- from typing import Dict, Any, List, Optional, Callable
3
- from pydantic import Field, BaseModel
4
- from selenium import webdriver
5
- from selenium.webdriver.common.by import By
6
- from selenium.webdriver.common.action_chains import ActionChains
7
- from selenium.webdriver.remote.webelement import WebElement
8
- from selenium.webdriver.support.ui import WebDriverWait
9
- from selenium.webdriver.support import expected_conditions as EC
10
- from selenium.webdriver.chrome.options import Options
11
- from selenium.webdriver.chrome.service import Service
12
- from webdriver_manager.chrome import ChromeDriverManager
13
- from bs4 import BeautifulSoup
14
- import json
15
- import time
16
- import re
17
- import logging
18
- import os
19
- import difflib
20
- from .base_tool import BaseTool
21
-
22
- logger = logging.getLogger(__name__)
23
-
24
- class BrowserPlan(BaseModel):
25
- tasks: List[Dict[str, Any]] = Field(
26
- ...,
27
- description="List of automation tasks to execute"
28
- )
29
-
30
- class WebBrowserTool(BaseTool):
31
- name: str = Field("WebBrowser", description="Name of the tool")
32
- description: str = Field(
33
- "Highly advanced universal web automation tool with advanced element identification, AJAX waiting, modal dismissal, multi-tab support, and custom JS injection.",
34
- description="Tool description"
35
- )
36
-
37
- default_timeout: int = 15 # Default wait timeout in seconds
38
- max_retries: int = 3 # Increased maximum retries for any task
39
-
40
- def execute(self, input: Dict[str, Any]) -> Dict[str, Any]:
41
- """Execute an advanced dynamic web automation workflow."""
42
- driver = None
43
- overall_start = time.time()
44
- try:
45
- headless = input.get("headless", False)
46
- self.default_timeout = int(input.get("timeout", self.default_timeout))
47
- self.max_retries = int(input.get("max_retries", self.max_retries))
48
- driver = self._init_browser(headless)
49
- results = []
50
- current_url = ""
51
-
52
- plan = self._generate_plan(input.get('query', ''), current_url)
53
- if not plan.tasks:
54
- raise ValueError("No valid tasks in the generated plan.")
55
-
56
- # Dynamic mapping: action name to handler function.
57
- action_map: Dict[str, Callable[[webdriver.Chrome, Dict[str, Any]], Dict[str, Any]]] = {
58
- "navigate": lambda d, task: self._handle_navigation(d, task.get("value", "")),
59
- "click": lambda d, task: self._handle_click(d, task.get("selector", "")),
60
- "type": lambda d, task: self._handle_typing(d, task.get("selector", ""), task.get("value", ""), task),
61
- "wait": lambda d, task: self._handle_wait(task.get("value", "")),
62
- "wait_for_ajax": lambda d, task: self._handle_wait_for_ajax(d, task.get("value", "30")),
63
- "scroll": lambda d, task: self._handle_scroll(d, task.get("selector", "")),
64
- "hover": lambda d, task: self._handle_hover(d, task.get("selector", "")),
65
- "screenshot": lambda d, task: self._handle_screenshot(d, task.get("value", "screenshot.png")),
66
- "switch_tab": lambda d, task: self._handle_switch_tab(d, task.get("value", "0")),
67
- "execute_script": lambda d, task: self._handle_execute_script(d, task.get("value", "")),
68
- "drag_and_drop": lambda d, task: self._handle_drag_and_drop(d, task.get("selector", ""), task.get("value", "")),
69
- }
70
-
71
- for task in plan.tasks:
72
- # Before each action, dismiss modals/overlays.
73
- self._dismiss_unwanted_modals(driver)
74
- action = task.get("action", "").lower()
75
- logger.info(f"Executing task: {task.get('description', action)}")
76
- start_time = time.time()
77
- handler = action_map.get(action)
78
- if not handler:
79
- results.append({
80
- "action": action,
81
- "success": False,
82
- "message": f"Unsupported action: {action}"
83
- })
84
- continue
85
-
86
- result = self._execute_with_retries(driver, task, handler)
87
- elapsed = time.time() - start_time
88
- result["elapsed"] = elapsed
89
- logger.info(f"Action '{action}' completed in {elapsed:.2f} seconds.")
90
- results.append(result)
91
-
92
- if not result.get('success', False):
93
- logger.error(f"Task failed: {result.get('message')}")
94
- self._capture_failure_screenshot(driver, action)
95
- break
96
-
97
- current_url = driver.current_url
98
-
99
- overall_elapsed = time.time() - overall_start
100
- logger.info(f"Total execution time: {overall_elapsed:.2f} seconds.")
101
- return {"status": "success", "results": results, "total_time": overall_elapsed}
102
-
103
- except Exception as e:
104
- logger.exception("Execution error:")
105
- return {"status": "error", "message": str(e)}
106
- finally:
107
- if driver:
108
- driver.quit()
109
-
110
- def _init_browser(self, headless: bool) -> webdriver.Chrome:
111
- """Initialize browser with advanced options."""
112
- options = Options()
113
- options.add_argument("--start-maximized")
114
- options.add_argument("--disable-blink-features=AutomationControlled")
115
- options.add_experimental_option("excludeSwitches", ["enable-automation"])
116
- if headless:
117
- options.add_argument("--headless=new")
118
- return webdriver.Chrome(
119
- service=Service(ChromeDriverManager().install()),
120
- options=options
121
- )
122
-
123
- def _generate_plan(self, query: str, current_url: str) -> BrowserPlan:
124
- """Generate an adaptive execution plan using an LLM or other dynamic planner."""
125
- prompt = f"""Generate browser automation plan for: {query}
126
-
127
- Current URL: {current_url or 'No page loaded yet'}
128
-
129
- Required JSON format:
130
- {{
131
- "tasks": [
132
- {{
133
- "action": "navigate|click|type|wait|wait_for_ajax|scroll|hover|screenshot|switch_tab|execute_script|drag_and_drop",
134
- "selector": "CSS selector (optional)",
135
- "value": "input text/URL/seconds/filename/target-selector",
136
- "description": "action purpose"
137
- }}
138
- ]
139
- }}
140
-
141
- Guidelines:
142
- 1. Prefer IDs in selectors (#element-id) and semantic attributes.
143
- 2. Include wait steps after navigation and wait for AJAX where applicable.
144
- 3. Dismiss any modals/pop-ups that are not part of the task.
145
- 4. For drag_and_drop, use source selector in 'selector' and target selector in 'value'.
146
- 5. For execute_script, 'value' should contain valid JavaScript.
147
- 6. For switch_tab, 'value' should be an index or keyword 'new'.
148
- """
149
- response = self.llm.generate(prompt=prompt)
150
- return self._parse_plan(response)
151
-
152
- def _parse_plan(self, response: str) -> BrowserPlan:
153
- """Robust JSON parsing with multiple fallback strategies."""
154
- try:
155
- json_match = re.search(r'```json\n?(.+?)\n?```', response, re.DOTALL)
156
- if json_match:
157
- plan_data = json.loads(json_match.group(1).strip())
158
- else:
159
- json_str_match = re.search(r'\{.*\}', response, re.DOTALL)
160
- if not json_str_match:
161
- raise ValueError("No JSON object found in the response.")
162
- plan_data = json.loads(json_str_match.group())
163
- validated_tasks = []
164
- for task in plan_data.get("tasks", []):
165
- if not all(key in task for key in ["action", "description"]):
166
- logger.warning(f"Skipping task due to missing keys: {task}")
167
- continue
168
- validated_tasks.append({
169
- "action": task["action"],
170
- "selector": task.get("selector", ""),
171
- "value": task.get("value", ""),
172
- "description": task["description"]
173
- })
174
- return BrowserPlan(tasks=validated_tasks)
175
- except (json.JSONDecodeError, AttributeError, ValueError) as e:
176
- logger.error(f"Plan parsing failed: {e}")
177
- return BrowserPlan(tasks=[])
178
-
179
- def _execute_with_retries(self, driver: webdriver.Chrome, task: Dict[str, Any],
180
- handler: Callable[[webdriver.Chrome, Dict[str, Any]], Dict[str, Any]]) -> Dict[str, Any]:
181
- """Execute a task with retry logic and exponential backoff."""
182
- attempts = 0
183
- result = {}
184
- while attempts < self.max_retries:
185
- result = self._execute_safe_task(driver, task, handler)
186
- if result.get("success", False):
187
- return result
188
- attempts += 1
189
- logger.info(f"Retrying task '{task.get('action')}' (attempt {attempts + 1}/{self.max_retries})")
190
- time.sleep(1 * attempts)
191
- return result
192
-
193
- def _execute_safe_task(self, driver: webdriver.Chrome, task: Dict[str, Any],
194
- handler: Callable[[webdriver.Chrome, Dict[str, Any]], Dict[str, Any]]) -> Dict[str, Any]:
195
- """Execute a task with comprehensive error handling."""
196
- try:
197
- return handler(driver, task)
198
- except Exception as e:
199
- action = task.get("action", "unknown")
200
- logger.exception(f"Error executing task '{action}':")
201
- return {"action": action, "success": False, "message": f"Critical error: {str(e)}"}
202
-
203
- def _dismiss_unwanted_modals(self, driver: webdriver.Chrome):
204
- """
205
- Dismiss or remove unwanted modals, overlays, or pop-ups.
206
- First attempts to click a close button; if not available, removes the element via JS.
207
- """
208
- try:
209
- modal_selectors = [".modal", ".popup", '[role="dialog"]', ".overlay", ".lightbox"]
210
- for selector in modal_selectors:
211
- elements = driver.find_elements(By.CSS_SELECTOR, selector)
212
- for modal in elements:
213
- if modal.is_displayed():
214
- close_selectors = [".close", ".btn-close", "[aria-label='Close']", "[data-dismiss='modal']"]
215
- dismissed = False
216
- for close_sel in close_selectors:
217
- try:
218
- close_button = modal.find_element(By.CSS_SELECTOR, close_sel)
219
- if close_button.is_displayed():
220
- close_button.click()
221
- dismissed = True
222
- logger.info(f"Dismissed modal using selector {close_sel}")
223
- time.sleep(1)
224
- break
225
- except Exception:
226
- continue
227
- if not dismissed:
228
- # Remove overlay by setting display to none
229
- driver.execute_script("arguments[0].remove();", modal)
230
- logger.info(f"Removed overlay/modal with selector {selector}")
231
- except Exception as e:
232
- logger.debug(f"Modal dismissal error: {e}")
233
-
234
- def _advanced_find_element(self, driver: webdriver.Chrome, keyword: str) -> Optional[WebElement]:
235
- """
236
- Advanced fallback for finding an element.
237
- Searches across multiple attributes and inner text using fuzzy matching.
238
- """
239
- candidates = driver.find_elements(By.CSS_SELECTOR, "input, textarea, button, a, div")
240
- best_match = None
241
- best_ratio = 0.0
242
- for candidate in candidates:
243
- combined_text = " ".join([
244
- candidate.get_attribute("id") or "",
245
- candidate.get_attribute("name") or "",
246
- candidate.get_attribute("placeholder") or "",
247
- candidate.get_attribute("aria-label") or "",
248
- candidate.text or "",
249
- ])
250
- ratio = difflib.SequenceMatcher(None, combined_text.lower(), keyword.lower()).ratio()
251
- if ratio > best_ratio:
252
- best_ratio = ratio
253
- best_match = candidate
254
- if best_ratio > 0.5:
255
- logger.info(f"Advanced fallback detected element with similarity {best_ratio:.2f} for keyword '{keyword}'")
256
- return best_match
257
- return None
258
-
259
- def _handle_navigation(self, driver: webdriver.Chrome, url: str) -> Dict[str, Any]:
260
- """Handle navigation with URL correction."""
261
- if not url.startswith(("http://", "https://")):
262
- url = f"https://{url}"
263
- try:
264
- driver.get(url)
265
- WebDriverWait(driver, self.default_timeout).until(EC.presence_of_element_located((By.TAG_NAME, "body")))
266
- return {"action": "navigate", "success": True, "message": f"Navigated to {url}"}
267
- except Exception as e:
268
- logger.error(f"Navigation to {url} failed: {e}")
269
- return {"action": "navigate", "success": False, "message": f"Navigation failed: {str(e)}"}
270
-
271
- def _handle_click(self, driver: webdriver.Chrome, selector: str) -> Dict[str, Any]:
272
- """Handle click actions with fallback using JS if needed."""
273
- try:
274
- element = WebDriverWait(driver, self.default_timeout).until(
275
- EC.element_to_be_clickable((By.CSS_SELECTOR, selector))
276
- )
277
- driver.execute_script("arguments[0].scrollIntoView({behavior: 'smooth', block: 'center'});", element)
278
- try:
279
- element.click()
280
- except Exception:
281
- driver.execute_script("arguments[0].click();", element)
282
- return {"action": "click", "success": True, "message": f"Clicked element: {selector}"}
283
- except Exception as e:
284
- logger.error(f"Click action failed on selector {selector}: {e}")
285
- return {"action": "click", "success": False, "message": f"Click failed: {str(e)}"}
286
-
287
- def _handle_typing(self, driver: webdriver.Chrome, selector: str, text: str, task: Dict[str, Any]) -> Dict[str, Any]:
288
- """
289
- Handle typing into an element.
290
- If the primary selector fails, attempt advanced fallback detection.
291
- """
292
- try:
293
- element = WebDriverWait(driver, self.default_timeout).until(
294
- EC.presence_of_element_located((By.CSS_SELECTOR, selector))
295
- )
296
- except Exception as e:
297
- # If the task seems to involve search or similar text, use advanced fallback.
298
- if "search" in task.get("description", "").lower() or "search" in selector.lower():
299
- logger.info("Primary selector failed; using advanced fallback for element detection.")
300
- element = self._advanced_find_element(driver, "search")
301
- if not element:
302
- return {"action": "type", "success": False, "message": f"Typing failed: No search-like element found; error: {str(e)}"}
303
- else:
304
- return {"action": "type", "success": False, "message": f"Typing failed: {str(e)}"}
305
- try:
306
- element.clear()
307
- element.send_keys(text)
308
- return {"action": "type", "success": True, "message": f"Typed '{text}' into element."}
309
- except Exception as e:
310
- logger.error(f"Typing action failed: {e}")
311
- return {"action": "type", "success": False, "message": f"Typing failed: {str(e)}"}
312
-
313
- def _handle_wait(self, seconds: str) -> Dict[str, Any]:
314
- """Handle a simple wait."""
315
- try:
316
- wait_time = float(seconds)
317
- logger.info(f"Waiting for {wait_time} seconds")
318
- time.sleep(wait_time)
319
- return {"action": "wait", "success": True, "message": f"Waited {wait_time} seconds"}
320
- except ValueError as e:
321
- logger.error(f"Invalid wait time provided: {seconds}")
322
- return {"action": "wait", "success": False, "message": "Invalid wait time"}
323
-
324
- def _handle_wait_for_ajax(self, driver: webdriver.Chrome, seconds: str) -> Dict[str, Any]:
325
- """
326
- Wait until AJAX/network activity has subsided.
327
- This implementation first checks for jQuery, then falls back to a generic check.
328
- """
329
- try:
330
- timeout = int(seconds)
331
- logger.info(f"Waiting for AJAX/network activity for up to {timeout} seconds.")
332
- end_time = time.time() + timeout
333
- while time.time() < end_time:
334
- ajax_complete = driver.execute_script("""
335
- return (window.jQuery ? jQuery.active === 0 : true) &&
336
- (typeof window.fetch === 'function' ? true : true);
337
- """)
338
- if ajax_complete:
339
- break
340
- time.sleep(0.5)
341
- return {"action": "wait_for_ajax", "success": True, "message": "AJAX/network activity subsided."}
342
- except Exception as e:
343
- logger.error(f"Wait for AJAX failed: {e}")
344
- return {"action": "wait_for_ajax", "success": False, "message": f"Wait for AJAX failed: {str(e)}"}
345
-
346
- def _handle_scroll(self, driver: webdriver.Chrome, selector: str) -> Dict[str, Any]:
347
- """Handle scrolling to a specific element or page bottom."""
348
- try:
349
- if selector:
350
- element = WebDriverWait(driver, self.default_timeout).until(
351
- EC.presence_of_element_located((By.CSS_SELECTOR, selector))
352
- )
353
- driver.execute_script("arguments[0].scrollIntoView({behavior: 'smooth', block: 'center'});", element)
354
- scroll_target = selector
355
- else:
356
- driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
357
- scroll_target = "page bottom"
358
- return {"action": "scroll", "success": True, "message": f"Scrolled to {scroll_target}"}
359
- except Exception as e:
360
- logger.error(f"Scroll action failed on selector {selector}: {e}")
361
- return {"action": "scroll", "success": False, "message": f"Scroll failed: {str(e)}"}
362
-
363
- def _handle_hover(self, driver: webdriver.Chrome, selector: str) -> Dict[str, Any]:
364
- """Handle mouse hover action."""
365
- try:
366
- element = WebDriverWait(driver, self.default_timeout).until(
367
- EC.visibility_of_element_located((By.CSS_SELECTOR, selector))
368
- )
369
- ActionChains(driver).move_to_element(element).perform()
370
- return {"action": "hover", "success": True, "message": f"Hovered over {selector}"}
371
- except Exception as e:
372
- logger.error(f"Hover action failed on selector {selector}: {e}")
373
- return {"action": "hover", "success": False, "message": f"Hover failed: {str(e)}"}
374
-
375
- def _handle_screenshot(self, driver: webdriver.Chrome, filename: str) -> Dict[str, Any]:
376
- """Capture a screenshot of the current browser state."""
377
- try:
378
- driver.save_screenshot(filename)
379
- return {"action": "screenshot", "success": True, "message": f"Screenshot saved as {filename}"}
380
- except Exception as e:
381
- logger.error(f"Screenshot capture failed: {e}")
382
- return {"action": "screenshot", "success": False, "message": f"Screenshot failed: {str(e)}"}
383
-
384
- def _handle_switch_tab(self, driver: webdriver.Chrome, value: str) -> Dict[str, Any]:
385
- """
386
- Switch between tabs. 'value' can be an index or the keyword 'new'.
387
- """
388
- try:
389
- handles = driver.window_handles
390
- if value.lower() == "new":
391
- target_handle = handles[-1]
392
- else:
393
- idx = int(value)
394
- if idx < len(handles):
395
- target_handle = handles[idx]
396
- else:
397
- return {"action": "switch_tab", "success": False, "message": f"Tab index {value} out of range"}
398
- driver.switch_to.window(target_handle)
399
- return {"action": "switch_tab", "success": True, "message": f"Switched to tab {value}"}
400
- except Exception as e:
401
- logger.error(f"Switch tab failed: {e}")
402
- return {"action": "switch_tab", "success": False, "message": f"Switch tab failed: {str(e)}"}
403
-
404
- def _handle_execute_script(self, driver: webdriver.Chrome, script: str) -> Dict[str, Any]:
405
- """
406
- Execute arbitrary JavaScript code.
407
- """
408
- try:
409
- result = driver.execute_script(script)
410
- return {"action": "execute_script", "success": True, "message": "Script executed successfully", "result": result}
411
- except Exception as e:
412
- logger.error(f"Execute script failed: {e}")
413
- return {"action": "execute_script", "success": False, "message": f"Script execution failed: {str(e)}"}
414
-
415
- def _handle_drag_and_drop(self, driver: webdriver.Chrome, source_selector: str, target_selector: str) -> Dict[str, Any]:
416
- """
417
- Simulate a drag-and-drop operation.
418
- """
419
- try:
420
- source = WebDriverWait(driver, self.default_timeout).until(
421
- EC.presence_of_element_located((By.CSS_SELECTOR, source_selector))
422
- )
423
- target = WebDriverWait(driver, self.default_timeout).until(
424
- EC.presence_of_element_located((By.CSS_SELECTOR, target_selector))
425
- )
426
- ActionChains(driver).drag_and_drop(source, target).perform()
427
- return {"action": "drag_and_drop", "success": True, "message": f"Dragged element from {source_selector} to {target_selector}"}
428
- except Exception as e:
429
- logger.error(f"Drag and drop failed from {source_selector} to {target_selector}: {e}")
430
- return {"action": "drag_and_drop", "success": False, "message": f"Drag and drop failed: {str(e)}"}
431
-
432
- def _capture_failure_screenshot(self, driver: webdriver.Chrome, action: str):
433
- """Capture a screenshot for debugging when an error occurs."""
434
- filename = f"failure_{action}_{int(time.time())}.png"
435
- try:
436
- driver.save_screenshot(filename)
437
- logger.info(f"Failure screenshot captured: {filename}")
438
- except Exception as e:
439
- logger.error(f"Failed to capture screenshot: {e}")
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes