semantio 0.0.6__tar.gz → 0.0.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. {semantio-0.0.6 → semantio-0.0.7}/PKG-INFO +1 -1
  2. semantio-0.0.7/semantio/tools/web_browser.py +545 -0
  3. {semantio-0.0.6 → semantio-0.0.7}/semantio.egg-info/PKG-INFO +1 -1
  4. {semantio-0.0.6 → semantio-0.0.7}/semantio.egg-info/requires.txt +0 -1
  5. {semantio-0.0.6 → semantio-0.0.7}/setup.py +1 -2
  6. semantio-0.0.6/semantio/tools/web_browser.py +0 -439
  7. {semantio-0.0.6 → semantio-0.0.7}/LICENSE +0 -0
  8. {semantio-0.0.6 → semantio-0.0.7}/README.md +0 -0
  9. {semantio-0.0.6 → semantio-0.0.7}/semantio/__init__.py +0 -0
  10. {semantio-0.0.6 → semantio-0.0.7}/semantio/agent.py +0 -0
  11. {semantio-0.0.6 → semantio-0.0.7}/semantio/api/__init__.py +0 -0
  12. {semantio-0.0.6 → semantio-0.0.7}/semantio/api/api_generator.py +0 -0
  13. {semantio-0.0.6 → semantio-0.0.7}/semantio/api/fastapi_app.py +0 -0
  14. {semantio-0.0.6 → semantio-0.0.7}/semantio/cli/__init__.py +0 -0
  15. {semantio-0.0.6 → semantio-0.0.7}/semantio/cli/main.py +0 -0
  16. {semantio-0.0.6 → semantio-0.0.7}/semantio/knowledge_base/__init__.py +0 -0
  17. {semantio-0.0.6 → semantio-0.0.7}/semantio/knowledge_base/document_loader.py +0 -0
  18. {semantio-0.0.6 → semantio-0.0.7}/semantio/knowledge_base/retriever.py +0 -0
  19. {semantio-0.0.6 → semantio-0.0.7}/semantio/knowledge_base/vector_store.py +0 -0
  20. {semantio-0.0.6 → semantio-0.0.7}/semantio/llm/__init__.py +0 -0
  21. {semantio-0.0.6 → semantio-0.0.7}/semantio/llm/anthropic.py +0 -0
  22. {semantio-0.0.6 → semantio-0.0.7}/semantio/llm/base_llm.py +0 -0
  23. {semantio-0.0.6 → semantio-0.0.7}/semantio/llm/deepseek.py +0 -0
  24. {semantio-0.0.6 → semantio-0.0.7}/semantio/llm/gemini.py +0 -0
  25. {semantio-0.0.6 → semantio-0.0.7}/semantio/llm/groq.py +0 -0
  26. {semantio-0.0.6 → semantio-0.0.7}/semantio/llm/mistral.py +0 -0
  27. {semantio-0.0.6 → semantio-0.0.7}/semantio/llm/openai.py +0 -0
  28. {semantio-0.0.6 → semantio-0.0.7}/semantio/memory.py +0 -0
  29. {semantio-0.0.6 → semantio-0.0.7}/semantio/models.py +0 -0
  30. {semantio-0.0.6 → semantio-0.0.7}/semantio/rag.py +0 -0
  31. {semantio-0.0.6 → semantio-0.0.7}/semantio/storage/__init__.py +0 -0
  32. {semantio-0.0.6 → semantio-0.0.7}/semantio/storage/base_storage.py +0 -0
  33. {semantio-0.0.6 → semantio-0.0.7}/semantio/storage/cloud_storage.py +0 -0
  34. {semantio-0.0.6 → semantio-0.0.7}/semantio/storage/in_memory_storage.py +0 -0
  35. {semantio-0.0.6 → semantio-0.0.7}/semantio/storage/local_storage.py +0 -0
  36. {semantio-0.0.6 → semantio-0.0.7}/semantio/tools/__init__.py +0 -0
  37. {semantio-0.0.6 → semantio-0.0.7}/semantio/tools/base_tool.py +0 -0
  38. {semantio-0.0.6 → semantio-0.0.7}/semantio/tools/crypto.py +0 -0
  39. {semantio-0.0.6 → semantio-0.0.7}/semantio/tools/duckduckgo.py +0 -0
  40. {semantio-0.0.6 → semantio-0.0.7}/semantio/tools/stocks.py +0 -0
  41. {semantio-0.0.6 → semantio-0.0.7}/semantio/utils/__init__.py +0 -0
  42. {semantio-0.0.6 → semantio-0.0.7}/semantio/utils/config.py +0 -0
  43. {semantio-0.0.6 → semantio-0.0.7}/semantio/utils/date_utils.py +0 -0
  44. {semantio-0.0.6 → semantio-0.0.7}/semantio/utils/file_utils.py +0 -0
  45. {semantio-0.0.6 → semantio-0.0.7}/semantio/utils/logger.py +0 -0
  46. {semantio-0.0.6 → semantio-0.0.7}/semantio/utils/validation_utils.py +0 -0
  47. {semantio-0.0.6 → semantio-0.0.7}/semantio.egg-info/SOURCES.txt +0 -0
  48. {semantio-0.0.6 → semantio-0.0.7}/semantio.egg-info/dependency_links.txt +0 -0
  49. {semantio-0.0.6 → semantio-0.0.7}/semantio.egg-info/entry_points.txt +0 -0
  50. {semantio-0.0.6 → semantio-0.0.7}/semantio.egg-info/top_level.txt +0 -0
  51. {semantio-0.0.6 → semantio-0.0.7}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: semantio
3
- Version: 0.0.6
3
+ Version: 0.0.7
4
4
  Summary: A powerful SDK for building AI agents
5
5
  Home-page: https://github.com/Syenah/semantio
6
6
  Author: Rakesh
@@ -0,0 +1,545 @@
1
+ # web_browser.py
2
+ from typing import Dict, Any, List, Optional, Callable
3
+ from pydantic import Field, BaseModel
4
+ from playwright.sync_api import sync_playwright, Page, TimeoutError as PlaywrightTimeoutError
5
+ import json, time, re, logging, os, difflib
6
+ from .base_tool import BaseTool
7
+
8
+ # Global logger
9
+ logger = logging.getLogger(__name__)
10
+
11
class BrowserPlan(BaseModel):
    """Container for the ordered automation steps produced by the planner."""

    # Required field: a list of plain dictionaries, one per automation step.
    tasks: List[Dict[str, Any]] = Field(
        default=...,
        description="List of automation tasks to execute",
    )
16
+
17
+ class WebBrowserTool(BaseTool):
18
+ name: str = Field("WebBrowser", description="Name of the tool")
19
+ description: str = Field(
20
+ "Universal web automation tool with advanced element identification (DOM and image fallback), modal analysis, AJAX waiting, multi-tab support, and custom JS injection.",
21
+ description="Tool description"
22
+ )
23
+ default_timeout: int = 15000 # 15 seconds in milliseconds
24
+ max_retries: int = 3
25
+
26
def __init__(self, *args, **kwargs):
    """Initialise the tool and attach a logger to the instance.

    All positional and keyword arguments are forwarded unchanged to the
    parent constructor.
    """
    super().__init__(*args, **kwargs)
    # ``logger`` is stored with ``object.__setattr__`` so that the model's
    # own attribute handling is bypassed for this extra attribute.
    instance_logger = logging.getLogger(__name__)
    object.__setattr__(self, "logger", instance_logger)
30
+
31
def execute(self, input: Dict[str, Any]) -> Dict[str, Any]:
    """Plan and run a browser automation workflow.

    Args:
        input: Options for the run. Recognised keys:
            ``query``: natural-language request handed to the planner.
            ``headless``: launch Chromium headless (default ``False``).
            ``timeout``: per-action timeout in seconds (default ``15``).
            ``max_retries``: attempts per task (default: the current
            ``self.max_retries``).

    Returns:
        ``{"status": "success", "results": [...], "total_time": float}``
        when every task succeeded. Otherwise a dict with
        ``"status": "error"`` and a ``"message"``; when the failure came
        from a task rather than an exception, ``"results"`` and
        ``"total_time"`` are included as well.

    The browser is left open once the task loop has finished. It is closed
    only when an unexpected exception aborts the run, because in that case
    nothing else holds a reference that could close it later.
    """
    overall_start = time.time()
    results: List[Dict[str, Any]] = []  # summaries of executed tasks
    playwright = None
    browser = None
    try:
        headless = input.get("headless", False)
        self.default_timeout = int(input.get("timeout", 15)) * 1000
        self.max_retries = int(input.get("max_retries", self.max_retries))
        # No page is loaded yet, so the planner receives an empty URL.
        plan = self._generate_plan(input.get("query", ""), "")
        if not plan.tasks:
            raise ValueError("No valid tasks in the generated plan.")

        # Started without a "with" block so the browser can outlive this call.
        playwright = sync_playwright().start()
        browser = playwright.chromium.launch(headless=headless)
        context = browser.new_context()
        page = context.new_page()

        # Map actions to handlers. ``pg`` is the page active at call time.
        action_map: Dict[str, Callable[[Page, Dict[str, Any]], Dict[str, Any]]] = {
            "navigate": lambda pg, task: self._handle_navigation(pg, task.get("value", "")),
            "click": lambda pg, task: self._handle_click(pg, task.get("selector", "")),
            "type": lambda pg, task: self._handle_typing(pg, task.get("selector", ""), task.get("value", ""), task),
            "wait": lambda pg, task: self._handle_wait(task.get("value", "")),
            "wait_for_ajax": lambda pg, task: self._handle_wait_for_ajax(pg, task.get("value", "")),
            "scroll": lambda pg, task: self._handle_scroll(pg, task.get("selector", "")),
            "hover": lambda pg, task: self._handle_hover(pg, task.get("selector", "")),
            "screenshot": lambda pg, task: self._handle_screenshot(pg, task.get("value", "screenshot.png")),
            "switch_tab": lambda pg, task: self._handle_switch_tab(context, task.get("value", "0")),
            "execute_script": lambda pg, task: self._handle_execute_script(pg, task.get("value", "")),
            "drag_and_drop": lambda pg, task: self._handle_drag_and_drop(pg, task.get("selector", ""), task.get("value", "")),
        }

        for task in plan.tasks:
            self._dismiss_unwanted_modals(page, task_context=task.get("description", ""))
            action = str(task.get("action", "")).lower()
            self.logger.info(f"Executing task: {task.get('description', action)}")
            start_time = time.time()

            # Context string of previously executed tasks for the fallback prompt.
            executed_context = "\n".join(f"{r['action']}: {r['message']}" for r in results)
            handler = action_map.get(action)
            if not handler:
                results.append({
                    "action": action,
                    "success": False,
                    "message": f"Unsupported action: {action}",
                })
                continue

            result = self._execute_with_retries(page, task, handler, executed_context)
            elapsed = time.time() - start_time
            result["elapsed"] = elapsed
            self.logger.info(f"Action '{action}' completed in {elapsed:.2f} seconds.")
            results.append(result)

            if not result.get("success", False):
                self.logger.error(f"Task failed: {result.get('message')}")
                self._capture_failure_screenshot(page, action)
                break

            if action == "switch_tab":
                # Later tasks must act on the tab that was just selected.
                page = self._page_for_tab(context, task.get("value", "0"), page)

        overall_elapsed = time.time() - overall_start
        self.logger.info(f"Total execution time: {overall_elapsed:.2f} seconds.")
        # Do not close the browser.
        failures = [r for r in results if not r.get("success", False)]
        if failures:
            return {
                "status": "error",
                "message": failures[-1].get("message", "Task failed"),
                "results": results,
                "total_time": overall_elapsed,
            }
        return {"status": "success", "results": results, "total_time": overall_elapsed}
    except Exception as e:
        self.logger.exception("Execution error:")
        # Release whatever was started; the caller never receives a handle.
        for resource, closer in ((browser, "close"), (playwright, "stop")):
            if resource is None:
                continue
            try:
                getattr(resource, closer)()
            except Exception:
                self.logger.exception("Failed to release browser resources:")
        return {"status": "error", "message": str(e)}


def _page_for_tab(self, context, value: Any, current_page: Page) -> Page:
    """Return the page a successful ``switch_tab`` value refers to.

    ``"new"`` selects the last page of the context, anything else is read as
    an index. ``current_page`` is returned when the value cannot be resolved.
    """
    pages = context.pages
    try:
        if str(value).strip().lower() == "new":
            return pages[-1]
        return pages[int(value)]
    except (IndexError, TypeError, ValueError):
        return current_page
106
+
107
+ def _generate_plan(self, query: str, current_url: str) -> BrowserPlan:
108
+ prompt = f"""Generate browser automation plan for: {query}
109
+
110
+ Current URL: {current_url or 'No page loaded yet'}
111
+
112
+ Required JSON format:
113
+ {{
114
+ "tasks": [
115
+ {{
116
+ "action": "navigate|click|type|wait|wait_for_ajax|scroll|hover|screenshot|switch_tab|execute_script|drag_and_drop",
117
+ "selector": "CSS selector (optional)",
118
+ "value": "input text/URL/seconds/filename/target-selector",
119
+ "description": "action purpose"
120
+ }}
121
+ ]
122
+ }}
123
+
124
+ Guidelines:
125
+ 1. Prefer IDs in selectors (#element-id) and semantic attributes.
126
+ 2. Include wait steps after navigation and wait for AJAX where applicable.
127
+ 3. Dismiss any modals/pop-ups that are not part of the task.
128
+ 4. For drag_and_drop, use source selector in 'selector' and target selector in 'value'.
129
+ 5. For execute_script, 'value' should contain valid JavaScript.
130
+ 6. For switch_tab, 'value' should be an index or keyword 'new'.
131
+ """
132
+ response = self.llm.generate(prompt=prompt)
133
+ return self._parse_plan(response)
134
+
135
def _parse_plan(self, response: str) -> BrowserPlan:
    """Turn the planner's raw reply into a ``BrowserPlan``.

    The JSON payload is taken from a ```json fenced block when one is
    present, otherwise from the outermost ``{...}`` span of the reply.
    Entries that are not JSON objects, or that lack ``action`` or
    ``description``, are skipped with a warning.

    Args:
        response: Text returned by the LLM.

    Returns:
        A plan with the validated tasks. Any parsing problem is logged and
        yields a plan with no tasks instead of raising.
    """
    try:
        json_match = re.search(r'```json\n?(.+?)\n?```', response, re.DOTALL)
        if json_match:
            plan_data = json.loads(json_match.group(1).strip())
        else:
            json_str_match = re.search(r'\{.*\}', response, re.DOTALL)
            if not json_str_match:
                raise ValueError("No JSON object found in the response.")
            plan_data = json.loads(json_str_match.group())
        if not isinstance(plan_data, dict):
            raise ValueError("Plan JSON must be an object containing a 'tasks' list.")
        raw_tasks = plan_data.get("tasks") or []
        if not isinstance(raw_tasks, list):
            raise ValueError("'tasks' must be a list.")

        validated_tasks = []
        for task in raw_tasks:
            if not isinstance(task, dict) or not all(key in task for key in ("action", "description")):
                self.logger.warning(f"Skipping task due to missing keys: {task}")
                continue
            selector = task.get("selector")
            value = task.get("value")
            validated_tasks.append({
                "action": str(task["action"]),
                # Handlers take strings: JSON null becomes "" and numbers
                # (e.g. a wait of 5) become their text form.
                "selector": "" if selector is None else str(selector),
                "value": "" if value is None else str(value),
                "description": task["description"],
            })
        return BrowserPlan(tasks=validated_tasks)
    except (json.JSONDecodeError, AttributeError, TypeError, ValueError) as e:
        self.logger.error(f"Plan parsing failed: {e}")
        return BrowserPlan(tasks=[])
160
+
161
+ def _execute_with_retries(self, page: Page, task: Dict[str, Any],
162
+ handler: Callable[[Page, Dict[str, Any]], Dict[str, Any]],
163
+ executed_context: str = "") -> Dict[str, Any]:
164
+ """Execute a task with retry logic. If it fails, pass the executed_context to the fallback prompt.
165
+ The fallback now returns a JSON array of tasks, which are executed sequentially."""
166
+ attempts = 0
167
+ result = {}
168
+ while attempts < self.max_retries:
169
+ result = self._execute_safe_task(page, task, handler)
170
+ if result.get("success", False):
171
+ return result
172
+ attempts += 1
173
+ self.logger.info(f"Retrying task '{task.get('action')}' (attempt {attempts + 1}/{self.max_retries})")
174
+ time.sleep(1 * attempts)
175
+ if task.get("action") in ["click", "type"]:
176
+ self.logger.info("HTML-based automation failed. Using fallback with image-based LLM.")
177
+ result = self._fallback_with_image_llm(page, task, executed_context)
178
+ return result
179
+
180
+ def _execute_safe_task(self, page: Page, task: Dict[str, Any],
181
+ handler: Callable[[Page, Dict[str, Any]], Dict[str, Any]]) -> Dict[str, Any]:
182
+ try:
183
+ return handler(page, task)
184
+ except Exception as e:
185
+ action = task.get("action", "unknown")
186
+ self.logger.exception(f"Error executing task '{action}':")
187
+ return {"action": action, "success": False, "message": f"Critical error: {str(e)}"}
188
+
189
+ def _dismiss_unwanted_modals(self, page: Page, task_context: str = ""):
190
+ modal_selectors = [".modal", ".popup", '[role="dialog"]', ".overlay", ".lightbox"]
191
+ for selector in modal_selectors:
192
+ elements = page.query_selector_all(selector)
193
+ for modal in elements:
194
+ if modal.is_visible():
195
+ self._handle_modal(page, modal, task_context)
196
+
197
+ def _handle_modal(self, page: Page, modal_element, task_context: str):
198
+ try:
199
+ modal_screenshot = modal_element.screenshot()
200
+ prompt = (
201
+ f"A modal is displayed on the page. The content is visible in the attached image. "
202
+ f"The current task context is: \"{task_context}\". "
203
+ "Based on the content of the modal and the task context, decide whether to dismiss the modal. "
204
+ "Return a JSON response in the format: { \"action\": \"dismiss\" } to dismiss or { \"action\": \"ignore\" } to leave it. "
205
+ "Return only the JSON."
206
+ )
207
+ response_text = self.llm.generate_from_image(prompt, image_bytes=modal_screenshot)
208
+ self.logger.info(f"LLM response for modal analysis: {response_text}")
209
+ json_match = re.search(r'```json\n?(.+?)\n?```', response_text, re.DOTALL)
210
+ json_text = json_match.group(1).strip() if json_match else response_text.strip()
211
+ decision = json.loads(json_text)
212
+ if decision.get("action") == "dismiss":
213
+ close_buttons = modal_element.query_selector_all(".close, .btn-close, [aria-label='Close'], [data-dismiss='modal']")
214
+ for btn in close_buttons:
215
+ if btn.is_visible():
216
+ btn.click()
217
+ self.logger.info("Modal dismissed using a close button.")
218
+ return
219
+ page.evaluate("(modal) => modal.remove()", modal_element)
220
+ self.logger.info("Modal dismissed by removal.")
221
+ else:
222
+ self.logger.info("Modal left intact according to LLM analysis.")
223
+ except Exception as e:
224
+ self.logger.error(f"Modal handling error: {e}")
225
+
226
+ def _advanced_find_element(self, page: Page, keyword: str):
227
+ try:
228
+ candidates = page.query_selector_all("input, textarea, button, a, div")
229
+ best_match = None
230
+ best_ratio = 0.0
231
+ for candidate in candidates:
232
+ attrs = page.evaluate(
233
+ """(el) => {
234
+ return {
235
+ id: el.id,
236
+ name: el.getAttribute('name'),
237
+ placeholder: el.getAttribute('placeholder'),
238
+ aria: el.getAttribute('aria-label'),
239
+ text: el.innerText
240
+ };
241
+ }""",
242
+ candidate,
243
+ )
244
+ combined_text = " ".join(
245
+ filter(None, [
246
+ attrs.get("id"),
247
+ attrs.get("name"),
248
+ attrs.get("placeholder"),
249
+ attrs.get("aria"),
250
+ attrs.get("text"),
251
+ ])
252
+ )
253
+ ratio = difflib.SequenceMatcher(None, combined_text.lower(), keyword.lower()).ratio()
254
+ if ratio > best_ratio:
255
+ best_ratio = ratio
256
+ best_match = candidate
257
+ if best_ratio > 0.5:
258
+ self.logger.info(f"Advanced fallback detected element with similarity {best_ratio:.2f} for keyword '{keyword}'")
259
+ return best_match
260
+ return None
261
+ except Exception as e:
262
+ self.logger.error(f"Advanced find element error: {e}")
263
+ return None
264
+
265
+ def _annotate_page_with_numbers(self, page: Page, query: str = "button, a, input, [onclick]"):
266
+ script = f"""
267
+ (() => {{
268
+ document.querySelectorAll('.automation-annotation-overlay').forEach(el => el.remove());
269
+ const elements = document.querySelectorAll('{query}');
270
+ let counter = 1;
271
+ elements.forEach(el => {{
272
+ const rect = el.getBoundingClientRect();
273
+ if (rect.width === 0 || rect.height === 0) return;
274
+ const overlay = document.createElement('div');
275
+ overlay.classList.add('automation-annotation-overlay');
276
+ overlay.style.position = 'absolute';
277
+ overlay.style.left = (rect.left + window.scrollX) + 'px';
278
+ overlay.style.top = (rect.top + window.scrollY) + 'px';
279
+ overlay.style.width = rect.width + 'px';
280
+ overlay.style.height = rect.height + 'px';
281
+ overlay.style.border = '2px solid red';
282
+ overlay.style.zIndex = 9999;
283
+ overlay.style.pointerEvents = 'none';
284
+ overlay.textContent = counter;
285
+ overlay.style.fontSize = '16px';
286
+ overlay.style.fontWeight = 'bold';
287
+ overlay.style.color = 'red';
288
+ overlay.style.backgroundColor = 'rgba(255, 255, 255, 0.7)';
289
+ document.body.appendChild(overlay);
290
+ counter += 1;
291
+ }});
292
+ }})();
293
+ """
294
+ page.evaluate(script)
295
+
296
+ def _click_element_by_number(self, page: Page, number: int) -> Dict[str, Any]:
297
+ candidates = [el for el in page.query_selector_all("button, a, input, [onclick]") if el.is_visible()]
298
+ index = number - 1
299
+ if index < len(candidates):
300
+ candidate = candidates[index]
301
+ candidate.scroll_into_view_if_needed()
302
+ try:
303
+ candidate.click()
304
+ return {"action": "click", "success": True, "message": f"Clicked element number {number}"}
305
+ except Exception as e:
306
+ return {"action": "click", "success": False, "message": f"Click failed: {str(e)}"}
307
+ else:
308
+ return {"action": "click", "success": False, "message": f"Element number {number} not found."}
309
+
310
+ def _fallback_with_image_llm(self, page: Page, task: Dict[str, Any], executed_context: str = "") -> Dict[str, Any]:
311
+ """
312
+ Fallback method: Annotate the page, capture a screenshot, and ask the LLM (via image analysis)
313
+ to generate a JSON array of tasks for the next steps.
314
+ Each fallback task is an object:
315
+ {
316
+ "action": "click" or "type",
317
+ "element_number": <number>,
318
+ "text": <if action is 'type', the text to type; otherwise an empty string>
319
+ }
320
+ The prompt includes the executed_context.
321
+ """
322
+ query = "input, textarea" if task.get("action") == "type" else "button, a, input, [onclick]"
323
+ self._annotate_page_with_numbers(page, query=query)
324
+ time.sleep(1)
325
+ screenshot_bytes = page.screenshot(type="png")
326
+ extra = ""
327
+ if task.get("action") == "type":
328
+ extra = f"\nThe exact text to be entered is: \"{task.get('value', '').strip()}\"."
329
+ prompt = (
330
+ f"Tasks executed so far:\n{executed_context}\n\n"
331
+ f"The following task remains: {task.get('description', '')}.{extra}\n"
332
+ "I have annotated the page with numbered overlays using the appropriate query. "
333
+ "Based on the attached screenshot, generate a JSON array of tasks that need to be performed next. "
334
+ "Each task should be a JSON object with the format:\n"
335
+ "[\n"
336
+ " {\n"
337
+ " \"action\": \"click\" or \"type\",\n"
338
+ " \"element_number\": <number>,\n"
339
+ " \"text\": <if action is 'type', the text to type; otherwise an empty string>\n"
340
+ " },\n"
341
+ " ...\n"
342
+ "]\n"
343
+ "Return only the JSON array."
344
+ )
345
+ response_text = self.llm.generate_from_image(prompt, image_bytes=screenshot_bytes)
346
+ self.logger.info(f"LLM response for fallback: {response_text}")
347
+ try:
348
+ fallback_tasks = json.loads(response_text.strip())
349
+ if not isinstance(fallback_tasks, list):
350
+ fallback_tasks = [fallback_tasks]
351
+ except Exception as e:
352
+ json_match = re.search(r'```json\n?(.+?)\n?```', response_text, re.DOTALL)
353
+ if json_match:
354
+ json_text = json_match.group(1).strip()
355
+ fallback_tasks = json.loads(json_text)
356
+ if not isinstance(fallback_tasks, list):
357
+ fallback_tasks = [fallback_tasks]
358
+ else:
359
+ return {"action": task.get("action"), "success": False, "message": f"Fallback failed to parse JSON: {str(e)}"}
360
+
361
+ fallback_results = []
362
+ for fb_task in fallback_tasks:
363
+ action = fb_task.get("action")
364
+ element_number = fb_task.get("element_number")
365
+ if action == "type":
366
+ returned_text = fb_task.get("text", "").strip()
367
+ original_text = task.get("value", "").strip()
368
+ if returned_text.lower() != original_text.lower():
369
+ self.logger.info("Overriding LLM-provided text with original input text.")
370
+ text = original_text
371
+ else:
372
+ text = returned_text
373
+ else:
374
+ text = fb_task.get("text", "")
375
+ if action == "click":
376
+ self.logger.info(f"LLM indicated fallback click on element number {element_number}.")
377
+ res = self._click_element_by_number(page, element_number)
378
+ elif action == "type":
379
+ candidates = [el for el in page.query_selector_all("input, textarea") if el.is_visible()]
380
+ if element_number - 1 < len(candidates):
381
+ candidate = candidates[element_number - 1]
382
+ candidate.scroll_into_view_if_needed()
383
+ try:
384
+ candidate.fill(text, timeout=self.default_timeout)
385
+ res = {"action": "type", "success": True, "message": f"Typed '{text}' into element number {element_number}"}
386
+ except Exception as ex:
387
+ res = {"action": "type", "success": False, "message": f"Typing failed on fallback element: {str(ex)}"}
388
+ else:
389
+ res = {"action": "type", "success": False, "message": f"Element number {element_number} not found."}
390
+ else:
391
+ res = {"action": task.get("action"), "success": False, "message": "Invalid fallback action."}
392
+ fallback_results.append(res)
393
+ overall_success = any(r.get("success", False) for r in fallback_results)
394
+ overall_message = "; ".join([r.get("message", "") for r in fallback_results])
395
+ return {"action": task.get("action"), "success": overall_success, "message": overall_message}
396
+
397
+ def _handle_navigation(self, page: Page, url: str) -> Dict[str, Any]:
398
+ if not url.startswith(("http://", "https://")):
399
+ url = f"https://{url}"
400
+ try:
401
+ page.goto(url, timeout=self.default_timeout)
402
+ page.wait_for_selector("body", timeout=self.default_timeout)
403
+ return {"action": "navigate", "success": True, "message": f"Navigated to {url}"}
404
+ except PlaywrightTimeoutError as e:
405
+ self.logger.error(f"Navigation to {url} timed out: {e}")
406
+ return {"action": "navigate", "success": False, "message": f"Navigation timed out: {str(e)}"}
407
+ except Exception as e:
408
+ self.logger.error(f"Navigation to {url} failed: {e}")
409
+ return {"action": "navigate", "success": False, "message": f"Navigation failed: {str(e)}"}
410
+
411
+ def _handle_click(self, page: Page, selector: str) -> Dict[str, Any]:
412
+ try:
413
+ page.wait_for_selector(selector, state="visible", timeout=self.default_timeout)
414
+ page.click(selector, timeout=self.default_timeout)
415
+ return {"action": "click", "success": True, "message": f"Clicked element: {selector}"}
416
+ except PlaywrightTimeoutError as e:
417
+ self.logger.error(f"Click action timed out on selector {selector}: {e}")
418
+ return {"action": "click", "success": False, "message": f"Click timed out: {str(e)}"}
419
+ except Exception as e:
420
+ self.logger.error(f"Click action failed on selector {selector}: {e}")
421
+ return {"action": "click", "success": False, "message": f"Click failed: {str(e)}"}
422
+
423
+ def _handle_typing(self, page: Page, selector: str, text: str, task: Dict[str, Any]) -> Dict[str, Any]:
424
+ try:
425
+ page.wait_for_selector(selector, state="attached", timeout=self.default_timeout)
426
+ page.fill(selector, text, timeout=self.default_timeout)
427
+ return {"action": "type", "success": True, "message": f"Typed '{text}' into element."}
428
+ except PlaywrightTimeoutError as e:
429
+ self.logger.info("Primary selector failed; using advanced fallback for element detection.")
430
+ element = self._advanced_find_element(page, "search")
431
+ if not element:
432
+ return {"action": "type", "success": False, "message": f"Typing failed: No search-like element found; error: {str(e)}"}
433
+ try:
434
+ element.fill(text, timeout=self.default_timeout)
435
+ return {"action": "type", "success": True, "message": f"Typed '{text}' into fallback element."}
436
+ except Exception as ex:
437
+ return {"action": "type", "success": False, "message": f"Typing failed on fallback element: {str(ex)}"}
438
+ except Exception as e:
439
+ self.logger.error(f"Typing action failed: {e}")
440
+ return {"action": "type", "success": False, "message": f"Typing failed: {str(e)}"}
441
+
442
+ def _handle_wait(self, seconds: str) -> Dict[str, Any]:
443
+ try:
444
+ wait_time = float(seconds)
445
+ self.logger.info(f"Waiting for {wait_time} seconds")
446
+ time.sleep(wait_time)
447
+ return {"action": "wait", "success": True, "message": f"Waited {wait_time} seconds"}
448
+ except ValueError as e:
449
+ self.logger.error(f"Invalid wait time provided: {seconds}")
450
+ return {"action": "wait", "success": False, "message": "Invalid wait time"}
451
+
452
+ def _handle_wait_for_ajax(self, page: Page, seconds: str) -> Dict[str, Any]:
453
+ try:
454
+ timeout_seconds = int(seconds) if seconds.strip() != "" else 30
455
+ self.logger.info(f"Waiting for AJAX/network activity for up to {timeout_seconds} seconds.")
456
+ end_time = time.time() + timeout_seconds
457
+ while time.time() < end_time:
458
+ ajax_complete = page.evaluate("""
459
+ () => {
460
+ return (window.jQuery ? jQuery.active === 0 : true) &&
461
+ (typeof window.fetch === 'function' ? true : true);
462
+ }
463
+ """)
464
+ if ajax_complete:
465
+ break
466
+ time.sleep(0.5)
467
+ return {"action": "wait_for_ajax", "success": True, "message": "AJAX/network activity subsided."}
468
+ except Exception as e:
469
+ self.logger.error(f"Wait for AJAX failed: {e}")
470
+ return {"action": "wait_for_ajax", "success": False, "message": f"Wait for AJAX failed: {str(e)}"}
471
+
472
+ def _handle_scroll(self, page: Page, selector: str) -> Dict[str, Any]:
473
+ try:
474
+ if selector:
475
+ page.wait_for_selector(selector, timeout=self.default_timeout)
476
+ page.eval_on_selector(selector, "el => el.scrollIntoView({behavior: 'smooth', block: 'center'})")
477
+ scroll_target = selector
478
+ else:
479
+ page.evaluate("window.scrollTo(0, document.body.scrollHeight);")
480
+ scroll_target = "page bottom"
481
+ return {"action": "scroll", "success": True, "message": f"Scrolled to {scroll_target}"}
482
+ except Exception as e:
483
+ self.logger.error(f"Scroll action failed on selector {selector}: {e}")
484
+ return {"action": "scroll", "success": False, "message": f"Scroll failed: {str(e)}"}
485
+
486
+ def _handle_hover(self, page: Page, selector: str) -> Dict[str, Any]:
487
+ try:
488
+ page.wait_for_selector(selector, state="visible", timeout=self.default_timeout)
489
+ page.hover(selector, timeout=self.default_timeout)
490
+ return {"action": "hover", "success": True, "message": f"Hovered over {selector}"}
491
+ except Exception as e:
492
+ self.logger.error(f"Hover action failed on selector {selector}: {e}")
493
+ return {"action": "hover", "success": False, "message": f"Hover failed: {str(e)}"}
494
+
495
+ def _handle_screenshot(self, page: Page, filename: str) -> Dict[str, Any]:
496
+ try:
497
+ page.screenshot(path=filename)
498
+ return {"action": "screenshot", "success": True, "message": f"Screenshot saved as {filename}"}
499
+ except Exception as e:
500
+ self.logger.error(f"Screenshot capture failed: {e}")
501
+ return {"action": "screenshot", "success": False, "message": f"Screenshot failed: {str(e)}"}
502
+
503
+ def _handle_switch_tab(self, context, value: str) -> Dict[str, Any]:
504
+ try:
505
+ pages = context.pages
506
+ if value.lower() == "new":
507
+ target_page = pages[-1]
508
+ else:
509
+ idx = int(value)
510
+ if idx < len(pages):
511
+ target_page = pages[idx]
512
+ else:
513
+ return {"action": "switch_tab", "success": False, "message": f"Tab index {value} out of range"}
514
+ return {"action": "switch_tab", "success": True, "message": f"Switched to tab {value}"}
515
+ except Exception as e:
516
+ self.logger.error(f"Switch tab failed: {e}")
517
+ return {"action": "switch_tab", "success": False, "message": f"Switch tab failed: {str(e)}"}
518
+
519
+ def _handle_execute_script(self, page: Page, script: str) -> Dict[str, Any]:
520
+ try:
521
+ result = page.evaluate(script)
522
+ return {"action": "execute_script", "success": True, "message": "Script executed successfully", "result": result}
523
+ except Exception as e:
524
+ self.logger.error(f"Execute script failed: {e}")
525
+ return {"action": "execute_script", "success": False, "message": f"Script execution failed: {str(e)}"}
526
+
527
+ def _handle_drag_and_drop(self, page: Page, source_selector: str, target_selector: str) -> Dict[str, Any]:
528
+ try:
529
+ page.wait_for_selector(source_selector, timeout=self.default_timeout)
530
+ page.wait_for_selector(target_selector, timeout=self.default_timeout)
531
+ source = page.locator(source_selector)
532
+ target = page.locator(target_selector)
533
+ source.drag_to(target, timeout=self.default_timeout)
534
+ return {"action": "drag_and_drop", "success": True, "message": f"Dragged element from {source_selector} to {target_selector}"}
535
+ except Exception as e:
536
+ self.logger.error(f"Drag and drop failed from {source_selector} to {target_selector}: {e}")
537
+ return {"action": "drag_and_drop", "success": False, "message": f"Drag and drop failed: {str(e)}"}
538
+
539
+ def _capture_failure_screenshot(self, page: Page, action: str):
540
+ filename = f"failure_{action}_{int(time.time())}.png"
541
+ try:
542
+ page.screenshot(path=filename)
543
+ self.logger.info(f"Failure screenshot captured: {filename}")
544
+ except Exception as e:
545
+ self.logger.error(f"Failed to capture screenshot: {e}")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: semantio
3
- Version: 0.0.6
3
+ Version: 0.0.7
4
4
  Summary: A powerful SDK for building AI agents
5
5
  Home-page: https://github.com/Syenah/semantio
6
6
  Author: Rakesh
@@ -15,7 +15,6 @@ sentence-transformers
15
15
  fuzzywuzzy
16
16
  duckduckgo-search
17
17
  yfinance
18
- selenium
19
18
  beautifulsoup4
20
19
  webdriver-manager
21
20
  validators
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
2
2
 
3
3
  setup(
4
4
  name="semantio",
5
- version="0.0.6",
5
+ version="0.0.7",
6
6
  description="A powerful SDK for building AI agents",
7
7
  long_description=open("README.md").read(),
8
8
  long_description_content_type="text/markdown",
@@ -28,7 +28,6 @@ setup(
28
28
  "fuzzywuzzy", # For fuzzy string matching
29
29
  "duckduckgo-search", # For DuckDuckGo search
30
30
  "yfinance", # For stock/crypto prices
31
- "selenium", # For web scraping
32
31
  "beautifulsoup4", # For HTML parsing
33
32
  "webdriver-manager", # For browser automation
34
33
  "validators", # For URL validation
@@ -1,439 +0,0 @@
1
- # web_browser.py
2
- from typing import Dict, Any, List, Optional, Callable
3
- from pydantic import Field, BaseModel
4
- from selenium import webdriver
5
- from selenium.webdriver.common.by import By
6
- from selenium.webdriver.common.action_chains import ActionChains
7
- from selenium.webdriver.remote.webelement import WebElement
8
- from selenium.webdriver.support.ui import WebDriverWait
9
- from selenium.webdriver.support import expected_conditions as EC
10
- from selenium.webdriver.chrome.options import Options
11
- from selenium.webdriver.chrome.service import Service
12
- from webdriver_manager.chrome import ChromeDriverManager
13
- from bs4 import BeautifulSoup
14
- import json
15
- import time
16
- import re
17
- import logging
18
- import os
19
- import difflib
20
- from .base_tool import BaseTool
21
-
22
- logger = logging.getLogger(__name__)
23
-
24
- class BrowserPlan(BaseModel):
25
- tasks: List[Dict[str, Any]] = Field(
26
- ...,
27
- description="List of automation tasks to execute"
28
- )
29
-
30
- class WebBrowserTool(BaseTool):
31
- name: str = Field("WebBrowser", description="Name of the tool")
32
- description: str = Field(
33
- "Highly advanced universal web automation tool with advanced element identification, AJAX waiting, modal dismissal, multi-tab support, and custom JS injection.",
34
- description="Tool description"
35
- )
36
-
37
- default_timeout: int = 15 # Default wait timeout in seconds
38
- max_retries: int = 3 # Increased maximum retries for any task
39
-
40
- def execute(self, input: Dict[str, Any]) -> Dict[str, Any]:
41
- """Execute an advanced dynamic web automation workflow."""
42
- driver = None
43
- overall_start = time.time()
44
- try:
45
- headless = input.get("headless", False)
46
- self.default_timeout = int(input.get("timeout", self.default_timeout))
47
- self.max_retries = int(input.get("max_retries", self.max_retries))
48
- driver = self._init_browser(headless)
49
- results = []
50
- current_url = ""
51
-
52
- plan = self._generate_plan(input.get('query', ''), current_url)
53
- if not plan.tasks:
54
- raise ValueError("No valid tasks in the generated plan.")
55
-
56
- # Dynamic mapping: action name to handler function.
57
- action_map: Dict[str, Callable[[webdriver.Chrome, Dict[str, Any]], Dict[str, Any]]] = {
58
- "navigate": lambda d, task: self._handle_navigation(d, task.get("value", "")),
59
- "click": lambda d, task: self._handle_click(d, task.get("selector", "")),
60
- "type": lambda d, task: self._handle_typing(d, task.get("selector", ""), task.get("value", ""), task),
61
- "wait": lambda d, task: self._handle_wait(task.get("value", "")),
62
- "wait_for_ajax": lambda d, task: self._handle_wait_for_ajax(d, task.get("value", "30")),
63
- "scroll": lambda d, task: self._handle_scroll(d, task.get("selector", "")),
64
- "hover": lambda d, task: self._handle_hover(d, task.get("selector", "")),
65
- "screenshot": lambda d, task: self._handle_screenshot(d, task.get("value", "screenshot.png")),
66
- "switch_tab": lambda d, task: self._handle_switch_tab(d, task.get("value", "0")),
67
- "execute_script": lambda d, task: self._handle_execute_script(d, task.get("value", "")),
68
- "drag_and_drop": lambda d, task: self._handle_drag_and_drop(d, task.get("selector", ""), task.get("value", "")),
69
- }
70
-
71
- for task in plan.tasks:
72
- # Before each action, dismiss modals/overlays.
73
- self._dismiss_unwanted_modals(driver)
74
- action = task.get("action", "").lower()
75
- logger.info(f"Executing task: {task.get('description', action)}")
76
- start_time = time.time()
77
- handler = action_map.get(action)
78
- if not handler:
79
- results.append({
80
- "action": action,
81
- "success": False,
82
- "message": f"Unsupported action: {action}"
83
- })
84
- continue
85
-
86
- result = self._execute_with_retries(driver, task, handler)
87
- elapsed = time.time() - start_time
88
- result["elapsed"] = elapsed
89
- logger.info(f"Action '{action}' completed in {elapsed:.2f} seconds.")
90
- results.append(result)
91
-
92
- if not result.get('success', False):
93
- logger.error(f"Task failed: {result.get('message')}")
94
- self._capture_failure_screenshot(driver, action)
95
- break
96
-
97
- current_url = driver.current_url
98
-
99
- overall_elapsed = time.time() - overall_start
100
- logger.info(f"Total execution time: {overall_elapsed:.2f} seconds.")
101
- return {"status": "success", "results": results, "total_time": overall_elapsed}
102
-
103
- except Exception as e:
104
- logger.exception("Execution error:")
105
- return {"status": "error", "message": str(e)}
106
- finally:
107
- if driver:
108
- driver.quit()
109
-
110
- def _init_browser(self, headless: bool) -> webdriver.Chrome:
111
- """Initialize browser with advanced options."""
112
- options = Options()
113
- options.add_argument("--start-maximized")
114
- options.add_argument("--disable-blink-features=AutomationControlled")
115
- options.add_experimental_option("excludeSwitches", ["enable-automation"])
116
- if headless:
117
- options.add_argument("--headless=new")
118
- return webdriver.Chrome(
119
- service=Service(ChromeDriverManager().install()),
120
- options=options
121
- )
122
-
123
- def _generate_plan(self, query: str, current_url: str) -> BrowserPlan:
124
- """Generate an adaptive execution plan using an LLM or other dynamic planner."""
125
- prompt = f"""Generate browser automation plan for: {query}
126
-
127
- Current URL: {current_url or 'No page loaded yet'}
128
-
129
- Required JSON format:
130
- {{
131
- "tasks": [
132
- {{
133
- "action": "navigate|click|type|wait|wait_for_ajax|scroll|hover|screenshot|switch_tab|execute_script|drag_and_drop",
134
- "selector": "CSS selector (optional)",
135
- "value": "input text/URL/seconds/filename/target-selector",
136
- "description": "action purpose"
137
- }}
138
- ]
139
- }}
140
-
141
- Guidelines:
142
- 1. Prefer IDs in selectors (#element-id) and semantic attributes.
143
- 2. Include wait steps after navigation and wait for AJAX where applicable.
144
- 3. Dismiss any modals/pop-ups that are not part of the task.
145
- 4. For drag_and_drop, use source selector in 'selector' and target selector in 'value'.
146
- 5. For execute_script, 'value' should contain valid JavaScript.
147
- 6. For switch_tab, 'value' should be an index or keyword 'new'.
148
- """
149
- response = self.llm.generate(prompt=prompt)
150
- return self._parse_plan(response)
151
-
152
- def _parse_plan(self, response: str) -> BrowserPlan:
153
- """Robust JSON parsing with multiple fallback strategies."""
154
- try:
155
- json_match = re.search(r'```json\n?(.+?)\n?```', response, re.DOTALL)
156
- if json_match:
157
- plan_data = json.loads(json_match.group(1).strip())
158
- else:
159
- json_str_match = re.search(r'\{.*\}', response, re.DOTALL)
160
- if not json_str_match:
161
- raise ValueError("No JSON object found in the response.")
162
- plan_data = json.loads(json_str_match.group())
163
- validated_tasks = []
164
- for task in plan_data.get("tasks", []):
165
- if not all(key in task for key in ["action", "description"]):
166
- logger.warning(f"Skipping task due to missing keys: {task}")
167
- continue
168
- validated_tasks.append({
169
- "action": task["action"],
170
- "selector": task.get("selector", ""),
171
- "value": task.get("value", ""),
172
- "description": task["description"]
173
- })
174
- return BrowserPlan(tasks=validated_tasks)
175
- except (json.JSONDecodeError, AttributeError, ValueError) as e:
176
- logger.error(f"Plan parsing failed: {e}")
177
- return BrowserPlan(tasks=[])
178
-
179
- def _execute_with_retries(self, driver: webdriver.Chrome, task: Dict[str, Any],
180
- handler: Callable[[webdriver.Chrome, Dict[str, Any]], Dict[str, Any]]) -> Dict[str, Any]:
181
- """Execute a task with retry logic and exponential backoff."""
182
- attempts = 0
183
- result = {}
184
- while attempts < self.max_retries:
185
- result = self._execute_safe_task(driver, task, handler)
186
- if result.get("success", False):
187
- return result
188
- attempts += 1
189
- logger.info(f"Retrying task '{task.get('action')}' (attempt {attempts + 1}/{self.max_retries})")
190
- time.sleep(1 * attempts)
191
- return result
192
-
193
- def _execute_safe_task(self, driver: webdriver.Chrome, task: Dict[str, Any],
194
- handler: Callable[[webdriver.Chrome, Dict[str, Any]], Dict[str, Any]]) -> Dict[str, Any]:
195
- """Execute a task with comprehensive error handling."""
196
- try:
197
- return handler(driver, task)
198
- except Exception as e:
199
- action = task.get("action", "unknown")
200
- logger.exception(f"Error executing task '{action}':")
201
- return {"action": action, "success": False, "message": f"Critical error: {str(e)}"}
202
-
203
- def _dismiss_unwanted_modals(self, driver: webdriver.Chrome):
204
- """
205
- Dismiss or remove unwanted modals, overlays, or pop-ups.
206
- First attempts to click a close button; if not available, removes the element via JS.
207
- """
208
- try:
209
- modal_selectors = [".modal", ".popup", '[role="dialog"]', ".overlay", ".lightbox"]
210
- for selector in modal_selectors:
211
- elements = driver.find_elements(By.CSS_SELECTOR, selector)
212
- for modal in elements:
213
- if modal.is_displayed():
214
- close_selectors = [".close", ".btn-close", "[aria-label='Close']", "[data-dismiss='modal']"]
215
- dismissed = False
216
- for close_sel in close_selectors:
217
- try:
218
- close_button = modal.find_element(By.CSS_SELECTOR, close_sel)
219
- if close_button.is_displayed():
220
- close_button.click()
221
- dismissed = True
222
- logger.info(f"Dismissed modal using selector {close_sel}")
223
- time.sleep(1)
224
- break
225
- except Exception:
226
- continue
227
- if not dismissed:
228
- # Remove overlay by setting display to none
229
- driver.execute_script("arguments[0].remove();", modal)
230
- logger.info(f"Removed overlay/modal with selector {selector}")
231
- except Exception as e:
232
- logger.debug(f"Modal dismissal error: {e}")
233
-
234
- def _advanced_find_element(self, driver: webdriver.Chrome, keyword: str) -> Optional[WebElement]:
235
- """
236
- Advanced fallback for finding an element.
237
- Searches across multiple attributes and inner text using fuzzy matching.
238
- """
239
- candidates = driver.find_elements(By.CSS_SELECTOR, "input, textarea, button, a, div")
240
- best_match = None
241
- best_ratio = 0.0
242
- for candidate in candidates:
243
- combined_text = " ".join([
244
- candidate.get_attribute("id") or "",
245
- candidate.get_attribute("name") or "",
246
- candidate.get_attribute("placeholder") or "",
247
- candidate.get_attribute("aria-label") or "",
248
- candidate.text or "",
249
- ])
250
- ratio = difflib.SequenceMatcher(None, combined_text.lower(), keyword.lower()).ratio()
251
- if ratio > best_ratio:
252
- best_ratio = ratio
253
- best_match = candidate
254
- if best_ratio > 0.5:
255
- logger.info(f"Advanced fallback detected element with similarity {best_ratio:.2f} for keyword '{keyword}'")
256
- return best_match
257
- return None
258
-
259
- def _handle_navigation(self, driver: webdriver.Chrome, url: str) -> Dict[str, Any]:
260
- """Handle navigation with URL correction."""
261
- if not url.startswith(("http://", "https://")):
262
- url = f"https://{url}"
263
- try:
264
- driver.get(url)
265
- WebDriverWait(driver, self.default_timeout).until(EC.presence_of_element_located((By.TAG_NAME, "body")))
266
- return {"action": "navigate", "success": True, "message": f"Navigated to {url}"}
267
- except Exception as e:
268
- logger.error(f"Navigation to {url} failed: {e}")
269
- return {"action": "navigate", "success": False, "message": f"Navigation failed: {str(e)}"}
270
-
271
- def _handle_click(self, driver: webdriver.Chrome, selector: str) -> Dict[str, Any]:
272
- """Handle click actions with fallback using JS if needed."""
273
- try:
274
- element = WebDriverWait(driver, self.default_timeout).until(
275
- EC.element_to_be_clickable((By.CSS_SELECTOR, selector))
276
- )
277
- driver.execute_script("arguments[0].scrollIntoView({behavior: 'smooth', block: 'center'});", element)
278
- try:
279
- element.click()
280
- except Exception:
281
- driver.execute_script("arguments[0].click();", element)
282
- return {"action": "click", "success": True, "message": f"Clicked element: {selector}"}
283
- except Exception as e:
284
- logger.error(f"Click action failed on selector {selector}: {e}")
285
- return {"action": "click", "success": False, "message": f"Click failed: {str(e)}"}
286
-
287
- def _handle_typing(self, driver: webdriver.Chrome, selector: str, text: str, task: Dict[str, Any]) -> Dict[str, Any]:
288
- """
289
- Handle typing into an element.
290
- If the primary selector fails, attempt advanced fallback detection.
291
- """
292
- try:
293
- element = WebDriverWait(driver, self.default_timeout).until(
294
- EC.presence_of_element_located((By.CSS_SELECTOR, selector))
295
- )
296
- except Exception as e:
297
- # If the task seems to involve search or similar text, use advanced fallback.
298
- if "search" in task.get("description", "").lower() or "search" in selector.lower():
299
- logger.info("Primary selector failed; using advanced fallback for element detection.")
300
- element = self._advanced_find_element(driver, "search")
301
- if not element:
302
- return {"action": "type", "success": False, "message": f"Typing failed: No search-like element found; error: {str(e)}"}
303
- else:
304
- return {"action": "type", "success": False, "message": f"Typing failed: {str(e)}"}
305
- try:
306
- element.clear()
307
- element.send_keys(text)
308
- return {"action": "type", "success": True, "message": f"Typed '{text}' into element."}
309
- except Exception as e:
310
- logger.error(f"Typing action failed: {e}")
311
- return {"action": "type", "success": False, "message": f"Typing failed: {str(e)}"}
312
-
313
- def _handle_wait(self, seconds: str) -> Dict[str, Any]:
314
- """Handle a simple wait."""
315
- try:
316
- wait_time = float(seconds)
317
- logger.info(f"Waiting for {wait_time} seconds")
318
- time.sleep(wait_time)
319
- return {"action": "wait", "success": True, "message": f"Waited {wait_time} seconds"}
320
- except ValueError as e:
321
- logger.error(f"Invalid wait time provided: {seconds}")
322
- return {"action": "wait", "success": False, "message": "Invalid wait time"}
323
-
324
- def _handle_wait_for_ajax(self, driver: webdriver.Chrome, seconds: str) -> Dict[str, Any]:
325
- """
326
- Wait until AJAX/network activity has subsided.
327
- This implementation first checks for jQuery, then falls back to a generic check.
328
- """
329
- try:
330
- timeout = int(seconds)
331
- logger.info(f"Waiting for AJAX/network activity for up to {timeout} seconds.")
332
- end_time = time.time() + timeout
333
- while time.time() < end_time:
334
- ajax_complete = driver.execute_script("""
335
- return (window.jQuery ? jQuery.active === 0 : true) &&
336
- (typeof window.fetch === 'function' ? true : true);
337
- """)
338
- if ajax_complete:
339
- break
340
- time.sleep(0.5)
341
- return {"action": "wait_for_ajax", "success": True, "message": "AJAX/network activity subsided."}
342
- except Exception as e:
343
- logger.error(f"Wait for AJAX failed: {e}")
344
- return {"action": "wait_for_ajax", "success": False, "message": f"Wait for AJAX failed: {str(e)}"}
345
-
346
- def _handle_scroll(self, driver: webdriver.Chrome, selector: str) -> Dict[str, Any]:
347
- """Handle scrolling to a specific element or page bottom."""
348
- try:
349
- if selector:
350
- element = WebDriverWait(driver, self.default_timeout).until(
351
- EC.presence_of_element_located((By.CSS_SELECTOR, selector))
352
- )
353
- driver.execute_script("arguments[0].scrollIntoView({behavior: 'smooth', block: 'center'});", element)
354
- scroll_target = selector
355
- else:
356
- driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
357
- scroll_target = "page bottom"
358
- return {"action": "scroll", "success": True, "message": f"Scrolled to {scroll_target}"}
359
- except Exception as e:
360
- logger.error(f"Scroll action failed on selector {selector}: {e}")
361
- return {"action": "scroll", "success": False, "message": f"Scroll failed: {str(e)}"}
362
-
363
- def _handle_hover(self, driver: webdriver.Chrome, selector: str) -> Dict[str, Any]:
364
- """Handle mouse hover action."""
365
- try:
366
- element = WebDriverWait(driver, self.default_timeout).until(
367
- EC.visibility_of_element_located((By.CSS_SELECTOR, selector))
368
- )
369
- ActionChains(driver).move_to_element(element).perform()
370
- return {"action": "hover", "success": True, "message": f"Hovered over {selector}"}
371
- except Exception as e:
372
- logger.error(f"Hover action failed on selector {selector}: {e}")
373
- return {"action": "hover", "success": False, "message": f"Hover failed: {str(e)}"}
374
-
375
- def _handle_screenshot(self, driver: webdriver.Chrome, filename: str) -> Dict[str, Any]:
376
- """Capture a screenshot of the current browser state."""
377
- try:
378
- driver.save_screenshot(filename)
379
- return {"action": "screenshot", "success": True, "message": f"Screenshot saved as {filename}"}
380
- except Exception as e:
381
- logger.error(f"Screenshot capture failed: {e}")
382
- return {"action": "screenshot", "success": False, "message": f"Screenshot failed: {str(e)}"}
383
-
384
- def _handle_switch_tab(self, driver: webdriver.Chrome, value: str) -> Dict[str, Any]:
385
- """
386
- Switch between tabs. 'value' can be an index or the keyword 'new'.
387
- """
388
- try:
389
- handles = driver.window_handles
390
- if value.lower() == "new":
391
- target_handle = handles[-1]
392
- else:
393
- idx = int(value)
394
- if idx < len(handles):
395
- target_handle = handles[idx]
396
- else:
397
- return {"action": "switch_tab", "success": False, "message": f"Tab index {value} out of range"}
398
- driver.switch_to.window(target_handle)
399
- return {"action": "switch_tab", "success": True, "message": f"Switched to tab {value}"}
400
- except Exception as e:
401
- logger.error(f"Switch tab failed: {e}")
402
- return {"action": "switch_tab", "success": False, "message": f"Switch tab failed: {str(e)}"}
403
-
404
- def _handle_execute_script(self, driver: webdriver.Chrome, script: str) -> Dict[str, Any]:
405
- """
406
- Execute arbitrary JavaScript code.
407
- """
408
- try:
409
- result = driver.execute_script(script)
410
- return {"action": "execute_script", "success": True, "message": "Script executed successfully", "result": result}
411
- except Exception as e:
412
- logger.error(f"Execute script failed: {e}")
413
- return {"action": "execute_script", "success": False, "message": f"Script execution failed: {str(e)}"}
414
-
415
- def _handle_drag_and_drop(self, driver: webdriver.Chrome, source_selector: str, target_selector: str) -> Dict[str, Any]:
416
- """
417
- Simulate a drag-and-drop operation.
418
- """
419
- try:
420
- source = WebDriverWait(driver, self.default_timeout).until(
421
- EC.presence_of_element_located((By.CSS_SELECTOR, source_selector))
422
- )
423
- target = WebDriverWait(driver, self.default_timeout).until(
424
- EC.presence_of_element_located((By.CSS_SELECTOR, target_selector))
425
- )
426
- ActionChains(driver).drag_and_drop(source, target).perform()
427
- return {"action": "drag_and_drop", "success": True, "message": f"Dragged element from {source_selector} to {target_selector}"}
428
- except Exception as e:
429
- logger.error(f"Drag and drop failed from {source_selector} to {target_selector}: {e}")
430
- return {"action": "drag_and_drop", "success": False, "message": f"Drag and drop failed: {str(e)}"}
431
-
432
- def _capture_failure_screenshot(self, driver: webdriver.Chrome, action: str):
433
- """Capture a screenshot for debugging when an error occurs."""
434
- filename = f"failure_{action}_{int(time.time())}.png"
435
- try:
436
- driver.save_screenshot(filename)
437
- logger.info(f"Failure screenshot captured: {filename}")
438
- except Exception as e:
439
- logger.error(f"Failed to capture screenshot: {e}")
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes