semantio 0.0.6__tar.gz → 0.0.7__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {semantio-0.0.6 → semantio-0.0.7}/PKG-INFO +1 -1
- semantio-0.0.7/semantio/tools/web_browser.py +545 -0
- {semantio-0.0.6 → semantio-0.0.7}/semantio.egg-info/PKG-INFO +1 -1
- {semantio-0.0.6 → semantio-0.0.7}/semantio.egg-info/requires.txt +0 -1
- {semantio-0.0.6 → semantio-0.0.7}/setup.py +1 -2
- semantio-0.0.6/semantio/tools/web_browser.py +0 -439
- {semantio-0.0.6 → semantio-0.0.7}/LICENSE +0 -0
- {semantio-0.0.6 → semantio-0.0.7}/README.md +0 -0
- {semantio-0.0.6 → semantio-0.0.7}/semantio/__init__.py +0 -0
- {semantio-0.0.6 → semantio-0.0.7}/semantio/agent.py +0 -0
- {semantio-0.0.6 → semantio-0.0.7}/semantio/api/__init__.py +0 -0
- {semantio-0.0.6 → semantio-0.0.7}/semantio/api/api_generator.py +0 -0
- {semantio-0.0.6 → semantio-0.0.7}/semantio/api/fastapi_app.py +0 -0
- {semantio-0.0.6 → semantio-0.0.7}/semantio/cli/__init__.py +0 -0
- {semantio-0.0.6 → semantio-0.0.7}/semantio/cli/main.py +0 -0
- {semantio-0.0.6 → semantio-0.0.7}/semantio/knowledge_base/__init__.py +0 -0
- {semantio-0.0.6 → semantio-0.0.7}/semantio/knowledge_base/document_loader.py +0 -0
- {semantio-0.0.6 → semantio-0.0.7}/semantio/knowledge_base/retriever.py +0 -0
- {semantio-0.0.6 → semantio-0.0.7}/semantio/knowledge_base/vector_store.py +0 -0
- {semantio-0.0.6 → semantio-0.0.7}/semantio/llm/__init__.py +0 -0
- {semantio-0.0.6 → semantio-0.0.7}/semantio/llm/anthropic.py +0 -0
- {semantio-0.0.6 → semantio-0.0.7}/semantio/llm/base_llm.py +0 -0
- {semantio-0.0.6 → semantio-0.0.7}/semantio/llm/deepseek.py +0 -0
- {semantio-0.0.6 → semantio-0.0.7}/semantio/llm/gemini.py +0 -0
- {semantio-0.0.6 → semantio-0.0.7}/semantio/llm/groq.py +0 -0
- {semantio-0.0.6 → semantio-0.0.7}/semantio/llm/mistral.py +0 -0
- {semantio-0.0.6 → semantio-0.0.7}/semantio/llm/openai.py +0 -0
- {semantio-0.0.6 → semantio-0.0.7}/semantio/memory.py +0 -0
- {semantio-0.0.6 → semantio-0.0.7}/semantio/models.py +0 -0
- {semantio-0.0.6 → semantio-0.0.7}/semantio/rag.py +0 -0
- {semantio-0.0.6 → semantio-0.0.7}/semantio/storage/__init__.py +0 -0
- {semantio-0.0.6 → semantio-0.0.7}/semantio/storage/base_storage.py +0 -0
- {semantio-0.0.6 → semantio-0.0.7}/semantio/storage/cloud_storage.py +0 -0
- {semantio-0.0.6 → semantio-0.0.7}/semantio/storage/in_memory_storage.py +0 -0
- {semantio-0.0.6 → semantio-0.0.7}/semantio/storage/local_storage.py +0 -0
- {semantio-0.0.6 → semantio-0.0.7}/semantio/tools/__init__.py +0 -0
- {semantio-0.0.6 → semantio-0.0.7}/semantio/tools/base_tool.py +0 -0
- {semantio-0.0.6 → semantio-0.0.7}/semantio/tools/crypto.py +0 -0
- {semantio-0.0.6 → semantio-0.0.7}/semantio/tools/duckduckgo.py +0 -0
- {semantio-0.0.6 → semantio-0.0.7}/semantio/tools/stocks.py +0 -0
- {semantio-0.0.6 → semantio-0.0.7}/semantio/utils/__init__.py +0 -0
- {semantio-0.0.6 → semantio-0.0.7}/semantio/utils/config.py +0 -0
- {semantio-0.0.6 → semantio-0.0.7}/semantio/utils/date_utils.py +0 -0
- {semantio-0.0.6 → semantio-0.0.7}/semantio/utils/file_utils.py +0 -0
- {semantio-0.0.6 → semantio-0.0.7}/semantio/utils/logger.py +0 -0
- {semantio-0.0.6 → semantio-0.0.7}/semantio/utils/validation_utils.py +0 -0
- {semantio-0.0.6 → semantio-0.0.7}/semantio.egg-info/SOURCES.txt +0 -0
- {semantio-0.0.6 → semantio-0.0.7}/semantio.egg-info/dependency_links.txt +0 -0
- {semantio-0.0.6 → semantio-0.0.7}/semantio.egg-info/entry_points.txt +0 -0
- {semantio-0.0.6 → semantio-0.0.7}/semantio.egg-info/top_level.txt +0 -0
- {semantio-0.0.6 → semantio-0.0.7}/setup.cfg +0 -0
@@ -0,0 +1,545 @@
|
|
1
|
+
# web_browser.py
|
2
|
+
from typing import Dict, Any, List, Optional, Callable
|
3
|
+
from pydantic import Field, BaseModel
|
4
|
+
from playwright.sync_api import sync_playwright, Page, TimeoutError as PlaywrightTimeoutError
|
5
|
+
import json, time, re, logging, os, difflib
|
6
|
+
from .base_tool import BaseTool
|
7
|
+
|
8
|
+
# Global logger
|
9
|
+
logger = logging.getLogger(__name__)
|
10
|
+
|
11
|
+
class BrowserPlan(BaseModel):
    """Container for the ordered list of automation tasks to run."""

    # Required field (no default): one dict per automation step.
    tasks: List[Dict[str, Any]] = Field(default=..., description="List of automation tasks to execute")
|
16
|
+
|
17
|
+
class WebBrowserTool(BaseTool):
    """Tool declaration: name, description, timing defaults and logger setup."""

    name: str = Field(default="WebBrowser", description="Name of the tool")
    description: str = Field(
        default="Universal web automation tool with advanced element identification (DOM and image fallback), modal analysis, AJAX waiting, multi-tab support, and custom JS injection.",
        description="Tool description",
    )
    # Timeout is kept in milliseconds: 15000 ms == 15 seconds.
    default_timeout: int = 15000
    max_retries: int = 3

    def __init__(self, *args, **kwargs):
        """Forward every argument to the base class, then attach a logger."""
        super().__init__(*args, **kwargs)
        # object.__setattr__ bypasses Pydantic's restrictions for extra attributes.
        tool_logger = logging.getLogger(__name__)
        object.__setattr__(self, "logger", tool_logger)
|
30
|
+
|
31
|
+
def execute(self, input: Dict[str, Any]) -> Dict[str, Any]:
    """
    Execute the browser automation workflow described by *input*.

    Keys read from *input* (all optional): ``query`` (text given to the
    planner), ``headless`` (default False), ``timeout`` in seconds
    (default 15) and ``max_retries``.

    A context string of already executed tasks is built before each task and
    passed on to the retry/fallback routine. The loop stops at the first task
    whose result is not successful, after saving a failure screenshot.

    DOES NOT close the browser when the task loop runs to its end. When an
    unexpected exception aborts the run, the browser and Playwright driver
    started here are shut down so they do not leak.

    Returns:
        ``{"status": "success", "results": [...], "total_time": seconds}``
        once the loop has ended (each result carries its own ``success``
        flag), or ``{"status": "error", "message": str}`` on an exception.
    """
    overall_start = time.time()
    results = []  # summaries of executed tasks (used to build the context)
    current_url = ""
    playwright = None
    browser = None
    try:
        headless = input.get("headless", False)
        self.default_timeout = int(input.get("timeout", 15)) * 1000
        self.max_retries = int(input.get("max_retries", self.max_retries))
        plan = self._generate_plan(input.get("query", ""), current_url)
        if not plan.tasks:
            raise ValueError("No valid tasks in the generated plan.")

        # Start Playwright without a "with" block so we can leave the browser open.
        playwright = sync_playwright().start()
        browser = playwright.chromium.launch(headless=headless)
        context = browser.new_context()
        page = context.new_page()

        # Map actions to handlers.
        action_map = {
            "navigate": lambda p, task: self._handle_navigation(p, task.get("value", "")),
            "click": lambda p, task: self._handle_click(p, task.get("selector", "")),
            "type": lambda p, task: self._handle_typing(p, task.get("selector", ""), task.get("value", ""), task),
            "wait": lambda p, task: self._handle_wait(task.get("value", "")),
            "wait_for_ajax": lambda p, task: self._handle_wait_for_ajax(p, task.get("value", "")),
            "scroll": lambda p, task: self._handle_scroll(p, task.get("selector", "")),
            "hover": lambda p, task: self._handle_hover(p, task.get("selector", "")),
            "screenshot": lambda p, task: self._handle_screenshot(p, task.get("value", "screenshot.png")),
            "switch_tab": lambda p, task: self._handle_switch_tab(context, task.get("value", "0")),
            "execute_script": lambda p, task: self._handle_execute_script(p, task.get("value", "")),
            "drag_and_drop": lambda p, task: self._handle_drag_and_drop(p, task.get("selector", ""), task.get("value", "")),
        }

        for task in plan.tasks:
            self._dismiss_unwanted_modals(page, task_context=task.get("description", ""))
            action = task.get("action", "").lower()
            self.logger.info(f"Executing task: {task.get('description', action)}")
            start_time = time.time()

            # Build a context string from previously executed tasks.
            executed_context = "\n".join([f"{r['action']}: {r['message']}" for r in results])
            handler = action_map.get(action)
            if not handler:
                results.append({
                    "action": action,
                    "success": False,
                    "message": f"Unsupported action: {action}"
                })
                continue

            result = self._execute_with_retries(page, task, handler, executed_context)
            elapsed = time.time() - start_time
            result["elapsed"] = elapsed
            self.logger.info(f"Action '{action}' completed in {elapsed:.2f} seconds.")
            results.append(result)

            if not result.get("success", False):
                self.logger.error(f"Task failed: {result.get('message')}")
                self._capture_failure_screenshot(page, action)
                break

            if action == "switch_tab":
                # The handler only reports on the request; rebind `page` so
                # the following tasks act on the tab that was selected.
                tabs = context.pages
                requested = str(task.get("value", "0")).strip().lower()
                page = tabs[-1] if requested == "new" else tabs[int(requested)]

            current_url = page.url

        overall_elapsed = time.time() - overall_start
        self.logger.info(f"Total execution time: {overall_elapsed:.2f} seconds.")
        # Do not close the browser.
        return {"status": "success", "results": results, "total_time": overall_elapsed}
    except Exception as e:
        self.logger.exception("Execution error:")
        # Release whatever was started so an aborted run leaves no browser
        # process or driver behind.
        for resource, closer in ((browser, "close"), (playwright, "stop")):
            if resource is None:
                continue
            try:
                getattr(resource, closer)()
            except Exception as cleanup_error:
                self.logger.warning(f"Cleanup via {closer}() failed: {cleanup_error}")
        return {"status": "error", "message": str(e)}
|
106
|
+
|
107
|
+
def _generate_plan(self, query: str, current_url: str) -> "BrowserPlan":
    """Ask the LLM for an automation plan for *query* and parse the reply.

    The prompt carries the query, the current URL (or a note that no page is
    loaded), the required JSON task format and the planning guidelines. The
    value returned by ``self.llm.generate`` is handed to ``self._parse_plan``.
    """
    page_state = current_url or 'No page loaded yet'
    supported_actions = "|".join([
        "navigate", "click", "type", "wait", "wait_for_ajax", "scroll",
        "hover", "screenshot", "switch_tab", "execute_script", "drag_and_drop",
    ])
    instructions = f"""Generate browser automation plan for: {query}

Current URL: {page_state}

Required JSON format:
{{
"tasks": [
{{
"action": "{supported_actions}",
"selector": "CSS selector (optional)",
"value": "input text/URL/seconds/filename/target-selector",
"description": "action purpose"
}}
]
}}

Guidelines:
1. Prefer IDs in selectors (#element-id) and semantic attributes.
2. Include wait steps after navigation and wait for AJAX where applicable.
3. Dismiss any modals/pop-ups that are not part of the task.
4. For drag_and_drop, use source selector in 'selector' and target selector in 'value'.
5. For execute_script, 'value' should contain valid JavaScript.
6. For switch_tab, 'value' should be an index or keyword 'new'.
"""
    llm_reply = self.llm.generate(prompt=instructions)
    return self._parse_plan(llm_reply)
|
134
|
+
|
135
|
+
def _parse_plan(self, response: str) -> "BrowserPlan":
    """Extract the task list from an LLM reply and wrap it in a BrowserPlan.

    The JSON is taken from a ```json fenced block when one is present,
    otherwise from the first ``{`` to the last ``}`` in *response*. Only
    tasks that are dicts with both ``action`` and ``description`` are kept;
    ``selector`` and ``value`` default to ``""``.

    Returns:
        A BrowserPlan with the validated tasks, or a plan with an empty task
        list when the reply cannot be parsed (the error is logged).
    """
    try:
        json_match = re.search(r'```json\n?(.+?)\n?```', response, re.DOTALL)
        if json_match:
            plan_data = json.loads(json_match.group(1).strip())
        else:
            json_str_match = re.search(r'\{.*\}', response, re.DOTALL)
            if not json_str_match:
                raise ValueError("No JSON object found in the response.")
            plan_data = json.loads(json_str_match.group())

        # The model may return a bare list or a null "tasks"; reject shapes
        # that the loop below cannot walk instead of crashing on them.
        if not isinstance(plan_data, dict):
            raise ValueError("Plan JSON must be an object containing 'tasks'.")
        raw_tasks = plan_data.get("tasks") or []
        if not isinstance(raw_tasks, list):
            raise ValueError("'tasks' must be a list.")

        validated_tasks = []
        for task in raw_tasks:
            # A non-dict entry (e.g. a string) would pass a substring "in"
            # test by accident and then fail on indexing, so check the type.
            if not isinstance(task, dict) or not all(key in task for key in ("action", "description")):
                self.logger.warning(f"Skipping task due to missing keys: {task}")
                continue
            validated_tasks.append({
                "action": task["action"],
                "selector": task.get("selector", ""),
                "value": task.get("value", ""),
                "description": task["description"]
            })
        return BrowserPlan(tasks=validated_tasks)
    except (json.JSONDecodeError, AttributeError, TypeError, ValueError) as e:
        self.logger.error(f"Plan parsing failed: {e}")
        return BrowserPlan(tasks=[])
|
160
|
+
|
161
|
+
def _execute_with_retries(self, page: "Page", task: Dict[str, Any],
                          handler: Callable[["Page", Dict[str, Any]], Dict[str, Any]],
                          executed_context: str = "") -> Dict[str, Any]:
    """Execute a task with retry logic and an image-based fallback.

    The task is attempted up to ``self.max_retries`` times (always at least
    once), sleeping 1s, 2s, ... between attempts. No retry is announced and
    no sleep happens after the last attempt. If every attempt fails and the
    action is ``click`` or ``type``, *executed_context* is passed to the
    image-based fallback and its result is returned instead.

    Returns:
        The first successful result, else the fallback result (click/type),
        else the last failed result.
    """
    total_attempts = max(1, self.max_retries)
    result: Dict[str, Any] = {}
    for attempt in range(1, total_attempts + 1):
        result = self._execute_safe_task(page, task, handler)
        if result.get("success", False):
            return result
        if attempt < total_attempts:
            self.logger.info(f"Retrying task '{task.get('action')}' (attempt {attempt + 1}/{total_attempts})")
            time.sleep(1 * attempt)
    if task.get("action") in ["click", "type"]:
        self.logger.info("HTML-based automation failed. Using fallback with image-based LLM.")
        result = self._fallback_with_image_llm(page, task, executed_context)
    return result
|
179
|
+
|
180
|
+
def _execute_safe_task(self, page: "Page", task: Dict[str, Any],
                       handler: Callable[["Page", Dict[str, Any]], Dict[str, Any]]) -> Dict[str, Any]:
    """Call *handler* for *task*; turn any exception into a failure result."""
    try:
        outcome = handler(page, task)
    except Exception as e:
        failed_action = task.get("action", "unknown")
        self.logger.exception(f"Error executing task '{failed_action}':")
        outcome = {
            "action": failed_action,
            "success": False,
            "message": f"Critical error: {str(e)}",
        }
    return outcome
|
188
|
+
|
189
|
+
def _dismiss_unwanted_modals(self, page: "Page", task_context: str = ""):
    """Hand every visible modal-like element on *page* to ``_handle_modal``.

    All modal selectors are combined into one query so that an element
    matching several of them (for example ``.modal[role="dialog"]``) is
    processed once. The scan is best-effort: a lookup error is logged and
    does not propagate to the caller.
    """
    modal_selectors = [".modal", ".popup", '[role="dialog"]', ".overlay", ".lightbox"]
    try:
        for modal in page.query_selector_all(", ".join(modal_selectors)):
            if modal.is_visible():
                self._handle_modal(page, modal, task_context)
    except Exception as e:
        self.logger.warning(f"Modal scan failed: {e}")
|
196
|
+
|
197
|
+
def _handle_modal(self, page: "Page", modal_element, task_context: str):
    """Let the LLM decide, from a screenshot, whether to dismiss a modal.

    When the decision is ``dismiss`` the first visible close button inside
    the modal is clicked; if there is none the modal node is removed from
    the DOM. Any error is logged and swallowed.
    """
    try:
        snapshot = modal_element.screenshot()
        prompt = "".join([
            "A modal is displayed on the page. The content is visible in the attached image. ",
            f'The current task context is: "{task_context}". ',
            "Based on the content of the modal and the task context, decide whether to dismiss the modal. ",
            'Return a JSON response in the format: { "action": "dismiss" } to dismiss or { "action": "ignore" } to leave it. ',
            "Return only the JSON.",
        ])
        response_text = self.llm.generate_from_image(prompt, image_bytes=snapshot)
        self.logger.info(f"LLM response for modal analysis: {response_text}")

        fenced = re.search(r'```json\n?(.+?)\n?```', response_text, re.DOTALL)
        payload = fenced.group(1).strip() if fenced else response_text.strip()
        verdict = json.loads(payload)

        if verdict.get("action") != "dismiss":
            self.logger.info("Modal left intact according to LLM analysis.")
            return

        close_css = ".close, .btn-close, [aria-label='Close'], [data-dismiss='modal']"
        visible_button = next(
            (btn for btn in modal_element.query_selector_all(close_css) if btn.is_visible()),
            None,
        )
        if visible_button is not None:
            visible_button.click()
            self.logger.info("Modal dismissed using a close button.")
            return

        # No usable close button: drop the modal node itself.
        page.evaluate("(modal) => modal.remove()", modal_element)
        self.logger.info("Modal dismissed by removal.")
    except Exception as e:
        self.logger.error(f"Modal handling error: {e}")
|
225
|
+
|
226
|
+
def _advanced_find_element(self, page: "Page", keyword: str):
    """Return the element whose attributes/text best resemble *keyword*.

    Every ``input, textarea, button, a, div`` element is described by its
    id, name, placeholder, aria-label and inner text; the joined description
    is compared to *keyword* with ``difflib.SequenceMatcher``. The best
    element is returned only when its ratio exceeds 0.5; otherwise, or on
    any error, ``None`` is returned.
    """
    describe_js = """(el) => {
        return {
            id: el.id,
            name: el.getAttribute('name'),
            placeholder: el.getAttribute('placeholder'),
            aria: el.getAttribute('aria-label'),
            text: el.innerText
        };
    }"""
    try:
        top_candidate, top_score = None, 0.0
        for node in page.query_selector_all("input, textarea, button, a, div"):
            attrs = page.evaluate(describe_js, node)
            fields = [attrs.get(key) for key in ("id", "name", "placeholder", "aria", "text")]
            haystack = " ".join(value for value in fields if value)
            score = difflib.SequenceMatcher(None, haystack.lower(), keyword.lower()).ratio()
            if score > top_score:
                top_candidate, top_score = node, score
        if not top_score > 0.5:
            return None
        self.logger.info(f"Advanced fallback detected element with similarity {top_score:.2f} for keyword '{keyword}'")
        return top_candidate
    except Exception as e:
        self.logger.error(f"Advanced find element error: {e}")
        return None
|
264
|
+
|
265
|
+
def _annotate_page_with_numbers(self, page: "Page", query: str = "button, a, input, [onclick]"):
    """Overlay a numbered red box on every element matching *query*.

    Existing overlays (class ``automation-annotation-overlay``) are removed
    first. Elements with a zero-sized bounding box are skipped and do not
    consume a number. Numbering starts at 1 in document order.

    *query* is passed to the page as an evaluate argument rather than
    spliced into the script text, so selectors containing quotes (for
    example ``[aria-label='Close']``) cannot break the JavaScript.
    """
    script = """(query) => {
        document.querySelectorAll('.automation-annotation-overlay').forEach(el => el.remove());
        const elements = document.querySelectorAll(query);
        let counter = 1;
        elements.forEach(el => {
            const rect = el.getBoundingClientRect();
            if (rect.width === 0 || rect.height === 0) return;
            const overlay = document.createElement('div');
            overlay.classList.add('automation-annotation-overlay');
            overlay.style.position = 'absolute';
            overlay.style.left = (rect.left + window.scrollX) + 'px';
            overlay.style.top = (rect.top + window.scrollY) + 'px';
            overlay.style.width = rect.width + 'px';
            overlay.style.height = rect.height + 'px';
            overlay.style.border = '2px solid red';
            overlay.style.zIndex = 9999;
            overlay.style.pointerEvents = 'none';
            overlay.textContent = counter;
            overlay.style.fontSize = '16px';
            overlay.style.fontWeight = 'bold';
            overlay.style.color = 'red';
            overlay.style.backgroundColor = 'rgba(255, 255, 255, 0.7)';
            document.body.appendChild(overlay);
            counter += 1;
        });
    }"""
    page.evaluate(script, query)
|
295
|
+
|
296
|
+
def _click_element_by_number(self, page: "Page", number: int) -> Dict[str, Any]:
    """Click the *number*-th (1-based) visible ``button, a, input, [onclick]``.

    *number* usually comes from an LLM reply, so it is validated first: a
    value that is missing, not convertible to int, or smaller than 1 yields
    a "not found" result instead of an exception or a click on the wrong
    element through negative indexing.

    Returns:
        A result dict with ``action`` "click", a ``success`` flag and a
        ``message``.
    """
    not_found = {"action": "click", "success": False, "message": f"Element number {number} not found."}
    try:
        index = int(number) - 1
    except (TypeError, ValueError):
        return not_found
    if index < 0:
        return not_found

    candidates = [el for el in page.query_selector_all("button, a, input, [onclick]") if el.is_visible()]
    if index >= len(candidates):
        return not_found

    candidate = candidates[index]
    try:
        # Scrolling can fail too (e.g. a detached element), so it is guarded
        # together with the click.
        candidate.scroll_into_view_if_needed()
        candidate.click()
        return {"action": "click", "success": True, "message": f"Clicked element number {number}"}
    except Exception as e:
        return {"action": "click", "success": False, "message": f"Click failed: {str(e)}"}
|
309
|
+
|
310
|
+
def _fallback_with_image_llm(self, page: "Page", task: Dict[str, Any], executed_context: str = "") -> Dict[str, Any]:
    """
    Fallback method: annotate the page, capture a screenshot, and ask the LLM
    (via image analysis) for a JSON array of tasks for the next steps.

    Each fallback task is an object:
        {
            "action": "click" or "type",
            "element_number": <number>,
            "text": <text to type for 'type'; otherwise an empty string>
        }
    The prompt includes *executed_context*.

    For ``type`` steps the text from the original task is always entered,
    whatever text the LLM returned. Malformed replies (unparsable JSON,
    non-object entries, missing or non-numeric element numbers) produce
    failure results instead of exceptions.

    Returns:
        A result dict for the original action; ``success`` is True when at
        least one fallback step succeeded and ``message`` joins the messages
        of all steps with "; ".
    """
    action_name = task.get("action")
    original_text = str(task.get("value", "") or "").strip()

    query = "input, textarea" if action_name == "type" else "button, a, input, [onclick]"
    self._annotate_page_with_numbers(page, query=query)
    # Pause so the overlays are rendered; Playwright's own wait is used
    # instead of time.sleep so the sync API keeps processing events.
    page.wait_for_timeout(1000)
    screenshot_bytes = page.screenshot(type="png")
    extra = ""
    if action_name == "type":
        extra = f"\nThe exact text to be entered is: \"{original_text}\"."
    prompt = (
        f"Tasks executed so far:\n{executed_context}\n\n"
        f"The following task remains: {task.get('description', '')}.{extra}\n"
        "I have annotated the page with numbered overlays using the appropriate query. "
        "Based on the attached screenshot, generate a JSON array of tasks that need to be performed next. "
        "Each task should be a JSON object with the format:\n"
        "[\n"
        " {\n"
        " \"action\": \"click\" or \"type\",\n"
        " \"element_number\": <number>,\n"
        " \"text\": <if action is 'type', the text to type; otherwise an empty string>\n"
        " },\n"
        " ...\n"
        "]\n"
        "Return only the JSON array."
    )
    response_text = self.llm.generate_from_image(prompt, image_bytes=screenshot_bytes)
    self.logger.info(f"LLM response for fallback: {response_text}")
    response_text = str(response_text or "")

    # Try the reply as raw JSON first, then as a ```json fenced block.
    try:
        fallback_tasks = json.loads(response_text.strip())
    except ValueError as e:
        json_match = re.search(r'```json\n?(.+?)\n?```', response_text, re.DOTALL)
        if not json_match:
            return {"action": action_name, "success": False, "message": f"Fallback failed to parse JSON: {str(e)}"}
        try:
            fallback_tasks = json.loads(json_match.group(1).strip())
        except ValueError as fenced_error:
            return {"action": action_name, "success": False, "message": f"Fallback failed to parse JSON: {str(fenced_error)}"}
    if not isinstance(fallback_tasks, list):
        fallback_tasks = [fallback_tasks]

    fallback_results = []
    for fb_task in fallback_tasks:
        if not isinstance(fb_task, dict):
            fallback_results.append({"action": action_name, "success": False, "message": "Invalid fallback action."})
            continue
        action = fb_task.get("action")
        element_number = fb_task.get("element_number")
        if action == "click":
            self.logger.info(f"LLM indicated fallback click on element number {element_number}.")
            res = self._click_element_by_number(page, element_number)
        elif action == "type":
            returned_text = str(fb_task.get("text", "") or "").strip()
            if returned_text != original_text:
                self.logger.info("Overriding LLM-provided text with original input text.")
            text = original_text
            try:
                index = int(element_number) - 1
            except (TypeError, ValueError):
                index = -1  # treated as "not found" below
            candidates = [el for el in page.query_selector_all("input, textarea") if el.is_visible()]
            if 0 <= index < len(candidates):
                candidate = candidates[index]
                try:
                    candidate.scroll_into_view_if_needed()
                    candidate.fill(text, timeout=self.default_timeout)
                    res = {"action": "type", "success": True, "message": f"Typed '{text}' into element number {element_number}"}
                except Exception as ex:
                    res = {"action": "type", "success": False, "message": f"Typing failed on fallback element: {str(ex)}"}
            else:
                res = {"action": "type", "success": False, "message": f"Element number {element_number} not found."}
        else:
            res = {"action": action_name, "success": False, "message": "Invalid fallback action."}
        fallback_results.append(res)

    overall_success = any(r.get("success", False) for r in fallback_results)
    overall_message = "; ".join([r.get("message", "") for r in fallback_results])
    return {"action": action_name, "success": overall_success, "message": overall_message}
|
396
|
+
|
397
|
+
def _handle_navigation(self, page: "Page", url: str) -> Dict[str, Any]:
    """Navigate *page* to *url* and wait for the ``body`` element.

    Surrounding whitespace is stripped. An empty URL is rejected up front
    instead of being turned into the unusable ``https://``. ``https://`` is
    prepended when the URL does not start with ``http://`` or ``https://``
    (checked case-insensitively).

    Returns:
        A result dict with ``action`` "navigate", a ``success`` flag and a
        ``message``.
    """
    url = (url or "").strip()
    if not url:
        return {"action": "navigate", "success": False, "message": "Navigation failed: no URL provided"}
    if not url.lower().startswith(("http://", "https://")):
        url = f"https://{url}"
    try:
        page.goto(url, timeout=self.default_timeout)
        page.wait_for_selector("body", timeout=self.default_timeout)
        return {"action": "navigate", "success": True, "message": f"Navigated to {url}"}
    except PlaywrightTimeoutError as e:
        self.logger.error(f"Navigation to {url} timed out: {e}")
        return {"action": "navigate", "success": False, "message": f"Navigation timed out: {str(e)}"}
    except Exception as e:
        self.logger.error(f"Navigation to {url} failed: {e}")
        return {"action": "navigate", "success": False, "message": f"Navigation failed: {str(e)}"}
|
410
|
+
|
411
|
+
def _handle_click(self, page: "Page", selector: str) -> Dict[str, Any]:
    """Wait until *selector* is visible, click it and report the outcome."""
    wait_ms = self.default_timeout
    try:
        page.wait_for_selector(selector, state="visible", timeout=wait_ms)
        page.click(selector, timeout=wait_ms)
    except Exception as e:
        # Timeouts get their own wording; everything else is a generic failure.
        if isinstance(e, PlaywrightTimeoutError):
            self.logger.error(f"Click action timed out on selector {selector}: {e}")
            failure_text = f"Click timed out: {str(e)}"
        else:
            self.logger.error(f"Click action failed on selector {selector}: {e}")
            failure_text = f"Click failed: {str(e)}"
        return {"action": "click", "success": False, "message": failure_text}
    return {"action": "click", "success": True, "message": f"Clicked element: {selector}"}
|
422
|
+
|
423
|
+
def _handle_typing(self, page: "Page", selector: str, text: str, task: Dict[str, Any]) -> Dict[str, Any]:
    """Fill *text* into *selector*, with a fuzzy element search on timeout.

    When the selector does not appear in time, ``_advanced_find_element`` is
    asked for the best matching element. The task's own ``description`` is
    tried as the search keyword first, so the fallback is no longer limited
    to search boxes; the generic keyword "search" is still tried last.

    Returns:
        A result dict with ``action`` "type", a ``success`` flag and a
        ``message``.
    """
    try:
        page.wait_for_selector(selector, state="attached", timeout=self.default_timeout)
        page.fill(selector, text, timeout=self.default_timeout)
        return {"action": "type", "success": True, "message": f"Typed '{text}' into element."}
    except PlaywrightTimeoutError as e:
        self.logger.info("Primary selector failed; using advanced fallback for element detection.")
        description = str((task or {}).get("description", "") or "").strip()
        keywords = ["search"]
        if description and description.lower() != "search":
            keywords.insert(0, description)
        element = None
        for keyword in keywords:
            element = self._advanced_find_element(page, keyword)
            if element:
                break
        if not element:
            return {"action": "type", "success": False, "message": f"Typing failed: No matching element found; error: {str(e)}"}
        try:
            element.fill(text, timeout=self.default_timeout)
            return {"action": "type", "success": True, "message": f"Typed '{text}' into fallback element."}
        except Exception as ex:
            return {"action": "type", "success": False, "message": f"Typing failed on fallback element: {str(ex)}"}
    except Exception as e:
        self.logger.error(f"Typing action failed: {e}")
        return {"action": "type", "success": False, "message": f"Typing failed: {str(e)}"}
|
441
|
+
|
442
|
+
def _handle_wait(self, seconds: str) -> Dict[str, Any]:
    """Sleep for *seconds* (a number or numeric string).

    Anything that is not a finite, non-negative number, including ``None``,
    non-numeric text, ``nan`` and ``inf``, is reported as
    "Invalid wait time" instead of raising.

    Returns:
        A result dict with ``action`` "wait", a ``success`` flag and a
        ``message``.
    """
    try:
        wait_time = float(seconds)
        # NaN fails every comparison, so this also rejects it.
        if not 0 <= wait_time < float("inf"):
            raise ValueError(f"wait time out of range: {wait_time}")
    except (TypeError, ValueError, OverflowError):
        self.logger.error(f"Invalid wait time provided: {seconds}")
        return {"action": "wait", "success": False, "message": "Invalid wait time"}
    self.logger.info(f"Waiting for {wait_time} seconds")
    time.sleep(wait_time)
    return {"action": "wait", "success": True, "message": f"Waited {wait_time} seconds"}
|
451
|
+
|
452
|
+
def _handle_wait_for_ajax(self, page: "Page", seconds: str) -> Dict[str, Any]:
    """Poll the page until jQuery reports no active requests, or time out.

    *seconds* may be a number or a numeric string; empty or ``None`` means
    30 seconds. Only jQuery's ``jQuery.active`` counter is inspected: pages
    without jQuery are treated as idle immediately, and other request
    mechanisms are not tracked. The check always runs at least once.

    Returns:
        A result dict with ``action`` "wait_for_ajax". ``success`` is True
        both when activity subsided and when the wait timed out (the message
        says which), and False only when *seconds* is invalid or the page
        check raised.
    """
    try:
        raw = "" if seconds is None else str(seconds).strip()
        timeout_seconds = float(raw) if raw else 30.0
        self.logger.info(f"Waiting for AJAX/network activity for up to {timeout_seconds} seconds.")
        deadline = time.monotonic() + timeout_seconds
        while True:
            ajax_complete = page.evaluate("() => (window.jQuery ? jQuery.active === 0 : true)")
            if ajax_complete:
                return {"action": "wait_for_ajax", "success": True, "message": "AJAX/network activity subsided."}
            if time.monotonic() >= deadline:
                break
            # Playwright's own wait keeps the sync API processing events.
            page.wait_for_timeout(500)
        # Timing out keeps success=True so the plan continues as before,
        # but the message no longer claims the activity subsided.
        return {
            "action": "wait_for_ajax",
            "success": True,
            "message": f"Timed out after {timeout_seconds} seconds waiting for AJAX/network activity; continuing.",
        }
    except Exception as e:
        self.logger.error(f"Wait for AJAX failed: {e}")
        return {"action": "wait_for_ajax", "success": False, "message": f"Wait for AJAX failed: {str(e)}"}
|
471
|
+
|
472
|
+
def _handle_scroll(self, page: "Page", selector: str) -> Dict[str, Any]:
    """Scroll *selector* into the viewport centre, or to the page bottom when no selector is given."""
    try:
        if not selector:
            page.evaluate("window.scrollTo(0, document.body.scrollHeight);")
            destination = "page bottom"
        else:
            page.wait_for_selector(selector, timeout=self.default_timeout)
            page.eval_on_selector(selector, "el => el.scrollIntoView({behavior: 'smooth', block: 'center'})")
            destination = selector
        return {"action": "scroll", "success": True, "message": f"Scrolled to {destination}"}
    except Exception as e:
        self.logger.error(f"Scroll action failed on selector {selector}: {e}")
        return {"action": "scroll", "success": False, "message": f"Scroll failed: {str(e)}"}
|
485
|
+
|
486
|
+
def _handle_hover(self, page: "Page", selector: str) -> Dict[str, Any]:
    """Wait until *selector* is visible, hover over it and report the outcome."""
    wait_ms = self.default_timeout
    try:
        page.wait_for_selector(selector, state="visible", timeout=wait_ms)
        page.hover(selector, timeout=wait_ms)
    except Exception as e:
        self.logger.error(f"Hover action failed on selector {selector}: {e}")
        return {"action": "hover", "success": False, "message": f"Hover failed: {str(e)}"}
    return {"action": "hover", "success": True, "message": f"Hovered over {selector}"}
|
494
|
+
|
495
|
+
def _handle_screenshot(self, page: "Page", filename: str) -> Dict[str, Any]:
    """Save a screenshot of *page* to *filename*.

    An empty or whitespace-only *filename* falls back to ``screenshot.png``.
    Plan parsing turns a missing ``value`` into ``""``, so without this
    fallback a task with no filename would ask Playwright for an empty path
    while the result message still claimed a file was saved.

    Returns:
        A result dict with ``action`` "screenshot", a ``success`` flag and a
        ``message`` naming the file actually used.
    """
    filename = (str(filename).strip() if filename else "") or "screenshot.png"
    try:
        page.screenshot(path=filename)
        return {"action": "screenshot", "success": True, "message": f"Screenshot saved as {filename}"}
    except Exception as e:
        self.logger.error(f"Screenshot capture failed: {e}")
        return {"action": "screenshot", "success": False, "message": f"Screenshot failed: {str(e)}"}
|
502
|
+
|
503
|
+
def _handle_switch_tab(self, context, value: str) -> Dict[str, Any]:
    """Bring the tab selected by *value* to the front.

    *value* is either the keyword ``new`` (the last page in
    ``context.pages``) or a zero-based tab index, given as a string or a
    number. Negative and too-large indexes are reported as out of range.
    The chosen page is activated with ``bring_to_front()``.

    Returns:
        A result dict with ``action`` "switch_tab", a ``success`` flag and a
        ``message``.
    """
    try:
        pages = context.pages
        choice = str(value).strip().lower()
        if choice == "new":
            target_page = pages[-1]
        else:
            idx = int(choice)
            if not 0 <= idx < len(pages):
                return {"action": "switch_tab", "success": False, "message": f"Tab index {value} out of range"}
            target_page = pages[idx]
        target_page.bring_to_front()
        return {"action": "switch_tab", "success": True, "message": f"Switched to tab {value}"}
    except Exception as e:
        self.logger.error(f"Switch tab failed: {e}")
        return {"action": "switch_tab", "success": False, "message": f"Switch tab failed: {str(e)}"}
|
518
|
+
|
519
|
+
def _handle_execute_script(self, page: "Page", script: str) -> Dict[str, Any]:
    """Evaluate *script* in the page and return its value under ``result``."""
    try:
        script_value = page.evaluate(script)
    except Exception as e:
        self.logger.error(f"Execute script failed: {e}")
        return {"action": "execute_script", "success": False, "message": f"Script execution failed: {str(e)}"}
    return {
        "action": "execute_script",
        "success": True,
        "message": "Script executed successfully",
        "result": script_value,
    }
|
526
|
+
|
527
|
+
def _handle_drag_and_drop(self, page: "Page", source_selector: str, target_selector: str) -> Dict[str, Any]:
    """Wait for both selectors, then drag the source element onto the target."""
    wait_ms = self.default_timeout
    try:
        for css in (source_selector, target_selector):
            page.wait_for_selector(css, timeout=wait_ms)
        drag_source = page.locator(source_selector)
        drop_target = page.locator(target_selector)
        drag_source.drag_to(drop_target, timeout=wait_ms)
    except Exception as e:
        self.logger.error(f"Drag and drop failed from {source_selector} to {target_selector}: {e}")
        return {"action": "drag_and_drop", "success": False, "message": f"Drag and drop failed: {str(e)}"}
    return {
        "action": "drag_and_drop",
        "success": True,
        "message": f"Dragged element from {source_selector} to {target_selector}",
    }
|
538
|
+
|
539
|
+
def _capture_failure_screenshot(self, page: "Page", action: str):
    """Save a screenshot named ``failure_<action>_<unix time>.png``; errors are only logged."""
    stamp = int(time.time())
    filename = f"failure_{action}_{stamp}.png"
    try:
        page.screenshot(path=filename)
    except Exception as e:
        self.logger.error(f"Failed to capture screenshot: {e}")
    else:
        self.logger.info(f"Failure screenshot captured: {filename}")
|
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
|
|
2
2
|
|
3
3
|
setup(
|
4
4
|
name="semantio",
|
5
|
-
version="0.0.6",
|
5
|
+
version="0.0.7",
|
6
6
|
description="A powerful SDK for building AI agents",
|
7
7
|
long_description=open("README.md").read(),
|
8
8
|
long_description_content_type="text/markdown",
|
@@ -28,7 +28,6 @@ setup(
|
|
28
28
|
"fuzzywuzzy", # For fuzzy string matching
|
29
29
|
"duckduckgo-search", # For DuckDuckGo search
|
30
30
|
"yfinance", # For stock/crypto prices
|
31
|
-
"selenium", # For web scraping
|
32
31
|
"beautifulsoup4", # For HTML parsing
|
33
32
|
"webdriver-manager", # For browser automation
|
34
33
|
"validators", # For URL validation
|
@@ -1,439 +0,0 @@
|
|
1
|
-
# web_browser.py
|
2
|
-
from typing import Dict, Any, List, Optional, Callable
|
3
|
-
from pydantic import Field, BaseModel
|
4
|
-
from selenium import webdriver
|
5
|
-
from selenium.webdriver.common.by import By
|
6
|
-
from selenium.webdriver.common.action_chains import ActionChains
|
7
|
-
from selenium.webdriver.remote.webelement import WebElement
|
8
|
-
from selenium.webdriver.support.ui import WebDriverWait
|
9
|
-
from selenium.webdriver.support import expected_conditions as EC
|
10
|
-
from selenium.webdriver.chrome.options import Options
|
11
|
-
from selenium.webdriver.chrome.service import Service
|
12
|
-
from webdriver_manager.chrome import ChromeDriverManager
|
13
|
-
from bs4 import BeautifulSoup
|
14
|
-
import json
|
15
|
-
import time
|
16
|
-
import re
|
17
|
-
import logging
|
18
|
-
import os
|
19
|
-
import difflib
|
20
|
-
from .base_tool import BaseTool
|
21
|
-
|
22
|
-
logger = logging.getLogger(__name__)
|
23
|
-
|
24
|
-
class BrowserPlan(BaseModel):
    """Container for the list of automation task dicts that make up one plan."""

    # Required field (no default): one dict per automation task.
    tasks: List[Dict[str, Any]] = Field(..., description="List of automation tasks to execute")
|
29
|
-
|
30
|
-
class WebBrowserTool(BaseTool):
    """Browser automation tool that runs an LLM-generated task plan on Chrome.

    ``execute`` asks ``self.llm`` for a JSON task list, then dispatches each
    task to a ``_handle_*`` method operating on a Selenium Chrome driver.
    Every handler returns a dict with at least ``action``, ``success`` and
    ``message`` keys, so task failures are reported as data instead of raised.
    """

    name: str = Field("WebBrowser", description="Name of the tool")
    description: str = Field(
        "Highly advanced universal web automation tool with advanced element identification, AJAX waiting, modal dismissal, multi-tab support, and custom JS injection.",
        description="Tool description"
    )

    default_timeout: int = 15  # Default wait timeout in seconds
    max_retries: int = 3  # Maximum number of attempts for any task

    def execute(self, input: Dict[str, Any]) -> Dict[str, Any]:
        """Execute an advanced dynamic web automation workflow.

        Args:
            input: Options dict. Keys read here: ``query`` (text sent to the
                planner), ``headless`` (default ``False``), ``timeout`` and
                ``max_retries``. The last two overwrite ``self.default_timeout``
                and ``self.max_retries`` and therefore persist on the instance.

        Returns:
            ``{"status": "success", "results": [...], "total_time": seconds}``
            when the run finishes without an unhandled exception. A failed
            task stops the run but does not change ``status``; inspect the
            ``success`` flag of each entry in ``results``.
            ``{"status": "error", "message": ...}`` when anything raises,
            including an empty plan.
        """
        driver = None
        overall_start = time.time()
        try:
            headless = input.get("headless", False)
            self.default_timeout = int(input.get("timeout", self.default_timeout))
            self.max_retries = int(input.get("max_retries", self.max_retries))
            driver = self._init_browser(headless)
            results = []

            # No page is loaded yet, so the planner is given an empty current URL.
            plan = self._generate_plan(input.get('query', ''), "")
            if not plan.tasks:
                raise ValueError("No valid tasks in the generated plan.")

            # Dynamic mapping: action name to handler function.
            action_map: Dict[str, Callable[[webdriver.Chrome, Dict[str, Any]], Dict[str, Any]]] = {
                "navigate": lambda d, task: self._handle_navigation(d, task.get("value", "")),
                "click": lambda d, task: self._handle_click(d, task.get("selector", "")),
                "type": lambda d, task: self._handle_typing(d, task.get("selector", ""), task.get("value", ""), task),
                "wait": lambda d, task: self._handle_wait(task.get("value", "")),
                "wait_for_ajax": lambda d, task: self._handle_wait_for_ajax(d, task.get("value", "30")),
                "scroll": lambda d, task: self._handle_scroll(d, task.get("selector", "")),
                "hover": lambda d, task: self._handle_hover(d, task.get("selector", "")),
                "screenshot": lambda d, task: self._handle_screenshot(d, task.get("value", "screenshot.png")),
                "switch_tab": lambda d, task: self._handle_switch_tab(d, task.get("value", "0")),
                "execute_script": lambda d, task: self._handle_execute_script(d, task.get("value", "")),
                "drag_and_drop": lambda d, task: self._handle_drag_and_drop(d, task.get("selector", ""), task.get("value", "")),
            }

            for task in plan.tasks:
                # Before each action, dismiss modals/overlays.
                self._dismiss_unwanted_modals(driver)
                action = task.get("action", "").lower()
                logger.info(f"Executing task: {task.get('description', action)}")
                start_time = time.time()
                handler = action_map.get(action)
                if not handler:
                    # Unknown actions are recorded and skipped; they do not stop the run.
                    results.append({
                        "action": action,
                        "success": False,
                        "message": f"Unsupported action: {action}"
                    })
                    continue

                result = self._execute_with_retries(driver, task, handler)
                elapsed = time.time() - start_time
                result["elapsed"] = elapsed
                logger.info(f"Action '{action}' completed in {elapsed:.2f} seconds.")
                results.append(result)

                if not result.get('success', False):
                    # A failed supported action aborts the remaining plan.
                    logger.error(f"Task failed: {result.get('message')}")
                    self._capture_failure_screenshot(driver, action)
                    break

            overall_elapsed = time.time() - overall_start
            logger.info(f"Total execution time: {overall_elapsed:.2f} seconds.")
            return {"status": "success", "results": results, "total_time": overall_elapsed}

        except Exception as e:
            logger.exception("Execution error:")
            return {"status": "error", "message": str(e)}
        finally:
            # Always release the browser, whether the run succeeded or not.
            if driver:
                driver.quit()

    def _init_browser(self, headless: bool) -> webdriver.Chrome:
        """Start Chrome with the tool's fixed options.

        Args:
            headless: When true, ``--headless=new`` is added to the options.

        Returns:
            A Chrome driver whose service uses the path returned by
            ``ChromeDriverManager().install()``.
        """
        options = Options()
        options.add_argument("--start-maximized")
        options.add_argument("--disable-blink-features=AutomationControlled")
        options.add_experimental_option("excludeSwitches", ["enable-automation"])
        if headless:
            options.add_argument("--headless=new")
        return webdriver.Chrome(
            service=Service(ChromeDriverManager().install()),
            options=options
        )

    def _generate_plan(self, query: str, current_url: str) -> BrowserPlan:
        """Ask ``self.llm`` for a task plan and parse its response.

        Args:
            query: Description of what the automation should achieve.
            current_url: URL shown to the planner; an empty value is rendered
                as ``No page loaded yet``.

        Returns:
            The plan produced by ``_parse_plan`` from the LLM response.
        """
        prompt = f"""Generate browser automation plan for: {query}

Current URL: {current_url or 'No page loaded yet'}

Required JSON format:
{{
    "tasks": [
        {{
            "action": "navigate|click|type|wait|wait_for_ajax|scroll|hover|screenshot|switch_tab|execute_script|drag_and_drop",
            "selector": "CSS selector (optional)",
            "value": "input text/URL/seconds/filename/target-selector",
            "description": "action purpose"
        }}
    ]
}}

Guidelines:
1. Prefer IDs in selectors (#element-id) and semantic attributes.
2. Include wait steps after navigation and wait for AJAX where applicable.
3. Dismiss any modals/pop-ups that are not part of the task.
4. For drag_and_drop, use source selector in 'selector' and target selector in 'value'.
5. For execute_script, 'value' should contain valid JavaScript.
6. For switch_tab, 'value' should be an index or keyword 'new'.
"""
        response = self.llm.generate(prompt=prompt)
        return self._parse_plan(response)

    def _parse_plan(self, response: str) -> BrowserPlan:
        """Extract and validate the task list from an LLM response.

        A fenced ```` ```json ```` block is preferred; otherwise the widest
        ``{...}`` span in the response is parsed. Tasks that are not dicts or
        lack ``action``/``description`` are skipped with a warning.

        Returns:
            A ``BrowserPlan`` with the validated tasks, or one with an empty
            task list when the response cannot be parsed.
        """
        try:
            json_match = re.search(r'```json\n?(.+?)\n?```', response, re.DOTALL)
            if json_match:
                plan_data = json.loads(json_match.group(1).strip())
            else:
                json_str_match = re.search(r'\{.*\}', response, re.DOTALL)
                if not json_str_match:
                    raise ValueError("No JSON object found in the response.")
                plan_data = json.loads(json_str_match.group())
            validated_tasks = []
            for task in plan_data.get("tasks", []):
                # Non-dict entries cannot carry the required keys; skip them too.
                if not isinstance(task, dict) or not all(key in task for key in ["action", "description"]):
                    logger.warning(f"Skipping task due to missing keys: {task}")
                    continue
                validated_tasks.append({
                    "action": task["action"],
                    "selector": task.get("selector", ""),
                    "value": task.get("value", ""),
                    "description": task["description"]
                })
            return BrowserPlan(tasks=validated_tasks)
        except (json.JSONDecodeError, AttributeError, TypeError, ValueError) as e:
            # TypeError covers a non-string response and `"tasks": null`.
            logger.error(f"Plan parsing failed: {e}")
            return BrowserPlan(tasks=[])

    def _execute_with_retries(self, driver: webdriver.Chrome, task: Dict[str, Any],
                              handler: Callable[[webdriver.Chrome, Dict[str, Any]], Dict[str, Any]]) -> Dict[str, Any]:
        """Run a task up to ``self.max_retries`` times with linear backoff.

        At least one attempt is always made, even if ``max_retries`` is below
        one. Between failed attempts the method sleeps ``attempt`` seconds
        (1s, 2s, ...); it does not sleep after the final attempt.

        Returns:
            The first successful result, or the result of the last attempt.
        """
        max_attempts = max(1, self.max_retries)
        result: Dict[str, Any] = {}
        for attempt in range(1, max_attempts + 1):
            result = self._execute_safe_task(driver, task, handler)
            if result.get("success", False):
                return result
            if attempt < max_attempts:
                logger.info(f"Retrying task '{task.get('action')}' (attempt {attempt + 1}/{max_attempts})")
                time.sleep(1 * attempt)
        return result

    def _execute_safe_task(self, driver: webdriver.Chrome, task: Dict[str, Any],
                           handler: Callable[[webdriver.Chrome, Dict[str, Any]], Dict[str, Any]]) -> Dict[str, Any]:
        """Call ``handler`` and convert any exception into a failure result."""
        try:
            return handler(driver, task)
        except Exception as e:
            action = task.get("action", "unknown")
            logger.exception(f"Error executing task '{action}':")
            return {"action": action, "success": False, "message": f"Critical error: {str(e)}"}

    def _dismiss_unwanted_modals(self, driver: webdriver.Chrome):
        """Dismiss or remove displayed modals, overlays, or pop-ups.

        For each displayed element matching a modal selector, a displayed
        close button inside it is clicked if one is found; otherwise the
        element is removed from the DOM via JavaScript. Errors are logged at
        debug level and never propagate.
        """
        modal_selectors = [".modal", ".popup", '[role="dialog"]', ".overlay", ".lightbox"]
        close_selectors = [".close", ".btn-close", "[aria-label='Close']", "[data-dismiss='modal']"]
        try:
            for selector in modal_selectors:
                for modal in driver.find_elements(By.CSS_SELECTOR, selector):
                    if not modal.is_displayed():
                        continue
                    dismissed = False
                    for close_sel in close_selectors:
                        try:
                            close_button = modal.find_element(By.CSS_SELECTOR, close_sel)
                            if close_button.is_displayed():
                                close_button.click()
                                dismissed = True
                                logger.info(f"Dismissed modal using selector {close_sel}")
                                time.sleep(1)
                                break
                        except Exception:
                            # Best effort: this close selector did not work, try the next one.
                            continue
                    if not dismissed:
                        # No usable close button: remove the element from the DOM.
                        driver.execute_script("arguments[0].remove();", modal)
                        logger.info(f"Removed overlay/modal with selector {selector}")
        except Exception as e:
            logger.debug(f"Modal dismissal error: {e}")

    def _advanced_find_element(self, driver: webdriver.Chrome, keyword: str) -> Optional[WebElement]:
        """Find the element whose attributes/text best fuzzy-match ``keyword``.

        Candidates are all ``input, textarea, button, a, div`` elements. Each
        is scored with ``difflib.SequenceMatcher`` on the lower-cased join of
        its id, name, placeholder, aria-label and text.

        Returns:
            The best-scoring element if its ratio exceeds 0.5, else ``None``.
        """
        candidates = driver.find_elements(By.CSS_SELECTOR, "input, textarea, button, a, div")
        needle = keyword.lower()
        best_match = None
        best_ratio = 0.0
        for candidate in candidates:
            combined_text = " ".join([
                candidate.get_attribute("id") or "",
                candidate.get_attribute("name") or "",
                candidate.get_attribute("placeholder") or "",
                candidate.get_attribute("aria-label") or "",
                candidate.text or "",
            ])
            ratio = difflib.SequenceMatcher(None, combined_text.lower(), needle).ratio()
            if ratio > best_ratio:
                best_ratio = ratio
                best_match = candidate
        if best_ratio > 0.5:
            logger.info(f"Advanced fallback detected element with similarity {best_ratio:.2f} for keyword '{keyword}'")
            return best_match
        return None

    def _handle_navigation(self, driver: webdriver.Chrome, url: str) -> Dict[str, Any]:
        """Load ``url`` (prefixing ``https://`` if no scheme) and wait for ``<body>``."""
        if not url.startswith(("http://", "https://")):
            url = f"https://{url}"
        try:
            driver.get(url)
            WebDriverWait(driver, self.default_timeout).until(EC.presence_of_element_located((By.TAG_NAME, "body")))
            return {"action": "navigate", "success": True, "message": f"Navigated to {url}"}
        except Exception as e:
            logger.error(f"Navigation to {url} failed: {e}")
            return {"action": "navigate", "success": False, "message": f"Navigation failed: {str(e)}"}

    def _handle_click(self, driver: webdriver.Chrome, selector: str) -> Dict[str, Any]:
        """Click the element matching ``selector``, falling back to a JS click.

        The element is awaited until clickable and scrolled into view first.
        If the native click raises, ``arguments[0].click()`` is executed instead.
        """
        try:
            element = WebDriverWait(driver, self.default_timeout).until(
                EC.element_to_be_clickable((By.CSS_SELECTOR, selector))
            )
            driver.execute_script("arguments[0].scrollIntoView({behavior: 'smooth', block: 'center'});", element)
            try:
                element.click()
            except Exception:
                # Native click failed (e.g. intercepted); dispatch the click from JavaScript.
                driver.execute_script("arguments[0].click();", element)
            return {"action": "click", "success": True, "message": f"Clicked element: {selector}"}
        except Exception as e:
            logger.error(f"Click action failed on selector {selector}: {e}")
            return {"action": "click", "success": False, "message": f"Click failed: {str(e)}"}

    def _handle_typing(self, driver: webdriver.Chrome, selector: str, text: str, task: Dict[str, Any]) -> Dict[str, Any]:
        """Clear the element matching ``selector`` and type ``text`` into it.

        If the selector cannot be located and either the task description or
        the selector mentions "search", the element is looked up again with
        ``_advanced_find_element(driver, "search")``; otherwise the lookup
        failure is reported directly.
        """
        try:
            element = WebDriverWait(driver, self.default_timeout).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, selector))
            )
        except Exception as e:
            # If the task seems to involve search or similar text, use advanced fallback.
            if "search" in task.get("description", "").lower() or "search" in selector.lower():
                logger.info("Primary selector failed; using advanced fallback for element detection.")
                element = self._advanced_find_element(driver, "search")
                if not element:
                    return {"action": "type", "success": False, "message": f"Typing failed: No search-like element found; error: {str(e)}"}
            else:
                return {"action": "type", "success": False, "message": f"Typing failed: {str(e)}"}
        try:
            element.clear()
            element.send_keys(text)
            return {"action": "type", "success": True, "message": f"Typed '{text}' into element."}
        except Exception as e:
            logger.error(f"Typing action failed: {e}")
            return {"action": "type", "success": False, "message": f"Typing failed: {str(e)}"}

    def _handle_wait(self, seconds: str) -> Dict[str, Any]:
        """Sleep for ``seconds`` (parsed with ``float``).

        Values that cannot be parsed or that ``time.sleep`` rejects (missing,
        non-numeric, negative, infinite) yield an "Invalid wait time" failure.
        """
        try:
            wait_time = float(seconds)
            logger.info(f"Waiting for {wait_time} seconds")
            time.sleep(wait_time)
            return {"action": "wait", "success": True, "message": f"Waited {wait_time} seconds"}
        except (TypeError, ValueError, OverflowError):
            logger.error(f"Invalid wait time provided: {seconds}")
            return {"action": "wait", "success": False, "message": "Invalid wait time"}

    def _handle_wait_for_ajax(self, driver: webdriver.Chrome, seconds: str) -> Dict[str, Any]:
        """Wait, up to ``seconds``, until jQuery reports no active requests.

        The page is polled every 0.5s. Only jQuery activity is detected: when
        jQuery is absent the script returns true at once, and the ``fetch``
        clause of the script is always true. Reaching the deadline is not
        treated as a failure (the wait is best effort), but the returned
        message states that the wait timed out.
        """
        try:
            timeout = int(seconds)
            logger.info(f"Waiting for AJAX/network activity for up to {timeout} seconds.")
            deadline = time.monotonic() + timeout
            settled = False
            while time.monotonic() < deadline:
                ajax_complete = driver.execute_script("""
                    return (window.jQuery ? jQuery.active === 0 : true) &&
                           (typeof window.fetch === 'function' ? true : true);
                """)
                if ajax_complete:
                    settled = True
                    break
                time.sleep(0.5)
            if settled:
                message = "AJAX/network activity subsided."
            else:
                message = f"Timed out after {timeout} seconds waiting for AJAX/network activity; continuing."
                logger.warning(message)
            return {"action": "wait_for_ajax", "success": True, "message": message}
        except Exception as e:
            logger.error(f"Wait for AJAX failed: {e}")
            return {"action": "wait_for_ajax", "success": False, "message": f"Wait for AJAX failed: {str(e)}"}

    def _handle_scroll(self, driver: webdriver.Chrome, selector: str) -> Dict[str, Any]:
        """Scroll to the element matching ``selector``, or to the page bottom if it is empty."""
        try:
            if selector:
                element = WebDriverWait(driver, self.default_timeout).until(
                    EC.presence_of_element_located((By.CSS_SELECTOR, selector))
                )
                driver.execute_script("arguments[0].scrollIntoView({behavior: 'smooth', block: 'center'});", element)
                scroll_target = selector
            else:
                driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
                scroll_target = "page bottom"
            return {"action": "scroll", "success": True, "message": f"Scrolled to {scroll_target}"}
        except Exception as e:
            logger.error(f"Scroll action failed on selector {selector}: {e}")
            return {"action": "scroll", "success": False, "message": f"Scroll failed: {str(e)}"}

    def _handle_hover(self, driver: webdriver.Chrome, selector: str) -> Dict[str, Any]:
        """Move the mouse over the visible element matching ``selector``."""
        try:
            element = WebDriverWait(driver, self.default_timeout).until(
                EC.visibility_of_element_located((By.CSS_SELECTOR, selector))
            )
            ActionChains(driver).move_to_element(element).perform()
            return {"action": "hover", "success": True, "message": f"Hovered over {selector}"}
        except Exception as e:
            logger.error(f"Hover action failed on selector {selector}: {e}")
            return {"action": "hover", "success": False, "message": f"Hover failed: {str(e)}"}

    def _handle_screenshot(self, driver: webdriver.Chrome, filename: str) -> Dict[str, Any]:
        """Save a screenshot of the current browser state to ``filename``."""
        try:
            driver.save_screenshot(filename)
            return {"action": "screenshot", "success": True, "message": f"Screenshot saved as {filename}"}
        except Exception as e:
            logger.error(f"Screenshot capture failed: {e}")
            return {"action": "screenshot", "success": False, "message": f"Screenshot failed: {str(e)}"}

    def _handle_switch_tab(self, driver: webdriver.Chrome, value: str) -> Dict[str, Any]:
        """Switch to another tab.

        Args:
            value: A tab index into ``driver.window_handles`` or the keyword
                ``new`` (case-insensitive), which selects the last handle.
                Non-string values such as a JSON integer are accepted, and
                negative indexes count from the end.

        Returns:
            A result dict; an index outside the handle list yields a
            "Tab index ... out of range" failure.
        """
        try:
            handles = driver.window_handles
            # The planner may emit the index as a JSON number rather than a string.
            choice = str(value).strip()
            if choice.lower() == "new":
                target_handle = handles[-1]
            else:
                idx = int(choice)
                if not -len(handles) <= idx < len(handles):
                    return {"action": "switch_tab", "success": False, "message": f"Tab index {value} out of range"}
                target_handle = handles[idx]
            driver.switch_to.window(target_handle)
            return {"action": "switch_tab", "success": True, "message": f"Switched to tab {value}"}
        except Exception as e:
            logger.error(f"Switch tab failed: {e}")
            return {"action": "switch_tab", "success": False, "message": f"Switch tab failed: {str(e)}"}

    def _handle_execute_script(self, driver: webdriver.Chrome, script: str) -> Dict[str, Any]:
        """Execute ``script`` in the page and return its value under ``result``.

        NOTE(review): the script text comes from the LLM-generated plan and is
        executed as-is; confirm that this is acceptable for the deployment.
        """
        try:
            result = driver.execute_script(script)
            return {"action": "execute_script", "success": True, "message": "Script executed successfully", "result": result}
        except Exception as e:
            logger.error(f"Execute script failed: {e}")
            return {"action": "execute_script", "success": False, "message": f"Script execution failed: {str(e)}"}

    def _handle_drag_and_drop(self, driver: webdriver.Chrome, source_selector: str, target_selector: str) -> Dict[str, Any]:
        """Drag the element matching ``source_selector`` onto ``target_selector``.

        Both elements are awaited for presence before the drag is performed
        with ``ActionChains.drag_and_drop``.
        """
        try:
            source = WebDriverWait(driver, self.default_timeout).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, source_selector))
            )
            target = WebDriverWait(driver, self.default_timeout).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, target_selector))
            )
            ActionChains(driver).drag_and_drop(source, target).perform()
            return {"action": "drag_and_drop", "success": True, "message": f"Dragged element from {source_selector} to {target_selector}"}
        except Exception as e:
            logger.error(f"Drag and drop failed from {source_selector} to {target_selector}: {e}")
            return {"action": "drag_and_drop", "success": False, "message": f"Drag and drop failed: {str(e)}"}

    def _capture_failure_screenshot(self, driver: webdriver.Chrome, action: str):
        """Capture a screenshot for debugging when an error occurs.

        The file is named ``failure_<action>_<unix seconds>.png``; a failed
        capture is logged and otherwise ignored.
        """
        filename = f"failure_{action}_{int(time.time())}.png"
        try:
            driver.save_screenshot(filename)
            logger.info(f"Failure screenshot captured: {filename}")
        except Exception as e:
            logger.error(f"Failed to capture screenshot: {e}")
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|