semantio 0.0.6__py3-none-any.whl → 0.0.7__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- semantio/tools/web_browser.py +344 -238
- {semantio-0.0.6.dist-info → semantio-0.0.7.dist-info}/METADATA +1 -2
- {semantio-0.0.6.dist-info → semantio-0.0.7.dist-info}/RECORD +7 -7
- {semantio-0.0.6.dist-info → semantio-0.0.7.dist-info}/LICENSE +0 -0
- {semantio-0.0.6.dist-info → semantio-0.0.7.dist-info}/WHEEL +0 -0
- {semantio-0.0.6.dist-info → semantio-0.0.7.dist-info}/entry_points.txt +0 -0
- {semantio-0.0.6.dist-info → semantio-0.0.7.dist-info}/top_level.txt +0 -0
semantio/tools/web_browser.py
CHANGED
@@ -1,24 +1,11 @@
|
|
1
1
|
# web_browser.py
|
2
2
|
from typing import Dict, Any, List, Optional, Callable
|
3
3
|
from pydantic import Field, BaseModel
|
4
|
-
from
|
5
|
-
|
6
|
-
from selenium.webdriver.common.action_chains import ActionChains
|
7
|
-
from selenium.webdriver.remote.webelement import WebElement
|
8
|
-
from selenium.webdriver.support.ui import WebDriverWait
|
9
|
-
from selenium.webdriver.support import expected_conditions as EC
|
10
|
-
from selenium.webdriver.chrome.options import Options
|
11
|
-
from selenium.webdriver.chrome.service import Service
|
12
|
-
from webdriver_manager.chrome import ChromeDriverManager
|
13
|
-
from bs4 import BeautifulSoup
|
14
|
-
import json
|
15
|
-
import time
|
16
|
-
import re
|
17
|
-
import logging
|
18
|
-
import os
|
19
|
-
import difflib
|
4
|
+
from playwright.sync_api import sync_playwright, Page, TimeoutError as PlaywrightTimeoutError
|
5
|
+
import json, time, re, logging, os, difflib
|
20
6
|
from .base_tool import BaseTool
|
21
7
|
|
8
|
+
# Global logger
|
22
9
|
logger = logging.getLogger(__name__)
|
23
10
|
|
24
11
|
class BrowserPlan(BaseModel):
|
@@ -30,50 +17,63 @@ class BrowserPlan(BaseModel):
|
|
30
17
|
class WebBrowserTool(BaseTool):
|
31
18
|
name: str = Field("WebBrowser", description="Name of the tool")
|
32
19
|
description: str = Field(
|
33
|
-
"
|
20
|
+
"Universal web automation tool with advanced element identification (DOM and image fallback), modal analysis, AJAX waiting, multi-tab support, and custom JS injection.",
|
34
21
|
description="Tool description"
|
35
22
|
)
|
36
|
-
|
37
|
-
|
38
|
-
|
23
|
+
default_timeout: int = 15000 # 15 seconds in milliseconds
|
24
|
+
max_retries: int = 3
|
25
|
+
|
26
|
+
def __init__(self, *args, **kwargs):
|
27
|
+
super().__init__(*args, **kwargs)
|
28
|
+
# Bypass Pydantic's restrictions for extra attributes.
|
29
|
+
object.__setattr__(self, "logger", logging.getLogger(__name__))
|
39
30
|
|
40
31
|
def execute(self, input: Dict[str, Any]) -> Dict[str, Any]:
|
41
|
-
"""
|
42
|
-
|
32
|
+
"""
|
33
|
+
Execute the browser automation workflow.
|
34
|
+
Maintains a context string of executed tasks and passes it to fallback routines.
|
35
|
+
DOES NOT close the browser after successful execution.
|
36
|
+
"""
|
43
37
|
overall_start = time.time()
|
38
|
+
results = [] # to hold summaries of executed tasks (for context)
|
39
|
+
current_url = ""
|
44
40
|
try:
|
45
41
|
headless = input.get("headless", False)
|
46
|
-
self.default_timeout = int(input.get("timeout",
|
42
|
+
self.default_timeout = int(input.get("timeout", 15)) * 1000
|
47
43
|
self.max_retries = int(input.get("max_retries", self.max_retries))
|
48
|
-
|
49
|
-
results = []
|
50
|
-
current_url = ""
|
51
|
-
|
52
|
-
plan = self._generate_plan(input.get('query', ''), current_url)
|
44
|
+
plan = self._generate_plan(input.get("query", ""), current_url)
|
53
45
|
if not plan.tasks:
|
54
46
|
raise ValueError("No valid tasks in the generated plan.")
|
55
47
|
|
56
|
-
#
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
"
|
65
|
-
"
|
66
|
-
"
|
67
|
-
"
|
68
|
-
"
|
48
|
+
# Start Playwright without a "with" block so we can leave the browser open.
|
49
|
+
p = sync_playwright().start()
|
50
|
+
browser = p.chromium.launch(headless=headless)
|
51
|
+
context = browser.new_context()
|
52
|
+
page = context.new_page()
|
53
|
+
|
54
|
+
# Map actions to handlers.
|
55
|
+
action_map: Dict[str, Callable[[Page, Dict[str, Any]], Dict[str, Any]]] = {
|
56
|
+
"navigate": lambda p, task: self._handle_navigation(p, task.get("value", "")),
|
57
|
+
"click": lambda p, task: self._handle_click(p, task.get("selector", "")),
|
58
|
+
"type": lambda p, task: self._handle_typing(p, task.get("selector", ""), task.get("value", ""), task),
|
59
|
+
"wait": lambda p, task: self._handle_wait(task.get("value", "")),
|
60
|
+
"wait_for_ajax": lambda p, task: self._handle_wait_for_ajax(p, task.get("value", "")),
|
61
|
+
"scroll": lambda p, task: self._handle_scroll(p, task.get("selector", "")),
|
62
|
+
"hover": lambda p, task: self._handle_hover(p, task.get("selector", "")),
|
63
|
+
"screenshot": lambda p, task: self._handle_screenshot(p, task.get("value", "screenshot.png")),
|
64
|
+
"switch_tab": lambda p, task: self._handle_switch_tab(context, task.get("value", "0")),
|
65
|
+
"execute_script": lambda p, task: self._handle_execute_script(p, task.get("value", "")),
|
66
|
+
"drag_and_drop": lambda p, task: self._handle_drag_and_drop(p, task.get("selector", ""), task.get("value", "")),
|
69
67
|
}
|
70
68
|
|
71
69
|
for task in plan.tasks:
|
72
|
-
|
73
|
-
self._dismiss_unwanted_modals(driver)
|
70
|
+
self._dismiss_unwanted_modals(page, task_context=task.get("description", ""))
|
74
71
|
action = task.get("action", "").lower()
|
75
|
-
logger.info(f"Executing task: {task.get('description', action)}")
|
72
|
+
self.logger.info(f"Executing task: {task.get('description', action)}")
|
76
73
|
start_time = time.time()
|
74
|
+
|
75
|
+
# Build a context string from previously executed tasks.
|
76
|
+
executed_context = "\n".join([f"{r['action']}: {r['message']}" for r in results])
|
77
77
|
handler = action_map.get(action)
|
78
78
|
if not handler:
|
79
79
|
results.append({
|
@@ -83,45 +83,28 @@ class WebBrowserTool(BaseTool):
|
|
83
83
|
})
|
84
84
|
continue
|
85
85
|
|
86
|
-
result = self._execute_with_retries(
|
86
|
+
result = self._execute_with_retries(page, task, handler, executed_context)
|
87
87
|
elapsed = time.time() - start_time
|
88
88
|
result["elapsed"] = elapsed
|
89
|
-
logger.info(f"Action '{action}' completed in {elapsed:.2f} seconds.")
|
89
|
+
self.logger.info(f"Action '{action}' completed in {elapsed:.2f} seconds.")
|
90
90
|
results.append(result)
|
91
91
|
|
92
|
-
if not result.get(
|
93
|
-
logger.error(f"Task failed: {result.get('message')}")
|
94
|
-
self._capture_failure_screenshot(
|
92
|
+
if not result.get("success", False):
|
93
|
+
self.logger.error(f"Task failed: {result.get('message')}")
|
94
|
+
self._capture_failure_screenshot(page, action)
|
95
95
|
break
|
96
96
|
|
97
|
-
current_url =
|
97
|
+
current_url = page.url
|
98
98
|
|
99
99
|
overall_elapsed = time.time() - overall_start
|
100
|
-
logger.info(f"Total execution time: {overall_elapsed:.2f} seconds.")
|
100
|
+
self.logger.info(f"Total execution time: {overall_elapsed:.2f} seconds.")
|
101
|
+
# Do not close the browser.
|
101
102
|
return {"status": "success", "results": results, "total_time": overall_elapsed}
|
102
|
-
|
103
103
|
except Exception as e:
|
104
|
-
logger.exception("Execution error:")
|
104
|
+
self.logger.exception("Execution error:")
|
105
105
|
return {"status": "error", "message": str(e)}
|
106
|
-
finally:
|
107
|
-
if driver:
|
108
|
-
driver.quit()
|
109
|
-
|
110
|
-
def _init_browser(self, headless: bool) -> webdriver.Chrome:
|
111
|
-
"""Initialize browser with advanced options."""
|
112
|
-
options = Options()
|
113
|
-
options.add_argument("--start-maximized")
|
114
|
-
options.add_argument("--disable-blink-features=AutomationControlled")
|
115
|
-
options.add_experimental_option("excludeSwitches", ["enable-automation"])
|
116
|
-
if headless:
|
117
|
-
options.add_argument("--headless=new")
|
118
|
-
return webdriver.Chrome(
|
119
|
-
service=Service(ChromeDriverManager().install()),
|
120
|
-
options=options
|
121
|
-
)
|
122
106
|
|
123
107
|
def _generate_plan(self, query: str, current_url: str) -> BrowserPlan:
|
124
|
-
"""Generate an adaptive execution plan using an LLM or other dynamic planner."""
|
125
108
|
prompt = f"""Generate browser automation plan for: {query}
|
126
109
|
|
127
110
|
Current URL: {current_url or 'No page loaded yet'}
|
@@ -150,7 +133,6 @@ Guidelines:
|
|
150
133
|
return self._parse_plan(response)
|
151
134
|
|
152
135
|
def _parse_plan(self, response: str) -> BrowserPlan:
|
153
|
-
"""Robust JSON parsing with multiple fallback strategies."""
|
154
136
|
try:
|
155
137
|
json_match = re.search(r'```json\n?(.+?)\n?```', response, re.DOTALL)
|
156
138
|
if json_match:
|
@@ -163,7 +145,7 @@ Guidelines:
|
|
163
145
|
validated_tasks = []
|
164
146
|
for task in plan_data.get("tasks", []):
|
165
147
|
if not all(key in task for key in ["action", "description"]):
|
166
|
-
logger.warning(f"Skipping task due to missing keys: {task}")
|
148
|
+
self.logger.warning(f"Skipping task due to missing keys: {task}")
|
167
149
|
continue
|
168
150
|
validated_tasks.append({
|
169
151
|
"action": task["action"],
|
@@ -173,267 +155,391 @@ Guidelines:
|
|
173
155
|
})
|
174
156
|
return BrowserPlan(tasks=validated_tasks)
|
175
157
|
except (json.JSONDecodeError, AttributeError, ValueError) as e:
|
176
|
-
logger.error(f"Plan parsing failed: {e}")
|
158
|
+
self.logger.error(f"Plan parsing failed: {e}")
|
177
159
|
return BrowserPlan(tasks=[])
|
178
160
|
|
179
|
-
def _execute_with_retries(self,
|
180
|
-
handler: Callable[[
|
181
|
-
|
161
|
+
def _execute_with_retries(self, page: Page, task: Dict[str, Any],
|
162
|
+
handler: Callable[[Page, Dict[str, Any]], Dict[str, Any]],
|
163
|
+
executed_context: str = "") -> Dict[str, Any]:
|
164
|
+
"""Execute a task with retry logic. If it fails, pass the executed_context to the fallback prompt.
|
165
|
+
The fallback now returns a JSON array of tasks, which are executed sequentially."""
|
182
166
|
attempts = 0
|
183
167
|
result = {}
|
184
168
|
while attempts < self.max_retries:
|
185
|
-
result = self._execute_safe_task(
|
169
|
+
result = self._execute_safe_task(page, task, handler)
|
186
170
|
if result.get("success", False):
|
187
171
|
return result
|
188
172
|
attempts += 1
|
189
|
-
logger.info(f"Retrying task '{task.get('action')}' (attempt {attempts + 1}/{self.max_retries})")
|
173
|
+
self.logger.info(f"Retrying task '{task.get('action')}' (attempt {attempts + 1}/{self.max_retries})")
|
190
174
|
time.sleep(1 * attempts)
|
175
|
+
if task.get("action") in ["click", "type"]:
|
176
|
+
self.logger.info("HTML-based automation failed. Using fallback with image-based LLM.")
|
177
|
+
result = self._fallback_with_image_llm(page, task, executed_context)
|
191
178
|
return result
|
192
179
|
|
193
|
-
def _execute_safe_task(self,
|
194
|
-
handler: Callable[[
|
195
|
-
"""Execute a task with comprehensive error handling."""
|
180
|
+
def _execute_safe_task(self, page: Page, task: Dict[str, Any],
|
181
|
+
handler: Callable[[Page, Dict[str, Any]], Dict[str, Any]]) -> Dict[str, Any]:
|
196
182
|
try:
|
197
|
-
return handler(
|
183
|
+
return handler(page, task)
|
198
184
|
except Exception as e:
|
199
185
|
action = task.get("action", "unknown")
|
200
|
-
logger.exception(f"Error executing task '{action}':")
|
186
|
+
self.logger.exception(f"Error executing task '{action}':")
|
201
187
|
return {"action": action, "success": False, "message": f"Critical error: {str(e)}"}
|
202
188
|
|
203
|
-
def _dismiss_unwanted_modals(self,
|
204
|
-
"""
|
205
|
-
|
206
|
-
|
207
|
-
|
189
|
+
def _dismiss_unwanted_modals(self, page: Page, task_context: str = ""):
|
190
|
+
modal_selectors = [".modal", ".popup", '[role="dialog"]', ".overlay", ".lightbox"]
|
191
|
+
for selector in modal_selectors:
|
192
|
+
elements = page.query_selector_all(selector)
|
193
|
+
for modal in elements:
|
194
|
+
if modal.is_visible():
|
195
|
+
self._handle_modal(page, modal, task_context)
|
196
|
+
|
197
|
+
def _handle_modal(self, page: Page, modal_element, task_context: str):
|
208
198
|
try:
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
199
|
+
modal_screenshot = modal_element.screenshot()
|
200
|
+
prompt = (
|
201
|
+
f"A modal is displayed on the page. The content is visible in the attached image. "
|
202
|
+
f"The current task context is: \"{task_context}\". "
|
203
|
+
"Based on the content of the modal and the task context, decide whether to dismiss the modal. "
|
204
|
+
"Return a JSON response in the format: { \"action\": \"dismiss\" } to dismiss or { \"action\": \"ignore\" } to leave it. "
|
205
|
+
"Return only the JSON."
|
206
|
+
)
|
207
|
+
response_text = self.llm.generate_from_image(prompt, image_bytes=modal_screenshot)
|
208
|
+
self.logger.info(f"LLM response for modal analysis: {response_text}")
|
209
|
+
json_match = re.search(r'```json\n?(.+?)\n?```', response_text, re.DOTALL)
|
210
|
+
json_text = json_match.group(1).strip() if json_match else response_text.strip()
|
211
|
+
decision = json.loads(json_text)
|
212
|
+
if decision.get("action") == "dismiss":
|
213
|
+
close_buttons = modal_element.query_selector_all(".close, .btn-close, [aria-label='Close'], [data-dismiss='modal']")
|
214
|
+
for btn in close_buttons:
|
215
|
+
if btn.is_visible():
|
216
|
+
btn.click()
|
217
|
+
self.logger.info("Modal dismissed using a close button.")
|
218
|
+
return
|
219
|
+
page.evaluate("(modal) => modal.remove()", modal_element)
|
220
|
+
self.logger.info("Modal dismissed by removal.")
|
221
|
+
else:
|
222
|
+
self.logger.info("Modal left intact according to LLM analysis.")
|
231
223
|
except Exception as e:
|
232
|
-
logger.
|
224
|
+
self.logger.error(f"Modal handling error: {e}")
|
233
225
|
|
234
|
-
def _advanced_find_element(self,
|
226
|
+
def _advanced_find_element(self, page: Page, keyword: str):
|
227
|
+
try:
|
228
|
+
candidates = page.query_selector_all("input, textarea, button, a, div")
|
229
|
+
best_match = None
|
230
|
+
best_ratio = 0.0
|
231
|
+
for candidate in candidates:
|
232
|
+
attrs = page.evaluate(
|
233
|
+
"""(el) => {
|
234
|
+
return {
|
235
|
+
id: el.id,
|
236
|
+
name: el.getAttribute('name'),
|
237
|
+
placeholder: el.getAttribute('placeholder'),
|
238
|
+
aria: el.getAttribute('aria-label'),
|
239
|
+
text: el.innerText
|
240
|
+
};
|
241
|
+
}""",
|
242
|
+
candidate,
|
243
|
+
)
|
244
|
+
combined_text = " ".join(
|
245
|
+
filter(None, [
|
246
|
+
attrs.get("id"),
|
247
|
+
attrs.get("name"),
|
248
|
+
attrs.get("placeholder"),
|
249
|
+
attrs.get("aria"),
|
250
|
+
attrs.get("text"),
|
251
|
+
])
|
252
|
+
)
|
253
|
+
ratio = difflib.SequenceMatcher(None, combined_text.lower(), keyword.lower()).ratio()
|
254
|
+
if ratio > best_ratio:
|
255
|
+
best_ratio = ratio
|
256
|
+
best_match = candidate
|
257
|
+
if best_ratio > 0.5:
|
258
|
+
self.logger.info(f"Advanced fallback detected element with similarity {best_ratio:.2f} for keyword '{keyword}'")
|
259
|
+
return best_match
|
260
|
+
return None
|
261
|
+
except Exception as e:
|
262
|
+
self.logger.error(f"Advanced find element error: {e}")
|
263
|
+
return None
|
264
|
+
|
265
|
+
def _annotate_page_with_numbers(self, page: Page, query: str = "button, a, input, [onclick]"):
|
266
|
+
script = f"""
|
267
|
+
(() => {{
|
268
|
+
document.querySelectorAll('.automation-annotation-overlay').forEach(el => el.remove());
|
269
|
+
const elements = document.querySelectorAll('{query}');
|
270
|
+
let counter = 1;
|
271
|
+
elements.forEach(el => {{
|
272
|
+
const rect = el.getBoundingClientRect();
|
273
|
+
if (rect.width === 0 || rect.height === 0) return;
|
274
|
+
const overlay = document.createElement('div');
|
275
|
+
overlay.classList.add('automation-annotation-overlay');
|
276
|
+
overlay.style.position = 'absolute';
|
277
|
+
overlay.style.left = (rect.left + window.scrollX) + 'px';
|
278
|
+
overlay.style.top = (rect.top + window.scrollY) + 'px';
|
279
|
+
overlay.style.width = rect.width + 'px';
|
280
|
+
overlay.style.height = rect.height + 'px';
|
281
|
+
overlay.style.border = '2px solid red';
|
282
|
+
overlay.style.zIndex = 9999;
|
283
|
+
overlay.style.pointerEvents = 'none';
|
284
|
+
overlay.textContent = counter;
|
285
|
+
overlay.style.fontSize = '16px';
|
286
|
+
overlay.style.fontWeight = 'bold';
|
287
|
+
overlay.style.color = 'red';
|
288
|
+
overlay.style.backgroundColor = 'rgba(255, 255, 255, 0.7)';
|
289
|
+
document.body.appendChild(overlay);
|
290
|
+
counter += 1;
|
291
|
+
}});
|
292
|
+
}})();
|
235
293
|
"""
|
236
|
-
|
237
|
-
|
294
|
+
page.evaluate(script)
|
295
|
+
|
296
|
+
def _click_element_by_number(self, page: Page, number: int) -> Dict[str, Any]:
|
297
|
+
candidates = [el for el in page.query_selector_all("button, a, input, [onclick]") if el.is_visible()]
|
298
|
+
index = number - 1
|
299
|
+
if index < len(candidates):
|
300
|
+
candidate = candidates[index]
|
301
|
+
candidate.scroll_into_view_if_needed()
|
302
|
+
try:
|
303
|
+
candidate.click()
|
304
|
+
return {"action": "click", "success": True, "message": f"Clicked element number {number}"}
|
305
|
+
except Exception as e:
|
306
|
+
return {"action": "click", "success": False, "message": f"Click failed: {str(e)}"}
|
307
|
+
else:
|
308
|
+
return {"action": "click", "success": False, "message": f"Element number {number} not found."}
|
309
|
+
|
310
|
+
def _fallback_with_image_llm(self, page: Page, task: Dict[str, Any], executed_context: str = "") -> Dict[str, Any]:
|
311
|
+
"""
|
312
|
+
Fallback method: Annotate the page, capture a screenshot, and ask the LLM (via image analysis)
|
313
|
+
to generate a JSON array of tasks for the next steps.
|
314
|
+
Each fallback task is an object:
|
315
|
+
{
|
316
|
+
"action": "click" or "type",
|
317
|
+
"element_number": <number>,
|
318
|
+
"text": <if action is 'type', the text to type; otherwise an empty string>
|
319
|
+
}
|
320
|
+
The prompt includes the executed_context.
|
238
321
|
"""
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
322
|
+
query = "input, textarea" if task.get("action") == "type" else "button, a, input, [onclick]"
|
323
|
+
self._annotate_page_with_numbers(page, query=query)
|
324
|
+
time.sleep(1)
|
325
|
+
screenshot_bytes = page.screenshot(type="png")
|
326
|
+
extra = ""
|
327
|
+
if task.get("action") == "type":
|
328
|
+
extra = f"\nThe exact text to be entered is: \"{task.get('value', '').strip()}\"."
|
329
|
+
prompt = (
|
330
|
+
f"Tasks executed so far:\n{executed_context}\n\n"
|
331
|
+
f"The following task remains: {task.get('description', '')}.{extra}\n"
|
332
|
+
"I have annotated the page with numbered overlays using the appropriate query. "
|
333
|
+
"Based on the attached screenshot, generate a JSON array of tasks that need to be performed next. "
|
334
|
+
"Each task should be a JSON object with the format:\n"
|
335
|
+
"[\n"
|
336
|
+
" {\n"
|
337
|
+
" \"action\": \"click\" or \"type\",\n"
|
338
|
+
" \"element_number\": <number>,\n"
|
339
|
+
" \"text\": <if action is 'type', the text to type; otherwise an empty string>\n"
|
340
|
+
" },\n"
|
341
|
+
" ...\n"
|
342
|
+
"]\n"
|
343
|
+
"Return only the JSON array."
|
344
|
+
)
|
345
|
+
response_text = self.llm.generate_from_image(prompt, image_bytes=screenshot_bytes)
|
346
|
+
self.logger.info(f"LLM response for fallback: {response_text}")
|
347
|
+
try:
|
348
|
+
fallback_tasks = json.loads(response_text.strip())
|
349
|
+
if not isinstance(fallback_tasks, list):
|
350
|
+
fallback_tasks = [fallback_tasks]
|
351
|
+
except Exception as e:
|
352
|
+
json_match = re.search(r'```json\n?(.+?)\n?```', response_text, re.DOTALL)
|
353
|
+
if json_match:
|
354
|
+
json_text = json_match.group(1).strip()
|
355
|
+
fallback_tasks = json.loads(json_text)
|
356
|
+
if not isinstance(fallback_tasks, list):
|
357
|
+
fallback_tasks = [fallback_tasks]
|
358
|
+
else:
|
359
|
+
return {"action": task.get("action"), "success": False, "message": f"Fallback failed to parse JSON: {str(e)}"}
|
360
|
+
|
361
|
+
fallback_results = []
|
362
|
+
for fb_task in fallback_tasks:
|
363
|
+
action = fb_task.get("action")
|
364
|
+
element_number = fb_task.get("element_number")
|
365
|
+
if action == "type":
|
366
|
+
returned_text = fb_task.get("text", "").strip()
|
367
|
+
original_text = task.get("value", "").strip()
|
368
|
+
if returned_text.lower() != original_text.lower():
|
369
|
+
self.logger.info("Overriding LLM-provided text with original input text.")
|
370
|
+
text = original_text
|
371
|
+
else:
|
372
|
+
text = returned_text
|
373
|
+
else:
|
374
|
+
text = fb_task.get("text", "")
|
375
|
+
if action == "click":
|
376
|
+
self.logger.info(f"LLM indicated fallback click on element number {element_number}.")
|
377
|
+
res = self._click_element_by_number(page, element_number)
|
378
|
+
elif action == "type":
|
379
|
+
candidates = [el for el in page.query_selector_all("input, textarea") if el.is_visible()]
|
380
|
+
if element_number - 1 < len(candidates):
|
381
|
+
candidate = candidates[element_number - 1]
|
382
|
+
candidate.scroll_into_view_if_needed()
|
383
|
+
try:
|
384
|
+
candidate.fill(text, timeout=self.default_timeout)
|
385
|
+
res = {"action": "type", "success": True, "message": f"Typed '{text}' into element number {element_number}"}
|
386
|
+
except Exception as ex:
|
387
|
+
res = {"action": "type", "success": False, "message": f"Typing failed on fallback element: {str(ex)}"}
|
388
|
+
else:
|
389
|
+
res = {"action": "type", "success": False, "message": f"Element number {element_number} not found."}
|
390
|
+
else:
|
391
|
+
res = {"action": task.get("action"), "success": False, "message": "Invalid fallback action."}
|
392
|
+
fallback_results.append(res)
|
393
|
+
overall_success = any(r.get("success", False) for r in fallback_results)
|
394
|
+
overall_message = "; ".join([r.get("message", "") for r in fallback_results])
|
395
|
+
return {"action": task.get("action"), "success": overall_success, "message": overall_message}
|
396
|
+
|
397
|
+
def _handle_navigation(self, page: Page, url: str) -> Dict[str, Any]:
|
261
398
|
if not url.startswith(("http://", "https://")):
|
262
399
|
url = f"https://{url}"
|
263
400
|
try:
|
264
|
-
|
265
|
-
|
401
|
+
page.goto(url, timeout=self.default_timeout)
|
402
|
+
page.wait_for_selector("body", timeout=self.default_timeout)
|
266
403
|
return {"action": "navigate", "success": True, "message": f"Navigated to {url}"}
|
404
|
+
except PlaywrightTimeoutError as e:
|
405
|
+
self.logger.error(f"Navigation to {url} timed out: {e}")
|
406
|
+
return {"action": "navigate", "success": False, "message": f"Navigation timed out: {str(e)}"}
|
267
407
|
except Exception as e:
|
268
|
-
logger.error(f"Navigation to {url} failed: {e}")
|
408
|
+
self.logger.error(f"Navigation to {url} failed: {e}")
|
269
409
|
return {"action": "navigate", "success": False, "message": f"Navigation failed: {str(e)}"}
|
270
410
|
|
271
|
-
def _handle_click(self,
|
272
|
-
"""Handle click actions with fallback using JS if needed."""
|
411
|
+
def _handle_click(self, page: Page, selector: str) -> Dict[str, Any]:
|
273
412
|
try:
|
274
|
-
|
275
|
-
|
276
|
-
)
|
277
|
-
driver.execute_script("arguments[0].scrollIntoView({behavior: 'smooth', block: 'center'});", element)
|
278
|
-
try:
|
279
|
-
element.click()
|
280
|
-
except Exception:
|
281
|
-
driver.execute_script("arguments[0].click();", element)
|
413
|
+
page.wait_for_selector(selector, state="visible", timeout=self.default_timeout)
|
414
|
+
page.click(selector, timeout=self.default_timeout)
|
282
415
|
return {"action": "click", "success": True, "message": f"Clicked element: {selector}"}
|
416
|
+
except PlaywrightTimeoutError as e:
|
417
|
+
self.logger.error(f"Click action timed out on selector {selector}: {e}")
|
418
|
+
return {"action": "click", "success": False, "message": f"Click timed out: {str(e)}"}
|
283
419
|
except Exception as e:
|
284
|
-
logger.error(f"Click action failed on selector {selector}: {e}")
|
420
|
+
self.logger.error(f"Click action failed on selector {selector}: {e}")
|
285
421
|
return {"action": "click", "success": False, "message": f"Click failed: {str(e)}"}
|
286
422
|
|
287
|
-
def _handle_typing(self,
|
288
|
-
"""
|
289
|
-
Handle typing into an element.
|
290
|
-
If the primary selector fails, attempt advanced fallback detection.
|
291
|
-
"""
|
292
|
-
try:
|
293
|
-
element = WebDriverWait(driver, self.default_timeout).until(
|
294
|
-
EC.presence_of_element_located((By.CSS_SELECTOR, selector))
|
295
|
-
)
|
296
|
-
except Exception as e:
|
297
|
-
# If the task seems to involve search or similar text, use advanced fallback.
|
298
|
-
if "search" in task.get("description", "").lower() or "search" in selector.lower():
|
299
|
-
logger.info("Primary selector failed; using advanced fallback for element detection.")
|
300
|
-
element = self._advanced_find_element(driver, "search")
|
301
|
-
if not element:
|
302
|
-
return {"action": "type", "success": False, "message": f"Typing failed: No search-like element found; error: {str(e)}"}
|
303
|
-
else:
|
304
|
-
return {"action": "type", "success": False, "message": f"Typing failed: {str(e)}"}
|
423
|
+
def _handle_typing(self, page: Page, selector: str, text: str, task: Dict[str, Any]) -> Dict[str, Any]:
|
305
424
|
try:
|
306
|
-
|
307
|
-
|
425
|
+
page.wait_for_selector(selector, state="attached", timeout=self.default_timeout)
|
426
|
+
page.fill(selector, text, timeout=self.default_timeout)
|
308
427
|
return {"action": "type", "success": True, "message": f"Typed '{text}' into element."}
|
428
|
+
except PlaywrightTimeoutError as e:
|
429
|
+
self.logger.info("Primary selector failed; using advanced fallback for element detection.")
|
430
|
+
element = self._advanced_find_element(page, "search")
|
431
|
+
if not element:
|
432
|
+
return {"action": "type", "success": False, "message": f"Typing failed: No search-like element found; error: {str(e)}"}
|
433
|
+
try:
|
434
|
+
element.fill(text, timeout=self.default_timeout)
|
435
|
+
return {"action": "type", "success": True, "message": f"Typed '{text}' into fallback element."}
|
436
|
+
except Exception as ex:
|
437
|
+
return {"action": "type", "success": False, "message": f"Typing failed on fallback element: {str(ex)}"}
|
309
438
|
except Exception as e:
|
310
|
-
logger.error(f"Typing action failed: {e}")
|
439
|
+
self.logger.error(f"Typing action failed: {e}")
|
311
440
|
return {"action": "type", "success": False, "message": f"Typing failed: {str(e)}"}
|
312
441
|
|
313
442
|
def _handle_wait(self, seconds: str) -> Dict[str, Any]:
|
314
|
-
"""Handle a simple wait."""
|
315
443
|
try:
|
316
444
|
wait_time = float(seconds)
|
317
|
-
logger.info(f"Waiting for {wait_time} seconds")
|
445
|
+
self.logger.info(f"Waiting for {wait_time} seconds")
|
318
446
|
time.sleep(wait_time)
|
319
447
|
return {"action": "wait", "success": True, "message": f"Waited {wait_time} seconds"}
|
320
448
|
except ValueError as e:
|
321
|
-
logger.error(f"Invalid wait time provided: {seconds}")
|
449
|
+
self.logger.error(f"Invalid wait time provided: {seconds}")
|
322
450
|
return {"action": "wait", "success": False, "message": "Invalid wait time"}
|
323
451
|
|
324
|
-
def _handle_wait_for_ajax(self,
|
325
|
-
"""
|
326
|
-
Wait until AJAX/network activity has subsided.
|
327
|
-
This implementation first checks for jQuery, then falls back to a generic check.
|
328
|
-
"""
|
452
|
+
def _handle_wait_for_ajax(self, page: Page, seconds: str) -> Dict[str, Any]:
|
329
453
|
try:
|
330
|
-
|
331
|
-
logger.info(f"Waiting for AJAX/network activity for up to {
|
332
|
-
end_time = time.time() +
|
454
|
+
timeout_seconds = int(seconds) if seconds.strip() != "" else 30
|
455
|
+
self.logger.info(f"Waiting for AJAX/network activity for up to {timeout_seconds} seconds.")
|
456
|
+
end_time = time.time() + timeout_seconds
|
333
457
|
while time.time() < end_time:
|
334
|
-
ajax_complete =
|
335
|
-
|
336
|
-
|
458
|
+
ajax_complete = page.evaluate("""
|
459
|
+
() => {
|
460
|
+
return (window.jQuery ? jQuery.active === 0 : true) &&
|
461
|
+
(typeof window.fetch === 'function' ? true : true);
|
462
|
+
}
|
337
463
|
""")
|
338
464
|
if ajax_complete:
|
339
465
|
break
|
340
466
|
time.sleep(0.5)
|
341
467
|
return {"action": "wait_for_ajax", "success": True, "message": "AJAX/network activity subsided."}
|
342
468
|
except Exception as e:
|
343
|
-
logger.error(f"Wait for AJAX failed: {e}")
|
469
|
+
self.logger.error(f"Wait for AJAX failed: {e}")
|
344
470
|
return {"action": "wait_for_ajax", "success": False, "message": f"Wait for AJAX failed: {str(e)}"}
|
345
471
|
|
346
|
-
def _handle_scroll(self,
|
347
|
-
"""Handle scrolling to a specific element or page bottom."""
|
472
|
+
def _handle_scroll(self, page: Page, selector: str) -> Dict[str, Any]:
|
348
473
|
try:
|
349
474
|
if selector:
|
350
|
-
|
351
|
-
|
352
|
-
)
|
353
|
-
driver.execute_script("arguments[0].scrollIntoView({behavior: 'smooth', block: 'center'});", element)
|
475
|
+
page.wait_for_selector(selector, timeout=self.default_timeout)
|
476
|
+
page.eval_on_selector(selector, "el => el.scrollIntoView({behavior: 'smooth', block: 'center'})")
|
354
477
|
scroll_target = selector
|
355
478
|
else:
|
356
|
-
|
479
|
+
page.evaluate("window.scrollTo(0, document.body.scrollHeight);")
|
357
480
|
scroll_target = "page bottom"
|
358
481
|
return {"action": "scroll", "success": True, "message": f"Scrolled to {scroll_target}"}
|
359
482
|
except Exception as e:
|
360
|
-
logger.error(f"Scroll action failed on selector {selector}: {e}")
|
483
|
+
self.logger.error(f"Scroll action failed on selector {selector}: {e}")
|
361
484
|
return {"action": "scroll", "success": False, "message": f"Scroll failed: {str(e)}"}
|
362
485
|
|
363
|
-
def _handle_hover(self,
|
364
|
-
"""Handle mouse hover action."""
|
486
|
+
def _handle_hover(self, page: Page, selector: str) -> Dict[str, Any]:
|
365
487
|
try:
|
366
|
-
|
367
|
-
|
368
|
-
)
|
369
|
-
ActionChains(driver).move_to_element(element).perform()
|
488
|
+
page.wait_for_selector(selector, state="visible", timeout=self.default_timeout)
|
489
|
+
page.hover(selector, timeout=self.default_timeout)
|
370
490
|
return {"action": "hover", "success": True, "message": f"Hovered over {selector}"}
|
371
491
|
except Exception as e:
|
372
|
-
logger.error(f"Hover action failed on selector {selector}: {e}")
|
492
|
+
self.logger.error(f"Hover action failed on selector {selector}: {e}")
|
373
493
|
return {"action": "hover", "success": False, "message": f"Hover failed: {str(e)}"}
|
374
494
|
|
375
|
-
def _handle_screenshot(self,
|
376
|
-
"""Capture a screenshot of the current browser state."""
|
495
|
+
def _handle_screenshot(self, page: Page, filename: str) -> Dict[str, Any]:
|
377
496
|
try:
|
378
|
-
|
497
|
+
page.screenshot(path=filename)
|
379
498
|
return {"action": "screenshot", "success": True, "message": f"Screenshot saved as {filename}"}
|
380
499
|
except Exception as e:
|
381
|
-
logger.error(f"Screenshot capture failed: {e}")
|
500
|
+
self.logger.error(f"Screenshot capture failed: {e}")
|
382
501
|
return {"action": "screenshot", "success": False, "message": f"Screenshot failed: {str(e)}"}
|
383
502
|
|
384
|
-
def _handle_switch_tab(self,
|
385
|
-
"""
|
386
|
-
Switch between tabs. 'value' can be an index or the keyword 'new'.
|
387
|
-
"""
|
503
|
+
def _handle_switch_tab(self, context, value: str) -> Dict[str, Any]:
|
388
504
|
try:
|
389
|
-
|
505
|
+
pages = context.pages
|
390
506
|
if value.lower() == "new":
|
391
|
-
|
507
|
+
target_page = pages[-1]
|
392
508
|
else:
|
393
509
|
idx = int(value)
|
394
|
-
if idx < len(
|
395
|
-
|
510
|
+
if idx < len(pages):
|
511
|
+
target_page = pages[idx]
|
396
512
|
else:
|
397
513
|
return {"action": "switch_tab", "success": False, "message": f"Tab index {value} out of range"}
|
398
|
-
driver.switch_to.window(target_handle)
|
399
514
|
return {"action": "switch_tab", "success": True, "message": f"Switched to tab {value}"}
|
400
515
|
except Exception as e:
|
401
|
-
logger.error(f"Switch tab failed: {e}")
|
516
|
+
self.logger.error(f"Switch tab failed: {e}")
|
402
517
|
return {"action": "switch_tab", "success": False, "message": f"Switch tab failed: {str(e)}"}
|
403
518
|
|
404
|
-
def _handle_execute_script(self,
|
405
|
-
"""
|
406
|
-
Execute arbitrary JavaScript code.
|
407
|
-
"""
|
519
|
+
def _handle_execute_script(self, page: Page, script: str) -> Dict[str, Any]:
|
408
520
|
try:
|
409
|
-
result =
|
521
|
+
result = page.evaluate(script)
|
410
522
|
return {"action": "execute_script", "success": True, "message": "Script executed successfully", "result": result}
|
411
523
|
except Exception as e:
|
412
|
-
logger.error(f"Execute script failed: {e}")
|
524
|
+
self.logger.error(f"Execute script failed: {e}")
|
413
525
|
return {"action": "execute_script", "success": False, "message": f"Script execution failed: {str(e)}"}
|
414
526
|
|
415
|
-
def _handle_drag_and_drop(self,
|
416
|
-
"""
|
417
|
-
Simulate a drag-and-drop operation.
|
418
|
-
"""
|
527
|
+
def _handle_drag_and_drop(self, page: Page, source_selector: str, target_selector: str) -> Dict[str, Any]:
|
419
528
|
try:
|
420
|
-
|
421
|
-
|
422
|
-
)
|
423
|
-
target =
|
424
|
-
|
425
|
-
)
|
426
|
-
ActionChains(driver).drag_and_drop(source, target).perform()
|
529
|
+
page.wait_for_selector(source_selector, timeout=self.default_timeout)
|
530
|
+
page.wait_for_selector(target_selector, timeout=self.default_timeout)
|
531
|
+
source = page.locator(source_selector)
|
532
|
+
target = page.locator(target_selector)
|
533
|
+
source.drag_to(target, timeout=self.default_timeout)
|
427
534
|
return {"action": "drag_and_drop", "success": True, "message": f"Dragged element from {source_selector} to {target_selector}"}
|
428
535
|
except Exception as e:
|
429
|
-
logger.error(f"Drag and drop failed from {source_selector} to {target_selector}: {e}")
|
536
|
+
self.logger.error(f"Drag and drop failed from {source_selector} to {target_selector}: {e}")
|
430
537
|
return {"action": "drag_and_drop", "success": False, "message": f"Drag and drop failed: {str(e)}"}
|
431
538
|
|
432
|
-
def _capture_failure_screenshot(self,
|
433
|
-
"""Capture a screenshot for debugging when an error occurs."""
|
539
|
+
def _capture_failure_screenshot(self, page: Page, action: str):
|
434
540
|
filename = f"failure_{action}_{int(time.time())}.png"
|
435
541
|
try:
|
436
|
-
|
437
|
-
logger.info(f"Failure screenshot captured: {filename}")
|
542
|
+
page.screenshot(path=filename)
|
543
|
+
self.logger.info(f"Failure screenshot captured: {filename}")
|
438
544
|
except Exception as e:
|
439
|
-
logger.error(f"Failed to capture screenshot: {e}")
|
545
|
+
self.logger.error(f"Failed to capture screenshot: {e}")
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: semantio
|
3
|
-
Version: 0.0.
|
3
|
+
Version: 0.0.7
|
4
4
|
Summary: A powerful SDK for building AI agents
|
5
5
|
Home-page: https://github.com/Syenah/semantio
|
6
6
|
Author: Rakesh
|
@@ -33,7 +33,6 @@ Requires-Dist: sentence-transformers
|
|
33
33
|
Requires-Dist: fuzzywuzzy
|
34
34
|
Requires-Dist: duckduckgo-search
|
35
35
|
Requires-Dist: yfinance
|
36
|
-
Requires-Dist: selenium
|
37
36
|
Requires-Dist: beautifulsoup4
|
38
37
|
Requires-Dist: webdriver-manager
|
39
38
|
Requires-Dist: validators
|
@@ -30,16 +30,16 @@ semantio/tools/base_tool.py,sha256=xBNSa_8a8WmA4BGRLG2dE7wj9GnBcZo7-P2SyD86GvY,5
|
|
30
30
|
semantio/tools/crypto.py,sha256=mut1ztvpPcUUP3b563dh_FmKtP68KmNis3Qm8WENj8w,5559
|
31
31
|
semantio/tools/duckduckgo.py,sha256=6mGn0js0cIsVxQlAgB8AYNLP05H8WmJKnSVosiO9iH0,5034
|
32
32
|
semantio/tools/stocks.py,sha256=BVuK61O9OmWQjj0YdiCJY6TzpiFJ_An1UJB2RkDfX2k,5393
|
33
|
-
semantio/tools/web_browser.py,sha256=
|
33
|
+
semantio/tools/web_browser.py,sha256=8-_SXvu3CRnIwKBlcmNe7-9DOd4y7OC7T24RB0xKMnI,28911
|
34
34
|
semantio/utils/__init__.py,sha256=Lx4X4iJpRhZzRmpQb80XXh5Ve8ZMOkadWAxXSmHpO_8,244
|
35
35
|
semantio/utils/config.py,sha256=ZTwUTqxjW3-w94zoU7GzivWyJe0JJGvBfuB4RUOuEs8,1198
|
36
36
|
semantio/utils/date_utils.py,sha256=x3oqRGv6ee_KCJ0LvCqqZh_FSgS6YGOHBwZQS4TJetY,1471
|
37
37
|
semantio/utils/file_utils.py,sha256=b_cMuJINEGk9ikNuNHSn9lsmICWwvtnCDZ03ndH_S2I,1779
|
38
38
|
semantio/utils/logger.py,sha256=TmGbP8BRjLMWjXi2GWzZ0RIXt70x9qX3FuIqghCNlwM,510
|
39
39
|
semantio/utils/validation_utils.py,sha256=iwoxEb4Q5ILqV6tbesMjPWPCCoL3AmPLejGUy6q8YvQ,1284
|
40
|
-
semantio-0.0.
|
41
|
-
semantio-0.0.
|
42
|
-
semantio-0.0.
|
43
|
-
semantio-0.0.
|
44
|
-
semantio-0.0.
|
45
|
-
semantio-0.0.
|
40
|
+
semantio-0.0.7.dist-info/LICENSE,sha256=mziLlfb9hZ8HKxm9V6BiHpmgJvmcDvswu1QBlDB-6vU,1074
|
41
|
+
semantio-0.0.7.dist-info/METADATA,sha256=QQRzinLKReosRRthYf1bei5FDAaOPHaG4bG5gdJnMFc,6889
|
42
|
+
semantio-0.0.7.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
|
43
|
+
semantio-0.0.7.dist-info/entry_points.txt,sha256=zbPgevSLwcLpdRHqI_atE8EOt8lK2vRF1AoDflDTo18,53
|
44
|
+
semantio-0.0.7.dist-info/top_level.txt,sha256=Yte_6mb-bh-I_lQwMjk1GijZkxPoX4Zmp3kBftC1ZlA,9
|
45
|
+
semantio-0.0.7.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|