semantio 0.0.6__py3-none-any.whl → 0.0.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- semantio/tools/web_browser.py +344 -238
- {semantio-0.0.6.dist-info → semantio-0.0.7.dist-info}/METADATA +1 -2
- {semantio-0.0.6.dist-info → semantio-0.0.7.dist-info}/RECORD +7 -7
- {semantio-0.0.6.dist-info → semantio-0.0.7.dist-info}/LICENSE +0 -0
- {semantio-0.0.6.dist-info → semantio-0.0.7.dist-info}/WHEEL +0 -0
- {semantio-0.0.6.dist-info → semantio-0.0.7.dist-info}/entry_points.txt +0 -0
- {semantio-0.0.6.dist-info → semantio-0.0.7.dist-info}/top_level.txt +0 -0
semantio/tools/web_browser.py
CHANGED
@@ -1,24 +1,11 @@
|
|
1
1
|
# web_browser.py
|
2
2
|
from typing import Dict, Any, List, Optional, Callable
|
3
3
|
from pydantic import Field, BaseModel
|
4
|
-
from
|
5
|
-
|
6
|
-
from selenium.webdriver.common.action_chains import ActionChains
|
7
|
-
from selenium.webdriver.remote.webelement import WebElement
|
8
|
-
from selenium.webdriver.support.ui import WebDriverWait
|
9
|
-
from selenium.webdriver.support import expected_conditions as EC
|
10
|
-
from selenium.webdriver.chrome.options import Options
|
11
|
-
from selenium.webdriver.chrome.service import Service
|
12
|
-
from webdriver_manager.chrome import ChromeDriverManager
|
13
|
-
from bs4 import BeautifulSoup
|
14
|
-
import json
|
15
|
-
import time
|
16
|
-
import re
|
17
|
-
import logging
|
18
|
-
import os
|
19
|
-
import difflib
|
4
|
+
from playwright.sync_api import sync_playwright, Page, TimeoutError as PlaywrightTimeoutError
|
5
|
+
import json, time, re, logging, os, difflib
|
20
6
|
from .base_tool import BaseTool
|
21
7
|
|
8
|
+
# Global logger
|
22
9
|
logger = logging.getLogger(__name__)
|
23
10
|
|
24
11
|
class BrowserPlan(BaseModel):
|
@@ -30,50 +17,63 @@ class BrowserPlan(BaseModel):
|
|
30
17
|
class WebBrowserTool(BaseTool):
|
31
18
|
name: str = Field("WebBrowser", description="Name of the tool")
|
32
19
|
description: str = Field(
|
33
|
-
"
|
20
|
+
"Universal web automation tool with advanced element identification (DOM and image fallback), modal analysis, AJAX waiting, multi-tab support, and custom JS injection.",
|
34
21
|
description="Tool description"
|
35
22
|
)
|
36
|
-
|
37
|
-
|
38
|
-
|
23
|
+
default_timeout: int = 15000 # 15 seconds in milliseconds
|
24
|
+
max_retries: int = 3
|
25
|
+
|
26
|
+
def __init__(self, *args, **kwargs):
|
27
|
+
super().__init__(*args, **kwargs)
|
28
|
+
# Bypass Pydantic's restrictions for extra attributes.
|
29
|
+
object.__setattr__(self, "logger", logging.getLogger(__name__))
|
39
30
|
|
40
31
|
def execute(self, input: Dict[str, Any]) -> Dict[str, Any]:
|
41
|
-
"""
|
42
|
-
|
32
|
+
"""
|
33
|
+
Execute the browser automation workflow.
|
34
|
+
Maintains a context string of executed tasks and passes it to fallback routines.
|
35
|
+
DOES NOT close the browser after successful execution.
|
36
|
+
"""
|
43
37
|
overall_start = time.time()
|
38
|
+
results = [] # to hold summaries of executed tasks (for context)
|
39
|
+
current_url = ""
|
44
40
|
try:
|
45
41
|
headless = input.get("headless", False)
|
46
|
-
self.default_timeout = int(input.get("timeout",
|
42
|
+
self.default_timeout = int(input.get("timeout", 15)) * 1000
|
47
43
|
self.max_retries = int(input.get("max_retries", self.max_retries))
|
48
|
-
|
49
|
-
results = []
|
50
|
-
current_url = ""
|
51
|
-
|
52
|
-
plan = self._generate_plan(input.get('query', ''), current_url)
|
44
|
+
plan = self._generate_plan(input.get("query", ""), current_url)
|
53
45
|
if not plan.tasks:
|
54
46
|
raise ValueError("No valid tasks in the generated plan.")
|
55
47
|
|
56
|
-
#
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
"
|
65
|
-
"
|
66
|
-
"
|
67
|
-
"
|
68
|
-
"
|
48
|
+
# Start Playwright without a "with" block so we can leave the browser open.
|
49
|
+
p = sync_playwright().start()
|
50
|
+
browser = p.chromium.launch(headless=headless)
|
51
|
+
context = browser.new_context()
|
52
|
+
page = context.new_page()
|
53
|
+
|
54
|
+
# Map actions to handlers.
|
55
|
+
action_map: Dict[str, Callable[[Page, Dict[str, Any]], Dict[str, Any]]] = {
|
56
|
+
"navigate": lambda p, task: self._handle_navigation(p, task.get("value", "")),
|
57
|
+
"click": lambda p, task: self._handle_click(p, task.get("selector", "")),
|
58
|
+
"type": lambda p, task: self._handle_typing(p, task.get("selector", ""), task.get("value", ""), task),
|
59
|
+
"wait": lambda p, task: self._handle_wait(task.get("value", "")),
|
60
|
+
"wait_for_ajax": lambda p, task: self._handle_wait_for_ajax(p, task.get("value", "")),
|
61
|
+
"scroll": lambda p, task: self._handle_scroll(p, task.get("selector", "")),
|
62
|
+
"hover": lambda p, task: self._handle_hover(p, task.get("selector", "")),
|
63
|
+
"screenshot": lambda p, task: self._handle_screenshot(p, task.get("value", "screenshot.png")),
|
64
|
+
"switch_tab": lambda p, task: self._handle_switch_tab(context, task.get("value", "0")),
|
65
|
+
"execute_script": lambda p, task: self._handle_execute_script(p, task.get("value", "")),
|
66
|
+
"drag_and_drop": lambda p, task: self._handle_drag_and_drop(p, task.get("selector", ""), task.get("value", "")),
|
69
67
|
}
|
70
68
|
|
71
69
|
for task in plan.tasks:
|
72
|
-
|
73
|
-
self._dismiss_unwanted_modals(driver)
|
70
|
+
self._dismiss_unwanted_modals(page, task_context=task.get("description", ""))
|
74
71
|
action = task.get("action", "").lower()
|
75
|
-
logger.info(f"Executing task: {task.get('description', action)}")
|
72
|
+
self.logger.info(f"Executing task: {task.get('description', action)}")
|
76
73
|
start_time = time.time()
|
74
|
+
|
75
|
+
# Build a context string from previously executed tasks.
|
76
|
+
executed_context = "\n".join([f"{r['action']}: {r['message']}" for r in results])
|
77
77
|
handler = action_map.get(action)
|
78
78
|
if not handler:
|
79
79
|
results.append({
|
@@ -83,45 +83,28 @@ class WebBrowserTool(BaseTool):
|
|
83
83
|
})
|
84
84
|
continue
|
85
85
|
|
86
|
-
result = self._execute_with_retries(
|
86
|
+
result = self._execute_with_retries(page, task, handler, executed_context)
|
87
87
|
elapsed = time.time() - start_time
|
88
88
|
result["elapsed"] = elapsed
|
89
|
-
logger.info(f"Action '{action}' completed in {elapsed:.2f} seconds.")
|
89
|
+
self.logger.info(f"Action '{action}' completed in {elapsed:.2f} seconds.")
|
90
90
|
results.append(result)
|
91
91
|
|
92
|
-
if not result.get(
|
93
|
-
logger.error(f"Task failed: {result.get('message')}")
|
94
|
-
self._capture_failure_screenshot(
|
92
|
+
if not result.get("success", False):
|
93
|
+
self.logger.error(f"Task failed: {result.get('message')}")
|
94
|
+
self._capture_failure_screenshot(page, action)
|
95
95
|
break
|
96
96
|
|
97
|
-
current_url =
|
97
|
+
current_url = page.url
|
98
98
|
|
99
99
|
overall_elapsed = time.time() - overall_start
|
100
|
-
logger.info(f"Total execution time: {overall_elapsed:.2f} seconds.")
|
100
|
+
self.logger.info(f"Total execution time: {overall_elapsed:.2f} seconds.")
|
101
|
+
# Do not close the browser.
|
101
102
|
return {"status": "success", "results": results, "total_time": overall_elapsed}
|
102
|
-
|
103
103
|
except Exception as e:
|
104
|
-
logger.exception("Execution error:")
|
104
|
+
self.logger.exception("Execution error:")
|
105
105
|
return {"status": "error", "message": str(e)}
|
106
|
-
finally:
|
107
|
-
if driver:
|
108
|
-
driver.quit()
|
109
|
-
|
110
|
-
def _init_browser(self, headless: bool) -> webdriver.Chrome:
|
111
|
-
"""Initialize browser with advanced options."""
|
112
|
-
options = Options()
|
113
|
-
options.add_argument("--start-maximized")
|
114
|
-
options.add_argument("--disable-blink-features=AutomationControlled")
|
115
|
-
options.add_experimental_option("excludeSwitches", ["enable-automation"])
|
116
|
-
if headless:
|
117
|
-
options.add_argument("--headless=new")
|
118
|
-
return webdriver.Chrome(
|
119
|
-
service=Service(ChromeDriverManager().install()),
|
120
|
-
options=options
|
121
|
-
)
|
122
106
|
|
123
107
|
def _generate_plan(self, query: str, current_url: str) -> BrowserPlan:
|
124
|
-
"""Generate an adaptive execution plan using an LLM or other dynamic planner."""
|
125
108
|
prompt = f"""Generate browser automation plan for: {query}
|
126
109
|
|
127
110
|
Current URL: {current_url or 'No page loaded yet'}
|
@@ -150,7 +133,6 @@ Guidelines:
|
|
150
133
|
return self._parse_plan(response)
|
151
134
|
|
152
135
|
def _parse_plan(self, response: str) -> BrowserPlan:
|
153
|
-
"""Robust JSON parsing with multiple fallback strategies."""
|
154
136
|
try:
|
155
137
|
json_match = re.search(r'```json\n?(.+?)\n?```', response, re.DOTALL)
|
156
138
|
if json_match:
|
@@ -163,7 +145,7 @@ Guidelines:
|
|
163
145
|
validated_tasks = []
|
164
146
|
for task in plan_data.get("tasks", []):
|
165
147
|
if not all(key in task for key in ["action", "description"]):
|
166
|
-
logger.warning(f"Skipping task due to missing keys: {task}")
|
148
|
+
self.logger.warning(f"Skipping task due to missing keys: {task}")
|
167
149
|
continue
|
168
150
|
validated_tasks.append({
|
169
151
|
"action": task["action"],
|
@@ -173,267 +155,391 @@ Guidelines:
|
|
173
155
|
})
|
174
156
|
return BrowserPlan(tasks=validated_tasks)
|
175
157
|
except (json.JSONDecodeError, AttributeError, ValueError) as e:
|
176
|
-
logger.error(f"Plan parsing failed: {e}")
|
158
|
+
self.logger.error(f"Plan parsing failed: {e}")
|
177
159
|
return BrowserPlan(tasks=[])
|
178
160
|
|
179
|
-
def _execute_with_retries(self,
|
180
|
-
handler: Callable[[
|
181
|
-
|
161
|
+
def _execute_with_retries(self, page: Page, task: Dict[str, Any],
|
162
|
+
handler: Callable[[Page, Dict[str, Any]], Dict[str, Any]],
|
163
|
+
executed_context: str = "") -> Dict[str, Any]:
|
164
|
+
"""Execute a task with retry logic. If it fails, pass the executed_context to the fallback prompt.
|
165
|
+
The fallback now returns a JSON array of tasks, which are executed sequentially."""
|
182
166
|
attempts = 0
|
183
167
|
result = {}
|
184
168
|
while attempts < self.max_retries:
|
185
|
-
result = self._execute_safe_task(
|
169
|
+
result = self._execute_safe_task(page, task, handler)
|
186
170
|
if result.get("success", False):
|
187
171
|
return result
|
188
172
|
attempts += 1
|
189
|
-
logger.info(f"Retrying task '{task.get('action')}' (attempt {attempts + 1}/{self.max_retries})")
|
173
|
+
self.logger.info(f"Retrying task '{task.get('action')}' (attempt {attempts + 1}/{self.max_retries})")
|
190
174
|
time.sleep(1 * attempts)
|
175
|
+
if task.get("action") in ["click", "type"]:
|
176
|
+
self.logger.info("HTML-based automation failed. Using fallback with image-based LLM.")
|
177
|
+
result = self._fallback_with_image_llm(page, task, executed_context)
|
191
178
|
return result
|
192
179
|
|
193
|
-
def _execute_safe_task(self,
|
194
|
-
handler: Callable[[
|
195
|
-
"""Execute a task with comprehensive error handling."""
|
180
|
+
def _execute_safe_task(self, page: Page, task: Dict[str, Any],
|
181
|
+
handler: Callable[[Page, Dict[str, Any]], Dict[str, Any]]) -> Dict[str, Any]:
|
196
182
|
try:
|
197
|
-
return handler(
|
183
|
+
return handler(page, task)
|
198
184
|
except Exception as e:
|
199
185
|
action = task.get("action", "unknown")
|
200
|
-
logger.exception(f"Error executing task '{action}':")
|
186
|
+
self.logger.exception(f"Error executing task '{action}':")
|
201
187
|
return {"action": action, "success": False, "message": f"Critical error: {str(e)}"}
|
202
188
|
|
203
|
-
def _dismiss_unwanted_modals(self,
|
204
|
-
"""
|
205
|
-
|
206
|
-
|
207
|
-
|
189
|
+
def _dismiss_unwanted_modals(self, page: Page, task_context: str = ""):
|
190
|
+
modal_selectors = [".modal", ".popup", '[role="dialog"]', ".overlay", ".lightbox"]
|
191
|
+
for selector in modal_selectors:
|
192
|
+
elements = page.query_selector_all(selector)
|
193
|
+
for modal in elements:
|
194
|
+
if modal.is_visible():
|
195
|
+
self._handle_modal(page, modal, task_context)
|
196
|
+
|
197
|
+
def _handle_modal(self, page: Page, modal_element, task_context: str):
|
208
198
|
try:
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
199
|
+
modal_screenshot = modal_element.screenshot()
|
200
|
+
prompt = (
|
201
|
+
f"A modal is displayed on the page. The content is visible in the attached image. "
|
202
|
+
f"The current task context is: \"{task_context}\". "
|
203
|
+
"Based on the content of the modal and the task context, decide whether to dismiss the modal. "
|
204
|
+
"Return a JSON response in the format: { \"action\": \"dismiss\" } to dismiss or { \"action\": \"ignore\" } to leave it. "
|
205
|
+
"Return only the JSON."
|
206
|
+
)
|
207
|
+
response_text = self.llm.generate_from_image(prompt, image_bytes=modal_screenshot)
|
208
|
+
self.logger.info(f"LLM response for modal analysis: {response_text}")
|
209
|
+
json_match = re.search(r'```json\n?(.+?)\n?```', response_text, re.DOTALL)
|
210
|
+
json_text = json_match.group(1).strip() if json_match else response_text.strip()
|
211
|
+
decision = json.loads(json_text)
|
212
|
+
if decision.get("action") == "dismiss":
|
213
|
+
close_buttons = modal_element.query_selector_all(".close, .btn-close, [aria-label='Close'], [data-dismiss='modal']")
|
214
|
+
for btn in close_buttons:
|
215
|
+
if btn.is_visible():
|
216
|
+
btn.click()
|
217
|
+
self.logger.info("Modal dismissed using a close button.")
|
218
|
+
return
|
219
|
+
page.evaluate("(modal) => modal.remove()", modal_element)
|
220
|
+
self.logger.info("Modal dismissed by removal.")
|
221
|
+
else:
|
222
|
+
self.logger.info("Modal left intact according to LLM analysis.")
|
231
223
|
except Exception as e:
|
232
|
-
logger.
|
224
|
+
self.logger.error(f"Modal handling error: {e}")
|
233
225
|
|
234
|
-
def _advanced_find_element(self,
|
226
|
+
def _advanced_find_element(self, page: Page, keyword: str):
|
227
|
+
try:
|
228
|
+
candidates = page.query_selector_all("input, textarea, button, a, div")
|
229
|
+
best_match = None
|
230
|
+
best_ratio = 0.0
|
231
|
+
for candidate in candidates:
|
232
|
+
attrs = page.evaluate(
|
233
|
+
"""(el) => {
|
234
|
+
return {
|
235
|
+
id: el.id,
|
236
|
+
name: el.getAttribute('name'),
|
237
|
+
placeholder: el.getAttribute('placeholder'),
|
238
|
+
aria: el.getAttribute('aria-label'),
|
239
|
+
text: el.innerText
|
240
|
+
};
|
241
|
+
}""",
|
242
|
+
candidate,
|
243
|
+
)
|
244
|
+
combined_text = " ".join(
|
245
|
+
filter(None, [
|
246
|
+
attrs.get("id"),
|
247
|
+
attrs.get("name"),
|
248
|
+
attrs.get("placeholder"),
|
249
|
+
attrs.get("aria"),
|
250
|
+
attrs.get("text"),
|
251
|
+
])
|
252
|
+
)
|
253
|
+
ratio = difflib.SequenceMatcher(None, combined_text.lower(), keyword.lower()).ratio()
|
254
|
+
if ratio > best_ratio:
|
255
|
+
best_ratio = ratio
|
256
|
+
best_match = candidate
|
257
|
+
if best_ratio > 0.5:
|
258
|
+
self.logger.info(f"Advanced fallback detected element with similarity {best_ratio:.2f} for keyword '{keyword}'")
|
259
|
+
return best_match
|
260
|
+
return None
|
261
|
+
except Exception as e:
|
262
|
+
self.logger.error(f"Advanced find element error: {e}")
|
263
|
+
return None
|
264
|
+
|
265
|
+
def _annotate_page_with_numbers(self, page: Page, query: str = "button, a, input, [onclick]"):
|
266
|
+
script = f"""
|
267
|
+
(() => {{
|
268
|
+
document.querySelectorAll('.automation-annotation-overlay').forEach(el => el.remove());
|
269
|
+
const elements = document.querySelectorAll('{query}');
|
270
|
+
let counter = 1;
|
271
|
+
elements.forEach(el => {{
|
272
|
+
const rect = el.getBoundingClientRect();
|
273
|
+
if (rect.width === 0 || rect.height === 0) return;
|
274
|
+
const overlay = document.createElement('div');
|
275
|
+
overlay.classList.add('automation-annotation-overlay');
|
276
|
+
overlay.style.position = 'absolute';
|
277
|
+
overlay.style.left = (rect.left + window.scrollX) + 'px';
|
278
|
+
overlay.style.top = (rect.top + window.scrollY) + 'px';
|
279
|
+
overlay.style.width = rect.width + 'px';
|
280
|
+
overlay.style.height = rect.height + 'px';
|
281
|
+
overlay.style.border = '2px solid red';
|
282
|
+
overlay.style.zIndex = 9999;
|
283
|
+
overlay.style.pointerEvents = 'none';
|
284
|
+
overlay.textContent = counter;
|
285
|
+
overlay.style.fontSize = '16px';
|
286
|
+
overlay.style.fontWeight = 'bold';
|
287
|
+
overlay.style.color = 'red';
|
288
|
+
overlay.style.backgroundColor = 'rgba(255, 255, 255, 0.7)';
|
289
|
+
document.body.appendChild(overlay);
|
290
|
+
counter += 1;
|
291
|
+
}});
|
292
|
+
}})();
|
235
293
|
"""
|
236
|
-
|
237
|
-
|
294
|
+
page.evaluate(script)
|
295
|
+
|
296
|
+
def _click_element_by_number(self, page: Page, number: int) -> Dict[str, Any]:
|
297
|
+
candidates = [el for el in page.query_selector_all("button, a, input, [onclick]") if el.is_visible()]
|
298
|
+
index = number - 1
|
299
|
+
if index < len(candidates):
|
300
|
+
candidate = candidates[index]
|
301
|
+
candidate.scroll_into_view_if_needed()
|
302
|
+
try:
|
303
|
+
candidate.click()
|
304
|
+
return {"action": "click", "success": True, "message": f"Clicked element number {number}"}
|
305
|
+
except Exception as e:
|
306
|
+
return {"action": "click", "success": False, "message": f"Click failed: {str(e)}"}
|
307
|
+
else:
|
308
|
+
return {"action": "click", "success": False, "message": f"Element number {number} not found."}
|
309
|
+
|
310
|
+
def _fallback_with_image_llm(self, page: Page, task: Dict[str, Any], executed_context: str = "") -> Dict[str, Any]:
|
311
|
+
"""
|
312
|
+
Fallback method: Annotate the page, capture a screenshot, and ask the LLM (via image analysis)
|
313
|
+
to generate a JSON array of tasks for the next steps.
|
314
|
+
Each fallback task is an object:
|
315
|
+
{
|
316
|
+
"action": "click" or "type",
|
317
|
+
"element_number": <number>,
|
318
|
+
"text": <if action is 'type', the text to type; otherwise an empty string>
|
319
|
+
}
|
320
|
+
The prompt includes the executed_context.
|
238
321
|
"""
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
322
|
+
query = "input, textarea" if task.get("action") == "type" else "button, a, input, [onclick]"
|
323
|
+
self._annotate_page_with_numbers(page, query=query)
|
324
|
+
time.sleep(1)
|
325
|
+
screenshot_bytes = page.screenshot(type="png")
|
326
|
+
extra = ""
|
327
|
+
if task.get("action") == "type":
|
328
|
+
extra = f"\nThe exact text to be entered is: \"{task.get('value', '').strip()}\"."
|
329
|
+
prompt = (
|
330
|
+
f"Tasks executed so far:\n{executed_context}\n\n"
|
331
|
+
f"The following task remains: {task.get('description', '')}.{extra}\n"
|
332
|
+
"I have annotated the page with numbered overlays using the appropriate query. "
|
333
|
+
"Based on the attached screenshot, generate a JSON array of tasks that need to be performed next. "
|
334
|
+
"Each task should be a JSON object with the format:\n"
|
335
|
+
"[\n"
|
336
|
+
" {\n"
|
337
|
+
" \"action\": \"click\" or \"type\",\n"
|
338
|
+
" \"element_number\": <number>,\n"
|
339
|
+
" \"text\": <if action is 'type', the text to type; otherwise an empty string>\n"
|
340
|
+
" },\n"
|
341
|
+
" ...\n"
|
342
|
+
"]\n"
|
343
|
+
"Return only the JSON array."
|
344
|
+
)
|
345
|
+
response_text = self.llm.generate_from_image(prompt, image_bytes=screenshot_bytes)
|
346
|
+
self.logger.info(f"LLM response for fallback: {response_text}")
|
347
|
+
try:
|
348
|
+
fallback_tasks = json.loads(response_text.strip())
|
349
|
+
if not isinstance(fallback_tasks, list):
|
350
|
+
fallback_tasks = [fallback_tasks]
|
351
|
+
except Exception as e:
|
352
|
+
json_match = re.search(r'```json\n?(.+?)\n?```', response_text, re.DOTALL)
|
353
|
+
if json_match:
|
354
|
+
json_text = json_match.group(1).strip()
|
355
|
+
fallback_tasks = json.loads(json_text)
|
356
|
+
if not isinstance(fallback_tasks, list):
|
357
|
+
fallback_tasks = [fallback_tasks]
|
358
|
+
else:
|
359
|
+
return {"action": task.get("action"), "success": False, "message": f"Fallback failed to parse JSON: {str(e)}"}
|
360
|
+
|
361
|
+
fallback_results = []
|
362
|
+
for fb_task in fallback_tasks:
|
363
|
+
action = fb_task.get("action")
|
364
|
+
element_number = fb_task.get("element_number")
|
365
|
+
if action == "type":
|
366
|
+
returned_text = fb_task.get("text", "").strip()
|
367
|
+
original_text = task.get("value", "").strip()
|
368
|
+
if returned_text.lower() != original_text.lower():
|
369
|
+
self.logger.info("Overriding LLM-provided text with original input text.")
|
370
|
+
text = original_text
|
371
|
+
else:
|
372
|
+
text = returned_text
|
373
|
+
else:
|
374
|
+
text = fb_task.get("text", "")
|
375
|
+
if action == "click":
|
376
|
+
self.logger.info(f"LLM indicated fallback click on element number {element_number}.")
|
377
|
+
res = self._click_element_by_number(page, element_number)
|
378
|
+
elif action == "type":
|
379
|
+
candidates = [el for el in page.query_selector_all("input, textarea") if el.is_visible()]
|
380
|
+
if element_number - 1 < len(candidates):
|
381
|
+
candidate = candidates[element_number - 1]
|
382
|
+
candidate.scroll_into_view_if_needed()
|
383
|
+
try:
|
384
|
+
candidate.fill(text, timeout=self.default_timeout)
|
385
|
+
res = {"action": "type", "success": True, "message": f"Typed '{text}' into element number {element_number}"}
|
386
|
+
except Exception as ex:
|
387
|
+
res = {"action": "type", "success": False, "message": f"Typing failed on fallback element: {str(ex)}"}
|
388
|
+
else:
|
389
|
+
res = {"action": "type", "success": False, "message": f"Element number {element_number} not found."}
|
390
|
+
else:
|
391
|
+
res = {"action": task.get("action"), "success": False, "message": "Invalid fallback action."}
|
392
|
+
fallback_results.append(res)
|
393
|
+
overall_success = any(r.get("success", False) for r in fallback_results)
|
394
|
+
overall_message = "; ".join([r.get("message", "") for r in fallback_results])
|
395
|
+
return {"action": task.get("action"), "success": overall_success, "message": overall_message}
|
396
|
+
|
397
|
+
def _handle_navigation(self, page: Page, url: str) -> Dict[str, Any]:
|
261
398
|
if not url.startswith(("http://", "https://")):
|
262
399
|
url = f"https://{url}"
|
263
400
|
try:
|
264
|
-
|
265
|
-
|
401
|
+
page.goto(url, timeout=self.default_timeout)
|
402
|
+
page.wait_for_selector("body", timeout=self.default_timeout)
|
266
403
|
return {"action": "navigate", "success": True, "message": f"Navigated to {url}"}
|
404
|
+
except PlaywrightTimeoutError as e:
|
405
|
+
self.logger.error(f"Navigation to {url} timed out: {e}")
|
406
|
+
return {"action": "navigate", "success": False, "message": f"Navigation timed out: {str(e)}"}
|
267
407
|
except Exception as e:
|
268
|
-
logger.error(f"Navigation to {url} failed: {e}")
|
408
|
+
self.logger.error(f"Navigation to {url} failed: {e}")
|
269
409
|
return {"action": "navigate", "success": False, "message": f"Navigation failed: {str(e)}"}
|
270
410
|
|
271
|
-
def _handle_click(self,
|
272
|
-
"""Handle click actions with fallback using JS if needed."""
|
411
|
+
def _handle_click(self, page: Page, selector: str) -> Dict[str, Any]:
|
273
412
|
try:
|
274
|
-
|
275
|
-
|
276
|
-
)
|
277
|
-
driver.execute_script("arguments[0].scrollIntoView({behavior: 'smooth', block: 'center'});", element)
|
278
|
-
try:
|
279
|
-
element.click()
|
280
|
-
except Exception:
|
281
|
-
driver.execute_script("arguments[0].click();", element)
|
413
|
+
page.wait_for_selector(selector, state="visible", timeout=self.default_timeout)
|
414
|
+
page.click(selector, timeout=self.default_timeout)
|
282
415
|
return {"action": "click", "success": True, "message": f"Clicked element: {selector}"}
|
416
|
+
except PlaywrightTimeoutError as e:
|
417
|
+
self.logger.error(f"Click action timed out on selector {selector}: {e}")
|
418
|
+
return {"action": "click", "success": False, "message": f"Click timed out: {str(e)}"}
|
283
419
|
except Exception as e:
|
284
|
-
logger.error(f"Click action failed on selector {selector}: {e}")
|
420
|
+
self.logger.error(f"Click action failed on selector {selector}: {e}")
|
285
421
|
return {"action": "click", "success": False, "message": f"Click failed: {str(e)}"}
|
286
422
|
|
287
|
-
def _handle_typing(self,
|
288
|
-
"""
|
289
|
-
Handle typing into an element.
|
290
|
-
If the primary selector fails, attempt advanced fallback detection.
|
291
|
-
"""
|
292
|
-
try:
|
293
|
-
element = WebDriverWait(driver, self.default_timeout).until(
|
294
|
-
EC.presence_of_element_located((By.CSS_SELECTOR, selector))
|
295
|
-
)
|
296
|
-
except Exception as e:
|
297
|
-
# If the task seems to involve search or similar text, use advanced fallback.
|
298
|
-
if "search" in task.get("description", "").lower() or "search" in selector.lower():
|
299
|
-
logger.info("Primary selector failed; using advanced fallback for element detection.")
|
300
|
-
element = self._advanced_find_element(driver, "search")
|
301
|
-
if not element:
|
302
|
-
return {"action": "type", "success": False, "message": f"Typing failed: No search-like element found; error: {str(e)}"}
|
303
|
-
else:
|
304
|
-
return {"action": "type", "success": False, "message": f"Typing failed: {str(e)}"}
|
423
|
+
def _handle_typing(self, page: Page, selector: str, text: str, task: Dict[str, Any]) -> Dict[str, Any]:
|
305
424
|
try:
|
306
|
-
|
307
|
-
|
425
|
+
page.wait_for_selector(selector, state="attached", timeout=self.default_timeout)
|
426
|
+
page.fill(selector, text, timeout=self.default_timeout)
|
308
427
|
return {"action": "type", "success": True, "message": f"Typed '{text}' into element."}
|
428
|
+
except PlaywrightTimeoutError as e:
|
429
|
+
self.logger.info("Primary selector failed; using advanced fallback for element detection.")
|
430
|
+
element = self._advanced_find_element(page, "search")
|
431
|
+
if not element:
|
432
|
+
return {"action": "type", "success": False, "message": f"Typing failed: No search-like element found; error: {str(e)}"}
|
433
|
+
try:
|
434
|
+
element.fill(text, timeout=self.default_timeout)
|
435
|
+
return {"action": "type", "success": True, "message": f"Typed '{text}' into fallback element."}
|
436
|
+
except Exception as ex:
|
437
|
+
return {"action": "type", "success": False, "message": f"Typing failed on fallback element: {str(ex)}"}
|
309
438
|
except Exception as e:
|
310
|
-
logger.error(f"Typing action failed: {e}")
|
439
|
+
self.logger.error(f"Typing action failed: {e}")
|
311
440
|
return {"action": "type", "success": False, "message": f"Typing failed: {str(e)}"}
|
312
441
|
|
313
442
|
def _handle_wait(self, seconds: str) -> Dict[str, Any]:
|
314
|
-
"""Handle a simple wait."""
|
315
443
|
try:
|
316
444
|
wait_time = float(seconds)
|
317
|
-
logger.info(f"Waiting for {wait_time} seconds")
|
445
|
+
self.logger.info(f"Waiting for {wait_time} seconds")
|
318
446
|
time.sleep(wait_time)
|
319
447
|
return {"action": "wait", "success": True, "message": f"Waited {wait_time} seconds"}
|
320
448
|
except ValueError as e:
|
321
|
-
logger.error(f"Invalid wait time provided: {seconds}")
|
449
|
+
self.logger.error(f"Invalid wait time provided: {seconds}")
|
322
450
|
return {"action": "wait", "success": False, "message": "Invalid wait time"}
|
323
451
|
|
324
|
-
def _handle_wait_for_ajax(self,
|
325
|
-
"""
|
326
|
-
Wait until AJAX/network activity has subsided.
|
327
|
-
This implementation first checks for jQuery, then falls back to a generic check.
|
328
|
-
"""
|
452
|
+
def _handle_wait_for_ajax(self, page: Page, seconds: str) -> Dict[str, Any]:
|
329
453
|
try:
|
330
|
-
|
331
|
-
logger.info(f"Waiting for AJAX/network activity for up to {
|
332
|
-
end_time = time.time() +
|
454
|
+
timeout_seconds = int(seconds) if seconds.strip() != "" else 30
|
455
|
+
self.logger.info(f"Waiting for AJAX/network activity for up to {timeout_seconds} seconds.")
|
456
|
+
end_time = time.time() + timeout_seconds
|
333
457
|
while time.time() < end_time:
|
334
|
-
ajax_complete =
|
335
|
-
|
336
|
-
|
458
|
+
ajax_complete = page.evaluate("""
|
459
|
+
() => {
|
460
|
+
return (window.jQuery ? jQuery.active === 0 : true) &&
|
461
|
+
(typeof window.fetch === 'function' ? true : true);
|
462
|
+
}
|
337
463
|
""")
|
338
464
|
if ajax_complete:
|
339
465
|
break
|
340
466
|
time.sleep(0.5)
|
341
467
|
return {"action": "wait_for_ajax", "success": True, "message": "AJAX/network activity subsided."}
|
342
468
|
except Exception as e:
|
343
|
-
logger.error(f"Wait for AJAX failed: {e}")
|
469
|
+
self.logger.error(f"Wait for AJAX failed: {e}")
|
344
470
|
return {"action": "wait_for_ajax", "success": False, "message": f"Wait for AJAX failed: {str(e)}"}
|
345
471
|
|
346
|
-
def _handle_scroll(self,
|
347
|
-
"""Handle scrolling to a specific element or page bottom."""
|
472
|
+
def _handle_scroll(self, page: Page, selector: str) -> Dict[str, Any]:
|
348
473
|
try:
|
349
474
|
if selector:
|
350
|
-
|
351
|
-
|
352
|
-
)
|
353
|
-
driver.execute_script("arguments[0].scrollIntoView({behavior: 'smooth', block: 'center'});", element)
|
475
|
+
page.wait_for_selector(selector, timeout=self.default_timeout)
|
476
|
+
page.eval_on_selector(selector, "el => el.scrollIntoView({behavior: 'smooth', block: 'center'})")
|
354
477
|
scroll_target = selector
|
355
478
|
else:
|
356
|
-
|
479
|
+
page.evaluate("window.scrollTo(0, document.body.scrollHeight);")
|
357
480
|
scroll_target = "page bottom"
|
358
481
|
return {"action": "scroll", "success": True, "message": f"Scrolled to {scroll_target}"}
|
359
482
|
except Exception as e:
|
360
|
-
logger.error(f"Scroll action failed on selector {selector}: {e}")
|
483
|
+
self.logger.error(f"Scroll action failed on selector {selector}: {e}")
|
361
484
|
return {"action": "scroll", "success": False, "message": f"Scroll failed: {str(e)}"}
|
362
485
|
|
363
|
-
def _handle_hover(self,
|
364
|
-
"""Handle mouse hover action."""
|
486
|
+
def _handle_hover(self, page: Page, selector: str) -> Dict[str, Any]:
|
365
487
|
try:
|
366
|
-
|
367
|
-
|
368
|
-
)
|
369
|
-
ActionChains(driver).move_to_element(element).perform()
|
488
|
+
page.wait_for_selector(selector, state="visible", timeout=self.default_timeout)
|
489
|
+
page.hover(selector, timeout=self.default_timeout)
|
370
490
|
return {"action": "hover", "success": True, "message": f"Hovered over {selector}"}
|
371
491
|
except Exception as e:
|
372
|
-
logger.error(f"Hover action failed on selector {selector}: {e}")
|
492
|
+
self.logger.error(f"Hover action failed on selector {selector}: {e}")
|
373
493
|
return {"action": "hover", "success": False, "message": f"Hover failed: {str(e)}"}
|
374
494
|
|
375
|
-
def _handle_screenshot(self,
|
376
|
-
"""Capture a screenshot of the current browser state."""
|
495
|
+
def _handle_screenshot(self, page: Page, filename: str) -> Dict[str, Any]:
|
377
496
|
try:
|
378
|
-
|
497
|
+
page.screenshot(path=filename)
|
379
498
|
return {"action": "screenshot", "success": True, "message": f"Screenshot saved as {filename}"}
|
380
499
|
except Exception as e:
|
381
|
-
logger.error(f"Screenshot capture failed: {e}")
|
500
|
+
self.logger.error(f"Screenshot capture failed: {e}")
|
382
501
|
return {"action": "screenshot", "success": False, "message": f"Screenshot failed: {str(e)}"}
|
383
502
|
|
384
|
-
def _handle_switch_tab(self,
|
385
|
-
"""
|
386
|
-
Switch between tabs. 'value' can be an index or the keyword 'new'.
|
387
|
-
"""
|
503
|
+
def _handle_switch_tab(self, context, value: str) -> Dict[str, Any]:
|
388
504
|
try:
|
389
|
-
|
505
|
+
pages = context.pages
|
390
506
|
if value.lower() == "new":
|
391
|
-
|
507
|
+
target_page = pages[-1]
|
392
508
|
else:
|
393
509
|
idx = int(value)
|
394
|
-
if idx < len(
|
395
|
-
|
510
|
+
if idx < len(pages):
|
511
|
+
target_page = pages[idx]
|
396
512
|
else:
|
397
513
|
return {"action": "switch_tab", "success": False, "message": f"Tab index {value} out of range"}
|
398
|
-
driver.switch_to.window(target_handle)
|
399
514
|
return {"action": "switch_tab", "success": True, "message": f"Switched to tab {value}"}
|
400
515
|
except Exception as e:
|
401
|
-
logger.error(f"Switch tab failed: {e}")
|
516
|
+
self.logger.error(f"Switch tab failed: {e}")
|
402
517
|
return {"action": "switch_tab", "success": False, "message": f"Switch tab failed: {str(e)}"}
|
403
518
|
|
404
|
-
def _handle_execute_script(self,
|
405
|
-
"""
|
406
|
-
Execute arbitrary JavaScript code.
|
407
|
-
"""
|
519
|
+
def _handle_execute_script(self, page: Page, script: str) -> Dict[str, Any]:
|
408
520
|
try:
|
409
|
-
result =
|
521
|
+
result = page.evaluate(script)
|
410
522
|
return {"action": "execute_script", "success": True, "message": "Script executed successfully", "result": result}
|
411
523
|
except Exception as e:
|
412
|
-
logger.error(f"Execute script failed: {e}")
|
524
|
+
self.logger.error(f"Execute script failed: {e}")
|
413
525
|
return {"action": "execute_script", "success": False, "message": f"Script execution failed: {str(e)}"}
|
414
526
|
|
415
|
-
def _handle_drag_and_drop(self,
|
416
|
-
"""
|
417
|
-
Simulate a drag-and-drop operation.
|
418
|
-
"""
|
527
|
+
def _handle_drag_and_drop(self, page: Page, source_selector: str, target_selector: str) -> Dict[str, Any]:
|
419
528
|
try:
|
420
|
-
|
421
|
-
|
422
|
-
)
|
423
|
-
target =
|
424
|
-
|
425
|
-
)
|
426
|
-
ActionChains(driver).drag_and_drop(source, target).perform()
|
529
|
+
page.wait_for_selector(source_selector, timeout=self.default_timeout)
|
530
|
+
page.wait_for_selector(target_selector, timeout=self.default_timeout)
|
531
|
+
source = page.locator(source_selector)
|
532
|
+
target = page.locator(target_selector)
|
533
|
+
source.drag_to(target, timeout=self.default_timeout)
|
427
534
|
return {"action": "drag_and_drop", "success": True, "message": f"Dragged element from {source_selector} to {target_selector}"}
|
428
535
|
except Exception as e:
|
429
|
-
logger.error(f"Drag and drop failed from {source_selector} to {target_selector}: {e}")
|
536
|
+
self.logger.error(f"Drag and drop failed from {source_selector} to {target_selector}: {e}")
|
430
537
|
return {"action": "drag_and_drop", "success": False, "message": f"Drag and drop failed: {str(e)}"}
|
431
538
|
|
432
|
-
def _capture_failure_screenshot(self,
|
433
|
-
"""Capture a screenshot for debugging when an error occurs."""
|
539
|
+
def _capture_failure_screenshot(self, page: Page, action: str):
|
434
540
|
filename = f"failure_{action}_{int(time.time())}.png"
|
435
541
|
try:
|
436
|
-
|
437
|
-
logger.info(f"Failure screenshot captured: {filename}")
|
542
|
+
page.screenshot(path=filename)
|
543
|
+
self.logger.info(f"Failure screenshot captured: {filename}")
|
438
544
|
except Exception as e:
|
439
|
-
logger.error(f"Failed to capture screenshot: {e}")
|
545
|
+
self.logger.error(f"Failed to capture screenshot: {e}")
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: semantio
|
3
|
-
Version: 0.0.
|
3
|
+
Version: 0.0.7
|
4
4
|
Summary: A powerful SDK for building AI agents
|
5
5
|
Home-page: https://github.com/Syenah/semantio
|
6
6
|
Author: Rakesh
|
@@ -33,7 +33,6 @@ Requires-Dist: sentence-transformers
|
|
33
33
|
Requires-Dist: fuzzywuzzy
|
34
34
|
Requires-Dist: duckduckgo-search
|
35
35
|
Requires-Dist: yfinance
|
36
|
-
Requires-Dist: selenium
|
37
36
|
Requires-Dist: beautifulsoup4
|
38
37
|
Requires-Dist: webdriver-manager
|
39
38
|
Requires-Dist: validators
|
@@ -30,16 +30,16 @@ semantio/tools/base_tool.py,sha256=xBNSa_8a8WmA4BGRLG2dE7wj9GnBcZo7-P2SyD86GvY,5
|
|
30
30
|
semantio/tools/crypto.py,sha256=mut1ztvpPcUUP3b563dh_FmKtP68KmNis3Qm8WENj8w,5559
|
31
31
|
semantio/tools/duckduckgo.py,sha256=6mGn0js0cIsVxQlAgB8AYNLP05H8WmJKnSVosiO9iH0,5034
|
32
32
|
semantio/tools/stocks.py,sha256=BVuK61O9OmWQjj0YdiCJY6TzpiFJ_An1UJB2RkDfX2k,5393
|
33
|
-
semantio/tools/web_browser.py,sha256=
|
33
|
+
semantio/tools/web_browser.py,sha256=8-_SXvu3CRnIwKBlcmNe7-9DOd4y7OC7T24RB0xKMnI,28911
|
34
34
|
semantio/utils/__init__.py,sha256=Lx4X4iJpRhZzRmpQb80XXh5Ve8ZMOkadWAxXSmHpO_8,244
|
35
35
|
semantio/utils/config.py,sha256=ZTwUTqxjW3-w94zoU7GzivWyJe0JJGvBfuB4RUOuEs8,1198
|
36
36
|
semantio/utils/date_utils.py,sha256=x3oqRGv6ee_KCJ0LvCqqZh_FSgS6YGOHBwZQS4TJetY,1471
|
37
37
|
semantio/utils/file_utils.py,sha256=b_cMuJINEGk9ikNuNHSn9lsmICWwvtnCDZ03ndH_S2I,1779
|
38
38
|
semantio/utils/logger.py,sha256=TmGbP8BRjLMWjXi2GWzZ0RIXt70x9qX3FuIqghCNlwM,510
|
39
39
|
semantio/utils/validation_utils.py,sha256=iwoxEb4Q5ILqV6tbesMjPWPCCoL3AmPLejGUy6q8YvQ,1284
|
40
|
-
semantio-0.0.
|
41
|
-
semantio-0.0.
|
42
|
-
semantio-0.0.
|
43
|
-
semantio-0.0.
|
44
|
-
semantio-0.0.
|
45
|
-
semantio-0.0.
|
40
|
+
semantio-0.0.7.dist-info/LICENSE,sha256=mziLlfb9hZ8HKxm9V6BiHpmgJvmcDvswu1QBlDB-6vU,1074
|
41
|
+
semantio-0.0.7.dist-info/METADATA,sha256=QQRzinLKReosRRthYf1bei5FDAaOPHaG4bG5gdJnMFc,6889
|
42
|
+
semantio-0.0.7.dist-info/WHEEL,sha256=ewwEueio1C2XeHTvT17n8dZUJgOvyCWCt0WVNLClP9o,92
|
43
|
+
semantio-0.0.7.dist-info/entry_points.txt,sha256=zbPgevSLwcLpdRHqI_atE8EOt8lK2vRF1AoDflDTo18,53
|
44
|
+
semantio-0.0.7.dist-info/top_level.txt,sha256=Yte_6mb-bh-I_lQwMjk1GijZkxPoX4Zmp3kBftC1ZlA,9
|
45
|
+
semantio-0.0.7.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|