agentcrew-ai 0.8.2__py3-none-any.whl → 0.8.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- AgentCrew/__init__.py +1 -1
- AgentCrew/main.py +3 -1
- AgentCrew/modules/a2a/agent_cards.py +8 -2
- AgentCrew/modules/a2a/errors.py +72 -0
- AgentCrew/modules/a2a/server.py +21 -2
- AgentCrew/modules/a2a/task_manager.py +180 -39
- AgentCrew/modules/agents/local_agent.py +11 -0
- AgentCrew/modules/browser_automation/element_extractor.py +4 -3
- AgentCrew/modules/browser_automation/js/draw_element_boxes.js +200 -0
- AgentCrew/modules/browser_automation/js/extract_clickable_elements.js +58 -26
- AgentCrew/modules/browser_automation/js/extract_elements_by_text.js +21 -19
- AgentCrew/modules/browser_automation/js/extract_input_elements.js +22 -23
- AgentCrew/modules/browser_automation/js/filter_hidden_elements.js +104 -0
- AgentCrew/modules/browser_automation/js/remove_element_boxes.js +29 -0
- AgentCrew/modules/browser_automation/js_loader.py +385 -92
- AgentCrew/modules/browser_automation/service.py +118 -347
- AgentCrew/modules/browser_automation/tool.py +28 -29
- AgentCrew/modules/chat/message/command_processor.py +7 -1
- AgentCrew/modules/chat/message/conversation.py +9 -8
- AgentCrew/modules/code_analysis/service.py +39 -0
- AgentCrew/modules/code_analysis/tool.py +10 -1
- AgentCrew/modules/console/command_handlers.py +186 -1
- AgentCrew/modules/console/completers.py +67 -0
- AgentCrew/modules/console/console_ui.py +59 -5
- AgentCrew/modules/console/display_handlers.py +12 -0
- AgentCrew/modules/console/input_handler.py +2 -0
- AgentCrew/modules/console/ui_effects.py +3 -4
- AgentCrew/modules/custom_llm/service.py +25 -3
- AgentCrew/modules/file_editing/tool.py +9 -11
- AgentCrew/modules/google/native_service.py +13 -0
- AgentCrew/modules/gui/widgets/message_bubble.py +1 -6
- AgentCrew/modules/llm/constants.py +38 -1
- AgentCrew/modules/llm/model_registry.py +9 -0
- AgentCrew/modules/llm/types.py +12 -1
- AgentCrew/modules/memory/base_service.py +2 -2
- AgentCrew/modules/memory/chroma_service.py +79 -138
- AgentCrew/modules/memory/context_persistent.py +10 -4
- AgentCrew/modules/memory/tool.py +17 -18
- AgentCrew/modules/openai/response_service.py +19 -11
- AgentCrew/modules/openai/service.py +15 -0
- AgentCrew/modules/prompts/constants.py +27 -14
- {agentcrew_ai-0.8.2.dist-info → agentcrew_ai-0.8.4.dist-info}/METADATA +3 -3
- {agentcrew_ai-0.8.2.dist-info → agentcrew_ai-0.8.4.dist-info}/RECORD +47 -43
- {agentcrew_ai-0.8.2.dist-info → agentcrew_ai-0.8.4.dist-info}/WHEEL +0 -0
- {agentcrew_ai-0.8.2.dist-info → agentcrew_ai-0.8.4.dist-info}/entry_points.txt +0 -0
- {agentcrew_ai-0.8.2.dist-info → agentcrew_ai-0.8.4.dist-info}/licenses/LICENSE +0 -0
- {agentcrew_ai-0.8.2.dist-info → agentcrew_ai-0.8.4.dist-info}/top_level.txt +0 -0
|
@@ -22,7 +22,7 @@ from .element_extractor import (
|
|
|
22
22
|
clean_markdown_images,
|
|
23
23
|
remove_duplicate_lines,
|
|
24
24
|
)
|
|
25
|
-
from .js_loader import js_loader, key_codes
|
|
25
|
+
from .js_loader import js_loader, JavaScriptExecutor
|
|
26
26
|
|
|
27
27
|
import PyChromeDevTools
|
|
28
28
|
from loguru import logger
|
|
@@ -116,7 +116,7 @@ class BrowserAutomationService:
|
|
|
116
116
|
"profile": profile,
|
|
117
117
|
}
|
|
118
118
|
|
|
119
|
-
current_url = self._get_current_url()
|
|
119
|
+
current_url = JavaScriptExecutor.get_current_url(self.chrome_interface)
|
|
120
120
|
|
|
121
121
|
return {
|
|
122
122
|
"success": True,
|
|
@@ -150,7 +150,6 @@ class BrowserAutomationService:
|
|
|
150
150
|
Returns:
|
|
151
151
|
Dict containing click result
|
|
152
152
|
"""
|
|
153
|
-
# Resolve UUID to XPath
|
|
154
153
|
xpath = self.uuid_to_xpath_mapping.get(element_uuid)
|
|
155
154
|
if not xpath:
|
|
156
155
|
return {
|
|
@@ -165,40 +164,10 @@ class BrowserAutomationService:
|
|
|
165
164
|
raise RuntimeError("Chrome interface is not initialized")
|
|
166
165
|
|
|
167
166
|
js_code = js_loader.get_click_element_js(xpath)
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
expression=js_code, returnByValue=True
|
|
167
|
+
coord_result = JavaScriptExecutor.execute_and_parse_result(
|
|
168
|
+
self.chrome_interface, js_code
|
|
171
169
|
)
|
|
172
170
|
|
|
173
|
-
# Parse JavaScript result
|
|
174
|
-
if isinstance(result, tuple) and len(result) >= 2:
|
|
175
|
-
if isinstance(result[1], dict):
|
|
176
|
-
coord_result = (
|
|
177
|
-
result[1].get("result", {}).get("result", {}).get("value", {})
|
|
178
|
-
)
|
|
179
|
-
elif isinstance(result[1], list) and len(result[1]) > 0:
|
|
180
|
-
coord_result = (
|
|
181
|
-
result[1][0]
|
|
182
|
-
.get("result", {})
|
|
183
|
-
.get("result", {})
|
|
184
|
-
.get("value", {})
|
|
185
|
-
)
|
|
186
|
-
else:
|
|
187
|
-
return {
|
|
188
|
-
"success": False,
|
|
189
|
-
"error": "Invalid response format from coordinate calculation",
|
|
190
|
-
"uuid": element_uuid,
|
|
191
|
-
"xpath": xpath,
|
|
192
|
-
}
|
|
193
|
-
else:
|
|
194
|
-
return {
|
|
195
|
-
"success": False,
|
|
196
|
-
"error": "No response from coordinate calculation",
|
|
197
|
-
"uuid": element_uuid,
|
|
198
|
-
"xpath": xpath,
|
|
199
|
-
}
|
|
200
|
-
|
|
201
|
-
# Check if coordinate calculation was successful
|
|
202
171
|
if not coord_result.get("success", False):
|
|
203
172
|
return {
|
|
204
173
|
"success": False,
|
|
@@ -209,7 +178,6 @@ class BrowserAutomationService:
|
|
|
209
178
|
"xpath": xpath,
|
|
210
179
|
}
|
|
211
180
|
|
|
212
|
-
# Extract coordinates
|
|
213
181
|
x = coord_result.get("x")
|
|
214
182
|
y = coord_result.get("y")
|
|
215
183
|
|
|
@@ -221,23 +189,18 @@ class BrowserAutomationService:
|
|
|
221
189
|
"xpath": xpath,
|
|
222
190
|
}
|
|
223
191
|
|
|
224
|
-
# Wait a moment for scrollIntoView to complete
|
|
225
192
|
time.sleep(0.5)
|
|
226
193
|
|
|
227
|
-
# Step 2: Dispatch mousePressed event using Chrome DevTools Protocol
|
|
228
194
|
self.chrome_interface.Input.dispatchMouseEvent(
|
|
229
195
|
type="mousePressed", x=x, y=y, button="left", clickCount=1
|
|
230
196
|
)
|
|
231
197
|
|
|
232
|
-
# Small delay between press and release (simulate realistic click timing)
|
|
233
198
|
time.sleep(0.02)
|
|
234
199
|
|
|
235
|
-
# Step 3: Dispatch mouseReleased event using Chrome DevTools Protocol
|
|
236
200
|
self.chrome_interface.Input.dispatchMouseEvent(
|
|
237
201
|
type="mouseReleased", x=x, y=y, button="left", clickCount=1
|
|
238
202
|
)
|
|
239
203
|
|
|
240
|
-
# Wait for click to be processed
|
|
241
204
|
time.sleep(1)
|
|
242
205
|
|
|
243
206
|
return {
|
|
@@ -281,7 +244,6 @@ class BrowserAutomationService:
|
|
|
281
244
|
|
|
282
245
|
scroll_distance = amount * 300
|
|
283
246
|
|
|
284
|
-
# Resolve UUID to XPath if provided
|
|
285
247
|
xpath = None
|
|
286
248
|
if element_uuid:
|
|
287
249
|
xpath = self.uuid_to_xpath_mapping.get(element_uuid)
|
|
@@ -294,35 +256,14 @@ class BrowserAutomationService:
|
|
|
294
256
|
"amount": amount,
|
|
295
257
|
}
|
|
296
258
|
|
|
297
|
-
# Load JavaScript code from external file
|
|
298
259
|
js_code = js_loader.get_scroll_page_js(
|
|
299
260
|
direction, scroll_distance, xpath or "", element_uuid or ""
|
|
300
261
|
)
|
|
301
262
|
|
|
302
|
-
|
|
303
|
-
|
|
263
|
+
scroll_result = JavaScriptExecutor.execute_and_parse_result(
|
|
264
|
+
self.chrome_interface, js_code
|
|
304
265
|
)
|
|
305
266
|
|
|
306
|
-
if isinstance(result, tuple) and len(result) >= 2:
|
|
307
|
-
if isinstance(result[1], dict):
|
|
308
|
-
scroll_result = (
|
|
309
|
-
result[1].get("result", {}).get("result", {}).get("value", {})
|
|
310
|
-
)
|
|
311
|
-
elif isinstance(result[1], list) and len(result[1]) > 0:
|
|
312
|
-
scroll_result = (
|
|
313
|
-
result[1][0]
|
|
314
|
-
.get("result", {})
|
|
315
|
-
.get("result", {})
|
|
316
|
-
.get("value", {})
|
|
317
|
-
)
|
|
318
|
-
else:
|
|
319
|
-
scroll_result = {
|
|
320
|
-
"success": False,
|
|
321
|
-
"error": "Invalid response format",
|
|
322
|
-
}
|
|
323
|
-
else:
|
|
324
|
-
scroll_result = {"success": False, "error": "No response from browser"}
|
|
325
|
-
|
|
326
267
|
time.sleep(1.5)
|
|
327
268
|
|
|
328
269
|
result_data = {"direction": direction, "amount": amount, **scroll_result}
|
|
@@ -368,27 +309,39 @@ class BrowserAutomationService:
|
|
|
368
309
|
if retry_count >= 5:
|
|
369
310
|
break
|
|
370
311
|
|
|
371
|
-
|
|
372
|
-
html_node = None
|
|
373
|
-
for node in dom_data[0]["result"]["root"]["children"]:
|
|
374
|
-
if node.get("nodeName") == "HTML":
|
|
375
|
-
html_node = node
|
|
376
|
-
break
|
|
312
|
+
result = JavaScriptExecutor.filter_hidden_elements(self.chrome_interface)
|
|
377
313
|
|
|
378
|
-
if
|
|
379
|
-
|
|
314
|
+
if result.get("success"):
|
|
315
|
+
filtered_html = result.get("html", "")
|
|
316
|
+
logger.info(
|
|
317
|
+
"Successfully filtered hidden elements using computed styles"
|
|
318
|
+
)
|
|
380
319
|
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
320
|
+
else:
|
|
321
|
+
# Find HTML node
|
|
322
|
+
html_node = None
|
|
323
|
+
for node in dom_data[0]["result"]["root"]["children"]:
|
|
324
|
+
if node.get("nodeName") == "HTML":
|
|
325
|
+
html_node = node
|
|
326
|
+
break
|
|
327
|
+
|
|
328
|
+
if not html_node:
|
|
329
|
+
return {
|
|
330
|
+
"success": False,
|
|
331
|
+
"error": "Could not find HTML node in page",
|
|
332
|
+
}
|
|
333
|
+
|
|
334
|
+
# Get outer HTML
|
|
335
|
+
html_content, _ = self.chrome_interface.DOM.getOuterHTML(
|
|
336
|
+
nodeId=html_node["nodeId"]
|
|
337
|
+
)
|
|
338
|
+
raw_html = html_content.get("result", {}).get("outerHTML", "")
|
|
386
339
|
|
|
387
|
-
|
|
388
|
-
|
|
340
|
+
if not raw_html:
|
|
341
|
+
return {"success": False, "error": "Could not extract HTML content"}
|
|
389
342
|
|
|
390
|
-
|
|
391
|
-
|
|
343
|
+
# Filter out hidden elements using JavaScript (doesn't modify page)
|
|
344
|
+
filtered_html = self._filter_hidden_elements(raw_html)
|
|
392
345
|
|
|
393
346
|
# Convert HTML to markdown
|
|
394
347
|
raw_markdown_content = convert_to_markdown(
|
|
@@ -433,7 +386,7 @@ class BrowserAutomationService:
|
|
|
433
386
|
"utf-8", "ignore"
|
|
434
387
|
)
|
|
435
388
|
|
|
436
|
-
current_url = self._get_current_url()
|
|
389
|
+
current_url = JavaScriptExecutor.get_current_url(self.chrome_interface)
|
|
437
390
|
|
|
438
391
|
return {
|
|
439
392
|
"success": True,
|
|
@@ -461,14 +414,21 @@ class BrowserAutomationService:
|
|
|
461
414
|
super().__init__()
|
|
462
415
|
self.filtered_html = []
|
|
463
416
|
self.skip_depth = 0
|
|
417
|
+
self.tag_stack = []
|
|
464
418
|
|
|
465
419
|
def handle_starttag(self, tag, attrs):
|
|
466
420
|
# Convert attrs to dict for easier access
|
|
467
421
|
attr_dict = dict(attrs)
|
|
468
|
-
|
|
469
|
-
# Check if element should be hidden
|
|
470
422
|
should_hide = False
|
|
471
423
|
|
|
424
|
+
if self.skip_depth > 0:
|
|
425
|
+
if tag in self.tag_stack:
|
|
426
|
+
self.skip_depth += 1
|
|
427
|
+
return
|
|
428
|
+
|
|
429
|
+
if tag.lower() in ["script", "style", "svg"]:
|
|
430
|
+
should_hide = True
|
|
431
|
+
|
|
472
432
|
# Check for style="display:none" (case insensitive, flexible matching)
|
|
473
433
|
style = attr_dict.get("style", "")
|
|
474
434
|
if style:
|
|
@@ -487,6 +447,10 @@ class BrowserAutomationService:
|
|
|
487
447
|
should_hide = True
|
|
488
448
|
|
|
489
449
|
if should_hide:
|
|
450
|
+
if tag.lower() in ["img", "input", "br", "hr", "meta", "link"]:
|
|
451
|
+
# Self-closing tags, just skip
|
|
452
|
+
return
|
|
453
|
+
self.tag_stack.append(tag)
|
|
490
454
|
self.skip_depth += 1
|
|
491
455
|
return
|
|
492
456
|
|
|
@@ -500,8 +464,11 @@ class BrowserAutomationService:
|
|
|
500
464
|
|
|
501
465
|
def handle_endtag(self, tag):
|
|
502
466
|
if self.skip_depth > 0:
|
|
503
|
-
self.skip_depth -= 1
|
|
504
|
-
|
|
467
|
+
if tag in self.tag_stack:
|
|
468
|
+
self.skip_depth -= 1
|
|
469
|
+
if self.skip_depth == 0:
|
|
470
|
+
self.tag_stack.remove(tag)
|
|
471
|
+
return
|
|
505
472
|
|
|
506
473
|
if self.skip_depth == 0:
|
|
507
474
|
self.filtered_html.append(f"</{tag}>")
|
|
@@ -534,41 +501,6 @@ class BrowserAutomationService:
|
|
|
534
501
|
# Return original content if filtering fails
|
|
535
502
|
return html_content
|
|
536
503
|
|
|
537
|
-
def _get_current_url(self) -> str:
|
|
538
|
-
"""Get the current page URL."""
|
|
539
|
-
try:
|
|
540
|
-
if self.chrome_interface is None:
|
|
541
|
-
raise RuntimeError("Chrome interface is not initialized")
|
|
542
|
-
runtime_result = self.chrome_interface.Runtime.evaluate(
|
|
543
|
-
expression="window.location.href"
|
|
544
|
-
)
|
|
545
|
-
|
|
546
|
-
if isinstance(runtime_result, tuple) and len(runtime_result) >= 2:
|
|
547
|
-
if isinstance(runtime_result[1], dict):
|
|
548
|
-
current_url = (
|
|
549
|
-
runtime_result[1]
|
|
550
|
-
.get("result", {})
|
|
551
|
-
.get("result", {})
|
|
552
|
-
.get("value", "Unknown")
|
|
553
|
-
)
|
|
554
|
-
elif isinstance(runtime_result[1], list) and len(runtime_result[1]) > 0:
|
|
555
|
-
current_url = (
|
|
556
|
-
runtime_result[1][0]
|
|
557
|
-
.get("result", {})
|
|
558
|
-
.get("result", {})
|
|
559
|
-
.get("value", "Unknown")
|
|
560
|
-
)
|
|
561
|
-
else:
|
|
562
|
-
current_url = "Unknown"
|
|
563
|
-
else:
|
|
564
|
-
current_url = "Unknown"
|
|
565
|
-
|
|
566
|
-
return current_url
|
|
567
|
-
|
|
568
|
-
except Exception as e:
|
|
569
|
-
logger.warning(f"Could not get current URL: {e}")
|
|
570
|
-
return "Unknown"
|
|
571
|
-
|
|
572
504
|
def cleanup(self):
|
|
573
505
|
"""Clean up browser resources."""
|
|
574
506
|
try:
|
|
@@ -604,15 +536,17 @@ class BrowserAutomationService:
|
|
|
604
536
|
if self.chrome_interface is None:
|
|
605
537
|
raise RuntimeError("Chrome interface is not initialized")
|
|
606
538
|
|
|
607
|
-
|
|
608
|
-
|
|
539
|
+
focus_result = JavaScriptExecutor.focus_and_clear_element(
|
|
540
|
+
self.chrome_interface, xpath
|
|
541
|
+
)
|
|
609
542
|
if not focus_result.get("success", False):
|
|
610
543
|
return focus_result
|
|
611
544
|
|
|
612
545
|
can_simulate_typing = focus_result.get("canSimulateTyping", False)
|
|
613
|
-
# Simulate typing each character
|
|
614
546
|
if can_simulate_typing:
|
|
615
|
-
typing_result = self._simulate_typing(value)
|
|
547
|
+
typing_result = JavaScriptExecutor.simulate_typing(
|
|
548
|
+
self.chrome_interface, value
|
|
549
|
+
)
|
|
616
550
|
if not typing_result.get("success", False):
|
|
617
551
|
return {
|
|
618
552
|
**typing_result,
|
|
@@ -621,7 +555,7 @@ class BrowserAutomationService:
|
|
|
621
555
|
"input_value": value,
|
|
622
556
|
}
|
|
623
557
|
|
|
624
|
-
self._trigger_input_events(xpath, value)
|
|
558
|
+
JavaScriptExecutor.trigger_input_events(self.chrome_interface, xpath, value)
|
|
625
559
|
time.sleep(1.5)
|
|
626
560
|
|
|
627
561
|
return {
|
|
@@ -644,131 +578,36 @@ class BrowserAutomationService:
|
|
|
644
578
|
"typing_method": "keyboard_simulation",
|
|
645
579
|
}
|
|
646
580
|
|
|
647
|
-
def
|
|
581
|
+
def dispatch_key_event(self, key: str, modifiers: List[str] = []) -> Dict[str, Any]:
|
|
648
582
|
"""
|
|
649
|
-
|
|
583
|
+
Dispatch key events using CDP input.dispatchKeyEvent.
|
|
650
584
|
|
|
651
585
|
Args:
|
|
652
|
-
|
|
586
|
+
key: Key to dispatch (e.g., 'Enter', 'Up', 'Down', 'F1', 'PageUp')
|
|
587
|
+
modifiers: Optional modifiers like 'ctrl', 'alt', 'shift'
|
|
653
588
|
|
|
654
589
|
Returns:
|
|
655
|
-
Dict containing
|
|
590
|
+
Dict containing dispatch result
|
|
656
591
|
"""
|
|
657
|
-
# Load JavaScript code from external file
|
|
658
|
-
js_code = js_loader.get_focus_and_clear_element_js(xpath)
|
|
659
|
-
|
|
660
|
-
if self.chrome_interface is None:
|
|
661
|
-
raise RuntimeError("Chrome interface is not initialized")
|
|
662
|
-
|
|
663
|
-
result = self.chrome_interface.Runtime.evaluate(
|
|
664
|
-
expression=js_code, returnByValue=True
|
|
665
|
-
)
|
|
666
|
-
|
|
667
|
-
if isinstance(result, tuple) and len(result) >= 2:
|
|
668
|
-
if isinstance(result[1], dict):
|
|
669
|
-
focus_result = (
|
|
670
|
-
result[1].get("result", {}).get("result", {}).get("value", {})
|
|
671
|
-
)
|
|
672
|
-
elif isinstance(result[1], list) and len(result[1]) > 0:
|
|
673
|
-
focus_result = (
|
|
674
|
-
result[1][0].get("result", {}).get("result", {}).get("value", {})
|
|
675
|
-
)
|
|
676
|
-
else:
|
|
677
|
-
focus_result = {
|
|
678
|
-
"success": False,
|
|
679
|
-
"error": "Invalid response format from focus operation",
|
|
680
|
-
}
|
|
681
|
-
else:
|
|
682
|
-
focus_result = {
|
|
683
|
-
"success": False,
|
|
684
|
-
"error": "No response from focus operation",
|
|
685
|
-
}
|
|
686
|
-
|
|
687
|
-
return focus_result
|
|
688
|
-
|
|
689
|
-
def _simulate_typing(self, text: str) -> Dict[str, Any]:
|
|
690
|
-
"""Simulate keyboard typing character by character."""
|
|
691
|
-
if self.chrome_interface is None:
|
|
692
|
-
raise RuntimeError("Chrome interface is not initialized")
|
|
693
|
-
|
|
694
592
|
try:
|
|
695
|
-
|
|
696
|
-
time.sleep(0.05)
|
|
697
|
-
|
|
698
|
-
if char == "\n":
|
|
699
|
-
self.chrome_interface.Input.dispatchKeyEvent(
|
|
700
|
-
**{
|
|
701
|
-
"type": "rawKeyDown",
|
|
702
|
-
"windowsVirtualKeyCode": 13,
|
|
703
|
-
"unmodifiedText": "\r",
|
|
704
|
-
"text": "\r",
|
|
705
|
-
}
|
|
706
|
-
)
|
|
707
|
-
self.chrome_interface.Input.dispatchKeyEvent(
|
|
708
|
-
**{
|
|
709
|
-
"type": "char",
|
|
710
|
-
"windowsVirtualKeyCode": 13,
|
|
711
|
-
"unmodifiedText": "\r",
|
|
712
|
-
"text": "\r",
|
|
713
|
-
}
|
|
714
|
-
)
|
|
715
|
-
self.chrome_interface.Input.dispatchKeyEvent(
|
|
716
|
-
**{
|
|
717
|
-
"type": "keyUp",
|
|
718
|
-
"windowsVirtualKeyCode": 13,
|
|
719
|
-
"unmodifiedText": "\r",
|
|
720
|
-
"text": "\r",
|
|
721
|
-
}
|
|
722
|
-
)
|
|
723
|
-
elif char == "\t":
|
|
724
|
-
self.chrome_interface.Input.dispatchKeyEvent(type="char", text="\t")
|
|
725
|
-
else:
|
|
726
|
-
self.chrome_interface.Input.dispatchKeyEvent(type="char", text=char)
|
|
727
|
-
|
|
728
|
-
return {
|
|
729
|
-
"success": True,
|
|
730
|
-
"message": f"Successfully typed {len(text)} characters",
|
|
731
|
-
"characters_typed": len(text),
|
|
732
|
-
}
|
|
733
|
-
|
|
734
|
-
except Exception as e:
|
|
735
|
-
logger.error(f"Error during typing simulation: {e}")
|
|
736
|
-
return {"success": False, "error": f"Typing simulation failed: {str(e)}"}
|
|
737
|
-
|
|
738
|
-
def _trigger_input_events(self, xpath: str, value: str) -> Dict[str, Any]:
|
|
739
|
-
"""Trigger input and change events to notify the page of input changes."""
|
|
740
|
-
# Load JavaScript code from external file
|
|
741
|
-
js_code = js_loader.get_trigger_input_events_js(xpath, value)
|
|
593
|
+
self._ensure_chrome_running()
|
|
742
594
|
|
|
743
|
-
|
|
744
|
-
|
|
595
|
+
if self.chrome_interface is None:
|
|
596
|
+
raise RuntimeError("Chrome interface is not initialized")
|
|
745
597
|
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
598
|
+
return JavaScriptExecutor.dispatch_key_event(
|
|
599
|
+
self.chrome_interface, key, modifiers
|
|
600
|
+
)
|
|
749
601
|
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
result[1].get("result", {}).get("result", {}).get("value", {})
|
|
754
|
-
)
|
|
755
|
-
elif isinstance(result[1], list) and len(result[1]) > 0:
|
|
756
|
-
event_result = (
|
|
757
|
-
result[1][0].get("result", {}).get("result", {}).get("value", {})
|
|
758
|
-
)
|
|
759
|
-
else:
|
|
760
|
-
event_result = {
|
|
761
|
-
"success": False,
|
|
762
|
-
"error": "Invalid response format from event triggering",
|
|
763
|
-
}
|
|
764
|
-
else:
|
|
765
|
-
event_result = {
|
|
602
|
+
except Exception as e:
|
|
603
|
+
logger.error(f"Key dispatch error: {e}")
|
|
604
|
+
return {
|
|
766
605
|
"success": False,
|
|
767
|
-
"error": "
|
|
606
|
+
"error": f"Key dispatch error: {str(e)}",
|
|
607
|
+
"key": key,
|
|
608
|
+
"modifiers": modifiers,
|
|
768
609
|
}
|
|
769
610
|
|
|
770
|
-
return event_result
|
|
771
|
-
|
|
772
611
|
def get_elements_by_text(self, text: str) -> Dict[str, Any]:
|
|
773
612
|
"""Find elements containing specified text using XPath."""
|
|
774
613
|
try:
|
|
@@ -805,7 +644,7 @@ class BrowserAutomationService:
|
|
|
805
644
|
capture_beyond_viewport: bool = False,
|
|
806
645
|
) -> Dict[str, Any]:
|
|
807
646
|
"""
|
|
808
|
-
Capture a screenshot of the current page.
|
|
647
|
+
Capture a screenshot of the current page with colored boxes and UUID labels drawn over identified elements.
|
|
809
648
|
|
|
810
649
|
Args:
|
|
811
650
|
format: Image format ("png", "jpeg", or "webp"). Defaults to "png"
|
|
@@ -823,6 +662,21 @@ class BrowserAutomationService:
|
|
|
823
662
|
if self.chrome_interface is None:
|
|
824
663
|
raise RuntimeError("Chrome interface is not initialized")
|
|
825
664
|
|
|
665
|
+
boxes_drawn = False
|
|
666
|
+
if self.uuid_to_xpath_mapping:
|
|
667
|
+
draw_result = JavaScriptExecutor.draw_element_boxes(
|
|
668
|
+
self.chrome_interface, self.uuid_to_xpath_mapping
|
|
669
|
+
)
|
|
670
|
+
if draw_result.get("success"):
|
|
671
|
+
boxes_drawn = True
|
|
672
|
+
logger.info(
|
|
673
|
+
f"Drew {draw_result.get('count', 0)} element boxes for screenshot"
|
|
674
|
+
)
|
|
675
|
+
else:
|
|
676
|
+
logger.warning(
|
|
677
|
+
f"Failed to draw element boxes: {draw_result.get('error')}"
|
|
678
|
+
)
|
|
679
|
+
|
|
826
680
|
# Prepare parameters for screenshot capture
|
|
827
681
|
screenshot_params = {
|
|
828
682
|
"format": format,
|
|
@@ -838,18 +692,9 @@ class BrowserAutomationService:
|
|
|
838
692
|
if clip is not None:
|
|
839
693
|
screenshot_params["clip"] = clip
|
|
840
694
|
|
|
841
|
-
# self.chrome_interface.Emulation.setDeviceMetricsOverride(
|
|
842
|
-
# height=1280,
|
|
843
|
-
# width=720,
|
|
844
|
-
# deviceScaleFactor=1,
|
|
845
|
-
# mobile=False,
|
|
846
|
-
# )
|
|
847
|
-
|
|
848
695
|
# Capture the screenshot
|
|
849
696
|
result = self.chrome_interface.Page.captureScreenshot(**screenshot_params)
|
|
850
697
|
|
|
851
|
-
# self.chrome_interface.Emulation.clearDeviceMetricsOverride()
|
|
852
|
-
|
|
853
698
|
if isinstance(result, tuple) and len(result) >= 2:
|
|
854
699
|
if isinstance(result[1], dict):
|
|
855
700
|
screenshot_data = result[1].get("result", {}).get("data", "")
|
|
@@ -869,7 +714,20 @@ class BrowserAutomationService:
|
|
|
869
714
|
if not screenshot_data:
|
|
870
715
|
return {"success": False, "error": "No screenshot data received"}
|
|
871
716
|
|
|
872
|
-
#
|
|
717
|
+
# import base64
|
|
718
|
+
# from datetime import datetime
|
|
719
|
+
#
|
|
720
|
+
# timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
|
721
|
+
# filename = f"screenshot_{timestamp}.{format}"
|
|
722
|
+
#
|
|
723
|
+
# try:
|
|
724
|
+
# screenshot_bytes = base64.b64decode(screenshot_data)
|
|
725
|
+
# with open(filename, "wb") as f:
|
|
726
|
+
# f.write(screenshot_bytes)
|
|
727
|
+
# logger.info(f"Screenshot saved to {filename} for debugging")
|
|
728
|
+
# except Exception as save_error:
|
|
729
|
+
# logger.warning(f"Failed to save screenshot to file: {save_error}")
|
|
730
|
+
#
|
|
873
731
|
mime_type_map = {
|
|
874
732
|
"png": "image/png",
|
|
875
733
|
"jpeg": "image/jpeg",
|
|
@@ -877,8 +735,16 @@ class BrowserAutomationService:
|
|
|
877
735
|
}
|
|
878
736
|
mime_type = mime_type_map.get(format, "image/png")
|
|
879
737
|
|
|
880
|
-
|
|
881
|
-
|
|
738
|
+
current_url = JavaScriptExecutor.get_current_url(self.chrome_interface)
|
|
739
|
+
|
|
740
|
+
if boxes_drawn:
|
|
741
|
+
remove_result = JavaScriptExecutor.remove_element_boxes(
|
|
742
|
+
self.chrome_interface
|
|
743
|
+
)
|
|
744
|
+
if not remove_result.get("success"):
|
|
745
|
+
logger.warning(
|
|
746
|
+
f"Failed to remove element boxes: {remove_result.get('error')}"
|
|
747
|
+
)
|
|
882
748
|
|
|
883
749
|
return {
|
|
884
750
|
"success": True,
|
|
@@ -895,101 +761,6 @@ class BrowserAutomationService:
|
|
|
895
761
|
logger.error(f"Screenshot capture error: {e}")
|
|
896
762
|
return {"success": False, "error": f"Screenshot capture error: {str(e)}"}
|
|
897
763
|
|
|
898
|
-
def dispatch_key_event(self, key: str, modifiers: List[str] = []) -> Dict[str, Any]:
|
|
899
|
-
"""
|
|
900
|
-
Dispatch key events using CDP input.dispatchKeyEvent.
|
|
901
|
-
|
|
902
|
-
Args:
|
|
903
|
-
key: Key to dispatch (e.g., 'Enter', 'Up', 'Down', 'F1', 'PageUp')
|
|
904
|
-
modifiers: Optional modifiers like 'ctrl', 'alt', 'shift' (comma-separated)
|
|
905
|
-
|
|
906
|
-
Returns:
|
|
907
|
-
Dict containing dispatch result
|
|
908
|
-
"""
|
|
909
|
-
try:
|
|
910
|
-
self._ensure_chrome_running()
|
|
911
|
-
|
|
912
|
-
if self.chrome_interface is None:
|
|
913
|
-
raise RuntimeError("Chrome interface is not initialized")
|
|
914
|
-
|
|
915
|
-
key_name = key.lower().strip()
|
|
916
|
-
key_code = key_codes.get(key_name)
|
|
917
|
-
|
|
918
|
-
if key_code is None:
|
|
919
|
-
return {
|
|
920
|
-
"success": False,
|
|
921
|
-
"error": f"Unknown key '{key}'. Supported keys: {', '.join(sorted(key_codes.keys()))}",
|
|
922
|
-
"key": key,
|
|
923
|
-
"modifiers": modifiers,
|
|
924
|
-
}
|
|
925
|
-
|
|
926
|
-
# Parse modifiers
|
|
927
|
-
modifier_flags = 0
|
|
928
|
-
if modifiers:
|
|
929
|
-
modifier_names = [m.strip().lower() for m in modifiers]
|
|
930
|
-
for mod in modifier_names:
|
|
931
|
-
if mod in ["alt"]:
|
|
932
|
-
modifier_flags |= 1 # Alt = 1
|
|
933
|
-
elif mod in ["ctrl", "control"]:
|
|
934
|
-
modifier_flags |= 2 # Ctrl = 2
|
|
935
|
-
elif mod in ["meta", "cmd", "command"]:
|
|
936
|
-
modifier_flags |= 4 # Meta = 4
|
|
937
|
-
elif mod in ["shift"]:
|
|
938
|
-
modifier_flags |= 8 # Shift = 8
|
|
939
|
-
|
|
940
|
-
# Dispatch keyDown event
|
|
941
|
-
self.chrome_interface.Input.dispatchKeyEvent(
|
|
942
|
-
type="rawKeyDown",
|
|
943
|
-
windowsVirtualKeyCode=key_code,
|
|
944
|
-
modifiers=modifier_flags,
|
|
945
|
-
)
|
|
946
|
-
|
|
947
|
-
# For printable characters, also send char event
|
|
948
|
-
printable_keys = {"space", "spacebar", "enter", "return", "tab"}
|
|
949
|
-
if key_name in printable_keys:
|
|
950
|
-
if key_name in ["space", "spacebar"]:
|
|
951
|
-
char_text = " "
|
|
952
|
-
elif key_name in ["enter", "return"]:
|
|
953
|
-
char_text = "\r"
|
|
954
|
-
elif key_name == "tab":
|
|
955
|
-
char_text = "\t"
|
|
956
|
-
else:
|
|
957
|
-
char_text = ""
|
|
958
|
-
|
|
959
|
-
if char_text:
|
|
960
|
-
self.chrome_interface.Input.dispatchKeyEvent(
|
|
961
|
-
type="char",
|
|
962
|
-
windowsVirtualKeyCode=key_code,
|
|
963
|
-
text=char_text,
|
|
964
|
-
unmodifiedText=char_text,
|
|
965
|
-
modifiers=modifier_flags,
|
|
966
|
-
)
|
|
967
|
-
|
|
968
|
-
# Dispatch keyUp event
|
|
969
|
-
self.chrome_interface.Input.dispatchKeyEvent(
|
|
970
|
-
type="keyUp", windowsVirtualKeyCode=key_code, modifiers=modifier_flags
|
|
971
|
-
)
|
|
972
|
-
|
|
973
|
-
time.sleep(0.1) # Small delay for event processing
|
|
974
|
-
|
|
975
|
-
return {
|
|
976
|
-
"success": True,
|
|
977
|
-
"message": f"Successfully dispatched key '{key}' with modifiers '{modifiers}'",
|
|
978
|
-
"key": key,
|
|
979
|
-
"key_code": key_code,
|
|
980
|
-
"modifiers": modifiers,
|
|
981
|
-
"modifier_flags": modifier_flags,
|
|
982
|
-
}
|
|
983
|
-
|
|
984
|
-
except Exception as e:
|
|
985
|
-
logger.error(f"Key dispatch error: {e}")
|
|
986
|
-
return {
|
|
987
|
-
"success": False,
|
|
988
|
-
"error": f"Key dispatch error: {str(e)}",
|
|
989
|
-
"key": key,
|
|
990
|
-
"modifiers": modifiers,
|
|
991
|
-
}
|
|
992
|
-
|
|
993
764
|
def __del__(self):
|
|
994
765
|
"""Cleanup when service is destroyed."""
|
|
995
766
|
self.cleanup()
|