agentcrew-ai 0.8.2__py3-none-any.whl → 0.8.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. AgentCrew/__init__.py +1 -1
  2. AgentCrew/modules/agents/local_agent.py +11 -0
  3. AgentCrew/modules/browser_automation/element_extractor.py +4 -3
  4. AgentCrew/modules/browser_automation/js/draw_element_boxes.js +200 -0
  5. AgentCrew/modules/browser_automation/js/extract_clickable_elements.js +57 -23
  6. AgentCrew/modules/browser_automation/js/extract_elements_by_text.js +21 -19
  7. AgentCrew/modules/browser_automation/js/extract_input_elements.js +22 -23
  8. AgentCrew/modules/browser_automation/js/filter_hidden_elements.js +104 -0
  9. AgentCrew/modules/browser_automation/js/remove_element_boxes.js +29 -0
  10. AgentCrew/modules/browser_automation/js_loader.py +385 -92
  11. AgentCrew/modules/browser_automation/service.py +118 -347
  12. AgentCrew/modules/browser_automation/tool.py +28 -29
  13. AgentCrew/modules/chat/message/conversation.py +9 -8
  14. AgentCrew/modules/console/input_handler.py +2 -0
  15. AgentCrew/modules/console/ui_effects.py +3 -4
  16. AgentCrew/modules/custom_llm/service.py +25 -3
  17. AgentCrew/modules/file_editing/tool.py +9 -11
  18. AgentCrew/modules/google/native_service.py +13 -0
  19. AgentCrew/modules/llm/constants.py +38 -1
  20. AgentCrew/modules/llm/model_registry.py +9 -0
  21. AgentCrew/modules/llm/types.py +12 -1
  22. AgentCrew/modules/memory/base_service.py +2 -2
  23. AgentCrew/modules/memory/chroma_service.py +80 -138
  24. AgentCrew/modules/memory/tool.py +15 -15
  25. AgentCrew/modules/openai/response_service.py +19 -11
  26. AgentCrew/modules/openai/service.py +15 -0
  27. AgentCrew/modules/prompts/constants.py +27 -14
  28. {agentcrew_ai-0.8.2.dist-info → agentcrew_ai-0.8.3.dist-info}/METADATA +2 -2
  29. {agentcrew_ai-0.8.2.dist-info → agentcrew_ai-0.8.3.dist-info}/RECORD +33 -30
  30. {agentcrew_ai-0.8.2.dist-info → agentcrew_ai-0.8.3.dist-info}/WHEEL +0 -0
  31. {agentcrew_ai-0.8.2.dist-info → agentcrew_ai-0.8.3.dist-info}/entry_points.txt +0 -0
  32. {agentcrew_ai-0.8.2.dist-info → agentcrew_ai-0.8.3.dist-info}/licenses/LICENSE +0 -0
  33. {agentcrew_ai-0.8.2.dist-info → agentcrew_ai-0.8.3.dist-info}/top_level.txt +0 -0
@@ -22,7 +22,7 @@ from .element_extractor import (
22
22
  clean_markdown_images,
23
23
  remove_duplicate_lines,
24
24
  )
25
- from .js_loader import js_loader, key_codes
25
+ from .js_loader import js_loader, JavaScriptExecutor
26
26
 
27
27
  import PyChromeDevTools
28
28
  from loguru import logger
@@ -116,7 +116,7 @@ class BrowserAutomationService:
116
116
  "profile": profile,
117
117
  }
118
118
 
119
- current_url = self._get_current_url()
119
+ current_url = JavaScriptExecutor.get_current_url(self.chrome_interface)
120
120
 
121
121
  return {
122
122
  "success": True,
@@ -150,7 +150,6 @@ class BrowserAutomationService:
150
150
  Returns:
151
151
  Dict containing click result
152
152
  """
153
- # Resolve UUID to XPath
154
153
  xpath = self.uuid_to_xpath_mapping.get(element_uuid)
155
154
  if not xpath:
156
155
  return {
@@ -165,40 +164,10 @@ class BrowserAutomationService:
165
164
  raise RuntimeError("Chrome interface is not initialized")
166
165
 
167
166
  js_code = js_loader.get_click_element_js(xpath)
168
-
169
- result = self.chrome_interface.Runtime.evaluate(
170
- expression=js_code, returnByValue=True
167
+ coord_result = JavaScriptExecutor.execute_and_parse_result(
168
+ self.chrome_interface, js_code
171
169
  )
172
170
 
173
- # Parse JavaScript result
174
- if isinstance(result, tuple) and len(result) >= 2:
175
- if isinstance(result[1], dict):
176
- coord_result = (
177
- result[1].get("result", {}).get("result", {}).get("value", {})
178
- )
179
- elif isinstance(result[1], list) and len(result[1]) > 0:
180
- coord_result = (
181
- result[1][0]
182
- .get("result", {})
183
- .get("result", {})
184
- .get("value", {})
185
- )
186
- else:
187
- return {
188
- "success": False,
189
- "error": "Invalid response format from coordinate calculation",
190
- "uuid": element_uuid,
191
- "xpath": xpath,
192
- }
193
- else:
194
- return {
195
- "success": False,
196
- "error": "No response from coordinate calculation",
197
- "uuid": element_uuid,
198
- "xpath": xpath,
199
- }
200
-
201
- # Check if coordinate calculation was successful
202
171
  if not coord_result.get("success", False):
203
172
  return {
204
173
  "success": False,
@@ -209,7 +178,6 @@ class BrowserAutomationService:
209
178
  "xpath": xpath,
210
179
  }
211
180
 
212
- # Extract coordinates
213
181
  x = coord_result.get("x")
214
182
  y = coord_result.get("y")
215
183
 
@@ -221,23 +189,18 @@ class BrowserAutomationService:
221
189
  "xpath": xpath,
222
190
  }
223
191
 
224
- # Wait a moment for scrollIntoView to complete
225
192
  time.sleep(0.5)
226
193
 
227
- # Step 2: Dispatch mousePressed event using Chrome DevTools Protocol
228
194
  self.chrome_interface.Input.dispatchMouseEvent(
229
195
  type="mousePressed", x=x, y=y, button="left", clickCount=1
230
196
  )
231
197
 
232
- # Small delay between press and release (simulate realistic click timing)
233
198
  time.sleep(0.02)
234
199
 
235
- # Step 3: Dispatch mouseReleased event using Chrome DevTools Protocol
236
200
  self.chrome_interface.Input.dispatchMouseEvent(
237
201
  type="mouseReleased", x=x, y=y, button="left", clickCount=1
238
202
  )
239
203
 
240
- # Wait for click to be processed
241
204
  time.sleep(1)
242
205
 
243
206
  return {
@@ -281,7 +244,6 @@ class BrowserAutomationService:
281
244
 
282
245
  scroll_distance = amount * 300
283
246
 
284
- # Resolve UUID to XPath if provided
285
247
  xpath = None
286
248
  if element_uuid:
287
249
  xpath = self.uuid_to_xpath_mapping.get(element_uuid)
@@ -294,35 +256,14 @@ class BrowserAutomationService:
294
256
  "amount": amount,
295
257
  }
296
258
 
297
- # Load JavaScript code from external file
298
259
  js_code = js_loader.get_scroll_page_js(
299
260
  direction, scroll_distance, xpath or "", element_uuid or ""
300
261
  )
301
262
 
302
- result = self.chrome_interface.Runtime.evaluate(
303
- expression=js_code, returnByValue=True
263
+ scroll_result = JavaScriptExecutor.execute_and_parse_result(
264
+ self.chrome_interface, js_code
304
265
  )
305
266
 
306
- if isinstance(result, tuple) and len(result) >= 2:
307
- if isinstance(result[1], dict):
308
- scroll_result = (
309
- result[1].get("result", {}).get("result", {}).get("value", {})
310
- )
311
- elif isinstance(result[1], list) and len(result[1]) > 0:
312
- scroll_result = (
313
- result[1][0]
314
- .get("result", {})
315
- .get("result", {})
316
- .get("value", {})
317
- )
318
- else:
319
- scroll_result = {
320
- "success": False,
321
- "error": "Invalid response format",
322
- }
323
- else:
324
- scroll_result = {"success": False, "error": "No response from browser"}
325
-
326
267
  time.sleep(1.5)
327
268
 
328
269
  result_data = {"direction": direction, "amount": amount, **scroll_result}
@@ -368,27 +309,39 @@ class BrowserAutomationService:
368
309
  if retry_count >= 5:
369
310
  break
370
311
 
371
- # Find HTML node
372
- html_node = None
373
- for node in dom_data[0]["result"]["root"]["children"]:
374
- if node.get("nodeName") == "HTML":
375
- html_node = node
376
- break
312
+ result = JavaScriptExecutor.filter_hidden_elements(self.chrome_interface)
377
313
 
378
- if not html_node:
379
- return {"success": False, "error": "Could not find HTML node in page"}
314
+ if result.get("success"):
315
+ filtered_html = result.get("html", "")
316
+ logger.info(
317
+ "Successfully filtered hidden elements using computed styles"
318
+ )
380
319
 
381
- # Get outer HTML
382
- html_content, _ = self.chrome_interface.DOM.getOuterHTML(
383
- nodeId=html_node["nodeId"]
384
- )
385
- raw_html = html_content.get("result", {}).get("outerHTML", "")
320
+ else:
321
+ # Find HTML node
322
+ html_node = None
323
+ for node in dom_data[0]["result"]["root"]["children"]:
324
+ if node.get("nodeName") == "HTML":
325
+ html_node = node
326
+ break
327
+
328
+ if not html_node:
329
+ return {
330
+ "success": False,
331
+ "error": "Could not find HTML node in page",
332
+ }
333
+
334
+ # Get outer HTML
335
+ html_content, _ = self.chrome_interface.DOM.getOuterHTML(
336
+ nodeId=html_node["nodeId"]
337
+ )
338
+ raw_html = html_content.get("result", {}).get("outerHTML", "")
386
339
 
387
- if not raw_html:
388
- return {"success": False, "error": "Could not extract HTML content"}
340
+ if not raw_html:
341
+ return {"success": False, "error": "Could not extract HTML content"}
389
342
 
390
- # Filter out hidden elements before processing
391
- filtered_html = self._filter_hidden_elements(raw_html)
343
+ # Filter out hidden elements using JavaScript (doesn't modify page)
344
+ filtered_html = self._filter_hidden_elements(raw_html)
392
345
 
393
346
  # Convert HTML to markdown
394
347
  raw_markdown_content = convert_to_markdown(
@@ -433,7 +386,7 @@ class BrowserAutomationService:
433
386
  "utf-8", "ignore"
434
387
  )
435
388
 
436
- current_url = self._get_current_url()
389
+ current_url = JavaScriptExecutor.get_current_url(self.chrome_interface)
437
390
 
438
391
  return {
439
392
  "success": True,
@@ -461,14 +414,21 @@ class BrowserAutomationService:
461
414
  super().__init__()
462
415
  self.filtered_html = []
463
416
  self.skip_depth = 0
417
+ self.tag_stack = []
464
418
 
465
419
  def handle_starttag(self, tag, attrs):
466
420
  # Convert attrs to dict for easier access
467
421
  attr_dict = dict(attrs)
468
-
469
- # Check if element should be hidden
470
422
  should_hide = False
471
423
 
424
+ if self.skip_depth > 0:
425
+ if tag in self.tag_stack:
426
+ self.skip_depth += 1
427
+ return
428
+
429
+ if tag.lower() in ["script", "style", "svg"]:
430
+ should_hide = True
431
+
472
432
  # Check for style="display:none" (case insensitive, flexible matching)
473
433
  style = attr_dict.get("style", "")
474
434
  if style:
@@ -487,6 +447,10 @@ class BrowserAutomationService:
487
447
  should_hide = True
488
448
 
489
449
  if should_hide:
450
+ if tag.lower() in ["img", "input", "br", "hr", "meta", "link"]:
451
+ # Self-closing tags, just skip
452
+ return
453
+ self.tag_stack.append(tag)
490
454
  self.skip_depth += 1
491
455
  return
492
456
 
@@ -500,8 +464,11 @@ class BrowserAutomationService:
500
464
 
501
465
  def handle_endtag(self, tag):
502
466
  if self.skip_depth > 0:
503
- self.skip_depth -= 1
504
- return
467
+ if tag in self.tag_stack:
468
+ self.skip_depth -= 1
469
+ if self.skip_depth == 0:
470
+ self.tag_stack.remove(tag)
471
+ return
505
472
 
506
473
  if self.skip_depth == 0:
507
474
  self.filtered_html.append(f"</{tag}>")
@@ -534,41 +501,6 @@ class BrowserAutomationService:
534
501
  # Return original content if filtering fails
535
502
  return html_content
536
503
 
537
- def _get_current_url(self) -> str:
538
- """Get the current page URL."""
539
- try:
540
- if self.chrome_interface is None:
541
- raise RuntimeError("Chrome interface is not initialized")
542
- runtime_result = self.chrome_interface.Runtime.evaluate(
543
- expression="window.location.href"
544
- )
545
-
546
- if isinstance(runtime_result, tuple) and len(runtime_result) >= 2:
547
- if isinstance(runtime_result[1], dict):
548
- current_url = (
549
- runtime_result[1]
550
- .get("result", {})
551
- .get("result", {})
552
- .get("value", "Unknown")
553
- )
554
- elif isinstance(runtime_result[1], list) and len(runtime_result[1]) > 0:
555
- current_url = (
556
- runtime_result[1][0]
557
- .get("result", {})
558
- .get("result", {})
559
- .get("value", "Unknown")
560
- )
561
- else:
562
- current_url = "Unknown"
563
- else:
564
- current_url = "Unknown"
565
-
566
- return current_url
567
-
568
- except Exception as e:
569
- logger.warning(f"Could not get current URL: {e}")
570
- return "Unknown"
571
-
572
504
  def cleanup(self):
573
505
  """Clean up browser resources."""
574
506
  try:
@@ -604,15 +536,17 @@ class BrowserAutomationService:
604
536
  if self.chrome_interface is None:
605
537
  raise RuntimeError("Chrome interface is not initialized")
606
538
 
607
- # Focus the element and clear any existing content
608
- focus_result = self._focus_and_clear_element(xpath)
539
+ focus_result = JavaScriptExecutor.focus_and_clear_element(
540
+ self.chrome_interface, xpath
541
+ )
609
542
  if not focus_result.get("success", False):
610
543
  return focus_result
611
544
 
612
545
  can_simulate_typing = focus_result.get("canSimulateTyping", False)
613
- # Simulate typing each character
614
546
  if can_simulate_typing:
615
- typing_result = self._simulate_typing(value)
547
+ typing_result = JavaScriptExecutor.simulate_typing(
548
+ self.chrome_interface, value
549
+ )
616
550
  if not typing_result.get("success", False):
617
551
  return {
618
552
  **typing_result,
@@ -621,7 +555,7 @@ class BrowserAutomationService:
621
555
  "input_value": value,
622
556
  }
623
557
 
624
- self._trigger_input_events(xpath, value)
558
+ JavaScriptExecutor.trigger_input_events(self.chrome_interface, xpath, value)
625
559
  time.sleep(1.5)
626
560
 
627
561
  return {
@@ -644,131 +578,36 @@ class BrowserAutomationService:
644
578
  "typing_method": "keyboard_simulation",
645
579
  }
646
580
 
647
- def _focus_and_clear_element(self, xpath: str) -> Dict[str, Any]:
581
+ def dispatch_key_event(self, key: str, modifiers: List[str] = []) -> Dict[str, Any]:
648
582
  """
649
- Focus the target element and clear any existing content.
583
+ Dispatch key events using CDP input.dispatchKeyEvent.
650
584
 
651
585
  Args:
652
- xpath: XPath selector for the element
586
+ key: Key to dispatch (e.g., 'Enter', 'Up', 'Down', 'F1', 'PageUp')
587
+ modifiers: Optional modifiers like 'ctrl', 'alt', 'shift'
653
588
 
654
589
  Returns:
655
- Dict containing focus result
590
+ Dict containing dispatch result
656
591
  """
657
- # Load JavaScript code from external file
658
- js_code = js_loader.get_focus_and_clear_element_js(xpath)
659
-
660
- if self.chrome_interface is None:
661
- raise RuntimeError("Chrome interface is not initialized")
662
-
663
- result = self.chrome_interface.Runtime.evaluate(
664
- expression=js_code, returnByValue=True
665
- )
666
-
667
- if isinstance(result, tuple) and len(result) >= 2:
668
- if isinstance(result[1], dict):
669
- focus_result = (
670
- result[1].get("result", {}).get("result", {}).get("value", {})
671
- )
672
- elif isinstance(result[1], list) and len(result[1]) > 0:
673
- focus_result = (
674
- result[1][0].get("result", {}).get("result", {}).get("value", {})
675
- )
676
- else:
677
- focus_result = {
678
- "success": False,
679
- "error": "Invalid response format from focus operation",
680
- }
681
- else:
682
- focus_result = {
683
- "success": False,
684
- "error": "No response from focus operation",
685
- }
686
-
687
- return focus_result
688
-
689
- def _simulate_typing(self, text: str) -> Dict[str, Any]:
690
- """Simulate keyboard typing character by character."""
691
- if self.chrome_interface is None:
692
- raise RuntimeError("Chrome interface is not initialized")
693
-
694
592
  try:
695
- for char in text:
696
- time.sleep(0.05)
697
-
698
- if char == "\n":
699
- self.chrome_interface.Input.dispatchKeyEvent(
700
- **{
701
- "type": "rawKeyDown",
702
- "windowsVirtualKeyCode": 13,
703
- "unmodifiedText": "\r",
704
- "text": "\r",
705
- }
706
- )
707
- self.chrome_interface.Input.dispatchKeyEvent(
708
- **{
709
- "type": "char",
710
- "windowsVirtualKeyCode": 13,
711
- "unmodifiedText": "\r",
712
- "text": "\r",
713
- }
714
- )
715
- self.chrome_interface.Input.dispatchKeyEvent(
716
- **{
717
- "type": "keyUp",
718
- "windowsVirtualKeyCode": 13,
719
- "unmodifiedText": "\r",
720
- "text": "\r",
721
- }
722
- )
723
- elif char == "\t":
724
- self.chrome_interface.Input.dispatchKeyEvent(type="char", text="\t")
725
- else:
726
- self.chrome_interface.Input.dispatchKeyEvent(type="char", text=char)
727
-
728
- return {
729
- "success": True,
730
- "message": f"Successfully typed {len(text)} characters",
731
- "characters_typed": len(text),
732
- }
733
-
734
- except Exception as e:
735
- logger.error(f"Error during typing simulation: {e}")
736
- return {"success": False, "error": f"Typing simulation failed: {str(e)}"}
737
-
738
- def _trigger_input_events(self, xpath: str, value: str) -> Dict[str, Any]:
739
- """Trigger input and change events to notify the page of input changes."""
740
- # Load JavaScript code from external file
741
- js_code = js_loader.get_trigger_input_events_js(xpath, value)
593
+ self._ensure_chrome_running()
742
594
 
743
- if self.chrome_interface is None:
744
- raise RuntimeError("Chrome interface is not initialized")
595
+ if self.chrome_interface is None:
596
+ raise RuntimeError("Chrome interface is not initialized")
745
597
 
746
- result = self.chrome_interface.Runtime.evaluate(
747
- expression=js_code, returnByValue=True
748
- )
598
+ return JavaScriptExecutor.dispatch_key_event(
599
+ self.chrome_interface, key, modifiers
600
+ )
749
601
 
750
- if isinstance(result, tuple) and len(result) >= 2:
751
- if isinstance(result[1], dict):
752
- event_result = (
753
- result[1].get("result", {}).get("result", {}).get("value", {})
754
- )
755
- elif isinstance(result[1], list) and len(result[1]) > 0:
756
- event_result = (
757
- result[1][0].get("result", {}).get("result", {}).get("value", {})
758
- )
759
- else:
760
- event_result = {
761
- "success": False,
762
- "error": "Invalid response format from event triggering",
763
- }
764
- else:
765
- event_result = {
602
+ except Exception as e:
603
+ logger.error(f"Key dispatch error: {e}")
604
+ return {
766
605
  "success": False,
767
- "error": "No response from event triggering",
606
+ "error": f"Key dispatch error: {str(e)}",
607
+ "key": key,
608
+ "modifiers": modifiers,
768
609
  }
769
610
 
770
- return event_result
771
-
772
611
  def get_elements_by_text(self, text: str) -> Dict[str, Any]:
773
612
  """Find elements containing specified text using XPath."""
774
613
  try:
@@ -805,7 +644,7 @@ class BrowserAutomationService:
805
644
  capture_beyond_viewport: bool = False,
806
645
  ) -> Dict[str, Any]:
807
646
  """
808
- Capture a screenshot of the current page.
647
+ Capture a screenshot of the current page with colored boxes and UUID labels drawn over identified elements.
809
648
 
810
649
  Args:
811
650
  format: Image format ("png", "jpeg", or "webp"). Defaults to "png"
@@ -823,6 +662,21 @@ class BrowserAutomationService:
823
662
  if self.chrome_interface is None:
824
663
  raise RuntimeError("Chrome interface is not initialized")
825
664
 
665
+ boxes_drawn = False
666
+ if self.uuid_to_xpath_mapping:
667
+ draw_result = JavaScriptExecutor.draw_element_boxes(
668
+ self.chrome_interface, self.uuid_to_xpath_mapping
669
+ )
670
+ if draw_result.get("success"):
671
+ boxes_drawn = True
672
+ logger.info(
673
+ f"Drew {draw_result.get('count', 0)} element boxes for screenshot"
674
+ )
675
+ else:
676
+ logger.warning(
677
+ f"Failed to draw element boxes: {draw_result.get('error')}"
678
+ )
679
+
826
680
  # Prepare parameters for screenshot capture
827
681
  screenshot_params = {
828
682
  "format": format,
@@ -838,18 +692,9 @@ class BrowserAutomationService:
838
692
  if clip is not None:
839
693
  screenshot_params["clip"] = clip
840
694
 
841
- # self.chrome_interface.Emulation.setDeviceMetricsOverride(
842
- # height=1280,
843
- # width=720,
844
- # deviceScaleFactor=1,
845
- # mobile=False,
846
- # )
847
-
848
695
  # Capture the screenshot
849
696
  result = self.chrome_interface.Page.captureScreenshot(**screenshot_params)
850
697
 
851
- # self.chrome_interface.Emulation.clearDeviceMetricsOverride()
852
-
853
698
  if isinstance(result, tuple) and len(result) >= 2:
854
699
  if isinstance(result[1], dict):
855
700
  screenshot_data = result[1].get("result", {}).get("data", "")
@@ -869,7 +714,20 @@ class BrowserAutomationService:
869
714
  if not screenshot_data:
870
715
  return {"success": False, "error": "No screenshot data received"}
871
716
 
872
- # Determine MIME type based on format
717
+ # import base64
718
+ # from datetime import datetime
719
+ #
720
+ # timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
721
+ # filename = f"screenshot_{timestamp}.{format}"
722
+ #
723
+ # try:
724
+ # screenshot_bytes = base64.b64decode(screenshot_data)
725
+ # with open(filename, "wb") as f:
726
+ # f.write(screenshot_bytes)
727
+ # logger.info(f"Screenshot saved to {filename} for debugging")
728
+ # except Exception as save_error:
729
+ # logger.warning(f"Failed to save screenshot to file: {save_error}")
730
+ #
873
731
  mime_type_map = {
874
732
  "png": "image/png",
875
733
  "jpeg": "image/jpeg",
@@ -877,8 +735,16 @@ class BrowserAutomationService:
877
735
  }
878
736
  mime_type = mime_type_map.get(format, "image/png")
879
737
 
880
- # Get current URL for context
881
- current_url = self._get_current_url()
738
+ current_url = JavaScriptExecutor.get_current_url(self.chrome_interface)
739
+
740
+ if boxes_drawn:
741
+ remove_result = JavaScriptExecutor.remove_element_boxes(
742
+ self.chrome_interface
743
+ )
744
+ if not remove_result.get("success"):
745
+ logger.warning(
746
+ f"Failed to remove element boxes: {remove_result.get('error')}"
747
+ )
882
748
 
883
749
  return {
884
750
  "success": True,
@@ -895,101 +761,6 @@ class BrowserAutomationService:
895
761
  logger.error(f"Screenshot capture error: {e}")
896
762
  return {"success": False, "error": f"Screenshot capture error: {str(e)}"}
897
763
 
898
- def dispatch_key_event(self, key: str, modifiers: List[str] = []) -> Dict[str, Any]:
899
- """
900
- Dispatch key events using CDP input.dispatchKeyEvent.
901
-
902
- Args:
903
- key: Key to dispatch (e.g., 'Enter', 'Up', 'Down', 'F1', 'PageUp')
904
- modifiers: Optional modifiers like 'ctrl', 'alt', 'shift' (comma-separated)
905
-
906
- Returns:
907
- Dict containing dispatch result
908
- """
909
- try:
910
- self._ensure_chrome_running()
911
-
912
- if self.chrome_interface is None:
913
- raise RuntimeError("Chrome interface is not initialized")
914
-
915
- key_name = key.lower().strip()
916
- key_code = key_codes.get(key_name)
917
-
918
- if key_code is None:
919
- return {
920
- "success": False,
921
- "error": f"Unknown key '{key}'. Supported keys: {', '.join(sorted(key_codes.keys()))}",
922
- "key": key,
923
- "modifiers": modifiers,
924
- }
925
-
926
- # Parse modifiers
927
- modifier_flags = 0
928
- if modifiers:
929
- modifier_names = [m.strip().lower() for m in modifiers]
930
- for mod in modifier_names:
931
- if mod in ["alt"]:
932
- modifier_flags |= 1 # Alt = 1
933
- elif mod in ["ctrl", "control"]:
934
- modifier_flags |= 2 # Ctrl = 2
935
- elif mod in ["meta", "cmd", "command"]:
936
- modifier_flags |= 4 # Meta = 4
937
- elif mod in ["shift"]:
938
- modifier_flags |= 8 # Shift = 8
939
-
940
- # Dispatch keyDown event
941
- self.chrome_interface.Input.dispatchKeyEvent(
942
- type="rawKeyDown",
943
- windowsVirtualKeyCode=key_code,
944
- modifiers=modifier_flags,
945
- )
946
-
947
- # For printable characters, also send char event
948
- printable_keys = {"space", "spacebar", "enter", "return", "tab"}
949
- if key_name in printable_keys:
950
- if key_name in ["space", "spacebar"]:
951
- char_text = " "
952
- elif key_name in ["enter", "return"]:
953
- char_text = "\r"
954
- elif key_name == "tab":
955
- char_text = "\t"
956
- else:
957
- char_text = ""
958
-
959
- if char_text:
960
- self.chrome_interface.Input.dispatchKeyEvent(
961
- type="char",
962
- windowsVirtualKeyCode=key_code,
963
- text=char_text,
964
- unmodifiedText=char_text,
965
- modifiers=modifier_flags,
966
- )
967
-
968
- # Dispatch keyUp event
969
- self.chrome_interface.Input.dispatchKeyEvent(
970
- type="keyUp", windowsVirtualKeyCode=key_code, modifiers=modifier_flags
971
- )
972
-
973
- time.sleep(0.1) # Small delay for event processing
974
-
975
- return {
976
- "success": True,
977
- "message": f"Successfully dispatched key '{key}' with modifiers '{modifiers}'",
978
- "key": key,
979
- "key_code": key_code,
980
- "modifiers": modifiers,
981
- "modifier_flags": modifier_flags,
982
- }
983
-
984
- except Exception as e:
985
- logger.error(f"Key dispatch error: {e}")
986
- return {
987
- "success": False,
988
- "error": f"Key dispatch error: {str(e)}",
989
- "key": key,
990
- "modifiers": modifiers,
991
- }
992
-
993
764
  def __del__(self):
994
765
  """Cleanup when service is destroyed."""
995
766
  self.cleanup()