camel-ai 0.2.74a4__py3-none-any.whl → 0.2.75a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of camel-ai might be problematic. Click here for more details.

Files changed (63) hide show
  1. camel/__init__.py +1 -1
  2. camel/interpreters/e2b_interpreter.py +34 -1
  3. camel/models/aiml_model.py +1 -16
  4. camel/models/anthropic_model.py +6 -19
  5. camel/models/aws_bedrock_model.py +1 -16
  6. camel/models/azure_openai_model.py +1 -16
  7. camel/models/base_model.py +0 -12
  8. camel/models/cohere_model.py +1 -16
  9. camel/models/crynux_model.py +1 -16
  10. camel/models/deepseek_model.py +1 -16
  11. camel/models/gemini_model.py +1 -16
  12. camel/models/groq_model.py +1 -17
  13. camel/models/internlm_model.py +1 -16
  14. camel/models/litellm_model.py +1 -16
  15. camel/models/lmstudio_model.py +1 -17
  16. camel/models/mistral_model.py +1 -16
  17. camel/models/modelscope_model.py +1 -16
  18. camel/models/moonshot_model.py +1 -16
  19. camel/models/nemotron_model.py +0 -5
  20. camel/models/netmind_model.py +1 -16
  21. camel/models/novita_model.py +1 -16
  22. camel/models/nvidia_model.py +1 -16
  23. camel/models/ollama_model.py +1 -16
  24. camel/models/openai_compatible_model.py +0 -3
  25. camel/models/openai_model.py +1 -16
  26. camel/models/openrouter_model.py +1 -17
  27. camel/models/ppio_model.py +1 -16
  28. camel/models/qianfan_model.py +1 -16
  29. camel/models/qwen_model.py +1 -16
  30. camel/models/reka_model.py +1 -16
  31. camel/models/samba_model.py +0 -32
  32. camel/models/sglang_model.py +1 -16
  33. camel/models/siliconflow_model.py +1 -16
  34. camel/models/stub_model.py +0 -4
  35. camel/models/togetherai_model.py +1 -16
  36. camel/models/vllm_model.py +1 -16
  37. camel/models/volcano_model.py +0 -17
  38. camel/models/watsonx_model.py +1 -16
  39. camel/models/yi_model.py +1 -16
  40. camel/models/zhipuai_model.py +1 -16
  41. camel/toolkits/hybrid_browser_toolkit/config_loader.py +3 -0
  42. camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit_ts.py +225 -0
  43. camel/toolkits/hybrid_browser_toolkit/ts/src/browser-session.ts +164 -8
  44. camel/toolkits/hybrid_browser_toolkit/ts/src/config-loader.ts +2 -0
  45. camel/toolkits/hybrid_browser_toolkit/ts/src/hybrid-browser-toolkit.ts +106 -1
  46. camel/toolkits/hybrid_browser_toolkit/ts/src/types.ts +19 -1
  47. camel/toolkits/hybrid_browser_toolkit/ts/websocket-server.js +20 -0
  48. camel/toolkits/hybrid_browser_toolkit/ws_wrapper.py +41 -0
  49. camel/toolkits/hybrid_browser_toolkit_py/actions.py +158 -0
  50. camel/toolkits/hybrid_browser_toolkit_py/browser_session.py +55 -8
  51. camel/toolkits/hybrid_browser_toolkit_py/config_loader.py +43 -0
  52. camel/toolkits/hybrid_browser_toolkit_py/hybrid_browser_toolkit.py +312 -3
  53. camel/toolkits/hybrid_browser_toolkit_py/snapshot.py +10 -4
  54. camel/toolkits/hybrid_browser_toolkit_py/unified_analyzer.js +45 -4
  55. camel/toolkits/note_taking_toolkit.py +3 -4
  56. camel/toolkits/search_toolkit.py +182 -30
  57. camel/types/enums.py +9 -0
  58. camel/utils/mcp.py +2 -2
  59. camel/utils/token_counting.py +13 -2
  60. {camel_ai-0.2.74a4.dist-info → camel_ai-0.2.75a0.dist-info}/METADATA +6 -6
  61. {camel_ai-0.2.74a4.dist-info → camel_ai-0.2.75a0.dist-info}/RECORD +63 -63
  62. {camel_ai-0.2.74a4.dist-info → camel_ai-0.2.75a0.dist-info}/WHEEL +0 -0
  63. {camel_ai-0.2.74a4.dist-info → camel_ai-0.2.75a0.dist-info}/licenses/LICENSE +0 -0
@@ -64,11 +64,16 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
64
64
  "browser_select",
65
65
  "browser_scroll",
66
66
  "browser_enter",
67
+ "browser_mouse_control",
68
+ "browser_mouse_drag",
69
+ "browser_press_key",
67
70
  "browser_wait_user",
68
71
  "browser_solve_task",
69
72
  "browser_switch_tab",
70
73
  "browser_close_tab",
71
74
  "browser_get_tab_info",
75
+ "browser_console_view",
76
+ "browser_console_exec",
72
77
  ]
73
78
 
74
79
  def __init__(
@@ -863,6 +868,156 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
863
868
  "total_tabs": 0,
864
869
  }
865
870
 
871
async def browser_mouse_control(
    self, *, control: str, x: float, y: float
) -> Dict[str, Any]:
    r"""Perform a mouse action at the given x, y coordinates in the browser.

    Args:
        control (str): The action to perform: 'click', 'right_click'
            or 'dblclick'.
        x (float): x-coordinate for the control action.
        y (float): y-coordinate for the control action.

    Returns:
        Dict[str, Any]: A dictionary with the result of the action:
            - "result" (str): Confirmation of the action.
            - "snapshot" (str): A snapshot of the page after mouse
              control action.
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the active tab.
            - "total_tabs" (int): Total number of open tabs.

        Any exception is logged and reported through "result"; in that
        case "snapshot" is empty, "tabs" is empty and both counters
        are 0.
    """
    try:
        ws_wrapper = await self._get_ws_wrapper()
        result = await ws_wrapper.mouse_control(control, x, y)

        # Add tab information
        tab_info = await ws_wrapper.get_tab_info()
        result.update(
            {
                "tabs": tab_info,
                # Index of the first tab flagged as current; falls back
                # to 0 when no tab carries the flag.
                "current_tab": next(
                    (
                        i
                        for i, tab in enumerate(tab_info)
                        if tab.get("is_current")
                    ),
                    0,
                ),
                "total_tabs": len(tab_info),
            }
        )

        return result
    except Exception as e:
        logger.error(f"Failed to control mouse: {e}")
        return {
            "result": f"Error with mouse control: {e}",
            "snapshot": "",
            "tabs": [],
            "current_tab": 0,
            "total_tabs": 0,
        }
922
+
923
async def browser_mouse_drag(
    self, *, from_ref: str, to_ref: str
) -> Dict[str, Any]:
    r"""Drag and drop with the mouse between two elements given by ref ID.

    Args:
        from_ref (str): The `ref` ID of the source element to drag from.
        to_ref (str): The `ref` ID of the target element to drag to.

    Returns:
        Dict[str, Any]: A dictionary with the result of the action:
            - "result" (str): Confirmation of the action.
            - "snapshot" (str): A new page snapshot.
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the active tab.
            - "total_tabs" (int): Total number of open tabs.

        Any exception is logged and reported through "result"; in that
        case "snapshot" is empty, "tabs" is empty and both counters
        are 0.
    """
    try:
        ws_wrapper = await self._get_ws_wrapper()
        result = await ws_wrapper.mouse_drag(from_ref, to_ref)

        # Add tab information
        tab_info = await ws_wrapper.get_tab_info()
        result.update(
            {
                "tabs": tab_info,
                # Index of the first tab flagged as current; falls back
                # to 0 when no tab carries the flag.
                "current_tab": next(
                    (
                        i
                        for i, tab in enumerate(tab_info)
                        if tab.get("is_current")
                    ),
                    0,
                ),
                "total_tabs": len(tab_info),
            }
        )

        return result
    except Exception as e:
        logger.error(f"Error with mouse drag and drop: {e}")
        return {
            "result": f"Error with mouse drag and drop: {e}",
            "snapshot": "",
            "tabs": [],
            "current_tab": 0,
            "total_tabs": 0,
        }
971
+
972
async def browser_press_key(self, *, keys: List[str]) -> Dict[str, Any]:
    r"""Press a single key or a key combination.

    The list is forwarded unchanged to the WebSocket wrapper's
    `press_key`; a combination is expressed as several entries, which
    are concatenated with a '+' separator.

    Args:
        keys (List[str]): A single key (one-element list) or the keys
            that make up a combination.

    Returns:
        Dict[str, Any]: A dictionary with the result of the action:
            - "result" (str): Confirmation of the action.
            - "snapshot" (str): A snapshot of the page after
              press key action.
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the active tab.
            - "total_tabs" (int): Total number of open tabs.

        Any exception is logged and reported through "result"; in that
        case "snapshot" is empty, "tabs" is empty and both counters
        are 0.
    """
    try:
        ws_wrapper = await self._get_ws_wrapper()
        result = await ws_wrapper.press_key(keys)

        # Add tab information
        tab_info = await ws_wrapper.get_tab_info()
        result.update(
            {
                "tabs": tab_info,
                # Index of the first tab flagged as current; falls back
                # to 0 when no tab carries the flag.
                "current_tab": next(
                    (
                        i
                        for i, tab in enumerate(tab_info)
                        if tab.get("is_current")
                    ),
                    0,
                ),
                "total_tabs": len(tab_info),
            }
        )

        return result
    except Exception as e:
        logger.error(f"Failed to press key: {e}")
        return {
            "result": f"Error with press key: {e}",
            "snapshot": "",
            "tabs": [],
            "current_tab": 0,
            "total_tabs": 0,
        }
1020
+
866
1021
  async def browser_switch_tab(self, *, tab_id: str) -> Dict[str, Any]:
867
1022
  r"""Switches to a different browser tab using its ID.
868
1023
 
@@ -1002,6 +1157,71 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
1002
1157
  "total_tabs": 0,
1003
1158
  }
1004
1159
 
1160
async def browser_console_view(self) -> Dict[str, Any]:
    r"""View current page console logs.

    Returns:
        Dict[str, Any]: A dictionary with a single key:
            - "console_messages" (List[Dict]): List of messages logged
              in the current page, as returned by the WebSocket
              wrapper's `console_view`.

        Any exception is logged and an empty "console_messages" list
        is returned instead.
    """
    try:
        ws_wrapper = await self._get_ws_wrapper()
        console_logs = await ws_wrapper.console_view()

        return {"console_messages": console_logs}
    except Exception as e:
        logger.error(f"Failed to get console view: {e}")
        return {"console_messages": []}
1177
+
1178
async def browser_console_exec(self, code: str) -> Dict[str, Any]:
    r"""Execute javascript code in the console of the current page and get
    results.

    Args:
        code (str): JavaScript code to execute in the browser console.

    Returns:
        Dict[str, Any]: The dictionary returned by the WebSocket
        wrapper's `console_exec`, extended with tab information:
            - "result" (str): Confirmation of the action.
            - "snapshot" (str): A snapshot of the active tab after
              console execute action.
            - "tabs" (List[Dict]): Information about all open tabs.
            - "current_tab" (int): Index of the active tab.
            - "total_tabs" (int): Total number of open tabs.

        Any exception is logged and reported through "result"; in that
        case "snapshot" is empty, "tabs" is empty and both counters
        are 0.
    """
    try:
        ws_wrapper = await self._get_ws_wrapper()
        result = await ws_wrapper.console_exec(code)

        # Add tab information
        tab_info = await ws_wrapper.get_tab_info()
        result.update(
            {
                "tabs": tab_info,
                # Index of the first tab flagged as current; falls back
                # to 0 when no tab carries the flag.
                "current_tab": next(
                    (
                        i
                        for i, tab in enumerate(tab_info)
                        if tab.get("is_current")
                    ),
                    0,
                ),
                "total_tabs": len(tab_info),
            }
        )

        return result
    except Exception as e:
        logger.error(f"Failed to execute javascript in console: {e}")
        return {
            "result": f"Error in code execution: {e}",
            "snapshot": "",
            "tabs": [],
            "current_tab": 0,
            "total_tabs": 0,
        }
1224
+
1005
1225
  # Additional methods for backward compatibility
1006
1226
  async def browser_wait_user(
1007
1227
  self, timeout_sec: Optional[float] = None
@@ -1146,10 +1366,15 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
1146
1366
  "browser_select": self.browser_select,
1147
1367
  "browser_scroll": self.browser_scroll,
1148
1368
  "browser_enter": self.browser_enter,
1369
+ "browser_mouse_click": self.browser_mouse_control,
1370
+ "browser_mouse_drag": self.browser_mouse_drag,
1371
+ "browser_press_key": self.browser_press_key,
1149
1372
  "browser_wait_user": self.browser_wait_user,
1150
1373
  "browser_switch_tab": self.browser_switch_tab,
1151
1374
  "browser_close_tab": self.browser_close_tab,
1152
1375
  "browser_get_tab_info": self.browser_get_tab_info,
1376
+ "browser_console_view": self.browser_console_view,
1377
+ "browser_console_exec": self.browser_console_exec,
1153
1378
  }
1154
1379
 
1155
1380
  enabled_tools = []
@@ -1,4 +1,4 @@
1
- import { Page, Browser, BrowserContext, chromium } from 'playwright';
1
+ import { Page, Browser, BrowserContext, chromium, ConsoleMessage } from 'playwright';
2
2
  import { BrowserToolkitConfig, SnapshotResult, SnapshotElement, ActionResult, TabInfo, BrowserAction, DetailedTiming } from './types';
3
3
  import { ConfigLoader, StealthConfig } from './config-loader';
4
4
 
@@ -6,18 +6,43 @@ export class HybridBrowserSession {
6
6
  private browser: Browser | null = null;
7
7
  private context: BrowserContext | null = null;
8
8
  private pages: Map<string, Page> = new Map();
9
+ private consoleLogs: Map<string, ConsoleMessage[]> = new Map();
9
10
  private currentTabId: string | null = null;
10
11
  private tabCounter = 0;
11
12
  private configLoader: ConfigLoader;
12
13
  private scrollPosition: { x: number; y: number } = {x: 0, y: 0};
13
14
  private hasNavigatedBefore = false; // Track if we've navigated before
15
+ private logLimit: number;
14
16
 
15
17
  constructor(config: BrowserToolkitConfig = {}) {
16
18
  // Use ConfigLoader's fromPythonConfig to handle conversion properly
17
19
  this.configLoader = ConfigLoader.fromPythonConfig(config);
20
+ // Load browser configuration for console log limit, default to 1000
21
+ this.logLimit = this.configLoader.getBrowserConfig().consoleLogLimit || 1000;
18
22
  }
19
23
 
20
- async ensureBrowser(): Promise<void> {
24
/**
 * Track `page` under `tabId` and start buffering its console messages.
 *
 * The buffer for a tab keeps at most `this.logLimit` entries: once the
 * limit is exceeded the oldest message is dropped. The buffer is removed
 * when the page emits `close`.
 */
private registerNewPage(tabId: string, page: Page): void {
  // Register page and logs with tabId
  this.pages.set(tabId, page);
  this.consoleLogs.set(tabId, []);

  // Set up console log listener for the page
  page.on('console', (msg: ConsoleMessage) => {
    const logs = this.consoleLogs.get(tabId);
    if (logs) {
      logs.push(msg);
      if (logs.length > this.logLimit) {
        logs.shift();
      }
    }
  });

  // Clean logs on page close
  page.on('close', () => {
    this.consoleLogs.delete(tabId);
  });
}
44
+
45
+ async ensureBrowser(): Promise<void> {
21
46
  if (this.browser) {
22
47
  return;
23
48
  }
@@ -57,7 +82,7 @@ export class HybridBrowserSession {
57
82
  // In CDP mode, only consider pages with about:blank as available
58
83
  if (pageUrl === 'about:blank') {
59
84
  const tabId = this.generateTabId();
60
- this.pages.set(tabId, page);
85
+ this.registerNewPage(tabId, page);
61
86
  if (!this.currentTabId) {
62
87
  this.currentTabId = tabId;
63
88
  availablePageFound = true;
@@ -97,7 +122,7 @@ export class HybridBrowserSession {
97
122
  const pages = this.context.pages();
98
123
  if (pages.length > 0) {
99
124
  const initialTabId = this.generateTabId();
100
- this.pages.set(initialTabId, pages[0]);
125
+ this.registerNewPage(initialTabId, pages[0]);
101
126
  this.currentTabId = initialTabId;
102
127
  }
103
128
  } else {
@@ -115,7 +140,7 @@ export class HybridBrowserSession {
115
140
 
116
141
  const initialPage = await this.context.newPage();
117
142
  const initialTabId = this.generateTabId();
118
- this.pages.set(initialTabId, initialPage);
143
+ this.registerNewPage(initialTabId, initialPage);
119
144
  this.currentTabId = initialTabId;
120
145
  }
121
146
  }
@@ -139,6 +164,13 @@ export class HybridBrowserSession {
139
164
  return this.pages.get(this.currentTabId)!;
140
165
  }
141
166
 
167
/**
 * Return the buffered console messages of the current tab.
 *
 * Resolves to an empty array when there is no current tab or no buffer
 * is registered for it. The returned array is the internal buffer itself,
 * not a copy.
 */
async getCurrentLogs(): Promise<ConsoleMessage[]> {
  if (!this.currentTabId) {
    return [];
  }
  // A single lookup covers both "no entry" and "entry present".
  return this.consoleLogs.get(this.currentTabId) || [];
}
173
+
142
174
  /**
143
175
  * Get current scroll position from the page
144
176
  */
@@ -343,7 +375,7 @@ export class HybridBrowserSession {
343
375
 
344
376
  // Generate tab ID for the new page
345
377
  const newTabId = this.generateTabId();
346
- this.pages.set(newTabId, newPage);
378
+ this.registerNewPage(newTabId, newPage);
347
379
 
348
380
  // Set up page properties
349
381
  const browserConfig = this.configLoader.getBrowserConfig();
@@ -434,7 +466,97 @@ export class HybridBrowserSession {
434
466
  }
435
467
  }
436
468
 
469
/**
 * Perform a coordinate-based mouse action on `page`.
 *
 * `control` must be 'click', 'right_click' or 'dblclick'. The coordinates
 * are checked against the page's viewport size before the action is sent.
 * Failures are never thrown; they are reported through
 * `{ success: false, error }`.
 */
private async performMouseControl(page: Page, control: string, x: number, y: number): Promise<{ success: boolean; error?: string }> {
  try {
    const viewport = page.viewportSize();
    if (!viewport) {
      return { success: false, error: 'Viewport size not available from page.' };
    }
    // NaN fails every ordered comparison, so without the explicit finite
    // check it would slip past the bounds test and reach page.mouse.
    const coordinatesAreFinite = Number.isFinite(x) && Number.isFinite(y);
    if (!coordinatesAreFinite || x < 0 || y < 0 || x > viewport.width || y > viewport.height) {
      return { success: false, error: `Invalid coordinates, outside viewport bounds: (${x}, ${y})` };
    }
    switch (control) {
      case 'click': {
        await page.mouse.click(x, y);
        break;
      }
      case 'right_click': {
        await page.mouse.click(x, y, { button: 'right' });
        break;
      }
      case 'dblclick': {
        await page.mouse.dblclick(x, y);
        break;
      }
      default:
        return { success: false, error: `Invalid control action: ${control}` };
    }

    return { success: true };
  } catch (error) {
    return { success: false, error: `Mouse action failed: ${error}` };
  }
}
437
503
 
504
/**
 * Drag from the centre of one element to the centre of another, both
 * addressed by snapshot ref ID.
 *
 * A fresh AI snapshot is taken first, then each ref is resolved through
 * an `aria-ref=` selector. Failures are never thrown; they are reported
 * through `{ success: false, error }`.
 */
private async performMouseDrag(page: Page, fromRef: string, toRef: string): Promise<{ success: boolean; error?: string }> {
  try {
    // Ensure we have the latest snapshot
    await (page as any)._snapshotForAI();

    // Get elements using Playwright's aria-ref selector
    const fromSelector = `aria-ref=${fromRef}`;
    const toSelector = `aria-ref=${toRef}`;

    // locator()/first() are synchronous; nothing to await here.
    const fromElement = page.locator(fromSelector).first();
    const toElement = page.locator(toSelector).first();

    // Check if elements exist
    const fromExists = await fromElement.count() > 0;
    const toExists = await toElement.count() > 0;

    if (!fromExists) {
      return { success: false, error: `Source element with ref ${fromRef} not found` };
    }

    if (!toExists) {
      return { success: false, error: `Target element with ref ${toRef} not found` };
    }

    // Get the center coordinates of both elements
    const fromBox = await fromElement.boundingBox();
    const toBox = await toElement.boundingBox();

    if (!fromBox) {
      return { success: false, error: `Could not get bounding box for source element with ref ${fromRef}` };
    }

    if (!toBox) {
      return { success: false, error: `Could not get bounding box for target element with ref ${toRef}` };
    }

    const fromX = fromBox.x + fromBox.width / 2;
    const fromY = fromBox.y + fromBox.height / 2;
    const toX = toBox.x + toBox.width / 2;
    const toY = toBox.y + toBox.height / 2;

    // Perform the drag operation
    await page.mouse.move(fromX, fromY);
    await page.mouse.down();
    try {
      // Destination coordinates
      await page.mouse.move(toX, toY);
    } finally {
      // Always release the button, otherwise a failed move would leave
      // it pressed for every later mouse action on this page.
      await page.mouse.up();
    }

    return { success: true };
  } catch (error) {
    return { success: false, error: `Mouse drag action failed: ${error}` };
  }
}
438
560
 
439
561
  async executeAction(action: BrowserAction): Promise<ActionResult> {
440
562
  const startTime = Date.now();
@@ -519,6 +641,40 @@ export class HybridBrowserSession {
519
641
  actionExecutionTime = Date.now() - enterStart;
520
642
  break;
521
643
  }
644
+
645
+ case 'mouse_control': {
646
+ elementSearchTime = Date.now() - elementSearchStart;
647
+ const mouseControlStart = Date.now();
648
+ const mouseControlResult = await this.performMouseControl(page, action.control, action.x, action.y);
649
+
650
+ if (!mouseControlResult.success) {
651
+ throw new Error(`Action failed: ${mouseControlResult.error}`);
652
+ }
653
+ actionExecutionTime = Date.now() - mouseControlStart;
654
+ break;
655
+ }
656
+
657
+ case 'mouse_drag': {
658
+ elementSearchTime = Date.now() - elementSearchStart;
659
+ const mouseDragStart = Date.now();
660
+ const mouseDragResult = await this.performMouseDrag(page, action.from_ref, action.to_ref);
661
+
662
+ if (!mouseDragResult.success) {
663
+ throw new Error(`Action failed: ${mouseDragResult.error}`);
664
+ }
665
+ actionExecutionTime = Date.now() - mouseDragStart;
666
+ break;
667
+ }
668
+
669
+ case 'press_key': {
670
+ elementSearchTime = Date.now() - elementSearchStart;
671
+ const keyPressStart = Date.now();
672
+ // concatenate keys with '+' for key combinations
673
+ const keys = action.keys.join('+');
674
+ await page.keyboard.press(keys);
675
+ actionExecutionTime = Date.now() - keyPressStart;
676
+ break;
677
+ }
522
678
 
523
679
  default:
524
680
  throw new Error(`Unknown action type: ${(action as any).type}`);
@@ -651,7 +807,7 @@ export class HybridBrowserSession {
651
807
  if (!isTracked && pageUrl === 'about:blank') {
652
808
  newPage = page;
653
809
  newTabId = this.generateTabId();
654
- this.pages.set(newTabId, newPage);
810
+ this.registerNewPage(newTabId, newPage);
655
811
  break;
656
812
  }
657
813
  }
@@ -663,7 +819,7 @@ export class HybridBrowserSession {
663
819
  // Non-CDP mode: create new page as usual
664
820
  newPage = await this.context.newPage();
665
821
  newTabId = this.generateTabId();
666
- this.pages.set(newTabId, newPage);
822
+ this.registerNewPage(newTabId, newPage);
667
823
  }
668
824
 
669
825
  // Set up page properties
@@ -30,6 +30,7 @@ export interface BrowserConfig {
30
30
  // Tab management
31
31
  tabIdPrefix: string;
32
32
  tabCounterPadding: number;
33
+ consoleLogLimit: number;
33
34
 
34
35
  // Scroll and positioning
35
36
  scrollPositionScale: number;
@@ -113,6 +114,7 @@ function getDefaultBrowserConfig(): BrowserConfig {
113
114
  clickTimeout: 3000,
114
115
  tabIdPrefix: 'tab-',
115
116
  tabCounterPadding: 3,
117
+ consoleLogLimit: 1000,
116
118
  scrollPositionScale: 0.1,
117
119
  navigationDelay: 100,
118
120
  blankPageUrls: ['about:blank', ''],
@@ -1,6 +1,7 @@
1
1
  import {HybridBrowserSession} from './browser-session';
2
2
  import {ActionResult, BrowserAction, BrowserToolkitConfig, SnapshotResult, TabInfo, VisualMarkResult} from './types';
3
3
  import {ConfigLoader} from './config-loader';
4
+ import {ConsoleMessage} from 'playwright';
4
5
 
5
6
  export class HybridBrowserToolkit {
6
7
  private session: HybridBrowserSession;
@@ -382,6 +383,21 @@ export class HybridBrowserToolkit {
382
383
  return this.executeActionWithSnapshot(action);
383
384
  }
384
385
 
386
/**
 * Build a `mouse_control` action for the given control type and
 * coordinates and run it through `executeActionWithSnapshot`.
 */
async mouseControl(control: 'click' | 'right_click' | 'dblclick', x: number, y: number): Promise<any> {
  const action: BrowserAction = { type: 'mouse_control', control, x, y };
  return this.executeActionWithSnapshot(action);
}
390
+
391
/**
 * Build a `mouse_drag` action between two element refs and run it
 * through `executeActionWithSnapshot`.
 */
async mouseDrag(from_ref: string, to_ref: string): Promise<any> {
  const action: BrowserAction = { type: 'mouse_drag', from_ref, to_ref };
  return this.executeActionWithSnapshot(action);
}
395
+
396
/**
 * Build a `press_key` action for the given keys and run it through
 * `executeActionWithSnapshot`.
 */
async pressKeys(keys: string[]): Promise<any> {
  const action: BrowserAction = { type: 'press_key', keys };
  return this.executeActionWithSnapshot(action);
}
400
+
385
401
  async back(): Promise<ActionResult> {
386
402
  const startTime = Date.now();
387
403
 
@@ -519,4 +535,93 @@ export class HybridBrowserToolkit {
519
535
  return await this.session.getTabInfo();
520
536
  }
521
537
 
522
- }
538
/**
 * Return the current tab's buffered console messages as plain
 * `{ type, text }` objects.
 */
async getConsoleView(): Promise<any> {
  const currentLogs = await this.session.getCurrentLogs();
  // Format logs
  return currentLogs.map(item => ({
    type: item.type(),
    text: item.text(),
  }));
}
546
+
547
/**
 * Execute JavaScript in the current page and report the outcome.
 *
 * The code is first evaluated with `eval`. If that raises a SyntaxError
 * (for example because the snippet uses a bare `return`), it is run once
 * more as a function body. While the code runs, `console.log` is
 * intercepted so its output can be returned as `console_output`; the
 * original `console.log` is restored on every exit path.
 *
 * NOTE(review): this runs caller-supplied code in the page by design;
 * callers must treat `code` as trusted input.
 *
 * Never throws: failures are reported through the `result` string, with
 * an empty `console_output` and `snapshot`.
 */
async consoleExecute(code: string): Promise<any> {
  const startTime = Date.now();
  try {
    const page = await this.session.getCurrentPage();

    // Embed the code only as an escaped string literal so its contents
    // can never break the syntax of the wrapper itself.
    const codeLiteral = JSON.stringify(code);

    // Wrap the code to capture console.log output
    const wrappedCode = `
      (function() {
        const _logs = [];
        const originalLog = console.log;
        console.log = function(...args) {
          _logs.push(args.map(arg => {
            try {
              return typeof arg === 'object' ? JSON.stringify(arg) : String(arg);
            } catch (e) {
              return String(arg);
            }
          }).join(' '));
          originalLog.apply(console, args);
        };

        let result;
        try {
          try {
            result = eval(${codeLiteral});
          } catch (e) {
            // Only retry when the code could not be parsed as a script;
            // retrying after a runtime error would execute it twice.
            if (!(e instanceof SyntaxError)) {
              throw e;
            }
            result = new Function(${codeLiteral})();
          }
        } finally {
          console.log = originalLog;
        }

        return { result, logs: _logs };
      })()
    `;

    const evalResult = await page.evaluate(wrappedCode) as { result: any; logs: string[] };
    const { result, logs } = evalResult;

    const snapshotStart = Date.now();
    const snapshot = await this.getPageSnapshot(this.viewportLimit);
    const snapshotTime = Date.now() - snapshotStart;
    const totalTime = Date.now() - startTime;

    // Properly serialize the result
    let resultStr: string;
    try {
      resultStr = JSON.stringify(result, null, 2);
    } catch (e) {
      // Fallback for non-serializable values
      resultStr = String(result);
    }

    return {
      result: `Console execution result: ${resultStr}`,
      console_output: logs,
      snapshot: snapshot,
      timing: {
        total_time_ms: totalTime,
        snapshot_time_ms: snapshotTime,
      },
    };

  } catch (error) {
    const totalTime = Date.now() - startTime;
    return {
      result: `Console execution failed: ${error}`,
      console_output: [],
      snapshot: '',
      timing: {
        total_time_ms: totalTime,
        snapshot_time_ms: 0,
      },
    };
  }
}
625
+
626
+ }
627
+
@@ -101,7 +101,25 @@ export interface EnterAction {
101
101
  type: 'enter';
102
102
  }
103
103
 
104
- export type BrowserAction = ClickAction | TypeAction | SelectAction | ScrollAction | EnterAction;
104
/** Coordinate-based mouse action. */
export interface MouseAction {
  type: 'mouse_control';
  /** Which mouse gesture to perform. */
  control: 'click' | 'right_click' | 'dblclick';
  /** Horizontal coordinate of the action. */
  x: number;
  /** Vertical coordinate of the action. */
  y: number;
}
110
+
111
/** Drag-and-drop action between two elements identified by ref ID. */
export interface MouseDragAction {
  type: 'mouse_drag';
  /** Ref ID of the element the drag starts from. */
  from_ref: string;
  /** Ref ID of the element the drag ends on. */
  to_ref: string;
}
116
+
117
/** Keyboard action: a single key or a key combination. */
export interface PressKeyAction {
  type: 'press_key';
  /** Keys to press; several entries form one combination. */
  keys: string[];
}
121
+
122
+ export type BrowserAction = ClickAction | TypeAction | SelectAction | ScrollAction | EnterAction | MouseAction | MouseDragAction | PressKeyAction;
105
123
 
106
124
  export interface VisualMarkResult {
107
125
  text: string;