camel-ai 0.2.67__py3-none-any.whl → 0.2.80a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (224)
  1. camel/__init__.py +1 -1
  2. camel/agents/_types.py +6 -2
  3. camel/agents/_utils.py +38 -0
  4. camel/agents/chat_agent.py +4014 -410
  5. camel/agents/mcp_agent.py +30 -27
  6. camel/agents/repo_agent.py +2 -1
  7. camel/benchmarks/browsecomp.py +6 -6
  8. camel/configs/__init__.py +15 -0
  9. camel/configs/aihubmix_config.py +88 -0
  10. camel/configs/amd_config.py +70 -0
  11. camel/configs/cometapi_config.py +104 -0
  12. camel/configs/minimax_config.py +93 -0
  13. camel/configs/nebius_config.py +103 -0
  14. camel/configs/vllm_config.py +2 -0
  15. camel/data_collectors/alpaca_collector.py +15 -6
  16. camel/datagen/self_improving_cot.py +1 -1
  17. camel/datasets/base_generator.py +39 -10
  18. camel/environments/__init__.py +12 -0
  19. camel/environments/rlcards_env.py +860 -0
  20. camel/environments/single_step.py +28 -3
  21. camel/environments/tic_tac_toe.py +1 -1
  22. camel/interpreters/__init__.py +2 -0
  23. camel/interpreters/docker/Dockerfile +4 -16
  24. camel/interpreters/docker_interpreter.py +3 -2
  25. camel/interpreters/e2b_interpreter.py +34 -1
  26. camel/interpreters/internal_python_interpreter.py +51 -2
  27. camel/interpreters/microsandbox_interpreter.py +395 -0
  28. camel/loaders/__init__.py +11 -2
  29. camel/loaders/base_loader.py +85 -0
  30. camel/loaders/chunkr_reader.py +9 -0
  31. camel/loaders/firecrawl_reader.py +4 -4
  32. camel/logger.py +1 -1
  33. camel/memories/agent_memories.py +84 -1
  34. camel/memories/base.py +34 -0
  35. camel/memories/blocks/chat_history_block.py +122 -4
  36. camel/memories/blocks/vectordb_block.py +8 -1
  37. camel/memories/context_creators/score_based.py +29 -237
  38. camel/memories/records.py +88 -8
  39. camel/messages/base.py +166 -40
  40. camel/messages/func_message.py +32 -5
  41. camel/models/__init__.py +10 -0
  42. camel/models/aihubmix_model.py +83 -0
  43. camel/models/aiml_model.py +1 -16
  44. camel/models/amd_model.py +101 -0
  45. camel/models/anthropic_model.py +117 -18
  46. camel/models/aws_bedrock_model.py +2 -33
  47. camel/models/azure_openai_model.py +205 -91
  48. camel/models/base_audio_model.py +3 -1
  49. camel/models/base_model.py +189 -24
  50. camel/models/cohere_model.py +5 -17
  51. camel/models/cometapi_model.py +83 -0
  52. camel/models/crynux_model.py +1 -16
  53. camel/models/deepseek_model.py +6 -16
  54. camel/models/fish_audio_model.py +6 -0
  55. camel/models/gemini_model.py +71 -20
  56. camel/models/groq_model.py +1 -17
  57. camel/models/internlm_model.py +1 -16
  58. camel/models/litellm_model.py +49 -32
  59. camel/models/lmstudio_model.py +1 -17
  60. camel/models/minimax_model.py +83 -0
  61. camel/models/mistral_model.py +1 -16
  62. camel/models/model_factory.py +27 -1
  63. camel/models/model_manager.py +24 -6
  64. camel/models/modelscope_model.py +1 -16
  65. camel/models/moonshot_model.py +185 -19
  66. camel/models/nebius_model.py +83 -0
  67. camel/models/nemotron_model.py +0 -5
  68. camel/models/netmind_model.py +1 -16
  69. camel/models/novita_model.py +1 -16
  70. camel/models/nvidia_model.py +1 -16
  71. camel/models/ollama_model.py +4 -19
  72. camel/models/openai_compatible_model.py +171 -46
  73. camel/models/openai_model.py +205 -77
  74. camel/models/openrouter_model.py +1 -17
  75. camel/models/ppio_model.py +1 -16
  76. camel/models/qianfan_model.py +1 -16
  77. camel/models/qwen_model.py +1 -16
  78. camel/models/reka_model.py +1 -16
  79. camel/models/samba_model.py +34 -47
  80. camel/models/sglang_model.py +64 -31
  81. camel/models/siliconflow_model.py +1 -16
  82. camel/models/stub_model.py +0 -4
  83. camel/models/togetherai_model.py +1 -16
  84. camel/models/vllm_model.py +1 -16
  85. camel/models/volcano_model.py +0 -17
  86. camel/models/watsonx_model.py +1 -16
  87. camel/models/yi_model.py +1 -16
  88. camel/models/zhipuai_model.py +60 -16
  89. camel/parsers/__init__.py +18 -0
  90. camel/parsers/mcp_tool_call_parser.py +176 -0
  91. camel/retrievers/auto_retriever.py +1 -0
  92. camel/runtimes/configs.py +11 -11
  93. camel/runtimes/daytona_runtime.py +15 -16
  94. camel/runtimes/docker_runtime.py +6 -6
  95. camel/runtimes/remote_http_runtime.py +5 -5
  96. camel/services/agent_openapi_server.py +380 -0
  97. camel/societies/__init__.py +2 -0
  98. camel/societies/role_playing.py +26 -28
  99. camel/societies/workforce/__init__.py +2 -0
  100. camel/societies/workforce/events.py +122 -0
  101. camel/societies/workforce/prompts.py +249 -38
  102. camel/societies/workforce/role_playing_worker.py +82 -20
  103. camel/societies/workforce/single_agent_worker.py +634 -34
  104. camel/societies/workforce/structured_output_handler.py +512 -0
  105. camel/societies/workforce/task_channel.py +169 -23
  106. camel/societies/workforce/utils.py +176 -9
  107. camel/societies/workforce/worker.py +77 -23
  108. camel/societies/workforce/workflow_memory_manager.py +772 -0
  109. camel/societies/workforce/workforce.py +3168 -478
  110. camel/societies/workforce/workforce_callback.py +74 -0
  111. camel/societies/workforce/workforce_logger.py +203 -175
  112. camel/societies/workforce/workforce_metrics.py +33 -0
  113. camel/storages/__init__.py +4 -0
  114. camel/storages/key_value_storages/json.py +15 -2
  115. camel/storages/key_value_storages/mem0_cloud.py +48 -47
  116. camel/storages/object_storages/google_cloud.py +1 -1
  117. camel/storages/vectordb_storages/__init__.py +6 -0
  118. camel/storages/vectordb_storages/chroma.py +731 -0
  119. camel/storages/vectordb_storages/oceanbase.py +13 -13
  120. camel/storages/vectordb_storages/pgvector.py +349 -0
  121. camel/storages/vectordb_storages/qdrant.py +3 -3
  122. camel/storages/vectordb_storages/surreal.py +365 -0
  123. camel/storages/vectordb_storages/tidb.py +8 -6
  124. camel/tasks/task.py +244 -27
  125. camel/toolkits/__init__.py +46 -8
  126. camel/toolkits/aci_toolkit.py +64 -19
  127. camel/toolkits/arxiv_toolkit.py +6 -6
  128. camel/toolkits/base.py +63 -5
  129. camel/toolkits/code_execution.py +28 -1
  130. camel/toolkits/context_summarizer_toolkit.py +684 -0
  131. camel/toolkits/craw4ai_toolkit.py +93 -0
  132. camel/toolkits/dappier_toolkit.py +10 -6
  133. camel/toolkits/dingtalk.py +1135 -0
  134. camel/toolkits/edgeone_pages_mcp_toolkit.py +49 -0
  135. camel/toolkits/excel_toolkit.py +901 -67
  136. camel/toolkits/file_toolkit.py +1402 -0
  137. camel/toolkits/function_tool.py +30 -6
  138. camel/toolkits/github_toolkit.py +107 -20
  139. camel/toolkits/gmail_toolkit.py +1839 -0
  140. camel/toolkits/google_calendar_toolkit.py +38 -4
  141. camel/toolkits/google_drive_mcp_toolkit.py +54 -0
  142. camel/toolkits/human_toolkit.py +34 -10
  143. camel/toolkits/hybrid_browser_toolkit/__init__.py +18 -0
  144. camel/toolkits/hybrid_browser_toolkit/config_loader.py +185 -0
  145. camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +246 -0
  146. camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit_ts.py +1973 -0
  147. camel/toolkits/hybrid_browser_toolkit/installer.py +203 -0
  148. camel/toolkits/hybrid_browser_toolkit/ts/package-lock.json +3749 -0
  149. camel/toolkits/hybrid_browser_toolkit/ts/package.json +32 -0
  150. camel/toolkits/hybrid_browser_toolkit/ts/src/browser-scripts.js +125 -0
  151. camel/toolkits/hybrid_browser_toolkit/ts/src/browser-session.ts +1815 -0
  152. camel/toolkits/hybrid_browser_toolkit/ts/src/config-loader.ts +233 -0
  153. camel/toolkits/hybrid_browser_toolkit/ts/src/hybrid-browser-toolkit.ts +590 -0
  154. camel/toolkits/hybrid_browser_toolkit/ts/src/index.ts +7 -0
  155. camel/toolkits/hybrid_browser_toolkit/ts/src/parent-child-filter.ts +226 -0
  156. camel/toolkits/hybrid_browser_toolkit/ts/src/snapshot-parser.ts +219 -0
  157. camel/toolkits/hybrid_browser_toolkit/ts/src/som-screenshot-injected.ts +543 -0
  158. camel/toolkits/hybrid_browser_toolkit/ts/src/types.ts +130 -0
  159. camel/toolkits/hybrid_browser_toolkit/ts/tsconfig.json +26 -0
  160. camel/toolkits/hybrid_browser_toolkit/ts/websocket-server.js +319 -0
  161. camel/toolkits/hybrid_browser_toolkit/ws_wrapper.py +1032 -0
  162. camel/toolkits/hybrid_browser_toolkit_py/__init__.py +17 -0
  163. camel/toolkits/hybrid_browser_toolkit_py/actions.py +575 -0
  164. camel/toolkits/hybrid_browser_toolkit_py/agent.py +311 -0
  165. camel/toolkits/hybrid_browser_toolkit_py/browser_session.py +787 -0
  166. camel/toolkits/hybrid_browser_toolkit_py/config_loader.py +490 -0
  167. camel/toolkits/hybrid_browser_toolkit_py/hybrid_browser_toolkit.py +2390 -0
  168. camel/toolkits/hybrid_browser_toolkit_py/snapshot.py +233 -0
  169. camel/toolkits/hybrid_browser_toolkit_py/stealth_script.js +0 -0
  170. camel/toolkits/hybrid_browser_toolkit_py/unified_analyzer.js +1043 -0
  171. camel/toolkits/image_generation_toolkit.py +390 -0
  172. camel/toolkits/jina_reranker_toolkit.py +3 -4
  173. camel/toolkits/klavis_toolkit.py +5 -1
  174. camel/toolkits/markitdown_toolkit.py +104 -0
  175. camel/toolkits/math_toolkit.py +64 -10
  176. camel/toolkits/mcp_toolkit.py +370 -45
  177. camel/toolkits/memory_toolkit.py +5 -1
  178. camel/toolkits/message_agent_toolkit.py +608 -0
  179. camel/toolkits/message_integration.py +724 -0
  180. camel/toolkits/minimax_mcp_toolkit.py +195 -0
  181. camel/toolkits/note_taking_toolkit.py +277 -0
  182. camel/toolkits/notion_mcp_toolkit.py +224 -0
  183. camel/toolkits/openbb_toolkit.py +5 -1
  184. camel/toolkits/origene_mcp_toolkit.py +56 -0
  185. camel/toolkits/playwright_mcp_toolkit.py +12 -31
  186. camel/toolkits/pptx_toolkit.py +25 -12
  187. camel/toolkits/resend_toolkit.py +168 -0
  188. camel/toolkits/screenshot_toolkit.py +213 -0
  189. camel/toolkits/search_toolkit.py +437 -142
  190. camel/toolkits/slack_toolkit.py +104 -50
  191. camel/toolkits/sympy_toolkit.py +1 -1
  192. camel/toolkits/task_planning_toolkit.py +3 -3
  193. camel/toolkits/terminal_toolkit/__init__.py +18 -0
  194. camel/toolkits/terminal_toolkit/terminal_toolkit.py +957 -0
  195. camel/toolkits/terminal_toolkit/utils.py +532 -0
  196. camel/toolkits/thinking_toolkit.py +1 -1
  197. camel/toolkits/vertex_ai_veo_toolkit.py +590 -0
  198. camel/toolkits/video_analysis_toolkit.py +106 -26
  199. camel/toolkits/video_download_toolkit.py +17 -14
  200. camel/toolkits/web_deploy_toolkit.py +1219 -0
  201. camel/toolkits/wechat_official_toolkit.py +483 -0
  202. camel/toolkits/zapier_toolkit.py +5 -1
  203. camel/types/__init__.py +2 -2
  204. camel/types/agents/tool_calling_record.py +4 -1
  205. camel/types/enums.py +316 -40
  206. camel/types/openai_types.py +2 -2
  207. camel/types/unified_model_type.py +31 -4
  208. camel/utils/commons.py +36 -5
  209. camel/utils/constants.py +3 -0
  210. camel/utils/context_utils.py +1003 -0
  211. camel/utils/mcp.py +138 -4
  212. camel/utils/mcp_client.py +45 -1
  213. camel/utils/message_summarizer.py +148 -0
  214. camel/utils/token_counting.py +43 -20
  215. camel/utils/tool_result.py +44 -0
  216. {camel_ai-0.2.67.dist-info → camel_ai-0.2.80a2.dist-info}/METADATA +296 -85
  217. {camel_ai-0.2.67.dist-info → camel_ai-0.2.80a2.dist-info}/RECORD +219 -146
  218. camel/loaders/pandas_reader.py +0 -368
  219. camel/toolkits/dalle_toolkit.py +0 -175
  220. camel/toolkits/file_write_toolkit.py +0 -444
  221. camel/toolkits/openai_agent_toolkit.py +0 -135
  222. camel/toolkits/terminal_toolkit.py +0 -1037
  223. {camel_ai-0.2.67.dist-info → camel_ai-0.2.80a2.dist-info}/WHEEL +0 -0
  224. {camel_ai-0.2.67.dist-info → camel_ai-0.2.80a2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,1815 @@
1
+ import { Page, Browser, BrowserContext, chromium, ConsoleMessage, Frame } from 'playwright';
2
+ import { BrowserToolkitConfig, SnapshotResult, SnapshotElement, ActionResult, TabInfo, BrowserAction, DetailedTiming } from './types';
3
+ import { ConfigLoader, StealthConfig } from './config-loader';
4
+
5
+ export class HybridBrowserSession {
6
+ private browser: Browser | null = null;
7
+ private context: BrowserContext | null = null;
8
+ private contextOwnedByUs: boolean = false;
9
+ private pages: Map<string, Page> = new Map();
10
+ private consoleLogs: Map<string, ConsoleMessage[]> = new Map();
11
+ private currentTabId: string | null = null;
12
+ private tabCounter = 0;
13
+ private configLoader: ConfigLoader;
14
+ private scrollPosition: { x: number; y: number } = {x: 0, y: 0};
15
+ private hasNavigatedBefore = false; // Track if we've navigated before
16
+ private logLimit: number;
17
+
18
/**
 * Create a session from a (Python-originated) toolkit configuration.
 *
 * The raw config is normalised through `ConfigLoader.fromPythonConfig`, and
 * the per-tab console log cap is read from the resulting browser config,
 * falling back to 1000 when the configured value is missing or falsy.
 */
constructor(config: BrowserToolkitConfig = {}) {
  const loader = ConfigLoader.fromPythonConfig(config);
  this.configLoader = loader;

  const configuredLimit = loader.getBrowserConfig().consoleLogLimit;
  this.logLimit = configuredLimit ? configuredLimit : 1000;
}
24
+
25
/**
 * Track `page` under `tabId` and start collecting its console output.
 *
 * An empty log buffer is created for the tab, a `console` listener appends
 * each message to it (dropping the oldest entry once `logLimit` is
 * exceeded), and a `close` listener removes the buffer again.
 */
private registerNewPage(tabId: string, page: Page): void {
  this.pages.set(tabId, page);
  this.consoleLogs.set(tabId, []);

  const recordMessage = (message: ConsoleMessage): void => {
    // Look the buffer up on every message: it disappears once the page closes.
    const buffer = this.consoleLogs.get(tabId);
    if (!buffer) {
      return;
    }
    buffer.push(message);
    if (buffer.length > this.logLimit) {
      buffer.shift();
    }
  };

  const dropBuffer = (): void => {
    this.consoleLogs.delete(tabId);
  };

  page.on('console', recordMessage);
  page.on('close', dropBuffer);
}
45
+
46
/**
 * Lazily connect to, or launch, the Chromium instance used by this session.
 *
 * - With `cdpUrl` configured, attaches over CDP, reuses the first existing
 *   context (or creates one), and picks the initial tab according to
 *   `cdpKeepCurrentPage`.
 * - Otherwise launches Chromium, as a persistent context when `userDataDir`
 *   is set, or as a regular browser with a fresh context and page.
 *
 * Afterwards every tracked page gets `navigationTimeout` as both its default
 * navigation timeout and its default action timeout.
 *
 * The call is a no-op once the session is initialised.
 *
 * @throws Error in CDP mode with `cdpKeepCurrentPage` when the context has no
 *         pages, or only closed pages.
 */
async ensureBrowser(): Promise<void> {
  if (this.browser) {
    return;
  }
  // On the persistent-context path the browser handle comes from
  // `context.browser()`, which may be null depending on the Playwright
  // version. Treat a context we created ourselves as "already initialised" so
  // a second call does not try to launch another persistent context on the
  // same user data directory.
  if (this.context && this.contextOwnedByUs) {
    return;
  }

  const browserConfig = this.configLoader.getBrowserConfig();
  const stealthConfig = this.configLoader.getStealthConfig();

  // Context options shared by every code path that calls browser.newContext().
  const buildContextOptions = (): any => {
    const options: any = {
      viewport: browserConfig.viewport
    };
    if (stealthConfig.enabled) {
      if (stealthConfig.extraHTTPHeaders) {
        options.extraHTTPHeaders = stealthConfig.extraHTTPHeaders;
      }
      if (stealthConfig.userAgent) {
        options.userAgent = stealthConfig.userAgent;
      }
    }
    return options;
  };

  // Register a page under a fresh tab id and make it the current tab.
  const adoptPage = (page: Page): string => {
    const tabId = this.generateTabId();
    this.registerNewPage(tabId, page);
    this.currentTabId = tabId;
    return tabId;
  };

  if (browserConfig.cdpUrl) {
    // Connect to an already running browser via CDP.
    const connected = await chromium.connectOverCDP(browserConfig.cdpUrl);
    this.browser = connected;

    const contexts = connected.contexts();
    if (contexts.length > 0) {
      this.context = contexts[0];
      this.contextOwnedByUs = false;

      // Headers can still be applied to an existing context; the user agent
      // cannot, so only warn about it.
      if (stealthConfig.enabled) {
        if (stealthConfig.extraHTTPHeaders) {
          await this.context.setExtraHTTPHeaders(stealthConfig.extraHTTPHeaders);
        }
        if (stealthConfig.userAgent) {
          console.warn('[HybridBrowserSession] Cannot apply userAgent to existing context. Consider creating a new context if userAgent customization is required.');
        }
      }
    } else {
      this.context = await connected.newContext(buildContextOptions());
      this.contextOwnedByUs = true;
      this.browser = this.context.browser();
    }

    const pages = this.context.pages();
    console.log(`[CDP] cdpKeepCurrentPage: ${browserConfig.cdpKeepCurrentPage}, pages count: ${pages.length}`);

    if (browserConfig.cdpKeepCurrentPage) {
      // Reuse an existing page; never create one in this mode.
      if (pages.length === 0) {
        throw new Error('No pages available in CDP mode with cdpKeepCurrentPage=true');
      }
      const validPage = pages.find((candidate) => !candidate.isClosed());
      if (!validPage) {
        throw new Error('No active pages available in CDP mode with cdpKeepCurrentPage=true (all pages are closed)');
      }
      const tabId = adoptPage(validPage);
      console.log(`[CDP] cdpKeepCurrentPage mode: using existing page as initial tab: ${tabId}, URL: ${validPage.url()}`);
    } else if (pages.length > 0) {
      // Prefer the first blank page; otherwise open a new one.
      let availablePageFound = false;
      for (const page of pages) {
        const pageUrl = page.url();
        if (this.isBlankPageUrl(pageUrl)) {
          const tabId = adoptPage(page);
          availablePageFound = true;
          console.log(`[CDP] Registered blank page as initial tab: ${tabId}, URL: ${pageUrl}`);
          break;
        }
      }

      if (!availablePageFound) {
        console.log('[CDP] No blank pages found, creating new page');
        adoptPage(await this.context.newPage());
      }
    } else {
      console.log('[CDP] No existing pages, creating initial page');
      adoptPage(await this.context.newPage());
    }
  } else {
    // Launch a local Chromium instance.
    const launchOptions: any = {
      headless: browserConfig.headless,
    };

    if (stealthConfig.enabled) {
      launchOptions.args = stealthConfig.args || [];

      if (stealthConfig.userAgent) {
        launchOptions.userAgent = stealthConfig.userAgent;
      }
      if (stealthConfig.extraHTTPHeaders) {
        launchOptions.extraHTTPHeaders = stealthConfig.extraHTTPHeaders;
      }
    }

    if (browserConfig.userDataDir) {
      // Ensure the viewport is honoured in the persistent context.
      launchOptions.viewport = browserConfig.viewport;
      this.context = await chromium.launchPersistentContext(
        browserConfig.userDataDir,
        launchOptions
      );
      this.contextOwnedByUs = true;
      this.browser = this.context.browser();

      const pages = this.context.pages();
      if (pages.length > 0) {
        adoptPage(pages[0]);
      }
    } else {
      const launched = await chromium.launch(launchOptions);
      this.browser = launched;

      this.context = await launched.newContext(buildContextOptions());
      this.contextOwnedByUs = true;

      adoptPage(await this.context.newPage());
    }
  }

  // Apply the configured timeout to every page registered so far.
  for (const page of this.pages.values()) {
    page.setDefaultNavigationTimeout(browserConfig.navigationTimeout);
    page.setDefaultTimeout(browserConfig.navigationTimeout);
  }
}
217
+
218
/**
 * Produce the next tab identifier.
 *
 * Increments the session-wide tab counter and returns it left-padded with
 * zeros to `tabCounterPadding` characters, prefixed by `tabIdPrefix`.
 */
private generateTabId(): string {
  const { tabIdPrefix, tabCounterPadding } = this.configLoader.getBrowserConfig();
  this.tabCounter += 1;
  const paddedCounter = String(this.tabCounter).padStart(tabCounterPadding, '0');
  return `${tabIdPrefix}${paddedCounter}`;
}
222
+
223
/**
 * Decide whether `url` denotes a blank page.
 *
 * A URL counts as blank when it is `about:blank` (optionally followed by a
 * query string), exactly matches one of the configured `blankPageUrls`, is
 * the empty string, or starts with the configured data-URL prefix
 * (`data:` when none is configured).
 */
private isBlankPageUrl(url: string): boolean {
  const browserConfig = this.configLoader.getBrowserConfig();

  if (url === 'about:blank' || url.startsWith('about:blank?')) {
    return true;
  }
  if (browserConfig.blankPageUrls.includes(url)) {
    return true;
  }
  if (url === '') {
    return true;
  }

  const dataPrefix = browserConfig.dataUrlPrefix || 'data:';
  return url.startsWith(dataPrefix);
}
238
+
239
/**
 * Return the page of the current tab, recovering when there is none.
 *
 * If the current tab id is unset or no longer tracked:
 * - In CDP mode with `cdpKeepCurrentPage`, no page is ever created. The
 *   first open page of the context that is not tracked yet is registered and
 *   returned; failing that, the first open page that is already tracked
 *   becomes the current tab.
 * - Otherwise a new page is opened in the existing context.
 *
 * Newly registered pages receive `navigationTimeout` as their default
 * navigation and action timeout.
 *
 * @throws Error when no open page exists in CDP keep-current-page mode, or
 *         when there is no browser context at all.
 */
async getCurrentPage(): Promise<Page> {
  if (this.currentTabId) {
    const current = this.pages.get(this.currentTabId);
    if (current) {
      return current;
    }
  }

  const browserConfig = this.configLoader.getBrowserConfig();

  if (browserConfig.cdpKeepCurrentPage && browserConfig.cdpUrl && this.context) {
    const allPages = this.context.pages();
    console.log(`[getCurrentPage] cdpKeepCurrentPage mode: Looking for existing page, found ${allPages.length} pages`);

    // Reverse lookup page -> tab id, built once instead of scanning the
    // tracked pages for every candidate. The first tab id wins per page.
    const tabIdByPage = new Map<Page, string>();
    for (const [tabId, page] of this.pages.entries()) {
      if (!tabIdByPage.has(page)) {
        tabIdByPage.set(page, tabId);
      }
    }

    // Prefer an open page that is not tracked yet.
    for (const page of allPages) {
      if (!tabIdByPage.has(page) && !page.isClosed()) {
        const tabId = this.generateTabId();
        this.registerNewPage(tabId, page);
        this.currentTabId = tabId;

        // Same timeouts as every other registration site.
        page.setDefaultNavigationTimeout(browserConfig.navigationTimeout);
        page.setDefaultTimeout(browserConfig.navigationTimeout);

        console.log(`[getCurrentPage] cdpKeepCurrentPage mode: Found and registered untracked page: ${tabId}`);
        return page;
      }
    }

    // Otherwise fall back to the first tracked page that is still open.
    // Looking only at allPages[0] would fail needlessly when that page is
    // closed but another one is usable.
    for (const page of allPages) {
      if (page.isClosed()) {
        continue;
      }
      const tabId = tabIdByPage.get(page);
      if (tabId !== undefined) {
        this.currentTabId = tabId;
        console.log(`[getCurrentPage] cdpKeepCurrentPage mode: Using existing tracked page: ${tabId}`);
        return page;
      }
    }

    throw new Error('No active page available in CDP mode with cdpKeepCurrentPage=true');
  }

  // Normal mode: create a new page.
  if (!this.context) {
    throw new Error('No browser context available');
  }

  console.log('[getCurrentPage] No active page, creating new page');
  const newPage = await this.context.newPage();
  const tabId = this.generateTabId();
  this.registerNewPage(tabId, newPage);
  this.currentTabId = tabId;

  newPage.setDefaultNavigationTimeout(browserConfig.navigationTimeout);
  newPage.setDefaultTimeout(browserConfig.navigationTimeout);

  return newPage;
}
295
+
296
/**
 * Return the console messages collected for the current tab.
 *
 * Yields an empty array when there is no current tab or no log buffer is
 * registered for it.
 */
async getCurrentLogs(): Promise<ConsoleMessage[]> {
  const tabId = this.currentTabId;
  if (tabId) {
    const logs = this.consoleLogs.get(tabId);
    if (logs) {
      return logs;
    }
  }
  return [];
}
302
+
303
/**
 * Read the current page's scroll offsets and cache them.
 *
 * On success the cached `scrollPosition` is replaced with the freshly
 * measured `{ x, y }`. On any failure a warning is logged and the previously
 * cached position is returned unchanged.
 */
private async getCurrentScrollPosition(): Promise<{ x: number; y: number }> {
  try {
    const activePage = await this.getCurrentPage();
    const measured = (await activePage.evaluate(() => {
      return {
        x: window.pageXOffset || document.documentElement.scrollLeft || 0,
        y: window.pageYOffset || document.documentElement.scrollTop || 0,
        devicePixelRatio: window.devicePixelRatio || 1,
        zoomLevel: window.outerWidth / window.innerWidth || 1
      };
    })) as { x: number; y: number; devicePixelRatio: number; zoomLevel: number };

    // Only the offsets are kept; the other measurements are not stored.
    const { x, y } = measured;
    this.scrollPosition = { x, y };
  } catch (error) {
    console.warn('Failed to get scroll position:', error);
  }
  return this.scrollPosition;
}
326
+
327
/**
 * Capture an AI-oriented snapshot of the current page.
 *
 * Thin public entry point that always delegates to the native Playwright
 * mapping implementation (`getSnapshotForAINative`) with the same arguments.
 */
async getSnapshotForAI(includeCoordinates = false, viewportLimit = false): Promise<SnapshotResult & { timing: DetailedTiming }> {
  const nativeSnapshot = await this.getSnapshotForAINative(includeCoordinates, viewportLimit);
  return nativeSnapshot;
}
331
+
332
/**
 * Extract role and quoted text for one element from snapshot text.
 *
 * Uses the first line containing the literal marker `[ref=<ref>]`. The role
 * is the first word-like token on that line (after optional indentation and
 * an optional leading dash); the text is the content of the first
 * double-quoted string. Either field is `undefined` when absent. Returns an
 * empty object when no line carries the marker.
 */
private parseElementFromSnapshot(snapshotText: string, ref: string): { role?: string; text?: string } {
  const marker = `[ref=${ref}]`;
  const matchingLine = snapshotText.split('\n').find((candidate) => candidate.includes(marker));
  if (matchingLine === undefined) {
    return {};
  }

  const roleHit = /^\s*-?\s*([\w-]+)/.exec(matchingLine);
  const quotedHit = /"([^"]*)"/.exec(matchingLine);

  return {
    role: roleHit === null ? undefined : roleHit[1],
    text: quotedHit === null ? undefined : quotedHit[1],
  };
}
345
+
346
/**
 * Index every referenced element of a snapshot in a single pass.
 *
 * For each line containing `[ref=...]` (matched case-insensitively) the map
 * receives an entry keyed by the ref, holding the lower-cased role token at
 * the start of the line and the content of the first double-quoted string.
 * Lines without a ref are ignored; a later line with the same ref overwrites
 * the earlier entry.
 */
private buildSnapshotIndex(snapshotText: string): Map<string, { role?: string; text?: string }> {
  const entries = new Map<string, { role?: string; text?: string }>();

  const refPattern = /\[ref=([^\]]+)\]/i;
  const rolePattern = /^\s*-?\s*([a-z0-9_-]+)/i;
  const quotedPattern = /"([^"]*)"/;

  snapshotText.split('\n').forEach((rawLine) => {
    const refHit = refPattern.exec(rawLine);
    if (refHit === null) {
      return;
    }

    const roleHit = rolePattern.exec(rawLine);
    const quotedHit = quotedPattern.exec(rawLine);

    entries.set(refHit[1], {
      role: roleHit === null ? undefined : roleHit[1].toLowerCase(),
      text: quotedHit === null ? undefined : quotedHit[1],
    });
  });

  return entries;
}
361
+
362
/**
 * Build an AI snapshot of the current page from Playwright's own snapshot.
 *
 * Steps:
 * 1. Capture the snapshot text via the page's `_snapshotForAI()`.
 * 2. Collect every `[ref=...]` and map it to its role/text
 *    (`'unknown'` / `''` when missing).
 * 3. When `includeCoordinates` is set, add the rounded bounding box of each
 *    element located through the `aria-ref=` selector; failures for a single
 *    element are ignored.
 * 4. When `viewportLimit` is set, keep only elements inside the viewport and
 *    rebuild the snapshot text accordingly.
 *
 * Any error in these steps is logged and turned into an error result with an
 * empty element map instead of being thrown.
 */
private async getSnapshotForAINative(includeCoordinates = false, viewportLimit = false): Promise<SnapshotResult & { timing: DetailedTiming }> {
  const startedAt = Date.now();
  const page = await this.getCurrentPage();

  try {
    // _snapshotForAI() is used so the page's aria-ref mapping is refreshed.
    const captureStartedAt = Date.now();
    const snapshotText = await (page as any)._snapshotForAI();
    const snapshotTime = Date.now() - captureStartedAt;

    // Collect every ref mentioned in the snapshot, in document order.
    const refs: string[] = [];
    const refPattern = /\[ref=([^\]]+)\]/g;
    for (let hit = refPattern.exec(snapshotText); hit !== null; hit = refPattern.exec(snapshotText)) {
      refs.push(hit[1]);
    }

    const mappingStartedAt = Date.now();
    const playwrightMapping: Record<string, any> = {};

    // Role and text come from one pass over the snapshot text.
    const snapshotIndex = this.buildSnapshotIndex(snapshotText);
    refs.forEach((ref) => {
      const elementInfo = snapshotIndex.get(ref) || {};
      playwrightMapping[ref] = {
        ref,
        role: elementInfo.role || 'unknown',
        text: elementInfo.text || '',
      };
    });

    if (includeCoordinates) {
      for (const ref of refs) {
        try {
          const target = await page.locator(`aria-ref=${ref}`).first();
          const found = (await target.count()) > 0;
          const box = found ? await target.boundingBox() : null;

          if (box) {
            playwrightMapping[ref] = {
              ...playwrightMapping[ref],
              coordinates: {
                x: Math.round(box.x),
                y: Math.round(box.y),
                width: Math.round(box.width),
                height: Math.round(box.height)
              }
            };
          }
        } catch (error) {
          // Best effort: an element without coordinates keeps its basic info.
        }
      }
    }

    const mappingTime = Date.now() - mappingStartedAt;

    // Optionally restrict the result to what is visible in the viewport.
    let finalElements = playwrightMapping;
    let finalSnapshot = snapshotText;

    if (viewportLimit) {
      const viewport = page.viewportSize() || { width: 1280, height: 720 };
      const scrollPos = await this.getCurrentScrollPosition();
      finalElements = this.filterElementsInViewport(playwrightMapping, viewport, scrollPos);
      finalSnapshot = this.rebuildSnapshotText(snapshotText, finalElements);
    }

    const totalTime = Date.now() - startedAt;

    return {
      snapshot: finalSnapshot,
      elements: finalElements,
      metadata: {
        elementCount: Object.keys(finalElements).length,
        url: page.url(),
        timestamp: new Date().toISOString(),
      },
      timing: {
        total_time_ms: totalTime,
        snapshot_time_ms: snapshotTime,
        coordinate_enrichment_time_ms: 0, // Integrated into mapping
        aria_mapping_time_ms: mappingTime,
      },
    };
  } catch (error) {
    console.error('Failed to get AI snapshot with native mapping:', error);
    const totalTime = Date.now() - startedAt;

    return {
      snapshot: 'Error: Unable to capture page snapshot',
      elements: {},
      metadata: {
        elementCount: 0,
        url: page.url(),
        timestamp: new Date().toISOString(),
      },
      timing: {
        total_time_ms: totalTime,
        snapshot_time_ms: 0,
        coordinate_enrichment_time_ms: 0,
        aria_mapping_time_ms: 0,
      },
    };
  }
}
477
+
478
+
479
+
480
+ /**
481
+ * Enhanced click implementation with new tab detection and scroll fix
482
+ */
483
+ private async performClick(page: Page, ref: string): Promise<{ success: boolean; method?: string; error?: string; newTabId?: string; diffSnapshot?: string }> {
484
+
485
+ try {
486
+ // Ensure we have the latest snapshot and mapping
487
+ await (page as any)._snapshotForAI();
488
+
489
+ // Use Playwright's aria-ref selector engine
490
+ const selector = `aria-ref=${ref}`;
491
+
492
+ // Check if element exists
493
+ const element = await page.locator(selector).first();
494
+ const exists = await element.count() > 0;
495
+
496
+ if (!exists) {
497
+ return { success: false, error: `Element with ref ${ref} not found` };
498
+ }
499
+
500
+ const role = await element.getAttribute('role');
501
+ const elementTagName = await element.evaluate(el => el.tagName.toLowerCase());
502
+ const isCombobox = role === 'combobox' || elementTagName === 'combobox';
503
+ const isTextbox = role === 'textbox' || elementTagName === 'input' || elementTagName === 'textarea';
504
+ const shouldCheckDiff = isCombobox || isTextbox;
505
+
506
+ let snapshotBefore: string | null = null;
507
+ if (shouldCheckDiff) {
508
+ snapshotBefore = await (page as any)._snapshotForAI();
509
+ }
510
+
511
+ // Check element properties
512
+ const browserConfig = this.configLoader.getBrowserConfig();
513
+ const target = await element.getAttribute(browserConfig.targetAttribute);
514
+ const href = await element.getAttribute(browserConfig.hrefAttribute);
515
+ const onclick = await element.getAttribute(browserConfig.onclickAttribute);
516
+ const tagName = await element.evaluate(el => el.tagName.toLowerCase());
517
+
518
+ // Check if element naturally opens new tab
519
+ const naturallyOpensNewTab = (
520
+ target === browserConfig.blankTarget ||
521
+ (onclick && onclick.includes(browserConfig.windowOpenString)) ||
522
+ (tagName === 'a' && href && (href.includes(`javascript:${browserConfig.windowOpenString}`) || href.includes(browserConfig.blankTarget)))
523
+ );
524
+
525
+ // Open ALL links in new tabs
526
+ // Check if this is a navigable link
527
+ const isNavigableLink = tagName === 'a' && href &&
528
+ !href.startsWith(browserConfig.anchorOnly) && // Not an anchor link
529
+ !href.startsWith(browserConfig.javascriptVoidPrefix) && // Not a void javascript
530
+ href !== browserConfig.javascriptVoidEmpty && // Not empty javascript
531
+ href !== browserConfig.anchorOnly; // Not just #
532
+
533
+ const shouldOpenNewTab = naturallyOpensNewTab || isNavigableLink;
534
+
535
+
536
+ if (shouldOpenNewTab) {
537
+ // Handle new tab opening
538
+ // If it's a link that doesn't naturally open in new tab, force it
539
+ if (isNavigableLink && !naturallyOpensNewTab) {
540
+ await element.evaluate((el, blankTarget) => {
541
+ if (el.tagName.toLowerCase() === 'a') {
542
+ el.setAttribute('target', blankTarget);
543
+ }
544
+ }, browserConfig.blankTarget);
545
+ }
546
+
547
+ // Set up popup listener before clicking
548
+ const popupPromise = page.context().waitForEvent('page', { timeout: browserConfig.popupTimeout });
549
+
550
+ // Click with force to avoid scrolling issues
551
+ await element.click({ force: browserConfig.forceClick });
552
+
553
+ try {
554
+ // Wait for new page to open
555
+ const newPage = await popupPromise;
556
+
557
+ // Generate tab ID for the new page
558
+ const newTabId = this.generateTabId();
559
+ this.registerNewPage(newTabId, newPage);
560
+
561
+ // Set up page properties
562
+ const browserConfig = this.configLoader.getBrowserConfig();
563
+ newPage.setDefaultNavigationTimeout(browserConfig.navigationTimeout);
564
+ newPage.setDefaultTimeout(browserConfig.navigationTimeout);
565
+
566
+
567
+ // Automatically switch to the new tab
568
+ this.currentTabId = newTabId;
569
+ await newPage.bringToFront();
570
+
571
+ // Wait for new page to be ready
572
+ await newPage.waitForLoadState('domcontentloaded', { timeout: browserConfig.popupTimeout }).catch(() => {});
573
+
574
+ return { success: true, method: 'playwright-aria-ref-newtab', newTabId };
575
+ } catch (popupError) {
576
+ return { success: true, method: 'playwright-aria-ref' };
577
+ }
578
+ } else {
579
+ // Add options to prevent scrolling issues
580
+ const browserConfig = this.configLoader.getBrowserConfig();
581
+ await element.click({ force: browserConfig.forceClick });
582
+
583
+ if (shouldCheckDiff && snapshotBefore) {
584
+ await page.waitForTimeout(300);
585
+ const snapshotAfter = await (page as any)._snapshotForAI();
586
+ const diffSnapshot = this.getSnapshotDiff(snapshotBefore, snapshotAfter, ['option', 'menuitem']);
587
+
588
+ if (diffSnapshot && diffSnapshot.trim() !== '') {
589
+ return { success: true, method: 'playwright-aria-ref', diffSnapshot };
590
+ }
591
+ }
592
+
593
+ return { success: true, method: 'playwright-aria-ref' };
594
+ }
595
+
596
+ } catch (error) {
597
+ console.error('[performClick] Exception during click for ref: %s', ref, error);
598
+ return { success: false, error: `Click failed with exception: ${error}` };
599
+ }
600
+ }
601
+
602
/**
 * Extract the lines of `snapshotAfter` that describe newly appeared elements
 * of the requested roles.
 *
 * A line counts as "new" when its first `[ref=...]` marker does not occur
 * anywhere in `snapshotBefore`. It is kept only if it contains one of
 * `targetRoles` as a whole word (case-insensitive).
 *
 * @param snapshotBefore Snapshot text captured before the action.
 * @param snapshotAfter Snapshot text captured after the action.
 * @param targetRoles Role names to keep (for example `option`, `menuitem`).
 * @returns The matching lines, trimmed and joined with `\n`, or `''` when
 *          nothing new of the requested roles appeared.
 */
private getSnapshotDiff(snapshotBefore: string, snapshotAfter: string, targetRoles: string[]): string {
  // Nothing can match an empty role list; also avoids building an empty
  // alternation that would match every word boundary.
  if (targetRoles.length === 0) {
    return '';
  }

  // Collect every ref that already existed before the action.
  const refsBefore = new Set<string>();
  const refPattern = /\[ref=([^\]]+)\]/g;
  let match: RegExpExecArray | null;
  while ((match = refPattern.exec(snapshotBefore)) !== null) {
    refsBefore.add(match[1]);
  }

  // Compile the role matcher once (instead of once per line and role) and
  // escape regex metacharacters so role names are always matched literally.
  const escapedRoles = targetRoles.map(role => role.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'));
  const rolePattern = new RegExp(`\\b(?:${escapedRoles.join('|')})\\b`, 'i');

  const newElements: string[] = [];
  for (const line of snapshotAfter.split('\n')) {
    const refMatch = line.match(/\[ref=([^\]]+)\]/);
    if (!refMatch || refsBefore.has(refMatch[1])) {
      continue;
    }
    if (rolePattern.test(line)) {
      newElements.push(line.trim());
    }
  }

  return newElements.join('\n');
}
636
+
637
/**
 * Type text into one element, or into several elements in sequence, using
 * Playwright's aria-ref selector.
 *
 * Multi-input mode: when `inputs` is non-empty, every entry is typed through a
 * recursive single-input call. Per-ref outcomes are returned in `details` and
 * `success` is true only when every entry succeeded.
 *
 * Single-input mode (`ref` and `text` given):
 *  - readonly and date/time inputs are clicked instead of filled; afterwards a
 *    newly appeared input/textarea with the same placeholder is filled;
 *  - any other element is clicked and filled directly. If the fill fails with
 *    a "not fillable"/timeout style error, the element is clicked again and
 *    the text is routed to a nested input, or to a newly appeared input with
 *    the same placeholder. Other fill errors are reported as failures.
 *
 * For combobox/textbox-like elements, `diffSnapshot` carries the
 * option/menuitem snapshot lines that appeared after typing.
 *
 * @returns `{ success: true, ... }` on success, otherwise
 *          `{ success: false, error }`. Exceptions are converted to the
 *          failure form rather than propagated.
 */
private async performType(page: Page, ref: string | undefined, text: string | undefined, inputs?: Array<{ ref: string; text: string }>): Promise<{ success: boolean; error?: string; details?: Record<string, any>; diffSnapshot?: string }> {
  try {
    // Ensure we have the latest snapshot
    await (page as any)._snapshotForAI();

    // Handle multiple inputs if provided
    if (inputs && inputs.length > 0) {
      const results: Record<string, { success: boolean; error?: string }> = {};

      for (const input of inputs) {
        const singleResult = await this.performType(page, input.ref, input.text);
        results[input.ref] = {
          success: singleResult.success,
          error: singleResult.error
        };
      }

      const failures = Object.entries(results).filter(([, r]) => !r.success);
      const allSuccess = failures.length === 0;
      const errors = failures.map(([failedRef, r]) => `${failedRef}: ${r.error}`).join('; ');

      return {
        success: allSuccess,
        error: allSuccess ? undefined : `Some inputs failed: ${errors}`,
        details: results
      };
    }

    // Handle single input (backward compatibility)
    if (!ref || text === undefined) {
      return { success: false, error: 'No valid input provided' };
    }

    const element = page.locator(`aria-ref=${ref}`).first();
    if ((await element.count()) === 0) {
      return { success: false, error: `Element with ref ${ref} not found` };
    }

    // Element traits that decide which typing strategy is used. They keep
    // their defaults when the lookup below fails.
    let originalPlaceholder: string | null = null;
    let isReadonly = false;
    let elementType: string | null = null;
    let shouldCheckDiff = false;

    try {
      // Get element info in one evaluation to minimize interactions
      const elementInfo = await element.evaluate((el: any) => {
        return {
          placeholder: el.placeholder || null,
          readonly: el.readOnly || el.hasAttribute('readonly'),
          type: el.type || null,
          tagName: el.tagName.toLowerCase(),
          role: el.getAttribute('role'),
          ariaHaspopup: el.getAttribute('aria-haspopup')
        };
      });

      originalPlaceholder = elementInfo.placeholder;
      isReadonly = elementInfo.readonly;
      elementType = elementInfo.type;
      const isCombobox = elementInfo.role === 'combobox' ||
        elementInfo.tagName === 'combobox' ||
        elementInfo.ariaHaspopup === 'listbox';
      const isTextbox = elementInfo.role === 'textbox' ||
        elementInfo.tagName === 'input' ||
        elementInfo.tagName === 'textarea';
      // Only these element kinds are checked for a dropdown after typing.
      shouldCheckDiff = isCombobox || isTextbox;
    } catch (e) {
      console.log(`Warning: Failed to get element attributes: ${e}`);
    }

    // Get snapshot before action to record existing elements
    const snapshotBefore = await (page as any)._snapshotForAI();
    const existingRefs = new Set<string>();
    const refPattern = /\[ref=([^\]]+)\]/g;
    let match: RegExpExecArray | null;
    while ((match = refPattern.exec(snapshotBefore)) !== null) {
      existingRefs.add(match[1]);
    }
    console.log(`Found ${existingRefs.size} total elements before action`);

    const skipDirectFill = isReadonly || ['date', 'datetime-local', 'time'].includes(elementType || '');

    if (skipDirectFill) {
      // Readonly or date/time input: click it and type into whatever
      // replacement input shows up.
      console.log(`Element ref=${ref} is readonly or date/time input, skipping direct fill attempt`);

      // Click with force option to avoid scrolling
      try {
        await element.click({ force: true });
        console.log(`Clicked readonly/special element ref=${ref} to trigger dynamic content`);
        // Wait for potential dynamic content to appear
        await page.waitForTimeout(500);
      } catch (clickError) {
        console.log(`Warning: Failed to click element: ${clickError}`);
      }

      console.log(`Looking for new elements that appeared after clicking readonly element...`);
      const filled = await this.fillNewInputMatchingPlaceholder(
        page, text, originalPlaceholder, existingRefs, snapshotBefore, shouldCheckDiff, 'clicking readonly element'
      );
      if (filled) {
        return filled;
      }

      console.log(`No suitable input element found for readonly ref ${ref}`);
      return { success: false, error: `Element ref=${ref} is readonly and no suitable input was found` };
    }

    // For normal inputs, click first then try to fill
    try {
      await element.click({ force: true });
      console.log(`Clicked element ref=${ref} before typing`);
    } catch (clickError) {
      console.log(`Warning: Failed to click element before typing: ${clickError}`);
    }

    try {
      // Use force option to avoid scrolling during fill
      await element.fill(text, { timeout: 3000, force: true });
    } catch (fillError: any) {
      // Tolerate thrown values that are not Error instances.
      const rawMessage = String(fillError?.message ?? fillError);
      console.log(`Fill error for ref ${ref}: ${rawMessage}`);

      // Messages that indicate the element itself is not fillable.
      // 'timeout' also covers 'TimeoutError' after lower-casing.
      const errorMessage = rawMessage.toLowerCase();
      const notFillableMarkers = [
        'not an <input>',
        'not have a role allowing',
        'element is not',
        'cannot type',
        'readonly',
        'not editable',
        'timeout'
      ];

      if (notFillableMarkers.some(marker => errorMessage.includes(marker))) {
        // Click the element again to trigger dynamic content (like date pickers)
        try {
          await element.click({ force: true });
          console.log(`Clicked element ref=${ref} again to trigger dynamic content`);
          // Wait for potential dynamic content to appear
          await page.waitForTimeout(500);
        } catch (clickError) {
          console.log(`Warning: Failed to click element to trigger dynamic content: ${clickError}`);
        }

        // Step 1: Try to find input elements within the clicked element
        const inputSelector = `input:visible, textarea:visible, [contenteditable="true"]:visible, [role="textbox"]:visible`;
        const inputElement = element.locator(inputSelector).first();

        if ((await inputElement.count()) > 0) {
          console.log(`Found input element within ref ${ref}, attempting to fill`);
          try {
            await inputElement.fill(text, { force: true });
            return await this.typeSuccessWithDiff(page, snapshotBefore, shouldCheckDiff);
          } catch (innerError) {
            console.log(`Failed to fill child element: ${innerError}`);
          }
        }

        // Step 2: Look for new elements that appeared after the action
        console.log(`Looking for new elements that appeared after action...`);
        const filled = await this.fillNewInputMatchingPlaceholder(
          page, text, originalPlaceholder, existingRefs, snapshotBefore, shouldCheckDiff, 'action'
        );
        if (filled) {
          return filled;
        }

        console.log(`No suitable input element found for ref ${ref}`);
      }

      // Re-throw the original error if we couldn't find an input element
      throw fillError;
    }

    return await this.typeSuccessWithDiff(page, snapshotBefore, shouldCheckDiff);
  } catch (error) {
    return { success: false, error: `Type failed: ${error}` };
  }
}

/**
 * Build the success result for a completed fill. When `shouldCheckDiff` is
 * set, waits briefly and attaches the option/menuitem lines that appeared
 * since `snapshotBefore`. The diff is best-effort: a failure while computing
 * it is logged and does not turn a successful fill into a failure.
 */
private async typeSuccessWithDiff(page: Page, snapshotBefore: string, shouldCheckDiff: boolean): Promise<{ success: boolean; diffSnapshot?: string }> {
  if (shouldCheckDiff) {
    try {
      await page.waitForTimeout(300);
      const snapshotAfter = await (page as any)._snapshotForAI();
      const diffSnapshot = this.getSnapshotDiff(snapshotBefore, snapshotAfter, ['option', 'menuitem']);
      if (diffSnapshot && diffSnapshot.trim() !== '') {
        return { success: true, diffSnapshot };
      }
    } catch (diffError) {
      console.log(`Warning: Failed to compute snapshot diff after typing: ${diffError}`);
    }
  }
  return { success: true };
}

/**
 * Fill the first input/textarea that appeared since `existingRefs` was
 * captured and whose placeholder equals `placeholder`.
 *
 * @param phase Text used in the log line describing what preceded the search.
 * @returns The success result when an element was filled, otherwise `null`.
 */
private async fillNewInputMatchingPlaceholder(
  page: Page,
  text: string,
  placeholder: string | null,
  existingRefs: Set<string>,
  snapshotBefore: string,
  shouldCheckDiff: boolean,
  phase: string
): Promise<{ success: boolean; diffSnapshot?: string } | null> {
  // Get snapshot after action to find new elements
  const snapshotAfter = await (page as any)._snapshotForAI();
  const newRefs = new Set<string>();
  const afterRefPattern = /\[ref=([^\]]+)\]/g;
  let afterMatch: RegExpExecArray | null;
  while ((afterMatch = afterRefPattern.exec(snapshotAfter)) !== null) {
    if (!existingRefs.has(afterMatch[1])) {
      newRefs.add(afterMatch[1]);
    }
  }

  console.log(`Found ${newRefs.size} new elements after ${phase}`);

  // Without a placeholder there is nothing to match new inputs against.
  if (!placeholder || newRefs.size === 0) {
    return null;
  }

  console.log(`Looking for new input elements with placeholder: ${placeholder}`);

  // Try each new ref to see if it's an input with our placeholder
  for (const newRef of newRefs) {
    try {
      const newElement = page.locator(`aria-ref=${newRef}`).first();
      const tagName = await newElement.evaluate(el => el.tagName.toLowerCase()).catch(() => null);
      if (tagName !== 'input' && tagName !== 'textarea') {
        continue;
      }

      const candidatePlaceholder = await newElement.getAttribute('placeholder').catch(() => null);
      if (candidatePlaceholder !== placeholder) {
        continue;
      }

      console.log(`Found new input element with matching placeholder: ref=${newRef}`);

      // Log the candidate's state for debugging
      const elementInfo = await newElement.evaluate((el: any) => {
        return {
          tagName: el.tagName,
          id: el.id,
          className: el.className,
          placeholder: el.placeholder,
          isVisible: el.offsetParent !== null,
          isReadonly: el.readOnly || el.getAttribute('readonly') !== null
        };
      });
      console.log(`New element details:`, JSON.stringify(elementInfo));

      // Try to fill it with force to avoid scrolling
      await newElement.fill(text, { force: true });
      return await this.typeSuccessWithDiff(page, snapshotBefore, shouldCheckDiff);
    } catch (e) {
      // Ignore errors for this candidate and try the next new ref
    }
  }

  return null;
}
975
+
976
/**
 * Select `value` in the element addressed by `ref` via Playwright's
 * aria-ref selector.
 *
 * @returns `{ success: true }` when the option was selected; otherwise
 *          `{ success: false, error }` (element missing or Playwright threw).
 */
private async performSelect(page: Page, ref: string, value: string): Promise<{ success: boolean; error?: string }> {
  try {
    // Take a fresh snapshot before resolving the ref
    await (page as any)._snapshotForAI();

    const target = await page.locator(`aria-ref=${ref}`).first();

    const matchCount = await target.count();
    if (!(matchCount > 0)) {
      return { success: false, error: `Element with ref ${ref} not found` };
    }

    // Delegate to Playwright's built-in option selection
    await target.selectOption(value);
    return { success: true };
  } catch (failure) {
    return { success: false, error: `Select failed: ${failure}` };
  }
}
1001
+
1002
/**
 * Perform a raw mouse action at viewport coordinates.
 *
 * @param control One of `click`, `right_click` or `dblclick`.
 * @param x Horizontal coordinate; rejected when negative or beyond the viewport width.
 * @param y Vertical coordinate; rejected when negative or beyond the viewport height.
 * @returns `{ success: true }` after the action, otherwise `{ success: false, error }`.
 */
private async performMouseControl(page: Page, control: string, x: number, y: number): Promise<{ success: boolean; error?: string }> {
  try {
    const size = page.viewportSize();
    if (!size) {
      return { success: false, error: 'Viewport size not available from page.' };
    }

    const outOfBounds = x < 0 || y < 0 || x > size.width || y > size.height;
    if (outOfBounds) {
      return { success: false, error: `Invalid coordinates, outside viewport bounds: (${x}, ${y})` };
    }

    if (control === 'click') {
      await page.mouse.click(x, y);
    } else if (control === 'right_click') {
      await page.mouse.click(x, y, { button: 'right' });
    } else if (control === 'dblclick') {
      await page.mouse.dblclick(x, y);
    } else {
      return { success: false, error: `Invalid control action: ${control}` };
    }

    return { success: true };
  } catch (failure) {
    return { success: false, error: `Mouse action failed: ${failure}` };
  }
}
1036
+
1037
/**
 * Drag from the centre of one element to the centre of another, both
 * addressed by snapshot ref IDs.
 *
 * The drag is performed with raw mouse events: move to the source, press,
 * move to the target, release. If moving to the target fails after the button
 * was pressed, the button is released before the error is reported so that
 * later actions do not start with a stuck mouse button.
 *
 * @param fromRef Ref of the element to drag.
 * @param toRef Ref of the element to drop onto.
 * @returns `{ success: true }` on completion, otherwise `{ success: false, error }`.
 */
private async performMouseDrag(page: Page, fromRef: string, toRef: string): Promise<{ success: boolean; error?: string }> {
  try {
    // Ensure we have the latest snapshot
    await (page as any)._snapshotForAI();

    // Get elements using Playwright's aria-ref selector
    const fromElement = page.locator(`aria-ref=${fromRef}`).first();
    const toElement = page.locator(`aria-ref=${toRef}`).first();

    // Check if elements exist
    const fromExists = await fromElement.count() > 0;
    const toExists = await toElement.count() > 0;

    if (!fromExists) {
      return { success: false, error: `Source element with ref ${fromRef} not found` };
    }

    if (!toExists) {
      return { success: false, error: `Target element with ref ${toRef} not found` };
    }

    // Bounding boxes are needed to compute the centre of each element
    const fromBox = await fromElement.boundingBox();
    const toBox = await toElement.boundingBox();

    if (!fromBox) {
      return { success: false, error: `Could not get bounding box for source element with ref ${fromRef}` };
    }

    if (!toBox) {
      return { success: false, error: `Could not get bounding box for target element with ref ${toRef}` };
    }

    const fromX = fromBox.x + fromBox.width / 2;
    const fromY = fromBox.y + fromBox.height / 2;
    const toX = toBox.x + toBox.width / 2;
    const toY = toBox.y + toBox.height / 2;

    // Perform the drag operation
    await page.mouse.move(fromX, fromY);
    await page.mouse.down();
    try {
      // Destination coordinates
      await page.mouse.move(toX, toY);
    } catch (moveError) {
      // Never leave the button pressed; the original failure is what gets reported.
      await page.mouse.up().catch(() => {});
      throw moveError;
    }
    await page.mouse.up();

    return { success: true };
  } catch (error) {
    return { success: false, error: `Mouse drag action failed: ${error}` };
  }
}
1093
+
1094
/**
 * Run a single browser action on the current page and report its outcome
 * together with timing information.
 *
 * The action is dispatched on `action.type`; a failing helper result is turned
 * into an exception, and every exception is converted into a
 * `{ success: false, message, timing }` result. After a successful action the
 * method waits for page stability before returning.
 */
async executeAction(action: BrowserAction): Promise<ActionResult> {
  const startTime = Date.now();
  const page = await this.getCurrentPage();

  let elementSearchTime = 0;
  let actionExecutionTime = 0;
  let stabilityWaitTime = 0;

  try {
    const elementSearchStart = Date.now();

    // Each action helper takes its own snapshot, so none is fetched up front.

    let newTabId: string | undefined;
    let customMessage: string | undefined;
    let actionDetails: Record<string, any> | undefined;

    // Records the (nominal) element search time and returns the action start timestamp.
    const beginAction = (): number => {
      elementSearchTime = Date.now() - elementSearchStart;
      return Date.now();
    };

    switch (action.type) {
      case 'click': {
        const began = beginAction();

        const outcome = await this.performClick(page, action.ref);
        if (!outcome.success) {
          throw new Error(`Click failed: ${outcome.error}`);
        }

        // A click may have opened a new tab and/or revealed new options.
        newTabId = outcome.newTabId;
        if (outcome.diffSnapshot) {
          actionDetails = { diffSnapshot: outcome.diffSnapshot };
        }

        actionExecutionTime = Date.now() - began;
        break;
      }

      case 'type': {
        const began = beginAction();

        const outcome = await this.performType(page, action.ref, action.text, action.inputs);
        if (!outcome.success) {
          throw new Error(`Type failed: ${outcome.error}`);
        }

        // Multi-input runs report a per-element summary.
        if (outcome.details) {
          const successCount = Object.values(outcome.details).filter((r: any) => r.success).length;
          const totalCount = Object.keys(outcome.details).length;
          customMessage = `Typed text into ${successCount}/${totalCount} elements`;
          actionDetails = outcome.details;
        }

        // Attach the dropdown diff, creating the details object if needed.
        if (outcome.diffSnapshot) {
          actionDetails = actionDetails || {};
          actionDetails.diffSnapshot = outcome.diffSnapshot;
        }

        actionExecutionTime = Date.now() - began;
        break;
      }

      case 'select': {
        const began = beginAction();

        const outcome = await this.performSelect(page, action.ref, action.value);
        if (!outcome.success) {
          throw new Error(`Select failed: ${outcome.error}`);
        }

        actionExecutionTime = Date.now() - began;
        break;
      }

      case 'scroll': {
        const began = beginAction();

        // Scrolling up is a negative vertical offset.
        let scrollAmount = action.amount;
        if (action.direction === 'up') {
          scrollAmount = -action.amount;
        }
        await page.evaluate((amount: number) => {
          window.scrollBy(0, amount);
        }, scrollAmount);

        // Update scroll position tracking
        await this.getCurrentScrollPosition();
        actionExecutionTime = Date.now() - began;
        break;
      }

      case 'enter': {
        const began = beginAction();
        const enterKey = this.configLoader.getBrowserConfig().enterKey;
        await page.keyboard.press(enterKey);
        actionExecutionTime = Date.now() - began;
        break;
      }

      case 'mouse_control': {
        const began = beginAction();
        const outcome = await this.performMouseControl(page, action.control, action.x, action.y);
        if (!outcome.success) {
          throw new Error(`Action failed: ${outcome.error}`);
        }
        actionExecutionTime = Date.now() - began;
        break;
      }

      case 'mouse_drag': {
        const began = beginAction();
        const outcome = await this.performMouseDrag(page, action.from_ref, action.to_ref);
        if (!outcome.success) {
          throw new Error(`Action failed: ${outcome.error}`);
        }
        actionExecutionTime = Date.now() - began;
        break;
      }

      case 'press_key': {
        const began = beginAction();
        // Key combinations are expressed by joining the keys with '+'.
        const combo = action.keys.join('+');
        await page.keyboard.press(combo);
        actionExecutionTime = Date.now() - began;
        break;
      }

      default:
        throw new Error(`Unknown action type: ${(action as any).type}`);
    }

    // Wait for stability after action
    const stabilityStart = Date.now();
    const stability = await this.waitForPageStability(page);
    stabilityWaitTime = Date.now() - stabilityStart;

    const totalTime = Date.now() - startTime;
    const message = customMessage || `Action ${action.type} executed successfully`;

    return {
      success: true,
      message,
      timing: {
        total_time_ms: totalTime,
        element_search_time_ms: elementSearchTime,
        action_execution_time_ms: actionExecutionTime,
        stability_wait_time_ms: stabilityWaitTime,
        dom_content_loaded_time_ms: stability.domContentLoadedTime,
        network_idle_time_ms: stability.networkIdleTime,
      },
      ...(newTabId ? { newTabId } : {}), // Only present when a new tab was opened
      ...(actionDetails ? { details: actionDetails } : {}), // Only present when the action produced details
    };
  } catch (error) {
    const totalTime = Date.now() - startTime;
    return {
      success: false,
      message: `Action ${action.type} failed: ${error}`,
      timing: {
        total_time_ms: totalTime,
        element_search_time_ms: elementSearchTime,
        action_execution_time_ms: actionExecutionTime,
        stability_wait_time_ms: stabilityWaitTime,
      },
    };
  }
}
1274
+
1275
/**
 * Wait until the page's DOM has stopped changing, or until `maxWaitTime`
 * elapses, whichever comes first.
 *
 * A MutationObserver installed in the page records the time of the last
 * mutation; the DOM is considered stable once no mutation has been seen for
 * 100 ms. The observer and its bookkeeping globals are always removed again.
 *
 * @param maxWaitTime Upper bound for the wait in milliseconds. Non-positive
 *        (or NaN) values return immediately: passing them through would
 *        produce a timeout of 0, which Playwright interprets as "no timeout".
 */
private async waitForDOMStability(page: Page, maxWaitTime: number = 500): Promise<void> {
  if (!(maxWaitTime > 0)) {
    return;
  }

  const stabilityThreshold = 100; // Consider stable if no changes for 100ms

  try {
    // Monitor DOM changes
    await page.evaluate(() => {
      const w = window as any;

      // Disconnect an observer left behind by an overlapping call so it is not leaked.
      if (w.__domStabilityObserver) {
        w.__domStabilityObserver.disconnect();
      }

      w.__domStabilityCheck = { changeCount: 0, lastChange: Date.now() };

      const observer = new MutationObserver(() => {
        const check = w.__domStabilityCheck;
        if (!check) return; // bookkeeping already cleaned up
        check.changeCount++;
        check.lastChange = Date.now();
      });

      // document.body can be missing early in page load; fall back to the root element.
      observer.observe(document.body ?? document.documentElement, {
        childList: true,
        subtree: true,
        attributes: true,
        characterData: true
      });

      w.__domStabilityObserver = observer;
    });

    // Wait until no changes for stabilityThreshold or timeout
    await page.waitForFunction(
      (threshold) => {
        const check = (window as any).__domStabilityCheck;
        return check && (Date.now() - check.lastChange) > threshold;
      },
      stabilityThreshold,
      { timeout: maxWaitTime }
    ).catch(() => {});
  } finally {
    // Cleanup
    await page.evaluate(() => {
      const observer = (window as any).__domStabilityObserver;
      if (observer) observer.disconnect();
      delete (window as any).__domStabilityObserver;
      delete (window as any).__domStabilityCheck;
    }).catch(() => {});
  }
}
1323
+
1324
/**
 * Wait for the configured "DOM content loaded" and "network idle" load states
 * in turn and report how long each wait took (in milliseconds).
 *
 * Best-effort: if either wait fails, waiting stops and any duration not yet
 * measured is reported as 0.
 */
private async waitForPageStability(page: Page): Promise<{ domContentLoadedTime: number; networkIdleTime: number }> {
  const elapsed = { domContentLoadedTime: 0, networkIdleTime: 0 };

  try {
    const domBegan = Date.now();
    const config = this.configLoader.getBrowserConfig();
    await page.waitForLoadState(config.domContentLoadedState as any, { timeout: config.pageStabilityTimeout });
    elapsed.domContentLoadedTime = Date.now() - domBegan;

    const idleBegan = Date.now();
    await page.waitForLoadState(config.networkIdleState as any, { timeout: config.networkIdleTimeout });
    elapsed.networkIdleTime = Date.now() - idleBegan;
  } catch (ignored) {
    // A failed stability wait is not an error; report what was measured.
  }

  return elapsed;
}
1343
+
1344
+ async visitPage(url: string): Promise<ActionResult & { newTabId?: string }> {
1345
+ const startTime = Date.now();
1346
+
1347
+ try {
1348
+ // Get current page to check if it's blank
1349
+ let currentPage: Page;
1350
+ let currentUrl: string;
1351
+
1352
+ try {
1353
+ currentPage = await this.getCurrentPage();
1354
+ currentUrl = currentPage.url();
1355
+ } catch (error: any) {
1356
+ // If no active page is available, getCurrentPage() will create one in CDP mode
1357
+ console.log('[visitPage] Failed to get current page:', error);
1358
+ throw new Error(`No active page available: ${error?.message || error}`);
1359
+ }
1360
+
1361
+ // Check if current page is blank or if this is the first navigation
1362
+ const browserConfig = this.configLoader.getBrowserConfig();
1363
+
1364
+ // Use unified blank page detection
1365
+ const isBlankPage = this.isBlankPageUrl(currentUrl) || currentUrl === browserConfig.defaultStartUrl;
1366
+
1367
+ const shouldUseCurrentTab = isBlankPage || !this.hasNavigatedBefore;
1368
+
1369
+
1370
+ if (shouldUseCurrentTab) {
1371
+ // Navigate in current tab if it's blank
1372
+
1373
+ const navigationStart = Date.now();
1374
+ const browserConfig = this.configLoader.getBrowserConfig();
1375
+ await currentPage.goto(url, {
1376
+ timeout: browserConfig.navigationTimeout,
1377
+ waitUntil: browserConfig.domContentLoadedState as any
1378
+ });
1379
+
1380
+ // Reset scroll position after navigation
1381
+ this.scrollPosition = { x: 0, y: 0 };
1382
+
1383
+ // Mark that we've navigated
1384
+ this.hasNavigatedBefore = true;
1385
+
1386
+ const navigationTime = Date.now() - navigationStart;
1387
+ const stabilityResult = await this.waitForPageStability(currentPage);
1388
+ const totalTime = Date.now() - startTime;
1389
+
1390
+ return {
1391
+ success: true,
1392
+ message: `Navigated to ${url}`,
1393
+ timing: {
1394
+ total_time_ms: totalTime,
1395
+ navigation_time_ms: navigationTime,
1396
+ dom_content_loaded_time_ms: stabilityResult.domContentLoadedTime,
1397
+ network_idle_time_ms: stabilityResult.networkIdleTime,
1398
+ },
1399
+ };
1400
+ } else {
1401
+ // Open in new tab if current page has content
1402
+ if (!this.context) {
1403
+ throw new Error('Browser context not initialized');
1404
+ }
1405
+
1406
+ const navigationStart = Date.now();
1407
+
1408
+ // In CDP mode, find an available blank tab instead of creating new page
1409
+ let newPage: Page | null = null;
1410
+ let newTabId: string | null = null;
1411
+
1412
+ const browserConfig = this.configLoader.getBrowserConfig();
1413
+ if (browserConfig.cdpUrl) {
1414
+ // CDP mode: find an available blank tab
1415
+ const allPages = this.context.pages();
1416
+ for (const page of allPages) {
1417
+ const pageUrl = page.url();
1418
+ // Check if this page is not already tracked and is blank
1419
+ const isTracked = Array.from(this.pages.values()).includes(page);
1420
+ if (!isTracked && this.isBlankPageUrl(pageUrl)) {
1421
+ newPage = page;
1422
+ newTabId = this.generateTabId();
1423
+ this.registerNewPage(newTabId, newPage);
1424
+ break;
1425
+ }
1426
+ }
1427
+
1428
+ if (!newPage || !newTabId) {
1429
+ console.log('[CDP] No available blank tabs, creating new page');
1430
+ newPage = await this.context.newPage();
1431
+ newTabId = this.generateTabId();
1432
+ this.registerNewPage(newTabId, newPage);
1433
+ }
1434
+ } else {
1435
+ // Non-CDP mode: create new page as usual
1436
+ newPage = await this.context.newPage();
1437
+ newTabId = this.generateTabId();
1438
+ this.registerNewPage(newTabId, newPage);
1439
+ }
1440
+
1441
+ // Set up page properties
1442
+ newPage.setDefaultNavigationTimeout(browserConfig.navigationTimeout);
1443
+ newPage.setDefaultTimeout(browserConfig.navigationTimeout);
1444
+
1445
+ // Navigate to the URL
1446
+ await newPage.goto(url, {
1447
+ timeout: browserConfig.navigationTimeout,
1448
+ waitUntil: browserConfig.domContentLoadedState as any
1449
+ });
1450
+
1451
+ // Automatically switch to the new tab
1452
+ this.currentTabId = newTabId;
1453
+ await newPage.bringToFront();
1454
+
1455
+ // Reset scroll position for the new page
1456
+ this.scrollPosition = { x: 0, y: 0 };
1457
+
1458
+ // Mark that we've navigated
1459
+ this.hasNavigatedBefore = true;
1460
+
1461
+ const navigationTime = Date.now() - navigationStart;
1462
+ const stabilityResult = await this.waitForPageStability(newPage);
1463
+ const totalTime = Date.now() - startTime;
1464
+
1465
+ return {
1466
+ success: true,
1467
+ message: `Opened ${url} in new tab`,
1468
+ newTabId: newTabId, // Include the new tab ID
1469
+ timing: {
1470
+ total_time_ms: totalTime,
1471
+ navigation_time_ms: navigationTime,
1472
+ dom_content_loaded_time_ms: stabilityResult.domContentLoadedTime,
1473
+ network_idle_time_ms: stabilityResult.networkIdleTime,
1474
+ },
1475
+ };
1476
+ }
1477
+ } catch (error) {
1478
+ const totalTime = Date.now() - startTime;
1479
+ return {
1480
+ success: false,
1481
+ message: `Navigation to ${url} failed: ${error}`,
1482
+ timing: {
1483
+ total_time_ms: totalTime,
1484
+ navigation_time_ms: 0,
1485
+ dom_content_loaded_time_ms: 0,
1486
+ network_idle_time_ms: 0,
1487
+ },
1488
+ };
1489
+ }
1490
+ }
1491
+
1492
/**
 * Make the tab registered under `tabId` the current tab.
 *
 * Returns `false` when the id is unknown or its page is already closed (a
 * closed page is also removed from `this.pages`). Otherwise records the id
 * in `this.currentTabId`, makes a best-effort attempt to focus the page,
 * and returns `true`. Focus failures are logged, never propagated.
 *
 * @param tabId Identifier of a tab previously registered in `this.pages`.
 * @returns `true` if the internal current-tab pointer was switched.
 */
async switchToTab(tabId: string): Promise<boolean> {
  const targetPage = this.pages.has(tabId) ? this.pages.get(tabId)! : null;
  if (targetPage === null) {
    return false;
  }

  if (targetPage.isClosed()) {
    // Stale entry: forget the closed page and report failure.
    this.pages.delete(tabId);
    return false;
  }

  try {
    console.log(`Switching to tab ${tabId}`);

    // Internal bookkeeping is updated before any attempt to focus the tab,
    // so the switch holds even if the focus attempts below fail.
    this.currentTabId = tabId;

    try {
      // First attempt: focus the window from inside the page. Any rejection
      // from evaluate() is deliberately discarded.
      await targetPage.evaluate(() => {
        window.focus();
        window.dispatchEvent(new Event('focus'));
      }).catch(() => {});

      // Second attempt (headed browsers only): bring the tab to front, but
      // as a detached task so this call does not wait for it.
      const runsHeaded = !this.configLoader.getBrowserConfig().headless;
      if (runsHeaded) {
        const raiseTabLater = async (): Promise<void> => {
          const delayedConfig = this.configLoader.getBrowserConfig();
          await new Promise(resolve => setTimeout(resolve, delayedConfig.navigationDelay));
          try {
            await targetPage.bringToFront();
          } catch (e) {
            // The tab is already current internally; only note the failure.
            console.debug(`bringToFront failed for ${tabId}, but tab is switched internally`);
          }
        };
        // Intentionally not awaited.
        Promise.resolve().then(raiseTabLater);
      }
    } catch (error) {
      // Focus problems are non-fatal because currentTabId is already set.
      console.warn(`Tab focus warning for ${tabId}:`, error);
    }

    console.log(`Successfully switched to tab ${tabId}`);
    return true;
  } catch (error) {
    console.error(`Error switching to tab ${tabId}:`, error);
    return false;
  }
}
1549
+
1550
/**
 * Close the tab registered under `tabId` and drop it from `this.pages`.
 *
 * If the closed tab was the current one, the first remaining tab (in map
 * iteration order) becomes current, or `this.currentTabId` becomes `null`
 * when no tabs are left.
 *
 * @param tabId Identifier of the tab to close.
 * @returns `false` when the id is not registered, `true` otherwise.
 */
async closeTab(tabId: string): Promise<boolean> {
  if (!this.pages.has(tabId)) {
    return false;
  }

  const doomedPage = this.pages.get(tabId)!;
  const stillOpen = !doomedPage.isClosed();
  if (stillOpen) {
    await doomedPage.close();
  }
  this.pages.delete(tabId);

  // Closing a background tab leaves the current-tab pointer untouched.
  if (tabId !== this.currentTabId) {
    return true;
  }

  const survivors = Array.from(this.pages.keys());
  this.currentTabId = survivors.length > 0 ? survivors[0] : null;
  return true;
}
1574
+
1575
/**
 * Execute a batch of keyboard operations on the current page.
 *
 * Supported operation types:
 *  - `press`: presses `keys` joined with `+` as a single key combination.
 *  - `type`:  types `text`, with a per-character `delay` clamped to [0, 1000] ms.
 *  - `wait`:  sleeps for `delay`, clamped to [0, 10000] ms.
 * Operations with any other `type` are ignored.
 *
 * @param operations Operations to run in order (at most 100 per batch).
 * @param skipStabilityWait When true, skip waiting for the page load state
 *   and only pause for a fixed 50 ms after the batch.
 * @returns A result object with `success`, `message` and `timing`. Errors
 *   raised while validating or executing the batch are reported through
 *   `success: false` rather than thrown; only a failure to obtain the
 *   current page rejects the returned promise.
 */
async batchKeyboardInput(operations: Array<{type: string, keys?: string[], text?: string, delay?: number}>, skipStabilityWait: boolean = false): Promise<any> {
  const startTime = Date.now();
  const page = await this.getCurrentPage();

  try {
    const maxOperations = 100; // Prevent excessive number of operations per batch
    // Report the two validation failures separately: a non-array payload is
    // not "too many operations".
    if (!Array.isArray(operations)) {
      throw new Error('Operations must be an array');
    }
    if (operations.length > maxOperations) {
      throw new Error(`Too many operations in batch (max ${maxOperations} allowed)`);
    }

    const executionStart = Date.now();

    for (const op of operations) {
      switch (op.type) {
        case 'press':
          if (op.keys) {
            const keys = op.keys.join('+');
            await page.keyboard.press(keys);
          }
          break;
        case 'type':
          if (op.text) {
            // Limit delay to prevent resource exhaustion attacks
            const maxTypeDelay = 1000; // 1 second per character max
            let delayValue = Number(op.delay);
            // A missing or invalid delay (NaN, Infinity, negative) means "no delay".
            if (!isFinite(delayValue) || delayValue < 0) delayValue = 0;
            const safeTypeDelay = Math.min(delayValue, maxTypeDelay);
            await page.keyboard.type(op.text, { delay: safeTypeDelay });
          }
          break;
        case 'wait':
          // Only apply wait if op.delay is a non-negative finite number
          // Limit to prevent resource exhaustion (CodeQL js/resource-exhaustion)
          {
            const MAX_WAIT_DELAY = 10000; // 10 seconds maximum
            let delayValue = Number(op.delay);
            if (!isFinite(delayValue) || delayValue < 0) {
              delayValue = 0;
            }
            // Clamp delay to safe range [0, MAX_WAIT_DELAY]
            const safeDelay = delayValue > MAX_WAIT_DELAY ? MAX_WAIT_DELAY : delayValue;
            // lgtm[js/resource-exhaustion]
            // Safe: delay is clamped to MAX_WAIT_DELAY (10 seconds)
            await new Promise(resolve => setTimeout(resolve, safeDelay));
          }
          break;
      }
    }

    const executionTime = Date.now() - executionStart;
    let stabilityTime = 0;

    if (!skipStabilityWait) {
      const stabilityStart = Date.now();

      try {
        const browserConfig = this.configLoader.getBrowserConfig();
        await page.waitForLoadState(browserConfig.domContentLoadedState as any, { timeout: browserConfig.pageStabilityTimeout });
      } catch (error) {
        // Best effort: a failed or timed-out load-state wait must not fail
        // a batch whose key operations already ran.
      }

      await new Promise(resolve => setTimeout(resolve, 50));
      stabilityTime = Date.now() - stabilityStart;
    } else {
      await new Promise(resolve => setTimeout(resolve, 50));
      stabilityTime = 50;
    }

    const totalTime = Date.now() - startTime;

    return {
      success: true,
      message: `Batch keyboard input completed (${operations.length} operations)`,
      timing: {
        total_time_ms: totalTime,
        execution_time_ms: executionTime,
        stability_wait_time_ms: stabilityTime,
        operations_count: operations.length,
        skipped_stability: skipStabilityWait,
      },
    };
  } catch (error) {
    const totalTime = Date.now() - startTime;
    return {
      success: false,
      message: `Batch keyboard input failed: ${error}`,
      timing: {
        total_time_ms: totalTime,
      },
    };
  }
}
1669
+
1670
/**
 * Describe every registered tab whose page is still open.
 *
 * Tabs whose page is closed, or whose title cannot be read, are left out
 * of the result.
 *
 * @returns One `TabInfo` entry per accessible tab, in `this.pages` order.
 */
async getTabInfo(): Promise<TabInfo[]> {
  const openTabs: TabInfo[] = [];

  for (const [id, tabPage] of this.pages) {
    if (tabPage.isClosed()) {
      continue;
    }

    try {
      const entry: TabInfo = {
        tab_id: id,
        title: await tabPage.title(),
        url: tabPage.url(),
        is_current: id === this.currentTabId,
      };
      openTabs.push(entry);
    } catch (error) {
      // Skip tabs that can't be accessed
    }
  }

  return openTabs;
}
1693
+
1694
/**
 * Capture a screenshot of the current page.
 *
 * The timeout and full-page flag come from the browser configuration.
 *
 * @returns The image buffer plus the elapsed time in milliseconds, measured
 *   from method entry (so it includes obtaining the current page).
 */
async takeScreenshot(): Promise<{ buffer: Buffer; timing: { screenshot_time_ms: number } }> {
  const startedAt = Date.now();
  const activePage = await this.getCurrentPage();

  const { screenshotTimeout, fullPageScreenshot } = this.configLoader.getBrowserConfig();
  const buffer = await activePage.screenshot({
    timeout: screenshotTimeout,
    fullPage: fullPageScreenshot
  });

  return {
    buffer,
    timing: {
      screenshot_time_ms: Date.now() - startedAt,
    },
  };
}
1713
+
1714
/**
 * Close all tracked pages and release the context and browser.
 *
 * Pages are closed first and the tab registry is reset. The context is
 * closed only when `contextOwnedByUs` is set:
 *  - without a CDP URL, it is closed up front and errors propagate;
 *  - with a CDP URL, it is closed only if a browser handle exists, and
 *    errors from that close are ignored.
 * Finally the browser handle, if any, is closed and cleared.
 */
async close(): Promise<void> {
  const browserConfig = this.configLoader.getBrowserConfig();
  const usingCdp = Boolean(browserConfig.cdpUrl);

  for (const trackedPage of this.pages.values()) {
    if (trackedPage.isClosed()) {
      continue;
    }
    await trackedPage.close();
  }

  this.pages.clear();
  this.currentTabId = null;

  // Locally launched browser: close our own context before the browser.
  if (!usingCdp && this.context && this.contextOwnedByUs) {
    await this.context.close();
    this.context = null;
    this.contextOwnedByUs = false;
  }

  if (!this.browser) {
    return;
  }

  // CDP mode: tear down only the context we created, tolerating failures.
  if (usingCdp && this.context && this.contextOwnedByUs) {
    await this.context.close().catch(() => {});
    this.context = null;
    this.contextOwnedByUs = false;
  }

  // Same call in both modes; the original notes that in CDP mode this
  // acts as a disconnect.
  await this.browser.close();
  this.browser = null;
}
1750
+
1751
/**
 * Keep only the snapshot elements that overlap the viewport rectangle.
 *
 * Element coordinates are compared directly against `(0, 0)` to
 * `(viewport.width, viewport.height)`; `scrollPos` is accepted but not
 * used. Elements without coordinates are always kept.
 *
 * @param elements Snapshot elements keyed by ref.
 * @param viewport Viewport size in the same units as the coordinates.
 * @param scrollPos Current scroll offset (unused by this method).
 * @returns A new record holding the elements considered visible.
 */
private filterElementsInViewport(
  elements: Record<string, SnapshotElement>,
  viewport: { width: number, height: number },
  scrollPos: { x: number, y: number }
): Record<string, SnapshotElement> {
  const visible: Record<string, SnapshotElement> = {};

  // The viewport spans from the origin to (rightEdge, bottomEdge).
  const rightEdge = viewport.width;
  const bottomEdge = viewport.height;

  Object.entries(elements).forEach(([ref, candidate]) => {
    const box = candidate.coordinates;

    // No geometry available: keep the element as a fallback.
    if (!box) {
      visible[ref] = candidate;
      return;
    }

    // Visible means the element's rectangle overlaps the viewport on both axes.
    const overlapsHorizontally = box.x < rightEdge && box.x + box.width > 0;
    const overlapsVertically = box.y < bottomEdge && box.y + box.height > 0;

    if (overlapsHorizontally && overlapsVertically) {
      visible[ref] = candidate;
    }
  });

  return visible;
}
1792
+
1793
/**
 * Rebuild the snapshot text, dropping lines for filtered-out elements.
 *
 * A line that contains a `[ref=...]` marker is kept only when that ref is
 * present in `filteredElements`; lines without a marker are always kept.
 *
 * @param originalSnapshot Newline-separated snapshot text.
 * @param filteredElements Elements that survived filtering, keyed by ref.
 * @returns The remaining lines joined with newlines, in original order.
 */
private rebuildSnapshotText(originalSnapshot: string, filteredElements: Record<string, SnapshotElement>): string {
  const refPattern = /\[ref=([^\]]+)\]/;

  return originalSnapshot
    .split('\n')
    .filter((line) => {
      const marker = refPattern.exec(line);
      // Lines without a ref marker (headers, etc.) always pass through.
      if (marker === null) {
        return true;
      }
      return Boolean(filteredElements[marker[1]]);
    })
    .join('\n');
}
1814
+
1815
+ }